# mod9Words.py (1.6 KB as originally listed)

  1. import jieba
  2. import sys
  3. import csv
  4. # the pinyin library
  5. from pypinyin import pinyin, lazy_pinyin, Style
  6. from googletrans import Translator
  7. translator = Translator()
  8. fileName = str(sys.argv[1])
  9. theUnit = str(sys.argv[2])
  10. print ('the Unit : ', theUnit)
  11. outPutFileName = fileName.replace('hanzi.txt','words.csv')
  12. print ('Filename : ', fileName, '**', 'outPutFileName : ', outPutFileName)
  13. with open(outPutFileName,'w') as t:
  14. print('output csv file opened')
  15. fieldNames = ['Pinyin','Hanzi','English','module','unit','course']
  16. csvwriter = csv.DictWriter(t, fieldnames=fieldNames,delimiter='\t')
  17. #
  18. myFile = open(fileName, 'r')
  19. theLines = myFile.readlines()
  20. lineNumber = 0
  21. for l in theLines:
  22. finalRow = {}
  23. # print (l)
  24. lineNumber = lineNumber + 1
  25. print ('LineNumber : ', lineNumber)
  26. segments = jieba.cut(l, cut_all=True)
  27. for x in segments:
  28. if ((x in ' ;,。!:、?') or (x == '') or (x == '\n')):
  29. pass
  30. else:
  31. if (x != ''):
  32. finalRow['Hanzi'] = x
  33. temp = pinyin(x)
  34. finalRow['Pinyin'] = (''.join(str(x) for x in temp)).replace("'",'').replace("]",'').replace("[",'')
  35. finalRow['English'] = translator.translate(x).text
  36. finalRow['module'] = 9
  37. finalRow['unit'] = theUnit
  38. finalRow['course'] = 'FSI-Chinese'
  39. if (finalRow != ''):
  40. csvwriter.writerow(finalRow)
  41. myFile.close()
  42. t.close()