"""Build a tab-separated vocabulary list from a file of Chinese text.

Usage: python <script> <path containing 'hanzi.txt'> <unit>

Every line of the input file is segmented with jieba in full mode. For each
segment that is not blank or punctuation, one row is written to the output
file with the columns Pinyin, Hanzi, English, module, unit and course. The
output path is the input path with 'hanzi.txt' replaced by 'words.csv'.
No header row is written.
"""
import jieba
import sys
import csv
# the pinyin library
from pypinyin import pinyin, lazy_pinyin, Style
from googletrans import Translator

# Characters that never form a vocabulary entry on their own.
SKIPPED_CHARS = ' ;,。!:、?'


def _is_skippable(segment):
    """Return True when *segment* is empty or only whitespace/punctuation."""
    return all(ch.isspace() or ch in SKIPPED_CHARS for ch in segment)


def _to_pinyin(segment):
    """Return the pinyin readings of *segment* joined into a single string."""
    # pinyin() returns one list of readings per syllable; flatten it directly
    # instead of post-processing the repr() of the nested lists.
    return ''.join(
        reading for syllable in pinyin(segment) for reading in syllable
    )


if len(sys.argv) < 3:
    sys.exit('usage: ' + sys.argv[0] + ' <name>hanzi.txt <unit>')

translator = Translator()

fileName = str(sys.argv[1])
theUnit = str(sys.argv[2])
print('the Unit : ', theUnit)
outPutFileName = fileName.replace('hanzi.txt', 'words.csv')
print('Filename : ', fileName, '**', 'outPutFileName : ', outPutFileName)

# If the name does not contain 'hanzi.txt' the replace above is a no-op and
# opening the "output" for writing would truncate the input file.
if outPutFileName == fileName:
    sys.exit("input file name must contain 'hanzi.txt': " + fileName)

# The input is opened first so a missing input does not leave an empty output
# file behind. 'utf-8-sig' reads UTF-8 with or without a byte-order mark;
# newline='' is what the csv module requires for files it writes to.
with open(fileName, 'r', encoding='utf-8-sig') as myFile, \
        open(outPutFileName, 'w', encoding='utf-8', newline='') as t:
    print('output csv file opened')
    fieldNames = ['Pinyin', 'Hanzi', 'English', 'module', 'unit', 'course']
    csvwriter = csv.DictWriter(t, fieldnames=fieldNames, delimiter='\t')

    for lineNumber, l in enumerate(myFile, start=1):
        print('LineNumber : ', lineNumber)
        # cut_all=True is jieba's full mode.
        for x in jieba.cut(l, cut_all=True):
            if _is_skippable(x):
                continue
            csvwriter.writerow({
                'Hanzi': x,
                'Pinyin': _to_pinyin(x),
                'English': translator.translate(x).text,
                'module': 9,
                'unit': theUnit,
                'course': 'FSI-Chinese',
            })