# eric / FSI-Chinese
							import jieba
import sys
import csv
# the pinyin library
from pypinyin import pinyin, lazy_pinyin, Style
from googletrans import Translator


# Output columns, in the order they are written to the tab-separated file.
FIELD_NAMES = ['Pinyin', 'Hanzi', 'English', 'module', 'unit', 'course']
# A segment that occurs inside this string (a space or one of the
# full-width punctuation marks) is treated as a separator, not a word.
SEPARATORS = ' ；，。！：、？'
# Constant values stamped on every output row.
MODULE_NUMBER = 9
COURSE_NAME = 'FSI-Chinese'


def output_name_for(file_name):
    """Return the output CSV path derived from *file_name*.

    The name is obtained by replacing ``'hanzi.txt'`` with ``'words.csv'``.

    Raises:
        ValueError: if *file_name* does not contain ``'hanzi.txt'``.  In that
            case the derived name would equal the input name, and opening it
            for writing would wipe the input file before it is read.
    """
    output_name = file_name.replace('hanzi.txt', 'words.csv')
    if output_name == file_name:
        raise ValueError(
            "input file name must contain 'hanzi.txt': %r" % (file_name,)
        )
    return output_name


def is_vocabulary(segment):
    """Return True if *segment* should get a row in the word list.

    Whitespace-only segments (which includes the empty string and a bare
    newline) and segments found in ``SEPARATORS`` are rejected.
    """
    return bool(segment.strip()) and segment not in SEPARATORS


def to_pinyin(word):
    """Return the pinyin of *word* as one string.

    ``pinyin()`` returns a list of per-character lists of readings; the
    readings are joined directly instead of stripping quotes and brackets
    from the list's ``repr``, which mangled any segment that itself
    contained ``'``, ``[`` or ``]`` or a character ``repr`` escapes.
    """
    return ''.join(''.join(readings) for readings in pinyin(word))


def main(argv=None):
    """Segment a hanzi text file and write a tab-separated word list.

    Args:
        argv: argument vector; defaults to ``sys.argv``.  ``argv[1]`` is the
            input path (its name must contain ``hanzi.txt``) and ``argv[2]``
            is the unit label copied into every row.

    Each line of the input is cut with ``jieba.cut(..., cut_all=True)``.
    Every segment accepted by ``is_vocabulary`` produces one row holding its
    pinyin, the segment itself, the text returned by
    ``Translator().translate(segment)``, and the constant module, unit and
    course values.  No header row is written.

    Exits with a message when arguments are missing or the input name
    cannot be mapped to a distinct output name.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        sys.exit('usage: <script> <path containing hanzi.txt> <unit>')

    file_name = str(argv[1])
    the_unit = str(argv[2])
    print('the Unit : ', the_unit)

    try:
        output_name = output_name_for(file_name)
    except ValueError as err:
        sys.exit(str(err))
    print('Filename : ', file_name, '**', 'outPutFileName : ', output_name)

    translator = Translator()

    # The input is opened first so that a missing input does not leave an
    # empty output file behind.  Encodings are explicit because the data is
    # Chinese text ('utf-8-sig' also tolerates a leading BOM), and
    # newline='' is what the csv module requires of files it writes to.
    with open(file_name, 'r', encoding='utf-8-sig') as source, \
            open(output_name, 'w', encoding='utf-8', newline='') as target:
        print('output csv file opened')
        csvwriter = csv.DictWriter(
            target, fieldnames=FIELD_NAMES, delimiter='\t'
        )
        for line_number, line in enumerate(source, start=1):
            print('LineNumber : ', line_number)
            for segment in jieba.cut(line, cut_all=True):
                if not is_vocabulary(segment):
                    continue
                csvwriter.writerow({
                    'Hanzi': segment,
                    'Pinyin': to_pinyin(segment),
                    'English': translator.translate(segment).text,
                    'module': MODULE_NUMBER,
                    'unit': the_unit,
                    'course': COURSE_NAME,
                })


if __name__ == '__main__':
    main()