# Program to complete the grammar CSV (tab separated) file with audio and translations
# Licence: MIT
# Copyrights : Eric Streit 2022
#
# What this script does:
#   * adding pinyin to the file
#   * adding end of line (1-1-FSI-Chinese)
#   * dumping all Hanzi parts into a hanzi.txt file
#
# parameters:
#   * name of the file to deal with
#   * name of the unit (to be added at the end of each line)
#
# 5 original fields: number, English1, Hanzi1,English2,Hanzi2
#
# Outputs (both written next to the input file):
#   * FSI-<unit>-frames.csv : tab separated, no header row, 10 columns
#   * hanzi.txt             : every Hanzi1 / Hanzi2 value, one per line

# standard library
import copy
import csv
import os
import random
import re
import shutil
import sys
import time
from typing import TYPE_CHECKING, Generator

# the library to split the Chinese sentences into words
import jieba
# the pinyin library
from pypinyin import pinyin, lazy_pinyin, Style
# google TTS
from GoogleTTS import GoogleTTS


def _pinyin_sentence(hanzi):
    """Return the pinyin of *hanzi* as a single space-separated string.

    pinyin() is called with its default options and is expected to return
    one inner list per syllable / non-Hanzi chunk; the first entry of each
    inner list is used.  The strings are joined directly rather than via
    str(list) plus quote/bracket stripping, because that goes through repr():
    it escaped characters such as the ideographic space (U+3000), left stray
    double quotes around chunks containing an apostrophe, and deleted real
    brackets and apostrophes from the text.
    """
    return ' '.join(readings[0] for readings in pinyin(hanzi))


# Fail with a readable message instead of a bare IndexError.
if len(sys.argv) < 3:
    sys.exit('usage: ' + sys.argv[0] + ' <grammar file (tab separated)> <unit>')

# the file name to work with
theFileName = str(sys.argv[1])
print('theFileName : ', theFileName)

theFileNamePrefix = os.path.dirname(theFileName)
print('theFileNamePrefix : ', theFileNamePrefix)

theHanziFile = os.path.join(theFileNamePrefix, 'hanzi.txt')

theUnit = str(sys.argv[2])
print('the Unit : ', theUnit)

# the output filename according to the input filename
theOutputFileName = os.path.join(theFileNamePrefix, 'FSI-' + theUnit + '-frames.csv')
print('theOutputFileName : ', theOutputFileName)

# Only displayed: the same three values are written as the
# module / unit / course columns of every output row below.
endOfLine = '\t9\t' + theUnit + '\tFSI-Chinese'
print('the endOfLine : ', endOfLine)

baseDirectory = os.getcwd()
print(' Base directory : ', baseDirectory)

fieldNames = ['numero', 'Pinyin1', 'Hanzi1', 'English1',
              'Pinyin2', 'Hanzi2', 'English2',
              'module', 'unit', 'course']

# The files carry Chinese text, so the encoding is explicit instead of
# depending on the platform locale.  newline='' is what the csv module asks
# for on files it reads or writes (avoids '\r\r\n' line endings on Windows).
with open(theOutputFileName, 'w', encoding='utf-8', newline='') as o:
    # No header row is written: the output only contains data lines.
    csvwriter = csv.DictWriter(o, fieldnames=fieldNames, delimiter='\t')
    with open(theHanziFile, 'w', encoding='utf-8') as t:
        print('text file opened')
        # 'utf-8-sig' reads plain UTF-8 and also drops a leading BOM, which
        # would otherwise end up inside the first field name ('number').
        with open(theFileName, 'r', encoding='utf-8-sig', newline='') as f:
            print('csv file opened')
            # see the names of the fields above
            data = csv.DictReader(f, delimiter='\t')
            for theLineNumber, row in enumerate(data, start=1):
                print('theLineNumber : ', theLineNumber)

                # dump the raw Hanzi sentences, one per line
                t.write(row['Hanzi1'] + '\n')
                t.write(row['Hanzi2'] + '\n')

                # A fresh dict per row, so nothing can leak from one
                # line to the next.
                finalRow = {
                    'numero': row['number'],
                    'Pinyin1': _pinyin_sentence(row['Hanzi1']),
                    'Hanzi1': row['Hanzi1'],
                    'English1': row['English1'],
                    'Pinyin2': _pinyin_sentence(row['Hanzi2']),
                    'Hanzi2': row['Hanzi2'],
                    'English2': row['English2'],
                    'module': 9,
                    # raises ValueError when the unit argument is not an integer
                    'unit': int(theUnit),
                    'course': 'FSI-Chinese',
                }
                csvwriter.writerow(finalRow)
# The with-blocks have closed all three files; no explicit close() is needed.