# eric / FSI-Chinese
# Program to complete the grammar CSV (tab separated) file with audio and translations
# Licence: MIT
# Copyright: Eric Streit <eric@yojik.eu> 2022

# adding pinyin to the file
# adding end of line (1-1-FSI-Chinese)
# dumping all Hanzi parts into a hanzi.txt file

# parameters:
# * name of the file to deal with
# * name of the unit (to be added at the end of each line)

# 5 original fields: number, English1, Hanzi1, English2, Hanzi2

# the CSV library
import csv
from typing import TYPE_CHECKING, Generator
# the library to split the Chinese sentences into words
import jieba
# the pinyin library
from pypinyin import pinyin, lazy_pinyin, Style
# the hanzipi library for decomposing, finding definitions and examples

# os module
import os
# shutil (moving files)
import shutil
# random
import random,copy, re, sys

# google TTS
from GoogleTTS import GoogleTTS
# delay
import time

#
# Constant values written into the 'module' and 'course' columns of every row.
MODULE_NUMBER = 9
COURSE_NAME = 'FSI-Chinese'

# Column order of the generated frames file. No header row is written.
FIELD_NAMES = ['numero','Pinyin1','Hanzi1','English1','Pinyin2','Hanzi2','English2','module','unit','course']


def joinPinyin(syllables):
    """Flatten the nested result of ``pinyin()`` into one space-separated string.

    ``syllables`` is a list of lists of strings; only the first reading of
    each inner list is kept, and empty inner lists are skipped.

    The readings are taken as they are instead of stringifying the inner
    lists and stripping quotes/brackets, so apostrophes, brackets and
    non-printable characters of the source text are neither dropped nor
    turned into ``repr()`` escape sequences.
    """
    return ' '.join(readings[0] for readings in syllables if readings)


def main(argv=None):
    """Add pinyin columns to a tab-separated grammar file.

    Parameters (taken from ``argv``, which defaults to ``sys.argv``):
    * argv[1]: name of the tab-separated input file; it must provide the
      columns number, English1, Hanzi1, English2 and Hanzi2
    * argv[2]: the unit number (integer)

    Two files are written next to the input file:
    * ``hanzi.txt``: the Hanzi1 and Hanzi2 values, one per line
    * ``FSI-<unit>-frames.csv``: the rows completed with the pinyin of both
      Hanzi fields plus the module, unit and course columns

    Exits with an error message when an argument is missing or when the unit
    is not an integer.
    """
    args = sys.argv if argv is None else argv
    if len(args) < 3:
        sys.exit('usage: <script> <tab-separated-file> <unit-number>')

    theFileName = str(args[1]) # the file name to work with
    print ('theFileName : ',theFileName)

    theFileNamePrefix = os.path.dirname(theFileName)
    print ('theFileNamePrefix : ',theFileNamePrefix)

    theHanziFile = os.path.join(theFileNamePrefix, 'hanzi.txt')

    theUnit = str(args[2])
    print ('the Unit : ', theUnit)
    # Validate the unit once, before any output file gets truncated.
    try:
        theUnitNumber = int(theUnit)
    except ValueError:
        sys.exit('the unit must be an integer, got: ' + repr(theUnit))

    theOutputFileName = os.path.join(theFileNamePrefix, 'FSI-' + theUnit + '-frames.csv') # the output filename according to the input filename
    print ('theOutputFileName : ',theOutputFileName)
    endOfLine = '\t' + str(MODULE_NUMBER) + '\t' + theUnit + '\t' + COURSE_NAME
    print ('the endOfLine : ', endOfLine)

    baseDirectory = os.getcwd()
    print (' Base directory : ', baseDirectory)

    # The input is opened first so that a wrong path does not wipe existing
    # output files. The encodings are explicit because the data is Chinese
    # text ('utf-8-sig' also accepts a leading BOM), and newline='' is what
    # the csv module requires for the files it reads and writes.
    with open(theFileName, 'r', encoding='utf-8-sig', newline='') as f, \
            open(theHanziFile, 'w', encoding='utf-8') as t, \
            open(theOutputFileName, 'w', encoding='utf-8', newline='') as o:
        print('text file opened')
        print('csv file opened')
        data = csv.DictReader(f, delimiter='\t')
        csvwriter = csv.DictWriter(o, fieldnames=FIELD_NAMES, delimiter='\t')

        for theLineNumber, row in enumerate(data, start=1):
            print ('theLineNumber : ',theLineNumber)
            # dump the Hanzi parts into the hanzi.txt file
            t.write(row['Hanzi1'] + '\n')
            t.write(row['Hanzi2'] + '\n')
            csvwriter.writerow({
                'numero': row['number'],
                'Pinyin1': joinPinyin(pinyin(row['Hanzi1'])),
                'Hanzi1': row['Hanzi1'],
                'English1': row['English1'],
                'Pinyin2': joinPinyin(pinyin(row['Hanzi2'])),
                'Hanzi2': row['Hanzi2'],
                'English2': row['English2'],
                'module': MODULE_NUMBER,
                'unit': theUnitNumber,
                'course': COURSE_NAME,
            })


if __name__ == '__main__':
    main()