mod9.py 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899
  1. # Program to complete the grammar CSV (tab separated) file with audio and translations
  2. # Licence: MIT
  3. # Copyrights : Eric Streit <eric@yojik.eu> 2022
  4. # adding pinyin to the file
  5. # adding the end-of-line columns: module, unit, course (e.g. 9, 1, FSI-Chinese)
  6. # dumping all Hanzi parts into a hanzi.txt file
  7. # parameters:
  8. # * name of the file to deal with
  9. # * name of the unit (to be added at the end of each line)
  10. # 5 original fields: number, English1, Hanzi1, English2, Hanzi2
  11. # the CSV library
  12. import csv
  13. from typing import TYPE_CHECKING, Generator
  14. # the library to split the Chinese sentences into words
  15. import jieba
  16. # the pinyin library
  17. from pypinyin import pinyin, lazy_pinyin, Style
  18. # the hanzipi library for decomposing, finding definitions and examples
  19. # os module
  20. import os
  21. # shutils (moving files)
  22. import shutil
  23. # random
  24. import random,copy, re, sys
  25. # google TTS
  26. from GoogleTTS import GoogleTTS
  27. # delay
  28. import time
  29. #
  30. theFileName = '' # the file name to work with
  31. theFileName = str(sys.argv[1])
  32. print ('theFileName : ',theFileName)
  33. theFileNamePrefix = os.path.dirname(theFileName)
  34. print ('theFileNamePrefix : ',theFileNamePrefix)
  35. theHanziFile = os.path.join(theFileNamePrefix, 'hanzi.txt')
  36. theUnit = str(sys.argv[2])
  37. print ('the Unit : ', theUnit)
  38. theOutputFileName = os.path.join(theFileNamePrefix, 'FSI-' + theUnit + '-frames.csv') # the output filename according to the input filename
  39. print ('theOutputFileName : ',theOutputFileName)
  40. endOfLine = '\t9\t' + theUnit + '\tFSI-Chinese'
  41. print ('the endOfLine : ', endOfLine)
  42. #
  43. baseDirectory = os.getcwd()
  44. print (' Base directory : ', baseDirectory)
  45. with open(theOutputFileName,'w') as o:
  46. fieldNames = ['numero','Pinyin1','Hanzi1','English1','Pinyin2','Hanzi2','English2','module','unit','course']
  47. csvwriter = csv.DictWriter(o, fieldnames=fieldNames,delimiter='\t')
  48. with open(theHanziFile,'w') as t:
  49. print('text file opened')
  50. with open(theFileName,'r') as f:
  51. print('csv file opened')
  52. data = csv.DictReader(f, delimiter='\t')
  53. # see the names of the fields above
  54. finalRow = {}
  55. theLineNumber = 0
  56. for row in data:
  57. theLineNumber = theLineNumber + 1
  58. print ('theLineNumber : ',theLineNumber)
  59. #print (row['Hanzi1'])
  60. t.write(row['Hanzi1'] + '\n')
  61. t.write(row['Hanzi2'] + '\n')
  62. finalRow['numero'] = row['number']
  63. temp = pinyin(row['Hanzi1'])
  64. # print ('Temp : ', temp)
  65. thePinyinSentence = ' '.join(str(x) for x in temp).replace("'",'').replace("]",'').replace("[",'')
  66. finalRow['Pinyin1'] = thePinyinSentence
  67. finalRow['Hanzi1'] = row['Hanzi1']
  68. finalRow['English1'] = row['English1']
  69. temp = pinyin(row['Hanzi2'])
  70. thePinyinSentence = ' '.join(str(x) for x in temp).replace("'",'').replace("]",'').replace("[",'')
  71. finalRow['Pinyin2'] = thePinyinSentence
  72. finalRow['Hanzi2'] = row['Hanzi2']
  73. finalRow['English2'] = row['English2']
  74. finalRow['module'] = 9
  75. finalRow['unit'] = int(theUnit)
  76. finalRow['course'] = 'FSI-Chinese'
  77. csvwriter.writerow(finalRow)
  78. f.close()
  79. t.close()
  80. o.close()