chdict2csv.js 2.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788
  1. var fs = require('fs')
  2. var readline = require('readline')
  3. var Stream = require('stream')
  4. // librairie de traitement du pinyin
  5. const pinyinizer = require('pinyinizer')
  // Utilities: the field separator and row terminator used for every CSV row.
  const tab = '\t'
  const endLine = '\n'

  // Scratch record holding the pieces of the dictionary entry currently being
  // converted. The binding is constant; only its properties are rewritten.
  const ligne = {
    hanzi: '',
    traditional: '',
    pinyin: '',
    translations: []
  }
  // Puts the shared `ligne` record back to its empty state: the three text
  // fields become empty strings and `translations` gets a brand-new empty array.
  function resetLigne () {
    Object.assign(ligne, {
      hanzi: '',
      traditional: '',
      pinyin: '',
      translations: []
    })
  }
  // The CSV row that is about to be written to the CSV file.
  let ligneCSV = ''
  /**
   * Converts one dictionary entry into a tab-separated CSV row.
   *
   * The entry is expected to look like
   * `TRADITIONAL SIMPLIFIED [pin1 yin1] /gloss 1/gloss 2/`
   * (presumably the CC-CEDICT layout; confirm against the real input file).
   *
   * @param {string} line - One raw line of the dictionary file.
   * @returns {string|null} The row
   *   `simplified<tab>traditional<tab>pinyin<tab>glosses<endLine>`, or `null`
   *   when the line starts with `#` or is at most two characters long.
   */
  function convertDictLine (line) {
    if (line[0] === '#' || line.length <= 2) {
      return null
    }

    // The first two space-separated fields are the traditional and simplified forms.
    const [traditional, hanzi] = line.split(' ', 2)

    // Pinyin is the text between the first "[" and the "]" that follows it;
    // a line without such a pair yields an empty pinyin column.
    const bracketed = /\[([^\]]*)\]/.exec(line)
    const rawPinyin = (bracketed ? bracketed[1] : '').toLowerCase()

    let pinyin = rawPinyin
    try {
      // Turn tone numbers into diacritics, then drop any leftover "5"
      // (presumably the neutral-tone marker, which needs no diacritic).
      pinyin = pinyinizer.pinyinize(rawPinyin).replace(/5/g, '')
    } catch (err) {
      // Best effort: report the offending syllables and keep the numbered form.
      console.log('Erreur: ' + rawPinyin)
    }

    // Glosses sit between slashes; the piece before the first slash (headword
    // and pinyin) and the piece after the last slash are not glosses.
    const glosses = line.split('/').slice(1, -1).join(' / ')

    return hanzi + tab + traditional + tab + pinyin + tab + glosses + endLine
  }

  /**
   * Reads `inputFile` line by line and appends one CSV row per dictionary
   * entry to `outputFile`. Comment lines (`#`) and near-empty lines are skipped.
   *
   * Rows are appended synchronously, so an existing `outputFile` is extended
   * rather than overwritten.
   *
   * @param {string} inputFile - Path of the dictionary file to read.
   * @param {string} outputFile - Path of the CSV file to append to.
   * @returns {readline.Interface} The line reader; its `'close'` event fires
   *   once every line of the input has been processed.
   */
  function readFileLineByLine (inputFile, outputFile) {
    const rl = readline.createInterface({
      input: fs.createReadStream(inputFile),
      terminal: false
    })

    rl.on('line', function (line) {
      const row = convertDictLine(line)
      if (row !== null) {
        fs.appendFileSync(outputFile, row)
      }
    })

    return rl
  }
  // Script entry point: convert the dictionary dump found in the current
  // working directory into its tab-separated counterpart.
  var dictionaryPath = 'chdict.u8'
  var csvPath = 'chdict.csv'
  readFileLineByLine(dictionaryPath, csvPath)