cedict2json.js 2.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. var fs = require('fs')
  2. var readline = require('readline')
  3. var Stream = require('stream')
  4. // librairie de traitement du pinyin
  5. const pinyinizer = require('pinyinizer')
  // Utilities: separator characters and the shared dictionary-entry record.
  const tab = '\t'
  const endLine = '\n'

  /**
   * Reusable record describing one dictionary entry. Its properties are
   * overwritten in place for each entry, so the binding itself never changes.
   *
   * @type {{hanzi: string, traditional: string, pinyin: string, translations: string[]}}
   */
  const ligne = {
    hanzi: '',
    traditional: '',
    pinyin: '',
    translations: []
  }
  /**
   * Blanks the shared `ligne` record: the three string fields become empty
   * strings and `translations` is replaced by a brand-new empty array.
   */
  function resetLigne () {
    Object.assign(ligne, {
      hanzi: '',
      traditional: '',
      pinyin: '',
      translations: []
    })
  }
  /**
   * Runs every "[...]" span of one translation through `pinyinizer.pinyinize`
   * and strips the digit 5 from the converted span.
   *
   * The substitution is done per match, so ordinary digits elsewhere in the
   * translation text are never touched. A span that pinyinizer rejects is
   * logged and left exactly as it was.
   *
   * @param {string} translation - One slash-delimited gloss of an entry.
   * @returns {string} The gloss with its bracketed spans converted.
   */
  function pinyinizeBracketed (translation) {
    // A replacer function is used so "$" in the converted text is never
    // interpreted as a replacement pattern.
    return translation.replace(/\[.*?\]/g, function (bracketed) {
      try {
        // "u:" is rewritten to "v" before conversion, as the previous code did
        // (presumably the spelling pinyinizer expects -- TODO confirm).
        const converted = pinyinizer.pinyinize(bracketed.replace(/u:/g, 'v'))
        return converted.replace(/5/g, '')
      } catch (err) {
        console.log('Erreur: ' + bracketed)
        return bracketed
      }
    })
  }

  /**
   * Builds one entry record from a dictionary line shaped like
   * `first second [phonetic] /gloss/gloss/`.
   *
   * @param {string} line - A non-comment, non-blank input line.
   * @returns {{hanzi: string, traditional: string, pinyin: string, translations: string[]}}
   *   `traditional` is the first space-separated token and `hanzi` the second.
   */
  function parseCedictLine (line) {
    const [traditional, hanzi] = line.split(' ', 2)

    // Text between the first "[" and the first "]"; empty when either bracket
    // is missing or they are out of order.
    const open = line.indexOf('[')
    const close = line.indexOf(']')
    const numbered = (open !== -1 && close > open ? line.slice(open + 1, close) : '').toLowerCase()

    // Fall back to the lower-cased raw phonetic when pinyinizer cannot convert it.
    let pinyin = numbered
    try {
      pinyin = pinyinizer.pinyinize(numbered).replace(/5/g, '')
    } catch (err) {
      console.log('Erreur: ' + numbered)
    }

    // The first piece is the headword part and the last is whatever follows
    // the final "/"; everything in between is a gloss.
    const translations = line.split('/').slice(1, -1).map(pinyinizeBracketed)

    // Key order is kept as hanzi, traditional, pinyin, translations so the
    // serialized output keeps the same layout.
    return { hanzi, traditional, pinyin, translations }
  }

  /**
   * Reads `inputFile` line by line and appends one serialized record per entry
   * to `outputFile`.
   *
   * Lines starting with "#" and blank lines are skipped. Each record is written
   * as `JSON.stringify(record)` followed by ",\n"; the output file is appended
   * to (never truncated), so it is a comma-terminated list of objects rather
   * than a complete JSON document. "EOF" is logged once the input is exhausted.
   *
   * @param {string} inputFile - Path of the dictionary text file to read.
   * @param {string} outputFile - Path of the file the records are appended to.
   * @returns {readline.Interface} The line reader, so callers can wait for its
   *   'close' event.
   */
  function readFileLineByLine (inputFile, outputFile) {
    const rl = readline.createInterface({
      input: fs.createReadStream(inputFile),
      terminal: false
    })

    rl.on('line', function (line) {
      if (line.trim() === '' || line.startsWith('#')) {
        return
      }
      fs.appendFileSync(outputFile, JSON.stringify(parseCedictLine(line)) + ',\n')
    })

    // readline interfaces signal end of input with 'close'; they never emit 'end'.
    rl.on('close', function () {
      console.log('EOF')
    })

    return rl
  }
  // Script entry point. The input and output paths may be given as the first
  // and second command-line arguments; without them the conversion runs on
  // 'cedict.txt' and appends to 'cedict.json' in the working directory.
  readFileLineByLine(process.argv[2] || 'cedict.txt', process.argv[3] || 'cedict.json')