// chdict2json.js
  1. var fs = require('fs')
  2. var readline = require('readline')
  3. var Stream = require('stream')
  // pinyin processing library
  5. const pinyinizer = require('pinyinizer')
  // Utilities: separator characters (neither is referenced in the visible part of this file).
  var tab = '\t'
  var endLine = '\n'
  // Shared, mutable record describing one dictionary entry.
  // The key order below is the order in which JSON.stringify emits the fields.
  var ligne = { hanzi: '', traditional: '', pinyin: '', translations: [] }
  /**
   * Restore the shared `ligne` record to its empty state: the three text
   * fields become '' and `translations` becomes a brand-new empty array
   * (the previous array object is left untouched).
   */
  function resetLigne () {
    Object.assign(ligne, {
      hanzi: '',
      traditional: '',
      pinyin: '',
      translations: []
    })
  }
  /**
   * Read `inputFile` line by line and append one JSON object per dictionary
   * entry to `outputFile`.
   *
   * A line is skipped when it starts with '#' or is at most 2 characters long.
   * For every other line:
   *   - the first two space-separated tokens become `traditional` and `hanzi`,
   *   - the text between the first '[' and the following ']' becomes `pinyin`
   *     (see extractPinyin),
   *   - the segments between '/' characters (first and last segment dropped)
   *     become `translations`.
   *
   * Each record is written as `JSON.stringify(ligne) + ',\n'` with
   * fs.appendFileSync, so the output is a comma-terminated list of objects
   * (not a complete JSON document) and existing file content is kept.
   * The shared `ligne` record is reset after every written entry.
   *
   * @param {string} inputFile - path of the dictionary file to read
   * @param {string} outputFile - path of the file the records are appended to
   * @returns {readline.Interface} the line reader; it emits 'close' once the
   *   whole input has been processed (callers may ignore the return value)
   */
  function readFileLineByLine (inputFile, outputFile) {
    const rl = readline.createInterface({
      input: fs.createReadStream(inputFile),
      terminal: false
    })

    rl.on('line', function (line) {
      // Comment lines and lines too short to hold an entry are ignored.
      if (line[0] === '#' || line.length <= 2) {
        return
      }
      const [traditional, hanzi] = line.split(' ', 2)
      ligne.traditional = traditional
      ligne.hanzi = hanzi
      ligne.pinyin = extractPinyin(line)
      // Splitting on '/' leaves the head of the line first and the text after
      // the final '/' last; neither is a translation.
      ligne.translations = line.split('/').slice(1, -1)
      fs.appendFileSync(outputFile, JSON.stringify(ligne) + ',\n')
      resetLigne()
    })

    // readline interfaces signal end of input with 'close' (there is no 'end' event).
    rl.on('close', function () {
      console.log('EOF')
    })

    return rl
  }

  /**
   * Extract the bracketed pinyin field of a dictionary line, lower-cased and
   * passed through pinyinizer.pinyinize; every '5' left in the result is
   * removed (presumably the neutral-tone digit - confirm against pinyinizer).
   * If the conversion throws, the failure is logged and the lower-cased
   * bracket content is returned unchanged.
   *
   * @param {string} line - one dictionary line
   * @returns {string} the converted pinyin, or '' when no '[...]' pair exists
   */
  function extractPinyin (line) {
    const open = line.indexOf('[')
    const close = open === -1 ? -1 : line.indexOf(']', open + 1)
    const raw = close === -1 ? '' : line.slice(open + 1, close).toLowerCase()
    try {
      return pinyinizer.pinyinize(raw).replace(/5/g, '')
    } catch (err) {
      // Best effort: keep the numbered pinyin rather than dropping the entry.
      console.log('Erreur: ' + raw)
      return raw
    }
  }
  // Script entry point: process the dictionary source file into the JSON output file.
  const dictionarySource = 'chdict.u8'
  const jsonTarget = 'chdict.json'
  readFileLineByLine(dictionarySource, jsonTarget)