// ch2json.js 2.4 KB
  const fs = require('fs');
  const readline = require('readline');
  // Pinyin processing library
  const pinyinizer = require('pinyinizer');
  const stringify = require('json-stringify');
  // Utilities: separator characters (not referenced by the rest of the visible script).
  const tab = '\t';
  const endLine = '\n';
  // Shared record for the dictionary entry currently being built. It is filled
  // in for each input line, serialized to the output file, then cleared again
  // by resetLigne().
  const ligne = {
    hanzi: '',
    traditional: '',
    pinyin: '',
    translations: [],
    origin: '',
  };
  /**
   * Clears every field of the shared `ligne` record so the next input line
   * starts from an empty entry. `translations` receives a fresh array rather
   * than being emptied in place.
   */
  function resetLigne() {
    ligne.hanzi = '';
    ligne.traditional = '';
    ligne.pinyin = '';
    ligne.translations = [];
    ligne.origin = '';
  }
  // Destination file; the script only ever appends to it.
  const outputFile = 'ch.json';
  // Line-by-line reader over the dictionary source file.
  const rl = readline.createInterface({
    input: fs.createReadStream('chdict.u8'),
  });
  /**
   * Runs pinyinizer over pinyin written with tone digits and strips every "5"
   * from the result (presumably the neutral-tone digit that survives the
   * conversion; confirm against pinyinizer's output).
   *
   * If pinyinizer throws, the offending text is logged and returned unchanged
   * so that one bad field does not abort the whole conversion.
   *
   * @param {string} numbered - Pinyin text to convert.
   * @returns {string} The converted text, or `numbered` itself on failure.
   */
  function accentPinyin(numbered) {
    try {
      return pinyinizer.pinyinize(numbered).replace(/5/g, '');
    } catch (err) {
      console.log('Erreur: ' + numbered);
      return numbered;
    }
  }

  /**
   * Converts every `[...]` group found inside one translation with
   * accentPinyin(), leaving the text outside the brackets untouched.
   *
   * @param {string} translation - One '/'-delimited translation field.
   * @returns {string} The translation with its bracketed groups converted.
   */
  function accentBracketedPinyin(translation) {
    const converted = translation.replace(/\[.*?\]/g, (bracket) => {
      // "u:" is rewritten to "v" before conversion (presumably the spelling
      // pinyinizer expects for ü; confirm). The replacement is applied to the
      // bracket exactly as matched, so groups containing "u:" are converted too.
      const normalized = bracket.replace(/u:/g, 'v');
      const accented = accentPinyin(normalized);
      console.log(normalized, '-', accented);
      return accented;
    });
    if (converted !== translation) {
      console.log(converted);
    }
    return converted;
  }

  // Each new line emits an event - every time the stream receives \r, \n, or \r\n
  rl.on('line', (line) => {
    // Skip comment lines (leading '#') and lines too short to hold an entry.
    if (line[0] === '#' || line.length <= 2) {
      return;
    }

    // The first two space-separated tokens are stored as `traditional` and `hanzi`.
    const [traditional, hanzi] = line.split(' ', 2);
    ligne.traditional = traditional;
    ligne.hanzi = hanzi;
    ligne.origin = 'Chdict';

    // The headword pinyin is the text between the first '[' and the first ']';
    // it is empty when the line has no such pair.
    const open = line.indexOf('[');
    const close = line.indexOf(']');
    const numbered = open !== -1 && close > open ? line.slice(open + 1, close) : '';
    ligne.pinyin = accentPinyin(numbered.toLowerCase());

    // Translations are the '/'-delimited fields, dropping the text before the
    // first slash and whatever follows the last one.
    ligne.translations = line
      .split('/')
      .slice(1, -1)
      .map((translation) => accentBracketedPinyin(translation));

    // Every entry is appended with a trailing comma; the 'close' handler is
    // responsible for terminating the output.
    fs.appendFileSync(outputFile, stringify(ligne, null, 2, {offset: 4}) + ',\n');
    resetLigne();
  });
  // Once the input is exhausted, append a terminator to the output file. The
  // empty object absorbs the trailing comma left after the last entry, and
  // `]}` closes an enclosing array and object.
  // NOTE(review): nothing in the visible script writes the matching opening
  // `{ ...: [`; presumably ch.json is pre-seeded with it. Confirm.
  rl.on('close', () => {
    fs.appendFileSync(outputFile, '{}]}');
    console.log('Done reading file');
  });