// ch2xml.js — converts the chdict.u8 dictionary file into ch.xml
  // Node core modules: file access and line-by-line stream reading.
  const fs = require('fs');
  const readline = require('readline');

  // Pinyin processing library.
  const pinyinizer = require('pinyinizer');
  var stringify = require('json-stringify');

  // XML document builder library.
  const builder = require('xmlbuilder');

  // Name of the XML file produced by this script.
  var fichierXml = 'ch.xml';

  // XML declaration attributes passed to the builder.
  const xmlDeclaration = {
    version: '1.0',
    encoding: 'UTF-8',
    standalone: true
  };

  // In-memory XML document whose root element is <Chdict>.
  var feed = builder.create('Chdict', xmlDeclaration);

  // Formatting helpers.
  var tab = '\t';
  var endLine = '\n';
  // Shared record describing the dictionary entry currently being processed.
  // Its properties are overwritten for every input line.
  //   hanzi        - second space-separated token of the input line
  //   traditional  - first space-separated token of the input line
  //   pinyin       - phonetic transcription taken from the first [...] group
  //   translations - list of the '/'-separated translations
  //   origin       - name of the source dictionary
  // `origin` starts as 'Chdict' so that the very first record is emitted with
  // the same origin as every later one (it previously started empty).
  const ligne = {
    hanzi: '',
    traditional: '',
    pinyin: '',
    translations: [],
    origin: 'Chdict'
  };
  /**
   * Restores the shared `ligne` record to its blank state: empty strings for
   * hanzi, traditional and pinyin, a fresh empty translations array, and
   * 'Chdict' as origin.
   */
  function resetLigne () {
    Object.assign(ligne, {
      hanzi: '',
      traditional: '',
      pinyin: '',
      translations: [],
      origin: 'Chdict'
    });
  }
  // Alias for the XML output file name.
  var outputFile = fichierXml;

  // Read the source dictionary file through a line-oriented interface.
  const inputStream = fs.createReadStream('chdict.u8');
  const rl = readline.createInterface({ input: inputStream });
  /**
   * Converts every bracketed group (e.g. "[...]") found in a translation by
   * passing it through pinyinizer.
   *
   * Inside each group, 'u:' is rewritten to 'v' before conversion and any '5'
   * left after conversion is removed. Text outside the brackets is untouched.
   * A group that pinyinizer rejects is logged and left as it was.
   *
   * @param {string} translation - one '/'-separated translation of an entry
   * @returns {string} the translation with its bracketed groups converted
   */
  function pinyinizeBracketGroups (translation) {
    const groups = translation.match(/\[.*?\]/g);
    if (groups === null) {
      return translation;
    }
    let converted = translation;
    for (const group of groups) {
      // presumably 'v' is the spelling pinyinizer expects for 'u:' — confirm
      const normalized = group.replace(/u:/g, 'v');
      let toned;
      try {
        toned = pinyinizer.pinyinize(normalized).replace(/5/g, '');
      } catch (err) {
        console.log('Erreur: ' + group);
        continue;
      }
      console.log(normalized, '-', toned);
      // Search for the ORIGINAL group text (the normalized text may no longer
      // occur in the translation); a function replacer avoids '$' expansion.
      converted = converted.replace(group, () => toned);
      console.log(converted);
    }
    return converted;
  }

  // Each new line emits an event - every time the stream receives \r, \n, or \r\n.
  // For each entry line, fill the shared `ligne` record, append it to the XML
  // document and reset the record. A line is read as: first token =
  // traditional, second token = hanzi, first [...] group = numbered pinyin,
  // '/'-separated segments = translations.
  rl.on('line', (line) => {
    // Skip comment lines (starting with '#') and lines of two characters or fewer.
    if (line[0] === '#' || line.length <= 2) {
      return;
    }

    const [traditional, hanzi] = line.split(' ', 2);
    ligne.traditional = traditional;
    ligne.hanzi = hanzi;

    // Numbered pinyin sits between the first '[' and the first ']'.
    const open = line.indexOf('[');
    const close = line.indexOf(']');
    const numbered = open !== -1 && close > open
      ? line.slice(open + 1, close).toLowerCase()
      : '';

    // Convert the numbered pinyin; on failure keep the raw numbered form.
    // NOTE(review): unlike the bracketed groups in translations, 'u:' is not
    // rewritten to 'v' here — confirm whether pinyinizer needs it.
    let phonetic = numbered;
    try {
      phonetic = pinyinizer.pinyinize(numbered).replace(/5/g, '');
    } catch (err) {
      console.log('Erreur: ' + numbered);
    }
    ligne.pinyin = phonetic;

    // Drop the text before the first '/' and the remainder after the last '/'.
    ligne.translations = line
      .split('/')
      .slice(1, -1)
      .map(pinyinizeBracketGroups);

    feed.ele({ligne});
    resetLigne();
  });
  // When the reader closes, serialize the XML document (pretty-printed) and
  // write it to the output file.
  rl.on('close', function onInputClosed () {
    const xml = feed.end({pretty: true});
    fs.writeFileSync(fichierXml, xml);
    console.log('Done reading file');
  });