// h2xml.js
// Reads 'handedict.u8' line by line and writes the parsed entries to 'hd.xml'.
  const fs = require('fs');
  const readline = require('readline');
  // Pinyin processing library.
  const pinyinizer = require('pinyinizer');
  // NOTE(review): not referenced by the code visible in this file; kept in
  // case something else relies on it.
  const stringify = require('json-stringify');
  // XML document builder.
  const builder = require('xmlbuilder');

  // Name of the XML file this script writes.
  const fichierXml = 'hd.xml';

  // Create the XML document; 'Handedict' is the name passed to
  // builder.create() and the options below go into the XML declaration.
  const feed = builder.create('Handedict', {
    version: '1.0',
    encoding: 'UTF-8',
    standalone: true,
  });

  // Utilities.
  const tab = '\t';
  const endLine = '\n';

  // Shared record describing the dictionary entry currently being processed.
  // Its fields are overwritten for every input line and cleared again by
  // resetLigne() once the entry has been added to the document.
  const ligne = {
    hanzi: '',
    traditional: '',
    pinyin: '',
    translations: [],
  };
  /**
   * Clears the shared `ligne` record so no field of the previous entry can
   * leak into the next one. `translations` receives a brand-new array rather
   * than being emptied in place, so an array handed out earlier is untouched.
   */
  function resetLigne() {
    ligne.hanzi = '';
    ligne.traditional = '';
    ligne.pinyin = '';
    ligne.translations = [];
  }
  // NOTE(review): outputFile is not read anywhere in the visible code (the
  // output is written using fichierXml directly); kept for compatibility.
  const outputFile = fichierXml;

  // Line-by-line reader over the source dictionary file.
  const rl = readline.createInterface({
    input: fs.createReadStream('handedict.u8'),
    // Always treat "\r\n" as a single line break, regardless of the delay
    // between the two characters (the default only merges them within 100 ms).
    crlfDelay: Infinity,
  });
  /**
   * Extracts the fields of one dictionary line.
   *
   * Layout implied by the parsing below:
   *   <first token> <second token> [phonetic] /translation/translation/
   *
   * @param {string} line - One line of the input file (not a '#' comment).
   * @returns {{traditional: string, hanzi: (string|undefined), pinyin: string,
   *   translations: string[]}} The parsed fields; `hanzi` is undefined when
   *   the line contains no space.
   */
  function parseDictionaryLine(line) {
    // The first space-separated token is stored as `traditional`, the second
    // as `hanzi`.
    const [traditional, hanzi] = line.split(' ', 2);

    // The phonetic field is the text between the first '[' and the first ']'.
    // It is empty when ']' does not come after '['.
    const open = line.indexOf('[');
    const close = line.indexOf(']');
    const rawPinyin = (close > open ? line.slice(open + 1, close) : '').toLowerCase();

    let pinyin;
    try {
      // Process pinyin written with digits, then remove any remaining "5"
      // (presumably the neutral-tone marker; confirm against the data).
      pinyin = pinyinizer.pinyinize(rawPinyin).replace(/5/g, '');
    } catch (err) {
      // Best effort: report the offending value and keep it unconverted.
      console.log(`Erreur: ${rawPinyin}`);
      pinyin = rawPinyin;
    }

    // Translations are the '/'-separated segments, without the text before
    // the first '/' and the text after the last one.
    const translations = line.split('/').slice(1, -1);

    return { hanzi, traditional, pinyin, translations };
  }

  // readline emits one 'line' event per input line (on \r, \n, or \r\n).
  rl.on('line', (line) => {
    // Skip '#' comment lines and lines too short to hold an entry.
    if (line[0] === '#' || line.length <= 2) {
      return;
    }

    // Fill the shared record, append it to the document as a <ligne> element,
    // then clear it for the next line.
    Object.assign(ligne, parseDictionaryLine(line));
    feed.ele({ ligne });
    resetLigne();
  });
  // Once the whole input has been read, serialize the document
  // (pretty-printed) and write it to the XML file in one synchronous call.
  rl.on('close', () => {
    const xml = feed.end({ pretty: true });
    fs.writeFileSync(fichierXml, xml);
    console.log('Done reading file');
  });