// h2json.js — reads handedict.u8 line by line and appends one JSON record per entry to hd.json.
  const fs = require('fs');
  const readline = require('readline');
  // Pinyin processing library.
  const pinyinizer = require('pinyinizer');
  // Serializer from the `json-stringify` package, used to format each record.
  const stringify = require('json-stringify');
  // Utilities: separator constants (not referenced in the visible part of this script).
  const tab = '\t';
  const endLine = '\n';
  /**
   * Shared record describing the dictionary entry currently being converted.
   * The fields are filled in for one input line, the record is serialized,
   * and resetLigne() then clears it for the next line.
   *
   * `origin` is declared here (last, so the serialized key order is unchanged)
   * so that the record has the same set of keys before and after a reset.
   */
  const ligne = {
    hanzi: '',
    traditional: '',
    pinyin: '',
    translations: [],
    origin: '',
  };

  /**
   * Clears every field of the shared `ligne` record in place.
   * `translations` receives a fresh empty array, so an array taken from the
   * record before the reset is not emptied by it.
   */
  function resetLigne() {
    ligne.hanzi = '';
    ligne.traditional = '';
    ligne.pinyin = '';
    ligne.translations = [];
    ligne.origin = '';
  }
  // Destination file. Records are appended to it, so any existing content is kept.
  const outputFile = 'hd.json';
  // Line-oriented reader over the source dictionary file.
  const rl = readline.createInterface({
    input: fs.createReadStream('handedict.u8'),
  });
  /**
   * Converts one input line into a JSON record and appends it to `outputFile`.
   * The readline interface emits 'line' each time the stream receives \r, \n
   * or \r\n.
   *
   * Lines whose first character is '#' and lines of two characters or fewer
   * are skipped. The remaining lines are parsed as
   *   <traditional> <simplified> [<pinyin>] /<translation>/<translation>/
   * (layout inferred from the parsing steps below — confirm against the data).
   */
  rl.on('line', (line) => {
    if (line[0] === '#' || line.length <= 2) {
      return;
    }

    // The first two space-separated tokens are stored as the traditional and
    // simplified (hanzi) forms respectively.
    const [traditional, simplified] = line.split(' ', 2);
    ligne.traditional = traditional;
    ligne.hanzi = simplified;
    ligne.origin = 'Handedict';

    // The phonetic field is the text between the first '[' and the first ']'.
    // A line without a well-formed "[...]" group yields an empty string
    // instead of an arbitrary slice of the line.
    const open = line.indexOf('[');
    const close = line.indexOf(']');
    const rawPinyin = open >= 0 && close > open
      ? line.slice(open + 1, close).toLowerCase()
      : '';

    // Process pinyin fields written with digits, then remove every remaining
    // '5' (presumably the neutral-tone marker — confirm). If the conversion
    // throws, report the offending text and keep the lower-cased raw value.
    let pinyin = rawPinyin;
    try {
      pinyin = pinyinizer.pinyinize(rawPinyin).replace(/5/g, '');
    } catch (err) {
      console.log(`Erreur: ${rawPinyin}`);
    }
    ligne.pinyin = pinyin;

    // Splitting on '/' produces the header before the first slash and the text
    // after the last slash as first and last elements; drop both and keep
    // what lies in between.
    ligne.translations = line.split('/').slice(1, -1);

    fs.appendFileSync(outputFile, stringify(ligne, null, 2, {offset: 4}) + ',\n');
    resetLigne();
  });
  // Once the input has been fully read, append a closing fragment to the
  // output file and report completion on the console.
  // NOTE(review): '{}]}' adds an empty final element followed by ']' and '}',
  // but the visible script never writes the matching opening text — presumably
  // hd.json is expected to contain it already; confirm.
  rl.on('close', function onInputClosed() {
    fs.appendFileSync(outputFile, '{}]}');
    console.log('Done reading file');
  });