h2cvs.js 2.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889
  1. const fs = require('fs');
  2. const readline = require('readline');
  3. // librairie de traitement du pinyin
  4. const pinyinizer = require('pinyinizer')
  5. var stringify = require('json-stringify');
  // Utilities: column separator and record terminator used when building
  // each output line.
  const tab = '\t';
  const endLine = '\n';

  /**
   * Shared working record for the entry currently being converted.
   *
   * Every field starts empty. `origin` is declared up front so the record
   * has the same set of keys before and after the first reset, instead of
   * gaining the key the first time it is assigned.
   */
  const ligne = {
    hanzi: '',
    traditional: '',
    pinyin: '',
    translations: [],
    origin: '',
  };
  /**
   * Clears the shared `ligne` record in place.
   *
   * All string fields (`hanzi`, `traditional`, `pinyin`, `origin`) become
   * empty strings and `translations` receives a brand-new empty array.
   * The record object itself is kept, only its properties are overwritten.
   */
  function resetLigne() {
    Object.assign(ligne, {
      hanzi: '',
      traditional: '',
      pinyin: '',
      translations: [],
      origin: '',
    });
  }
  // Path of the file that receives the converted records.
  const outputFile = 'hd.csv';

  // Line reader fed by a read stream opened on the source dictionary file.
  const inputStream = fs.createReadStream('handedict.u8');
  const rl = readline.createInterface({ input: inputStream });
  /**
   * Cuts the phonetic field out of a raw dictionary line.
   *
   * The field is the text after the first '[' up to (not including) the
   * first ']'. A missing '[' counts as position -1, so the span then starts
   * at the beginning of the line. An empty or inverted span yields ''.
   *
   * @param {string} line - Raw line as delivered by the reader.
   * @returns {string} The bracketed text, or '' when there is none.
   */
  function extractPhonetic(line) {
    const open = line.indexOf('[');
    const close = line.indexOf(']');
    return close - open - 1 > 0 ? line.slice(open + 1, close) : '';
  }

  /**
   * Runs a pinyin string containing digits through `pinyinizer.pinyinize`
   * and strips any '5' left in the result.
   *
   * If the conversion throws, the failure is logged together with the
   * offending input and that input is returned unchanged, so one bad entry
   * does not abort the whole run.
   *
   * @param {string} numbered - Lower-cased pinyin as read from the line.
   * @returns {string} Converted pinyin, or `numbered` itself on failure.
   */
  function toAccentedPinyin(numbered) {
    try {
      return pinyinizer.pinyinize(numbered).replace(/5/g, '');
    } catch (err) {
      // Log the input that could not be converted.
      console.log(`Erreur: ${numbered}`);
      return numbered;
    }
  }

  // Each new line emits an event - every time the stream receives \r, \n, or \r\n
  rl.on('line', (line) => {
    // Ignore comment lines (leading '#') and lines of two characters or fewer.
    if (line[0] === '#' || line.length <= 2) {
      return;
    }

    // The first two space-separated tokens are the traditional and the
    // simplified spelling, in that order.
    const [traditional, hanzi] = line.split(' ', 2);
    ligne.traditional = traditional;
    ligne.hanzi = hanzi;
    ligne.origin = 'Handedict';
    ligne.pinyin = toAccentedPinyin(extractPhonetic(line).toLowerCase());

    // Splitting on '/' puts the head of the line (spellings + pinyin) first
    // and whatever follows the final slash last; neither is a translation.
    ligne.translations = line.split('/').slice(1, -1);

    // String() keeps a missing column rendered as "undefined", exactly as
    // plain string concatenation would. The record ends with one extra tab
    // before the line terminator.
    const columns = [
      ligne.hanzi,
      ligne.traditional,
      ligne.pinyin,
      ligne.translations.join(' / '),
      ligne.origin,
    ].map(String);
    const ligneCSV = columns.join(tab) + tab + endLine;

    fs.appendFileSync(outputFile, ligneCSV);
    resetLigne();
  });
  /** Prints a completion notice; registered as the reader's 'close' listener. */
  function announceDone() {
    console.log('Done reading file');
  }

  rl.on('close', announceDone);