12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788 |
- var fs = require('fs')
- var readline = require('readline')
- var Stream = require('stream')
- // librairie de traitement du pinyin
- const pinyinizer = require('pinyinizer')
// Utilities: field separator and record terminator used when building output lines
const tab = '\t'
const endLine = '\n'
// Mutable record holding the fields of one dictionary entry.
// The binding is never reassigned; only its properties are updated.
const ligne = {
  hanzi: '', // second space-separated token of the input line
  traditional: '', // first space-separated token of the input line
  pinyin: '', // text found between '[' and ']'
  translations: [] // segments found between '/' separators
}
/**
 * Restores the shared `ligne` record to its empty state: the three string
 * fields become '' and `translations` becomes a brand-new empty array.
 * The record is mutated in place, so existing references to `ligne` stay valid.
 */
function resetLigne () {
  Object.assign(ligne, {
    hanzi: '',
    traditional: '',
    pinyin: '',
    translations: []
  })
}
// The CSV line that is about to be written to the CSV file
// (`let`, not `const`: the value is replaced for every converted entry)
let ligneCSV = ''
/**
 * Converts numbered pinyin to its accented form with `pinyinizer`.
 *
 * Any '5' left in the converted text is removed (presumably the neutral-tone
 * digit, which needs no accent -- confirm against the dictionary format).
 * If the library throws, the failure is logged and the input is returned
 * unchanged so a single bad entry does not abort the whole conversion.
 *
 * @param {string} numbered - lower-cased pinyin taken from between '[' and ']'
 * @returns {string} converted pinyin, or `numbered` itself on failure
 */
function convertPinyin (numbered) {
  try {
    return pinyinizer.pinyinize(numbered).replace(/5/g, '')
  } catch (err) {
    console.log(`Erreur: ${numbered}`)
    return numbered
  }
}

/**
 * Builds the output line for one dictionary line shaped like
 * `TRADITIONAL SIMPLIFIED [pin1 yin1] /translation 1/translation 2/`.
 *
 * @param {string} line - one raw line of the input file
 * @returns {string|null} `simplified TAB traditional TAB pinyin TAB translations`
 *   followed by the line terminator, or null when the line is skipped
 *   (it starts with '#', or is at most two characters long)
 */
function convertLine (line) {
  if (line.startsWith('#') || line.length <= 2) {
    return null
  }

  // The first two space-separated tokens are the traditional and simplified forms
  const [traditional = '', hanzi = ''] = line.split(' ', 2)

  // The pinyin sits between the first '[' and the first ']'; anything else yields ''
  const open = line.indexOf('[')
  const close = line.indexOf(']')
  const rawPinyin = open !== -1 && close > open ? line.slice(open + 1, close) : ''
  const pinyin = convertPinyin(rawPinyin.toLowerCase())

  // Drop the text before the first '/' and whatever follows the last '/'
  const translations = line.split('/').slice(1, -1).join(' / ')

  return [hanzi, traditional, pinyin, translations].join(tab) + endLine
}

/**
 * Reads `inputFile` line by line and appends one converted line to
 * `outputFile` for every dictionary entry found.
 *
 * The output file is appended to, never truncated, so running the conversion
 * twice on the same target duplicates its content.
 *
 * @param {string} inputFile - path of the dictionary file to read
 * @param {string} outputFile - path of the file the converted lines are appended to
 * @returns {readline.Interface} the line reader; its 'close' event fires once
 *   the whole input has been processed
 */
function readFileLineByLine (inputFile, outputFile) {
  const reader = readline.createInterface({
    input: fs.createReadStream(inputFile),
    terminal: false
  })

  reader.on('line', function (line) {
    const csvLine = convertLine(line)
    if (csvLine !== null) {
      fs.appendFileSync(outputFile, csvLine)
    }
  })

  return reader
}
// Script entry point: convert the dictionary file 'chdict.u8' into 'chdict.csv'
readFileLineByLine('chdict.u8', 'chdict.csv')
|