'use strict'

/*
 * Converts a line-oriented dictionary file into a tab-separated file.
 *
 * Each data line is parsed as:
 *   <traditional> <hanzi> [<numbered pinyin>] /<translation>/<translation>/.../
 * Lines starting with '#' and lines of two characters or fewer are ignored.
 * NOTE(review): this layout looks like the CC-CEDICT format; confirm against
 * the actual input file.
 */

const fs = require('fs')
const readline = require('readline')
const Stream = require('stream')
// pinyin processing library
const pinyinizer = require('pinyinizer')

// Column separator and record terminator of the generated file.
const tab = '\t'
const endLine = '\n'

/**
 * Returns the text located between the first '[' and the first ']' of a line.
 *
 * @param {string} line - one raw line of the input file
 * @returns {string} the bracketed text, or '' when no ']' follows the '['
 */
function extractPinyin (line) {
  const open = line.indexOf('[')
  const close = line.indexOf(']')
  return close > open ? line.slice(open + 1, close) : ''
}

/**
 * Lowercases a pinyin field written with digits, runs it through
 * `pinyinizer.pinyinize` and removes every '5' left in the result.
 * (Presumably '5' marks the neutral tone, which has no diacritic; confirm.)
 *
 * When the conversion throws, the problem is logged and the lowercased,
 * unconverted text is returned so that the line is still written out.
 *
 * @param {string} rawPinyin - pinyin text as found between the brackets
 * @returns {string} converted pinyin, or the lowercased input on failure
 */
function convertPinyin (rawPinyin) {
  const lowered = rawPinyin.toLowerCase()
  try {
    return pinyinizer.pinyinize(lowered).replace(/5/g, '')
  } catch (err) {
    // Log the value that could not be converted; the previous code referenced
    // an undeclared identifier here, which crashed the whole run.
    console.log('Erreur: ' + lowered)
    return lowered
  }
}

/**
 * Builds the output record for one dictionary line.
 *
 * Columns, in order: hanzi, traditional, pinyin, translations joined by ' / '.
 *
 * @param {string} line - one data line of the input file
 * @returns {string} the tab-separated record, terminated by a newline
 */
function toCsvLine (line) {
  // The first token is the traditional form, the second one the hanzi column.
  // A missing token becomes an empty column rather than the text "undefined".
  const [traditional = '', hanzi = ''] = line.split(' ', 2)
  const pinyin = convertPinyin(extractPinyin(line))
  // Splitting on '/' leaves the headword part in front and whatever follows
  // the last '/' at the end; both are dropped to keep only the translations.
  const translations = line.split('/').slice(1, -1).join(' / ')
  return [hanzi, traditional, pinyin, translations].join(tab) + endLine
}

/**
 * Reads `inputFile` line by line and appends one converted record per data
 * line to `outputFile`.
 *
 * Records are appended synchronously from the 'line' handler, so an existing
 * output file is extended, not overwritten.
 *
 * @param {string} inputFile - path of the dictionary file to read
 * @param {string} outputFile - path of the tab-separated file to append to
 */
function readFileLineByLine (inputFile, outputFile) {
  const instream = fs.createReadStream(inputFile)
  // Placeholder output stream handed to readline; nothing in this script
  // reads from it.
  const outstream = new Stream()
  outstream.readable = true
  outstream.writable = true

  const rl = readline.createInterface({
    input: instream,
    output: outstream,
    terminal: false
  })

  rl.on('line', function (line) {
    // Skip comment lines and lines too short to hold an entry.
    if (line[0] === '#' || line.length <= 2) {
      return
    }
    fs.appendFileSync(outputFile, toCsvLine(line))
  })
}

readFileLineByLine('chdict.u8', 'chdict.csv')