'use strict';

/*
 * Reads the dictionary file 'chdict.u8' line by line, converts the numbered
 * pinyin of each entry to tone-marked pinyin with `pinyinizer`, and appends
 * one JSON record per entry to 'ch.json'.
 */

const fs = require('fs');
const readline = require('readline');
// pinyin processing library
const pinyinizer = require('pinyinizer');
const stringify = require('json-stringify');

// utilities (not referenced in the part of the script visible here)
const tab = '\t';
const endLine = '\n';

// Record that is filled in for each dictionary line, written out, then reset.
const ligne = {
  hanzi: '',
  traditional: '',
  pinyin: '',
  translations: [],
  origin: '',
};

/** Restores every field of the shared `ligne` record to its empty value. */
function resetLigne() {
  ligne.hanzi = '';
  ligne.traditional = '';
  ligne.pinyin = '';
  ligne.translations = [];
  ligne.origin = '';
}

/**
 * Returns the text between the first '[' and the first ']' of a line.
 *
 * @param {string} line - Raw dictionary line.
 * @returns {string} The bracket content, or '' when the first ']' does not
 *   come after the first '['.
 */
function extractPhonetic(line) {
  const open = line.search(/\[/);
  const close = line.search(/\]/);
  return close > open ? line.slice(open + 1, close) : '';
}

/**
 * Converts the headword's numbered pinyin to tone marks and strips every '5'
 * (presumably the neutral-tone digit -- confirm against the dictionary format).
 *
 * @param {string} numbered - Lower-cased numbered pinyin.
 * @returns {string} The converted pinyin, or `numbered` unchanged when
 *   pinyinizer throws.
 */
function convertPhonetic(numbered) {
  try {
    return pinyinizer.pinyinize(numbered).replace(/5/g, '');
  } catch (err) {
    // Best effort: report the offending input and keep the raw text.
    console.log('Erreur: ' + numbered);
    return numbered;
  }
}

/**
 * Converts every "[...]" pinyin reference embedded in a translation.
 *
 * @param {string} translation - One '/'-delimited translation of an entry.
 * @returns {string} The translation with each bracketed reference converted;
 *   a reference that pinyinizer rejects is left as it was.
 */
function convertBracketedPinyin(translation) {
  const references = translation.match(/\[.*?\]/g);
  if (references === null) {
    return translation;
  }

  let result = translation;
  for (const original of references) {
    // pinyinizer is fed 'v' in place of 'u:'.
    const normalized = original.replace(/u:/g, 'v');
    let converted;
    try {
      converted = pinyinizer.pinyinize(normalized);
    } catch (err) {
      console.log('Erreur: ' + original);
      continue;
    }
    console.log(normalized, '-', converted);

    // Strip '5' from the converted reference only, so digits elsewhere in the
    // translation text are preserved.
    const cleaned = converted.replace(/5/g, '');

    // Search for the ORIGINAL bracket text (before the 'u:' substitution) and
    // use a replacer function so '$' sequences are never interpreted.
    result = result.replace(original, () => cleaned);
    console.log(result);
  }
  return result;
}

const outputFile = 'ch.json';

const rl = readline.createInterface({
  input: fs.createReadStream('chdict.u8'),
});

// Each new line emits an event - every time the stream receives \r, \n, or \r\n
rl.on('line', (line) => {
  // Skip '#' comment lines and lines too short to hold an entry.
  if (line.startsWith('#') || line.length <= 2) {
    return;
  }

  // The first two space-separated fields are the traditional and simplified forms.
  const [traditional, hanzi] = line.split(' ', 2);
  ligne.traditional = traditional;
  ligne.hanzi = hanzi;
  ligne.origin = 'Chdict';

  // Handle pinyin fields written with tone digits.
  ligne.pinyin = convertPhonetic(extractPhonetic(line).toLowerCase());

  // Translations sit between '/' separators; the first piece (headword and
  // pinyin) and the last piece (text after the final '/') are discarded.
  ligne.translations = line.split('/').slice(1, -1).map(convertBracketedPinyin);

  fs.appendFileSync(outputFile, stringify(ligne, null, 2, { offset: 4 }) + ',\n');
  resetLigne();
});

rl.on('close', () => {
  // The empty '{}' follows the trailing comma of the last record.
  // NOTE(review): ']}' closes an array and an object that this script never
  // opens -- presumably 'ch.json' is pre-seeded with the opening text; confirm.
  fs.appendFileSync(outputFile, '{}]}');
  console.log('Done reading file');
});