'use strict';

/*
 * Converts the dictionary file `handedict.u8` into an XML file `hd.xml`.
 *
 * The input is read line by line. From each non-comment line the script takes:
 *   - the first two space-separated tokens (stored as `traditional` and `hanzi`),
 *   - the text between the first '[' and the first ']' (the phonetic field),
 *   - the fields delimited by '/' (the translations).
 * One <ligne> element is appended to the <Handedict> root per line, and the
 * document is written to disk once the input has been fully read.
 */

const fs = require('fs');
const readline = require('readline');
// Pinyin processing library.
const pinyinizer = require('pinyinizer');
// NOTE(review): not referenced by the code below; the require is kept so the
// module's dependency set is unchanged.
const stringify = require('json-stringify');
// XML file handling library.
const builder = require('xmlbuilder');

// File names.
const fichierSource = 'handedict.u8';
const fichierXml = 'hd.xml';

// Create the XML document.
const feed = builder.create('Handedict', {
  version: '1.0',
  encoding: 'UTF-8',
  standalone: true,
});

/**
 * Returns the text between the first '[' and the first ']' of a line.
 *
 * @param {string} line - One raw line of the input file.
 * @returns {string} The bracketed text, or '' when there is nothing between
 *   the brackets (missing bracket, empty brackets, or ']' before '[').
 */
function extractPhonetic(line) {
  const open = line.indexOf('[');
  const close = line.indexOf(']');
  // A non-positive length covers every malformed case and yields '', which is
  // what the previous substr-based extraction produced for them.
  const length = close - open - 1;
  return length > 0 ? line.slice(open + 1, close) : '';
}

/**
 * Runs the phonetic field (pinyin containing digits) through pinyinizer and
 * strips any remaining '5' characters.
 *
 * @param {string} numbered - Lower-cased phonetic field.
 * @returns {string} The converted text, or `numbered` unchanged when the
 *   conversion throws.
 */
function toPinyin(numbered) {
  try {
    // Presumably '5' marks a neutral tone that pinyinizer leaves in place;
    // TODO confirm against the pinyinizer documentation.
    return pinyinizer.pinyinize(numbered).replace(/5/g, '');
  } catch (err) {
    // Best effort: report the offending field and keep the unconverted text.
    console.log(`Erreur: ${numbered}`);
    return numbered;
  }
}

/**
 * Parses one line of the input file into a dictionary record.
 *
 * @param {string} line - One raw line of the input file.
 * @returns {{hanzi: string, traditional: string, pinyin: string,
 *   translations: string[]}|null} The record, or null when the line is a
 *   comment (starts with '#') or is at most two characters long.
 */
function parseLine(line) {
  if (line.length <= 2 || line[0] === '#') {
    return null;
  }

  // The first token is stored as the traditional form, the second as hanzi.
  const [traditional, hanzi] = line.split(' ', 2);
  const pinyin = toPinyin(extractPhonetic(line).toLowerCase());

  // Splitting on '/' leaves the text before the first slash at the head and
  // the text after the last slash at the tail; both are discarded.
  const translations = line.split('/').slice(1, -1);

  // Key order determines the order of the child elements in the XML output.
  return { hanzi, traditional, pinyin, translations };
}

const rl = readline.createInterface({
  input: fs.createReadStream(fichierSource),
});

// Each new line emits an event - every time the stream receives \r, \n, or \r\n.
rl.on('line', (line) => {
  const ligne = parseLine(line);
  if (ligne !== null) {
    feed.ele({ ligne });
  }
});

rl.on('close', () => {
  // Write the XML file.
  fs.writeFileSync(fichierXml, feed.end({ pretty: true }));
  console.log('Done reading file');
});