123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102 |
const fs = require('fs');
const readline = require('readline');
// Pinyin processing library.
const pinyinizer = require('pinyinizer');
const stringify = require('json-stringify');
// XML document builder library.
const builder = require('xmlbuilder');

// Name of the XML file to generate.
const fichierXml = 'ch.xml';

// Create the XML document; its root element is <Chdict>.
const feed = builder.create('Chdict', {
  version: '1.0',
  encoding: 'UTF-8',
  standalone: true,
});

// Utilities.
const tab = '\t';
const endLine = '\n';

// Shared record describing the dictionary entry currently being built.
// `origin` starts as 'Chdict' (the same value resetLigne() restores) so that
// the very first emitted entry carries the same origin as every later one.
const ligne = {
  hanzi: '',
  traditional: '',
  pinyin: '',
  translations: [],
  origin: 'Chdict',
};
/**
 * Restores the shared `ligne` record to its blank state so the next
 * dictionary entry starts from empty fields and the 'Chdict' origin.
 * A fresh `translations` array is assigned on every call.
 */
function resetLigne() {
  Object.assign(ligne, {
    hanzi: '',
    traditional: '',
    pinyin: '',
    translations: [],
    origin: 'Chdict',
  });
}
// Alias for the name of the XML file that will be written.
const outputFile = fichierXml;

// Line-oriented reader over the source dictionary file.
const inputStream = fs.createReadStream('chdict.u8');
const rl = readline.createInterface({ input: inputStream });
/**
 * Converts numbered pinyin to its pinyinized form and strips every '5' digit
 * from the result (the original code removed '5' after conversion).
 *
 * The conversion is best-effort: if pinyinizer throws, the problem is logged
 * and `fallback` is returned instead, so one bad entry cannot abort the run.
 *
 * @param {string} text - Text handed to pinyinizer.pinyinize().
 * @param {string} [fallback=text] - Value returned when conversion fails.
 * @returns {string} The converted text, or `fallback` on failure.
 */
function pinyinizeOrKeep(text, fallback = text) {
  try {
    return pinyinizer.pinyinize(text).replace(/5/g, '');
  } catch (err) {
    console.log('Erreur: ' + text);
    return fallback;
  }
}

/**
 * Converts every bracketed `[...]` span inside one translation.
 *
 * Each span is replaced exactly where it was matched, so the text outside the
 * brackets (including any digit '5' it contains) is left untouched.
 *
 * @param {string} translation - One slash-delimited definition of an entry.
 * @returns {string} The translation with its bracketed spans converted.
 */
function convertEmbeddedPinyin(translation) {
  let hasBrackets = false;
  const converted = translation.replace(/\[.*?\]/g, (bracketed) => {
    hasBrackets = true;
    // 'u:' is rewritten to 'v' before conversion, as the original code did;
    // presumably the form pinyinizer expects for u-umlaut (to be confirmed).
    const normalized = bracketed.replace(/u:/g, 'v');
    const result = pinyinizeOrKeep(normalized, bracketed);
    console.log(normalized, '-', result);
    return result;
  });
  if (hasBrackets) {
    console.log(converted);
  }
  return converted;
}

// Each new line emits an event - every time the stream receives \r, \n, or \r\n
rl.on('line', (line) => {
  // Skip '#' comment lines and lines too short to hold an entry.
  if (line[0] === '#' || line.length <= 2) {
    return;
  }

  // The first two space-separated tokens are the traditional and simplified forms.
  const [traditional, hanzi] = line.split(' ', 2);
  ligne.traditional = traditional;
  ligne.hanzi = hanzi;

  // The headword pinyin sits between the first '[' and the first ']'.
  // An absent or misordered pair yields an empty string.
  const open = line.indexOf('[');
  const close = line.indexOf(']');
  const rawPinyin = close > open ? line.slice(open + 1, close) : '';
  ligne.pinyin = pinyinizeOrKeep(rawPinyin.toLowerCase());

  // Definitions are delimited by '/'; the piece before the first slash (the
  // headword part) and the piece after the last slash are not definitions.
  ligne.translations = line
    .split('/')
    .slice(1, -1)
    .map(convertEmbeddedPinyin);

  // Append the entry to the XML document, then blank the shared record.
  feed.ele({ ligne });
  resetLigne();
});
// Once the input is exhausted, serialize the XML document and write it out.
rl.on('close', function onClose() {
  const xml = feed.end({ pretty: true });
  fs.writeFileSync(fichierXml, xml);
  console.log('Done reading file');
});
|