123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104 |
- var fs = require("fs");
- var readline = require("readline");
- var Stream = require("stream");
- // librairie de traitement du pinyin
- const pinyinizer = require("pinyinizer");
// Output formatting constants: the separator placed between fields and the
// terminator appended to every generated line.
const tab = "\t";
const endLine = "\n";
/**
 * Builds a blank record.
 *
 * A new object (and a new `translations` array) is created on every call, so
 * two records never share mutable state.
 *
 * @returns {{hanzi: string, traditional: string, pinyin: string, translations: string[]}}
 */
function createEmptyLigne() {
  return {
    hanzi: "",
    traditional: "",
    pinyin: "",
    translations: [],
  };
}

// Shared mutable record holding the fields `hanzi`, `traditional`, `pinyin`
// and `translations`.
const ligne = createEmptyLigne();

/**
 * Restores every field of the shared `ligne` record to its blank default.
 *
 * The record is updated in place, so references to `ligne` held elsewhere
 * remain valid; only `translations` is replaced by a fresh empty array.
 */
function resetLigne() {
  Object.assign(ligne, createEmptyLigne());
}
// The line most recently written to the output file.
let ligneCSV = "";

/**
 * Converts numbered pinyin to accented pinyin through `pinyinizer` and removes
 * every "5" left in the result (presumably the neutral-tone marker, which has
 * no accent to map to).
 *
 * The conversion is best effort: when it throws, the failing text is logged and
 * `fallback` is returned instead of aborting the whole run.
 *
 * @param {string} text - Text containing numbered pinyin syllables.
 * @param {string} [fallback=text] - Value returned when the conversion fails.
 * @returns {string} The converted text, or `fallback` on failure.
 */
function accentPinyin(text, fallback = text) {
  try {
    return pinyinizer.pinyinize(text).replace(/5/g, "");
  } catch (err) {
    console.log("Erreur: " + text);
    return fallback;
  }
}

/**
 * Turns one dictionary line into one output line.
 *
 * The line is parsed as `TRADITIONAL SIMPLIFIED [pin1 yin1] /def 1/def 2/`:
 * the first two space-separated tokens are the two written forms, the first
 * bracketed span is the pronunciation, and the slash-delimited segments are
 * the definitions. Missing parts become empty fields.
 *
 * @param {string} line - A non-comment line of the input file.
 * @returns {string} `hanzi`, `traditional`, pinyin and definitions joined by
 *   `tab` and terminated by `endLine`.
 */
function convertCedictLine(line) {
  const [traditional = "", hanzi = ""] = line.split(" ", 2);

  const open = line.indexOf("[");
  const close = line.indexOf("]");
  const numberedPinyin = open >= 0 && close > open ? line.slice(open + 1, close) : "";
  const pinyin = accentPinyin(numberedPinyin.toLowerCase());

  // The text before the first "/" and after the last one is not a definition.
  const translations = line
    .split("/")
    .slice(1, -1)
    .join(" / ")
    // Definitions may embed bracketed pinyin references. Each one is rewritten
    // where it stands, so digits elsewhere in the definitions are left alone.
    // "u:" is rewritten to "v" before conversion; if the conversion fails, the
    // reference is kept exactly as it appeared in the input.
    .replace(/\[.*?\]/g, (reference) =>
      accentPinyin(reference.replace(/u:/g, "v"), reference)
    );

  return [hanzi, traditional, pinyin, translations].join(tab) + endLine;
}

/**
 * Reads `inputFile` line by line and appends one converted line per dictionary
 * entry to `outputFile`, echoing each converted line to the console.
 *
 * Blank lines and lines starting with "#" are skipped. Output is appended, never
 * truncated, so an existing `outputFile` keeps its previous content. Processing
 * is asynchronous: the function returns before the input has been read.
 *
 * @param {string} inputFile - Path of the dictionary text file to read.
 * @param {string} outputFile - Path of the tab-separated file to append to.
 * @returns {void}
 */
function readFileLineByLine(inputFile, outputFile) {
  const rl = readline.createInterface({
    input: fs.createReadStream(inputFile),
    terminal: false,
  });

  rl.on("line", (line) => {
    if (line.trim() === "" || line[0] === "#") {
      return;
    }
    ligneCSV = convertCedictLine(line);
    console.log(ligneCSV);
    fs.appendFileSync(outputFile, ligneCSV);
  });
}
// Script entry point. The input and output paths may be given as the first and
// second command-line arguments; without arguments the script converts
// "cedict.txt" into "cedict.csv" in the current working directory.
const [inputFile = "cedict.txt", outputFile = "cedict.csv"] = process.argv.slice(2);
readFileLineByLine(inputFile, outputFile);
|