<?php
// index.php
//
// Example driver: unpack a compressed CC-CEDICT dictionary file, parse it
// block by block, dump every parsed block with print_r(), and finally remove
// the temporary uncompressed file.

// Anchor both includes to this script's directory (same as the data file
// below) so the script does not depend on the current working directory.
require __DIR__ . '/vendor/autoload.php';
require __DIR__ . '/config.php';

use CcCedict\Entry;
use CcCedict\Parser;
use CcCedict\Unpacker;

// UNPACKING
// The file comes from http://www.mdbg.net/chindict/chindict.php?page=cc-cedict
// and is either zipped or gzipped, so it has to be unpacked first.
$unpacker = new Unpacker();

// Optionally set a directory for the Unpacker to unpack into.
// $unpacker->setTempDirectory('/tmp');

// Tell the Unpacker which file to operate on.
$unpacker->setInputFile(__DIR__ . '/cedict.gz');

// Do the unpack; the return value tells us where to find the uncompressed file.
$filePath = $unpacker->unpack();

try {
    // PARSING
    // Now the uncompressed file can be parsed.
    $parser = new Parser();

    // Optionally, set options.
    // NOTE(review): the Entry::F_* constants presumably select which fields
    // each parsed entry carries - confirm against the Entry class.
    $parser->setOptions([
        Entry::F_ORIGINAL,
        Entry::F_SIMPLIFIED,
        Entry::F_TRADITIONAL,
        Entry::F_PINYIN_DIACRITIC,
        Entry::F_PINYIN_DIACRITIC_EXPANDED,
        Entry::F_ENGLISH_EXPANDED,
    ]);

    // Tell the parser where the uncompressed data is.
    $parser->setFilePath($filePath);

    // Tell the parser how much data it should read at a time.
    $parser->setBlockSize(50);

    // Tell the parser where to begin.
    $parser->setStartLine(0);

    // Tell the parser how many blocks to get. This is really optional because
    // the same can be achieved with a combination of setBlockSize() and
    // setStartLine().
    // NOTE(review): INF presumably means "no limit" - confirm against Parser.
    $parser->setNumberOfBlocks(INF);

    // Do the parse and dump each result as it is produced.
    foreach ($parser->parse() as $output) {
        print_r($output);
    }
} finally {
    // Remove the temporary file. Doing this in `finally` ensures the unpacked
    // file is cleaned up even when configuring or running the parser throws.
    $unpacker->removeOutputFile();
}