diff options
Diffstat (limited to 'download/bz2toDB.py')
-rw-r--r-- | download/bz2toDB.py | 35 |
1 files changed, 0 insertions, 35 deletions
"""Helpers for on-the-fly bz2 decompression.

Not a script -- do not rely on running it as one.

This module stores functions related to the ``bz2`` standard-library
module, namely the on-the-fly (streaming) method of decompression.
"""

import bz2
import sys


def unbz2(file):
    """Lazily decompress bz2 data and yield it line by line as text.

    Args:
        file: An iterable of ``bytes`` chunks of bz2-compressed data, in
            order (for example ``iter(lambda: f.read(32768), b'')``).

    Yields:
        str: Each decompressed line decoded as UTF-8, including its
        trailing ``'\\n'``. A final line that has no trailing newline is
        yielded as well, without one.

    Raises:
        OSError: If the data is not valid bz2.
        UnicodeDecodeError: If a decompressed line is not valid UTF-8.
    """
    decomp = bz2.BZ2Decompressor()
    pending = b''
    for chunk in file:
        data = chunk
        while data:
            if decomp.eof:
                # A BZ2Decompressor handles exactly one stream; files made
                # of several concatenated streams need a fresh one each.
                decomp = bz2.BZ2Decompressor()
            pending += decomp.decompress(data)
            # Bytes following the end of a stream belong to the next one.
            data = decomp.unused_data if decomp.eof else b''

        # Everything before the last b'\n' is complete lines; the tail is
        # an incomplete line kept for the next chunk. Splitting on the
        # newline byte before decoding is safe: 0x0A never occurs inside
        # a multi-byte UTF-8 sequence.
        *lines, pending = pending.split(b'\n')
        for line in lines:
            yield (line + b'\n').decode("utf-8")

    if pending:
        # Last line of the input had no trailing newline.
        yield pending.decode("utf-8")


if __name__ == "__main__":
    # Guarded so that importing this module for ``unbz2`` does not try to
    # read and dump the whole archive.
    with open('./wiktionary_dump.xml.bz2', 'rb') as f:
        chunks = iter(lambda: f.read(32768), b'')
        sys.stdout.writelines(unbz2(chunks))