diff options
author | ache <ache@ache.one> | 2021-10-03 02:31:32 +0200 |
---|---|---|
committer | ache <ache@ache.one> | 2021-10-03 02:31:54 +0200 |
commit | 7e1d9e251b517153db8b639133c9e3bee266ce1b (patch) | |
tree | c1e02297807107096cf5a8962ed51342a69f0626 /download/bz2toDB.py | |
parent | Command every scripts (diff) |
Rename files
Diffstat (limited to 'download/bz2toDB.py')
-rw-r--r-- | download/bz2toDB.py | 35 |
1 files changed, 0 insertions, 35 deletions
"""Helpers for on-the-fly decompression of bz2 data.

This module is not meant to be used as a stand-alone script. It stores
functions built on the standard ``bz2`` module that decompress a stream
incrementally, without loading the whole archive in memory.
"""

import bz2
import sys


def unbz2(file):
    """Lazily decompress a bz2 stream and yield its content line by line.

    Args:
        file: An iterable of ``bytes`` chunks of bz2-compressed data, in
            order. Chunk boundaries are arbitrary; they do not need to
            line up with line boundaries.

    Yields:
        Each decoded line as ``str``, including its trailing ``"\\n"``.
        If the stream does not end with a newline, the last (unterminated)
        line is yielded too, without a newline.

    Raises:
        UnicodeDecodeError: If a line is not valid UTF-8.
        EOFError: If more chunks are fed after the end of the bz2 stream
            has been reached (``BZ2Decompressor`` handles a single stream).
    """
    decomp = bz2.BZ2Decompressor()
    buf = b''
    for c in file:
        buf += decomp.decompress(c)

        # Everything before the last b'\n' is a run of complete lines; the
        # remainder (possibly empty) is an incomplete line kept for the
        # next chunk. Splitting on the newline byte is safe for UTF-8
        # because 0x0A never appears inside a multi-byte sequence.
        *lines, buf = buf.split(b'\n')
        for line in lines:
            yield (line + b'\n').decode("utf-8")

    # Flush the final line when the data does not end with a newline.
    if buf:
        yield buf.decode("utf-8")


if __name__ == "__main__":
    # Demo driver: stream the dump to stdout in 32 KiB compressed chunks.
    with open('./wiktionary_dump.xml.bz2', 'rb') as f:
        it = iter(lambda: f.read(32768), b'')
        for a in unbz2(it):
            print(a, end='')