""" Not a script Don't use that script This python file store function related to bz2 python module and then the on the fly method of decompression. """ import bz2 import sys def unbz2(file): decomp = bz2.BZ2Decompressor() buf = b'' for c in file: buf += decomp.decompress(c) while b'\n' in buf: i = buf.index(b'\n') if i + 1 < len(buf): ret = buf[:i + 1] buf = buf[i + 1:] yield ret.decode("utf-8") else: yield buf buf = b'' with open('./wiktionary_dump.xml.bz2', 'rb') as f: it = iter(lambda: f.read(32768), b'') for a in unbz2(it): print(a, end='')