diff options
Diffstat (limited to 'download/msgPack2sqlite_msgPack.py')
-rw-r--r-- | download/msgPack2sqlite_msgPack.py | 52 |
1 files changed, 52 insertions, 0 deletions
"""Convert a msgpack wiktionary dump into a SQLite database file.

The input file is a msgpack document that decodes to a mapping from a word
to a list of entry dicts; every entry becomes one row of the ``entry`` table.
"""
import argparse
import sqlite3
import sys
from contextlib import closing

import msgpack


# Schema of the single output table.  ``defs`` holds the msgpack-encoded
# definitions, hence the BLOB column type.
_CREATE_TABLE = '''CREATE TABLE IF NOT EXISTS entry (
    word TEXT,
    cat_gram TEXT,
    API TEXT,
    infos TEXT,
    genre TEXT,
    accord TEXT,
    defs BLOB,
    ID INTEGER PRIMARY KEY)'''

_INSERT_ENTRY = '''INSERT INTO entry (word, cat_gram, API, infos,
    genre, accord, defs) VALUES (?, ?, ?, ?, ?, ?, ?)'''


def _entry_rows(data):
    """Yield one ``entry`` row tuple for every word entry found in *data*."""
    for w, entries in data.items():
        for entry in entries:
            yield (w, entry['cat-gram'], entry['API'],
                   "\t".join(entry['infos']),
                   entry['genre'], entry['accord'],
                   msgpack.packb(entry['def']))


def writeDB(outputF, data):
    """Write every entry of *data* into the SQLite database *outputF*.

    Args:
        outputF: path of the SQLite database file; the ``entry`` table is
            created when it does not exist yet.
        data: mapping from a word to a list of entry dicts.  Each entry must
            provide the keys ``cat-gram``, ``API``, ``infos`` (an iterable of
            strings, stored tab-joined), ``genre``, ``accord`` and ``def``
            (stored msgpack-encoded in the ``defs`` column).

    Raises:
        KeyError: if an entry lacks one of the keys above; the inserts of the
            current call are rolled back in that case.
        sqlite3.Error: if the database cannot be opened or written.
    """
    # ``with con`` only commits or rolls back the transaction; ``closing``
    # is what actually releases the connection when we are done.
    with closing(sqlite3.connect(outputF)) as con:
        with con:
            con.execute(_CREATE_TABLE)
        with con:
            con.executemany(_INSERT_ENTRY, _entry_rows(data))


def main():
    """Parse the command line, load the msgpack dump and fill the database."""
    parser = argparse.ArgumentParser(description='wiktionary dump msgpack '
                                     'to SQLite database file')
    parser.add_argument('-o', '--out', dest='outputF', action='store',
                        help='the output filename')
    parser.add_argument('-i', '--input', dest='inputF', action='store',
                        help='the input filename, a dump of wiktionary')

    arg = parser.parse_args()

    # Report every missing argument, then stop instead of failing later
    # with an unrelated TypeError from open(None).
    missing = False
    if arg.inputF is None:
        print('Error input file needed', file=sys.stderr)
        missing = True
    if arg.outputF is None:
        print('Error output file needed', file=sys.stderr)
        missing = True
    if missing:
        sys.exit(1)

    with open(arg.inputF, 'rb') as f:
        # The raw bytes are a temporary, so they are released as soon as
        # decoding finishes and before the database is written.
        d = msgpack.unpackb(f.read(), raw=False)

    writeDB(arg.outputF, d)


if __name__ == '__main__':
    main()