# Origin: commit 7e1d9e251b517153db8b639133c9e3bee266ce1b ("Rename files",
# ache, Sun, 3 Oct 2021 02:31:32 +0200), which created dfr/msgp2sqlite.py.
"""Convert a msgpack dump of wiktionary entries into a SQLite database.

The input file is a msgpack-encoded mapping ``word -> list of entries``.
Each entry is written as one row of the ``entry`` table of the output
database.
"""
import argparse
import os
import sqlite3
import sys

import msgpack


# Schema of the single output table.  ``defs`` holds the msgpack-encoded
# definitions of an entry, hence the BLOB type.
_CREATE_TABLE = '''CREATE TABLE IF NOT EXISTS entry (
    word TEXT,
    cat_gram TEXT,
    API TEXT,
    infos TEXT,
    genre TEXT,
    accord TEXT,
    defs BLOB,
    ID INTEGER PRIMARY KEY)'''

_INSERT_ENTRY = '''INSERT INTO entry (word, cat_gram, API, infos,
    genre, accord, defs) VALUES (?, ?, ?, ?, ?, ?, ?)'''


def _entry_rows(data):
    """Yield one ``entry`` row tuple per entry found in *data*."""
    for headword, entries in data.items():
        for entry in entries:
            yield (
                headword,
                entry['cat-gram'],
                entry['API'],
                # ``infos`` is a sequence of strings, flattened to one
                # tab-separated TEXT column.
                "\t".join(entry['infos']),
                entry['genre'],
                entry['accord'],
                # The definitions are stored re-encoded as msgpack bytes.
                msgpack.packb(entry['def']),
            )


def writeDB(outputF, data):
    """Write *data* to a fresh SQLite database at *outputF*.

    Any existing file at *outputF* is deleted first, so the resulting
    database only contains the rows built from *data*.

    Args:
        outputF: Path of the SQLite database file to create.
        data: Mapping from a word to a list of entry dicts.  Each entry
            must provide the keys ``'cat-gram'``, ``'API'``, ``'infos'``,
            ``'genre'``, ``'accord'`` and ``'def'``.

    Raises:
        OSError: If an existing output file cannot be removed.
        KeyError: If an entry lacks one of the required keys.
        sqlite3.Error: If the database cannot be created or written.
    """
    # Start from a clean file.  Only "does not exist" is expected here; any
    # other failure (permissions, path is a directory, ...) must surface,
    # otherwise rows would silently be appended to a stale database.
    try:
        os.remove(outputF)
    except FileNotFoundError:
        pass

    con = sqlite3.connect(outputF)
    try:
        # ``with con`` commits on success and rolls back on error, but it
        # does not close the connection; that is done in ``finally``.
        with con:
            con.execute(_CREATE_TABLE)
            con.executemany(_INSERT_ENTRY, _entry_rows(data))
    finally:
        con.close()


def main(argv=None):
    """Run the command-line converter.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        The process exit status: 0 on success, 1 when a required file
        name is missing from the command line.
    """
    parser = argparse.ArgumentParser(description='wiktionary dump msgpack '
                                     'to SQLite database file')
    parser.add_argument('-o', '--out', dest='outputF', action='store',
                        help='the output filename')
    parser.add_argument('-i', '--input', dest='inputF', action='store',
                        help='the input filename, a dump of wiktionary')

    arg = parser.parse_args(argv)

    # Report every missing option before giving up, instead of falling
    # through and crashing on ``open(None)``.
    missing = False
    if arg.inputF is None:
        print('Error input file needed', file=sys.stderr)
        missing = True
    if arg.outputF is None:
        print('Error output file needed', file=sys.stderr)
        missing = True
    if missing:
        return 1

    with open(arg.inputF, 'rb') as f:
        raw = f.read()

    entries = msgpack.unpackb(raw, raw=False)
    # Drop the raw bytes before building the database to lower peak memory.
    del raw
    writeDB(arg.outputF, entries)
    return 0


if __name__ == '__main__':
    sys.exit(main())