aboutsummaryrefslogtreecommitdiff
path: root/dfr/msgp2sqlite.py
blob: c08efdb914a13b13d6f125d9570de35397186712 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import msgpack
import os
import sys

import sqlite3
import argparse


def writeDB(outputF, data):
    """Write the parsed dictionary entries to a fresh SQLite database.

    Any file already present at ``outputF`` is removed first. A single
    ``entry`` table is then created and filled with one row per entry.

    Args:
        outputF: Path of the SQLite file to create.
        data: Mapping of a word to a list of entries. Each entry is read
            through the keys ``'cat-gram'``, ``'API'``, ``'infos'`` (an
            iterable of strings, stored tab-joined), ``'genre'``,
            ``'accord'`` and ``'def'`` (stored msgpack-encoded).

    Raises:
        OSError: If an existing output file cannot be removed.
        KeyError: If an entry lacks one of the keys listed above.
        sqlite3.Error: If the database cannot be created or written.
    """
    # Start from scratch. Only a missing file is expected here; any other
    # removal failure would otherwise let rows pile up in a stale database,
    # because the table is created with IF NOT EXISTS.
    try:
        os.remove(outputF)
    except FileNotFoundError:
        pass

    con = sqlite3.connect(outputF)
    try:
        # The connection context manager commits on success and rolls back
        # on error, but does not close the connection; see ``finally``.
        with con:
            con.execute('''CREATE TABLE IF NOT EXISTS entry (
                word TEXT,
                cat_gram TEXT,
                API TEXT,
                infos TEXT,
                genre TEXT,
                accord TEXT,
                defs BLOB,
                ID INTEGER PRIMARY KEY)''')

            # Lazily build one parameter tuple per entry so the whole row
            # set never has to be held in memory at once.
            rows = (
                (word, entry['cat-gram'], entry['API'],
                 "\t".join(entry['infos']),
                 entry['genre'], entry['accord'],
                 msgpack.packb(entry['def']))
                for word, entries in data.items()
                for entry in entries
            )
            con.executemany('''INSERT INTO entry (word, cat_gram, API, infos,
                genre, accord, defs) VALUES (?, ?, ?, ?,  ?, ?,  ?)''', rows)
    finally:
        con.close()


if __name__ == '__main__':
    # Command-line entry point: read a msgpack dump and convert it into an
    # SQLite database file through writeDB().
    parser = argparse.ArgumentParser(description='wiktionary dump msgpack '
                                     'to SQLite database file')
    parser.add_argument('-o', '--out', dest='outputF', action='store',
                        help='the output filename')
    parser.add_argument('-i', '--input', dest='inputF', action='store',
                        help='the input filename, a dump of witionary')

    arg = parser.parse_args()

    # Report every missing argument before stopping, so the user sees all
    # problems at once instead of crashing later in open(None).
    missing = False
    if arg.inputF is None:
        print('Error input file needed', file=sys.stderr)
        missing = True
    if arg.outputF is None:
        print('Error output file needed', file=sys.stderr)
        missing = True
    if missing:
        sys.exit(1)

    with open(arg.inputF, 'rb') as f:
        r = f.read()

    d = msgpack.unpackb(r, raw=False)
    # Drop the raw bytes before building the database to lower peak memory.
    del r
    writeDB(arg.outputF, d)