From ac7c2e5d071151f69872f8e97dac414e41976168 Mon Sep 17 00:00:00 2001
From: ache <ache@ache.one>
Date: Tue, 16 Jun 2020 17:37:32 +0200
Subject: Documentation README

---
 README.md                 | 102 +++++++++++++++
 dicofr.py                 |  17 ++-
 dump2msgp.py              | 309 ++++++++++++++++++++++++++++++++++++++++++++++
 main.py                   | 309 ----------------------------------------------
 msgPack2sqlite_msgPack.py |  73 ++++++-----
 5 files changed, 467 insertions(+), 343 deletions(-)
 create mode 100644 README.md
 create mode 100644 dump2msgp.py
 delete mode 100644 main.py

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..6d9a041
--- /dev/null
+++ b/README.md
@@ -0,0 +1,102 @@
+Dicofr
+======
+
+A utility to create and query a French dictionary based on a [Wiktionary archive dump](https://dumps.wikimedia.org/frwiktionary/20200601/).
+
+
+Technically
+----------
+
+A bunch of **Python** scripts to transform a Wiktionary archive dump into an SQLite database file.
+
+ - With a simple CLI.
+ - With a simple WUI, Flask-based.
+ - Regex support.
+ - 
+
+How to create the database
+--------------------------
+
+First, you have to download a Wiktionary archive file.
+For example, the file `frwiktionary-20200601-pages-articles.xml.bz2`, which is a full dump of the current version of every page.
+
+For now, you have to decompress it completely before you can process it.
+The use of [bz2](https://docs.python.org/3/library/bz2.html) may be considered in the future to make this step optional and thus reduce disk usage.
+
+~~~shell
+$ bunzip2 frwiktionary-20200601-pages-articles.xml.bz2
+~~~
+
+Then, you will create an intermediate file, a msgpack file, containing all the Wiktionary data.
+This file is of interest to developers, not to end users.
+It's a serialization of the dictionary used internally (a Python dictionary).
+
+~~~shell
+$ python dump2msgp.py -i frwiktionary-20200601-pages-articles.xml
+~~~
+
+Then, you can create the SQLite database file.
+~~~shell
+$ python msgPack2sqlite_msgPack.py -i dicofr.msgpk -o dicofr.db
+~~~
+
+You can then use `dicofr.py` to search for a word from the CLI or use the WUI with the command:
+~~~shell
+$ python web.py
+~~~
+
+How to use it
+-------------
+
+You can use the CLI.
+
+~~~shell
+$ dicofr -h
+usage: dicofr [-h] [--sql] [--matching] PATTERN
+
+Get a french word's definition.
+
+positional arguments:
+  PATTERN     the word or the pattern to match
+
+optional arguments:
+  -h, --help  show this help message and exit
+  --sql       search a definition using SQL regex, _ to match a letter, % to match a group of letters
+  --matching  search the french words that match the regex
+~~~
+
+For example
+
+~~~shell
+$ dicofr julien
+   julien
+   /ʒy.ljɛ̃/, adjectif
+   	(Chronologie) Qui est lié à Jules César et à sa décision d’instaurer l’alternance entre trois années de trois cent soixante-cinq jours et une année bissextile de trois cent soixante-six jours.
+   		 * Calendrier *julien*.
+   		 * Année *julienne*.
+   		 * Correction *julienne*.
+~~~
+
+~~~shell
+$ dicofr -m /julien/
+julienois
+juliennette
+juliennoises
+juliennes
+julien
+julienne
+julienoises
+juliennettes
+juliennoise
+julienoise
+juliennois
+juliens
+~~~
+
+How to contribute?
+------------------
+
+This project is free; you are free to send me a PR to improve this software.
+Respecting each other is the only rule.
+
+License: MIT-like.
diff --git a/dicofr.py b/dicofr.py
index ea5741c..9bef7ac 100755
--- a/dicofr.py
+++ b/dicofr.py
@@ -12,7 +12,7 @@ sys.path.insert(-1, DIR_PATH)
 
 import ui
 
-dico = 'wiktfr.sql'
+dico = 'dicofr.db'
 
 
 def get_def_sql(word):
@@ -56,7 +56,11 @@ def matching(word):
 
     matchingWord = []
 
-    with open('list_word.msgpk', 'rb') as f:
+    if not exists(arg.wordList):
+        print(f'Error: Word list {arg.wordList} not found', file=sys.stderr)
+        return
+
+    with open(arg.wordList, 'rb') as f:
         msgpackList = f.read()
         listWord = msgpack.unpackb(msgpackList, raw=False)
 
@@ -81,8 +85,9 @@ def matching(word):
 
 if __name__ == '__main__':
     if len(sys.argv) < 2:
-      print("Erreur: Rechercher un mot", file=sys.stderr)
-      exit()
+      print("Erreur: Rechercher un mot\nUtilisez l'option -h pour avoir de l'aide",
+            file=sys.stderr)
+      exit(-1)
 
     # Si on n'arrive pas à trouver le dictionnaire
     if not exists(dico):
@@ -97,6 +102,8 @@ if __name__ == '__main__':
                         const=get_def_sql_reg, default=get_def_sql,
                         help='search a definition using SQL regex, '
                              '_ to match a letter, %% to match a group of letters')
+    parser.add_argument('-w', '--wordlist', dest='wordList',
+                        action='store_const', default='list_word.msgpack')
     parser.add_argument('-m', '--matching', dest='matching', action='store_true',
                         help='search the french words that match the regex')
     parser.add_argument('word', metavar='PATTERN', type=str,
@@ -113,6 +120,4 @@ if __name__ == '__main__':
     else:
         for w in arg.action(arg.word):
             ui.show_terminal(w)
-        if not ret:
-            exit(1)
 
diff --git a/dump2msgp.py b/dump2msgp.py
new file mode 100644
index 0000000..d4fb050
--- /dev/null
+++ b/dump2msgp.py
@@ -0,0 +1,309 @@
+import tempfile as tmp
+import re
+import sys
+import msgpack
+
+from listSection import listInfoSection
+from template import template
+
+
+"""
+
+Extract words from the Wiktionnary archive
+
+"""
+
+template_second = ['link', 'bd', 'pc', 'nom w pc', 'w', 'smcp', 'lien', 'ws',
+                   'in', 'siècle2', 'fchim', 'nobr', 'wp', 'r',
+                   'clé de tri', 'contexte', 'emploi', 'l', 'polytonique',
+                   'pron-API', 'registre', 'scmp', 'siècle', 'x',
+                   ]
+
+
+template_second_lambda_trd = {
+    'refnec': (lambda x: '(Référence nécessaire : ' + x + ')'),
+    'refnéc': (lambda x: '(Référence nécessaire : ' + x + ')'),
+}
+
+template_second_lambda_snd = {
+    'term': (lambda x: '(' + x.title() + ')'),
+    'terme': (lambda x: '(' + x.title() + ')'),
+    'ex': (lambda x: '^{' + x if x else 'e' + '}'),
+    'exp': (lambda x: '^{' + x if x else 'e' + '}'),
+    'e': (lambda x: '^{' + x if x else 'e' + '}'),
+    'er': (lambda x: '^{' + x if x else 'er' + '}'),
+    'ère': (lambda x: '^{' + x if x else 'ère' + '}'),
+    'ème': (lambda x: '^{' + x if x else 'ème' + '}'),
+    'Ier': (lambda x: '^{' + x if x else 'Ier' + '}'),
+    'III': (lambda x: '^{' + x if x else 'III' + '}'),
+    'III': (lambda x: '^{' + x if x else 'III' + '}'),
+    'small': (lambda x: '_{' + x if x else '' + '}'),
+    'indice': (lambda x: '_{' + x if x else '' + '}'),
+    'graphie': (lambda x: '«' + x if x else '»'),
+    'petites capitales': (lambda x: x.upper()),
+    'isbn': (lambda x: 'cf. ISBN ' + x),
+    'OCLC': (lambda x: 'cf. OCLC ' + x),
+    'variante de': (lambda x: 'Variante de ' + x),
+    'variante  de': (lambda x: 'Variante de ' + x),
+    'variante ortho de': (lambda x: 'Variante orthographique de ' + x),
+    'variante  ortho de': (lambda x: 'Variante orthographique de ' + x),
+    'variante ortho  de': (lambda x: 'Variante orthographique de ' + x),
+    'variante orthographique de': (lambda x: 'Variante orthographique de ' + x),
+    'sic !': (lambda x: '^{sic ' + x + '}'),
+    'sic': (lambda x: '^{sic ' + x + '}'),
+    'incise': (lambda x: '_' + x + '_'),
+    'n°': (lambda x: 'n°' + x),
+    'superlatif de': (lambda x: 'Superlatif de' + x),
+    'vérifier': (lambda x: '(À vérifier : ' + x + ')'),
+}
+
+dictMatch = {x['match']: i for (i, x) in enumerate(listInfoSection)}
+
+interdit = " :"
+
+
+def transclusion(trans, info):
+    trans = trans[2:-2]
+
+    while '{{' in trans:
+        l0 = trans.rfind('{{')
+        l1 = trans.find('}}', l0)
+        if l1 == -1:
+            break
+        else:
+            l1 += 2
+        t = trans[l0:l1]
+        t = transclusion(t, info)
+        trans = trans[:l0] + t + trans[l1:]
+
+    s = list(map(lambda x: x.strip(), trans.split('|')))
+    if s[0] in template:
+        return template[s[0]]
+
+    if s[0].lower() in template_second:
+
+        return s[1] if len(s) > 1 else title
+
+    if s[0].lower().startswith('citation'):
+        cit = s[0].split('/')
+        if len(cit) == 4:
+            return 'Par ' + cit[1] + ', ' + cit[2] + ', ' + cit[3]
+        if len(cit) == 3:
+            return 'Par ' + cit[1] + ', ' + cit[2]
+        if len(cit) == 5:
+            return cit[1] + '/' + cit[2] + '/' + cit[3] + ', ' + cit[4]
+        if len(cit) <= 2:
+            return ''
+        else:
+            return '/'.join(c[1:])
+
+    if s[0].lower() in template_second_lambda_snd:
+        return template_second_lambda_snd[s[0].lower()](s[1] if len(s) > 1 else '')
+
+    if s[0].lower() in template_second_lambda_trd:
+        return template_second_lambda_trd[s[0].lower()](s[2] if len(s) > 2 else '')
+
+    with open('wiki_err.log', 'a') as err:
+        print(s[0], file=err)
+#       print("Incompréhension de la transclusion {} du mot {}".format(trans,
+#             info['mot']), file=err)
+    return ''
+
+
+def extract(f, w):
+    infoFin = []
+
+    toRead = True
+    goBack = 0
+
+    while toRead:
+        toRead = False
+
+        info = {'mot': w,
+                'cat-gram': None,
+                'def': [],
+                'API': None,
+                'infos': [],
+                'genre': '',
+                'accord': None}
+
+        # State 0 // Initialisation !
+        while line := f.readline():
+
+            if line.startswith('=== ') or line.startswith('==={'):
+                if re.match('^=== *{{ *S\\|([^|]+)|.*$', line):
+                    try:
+                        r = re.match('^=== *{{ *S\\|([^|]+)|.*$', line)
+                        r = r.groups()
+                        nat = r[0].strip()
+                        if nat in dictMatch.keys():
+                            info['cat-gram'] = nat
+                            toRead = True
+                            break
+                    except e:
+                        with open('wiki_err.log', 'a') as err:
+                            print("^[1] Problème à l'initialisation du mot {}:"
+                                  " {}".format(info['mot'], e), file=err)
+                            print('line: [{}]'.format(line, e), file=err)
+                            e = sys.exc_info()[0]
+                            print("Erreur :", e, file=err)
+
+        if not toRead:
+            break
+
+        # State 1
+        while line := f.readline():
+            if line.startswith('{{fr-'):
+                e = line.find('}}')
+                if e == -1:
+                    continue
+                ex = line[:e]
+                try:
+                    infos = list(map(lambda x: x.strip(), ex.split('|')))
+                    info['infos'] = infos
+                    info['accord'] = infos[0]
+                    if len(infos) > 1:
+                        info['API'] = infos[1]
+                except e:
+                    err = sys.exc_info()[0]
+                    print(ex)
+                    print("Erreur :", e)
+                    print("Erreur :", err)
+            if line.rstrip().startswith("'''"):
+                if '{{pron' in line:
+                    p0 = line.find('{{pron')
+                    p1 = line.find('}}', p0)
+                    if p1 > 0:
+                        p1 += 2
+                        p = line[p0:p1]
+                        p = p.split('|')
+                        info['API'] = p[1]
+                if '{{m}}' in line:
+                    info['genre'] = 'mas'
+                elif '{{f}}' in line:
+                    info['genre'] = 'fem'
+
+            if line.startswith('# '):
+                info['def'].append({'def': wikiToMd(line[2:], info)})
+            elif line.startswith('#* '):
+                if not info['def']:
+                    with open('wiki_err.log', 'a') as err:
+                        print("Exemple sans définition pour le mot {}".format(
+                              info['mot']), file=err)
+                elif 'ex' in info['def'][-1]:
+                    info['def'][-1]['ex'].append(wikiToMd(line[3:], info))
+                else:
+                    info['def'][-1]['ex'] = [wikiToMd(line[3:], info)]
+            elif line.startswith('#') and not line.startswith('##'):
+                info['def'].append({'def': wikiToMd(line[1:], info)})
+            if line.startswith('==='):
+                goBack = len(line)
+                break
+        if goBack:
+            tf.seek(tf.tell() - goBack)
+            goBack = 0
+            toRead = True
+        infoFin.append(info)
+
+    return infoFin
+
+
+def wikiToMd(line, info):
+    line = line.strip()
+    # 3 Étapes:
+    #   - Links [...]
+    #   - Style ''ita'' / '''bold'''
+    #   - Template / Transclusion {{info}} = (Informatique)
+
+    # Template
+    while '{{' in line:
+        l0 = line.rfind('{{')
+        l1 = line.find('}}', l0)
+        if l1 == -1:
+            break
+        else:
+            l1 += 2
+        trans = line[l0:l1]
+        trans = transclusion(trans, info)
+        line = line[:l0] + trans + line[l1:]
+
+    # Links !
+    while '[[' in line:
+        link0 = line.rfind('[[')
+        link1 = line.find(']]', link0)
+        if link1 == -1:
+            break
+        else:
+            link1 += 2
+
+        link = line[link0:link1]
+        link = link[2:-2].split('|')
+        line = line[:link0] + (link[1] if len(link) > 1 else link[0]) + line[link1:]
+
+    # Style
+    line = line.replace("'''", '*').replace("''", '')
+
+    return line
+
+
+with open("./fr_wiktionary_all.xml", 'r') as f:
+    title = ""
+    isFr = False
+    hasForbidden = False
+    hasText = False
+    tf = None
+
+    dict_ = dict()
+
+    for line in f:
+        if "</page>" in line and tf:
+            tf.seek(0)
+            i = extract(tf, title)
+
+            for w in i:
+                if w['mot'] == 'président':
+                    print("What we exstract from it:")
+                    print(i)
+
+            dict_[title] = i
+            tf.close()
+
+            tf = None
+            hasForbidden = False
+            hasText = False
+            isFr = False
+            title = ""
+        elif "</page>" in line:
+            tf = None
+            hasForbidden = False
+            hasText = False
+            isFr = False
+            title = ""
+
+        if "<title>" in line:
+            title = line[line.find('>') + 1:]
+            title = title[:title.find('<')]
+
+            for c in interdit:
+                if c in title:
+                    hasForbidden = True
+        if not hasForbidden and "<text xml:space=\"preserve\">" in line:
+            hasText = True
+        if not hasForbidden and "== {{langue|fr}}" in line and hasText:
+            isFr = True
+            if tf:
+                print("Erreur tf encore ouvert !")
+                exit(-1)
+            tf = tmp.NamedTemporaryFile(mode="w+t")
+            # print(title)
+        elif not hasForbidden and "== {{langue|" in line:
+            isFr = False
+        if not hasForbidden and isFr and tf:
+            tf.write(line)
+
+    print("Will save the result")
+
+    with open('result_all.pack', 'wb') as f:
+        to_w = msgpack.packb(dict_)
+        f.write(to_w)
+
diff --git a/main.py b/main.py
deleted file mode 100644
index d4fb050..0000000
--- a/main.py
+++ /dev/null
@@ -1,309 +0,0 @@
-import tempfile as tmp
-import re
-import sys
-import msgpack
-
-from listSection import listInfoSection
-from template import template
-
-
-"""
-
-Extract words from the Wiktionnary archive
-
-"""
-
-template_second = ['link', 'bd', 'pc', 'nom w pc', 'w', 'smcp', 'lien', 'ws',
-                   'in', 'siècle2', 'fchim', 'nobr', 'wp', 'r',
-                   'clé de tri', 'contexte', 'emploi', 'l', 'polytonique',
-                   'pron-API', 'registre', 'scmp', 'siècle', 'x',
-                   ]
-
-
-template_second_lambda_trd = {
-    'refnec': (lambda x: '(Référence nécessaire : ' + x + ')'),
-    'refnéc': (lambda x: '(Référence nécessaire : ' + x + ')'),
-}
-
-template_second_lambda_snd = {
-    'term': (lambda x: '(' + x.title() + ')'),
-    'terme': (lambda x: '(' + x.title() + ')'),
-    'ex': (lambda x: '^{' + x if x else 'e' + '}'),
-    'exp': (lambda x: '^{' + x if x else 'e' + '}'),
-    'e': (lambda x: '^{' + x if x else 'e' + '}'),
-    'er': (lambda x: '^{' + x if x else 'er' + '}'),
-    'ère': (lambda x: '^{' + x if x else 'ère' + '}'),
-    'ème': (lambda x: '^{' + x if x else 'ème' + '}'),
-    'Ier': (lambda x: '^{' + x if x else 'Ier' + '}'),
-    'III': (lambda x: '^{' + x if x else 'III' + '}'),
-    'III': (lambda x: '^{' + x if x else 'III' + '}'),
-    'small': (lambda x: '_{' + x if x else '' + '}'),
-    'indice': (lambda x: '_{' + x if x else '' + '}'),
-    'graphie': (lambda x: '«' + x if x else '»'),
-    'petites capitales': (lambda x: x.upper()),
-    'isbn': (lambda x: 'cf. ISBN ' + x),
-    'OCLC': (lambda x: 'cf. OCLC ' + x),
-    'variante de': (lambda x: 'Variante de ' + x),
-    'variante  de': (lambda x: 'Variante de ' + x),
-    'variante ortho de': (lambda x: 'Variante orthographique de ' + x),
-    'variante  ortho de': (lambda x: 'Variante orthographique de ' + x),
-    'variante ortho  de': (lambda x: 'Variante orthographique de ' + x),
-    'variante orthographique de': (lambda x: 'Variante orthographique de ' + x),
-    'sic !': (lambda x: '^{sic ' + x + '}'),
-    'sic': (lambda x: '^{sic ' + x + '}'),
-    'incise': (lambda x: '_' + x + '_'),
-    'n°': (lambda x: 'n°' + x),
-    'superlatif de': (lambda x: 'Superlatif de' + x),
-    'vérifier': (lambda x: '(À vérifier : ' + x + ')'),
-}
-
-dictMatch = {x['match']: i for (i, x) in enumerate(listInfoSection)}
-
-interdit = " :"
-
-
-def transclusion(trans, info):
-    trans = trans[2:-2]
-
-    while '{{' in trans:
-        l0 = trans.rfind('{{')
-        l1 = trans.find('}}', l0)
-        if l1 == -1:
-            break
-        else:
-            l1 += 2
-        t = trans[l0:l1]
-        t = transclusion(t, info)
-        trans = trans[:l0] + t + trans[l1:]
-
-    s = list(map(lambda x: x.strip(), trans.split('|')))
-    if s[0] in template:
-        return template[s[0]]
-
-    if s[0].lower() in template_second:
-
-        return s[1] if len(s) > 1 else title
-
-    if s[0].lower().startswith('citation'):
-        cit = s[0].split('/')
-        if len(cit) == 4:
-            return 'Par ' + cit[1] + ', ' + cit[2] + ', ' + cit[3]
-        if len(cit) == 3:
-            return 'Par ' + cit[1] + ', ' + cit[2]
-        if len(cit) == 5:
-            return cit[1] + '/' + cit[2] + '/' + cit[3] + ', ' + cit[4]
-        if len(cit) <= 2:
-            return ''
-        else:
-            return '/'.join(c[1:])
-
-    if s[0].lower() in template_second_lambda_snd:
-        return template_second_lambda_snd[s[0].lower()](s[1] if len(s) > 1 else '')
-
-    if s[0].lower() in template_second_lambda_trd:
-        return template_second_lambda_trd[s[0].lower()](s[2] if len(s) > 2 else '')
-
-    with open('wiki_err.log', 'a') as err:
-        print(s[0], file=err)
-#       print("Incompréhension de la transclusion {} du mot {}".format(trans,
-#             info['mot']), file=err)
-    return ''
-
-
-def extract(f, w):
-    infoFin = []
-
-    toRead = True
-    goBack = 0
-
-    while toRead:
-        toRead = False
-
-        info = {'mot': w,
-                'cat-gram': None,
-                'def': [],
-                'API': None,
-                'infos': [],
-                'genre': '',
-                'accord': None}
-
-        # State 0 // Initialisation !
-        while line := f.readline():
-
-            if line.startswith('=== ') or line.startswith('==={'):
-                if re.match('^=== *{{ *S\\|([^|]+)|.*$', line):
-                    try:
-                        r = re.match('^=== *{{ *S\\|([^|]+)|.*$', line)
-                        r = r.groups()
-                        nat = r[0].strip()
-                        if nat in dictMatch.keys():
-                            info['cat-gram'] = nat
-                            toRead = True
-                            break
-                    except e:
-                        with open('wiki_err.log', 'a') as err:
-                            print("^[1] Problème à l'initialisation du mot {}:"
-                                  " {}".format(info['mot'], e), file=err)
-                            print('line: [{}]'.format(line, e), file=err)
-                            e = sys.exc_info()[0]
-                            print("Erreur :", e, file=err)
-
-        if not toRead:
-            break
-
-        # State 1
-        while line := f.readline():
-            if line.startswith('{{fr-'):
-                e = line.find('}}')
-                if e == -1:
-                    continue
-                ex = line[:e]
-                try:
-                    infos = list(map(lambda x: x.strip(), ex.split('|')))
-                    info['infos'] = infos
-                    info['accord'] = infos[0]
-                    if len(infos) > 1:
-                        info['API'] = infos[1]
-                except e:
-                    err = sys.exc_info()[0]
-                    print(ex)
-                    print("Erreur :", e)
-                    print("Erreur :", err)
-            if line.rstrip().startswith("'''"):
-                if '{{pron' in line:
-                    p0 = line.find('{{pron')
-                    p1 = line.find('}}', p0)
-                    if p1 > 0:
-                        p1 += 2
-                        p = line[p0:p1]
-                        p = p.split('|')
-                        info['API'] = p[1]
-                if '{{m}}' in line:
-                    info['genre'] = 'mas'
-                elif '{{f}}' in line:
-                    info['genre'] = 'fem'
-
-            if line.startswith('# '):
-                info['def'].append({'def': wikiToMd(line[2:], info)})
-            elif line.startswith('#* '):
-                if not info['def']:
-                    with open('wiki_err.log', 'a') as err:
-                        print("Exemple sans définition pour le mot {}".format(
-                              info['mot']), file=err)
-                elif 'ex' in info['def'][-1]:
-                    info['def'][-1]['ex'].append(wikiToMd(line[3:], info))
-                else:
-                    info['def'][-1]['ex'] = [wikiToMd(line[3:], info)]
-            elif line.startswith('#') and not line.startswith('##'):
-                info['def'].append({'def': wikiToMd(line[1:], info)})
-            if line.startswith('==='):
-                goBack = len(line)
-                break
-        if goBack:
-            tf.seek(tf.tell() - goBack)
-            goBack = 0
-            toRead = True
-        infoFin.append(info)
-
-    return infoFin
-
-
-def wikiToMd(line, info):
-    line = line.strip()
-    # 3 Étapes:
-    #   - Links [...]
-    #   - Style ''ita'' / '''bold'''
-    #   - Template / Transclusion {{info}} = (Informatique)
-
-    # Template
-    while '{{' in line:
-        l0 = line.rfind('{{')
-        l1 = line.find('}}', l0)
-        if l1 == -1:
-            break
-        else:
-            l1 += 2
-        trans = line[l0:l1]
-        trans = transclusion(trans, info)
-        line = line[:l0] + trans + line[l1:]
-
-    # Links !
-    while '[[' in line:
-        link0 = line.rfind('[[')
-        link1 = line.find(']]', link0)
-        if link1 == -1:
-            break
-        else:
-            link1 += 2
-
-        link = line[link0:link1]
-        link = link[2:-2].split('|')
-        line = line[:link0] + (link[1] if len(link) > 1 else link[0]) + line[link1:]
-
-    # Style
-    line = line.replace("'''", '*').replace("''", '')
-
-    return line
-
-
-with open("./fr_wiktionary_all.xml", 'r') as f:
-    title = ""
-    isFr = False
-    hasForbidden = False
-    hasText = False
-    tf = None
-
-    dict_ = dict()
-
-    for line in f:
-        if "</page>" in line and tf:
-            tf.seek(0)
-            i = extract(tf, title)
-
-            for w in i:
-                if w['mot'] == 'président':
-                    print("What we exstract from it:")
-                    print(i)
-
-            dict_[title] = i
-            tf.close()
-
-            tf = None
-            hasForbidden = False
-            hasText = False
-            isFr = False
-            title = ""
-        elif "</page>" in line:
-            tf = None
-            hasForbidden = False
-            hasText = False
-            isFr = False
-            title = ""
-
-        if "<title>" in line:
-            title = line[line.find('>') + 1:]
-            title = title[:title.find('<')]
-
-            for c in interdit:
-                if c in title:
-                    hasForbidden = True
-        if not hasForbidden and "<text xml:space=\"preserve\">" in line:
-            hasText = True
-        if not hasForbidden and "== {{langue|fr}}" in line and hasText:
-            isFr = True
-            if tf:
-                print("Erreur tf encore ouvert !")
-                exit(-1)
-            tf = tmp.NamedTemporaryFile(mode="w+t")
-            # print(title)
-        elif not hasForbidden and "== {{langue|" in line:
-            isFr = False
-        if not hasForbidden and isFr and tf:
-            tf.write(line)
-
-    print("Will save the result")
-
-    with open('result_all.pack', 'wb') as f:
-        to_w = msgpack.packb(dict_)
-        f.write(to_w)
-
diff --git a/msgPack2sqlite_msgPack.py b/msgPack2sqlite_msgPack.py
index b77dd2e..c251d59 100644
--- a/msgPack2sqlite_msgPack.py
+++ b/msgPack2sqlite_msgPack.py
@@ -2,36 +2,53 @@
 
 import msgpack
 import ui
+import sys
 
 import sqlite3
-
-with open('result_all.pack', 'rb') as f:
-    r = f.read()
-
-d = p = msgpack.unpackb(r, raw=False)
-del r
-
-with sqlite3.connect("result_all.sql") as con:
-    cur = con.cursor()
-    cur.execute('''CREATE TABLE IF NOT EXISTS entry (
-        word TEXT,
-        cat_gram TEXT,
-        API TEXT,
-        infos TEXT,
-        genre TEXT,
-        accord TEXT,
-        defs BLOG,
-        ID INTEGER PRIMARY KEY)''')
-    con.commit()
-
-    for w, listW in d.items():
-        for word in listW:
-            data = (w, word['cat-gram'], word['API'], "\t".join(word['infos']),
-                    word['genre'], word['accord'],
-                    msgpack.packb(word['def']))
-            cur.execute('''INSERT INTO entry (word, cat_gram, API, infos,
-            genre, accord, defs) VALUES (?, ?, ?, ?,  ?, ?,  ?)''', data)
-    con.commit()
+import argparse
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='wiktionary dump msgpack '
+                                     'to SQLite database file')
+    parser.add_argument('-o', '--out', dest='outputF', action='store',
+                        help='the output filename')
+    parser.add_argument('-i', '--input', dest='inputF', action='store',
+                        help='the input filename, a dump of witionary')
+
+    arg = parser.parse_args()
+
+    if arg.inputF is None:
+        print('Error input file needed', file=sys.stderr)
+    if arg.outputF is None:
+        print('Error output file needed', file=sys.stderr)
+
+    with open(arg.inputF, 'rb') as f:
+        r = f.read()
+
+    d = p = msgpack.unpackb(r, raw=False)
+    del r
+
+    with sqlite3.connect(arg.outputF) as con:
+        cur = con.cursor()
+        cur.execute('''CREATE TABLE IF NOT EXISTS entry (
+            word TEXT,
+            cat_gram TEXT,
+            API TEXT,
+            infos TEXT,
+            genre TEXT,
+            accord TEXT,
+            defs BLOG,
+            ID INTEGER PRIMARY KEY)''')
+        con.commit()
+
+        for w, listW in d.items():
+            for word in listW:
+                data = (w, word['cat-gram'], word['API'], "\t".join(word['infos']),
+                        word['genre'], word['accord'],
+                        msgpack.packb(word['def']))
+                cur.execute('''INSERT INTO entry (word, cat_gram, API, infos,
+                genre, accord, defs) VALUES (?, ?, ?, ?,  ?, ?,  ?)''', data)
+        con.commit()
 
 
 def give_def(w):
-- 
cgit v1.3-2-g11bf