Update everything

author: ache <ache@ache.one> 2020-11-23 03:41:54 +0100
committer: ache <ache@ache.one> 2020-11-23 03:41:54 +0100
commit: 451d7bf0db58d42afc5a5086353558d227040dff (patch)
tree: 3cf473a791ba425e09da566a142f9bb32da692e6
parent: Documentation README (diff)
13 files changed, 319 insertions, 224 deletions
diff --git a/Makefile b/Makefile
index 33a63fd..95d438f 100644
--- a/Makefile
+++ b/Makefile
@@ -4,7 +4,7 @@ DIR_BIN=/usr/bin/
 
 install:
 	mkdir -p ${DIR_INSTALL_PATH}
-	cp -u *.py wiktfr.sql ${DIR_INSTALL_PATH}
+	cp -u *.py *.sql ${DIR_INSTALL_PATH}
 	cp -u dicofr.py ${DIR_BIN}/dicofr
 	chmod +rw ${DIR_BIN}/dicofr
 
diff --git a/README.md b/README.md
index 6d9a041..dfa1c84 100644
--- a/README.md
+++ b/README.md
@@ -12,7 +12,6 @@ A bunch of **Python** scripts to transform wiktionary archive dump to MySQL data
  - With a simple CLI.
  - With a simple WUI, flask based.
  - Regex support.
- - 
 
 How to create the database
 --------------------------
@@ -32,12 +31,12 @@ This file is interesting for developers not for end users.
 It's a serialization of the internal used dictionary (python dictionary). 
 
 ~~~shell
-$ python dump2msgp.py -i frwiktionary-20200601-pages-articles.xml.bz2
+$ python dump2msgp.py -i frwiktionary-20200601-pages-articles.xml
 ~~~
 
 Then, you can create the SQLite database file.
 ~~~shell
-$ python msgPack2sqlite_msgPack.py -i dicofr.msgpk -o dicofr.db
+$ python msgPack2sqlite_msgPack.py -i dicofr.msgpk
 ~~~
 
 You can then use `dicofr.py` to search a word from the CLI or use the WUI with the command:
@@ -93,6 +92,20 @@ juliennois
 juliens
 ~~~
 
+Or the WUI :
+
+~~~shell
+$ python web.py
+~~~
+
+Why only french ?
+-----------------
+
+Because that's the only language I'm able to tackle.
+I can't verify anything about other languages.
+
+Feel free to contribute.
+
 How to contribute ?
 -------------------
 
diff --git a/dicofr.py b/dicofr.py
index 9bef7ac..7e6888d 100755
--- a/dicofr.py
+++ b/dicofr.py
@@ -1,16 +1,32 @@
 #!/bin/env python
 
+"""
+# Main file.
+
+A program to retrieve the definition of a French word (in French).
+
+May be extended to other languages later.
+"""
+
 import sys
 import argparse
 import msgpack
 import sqlite3
 from os.path import exists
 
+import ui
+
+
+debug = False
+
+if debug:
+    DIR_PATH = 'assets/'
+else:
+    DIR_PATH = '/usr/share/dicofr/assets/'
 
-DIR_PATH = '/usr/share/dicofr'
 sys.path.insert(-1, DIR_PATH)
 
-import ui
+
 
 dico = 'dicofr.db'
 
@@ -102,8 +118,6 @@ if __name__ == '__main__':
                         const=get_def_sql_reg, default=get_def_sql,
                         help='search a definition using SQL regex, '
                              '_ to match a letter, %% to match a group of letters')
-    parser.add_argument('-w', '--wordlist', dest='wordList',
-                        action='store_const', default='list_word.msgpack')
     parser.add_argument('-m', '--matching', dest='matching', action='store_true',
                         help='search the french words that match the regex')
     parser.add_argument('word', metavar='PATTERN', type=str,
diff --git a/download/bz2toDB.py b/download/bz2toDB.py
new file mode 100644
index 0000000..1fddd85
--- /dev/null
+++ b/download/bz2toDB.py
@@ -0,0 +1,25 @@
+import bz2
+import sys
+
+
+def unbz2(file):
+    """Yield UTF-8 decoded lines from an iterable of bz2-compressed chunks."""
+    decomp = bz2.BZ2Decompressor()
+    buf = b''
+    for c in file:
+        buf += decomp.decompress(c)
+
+        # Emit every complete line; the unfinished tail waits for more data.
+        while b'\n' in buf:
+            i = buf.index(b'\n')
+            line, buf = buf[:i + 1], buf[i + 1:]
+            yield line.decode("utf-8")
+
+    if buf:  # last line of the stream had no trailing newline
+        yield buf.decode("utf-8")
+
+
+with open('./wiktionary_dump.xml.bz2', 'rb') as dump:
+    chunks = iter(lambda: dump.read(32768), b'')  # 32 KiB reads until EOF
+    for text in unbz2(chunks):
+        print(text, end='')  # items are printed back-to-back, no separator added
diff --git a/download/download.py b/download/download.py
new file mode 100755
index 0000000..a941b90
--- /dev/null
+++ b/download/download.py
@@ -0,0 +1,130 @@
+import argparse
+import sys
+import urllib.request
+import dump2msgp
+import msgPack2sqlite_msgPack
+import subprocess
+
+from os.path import exists
+
+
+URL_DUMP = "https://dumps.wikimedia.org/frwiktionary/latest/frwiktionary-latest-pages-meta-current.xml.bz2"
+
+
+def unbz2(file):
+    """Yield UTF-8 decoded lines from an iterable of bz2-compressed chunks."""
+    import bz2  # not imported at the top of this file
+
+    decomp = bz2.BZ2Decompressor()
+    buf = b''
+    for c in file:
+        buf += decomp.decompress(c)
+        # Emit every complete line; the unfinished tail waits for more data.
+        while b'\n' in buf:
+            i = buf.index(b'\n')
+            line, buf = buf[:i + 1], buf[i + 1:]
+            yield line.decode("utf-8")
+    if buf:  # last line of the stream had no trailing newline
+        yield buf.decode("utf-8")
+
+
+if __name__ == '__main__':
+    import os  # used below to remove the temporary files
+    parser = argparse.ArgumentParser(description='Download and create the database')
+    parser.add_argument('-o', '--out', dest='outputF', action='store',
+                        help='the output, the database file',
+                        default='dicofr.db')
+    parser.add_argument('-i', '--in', dest='dumpF', action='store',
+                        help='the input dump file\'s filename',
+                        default='')
+    parser.add_argument('-d', '--download', dest='download', action='store_true',
+                        help='to download the lastest dump')
+
+    arg = parser.parse_args()
+
+    download = arg.download or not arg.dumpF  # no '-i' given: download the dump
+
+    if download and arg.dumpF:
+        print("Incompatible options '-i' and '-d'.")
+        sys.exit(1)
+    elif download:
+        arg.dumpF = URL_DUMP[URL_DUMP.rindex('/') + 1:]
+
+    if not arg.dumpF or not arg.dumpF.endswith('bz2'):
+        print('A bz2 dump file filename needed', file=sys.stderr)
+        sys.exit(-1)
+
+    if exists(arg.dumpF) and download:
+        print(f"{arg.dumpF} exists. Force downloading ? (y/N)")
+        answer = input('> ')
+        if not answer.lower().startswith('y'):  # an empty answer means "N"
+            download = False
+
+    if download:
+        print(f"Downloading the dump ({arg.dumpF})\nIt should take some time")
+        urllib.request.urlretrieve(URL_DUMP, arg.dumpF)
+
+    if not exists(arg.dumpF):
+        print('Download failed.\nExiting.', file=sys.stderr)
+        sys.exit(-2)
+
+    decompress = False
+
+    # Preferred path: the external bzip2 tool
+    try:
+        print("Trying the bzip2 command")
+        subprocess.run(['bzip2', '-d', arg.dumpF], check=True)
+        decompress = True
+    except (OSError, subprocess.CalledProcessError):
+        print("The command “bzip” doesn't exists, or doesn't work as intended")
+        print("Fallback to Python bz2 module decompressor")
+
+    # First fallback: decompress to disk with the Python bz2 module
+    if not decompress:
+        try:
+            import bz2
+            with open(arg.dumpF, 'rb') as f:
+                it = iter(lambda: f.read(2**16), b'')
+
+                output_fn = arg.dumpF[:-4]
+
+                with open(output_fn, 'wb') as fout:
+                    dcomp = bz2.BZ2Decompressor()
+                    for chunk in it:
+                        fout.write(dcomp.decompress(chunk))
+            decompress = True
+        except Exception:
+            print("Python bz2 module decompressor failed, maybe you don't have any space available")
+            print("Fallback to on the fly decompressor (RAM will be needed)")
+
+    if not decompress:
+        try:
+            # Last fallback: decompress on the fly while extracting
+            with open(arg.dumpF, 'rb') as f:
+                it = iter(lambda: f.read(2**16), b'')
+                print("Data extraction on the fly")
+                res = dump2msgp.extractAll(unbz2(it), "error.log", False)
+                msgPack2sqlite_msgPack.writeDB(arg.outputF, res)
+            print(f"Database { arg.outputF } created ! 👏 🎉")
+        except Exception:
+            print("Error: Can't extract the dump file")
+            print("Exiting (-1)")
+            sys.exit(-1)
+
+        print("Removing temporary files")
+        os.remove(arg.dumpF)
+    else:
+        try:
+            output_fn = arg.dumpF[:-4]
+            with open(output_fn, 'r') as f:
+                print("Create the database")
+                res = dump2msgp.extractAll(f, "error.log", False)
+                msgPack2sqlite_msgPack.writeDB(arg.outputF, res)
+            print(f"Database { arg.outputF } created ! 👏 🎉")
+        except Exception:
+            print("Failed to extract database")
+            print("Exiting (-3)")
+            sys.exit(-3)
+
+        print("Removing temporary files")
+        os.remove(output_fn)
diff --git a/dump2msgp.py b/download/dump2msgp.py
index d4fb050..70b483c 100644
--- a/dump2msgp.py
+++ b/download/dump2msgp.py
@@ -2,8 +2,9 @@ import tempfile as tmp
 import re
 import sys
 import msgpack
+import argparse
 
-from listSection import listInfoSection
+from sectionList import listInfoSection
 from template import template
 
 
@@ -13,6 +14,9 @@ Extract words from the Wiktionnary archive
 
 """
 
+DEFAULT_OUTPUT = 'dicofr.msgpk'
+
+
 template_second = ['link', 'bd', 'pc', 'nom w pc', 'w', 'smcp', 'lien', 'ws',
                    'in', 'siècle2', 'fchim', 'nobr', 'wp', 'r',
                    'clé de tri', 'contexte', 'emploi', 'l', 'polytonique',
@@ -36,7 +40,6 @@ template_second_lambda_snd = {
     'ème': (lambda x: '^{' + x if x else 'ème' + '}'),
     'Ier': (lambda x: '^{' + x if x else 'Ier' + '}'),
     'III': (lambda x: '^{' + x if x else 'III' + '}'),
-    'III': (lambda x: '^{' + x if x else 'III' + '}'),
     'small': (lambda x: '_{' + x if x else '' + '}'),
     'indice': (lambda x: '_{' + x if x else '' + '}'),
     'graphie': (lambda x: '«' + x if x else '»'),
@@ -62,7 +65,7 @@ dictMatch = {x['match']: i for (i, x) in enumerate(listInfoSection)}
 interdit = " :"
 
 
-def transclusion(trans, info):
+def transclusion(trans, info, errorF):
     trans = trans[2:-2]
 
     while '{{' in trans:
@@ -73,7 +76,7 @@ def transclusion(trans, info):
         else:
             l1 += 2
         t = trans[l0:l1]
-        t = transclusion(t, info)
+        t = transclusion(t, info, errorF)
         trans = trans[:l0] + t + trans[l1:]
 
     s = list(map(lambda x: x.strip(), trans.split('|')))
@@ -81,8 +84,7 @@ def transclusion(trans, info):
         return template[s[0]]
 
     if s[0].lower() in template_second:
-
-        return s[1] if len(s) > 1 else title
+        return s[1] if len(s) > 1 else info['mot']
 
     if s[0].lower().startswith('citation'):
         cit = s[0].split('/')
@@ -95,7 +97,7 @@ def transclusion(trans, info):
         if len(cit) <= 2:
             return ''
         else:
-            return '/'.join(c[1:])
+            return '/'.join(cit[1:])
 
     if s[0].lower() in template_second_lambda_snd:
         return template_second_lambda_snd[s[0].lower()](s[1] if len(s) > 1 else '')
@@ -103,14 +105,15 @@ def transclusion(trans, info):
     if s[0].lower() in template_second_lambda_trd:
         return template_second_lambda_trd[s[0].lower()](s[2] if len(s) > 2 else '')
 
-    with open('wiki_err.log', 'a') as err:
-        print(s[0], file=err)
-#       print("Incompréhension de la transclusion {} du mot {}".format(trans,
-#             info['mot']), file=err)
+    if errorF:
+        with open(errorF, 'a') as err:
+           print(s[0], file=err)
+           print("Incompréhension de la transclusion {} du mot {}".format(trans,
+                 info['mot']), file=err)
     return ''
 
 
-def extract(f, w):
+def extract(f, w, errorF):
     infoFin = []
 
     toRead = True
@@ -140,13 +143,13 @@ def extract(f, w):
                             info['cat-gram'] = nat
                             toRead = True
                             break
-                    except e:
-                        with open('wiki_err.log', 'a') as err:
-                            print("^[1] Problème à l'initialisation du mot {}:"
-                                  " {}".format(info['mot'], e), file=err)
-                            print('line: [{}]'.format(line, e), file=err)
-                            e = sys.exc_info()[0]
-                            print("Erreur :", e, file=err)
+                    except Exception as e:
+                        if errorF:
+                            with open(errorF, 'a') as err:
+                                print("^[1] Problème à l'initialisation du mot {mot}: {e}", file=err)
+                                print(f'line: [{line}]: {e}', file=err)
+                                e = sys.exc_info()[0]
+                                print("Erreur :", e, file=err)
 
         if not toRead:
             break
@@ -184,23 +187,23 @@ def extract(f, w):
                     info['genre'] = 'fem'
 
             if line.startswith('# '):
-                info['def'].append({'def': wikiToMd(line[2:], info)})
+                info['def'].append({'def': wikiToMd(line[2:], info, errorF)})
             elif line.startswith('#* '):
                 if not info['def']:
                     with open('wiki_err.log', 'a') as err:
                         print("Exemple sans définition pour le mot {}".format(
                               info['mot']), file=err)
                 elif 'ex' in info['def'][-1]:
-                    info['def'][-1]['ex'].append(wikiToMd(line[3:], info))
+                    info['def'][-1]['ex'].append(wikiToMd(line[3:], info, errorF))
                 else:
-                    info['def'][-1]['ex'] = [wikiToMd(line[3:], info)]
+                    info['def'][-1]['ex'] = [wikiToMd(line[3:], info, errorF)]
             elif line.startswith('#') and not line.startswith('##'):
-                info['def'].append({'def': wikiToMd(line[1:], info)})
+                info['def'].append({'def': wikiToMd(line[1:], info, errorF)})
             if line.startswith('==='):
                 goBack = len(line)
                 break
         if goBack:
-            tf.seek(tf.tell() - goBack)
+            f.seek(f.tell() - goBack)
             goBack = 0
             toRead = True
         infoFin.append(info)
@@ -208,7 +211,7 @@ def extract(f, w):
     return infoFin
 
 
-def wikiToMd(line, info):
+def wikiToMd(line, info, errorF):
     line = line.strip()
     # 3 Étapes:
     #   - Links [...]
@@ -224,7 +227,7 @@ def wikiToMd(line, info):
         else:
             l1 += 2
         trans = line[l0:l1]
-        trans = transclusion(trans, info)
+        trans = transclusion(trans, info, errorF)
         line = line[:l0] + trans + line[l1:]
 
     # Links !
@@ -246,7 +249,7 @@ def wikiToMd(line, info):
     return line
 
 
-with open("./fr_wiktionary_all.xml", 'r') as f:
+def extractAll(f, errorF, ignore):
     title = ""
     isFr = False
     hasForbidden = False
@@ -258,12 +261,7 @@ with open("./fr_wiktionary_all.xml", 'r') as f:
     for line in f:
         if "</page>" in line and tf:
             tf.seek(0)
-            i = extract(tf, title)
-
-            for w in i:
-                if w['mot'] == 'président':
-                    print("What we exstract from it:")
-                    print(i)
+            i = extract(tf, title, errorF)
 
             dict_[title] = i
             tf.close()
@@ -287,23 +285,55 @@ with open("./fr_wiktionary_all.xml", 'r') as f:
             for c in interdit:
                 if c in title:
                     hasForbidden = True
-        if not hasForbidden and "<text xml:space=\"preserve\">" in line:
+        if not hasForbidden and "<text bytes=\"" in line and "\" xml:space=\"preserve\">" in line:
             hasText = True
         if not hasForbidden and "== {{langue|fr}}" in line and hasText:
             isFr = True
             if tf:
-                print("Erreur tf encore ouvert !")
-                exit(-1)
-            tf = tmp.NamedTemporaryFile(mode="w+t")
-            # print(title)
+                if not ignore:
+                    if errorF:
+                        with open(errorF, 'a') as err:
+                            print(f"{title}: Erreur tf encore ouvert !",
+                                  file=err)
+                    else:
+                        print(f"{title}: Erreur tf encore ouvert !")
+                        tf.seek(0)
+                        while line2 := tf.readline():
+                            print(line2, end='')
+                        print(line)
+
+                        exit(-1)
+            else:
+                tf = tmp.NamedTemporaryFile(mode="w+t")
         elif not hasForbidden and "== {{langue|" in line:
             isFr = False
         if not hasForbidden and isFr and tf:
             tf.write(line)
 
-    print("Will save the result")
+    return dict_
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='wiktionary dump to msgpack')
+    parser.add_argument('-o', '--out', dest='outputF', action='store_const',
+                        const=DEFAULT_OUTPUT, default=DEFAULT_OUTPUT,
+                        help='the output filename')
+    parser.add_argument('-i', '--input', dest='inputF', action='store',
+                        help='the input filename, a dump of witionary')
+    parser.add_argument('-e', '--error', dest='errorF', action='store',
+                        help='the filename to log errors')
+    parser.add_argument('--ignore', dest='ignoreError', action='store_true',
+                        help='the filename to log errors')
+
+    arg = parser.parse_args()
+
+    if arg.inputF is None:
+        print('A wiktionary dump is needed', file=sys.stderr)
+        exit(-1)
 
-    with open('result_all.pack', 'wb') as f:
-        to_w = msgpack.packb(dict_)
-        f.write(to_w)
+    with open(arg.inputF, 'r') as f:
+        res = extractAll(f, arg.errorF, arg.ignoreError)
 
+        with open(arg.outputF, 'wb') as f:
+            to_w = msgpack.packb(res)
+            f.write(to_w)
diff --git a/msgPack2sqlite_msgPack.py b/download/msgPack2sqlite_msgPack.py
index c251d59..38d34cd 100644
--- a/msgPack2sqlite_msgPack.py
+++ b/download/msgPack2sqlite_msgPack.py
@@ -1,34 +1,12 @@
-# To load with python interpreter
-
 import msgpack
-import ui
 import sys
 
 import sqlite3
 import argparse
 
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='wiktionary dump msgpack '
-                                     'to SQLite database file')
-    parser.add_argument('-o', '--out', dest='outputF', action='store',
-                        help='the output filename')
-    parser.add_argument('-i', '--input', dest='inputF', action='store',
-                        help='the input filename, a dump of witionary')
-
-    arg = parser.parse_args()
-
-    if arg.inputF is None:
-        print('Error input file needed', file=sys.stderr)
-    if arg.outputF is None:
-        print('Error output file needed', file=sys.stderr)
-
-    with open(arg.inputF, 'rb') as f:
-        r = f.read()
-
-    d = p = msgpack.unpackb(r, raw=False)
-    del r
 
-    with sqlite3.connect(arg.outputF) as con:
+def writeDB(outputF, data):
+    with sqlite3.connect(outputF) as con:
         cur = con.cursor()
         cur.execute('''CREATE TABLE IF NOT EXISTS entry (
             word TEXT,
@@ -41,7 +19,7 @@ if __name__ == '__main__':
             ID INTEGER PRIMARY KEY)''')
         con.commit()
 
-        for w, listW in d.items():
+        for w, listW in data.items():
             for word in listW:
                 data = (w, word['cat-gram'], word['API'], "\t".join(word['infos']),
                         word['genre'], word['accord'],
@@ -51,6 +29,24 @@ if __name__ == '__main__':
         con.commit()
 
 
-def give_def(w):
-    ui.show_terminal(d[w])
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='wiktionary dump msgpack '
+                                     'to SQLite database file')
+    parser.add_argument('-o', '--out', dest='outputF', action='store',
+                        help='the output filename')
+    parser.add_argument('-i', '--input', dest='inputF', action='store',
+                        help='the input filename, a dump of witionary')
+
+    arg = parser.parse_args()
+
+    if arg.inputF is None:
+        print('Error input file needed', file=sys.stderr)
+    if arg.outputF is None:
+        print('Error output file needed', file=sys.stderr)
+
+    with open(arg.inputF, 'rb') as f:
+        r = f.read()
 
+    d = p = msgpack.unpackb(r, raw=False)
+    del r
+    writeDB(arg.outputF, d)
diff --git a/sectionList.py b/download/sectionList.py
index 68dd657..68dd657 100644
--- a/sectionList.py
+++ b/download/sectionList.py
diff --git a/template.py b/download/template.py
index fa0394a..fa0394a 100644
--- a/template.py
+++ b/download/template.py
diff --git a/getSection.py b/getSection.py
deleted file mode 100644
index 62b74be..0000000
--- a/getSection.py
+++ /dev/null
@@ -1,129 +0,0 @@
-listInfoSection= [
- {'type': 'adjectif', 'match': 'adj', 'o': 'adjectif'},
- {'type': 'adjectif', 'match': 'adjectif', 'o': 'adjectif'},
- {'type': 'adjectif', 'match': 'adj-dém', 'o': 'adjectif démonstratif'},
- {'type': 'adjectif', 'match': 'adjectif démonstratif', 'o': 'adjectif démonstratif'},
- {'type': 'adjectif', 'match': 'adjectif exclamatif', 'o': 'adjectif exclamatif'},
- {'type': 'adjectif', 'match': 'adjectif indéfini', 'o': 'adjectif indéfini'},
- {'type': 'adjectif', 'match': 'adjectif interrogatif', 'o': 'adjectif interrogatif'},
- {'type': 'adjectif', 'match': 'adjectif numéral', 'o': 'adjectif numéral'},
- {'type': 'adjectif', 'match': 'adjectif possessif', 'o': 'adjectif possessif'},
- {'type': 'adjectif', 'match': 'adjectif relatif', 'o': 'adjectif relatif'},
-
- {'type': 'adverbe', 'match': 'adv', 'o': 'adverbe'},
- {'type': 'adverbe', 'match': 'adverbe', 'o': 'adverbe'},
- {'type': 'adverbe', 'match': 'adverbe interrogatif', 'o': 'adverbe interrogatif'},
- {'type': 'adverbe', 'match': 'adverbe relatif', 'o': 'adverbe relatif'},
-
- {'type': 'article', 'match': 'article', 'o': 'article'},
- {'type': 'article', 'match': 'article défini', 'o': 'article défini'},
- {'type': 'article', 'match': 'article indéfini', 'o': 'article indéfini'},
- {'type': 'article', 'match': 'article partitif', 'o': 'article partitif'},
-
- {'type': 'conjonction', 'match': 'conjonction', 'o': 'conjonction'},
- {'type': 'conjonction', 'match': 'conjonction de coordination', 'o': 'conjonction de coordination'},
-
-
- {'type': 'erreur', 'match': 'erreur', 'o': 'faute courante'},
- {'type': 'erreur', 'match': 'faute', 'o': 'faute courante'},
-
- {'type': 'interjection', 'match': 'interj', 'o': 'interjection'},
- {'type': 'interjection', 'match': 'interjection', 'o': 'interjection'},
-
- {'type': 'interjection', 'match': 'interjection', 'o': 'interjection'},
-
- {'type': 'locuton nominale', 'match': 'loc-phr', 'o': 'locution nominale'},
- {'type': 'locuton nominale', 'match': 'locution', 'o': 'locution nominale'},
- {'type': 'locuton nominale', 'match': 'locution nominale', 'o': 'locution nominale'},
- {'type': 'locuton nominale', 'match': 'locution phrase', 'o': 'locution nominale'},
- {'type': 'locuton nominale', 'match': 'locution-phrase', 'o': 'locution nominale'},
-
- {'type': 'nom', 'match': 'nom', 'o': 'nom'},
- {'type': 'nom', 'match': 'nom1', 'o': 'nom'},
- {'type': 'nom', 'match': 'nom2', 'o': 'nom'},
- {'type': 'nom', 'match': 'nom3', 'o': 'nom'},
- {'type': 'nom', 'match': 'nom4', 'o': 'nom'},
- {'type': 'nom', 'match': 'nom5', 'o': 'nom'},
- {'type': 'nom', 'match': 'nom6', 'o': 'nom'},
- {'type': 'nom', 'match': 'nom7', 'o': 'nom'},
- {'type': 'nom', 'match': 'nom8', 'o': 'nom'},
- {'type': 'nom', 'match': 'nom9', 'o': 'nom'},
- {'type': 'nom', 'match': 'nom10', 'o': 'nom'},
- {'type': 'nom', 'match': 'nom11', 'o': 'nom'},
- {'type': 'nom', 'match': 'nom12', 'o': 'nom'},
- {'type': 'nom', 'match': 'nom13', 'o': 'nom'},
- {'type': 'nom', 'match': 'nom14', 'o': 'nom'},
- {'type': 'nom', 'match': 'nom15', 'o': 'nom'},
- {'type': 'nom', 'match': 'nom16', 'o': 'nom'},
- {'type': 'nom', 'match': 'nom17', 'o': 'nom'},
- {'type': 'nom', 'match': 'nom commun', 'o': 'nom'},
- {'type': 'nom', 'match': 'substantif', 'o': 'substantif'},
-
-
- {'type': 'nom de famille', 'match': 'nom de famille', 'o': 'nom de famille'},
- {'type': 'nom de famille', 'match': 'nom de famille anglais', 'o': 'nom de famille'},
- {'type': 'nom de famille', 'match': 'nom-fam', 'o': 'nom de famille'},
- {'type': 'nom de famille', 'match': 'nom-pr', 'o': 'nom de famille'},
- {'type': 'nom de famille', 'match': 'nom-propre', 'o': 'nom de famille'},
- {'type': 'nom de famille', 'match': 'nom pr', 'o': 'nom de famille'},
- {'type': 'nom de famille', 'match': 'nom propre', 'o': 'nom de famille'},
- {'type': 'nom scientifique', 'match': 'nom scientifique', 'o': 'nom scientifique'},
- {'type': 'particule', 'match': 'particule', 'o': 'particule'},
- {'type': 'nom de famille', 'match': 'patronyme', 'o': 'nom de famille'},
- {'type': 'préfixe', 'match': 'préfixe', 'o': 'préfixe'},
- {'type': 'suffixe', 'match': 'suffixe', 'o': 'suffixe'},
- {'type': 'prénom', 'match': 'prénom', 'o': 'prénom'},
-
- {'type': 'onomatopée', 'match': 'onomatopée', 'o': 'onomatopée'},
- {'type': 'onomatopée', 'match': 'onom', 'o': 'onomatopée'},
-
- {'type': 'préposition', 'match': 'prép', 'o': 'préposition'},
- {'type': 'préposition', 'match': 'préposition', 'o': 'préposition'},
-
- {'type': 'pronom', 'match': 'pronom', 'o': 'pronom'},
- {'type': 'pronom', 'match': 'pronom démonstratif', 'o': 'pronom démonstratif'},
- {'type': 'pronom', 'match': 'pronom indéfini', 'o': 'pronom indéfini'},
- {'type': 'pronom', 'match': 'pronom interrogatif', 'o': 'pronom interrogatif'},
- {'type': 'pronom', 'match': 'pronom personnel', 'o': 'pronom personnel'},
- {'type': 'pronom', 'match': 'pronom possessif', 'o': 'pronom possessif'},
- {'type': 'pronom', 'match': 'pronom relatif', 'o': 'pronom relatif'},
-
- {'type': 'verbe', 'match': 'verb', 'o': 'verbe'},
- {'type': 'verbe', 'match': 'verbe', 'o': 'verbe'},
- {'type': 'verbe', 'match': 'verbe pronominal', 'o': 'verbe pronominal'}
-]
-
-listSections = []
-with open("./listSections", "r") as f:
-    for line in f:
-        line = line.strip()
-        if line[-3:] != "===":
-            continue
-
-        s = line.find('{{')+2
-        e = line.find('}}')
-        if not s < e:
-            continue
-
-        argsStr = line[s:e]
-        args = list(map(lambda x: x.strip().lower(), argsStr.split('|')))
-
-        if len(args) <= 2 or not args[1]:
-            continue
-
-        if not args[2].startswith('fr'):
-            continue
-
-        classed = False
-        for m in listInfoSection:
-            if m['match'] == args[1]:
-                classed = True
-                continue
-        if not classed:
-            listSections.append(args[1])
-
-for section in (list(set(listSections))):
-    print(section)
-
-
-
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..c429cce
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+msgpack==1.0.0
+PyMySQL==0.9.3
+Flask==1.1.2
diff --git a/ui.py b/ui.py
index e2a166a..c198cfd 100644
--- a/ui.py
+++ b/ui.py
@@ -5,21 +5,22 @@ tui_show_example = True
 
 
 def show_terminal(word):
-    """Display the definition to the terminal
+    """
+        Display the definition to the terminal
 
-    @word Format:
-        {
-         mot: ''
-         cat-gram: ''
-         def: [{
-                def: ''
-                ex: ['', '']
-               }]
-         API: ''
-         infos: ['', '']
-         genre: ''
-         accord: ''
-        }
+        @word Format:
+            {
+             mot: ''
+             cat-gram: ''
+             def: [{
+                    def: ''
+                    ex: ['', '']
+                   }]
+             API: ''
+             infos: ['', '']
+             genre: ''
+             accord: ''
+            }
     """
     indent = tui_indent * ' '
     print(indent + word['mot'])
@@ -45,7 +46,3 @@ def show_terminal(word):
                     print(indent + '\t\t * ' + ex)
             print('')
 
-
-def show_web(word):
-    """Display the definition in HTML format"""
-    pass
diff --git a/web.py b/web.py
index c5e0b1c..82ad696 100644
--- a/web.py
+++ b/web.py
@@ -1,3 +1,7 @@
+"""
+A simple Web application to serve dicofr
+"""
+
 from flask import Flask, request, Response, send_file
 import msgpack
 # from flask_cors import CORS
@@ -20,17 +24,26 @@ app.config.from_object(__name__)
 
 @app.route('/', methods=['GET'])
 def index_client():
+    """
+        Send the single file
+    """
     return send_file("index.html", mimetype='text/html')
 
 def get_def_reg(w):
+    """
+        Search for a word; handles regex patterns and letter-case differences.
+    """
     if res := dicofr.get_def_sql_reg(w):
         return msgpack.packb(res)
+
     # Recherche du mot en minuscule
     elif res := dicofr.get_def_sql_reg(w.lower()):
         return msgpack.packb(res)
+
     # Recherche du mot en nom propre
     elif res := dicofr.get_def_sql_reg(w.title()):
         return msgpack.packb(res)
+
     else:
         return Response("", status=404)
 
@@ -38,6 +51,9 @@ def get_def_reg(w):
 
 @app.route('/def', methods=['GET'])
 def get_def():
+    """
+        Retrieve a definition
+    """
     w = request.args.get('w')
 
     if '_' in w:
author	ache <ache@ache.one>	2020-11-23 03:41:54 +0100
committer	ache <ache@ache.one>	2020-11-23 03:41:54 +0100
commit	451d7bf0db58d42afc5a5086353558d227040dff (patch)
tree	3cf473a791ba425e09da566a142f9bb32da692e6
parent	Documentation README (diff)