about summary refs log tree commit diff
diff options
context:
space:
mode:
author: ache <ache@ache.one> 2020-11-23 03:41:54 +0100
committer: ache <ache@ache.one> 2020-11-23 03:41:54 +0100
commit: 451d7bf0db58d42afc5a5086353558d227040dff (patch)
tree: 3cf473a791ba425e09da566a142f9bb32da692e6
parent: Documentation README (diff)
Update everything
-rw-r--r-- Makefile 2
-rw-r--r-- README.md 19
-rwxr-xr-x dicofr.py 22
-rw-r--r-- download/bz2toDB.py 25
-rwxr-xr-x download/download.py 130
-rw-r--r-- download/dump2msgp.py (renamed from dump2msgp.py) 114
-rw-r--r-- download/msgPack2sqlite_msgPack.py (renamed from msgPack2sqlite_msgPack.py) 50
-rw-r--r-- download/sectionList.py (renamed from sectionList.py) 0
-rw-r--r-- download/template.py (renamed from template.py) 0
-rw-r--r-- getSection.py 129
-rw-r--r-- requirements.txt 3
-rw-r--r-- ui.py 35
-rw-r--r-- web.py 16
13 files changed, 320 insertions, 225 deletions
diff --git a/Makefile b/Makefile
index 33a63fd..95d438f 100644
--- a/Makefile
+++ b/Makefile
@@ -4,7 +4,7 @@ DIR_BIN=/usr/bin/
install:
mkdir -p ${DIR_INSTALL_PATH}
- cp -u *.py wiktfr.sql ${DIR_INSTALL_PATH}
+ cp -u *.py *.sql ${DIR_INSTALL_PATH}
cp -u dicofr.py ${DIR_BIN}/dicofr
chmod +rw ${DIR_BIN}/dicofr
diff --git a/README.md b/README.md
index 6d9a041..dfa1c84 100644
--- a/README.md
+++ b/README.md
@@ -12,7 +12,6 @@ A bunch of **Python** scripts to transform wiktionary archive dump to MySQL data
- With a simple CLI.
- With a simple WUI, flask based.
- Regex support.
- -
How to create the database
--------------------------
@@ -32,12 +31,12 @@ This file is interesting for developers not for end users.
It's a serialization of the internal used dictionary (python dictionary).
~~~shell
-$ python dump2msgp.py -i frwiktionary-20200601-pages-articles.xml.bz2
+$ python dump2msgp.py -i frwiktionary-20200601-pages-articles.xml
~~~
Then, you can create the SQLite database file.
~~~shell
-$ python msgPack2sqlite_msgPack.py -i dicofr.msgpk -o dicofr.db
+$ python msgPack2sqlite_msgPack.py -i dicofr.msgpk
~~~
You can then use `dicofr.py` to search a word from the CLI or use the WUI with the command:
@@ -93,6 +92,20 @@ juliennois
juliens
~~~
+Or the WUI :
+
+~~~shell
+$ python web.py
+~~~
+
+Why only French?
+----------------
+
+Because that's the only language I'm able to tackle.
+I can't verify anything about other languages.
+
+Feel free to contribute.
+
How to contribute ?
-------------------
diff --git a/dicofr.py b/dicofr.py
index 9bef7ac..7e6888d 100755
--- a/dicofr.py
+++ b/dicofr.py
@@ -1,16 +1,32 @@
#!/bin/env python
+"""
+# Main file.
+
+A program to retrieve the definition of a french word (in french).
+
+Maybe extended to other languages later.
+"""
+
import sys
import argparse
import msgpack
import sqlite3
from os.path import exists
+import ui
+
+
+debug = False
+
+if debug:
+ DIR_PATH = 'assets/'
+else:
+ DIR_PATH = '/usr/share/dicofr/assets/'
-DIR_PATH = '/usr/share/dicofr'
sys.path.insert(-1, DIR_PATH)
-import ui
+
dico = 'dicofr.db'
@@ -102,8 +118,6 @@ if __name__ == '__main__':
const=get_def_sql_reg, default=get_def_sql,
help='search a definition using SQL regex, '
'_ to match a letter, %% to match a group of letters')
- parser.add_argument('-w', '--wordlist', dest='wordList',
- action='store_const', default='list_word.msgpack')
parser.add_argument('-m', '--matching', dest='matching', action='store_true',
help='search the french words that match the regex')
parser.add_argument('word', metavar='PATTERN', type=str,
diff --git a/download/bz2toDB.py b/download/bz2toDB.py
new file mode 100644
index 0000000..1fddd85
--- /dev/null
+++ b/download/bz2toDB.py
@@ -0,0 +1,25 @@
+import bz2
+import sys
+
+
+def unbz2(file):
+ decomp = bz2.BZ2Decompressor()
+ buf = b''
+ for c in file:
+ buf += decomp.decompress(c)
+
+ while b'\n' in buf:
+ i = buf.index(b'\n')
+ if i + 1 < len(buf):
+ ret = buf[:i + 1]
+ buf = buf[i + 1:]
+ yield ret.decode("utf-8")
+ else:
+ yield buf
+ buf = b''
+
+
+with open('./wiktionary_dump.xml.bz2', 'rb') as f:
+ it = iter(lambda: f.read(32768), b'')
+ for a in unbz2(it):
+ print(a, end='')
diff --git a/download/download.py b/download/download.py
new file mode 100755
index 0000000..a941b90
--- /dev/null
+++ b/download/download.py
@@ -0,0 +1,130 @@
+import argparse
+import sys
+import urllib.request
+import dump2msgp
+import msgPack2sqlite_msgPack
+import subprocess
+
+from os.path import exists
+
+
+URL_DUMP = "https://dumps.wikimedia.org/frwiktionary/latest/frwiktionary-latest-pages-meta-current.xml.bz2"
+
+
+def unbz2(file):
+ decomp = bz2.BZ2Decompressor()
+ buf = b''
+ for c in file:
+ buf += decomp.decompress(c)
+
+ while b'\n' in buf:
+ i = buf.index(b'\n')
+ if i + 1 < len(buf):
+ ret = buf[:i + 1]
+ buf = buf[i + 1:]
+ yield ret.decode("utf-8")
+ else:
+ yield buf.decode("utf-8")
+ buf = b''
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(description='Download and create the database')
+ parser.add_argument('-o', '--out', dest='outputF', action='store',
+ help='the output, the database file',
+ default='dicofr.db')
+ parser.add_argument('-i', '--in', dest='dumpF', action='store',
+ help='the input dump file\'s filename',
+ default='')
+ parser.add_argument('-d', '--download', dest='download', action='store_true',
+ help='to download the lastest dump')
+
+ arg = parser.parse_args()
+
+ download = True
+
+ if download and arg.dumpF:
+ print("Incompatible options '-i' and '-d'.")
+ exit(1)
+ elif download:
+ arg.dumpF = URL_DUMP[URL_DUMP.rindex('/') + 1:]
+
+ if not arg.dumpF or not arg.dumpF.endswith('bz2'):
+ print('A bz2 dump file filename needed', file=sys.stderr)
+ exit(-1)
+
+ if exists(arg.dumpF) and download:
+ print(f"{arg.dumpF} exists. Force downloading ? (y/N)")
+ answer = input('> ')
+ if answer.lower()[0] != 'y':
+ download = False
+
+ if download:
+ print(f"Downloading the dump ({arg.dumpF})\nIt should take some time")
+ urllib.request.urlretrieve(URL_DUMP, arg.dumpF)
+
+ if not exists(arg.dumpF):
+ print('Download failed.\nExiting.', file=sys.stderr)
+ exit(-2)
+
+ decompress = False
+
+ try:
+ print("Trying the bzip2 command")
+ assert(subprocess.call(['bzip2', '-d', arg.dumpF]) == 0)
+ decompress = True
+ except:
+ print("The command “bzip” doesn't exists, or doesn't work as intended")
+ print("Fallback to Python bz2 module decompressor")
+
+ # Decompression using the Python bz2 module (fallback when the bzip2 command failed)
+ if not decompress:
+ try:
+ import bz2
+ with open(arg.dumpF, 'rb') as f:
+ it = iter(lambda: f.read(2**16), b'')
+
+ output_fn = arg.dumpF[:-4]
+
+ with open(output_fn, 'wb') as fout:
+ dcomp = bz2.BZ2Decompressor()
+ for chunk in it:
+ datal = len(chunk)
+ data = dcomp.decompress(chunk)
+ fout.write(data)
+ decompress = True
+ except:
+ print("Python bz2 module decompressor failed, maybe you don't have any space available")
+ print("Fallback to on the fly decompressor (RAM will be needed)")
+
+ if not decompress:
+ try:
+ # On the fly Decompression
+ with open(arg.dumpF, 'rb') as f:
+ it = iter(lambda: f.read(2**16), b'')
+ print("Data extraction on the fly")
+ res = dump2msgp.extractAll(unbz2(it), "error.log", False)
+ msgPack2sqlite_msgPack.writeDB(arg.outputF, res)
+ print(f"Database { arg.outputF } created ! 👏 🎉")
+ except:
+ print("Error: Can't extract the dump file")
+ print("Exiting (-1)")
+ exit(-1)
+
+ print(f"Removing temporary files")
+ os.remove(arg.dumpF)
+ else:
+ try:
+ output_fn = arg.dumpF[:-4]
+ with open(output_fn, 'r') as f:
+ print("Create the database")
+ res = dump2msgp.extractAll(f, "error.log", False)
+ msgPack2sqlite_msgPack.writeDB(arg.outputF, res)
+ print(f"Database { arg.outputF } created ! 👏 🎉")
+ except:
+ print("Failed to extract database")
+ print(("Exiting (-3)")
+ exit(-3)
+
+ print(f"Removing temporary files")
+ os.remove(output_fn)
diff --git a/dump2msgp.py b/download/dump2msgp.py
index d4fb050..70b483c 100644
--- a/dump2msgp.py
+++ b/download/dump2msgp.py
@@ -2,8 +2,9 @@ import tempfile as tmp
import re
import sys
import msgpack
+import argparse
-from listSection import listInfoSection
+from sectionList import listInfoSection
from template import template
@@ -13,6 +14,9 @@ Extract words from the Wiktionnary archive
"""
+DEFAULT_OUTPUT = 'dicofr.msgpk'
+
+
template_second = ['link', 'bd', 'pc', 'nom w pc', 'w', 'smcp', 'lien', 'ws',
'in', 'siècle2', 'fchim', 'nobr', 'wp', 'r',
'clé de tri', 'contexte', 'emploi', 'l', 'polytonique',
@@ -36,7 +40,6 @@ template_second_lambda_snd = {
'ème': (lambda x: '^{' + x if x else 'ème' + '}'),
'Ier': (lambda x: '^{' + x if x else 'Ier' + '}'),
'III': (lambda x: '^{' + x if x else 'III' + '}'),
- 'III': (lambda x: '^{' + x if x else 'III' + '}'),
'small': (lambda x: '_{' + x if x else '' + '}'),
'indice': (lambda x: '_{' + x if x else '' + '}'),
'graphie': (lambda x: '«' + x if x else '»'),
@@ -62,7 +65,7 @@ dictMatch = {x['match']: i for (i, x) in enumerate(listInfoSection)}
interdit = " :"
-def transclusion(trans, info):
+def transclusion(trans, info, errorF):
trans = trans[2:-2]
while '{{' in trans:
@@ -73,7 +76,7 @@ def transclusion(trans, info):
else:
l1 += 2
t = trans[l0:l1]
- t = transclusion(t, info)
+ t = transclusion(t, info, errorF)
trans = trans[:l0] + t + trans[l1:]
s = list(map(lambda x: x.strip(), trans.split('|')))
@@ -81,8 +84,7 @@ def transclusion(trans, info):
return template[s[0]]
if s[0].lower() in template_second:
-
- return s[1] if len(s) > 1 else title
+ return s[1] if len(s) > 1 else info['mot']
if s[0].lower().startswith('citation'):
cit = s[0].split('/')
@@ -95,7 +97,7 @@ def transclusion(trans, info):
if len(cit) <= 2:
return ''
else:
- return '/'.join(c[1:])
+ return '/'.join(cit[1:])
if s[0].lower() in template_second_lambda_snd:
return template_second_lambda_snd[s[0].lower()](s[1] if len(s) > 1 else '')
@@ -103,14 +105,15 @@ def transclusion(trans, info):
if s[0].lower() in template_second_lambda_trd:
return template_second_lambda_trd[s[0].lower()](s[2] if len(s) > 2 else '')
- with open('wiki_err.log', 'a') as err:
- print(s[0], file=err)
-# print("Incompréhension de la transclusion {} du mot {}".format(trans,
-# info['mot']), file=err)
+ if errorF:
+ with open(errorF, 'a') as err:
+ print(s[0], file=err)
+ print("Incompréhension de la transclusion {} du mot {}".format(trans,
+ info['mot']), file=err)
return ''
-def extract(f, w):
+def extract(f, w, errorF):
infoFin = []
toRead = True
@@ -140,13 +143,13 @@ def extract(f, w):
info['cat-gram'] = nat
toRead = True
break
- except e:
- with open('wiki_err.log', 'a') as err:
- print("^[1] Problème à l'initialisation du mot {}:"
- " {}".format(info['mot'], e), file=err)
- print('line: [{}]'.format(line, e), file=err)
- e = sys.exc_info()[0]
- print("Erreur :", e, file=err)
+ except Exception as e:
+ if errorF:
+ with open(errorF, 'a') as err:
+ print("^[1] Problème à l'initialisation du mot {mot}: {e}", file=err)
+ print(f'line: [{line}]: {e}', file=err)
+ e = sys.exc_info()[0]
+ print("Erreur :", e, file=err)
if not toRead:
break
@@ -184,23 +187,23 @@ def extract(f, w):
info['genre'] = 'fem'
if line.startswith('# '):
- info['def'].append({'def': wikiToMd(line[2:], info)})
+ info['def'].append({'def': wikiToMd(line[2:], info, errorF)})
elif line.startswith('#* '):
if not info['def']:
with open('wiki_err.log', 'a') as err:
print("Exemple sans définition pour le mot {}".format(
info['mot']), file=err)
elif 'ex' in info['def'][-1]:
- info['def'][-1]['ex'].append(wikiToMd(line[3:], info))
+ info['def'][-1]['ex'].append(wikiToMd(line[3:], info, errorF))
else:
- info['def'][-1]['ex'] = [wikiToMd(line[3:], info)]
+ info['def'][-1]['ex'] = [wikiToMd(line[3:], info, errorF)]
elif line.startswith('#') and not line.startswith('##'):
- info['def'].append({'def': wikiToMd(line[1:], info)})
+ info['def'].append({'def': wikiToMd(line[1:], info, errorF)})
if line.startswith('==='):
goBack = len(line)
break
if goBack:
- tf.seek(tf.tell() - goBack)
+ f.seek(f.tell() - goBack)
goBack = 0
toRead = True
infoFin.append(info)
@@ -208,7 +211,7 @@ def extract(f, w):
return infoFin
-def wikiToMd(line, info):
+def wikiToMd(line, info, errorF):
line = line.strip()
# 3 Étapes:
# - Links [...]
@@ -224,7 +227,7 @@ def wikiToMd(line, info):
else:
l1 += 2
trans = line[l0:l1]
- trans = transclusion(trans, info)
+ trans = transclusion(trans, info, errorF)
line = line[:l0] + trans + line[l1:]
# Links !
@@ -246,7 +249,7 @@ def wikiToMd(line, info):
return line
-with open("./fr_wiktionary_all.xml", 'r') as f:
+def extractAll(f, errorF, ignore):
title = ""
isFr = False
hasForbidden = False
@@ -258,12 +261,7 @@ with open("./fr_wiktionary_all.xml", 'r') as f:
for line in f:
if "</page>" in line and tf:
tf.seek(0)
- i = extract(tf, title)
-
- for w in i:
- if w['mot'] == 'président':
- print("What we exstract from it:")
- print(i)
+ i = extract(tf, title, errorF)
dict_[title] = i
tf.close()
@@ -287,23 +285,55 @@ with open("./fr_wiktionary_all.xml", 'r') as f:
for c in interdit:
if c in title:
hasForbidden = True
- if not hasForbidden and "<text xml:space=\"preserve\">" in line:
+ if not hasForbidden and "<text bytes=\"" in line and "\" xml:space=\"preserve\">" in line:
hasText = True
if not hasForbidden and "== {{langue|fr}}" in line and hasText:
isFr = True
if tf:
- print("Erreur tf encore ouvert !")
- exit(-1)
- tf = tmp.NamedTemporaryFile(mode="w+t")
- # print(title)
+ if not ignore:
+ if errorF:
+ with open(errorF, 'a') as err:
+ print(f"{title}: Erreur tf encore ouvert !",
+ file=err)
+ else:
+ print(f"{title}: Erreur tf encore ouvert !")
+ tf.seek(0)
+ while line2 := tf.readline():
+ print(line2, end='')
+ print(line)
+
+ exit(-1)
+ else:
+ tf = tmp.NamedTemporaryFile(mode="w+t")
elif not hasForbidden and "== {{langue|" in line:
isFr = False
if not hasForbidden and isFr and tf:
tf.write(line)
- print("Will save the result")
+ return dict_
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(description='wiktionary dump to msgpack')
+ parser.add_argument('-o', '--out', dest='outputF', action='store_const',
+ const=DEFAULT_OUTPUT, default=DEFAULT_OUTPUT,
+ help='the output filename')
+ parser.add_argument('-i', '--input', dest='inputF', action='store',
+ help='the input filename, a dump of witionary')
+ parser.add_argument('-e', '--error', dest='errorF', action='store',
+ help='the filename to log errors')
+ parser.add_argument('--ignore', dest='ignoreError', action='store_true',
+ help='the filename to log errors')
+
+ arg = parser.parse_args()
+
+ if arg.inputF is None:
+ print('A wiktionary dump is needed', file=sys.stderr)
+ exit(-1)
- with open('result_all.pack', 'wb') as f:
- to_w = msgpack.packb(dict_)
- f.write(to_w)
+ with open(arg.inputF, 'r') as f:
+ res = extractAll(f, arg.errorF, arg.ignoreError)
+ with open(arg.outputF, 'wb') as f:
+ to_w = msgpack.packb(res)
+ f.write(to_w)
diff --git a/msgPack2sqlite_msgPack.py b/download/msgPack2sqlite_msgPack.py
index c251d59..38d34cd 100644
--- a/msgPack2sqlite_msgPack.py
+++ b/download/msgPack2sqlite_msgPack.py
@@ -1,34 +1,12 @@
-# To load with python interpreter
-
import msgpack
-import ui
import sys
import sqlite3
import argparse
-if __name__ == '__main__':
- parser = argparse.ArgumentParser(description='wiktionary dump msgpack '
- 'to SQLite database file')
- parser.add_argument('-o', '--out', dest='outputF', action='store',
- help='the output filename')
- parser.add_argument('-i', '--input', dest='inputF', action='store',
- help='the input filename, a dump of witionary')
-
- arg = parser.parse_args()
-
- if arg.inputF is None:
- print('Error input file needed', file=sys.stderr)
- if arg.outputF is None:
- print('Error output file needed', file=sys.stderr)
-
- with open(arg.inputF, 'rb') as f:
- r = f.read()
-
- d = p = msgpack.unpackb(r, raw=False)
- del r
- with sqlite3.connect(arg.outputF) as con:
+def writeDB(outputF, data):
+ with sqlite3.connect(outputF) as con:
cur = con.cursor()
cur.execute('''CREATE TABLE IF NOT EXISTS entry (
word TEXT,
@@ -41,7 +19,7 @@ if __name__ == '__main__':
ID INTEGER PRIMARY KEY)''')
con.commit()
- for w, listW in d.items():
+ for w, listW in data.items():
for word in listW:
data = (w, word['cat-gram'], word['API'], "\t".join(word['infos']),
word['genre'], word['accord'],
@@ -51,6 +29,24 @@ if __name__ == '__main__':
con.commit()
-def give_def(w):
- ui.show_terminal(d[w])
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(description='wiktionary dump msgpack '
+ 'to SQLite database file')
+ parser.add_argument('-o', '--out', dest='outputF', action='store',
+ help='the output filename')
+ parser.add_argument('-i', '--input', dest='inputF', action='store',
+ help='the input filename, a dump of witionary')
+
+ arg = parser.parse_args()
+
+ if arg.inputF is None:
+ print('Error input file needed', file=sys.stderr)
+ if arg.outputF is None:
+ print('Error output file needed', file=sys.stderr)
+
+ with open(arg.inputF, 'rb') as f:
+ r = f.read()
+ d = p = msgpack.unpackb(r, raw=False)
+ del r
+ writeDB(arg.outputF, d)
diff --git a/sectionList.py b/download/sectionList.py
index 68dd657..68dd657 100644
--- a/sectionList.py
+++ b/download/sectionList.py
diff --git a/template.py b/download/template.py
index fa0394a..fa0394a 100644
--- a/template.py
+++ b/download/template.py
diff --git a/getSection.py b/getSection.py
deleted file mode 100644
index 62b74be..0000000
--- a/getSection.py
+++ /dev/null
@@ -1,129 +0,0 @@
-listInfoSection= [
- {'type': 'adjectif', 'match': 'adj', 'o': 'adjectif'},
- {'type': 'adjectif', 'match': 'adjectif', 'o': 'adjectif'},
- {'type': 'adjectif', 'match': 'adj-dém', 'o': 'adjectif démonstratif'},
- {'type': 'adjectif', 'match': 'adjectif démonstratif', 'o': 'adjectif démonstratif'},
- {'type': 'adjectif', 'match': 'adjectif exclamatif', 'o': 'adjectif exclamatif'},
- {'type': 'adjectif', 'match': 'adjectif indéfini', 'o': 'adjectif indéfini'},
- {'type': 'adjectif', 'match': 'adjectif interrogatif', 'o': 'adjectif interrogatif'},
- {'type': 'adjectif', 'match': 'adjectif numéral', 'o': 'adjectif numéral'},
- {'type': 'adjectif', 'match': 'adjectif possessif', 'o': 'adjectif possessif'},
- {'type': 'adjectif', 'match': 'adjectif relatif', 'o': 'adjectif relatif'},
-
- {'type': 'adverbe', 'match': 'adv', 'o': 'adverbe'},
- {'type': 'adverbe', 'match': 'adverbe', 'o': 'adverbe'},
- {'type': 'adverbe', 'match': 'adverbe interrogatif', 'o': 'adverbe interrogatif'},
- {'type': 'adverbe', 'match': 'adverbe relatif', 'o': 'adverbe relatif'},
-
- {'type': 'article', 'match': 'article', 'o': 'article'},
- {'type': 'article', 'match': 'article défini', 'o': 'article défini'},
- {'type': 'article', 'match': 'article indéfini', 'o': 'article indéfini'},
- {'type': 'article', 'match': 'article partitif', 'o': 'article partitif'},
-
- {'type': 'conjonction', 'match': 'conjonction', 'o': 'conjonction'},
- {'type': 'conjonction', 'match': 'conjonction de coordination', 'o': 'conjonction de coordination'},
-
-
- {'type': 'erreur', 'match': 'erreur', 'o': 'faute courante'},
- {'type': 'erreur', 'match': 'faute', 'o': 'faute courante'},
-
- {'type': 'interjection', 'match': 'interj', 'o': 'interjection'},
- {'type': 'interjection', 'match': 'interjection', 'o': 'interjection'},
-
- {'type': 'interjection', 'match': 'interjection', 'o': 'interjection'},
-
- {'type': 'locuton nominale', 'match': 'loc-phr', 'o': 'locution nominale'},
- {'type': 'locuton nominale', 'match': 'locution', 'o': 'locution nominale'},
- {'type': 'locuton nominale', 'match': 'locution nominale', 'o': 'locution nominale'},
- {'type': 'locuton nominale', 'match': 'locution phrase', 'o': 'locution nominale'},
- {'type': 'locuton nominale', 'match': 'locution-phrase', 'o': 'locution nominale'},
-
- {'type': 'nom', 'match': 'nom', 'o': 'nom'},
- {'type': 'nom', 'match': 'nom1', 'o': 'nom'},
- {'type': 'nom', 'match': 'nom2', 'o': 'nom'},
- {'type': 'nom', 'match': 'nom3', 'o': 'nom'},
- {'type': 'nom', 'match': 'nom4', 'o': 'nom'},
- {'type': 'nom', 'match': 'nom5', 'o': 'nom'},
- {'type': 'nom', 'match': 'nom6', 'o': 'nom'},
- {'type': 'nom', 'match': 'nom7', 'o': 'nom'},
- {'type': 'nom', 'match': 'nom8', 'o': 'nom'},
- {'type': 'nom', 'match': 'nom9', 'o': 'nom'},
- {'type': 'nom', 'match': 'nom10', 'o': 'nom'},
- {'type': 'nom', 'match': 'nom11', 'o': 'nom'},
- {'type': 'nom', 'match': 'nom12', 'o': 'nom'},
- {'type': 'nom', 'match': 'nom13', 'o': 'nom'},
- {'type': 'nom', 'match': 'nom14', 'o': 'nom'},
- {'type': 'nom', 'match': 'nom15', 'o': 'nom'},
- {'type': 'nom', 'match': 'nom16', 'o': 'nom'},
- {'type': 'nom', 'match': 'nom17', 'o': 'nom'},
- {'type': 'nom', 'match': 'nom commun', 'o': 'nom'},
- {'type': 'nom', 'match': 'substantif', 'o': 'substantif'},
-
-
- {'type': 'nom de famille', 'match': 'nom de famille', 'o': 'nom de famille'},
- {'type': 'nom de famille', 'match': 'nom de famille anglais', 'o': 'nom de famille'},
- {'type': 'nom de famille', 'match': 'nom-fam', 'o': 'nom de famille'},
- {'type': 'nom de famille', 'match': 'nom-pr', 'o': 'nom de famille'},
- {'type': 'nom de famille', 'match': 'nom-propre', 'o': 'nom de famille'},
- {'type': 'nom de famille', 'match': 'nom pr', 'o': 'nom de famille'},
- {'type': 'nom de famille', 'match': 'nom propre', 'o': 'nom de famille'},
- {'type': 'nom scientifique', 'match': 'nom scientifique', 'o': 'nom scientifique'},
- {'type': 'particule', 'match': 'particule', 'o': 'particule'},
- {'type': 'nom de famille', 'match': 'patronyme', 'o': 'nom de famille'},
- {'type': 'préfixe', 'match': 'préfixe', 'o': 'préfixe'},
- {'type': 'suffixe', 'match': 'suffixe', 'o': 'suffixe'},
- {'type': 'prénom', 'match': 'prénom', 'o': 'prénom'},
-
- {'type': 'onomatopée', 'match': 'onomatopée', 'o': 'onomatopée'},
- {'type': 'onomatopée', 'match': 'onom', 'o': 'onomatopée'},
-
- {'type': 'préposition', 'match': 'prép', 'o': 'préposition'},
- {'type': 'préposition', 'match': 'préposition', 'o': 'préposition'},
-
- {'type': 'pronom', 'match': 'pronom', 'o': 'pronom'},
- {'type': 'pronom', 'match': 'pronom démonstratif', 'o': 'pronom démonstratif'},
- {'type': 'pronom', 'match': 'pronom indéfini', 'o': 'pronom indéfini'},
- {'type': 'pronom', 'match': 'pronom interrogatif', 'o': 'pronom interrogatif'},
- {'type': 'pronom', 'match': 'pronom personnel', 'o': 'pronom personnel'},
- {'type': 'pronom', 'match': 'pronom possessif', 'o': 'pronom possessif'},
- {'type': 'pronom', 'match': 'pronom relatif', 'o': 'pronom relatif'},
-
- {'type': 'verbe', 'match': 'verb', 'o': 'verbe'},
- {'type': 'verbe', 'match': 'verbe', 'o': 'verbe'},
- {'type': 'verbe', 'match': 'verbe pronominal', 'o': 'verbe pronominal'}
-]
-
-listSections = []
-with open("./listSections", "r") as f:
- for line in f:
- line = line.strip()
- if line[-3:] != "===":
- continue
-
- s = line.find('{{')+2
- e = line.find('}}')
- if not s < e:
- continue
-
- argsStr = line[s:e]
- args = list(map(lambda x: x.strip().lower(), argsStr.split('|')))
-
- if len(args) <= 2 or not args[1]:
- continue
-
- if not args[2].startswith('fr'):
- continue
-
- classed = False
- for m in listInfoSection:
- if m['match'] == args[1]:
- classed = True
- continue
- if not classed:
- listSections.append(args[1])
-
-for section in (list(set(listSections))):
- print(section)
-
-
-
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..c429cce
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+msgpack==1.0.0
+PyMySQL==0.9.3
+Flask==1.1.2
diff --git a/ui.py b/ui.py
index e2a166a..c198cfd 100644
--- a/ui.py
+++ b/ui.py
@@ -5,21 +5,22 @@ tui_show_example = True
def show_terminal(word):
- """Display the definition to the terminal
-
- @word Format:
- {
- mot: ''
- cat-gram: ''
- def: [{
- def: ''
- ex: ['', '']
- }]
- API: ''
- infos: ['', '']
- genre: ''
- accord: ''
- }
+ """
+ Display the definition to the terminal
+
+ @word Format:
+ {
+ mot: ''
+ cat-gram: ''
+ def: [{
+ def: ''
+ ex: ['', '']
+ }]
+ API: ''
+ infos: ['', '']
+ genre: ''
+ accord: ''
+ }
"""
indent = tui_indent * ' '
print(indent + word['mot'])
@@ -45,7 +46,3 @@ def show_terminal(word):
print(indent + '\t\t * ' + ex)
print('')
-
-def show_web(word):
- """Display the definition in HTML format"""
- pass
diff --git a/web.py b/web.py
index c5e0b1c..82ad696 100644
--- a/web.py
+++ b/web.py
@@ -1,3 +1,7 @@
+"""
+A simple Web application to serve dicofr
+"""
+
from flask import Flask, request, Response, send_file
import msgpack
# from flask_cors import CORS
@@ -20,17 +24,26 @@ app.config.from_object(__name__)
@app.route('/', methods=['GET'])
def index_client():
+ """
+ Send the single file
+ """
return send_file("index.html", mimetype='text/html')
def get_def_reg(w):
+ """
+ Search a word, can deal with regex and casse problem.
+ """
if res := dicofr.get_def_sql_reg(w):
return msgpack.packb(res)
+
# Recherche du mot en minuscule
elif res := dicofr.get_def_sql_reg(w.lower()):
return msgpack.packb(res)
+
# Recherche du mot en nom propre
elif res := dicofr.get_def_sql_reg(w.title()):
return msgpack.packb(res)
+
else:
return Response("", status=404)
@@ -38,6 +51,9 @@ def get_def_reg(w):
@app.route('/def', methods=['GET'])
def get_def():
+ """
+ Retrieve a definition
+ """
w = request.args.get('w')
if '_' in w: