diff options
author | ache <ache@ache.one> | 2020-11-23 03:41:54 +0100 |
---|---|---|
committer | ache <ache@ache.one> | 2020-11-23 03:41:54 +0100 |
commit | 451d7bf0db58d42afc5a5086353558d227040dff (patch) | |
tree | 3cf473a791ba425e09da566a142f9bb32da692e6 | |
parent | Documentation README (diff) |
Update everything
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | README.md | 19 | ||||
-rwxr-xr-x | dicofr.py | 22 | ||||
-rw-r--r-- | download/bz2toDB.py | 25 | ||||
-rwxr-xr-x | download/download.py | 130 | ||||
-rw-r--r-- | download/dump2msgp.py (renamed from dump2msgp.py) | 114 | ||||
-rw-r--r-- | download/msgPack2sqlite_msgPack.py (renamed from msgPack2sqlite_msgPack.py) | 50 | ||||
-rw-r--r-- | download/sectionList.py (renamed from sectionList.py) | 0 | ||||
-rw-r--r-- | download/template.py (renamed from template.py) | 0 | ||||
-rw-r--r-- | getSection.py | 129 | ||||
-rw-r--r-- | requirements.txt | 3 | ||||
-rw-r--r-- | ui.py | 35 | ||||
-rw-r--r-- | web.py | 16 |
13 files changed, 320 insertions, 225 deletions
@@ -4,7 +4,7 @@ DIR_BIN=/usr/bin/ install: mkdir -p ${DIR_INSTALL_PATH} - cp -u *.py wiktfr.sql ${DIR_INSTALL_PATH} + cp -u *.py *.sql ${DIR_INSTALL_PATH} cp -u dicofr.py ${DIR_BIN}/dicofr chmod +rw ${DIR_BIN}/dicofr @@ -12,7 +12,6 @@ A bunch of **Python** scripts to transform wiktionary archive dump to MySQL data - With a simple CLI. - With a simple WUI, flask based. - Regex support. - - How to create the database -------------------------- @@ -32,12 +31,12 @@ This file is interesting for developers not for end users. It's a serialization of the internal used dictionary (python dictionary). ~~~shell -$ python dump2msgp.py -i frwiktionary-20200601-pages-articles.xml.bz2 +$ python dump2msgp.py -i frwiktionary-20200601-pages-articles.xml ~~~ Then, you can create the SQLite database file. ~~~shell -$ python msgPack2sqlite_msgPack.py -i dicofr.msgpk -o dicofr.db +$ python msgPack2sqlite_msgPack.py -i dicofr.msgpk ~~~ You can then use `dicofr.py` to search a word from the CLI or use the WUI with the command: @@ -93,6 +92,20 @@ juliennois juliens ~~~ +Or the WUI : + +~~~shell +$ python web.py +~~~ + +Why only french ? +----------------- + +Because that's the only language I'm able to tackle. +I can't verify anything about others languages. + +Feel free to contribute. + How to contribute ? ------------------- @@ -1,16 +1,32 @@ #!/bin/env python +""" +# Main file. + +A program to retrieve the definition of a french word (in french). + +Maybe extended to other languages later. +""" + import sys import argparse import msgpack import sqlite3 from os.path import exists +import ui + + +debug = False + +if debug: + DIR_PATH = 'assets/' +else: + DIR_PATH = '/usr/share/dicofr/assets/' -DIR_PATH = '/usr/share/dicofr' sys.path.insert(-1, DIR_PATH) -import ui + dico = 'dicofr.db' @@ -102,8 +118,6 @@ if __name__ == '__main__': const=get_def_sql_reg, default=get_def_sql, help='search a definition using SQL regex, ' '_ to match a letter, %% to match a group of letters') - parser.add_argument('-w', '--wordlist', dest='wordList', - action='store_const', default='list_word.msgpack') parser.add_argument('-m', '--matching', dest='matching', action='store_true', help='search the french words that match the regex') parser.add_argument('word', metavar='PATTERN', type=str, diff --git a/download/bz2toDB.py b/download/bz2toDB.py new file mode 100644 index 0000000..1fddd85 --- /dev/null +++ b/download/bz2toDB.py @@ -0,0 +1,25 @@ +import bz2 +import sys + + +def unbz2(file): + decomp = bz2.BZ2Decompressor() + buf = b'' + for c in file: + buf += decomp.decompress(c) + + while b'\n' in buf: + i = buf.index(b'\n') + if i + 1 < len(buf): + ret = buf[:i + 1] + buf = buf[i + 1:] + yield ret.decode("utf-8") + else: + yield buf + buf = b'' + + +with open('./wiktionary_dump.xml.bz2', 'rb') as f: + it = iter(lambda: f.read(32768), b'') + for a in unbz2(it): + print(a, end='') diff --git a/download/download.py b/download/download.py new file mode 100755 index 0000000..a941b90 --- /dev/null +++ b/download/download.py @@ -0,0 +1,130 @@ +import argparse +import sys +import urllib.request +import dump2msgp +import msgPack2sqlite_msgPack +import subprocess + +from os.path import exists + + +URL_DUMP = "https://dumps.wikimedia.org/frwiktionary/latest/frwiktionary-latest-pages-meta-current.xml.bz2" + + +def unbz2(file): + decomp = bz2.BZ2Decompressor() + buf = b'' + for c in file: + buf += decomp.decompress(c) + + while b'\n' in buf: + i = buf.index(b'\n') + if i + 1 < len(buf): + ret = buf[:i + 1] + buf = buf[i + 1:] + yield ret.decode("utf-8") + else: + yield buf.decode("utf-8") + buf = b'' + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Download and create the database') + parser.add_argument('-o', '--out', dest='outputF', action='store', + help='the output, the database file', + default='dicofr.db') + parser.add_argument('-i', '--in', dest='dumpF', action='store', + help='the input dump file\'s filename', + default='') + parser.add_argument('-d', '--download', dest='download', action='store_true', + help='to download the lastest dump') + + arg = parser.parse_args() + + download = True + + if download and arg.dumpF: + print("Incompatible options '-i' and '-d'.") + exit(1) + elif download: + arg.dumpF = URL_DUMP[URL_DUMP.rindex('/') + 1:] + + if not arg.dumpF or not arg.dumpF.endswith('bz2'): + print('A bz2 dump file filename needed', file=sys.stderr) + exit(-1) + + if exists(arg.dumpF) and download: + print(f"{arg.dumpF} exists. Force downloading ? (y/N)") + answer = input('> ') + if answer.lower()[0] != 'y': + download = False + + if download: + print(f"Downloading the dump ({arg.dumpF})\nIt should take some time") + urllib.request.urlretrieve(URL_DUMP, arg.dumpF) + + if not exists(arg.dumpF): + print('Download failed.\nExiting.', file=sys.stderr) + exit(-2) + + decompress = False + + try: + print("Trying the bzip2 command") + assert(subprocess.call(['bzip2', '-d', arg.dumpF]) == 0) + decompress = True + except: + print("The command “bzip” doesn't exists, or doesn't work as intended") + print("Fallback to Python bz2 module decompressor") + + # Decompression using bzip2 + if not decompress: + try: + import bz2 + with open(arg.dumpF, 'rb') as f: + it = iter(lambda: f.read(2**16), b'') + + output_fn = arg.dumpF[:-4] + + with open(output_fn, 'wb') as fout: + dcomp = bz2.BZ2Decompressor() + for chunk in it: + datal = len(chunk) + data = dcomp.decompress(chunk) + fout.write(data) + decompress = True + except: + print("Python bz2 module decompressor failed, maybe you don't have any space available") + print("Fallback to on the fly decompressor (RAM will be needed)") + + if not decompress: + try: + # On the fly Decompression + with open(arg.dumpF, 'rb') as f: + it = iter(lambda: f.read(2**16), b'') + print("Data extraction on the fly") + res = dump2msgp.extractAll(unbz2(it), "error.log", False) + msgPack2sqlite_msgPack.writeDB(arg.outputF, res) + print(f"Database { arg.outputF } created ! 👏 🎉") + except: + print("Error: Can't extract the dump file") + print("Exiting (-1)") + exit(-1) + + print(f"Removing temporary files") + os.remove(arg.dumpF) + else: + try: + output_fn = arg.dumpF[:-4] + with open(output_fn, 'r') as f: + print("Create the database") + res = dump2msgp.extractAll(f, "error.log", False) + msgPack2sqlite_msgPack.writeDB(arg.outputF, res) + print(f"Database { arg.outputF } created ! 👏 🎉") + except: + print("Failed to extract database") + print(("Exiting (-3)") + exit(-3) + + print(f"Removing temporary files") + os.remove(output_fn) diff --git a/dump2msgp.py b/download/dump2msgp.py index d4fb050..70b483c 100644 --- a/dump2msgp.py +++ b/download/dump2msgp.py @@ -2,8 +2,9 @@ import tempfile as tmp import re import sys import msgpack +import argparse -from listSection import listInfoSection +from sectionList import listInfoSection from template import template @@ -13,6 +14,9 @@ Extract words from the Wiktionnary archive """ +DEFAULT_OUTPUT = 'dicofr.msgpk' + + template_second = ['link', 'bd', 'pc', 'nom w pc', 'w', 'smcp', 'lien', 'ws', 'in', 'siècle2', 'fchim', 'nobr', 'wp', 'r', 'clé de tri', 'contexte', 'emploi', 'l', 'polytonique', @@ -36,7 +40,6 @@ template_second_lambda_snd = { 'ème': (lambda x: '^{' + x if x else 'ème' + '}'), 'Ier': (lambda x: '^{' + x if x else 'Ier' + '}'), 'III': (lambda x: '^{' + x if x else 'III' + '}'), - 'III': (lambda x: '^{' + x if x else 'III' + '}'), 'small': (lambda x: '_{' + x if x else '' + '}'), 'indice': (lambda x: '_{' + x if x else '' + '}'), 'graphie': (lambda x: '«' + x if x else '»'), @@ -62,7 +65,7 @@ dictMatch = {x['match']: i for (i, x) in enumerate(listInfoSection)} interdit = " :" -def transclusion(trans, info): +def transclusion(trans, info, errorF): trans = trans[2:-2] while '{{' in trans: @@ -73,7 +76,7 @@ def transclusion(trans, info): else: l1 += 2 t = trans[l0:l1] - t = transclusion(t, info) + t = transclusion(t, info, errorF) trans = trans[:l0] + t + trans[l1:] s = list(map(lambda x: x.strip(), trans.split('|'))) @@ -81,8 +84,7 @@ def transclusion(trans, info): return template[s[0]] if s[0].lower() in template_second: - - return s[1] if len(s) > 1 else title + return s[1] if len(s) > 1 else info['mot'] if s[0].lower().startswith('citation'): cit = s[0].split('/') @@ -95,7 +97,7 @@ def transclusion(trans, info): if len(cit) <= 2: return '' else: - return '/'.join(c[1:]) + return '/'.join(cit[1:]) if s[0].lower() in template_second_lambda_snd: return template_second_lambda_snd[s[0].lower()](s[1] if len(s) > 1 else '') @@ -103,14 +105,15 @@ def transclusion(trans, info): if s[0].lower() in template_second_lambda_trd: return template_second_lambda_trd[s[0].lower()](s[2] if len(s) > 2 else '') - with open('wiki_err.log', 'a') as err: - print(s[0], file=err) -# print("Incompréhension de la transclusion {} du mot {}".format(trans, -# info['mot']), file=err) + if errorF: + with open(errorF, 'a') as err: + print(s[0], file=err) + print("Incompréhension de la transclusion {} du mot {}".format(trans, + info['mot']), file=err) return '' -def extract(f, w): +def extract(f, w, errorF): infoFin = [] toRead = True @@ -140,13 +143,13 @@ def extract(f, w): info['cat-gram'] = nat toRead = True break - except e: - with open('wiki_err.log', 'a') as err: - print("^[1] Problème à l'initialisation du mot {}:" - " {}".format(info['mot'], e), file=err) - print('line: [{}]'.format(line, e), file=err) - e = sys.exc_info()[0] - print("Erreur :", e, file=err) + except Exception as e: + if errorF: + with open(errorF, 'a') as err: + print("^[1] Problème à l'initialisation du mot {mot}: {e}", file=err) + print(f'line: [{line}]: {e}', file=err) + e = sys.exc_info()[0] + print("Erreur :", e, file=err) if not toRead: break @@ -184,23 +187,23 @@ def extract(f, w): info['genre'] = 'fem' if line.startswith('# '): - info['def'].append({'def': wikiToMd(line[2:], info)}) + info['def'].append({'def': wikiToMd(line[2:], info, errorF)}) elif line.startswith('#* '): if not info['def']: with open('wiki_err.log', 'a') as err: print("Exemple sans définition pour le mot {}".format( info['mot']), file=err) elif 'ex' in info['def'][-1]: - info['def'][-1]['ex'].append(wikiToMd(line[3:], info)) + info['def'][-1]['ex'].append(wikiToMd(line[3:], info, errorF)) else: - info['def'][-1]['ex'] = [wikiToMd(line[3:], info)] + info['def'][-1]['ex'] = [wikiToMd(line[3:], info, errorF)] elif line.startswith('#') and not line.startswith('##'): - info['def'].append({'def': wikiToMd(line[1:], info)}) + info['def'].append({'def': wikiToMd(line[1:], info, errorF)}) if line.startswith('==='): goBack = len(line) break if goBack: - tf.seek(tf.tell() - goBack) + f.seek(f.tell() - goBack) goBack = 0 toRead = True infoFin.append(info) @@ -208,7 +211,7 @@ def extract(f, w): return infoFin -def wikiToMd(line, info): +def wikiToMd(line, info, errorF): line = line.strip() # 3 Étapes: # - Links [...] @@ -224,7 +227,7 @@ def wikiToMd(line, info): else: l1 += 2 trans = line[l0:l1] - trans = transclusion(trans, info) + trans = transclusion(trans, info, errorF) line = line[:l0] + trans + line[l1:] # Links ! @@ -246,7 +249,7 @@ def wikiToMd(line, info): return line -with open("./fr_wiktionary_all.xml", 'r') as f: +def extractAll(f, errorF, ignore): title = "" isFr = False hasForbidden = False @@ -258,12 +261,7 @@ with open("./fr_wiktionary_all.xml", 'r') as f: for line in f: if "</page>" in line and tf: tf.seek(0) - i = extract(tf, title) - - for w in i: - if w['mot'] == 'président': - print("What we exstract from it:") - print(i) + i = extract(tf, title, errorF) dict_[title] = i tf.close() @@ -287,23 +285,55 @@ with open("./fr_wiktionary_all.xml", 'r') as f: for c in interdit: if c in title: hasForbidden = True - if not hasForbidden and "<text xml:space=\"preserve\">" in line: + if not hasForbidden and "<text bytes=\"" in line and "\" xml:space=\"preserve\">" in line: hasText = True if not hasForbidden and "== {{langue|fr}}" in line and hasText: isFr = True if tf: - print("Erreur tf encore ouvert !") - exit(-1) - tf = tmp.NamedTemporaryFile(mode="w+t") - # print(title) + if not ignore: + if errorF: + with open(errorF, 'a') as err: + print(f"{title}: Erreur tf encore ouvert !", + file=err) + else: + print(f"{title}: Erreur tf encore ouvert !") + tf.seek(0) + while line2 := tf.readline(): + print(line2, end='') + print(line) + + exit(-1) + else: + tf = tmp.NamedTemporaryFile(mode="w+t") elif not hasForbidden and "== {{langue|" in line: isFr = False if not hasForbidden and isFr and tf: tf.write(line) - print("Will save the result") + return dict_ + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='wiktionary dump to msgpack') + parser.add_argument('-o', '--out', dest='outputF', action='store_const', + const=DEFAULT_OUTPUT, default=DEFAULT_OUTPUT, + help='the output filename') + parser.add_argument('-i', '--input', dest='inputF', action='store', + help='the input filename, a dump of witionary') + parser.add_argument('-e', '--error', dest='errorF', action='store', + help='the filename to log errors') + parser.add_argument('--ignore', dest='ignoreError', action='store_true', + help='the filename to log errors') + + arg = parser.parse_args() + + if arg.inputF is None: + print('A wiktionary dump is needed', file=sys.stderr) + exit(-1) - with open('result_all.pack', 'wb') as f: - to_w = msgpack.packb(dict_) - f.write(to_w) + with open(arg.inputF, 'r') as f: + res = extractAll(f, arg.errorF, arg.ignoreError) + with open(arg.outputF, 'wb') as f: + to_w = msgpack.packb(res) + f.write(to_w) diff --git a/msgPack2sqlite_msgPack.py b/download/msgPack2sqlite_msgPack.py index c251d59..38d34cd 100644 --- a/msgPack2sqlite_msgPack.py +++ b/download/msgPack2sqlite_msgPack.py @@ -1,34 +1,12 @@ -# To load with python interpreter - import msgpack -import ui import sys import sqlite3 import argparse -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='wiktionary dump msgpack ' - 'to SQLite database file') - parser.add_argument('-o', '--out', dest='outputF', action='store', - help='the output filename') - parser.add_argument('-i', '--input', dest='inputF', action='store', - help='the input filename, a dump of witionary') - - arg = parser.parse_args() - - if arg.inputF is None: - print('Error input file needed', file=sys.stderr) - if arg.outputF is None: - print('Error output file needed', file=sys.stderr) - - with open(arg.inputF, 'rb') as f: - r = f.read() - - d = p = msgpack.unpackb(r, raw=False) - del r - with sqlite3.connect(arg.outputF) as con: +def writeDB(outputF, data): + with sqlite3.connect(outputF) as con: cur = con.cursor() cur.execute('''CREATE TABLE IF NOT EXISTS entry ( word TEXT, @@ -41,7 +19,7 @@ if __name__ == '__main__': ID INTEGER PRIMARY KEY)''') con.commit() - for w, listW in d.items(): + for w, listW in data.items(): for word in listW: data = (w, word['cat-gram'], word['API'], "\t".join(word['infos']), word['genre'], word['accord'], @@ -51,6 +29,24 @@ if __name__ == '__main__': con.commit() -def give_def(w): - ui.show_terminal(d[w]) +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='wiktionary dump msgpack ' + 'to SQLite database file') + parser.add_argument('-o', '--out', dest='outputF', action='store', + help='the output filename') + parser.add_argument('-i', '--input', dest='inputF', action='store', + help='the input filename, a dump of witionary') + + arg = parser.parse_args() + + if arg.inputF is None: + print('Error input file needed', file=sys.stderr) + if arg.outputF is None: + print('Error output file needed', file=sys.stderr) + + with open(arg.inputF, 'rb') as f: + r = f.read() + d = p = msgpack.unpackb(r, raw=False) + del r + writeDB(arg.outputF, d) diff --git a/sectionList.py b/download/sectionList.py index 68dd657..68dd657 100644 --- a/sectionList.py +++ b/download/sectionList.py diff --git a/template.py b/download/template.py index fa0394a..fa0394a 100644 --- a/template.py +++ b/download/template.py diff --git a/getSection.py b/getSection.py deleted file mode 100644 index 62b74be..0000000 --- a/getSection.py +++ /dev/null @@ -1,129 +0,0 @@ -listInfoSection= [ - {'type': 'adjectif', 'match': 'adj', 'o': 'adjectif'}, - {'type': 'adjectif', 'match': 'adjectif', 'o': 'adjectif'}, - {'type': 'adjectif', 'match': 'adj-dém', 'o': 'adjectif démonstratif'}, - {'type': 'adjectif', 'match': 'adjectif démonstratif', 'o': 'adjectif démonstratif'}, - {'type': 'adjectif', 'match': 'adjectif exclamatif', 'o': 'adjectif exclamatif'}, - {'type': 'adjectif', 'match': 'adjectif indéfini', 'o': 'adjectif indéfini'}, - {'type': 'adjectif', 'match': 'adjectif interrogatif', 'o': 'adjectif interrogatif'}, - {'type': 'adjectif', 'match': 'adjectif numéral', 'o': 'adjectif numéral'}, - {'type': 'adjectif', 'match': 'adjectif possessif', 'o': 'adjectif possessif'}, - {'type': 'adjectif', 'match': 'adjectif relatif', 'o': 'adjectif relatif'}, - - {'type': 'adverbe', 'match': 'adv', 'o': 'adverbe'}, - {'type': 'adverbe', 'match': 'adverbe', 'o': 'adverbe'}, - {'type': 'adverbe', 'match': 'adverbe interrogatif', 'o': 'adverbe interrogatif'}, - {'type': 'adverbe', 'match': 'adverbe relatif', 'o': 'adverbe relatif'}, - - {'type': 'article', 'match': 'article', 'o': 'article'}, - {'type': 'article', 'match': 'article défini', 'o': 'article défini'}, - {'type': 'article', 'match': 'article indéfini', 'o': 'article indéfini'}, - {'type': 'article', 'match': 'article partitif', 'o': 'article partitif'}, - - {'type': 'conjonction', 'match': 'conjonction', 'o': 'conjonction'}, - {'type': 'conjonction', 'match': 'conjonction de coordination', 'o': 'conjonction de coordination'}, - - - {'type': 'erreur', 'match': 'erreur', 'o': 'faute courante'}, - {'type': 'erreur', 'match': 'faute', 'o': 'faute courante'}, - - {'type': 'interjection', 'match': 'interj', 'o': 'interjection'}, - {'type': 'interjection', 'match': 'interjection', 'o': 'interjection'}, - - {'type': 'interjection', 'match': 'interjection', 'o': 'interjection'}, - - {'type': 'locuton nominale', 'match': 'loc-phr', 'o': 'locution nominale'}, - {'type': 'locuton nominale', 'match': 'locution', 'o': 'locution nominale'}, - {'type': 'locuton nominale', 'match': 'locution nominale', 'o': 'locution nominale'}, - {'type': 'locuton nominale', 'match': 'locution phrase', 'o': 'locution nominale'}, - {'type': 'locuton nominale', 'match': 'locution-phrase', 'o': 'locution nominale'}, - - {'type': 'nom', 'match': 'nom', 'o': 'nom'}, - {'type': 'nom', 'match': 'nom1', 'o': 'nom'}, - {'type': 'nom', 'match': 'nom2', 'o': 'nom'}, - {'type': 'nom', 'match': 'nom3', 'o': 'nom'}, - {'type': 'nom', 'match': 'nom4', 'o': 'nom'}, - {'type': 'nom', 'match': 'nom5', 'o': 'nom'}, - {'type': 'nom', 'match': 'nom6', 'o': 'nom'}, - {'type': 'nom', 'match': 'nom7', 'o': 'nom'}, - {'type': 'nom', 'match': 'nom8', 'o': 'nom'}, - {'type': 'nom', 'match': 'nom9', 'o': 'nom'}, - {'type': 'nom', 'match': 'nom10', 'o': 'nom'}, - {'type': 'nom', 'match': 'nom11', 'o': 'nom'}, - {'type': 'nom', 'match': 'nom12', 'o': 'nom'}, - {'type': 'nom', 'match': 'nom13', 'o': 'nom'}, - {'type': 'nom', 'match': 'nom14', 'o': 'nom'}, - {'type': 'nom', 'match': 'nom15', 'o': 'nom'}, - {'type': 'nom', 'match': 'nom16', 'o': 'nom'}, - {'type': 'nom', 'match': 'nom17', 'o': 'nom'}, - {'type': 'nom', 'match': 'nom commun', 'o': 'nom'}, - {'type': 'nom', 'match': 'substantif', 'o': 'substantif'}, - - - {'type': 'nom de famille', 'match': 'nom de famille', 'o': 'nom de famille'}, - {'type': 'nom de famille', 'match': 'nom de famille anglais', 'o': 'nom de famille'}, - {'type': 'nom de famille', 'match': 'nom-fam', 'o': 'nom de famille'}, - {'type': 'nom de famille', 'match': 'nom-pr', 'o': 'nom de famille'}, - {'type': 'nom de famille', 'match': 'nom-propre', 'o': 'nom de famille'}, - {'type': 'nom de famille', 'match': 'nom pr', 'o': 'nom de famille'}, - {'type': 'nom de famille', 'match': 'nom propre', 'o': 'nom de famille'}, - {'type': 'nom scientifique', 'match': 'nom scientifique', 'o': 'nom scientifique'}, - {'type': 'particule', 'match': 'particule', 'o': 'particule'}, - {'type': 'nom de famille', 'match': 'patronyme', 'o': 'nom de famille'}, - {'type': 'préfixe', 'match': 'préfixe', 'o': 'préfixe'}, - {'type': 'suffixe', 'match': 'suffixe', 'o': 'suffixe'}, - {'type': 'prénom', 'match': 'prénom', 'o': 'prénom'}, - - {'type': 'onomatopée', 'match': 'onomatopée', 'o': 'onomatopée'}, - {'type': 'onomatopée', 'match': 'onom', 'o': 'onomatopée'}, - - {'type': 'préposition', 'match': 'prép', 'o': 'préposition'}, - {'type': 'préposition', 'match': 'préposition', 'o': 'préposition'}, - - {'type': 'pronom', 'match': 'pronom', 'o': 'pronom'}, - {'type': 'pronom', 'match': 'pronom démonstratif', 'o': 'pronom démonstratif'}, - {'type': 'pronom', 'match': 'pronom indéfini', 'o': 'pronom indéfini'}, - {'type': 'pronom', 'match': 'pronom interrogatif', 'o': 'pronom interrogatif'}, - {'type': 'pronom', 'match': 'pronom personnel', 'o': 'pronom personnel'}, - {'type': 'pronom', 'match': 'pronom possessif', 'o': 'pronom possessif'}, - {'type': 'pronom', 'match': 'pronom relatif', 'o': 'pronom relatif'}, - - {'type': 'verbe', 'match': 'verb', 'o': 'verbe'}, - {'type': 'verbe', 'match': 'verbe', 'o': 'verbe'}, - {'type': 'verbe', 'match': 'verbe pronominal', 'o': 'verbe pronominal'} -] - -listSections = [] -with open("./listSections", "r") as f: - for line in f: - line = line.strip() - if line[-3:] != "===": - continue - - s = line.find('{{')+2 - e = line.find('}}') - if not s < e: - continue - - argsStr = line[s:e] - args = list(map(lambda x: x.strip().lower(), argsStr.split('|'))) - - if len(args) <= 2 or not args[1]: - continue - - if not args[2].startswith('fr'): - continue - - classed = False - for m in listInfoSection: - if m['match'] == args[1]: - classed = True - continue - if not classed: - listSections.append(args[1]) - -for section in (list(set(listSections))): - print(section) - - - diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..c429cce --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +msgpack==1.0.0 +PyMySQL==0.9.3 +Flask==1.1.2 @@ -5,21 +5,22 @@ tui_show_example = True def show_terminal(word): - """Display the definition to the terminal - - @word Format: - { - mot: '' - cat-gram: '' - def: [{ - def: '' - ex: ['', ''] - }] - API: '' - infos: ['', ''] - genre: '' - accord: '' - } + """ + Display the definition to the terminal + + @word Format: + { + mot: '' + cat-gram: '' + def: [{ + def: '' + ex: ['', ''] + }] + API: '' + infos: ['', ''] + genre: '' + accord: '' + } """ indent = tui_indent * ' ' print(indent + word['mot']) @@ -45,7 +46,3 @@ def show_terminal(word): print(indent + '\t\t * ' + ex) print('') - -def show_web(word): - """Display the definition in HTML format""" - pass @@ -1,3 +1,7 @@ +""" +A simple Web application to serve dicofr +""" + from flask import Flask, request, Response, send_file import msgpack # from flask_cors import CORS @@ -20,17 +24,26 @@ app.config.from_object(__name__) @app.route('/', methods=['GET']) def index_client(): + """ + Send the single file + """ return send_file("index.html", mimetype='text/html') def get_def_reg(w): + """ + Search a word, can deal with regex and casse problem. + """ if res := dicofr.get_def_sql_reg(w): return msgpack.packb(res) + # Recherche du mot en minuscule elif res := dicofr.get_def_sql_reg(w.lower()): return msgpack.packb(res) + # Recherche du mot en nom propre elif res := dicofr.get_def_sql_reg(w.title()): return msgpack.packb(res) + else: return Response("", status=404) @@ -38,6 +51,9 @@ def get_def_reg(w): @app.route('/def', methods=['GET']) def get_def(): + """ + Retrieve a definition + """ w = request.args.get('w') if '_' in w: |