diff options
| -rw-r--r-- | assets/web.js | 78 | ||||
| -rwxr-xr-x | dfr/createDB.py | 162 |
2 files changed, 136 insertions, 104 deletions
diff --git a/assets/web.js b/assets/web.js index 3801335..bc01532 100644 --- a/assets/web.js +++ b/assets/web.js @@ -1,65 +1,77 @@ const web = new Vue({ - el: '#web', + el: "#web", data: { - word: '', + word: "", timer: undefined, definitions: null, is_word: null, - placeholders: ['mot', 'bonjour', 'manger', 'rire', 'jour', 'gagner', 'chanter', 'danser', 'village', - 'France', 'baguette', 'cola', 'marguerite'] + placeholders: [ + "mot", + "bonjour", + "manger", + "rire", + "jour", + "gagner", + "chanter", + "danser", + "village", + "France", + "baguette", + "cola", + "marguerite", + ], }, methods: { - searchWord: function() { - if(!this.word) return; - if(this.timer) { + searchWord: function () { + if (!this.word) return; + if (this.timer) { clearTimeout(this.timer); this.timer = undefined; } - fetch(`/def?w=${this.word}`) - .then((response) => { - this.is_word = response.ok; + fetch(`/${this.word}`).then((response) => { + this.is_word = response.ok; - if( response.ok ) { - response.arrayBuffer().then(res => { - this.definitions = msgpack.decode(new Uint8Array(res)); - if( this.definitions && this.definitions[0] && this.definitions[0].mot) { - if(!this.word.includes('_')) { - this.word = this.definitions[0].mot; - } + if (response.ok) { + response.arrayBuffer().then((res) => { + this.definitions = json.decode(res); + if (this.Def && this.Def[0] && this.Def[0].Γ©criture) { + /* + if (!this.word.includes("_")) { + this.word = this.Def[0].mot; } - }); - } - }) + */ + } + }); + } + }); }, - rand: (min,max) => - Math.floor(Math.random()*(max-min+1)+min) - , - randomplaceholder: function() { - const r = this.rand(0, this.placeholders.length-1); + rand: (min, max) => Math.floor(Math.random() * (max - min + 1) + min), + randomplaceholder: function () { + const r = this.rand(0, this.placeholders.length - 1); const a = this.placeholders[r]; return a; }, }, watch: { - word: function(w) { - if( w === '' ) { + word: function (w) { + if (w === "") { this.is_word = null; } - if(this.timer) { + if (this.timer) { clearTimeout(this.timer); this.timer = undefined; } this.timer = setTimeout(this.searchWord, 800); - } + }, }, - mounted: function() { + mounted: function () { const param = window.location.search.substr(1); - param.split('&').forEach( p => { - if (p.startsWith('w=')) { + param.split("&").forEach((p) => { + if (p.startsWith("w=")) { this.word = p.slice(2); } }); - } + }, }); diff --git a/dfr/createDB.py b/dfr/createDB.py index acf443d..4db4f61 100755 --- a/dfr/createDB.py +++ b/dfr/createDB.py @@ -69,119 +69,137 @@ In every case, a lot of memory (RAM) is necessary to process the last wiktionary # TODO: Optimize the bz2 module process to write the msgpack file on the fly. The goal is to never store a lot of information in memory. This optimization could reduce a lot the memory (RAM) usage and possibly allow creation of the database on low memory computer (less than 2Gio). -URL_DUMP = 'https://dumps.wikimedia.org/frwiktionary/latest/frwiktionary-latest-pages-meta-current.xml.bz2' +URL_DUMP = "https://dumps.wikimedia.org/frwiktionary/latest/frwiktionary-latest-pages-meta-current.xml.bz2" def unbz2(file): decomp = bz2.BZ2Decompressor() - buf = b'' + buf = b"" for c in file: buf += decomp.decompress(c) - while b'\n' in buf: - i = buf.index(b'\n') + while b"\n" in buf: + i = buf.index(b"\n") if i + 1 < len(buf): - ret = buf[:i + 1] - buf = buf[i + 1:] - yield ret.decode('utf-8') + ret = buf[: i + 1] + buf = buf[i + 1 :] + yield ret.decode("utf-8") else: - yield buf.decode('utf-8') - buf = b'' + yield buf.decode("utf-8") + buf = b"" -if __name__ == '__main__': +if __name__ == "__main__": import argparse + import os + import subprocess import sys import urllib.request + from os.path import exists + import dump2msgp import msgp2sqlite - import subprocess - import os - from os.path import exists - - parser = argparse.ArgumentParser(description='Download and create the database') - parser.add_argument('-o', '--output', dest='outputF', action='store', - help='the output, the database filename', - default='dfr.db') - parser.add_argument('-i', '--input', dest='dumpF', action='store', - help='the input dump file\'s filename', - default='') - parser.add_argument('-l', '--word-list', dest='wordList', action='store', - help='the alternative output, filename of the word list', - default=None) - parser.add_argument('-d', '--download', dest='download', action='store_true', - help='to download the lastest dump') + parser = argparse.ArgumentParser(description="Download and create the database") + parser.add_argument( + "-o", + "--output", + dest="outputF", + action="store", + help="the output, the database filename", + default="dfr.db", + ) + parser.add_argument( + "-i", + "--input", + dest="dumpF", + action="store", + help="the input dump file's filename", + default="", + ) + parser.add_argument( + "-l", + "--word-list", + dest="wordList", + action="store", + help="the alternative output, filename of the word list", + default=None, + ) + parser.add_argument( + "-d", + "--download", + dest="download", + action="store_true", + help="to download the lastest dump", + ) download = True - arg = parser.parse_args() - if not arg.wordList: - arg.wordList = arg.outputF + '.wordlist' + arg.wordList = arg.outputF + ".wordlist" if arg.download and arg.dumpF: - print('''Incompatible options '-i' and '-d'.''') + print("""Incompatible options '-i' and '-d'.""") exit(1) elif arg.download: - arg.dumpF = URL_DUMP[URL_DUMP.rindex('/') + 1:] + arg.dumpF = URL_DUMP[URL_DUMP.rindex("/") + 1 :] elif arg.dumpF: download = False - - if not arg.dumpF or not arg.dumpF.endswith('bz2'): - print('A bz2 dump file filename needed', file=sys.stderr) + if not arg.dumpF or not arg.dumpF.endswith("bz2"): + print("A bz2 dump file filename needed", file=sys.stderr) exit(-1) if exists(arg.dumpF) and download: - print(f'{arg.dumpF} exists. Force downloading ? (y/N)') - answer = input('> ') - if answer.lower()[0] != 'y': + print(f"{arg.dumpF} exists. Force downloading ? (y/N)") + answer = input("> ") + if answer.lower()[0] != "y": download = False if download: - print(download); - print(f'Downloading the dump ({arg.dumpF})\nIt should take some time') + print(download) + print(f"Downloading the dump ({arg.dumpF})\nIt should take some time") try: urllib.request.urlretrieve(URL_DUMP, arg.dumpF) except urllib.error.URLError: - print('Error: Unable to download from internet') - print(f'Check connection and source URL : ({ URL_DUMP })') - print('Exiting') + print("Error: Unable to download from internet") + print(f"Check connection and source URL : ({ URL_DUMP })") + print("Exiting") exit(-10) except: - print('Download failed.') - print('Exiting') + print("Download failed.") + print("Exiting") exit(-1) if not exists(arg.dumpF): if download: - print('Download failed.\nExiting.', file=sys.stderr) + print("Download failed.\nExiting.", file=sys.stderr) else: - print(f'Fichier { arg.dumpF } introuvable.\nArrΓͺt.') + print(f"Fichier { arg.dumpF } introuvable.\nArrΓͺt.") exit(-2) decompress = False try: - print('Trying the bzip2 command') - assert(subprocess.call(['bzip2', '-d', arg.dumpF]) == 0) + print("Trying the bzip2 command") + assert subprocess.call(["bzip2", "-d", arg.dumpF]) == 0 decompress = True except: - print('''The command "bzip" doesn't exists, or doesn't work as intended''') - print('Fallback to Python bz2 module decompressor') + print("""The command "bzip" doesn't exists, or doesn't work as intended""") + print("Fallback to Python bz2 module decompressor") # Decompression using bzip2 if not decompress: try: import bz2 - with open(arg.dumpF, 'rb') as f: - it = iter(lambda: f.read(2**16), b'') + + with open(arg.dumpF, "rb") as f: + it = iter(lambda: f.read(2**16), b"") output_fn = arg.dumpF[:-4] - with open(output_fn, 'wb') as fout: + with open(output_fn, "wb") as fout: dcomp = bz2.BZ2Decompressor() for chunk in it: datal = len(chunk) @@ -189,36 +207,38 @@ if __name__ == '__main__': fout.write(data) decompress = True except: - print('''Python bz2 module decompressor failed, maybe you don't have any space available''') - print('Fallback to on the fly decompressor (RAM will be needed)') + print( + """Python bz2 module decompressor failed, maybe you don't have any space available""" + ) + print("Fallback to on the fly decompressor (RAM will be needed)") if not decompress: try: # On the fly Decompression - with open(arg.dumpF, 'rb') as f: - it = iter(lambda: f.read(2**16), b'') - print('Data extraction on the fly') - res = dump2msgp.extractAll(unbz2(it), 'error.log', False) - with open(arg.wordList, 'wb'): - f.write('\n'.join(a.keys())) + with open(arg.dumpF, "rb") as f: + it = iter(lambda: f.read(2**16), b"") + print("Data extraction on the fly") + res = dump2msgp.extractAll(unbz2(it), "error.log", False) + with open(arg.wordList, "wb"): + f.write("\n".join(a.keys())) msgp2sqlite.writeDB(arg.outputF, res) - print(f'Word list { arg.wordList } created ! π π') - print(f'Database { arg.outputF } created ! π π') + print(f"Word list { arg.wordList } created ! π π") + print(f"Database { arg.outputF } created ! π π") except: - print('''Error: Can't extract the dump file''') - print('Exiting (-1)') + print("""Error: Can't extract the dump file""") + print("Exiting (-1)") exit(-1) - print(f'Removing temporary files') + print(f"Removing temporary files") os.remove(arg.dumpF) else: output_fn = arg.dumpF[:-4] - with open(output_fn, 'r') as f: - print('Create the database') - res = dump2msgp.extractAll(f, 'error.log', False) + with open(output_fn, "r") as f: + print("Create the database") + res = dump2msgp.extractAll(f, "error.log", False) msgp2sqlite.writeDB(arg.outputF, res) - print(f'Database { arg.outputF } created ! π π') + print(f"Database { arg.outputF } created ! π π") - print('Removing temporary files') + print("Removing temporary files") os.remove(output_fn) |