aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--assets/web.js78
-rwxr-xr-xdfr/createDB.py162
2 files changed, 136 insertions, 104 deletions
diff --git a/assets/web.js b/assets/web.js
index 3801335..bc01532 100644
--- a/assets/web.js
+++ b/assets/web.js
@@ -1,65 +1,77 @@
const web = new Vue({
- el: '#web',
+ el: "#web",
data: {
- word: '',
+ word: "",
timer: undefined,
definitions: null,
is_word: null,
- placeholders: ['mot', 'bonjour', 'manger', 'rire', 'jour', 'gagner', 'chanter', 'danser', 'village',
- 'France', 'baguette', 'cola', 'marguerite']
+ placeholders: [
+ "mot",
+ "bonjour",
+ "manger",
+ "rire",
+ "jour",
+ "gagner",
+ "chanter",
+ "danser",
+ "village",
+ "France",
+ "baguette",
+ "cola",
+ "marguerite",
+ ],
},
methods: {
- searchWord: function() {
- if(!this.word) return;
- if(this.timer) {
+ searchWord: function () {
+ if (!this.word) return;
+ if (this.timer) {
clearTimeout(this.timer);
this.timer = undefined;
}
- fetch(`/def?w=${this.word}`)
- .then((response) => {
- this.is_word = response.ok;
+ fetch(`/${this.word}`).then((response) => {
+ this.is_word = response.ok;
- if( response.ok ) {
- response.arrayBuffer().then(res => {
- this.definitions = msgpack.decode(new Uint8Array(res));
- if( this.definitions && this.definitions[0] && this.definitions[0].mot) {
- if(!this.word.includes('_')) {
- this.word = this.definitions[0].mot;
- }
+ if (response.ok) {
+ response.arrayBuffer().then((res) => {
+ this.definitions = json.decode(res);
+ if (this.Def && this.Def[0] && this.Def[0].écriture) {
+ /*
+ if (!this.word.includes("_")) {
+ this.word = this.Def[0].mot;
}
- });
- }
- })
+ */
+ }
+ });
+ }
+ });
},
- rand: (min,max) =>
- Math.floor(Math.random()*(max-min+1)+min)
- ,
- randomplaceholder: function() {
- const r = this.rand(0, this.placeholders.length-1);
+ rand: (min, max) => Math.floor(Math.random() * (max - min + 1) + min),
+ randomplaceholder: function () {
+ const r = this.rand(0, this.placeholders.length - 1);
const a = this.placeholders[r];
return a;
},
},
watch: {
- word: function(w) {
- if( w === '' ) {
+ word: function (w) {
+ if (w === "") {
this.is_word = null;
}
- if(this.timer) {
+ if (this.timer) {
clearTimeout(this.timer);
this.timer = undefined;
}
this.timer = setTimeout(this.searchWord, 800);
- }
+ },
},
- mounted: function() {
+ mounted: function () {
const param = window.location.search.substr(1);
- param.split('&').forEach( p => {
- if (p.startsWith('w=')) {
+ param.split("&").forEach((p) => {
+ if (p.startsWith("w=")) {
this.word = p.slice(2);
}
});
- }
+ },
});
diff --git a/dfr/createDB.py b/dfr/createDB.py
index acf443d..4db4f61 100755
--- a/dfr/createDB.py
+++ b/dfr/createDB.py
@@ -69,119 +69,137 @@ In every case, a lot of memory (RAM) is necessary to process the last wiktionary
# TODO: Optimize the bz2 module process to write the msgpack file on the fly. The goal is to never store a lot of information in memory. This optimization could reduce a lot the memory (RAM) usage and possibly allow creation of the database on low memory computer (less than 2Gio).
-URL_DUMP = 'https://dumps.wikimedia.org/frwiktionary/latest/frwiktionary-latest-pages-meta-current.xml.bz2'
+URL_DUMP = "https://dumps.wikimedia.org/frwiktionary/latest/frwiktionary-latest-pages-meta-current.xml.bz2"
def unbz2(file):
decomp = bz2.BZ2Decompressor()
- buf = b''
+ buf = b""
for c in file:
buf += decomp.decompress(c)
- while b'\n' in buf:
- i = buf.index(b'\n')
+ while b"\n" in buf:
+ i = buf.index(b"\n")
if i + 1 < len(buf):
- ret = buf[:i + 1]
- buf = buf[i + 1:]
- yield ret.decode('utf-8')
+ ret = buf[: i + 1]
+ buf = buf[i + 1 :]
+ yield ret.decode("utf-8")
else:
- yield buf.decode('utf-8')
- buf = b''
+ yield buf.decode("utf-8")
+ buf = b""
-if __name__ == '__main__':
+if __name__ == "__main__":
import argparse
+ import os
+ import subprocess
import sys
import urllib.request
+ from os.path import exists
+
import dump2msgp
import msgp2sqlite
- import subprocess
- import os
- from os.path import exists
-
- parser = argparse.ArgumentParser(description='Download and create the database')
- parser.add_argument('-o', '--output', dest='outputF', action='store',
- help='the output, the database filename',
- default='dfr.db')
- parser.add_argument('-i', '--input', dest='dumpF', action='store',
- help='the input dump file\'s filename',
- default='')
- parser.add_argument('-l', '--word-list', dest='wordList', action='store',
- help='the alternative output, filename of the word list',
- default=None)
- parser.add_argument('-d', '--download', dest='download', action='store_true',
- help='to download the lastest dump')
+ parser = argparse.ArgumentParser(description="Download and create the database")
+ parser.add_argument(
+ "-o",
+ "--output",
+ dest="outputF",
+ action="store",
+ help="the output, the database filename",
+ default="dfr.db",
+ )
+ parser.add_argument(
+ "-i",
+ "--input",
+ dest="dumpF",
+ action="store",
+ help="the input dump file's filename",
+ default="",
+ )
+ parser.add_argument(
+ "-l",
+ "--word-list",
+ dest="wordList",
+ action="store",
+ help="the alternative output, filename of the word list",
+ default=None,
+ )
+ parser.add_argument(
+ "-d",
+ "--download",
+ dest="download",
+ action="store_true",
+ help="to download the lastest dump",
+ )
download = True
-
arg = parser.parse_args()
-
if not arg.wordList:
- arg.wordList = arg.outputF + '.wordlist'
+ arg.wordList = arg.outputF + ".wordlist"
if arg.download and arg.dumpF:
- print('''Incompatible options '-i' and '-d'.''')
+ print("""Incompatible options '-i' and '-d'.""")
exit(1)
elif arg.download:
- arg.dumpF = URL_DUMP[URL_DUMP.rindex('/') + 1:]
+ arg.dumpF = URL_DUMP[URL_DUMP.rindex("/") + 1 :]
elif arg.dumpF:
download = False
-
- if not arg.dumpF or not arg.dumpF.endswith('bz2'):
- print('A bz2 dump file filename needed', file=sys.stderr)
+ if not arg.dumpF or not arg.dumpF.endswith("bz2"):
+ print("A bz2 dump file filename needed", file=sys.stderr)
exit(-1)
if exists(arg.dumpF) and download:
- print(f'{arg.dumpF} exists. Force downloading ? (y/N)')
- answer = input('> ')
- if answer.lower()[0] != 'y':
+ print(f"{arg.dumpF} exists. Force downloading ? (y/N)")
+ answer = input("> ")
+ if answer.lower()[0] != "y":
download = False
if download:
- print(download);
- print(f'Downloading the dump ({arg.dumpF})\nIt should take some time')
+ print(download)
+ print(f"Downloading the dump ({arg.dumpF})\nIt should take some time")
try:
urllib.request.urlretrieve(URL_DUMP, arg.dumpF)
except urllib.error.URLError:
- print('Error: Unable to download from internet')
- print(f'Check connection and source URL : ({ URL_DUMP })')
- print('Exiting')
+ print("Error: Unable to download from internet")
+ print(f"Check connection and source URL : ({ URL_DUMP })")
+ print("Exiting")
exit(-10)
except:
- print('Download failed.')
- print('Exiting')
+ print("Download failed.")
+ print("Exiting")
exit(-1)
if not exists(arg.dumpF):
if download:
- print('Download failed.\nExiting.', file=sys.stderr)
+ print("Download failed.\nExiting.", file=sys.stderr)
else:
- print(f'Fichier { arg.dumpF } introuvable.\nArrêt.')
+ print(f"Fichier { arg.dumpF } introuvable.\nArrêt.")
exit(-2)
decompress = False
try:
- print('Trying the bzip2 command')
- assert(subprocess.call(['bzip2', '-d', arg.dumpF]) == 0)
+ print("Trying the bzip2 command")
+ assert subprocess.call(["bzip2", "-d", arg.dumpF]) == 0
decompress = True
except:
- print('''The command "bzip" doesn't exists, or doesn't work as intended''')
- print('Fallback to Python bz2 module decompressor')
+ print("""The command "bzip" doesn't exists, or doesn't work as intended""")
+ print("Fallback to Python bz2 module decompressor")
# Decompression using bzip2
if not decompress:
try:
import bz2
- with open(arg.dumpF, 'rb') as f:
- it = iter(lambda: f.read(2**16), b'')
+
+ with open(arg.dumpF, "rb") as f:
+ it = iter(lambda: f.read(2**16), b"")
output_fn = arg.dumpF[:-4]
- with open(output_fn, 'wb') as fout:
+ with open(output_fn, "wb") as fout:
dcomp = bz2.BZ2Decompressor()
for chunk in it:
datal = len(chunk)
@@ -189,36 +207,38 @@ if __name__ == '__main__':
fout.write(data)
decompress = True
except:
- print('''Python bz2 module decompressor failed, maybe you don't have any space available''')
- print('Fallback to on the fly decompressor (RAM will be needed)')
+ print(
+ """Python bz2 module decompressor failed, maybe you don't have any space available"""
+ )
+ print("Fallback to on the fly decompressor (RAM will be needed)")
if not decompress:
try:
# On the fly Decompression
- with open(arg.dumpF, 'rb') as f:
- it = iter(lambda: f.read(2**16), b'')
- print('Data extraction on the fly')
- res = dump2msgp.extractAll(unbz2(it), 'error.log', False)
- with open(arg.wordList, 'wb'):
- f.write('\n'.join(a.keys()))
+ with open(arg.dumpF, "rb") as f:
+ it = iter(lambda: f.read(2**16), b"")
+ print("Data extraction on the fly")
+ res = dump2msgp.extractAll(unbz2(it), "error.log", False)
+ with open(arg.wordList, "wb"):
+ f.write("\n".join(a.keys()))
msgp2sqlite.writeDB(arg.outputF, res)
- print(f'Word list { arg.wordList } created ! πŸ‘ πŸŽ‰')
- print(f'Database { arg.outputF } created ! πŸ‘ πŸŽ‰')
+ print(f"Word list { arg.wordList } created ! πŸ‘ πŸŽ‰")
+ print(f"Database { arg.outputF } created ! πŸ‘ πŸŽ‰")
except:
- print('''Error: Can't extract the dump file''')
- print('Exiting (-1)')
+ print("""Error: Can't extract the dump file""")
+ print("Exiting (-1)")
exit(-1)
- print(f'Removing temporary files')
+ print(f"Removing temporary files")
os.remove(arg.dumpF)
else:
output_fn = arg.dumpF[:-4]
- with open(output_fn, 'r') as f:
- print('Create the database')
- res = dump2msgp.extractAll(f, 'error.log', False)
+ with open(output_fn, "r") as f:
+ print("Create the database")
+ res = dump2msgp.extractAll(f, "error.log", False)
msgp2sqlite.writeDB(arg.outputF, res)
- print(f'Database { arg.outputF } created ! πŸ‘ πŸŽ‰')
+ print(f"Database { arg.outputF } created ! πŸ‘ πŸŽ‰")
- print('Removing temporary files')
+ print("Removing temporary files")
os.remove(output_fn)