From e1df63db39d7fe01541f13054ad467064274f426 Mon Sep 17 00:00:00 2001 From: ache Date: Thu, 10 Dec 2020 18:49:41 +0100 Subject: Uniformisation and rename --- Makefile | 2 +- dicofr.py | 28 ++++++++++------------ download/download.py | 67 ++++++++++++++++++++++++++++------------------------ 3 files changed, 49 insertions(+), 48 deletions(-) diff --git a/Makefile b/Makefile index 8538be7..72046f0 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -DIR_INSTALL_PATH=/usr/share/dicofr/ +DIR_INSTALL_PATH=/usr/share/dfr/ DIR_BIN=/usr/sbin/ install: diff --git a/dicofr.py b/dicofr.py index 68bec87..7fa018d 100755 --- a/dicofr.py +++ b/dicofr.py @@ -1,18 +1,19 @@ #!/bin/env python -""" +''' # Main file. -A program to retrieve the definition of a french word (in french). +A program to retrieve the definition of a french word (in french language). Maybe extended to other languages later. -""" +''' import sys import argparse import msgpack import sqlite3 from os.path import exists +import os import ui @@ -20,15 +21,11 @@ import ui DEBUG = False if DEBUG: - DIR_PATH = 'assets/' + DIR_PATH = os.getcwd() else: - DIR_PATH = '/usr/share/dicofr/assets/' + DIR_PATH = '/usr/share/dfr/' -sys.path.insert(-1, DIR_PATH) - - - -dico = 'dicofr.db' +dico = 'dfr.db' def get_def_sql(word): @@ -41,7 +38,7 @@ def get_def_sql(word): return list(map(lambda w: {'mot': w[0], 'cat-gram': w[1], 'API': w[2], - 'infos': w[3].split("\t"), + 'infos': w[3].split('\t'), 'genre': w[4], 'accord': w[5], 'def': msgpack.unpackb(w[6], raw=False), @@ -58,7 +55,7 @@ def get_def_sql_reg(word): return list(map(lambda w: {'mot': w[0], 'cat-gram': w[1], 'API': w[2], - 'infos': w[3].split("\t"), + 'infos': w[3].split('\t'), 'genre': w[4], 'accord': w[5], 'def': msgpack.unpackb(w[6], raw=False), @@ -66,9 +63,9 @@ def get_def_sql_reg(word): def matching(word): - """ + ''' Find matching words in the list of words - """ + ''' matchingWord = [] @@ -86,7 +83,7 @@ def matching(word): matchingWord.append(w) else: if word[-1] != '/' or len(word) <= 2: - print("Erreur: Le format matching pour les regex est /MOT/", + print('Erreur: Le format matching pour les regex est /MOT/', file=sys.stderr) return [] import re @@ -135,4 +132,3 @@ if __name__ == '__main__': else: for w in arg.action(arg.word): ui.show_terminal(w) - diff --git a/download/download.py b/download/download.py index 04c337c..25fae89 100755 --- a/download/download.py +++ b/download/download.py @@ -1,3 +1,5 @@ +#!/bin/env python + import argparse import sys import urllib.request @@ -9,7 +11,7 @@ import os from os.path import exists -URL_DUMP = "https://dumps.wikimedia.org/frwiktionary/latest/frwiktionary-latest-pages-meta-current.xml.bz2" +URL_DUMP = 'https://dumps.wikimedia.org/frwiktionary/latest/frwiktionary-latest-pages-meta-current.xml.bz2' def unbz2(file): @@ -23,17 +25,17 @@ def unbz2(file): if i + 1 < len(buf): ret = buf[:i + 1] buf = buf[i + 1:] - yield ret.decode("utf-8") + yield ret.decode('utf-8') else: - yield buf.decode("utf-8") + yield buf.decode('utf-8') buf = b'' if __name__ == '__main__': parser = argparse.ArgumentParser(description='Download and create the database') parser.add_argument('-o', '--out', dest='outputF', action='store', - help='the output, the database file', - default='dicofr.db') + help='the output, the database filename', + default='dfr.db') parser.add_argument('-i', '--in', dest='dumpF', action='store', help='the input dump file\'s filename', default='') @@ -45,7 +47,7 @@ if __name__ == '__main__': download = True if download and arg.dumpF: - print("Incompatible options '-i' and '-d'.") + print('''Incompatible options '-i' and '-d'.''') exit(1) elif download: arg.dumpF = URL_DUMP[URL_DUMP.rindex('/') + 1:] @@ -55,26 +57,25 @@ if __name__ == '__main__': exit(-1) if exists(arg.dumpF) and download: - print(f"{arg.dumpF} exists. Force downloading ? (y/N)") + print(f'{arg.dumpF} exists. Force downloading ? (y/N)') answer = input('> ') if answer.lower()[0] != 'y': download = False if download: - print(f"Downloading the dump ({arg.dumpF})\nIt should take some time") + print(f'Downloading the dump ({arg.dumpF})\nIt should take some time') try: urllib.request.urlretrieve(URL_DUMP, arg.dumpF) except urllib.error.URLError: - print("Error: Unable to download from internet") - print(f"Check connection and source URL : ({ URL_DUMP })") - print("Exiting") + print('Error: Unable to download from internet') + print(f'Check connection and source URL : ({ URL_DUMP })') + print('Exiting') exit(-10) except: - print("Download failed.") - print("Exiting") + print('Download failed.') + print('Exiting') exit(-1) - if not exists(arg.dumpF): print('Download failed.\nExiting.', file=sys.stderr) exit(-2) @@ -82,12 +83,12 @@ if __name__ == '__main__': decompress = False try: - print("Trying the bzip2 command") + print('Trying the bzip2 command') assert(subprocess.call(['bzip2', '-d', arg.dumpF]) == 0) decompress = True except: - print("The command “bzip” doesn't exists, or doesn't work as intended") - print("Fallback to Python bz2 module decompressor") + print('''The command "bzip" doesn't exists, or doesn't work as intended''') + print('Fallback to Python bz2 module decompressor') # Decompression using bzip2 if not decompress: @@ -106,37 +107,41 @@ if __name__ == '__main__': fout.write(data) decompress = True except: - print("Python bz2 module decompressor failed, maybe you don't have any space available") - print("Fallback to on the fly decompressor (RAM will be needed)") + print('''Python bz2 module decompressor failed, maybe you don't have any space available''') + print('Fallback to on the fly decompressor (RAM will be needed)') if not decompress: try: # On the fly Decompression with open(arg.dumpF, 'rb') as f: it = iter(lambda: f.read(2**16), b'') - print("Data extraction on the fly") - res = dump2msgp.extractAll(unbz2(it), "error.log", False) + print('Data extraction on the fly') + res = dump2msgp.extractAll(unbz2(it), 'error.log', False) + with open(arg.wordList, 'wb'): + f.write('\n'.join(a.keys())) + msgPack2sqlite_msgPack.writeDB(arg.outputF, res) - print(f"Database { arg.outputF } created ! 👏 🎉") + print(f'Word list { arg.wordList } created ! 👏 🎉') + print(f'Database { arg.outputF } created ! 👏 🎉') except: - print("Error: Can't extract the dump file") - print("Exiting (-1)") + print('''Error: Can't extract the dump file''') + print('Exiting (-1)') exit(-1) - print(f"Removing temporary files") + print(f'Removing temporary files') os.remove(arg.dumpF) else: try: output_fn = arg.dumpF[:-4] with open(output_fn, 'r') as f: - print("Create the database") - res = dump2msgp.extractAll(f, "error.log", False) + print('Create the database') + res = dump2msgp.extractAll(f, 'error.log', False) msgPack2sqlite_msgPack.writeDB(arg.outputF, res) - print(f"Database { arg.outputF } created ! 👏 🎉") + print(f'Database { arg.outputF } created ! 👏 🎉') except: - print("Failed to extract database") - print("Exiting (-3)") + print('Failed to extract database') + print('Exiting (-3)') exit(-3) - print(f"Removing temporary files") + print(f'Removing temporary files') os.remove(output_fn) -- cgit v1.2.3