#!/usr/bin/env python
'''
# Main file.
A program to retrieve the definition of a french word (in french language).
Maybe extended to other languages later.

The helper functions below read two module-level globals that are set by the
command-line entry point at the bottom of the file: ``arg`` (the parsed
arguments) and ``dico`` (the path of the sqlite dictionary).
'''

import argparse
import gzip
import itertools
import os
import re
import sqlite3
import subprocess
import sys
from contextlib import closing
from os.path import exists

import msgpack

import ui

DEBUG = True
# In debug mode the assets are looked up relative to the current directory.
if DEBUG:
    DIR_PATH = os.getcwd()
else:
    DIR_PATH = '/usr/share/dfr/'
# Path of the sqlite dictionary; may be overridden by --dico.
dico = 'dfr.db'

# Letters tried when generating spelling variants of a word.
_ALPHABET = "abcdefghijklmnopqrstuvwxyzéèïœä"


def initWordList():
    '''
    Make sure a word list is available and that arg.wordList points to it.

    The list is generated from the dictionary when no list is configured, or
    when the default list is selected but its file does not exist yet.
    A custom list given with --word-list is left untouched.
    '''
    default_list = f'{DIR_PATH}/assets/wordList'
    if not arg.wordList or (arg.wordList == default_list
                            and not exists(default_list)):
        create_wordlist()
        arg.wordList = default_list


def _load_word_list(path):
    '''Return the words of the gzip-compressed, newline-separated file.'''
    with gzip.open(path, 'r') as f:
        return f.read().decode().split('\n')


def _edits(base, alphabet):
    '''
    Yield every string obtained from base by one replacement, insertion or
    deletion of a letter. Duplicates are possible; callers filter them.
    '''
    for pos, current in enumerate(base):
        for letter in alphabet:
            # Replace
            if letter != current:
                yield base[:pos] + letter + base[pos + 1:]
            # Insert
            yield base[:pos] + letter + base[pos:]
    # Insert at the end
    for letter in alphabet:
        yield base + letter
    # Delete
    for pos in range(len(base)):
        yield base[:pos] + base[pos + 1:]


def _variants(word, depth, alphabet=_ALPHABET):
    '''
    Yield word and its spelling variants, each one only once.

    depth == 1 yields the word itself, depth == 2 adds the strings one edit
    away, depth == 3 the strings two edits away, and so on.
    '''
    seen = set()
    if word == "":
        for letter in alphabet:
            seen.add(letter)
            yield letter
    if depth == 1:
        yield word
        return
    for base in _variants(word, depth - 1, alphabet):
        for candidate in itertools.chain((base,), _edits(base, alphabet)):
            # Every candidate is checked against `seen`, so a word reachable
            # through several edits (e.g. "cré" -> "créé") is suggested once.
            if candidate not in seen:
                seen.add(candidate)
                yield candidate


def didYouMean(word):
    '''
    Suggest known words that are close to word.

    Words one edit away are tried first; words two edits away are only
    computed when the first pass finds nothing.

    Returns a list of distinct words taken from the word list arg.wordList.
    The list is empty when word is None, when the word list file does not
    exist, or when no close word is known.
    '''
    if word is None or not exists(arg.wordList):
        return []
    known = set(_load_word_list(arg.wordList))
    for depth in (2, 3):
        found = [w for w in _variants(word, depth) if w in known]
        if found:
            return found
    return []


def _row_to_entry(row):
    '''Convert a row of the entry table to the dict given to ui.show_terminal.'''
    return {
        'mot': row[0],
        'cat-gram': row[1],
        'API': row[2],
        'infos': row[3].split('\t'),
        'genre': row[4],
        'accord': row[5],
        'def': msgpack.unpackb(row[6], raw=False),
    }


def _query_entries(sql, word):
    '''Run a one-parameter query on the dictionary and return the entries.'''
    # sqlite3's own context manager only commits or rolls back; closing()
    # is what actually releases the connection.
    with closing(sqlite3.connect(dico)) as con:
        rows = con.execute(sql, (word, )).fetchall()
    return [_row_to_entry(row) for row in rows]


def get_def_sql(word):
    '''Return the list of entries whose word is exactly word.'''
    return _query_entries('''SELECT * FROM entry WHERE word = ?''', word)


def create_wordlist():
    '''
    Write every distinct word of the dictionary to the default word list
    ({DIR_PATH}/assets/wordList), gzip-compressed, one word per line.
    '''
    with closing(sqlite3.connect(dico)) as con:
        rows = con.execute('''SELECT word FROM entry''').fetchall()
    # Sorted so that the generated file is deterministic.
    words = sorted({row[0] for row in rows})
    with gzip.open(f"{DIR_PATH}/assets/wordList", 'w') as f:
        f.write("\n".join(words).encode())


def get_def_sql_reg(word):
    '''Return the list of entries whose word matches the SQL LIKE pattern.'''
    return _query_entries('''SELECT * FROM entry WHERE word LIKE ?''', word)


def matching(word):
    '''
    Find matching words in the list of words

    word is either a plain word (exact match) or a regular expression written
    as /REGEX/, which is matched against the beginning of each word.

    Returns the list of matching words. The list is empty, and a message is
    printed on stderr, when the word list is missing or the pattern is
    malformed.
    '''
    if not exists(arg.wordList):
        print(f'Error: Word list {arg.wordList} not found', file=sys.stderr)
        return []
    words = _load_word_list(arg.wordList)
    if not word.startswith('/'):
        return [w for w in words if w == word]
    if word[-1] != '/' or len(word) <= 2:
        print('Erreur: Le format matching pour les regex est /MOT/',
              file=sys.stderr)
        return []
    try:
        regex = re.compile(word[1:-1])
    except re.error as err:
        print(f'Erreur: Expression régulière invalide: {err}',
              file=sys.stderr)
        return []
    return [w for w in words if regex.match(w)]


if __name__ == '__main__':
    no_word_msg = '''Erreur: Rechercher un mot\nUtilisez l'option -h pour avoir de l'aide'''
    if len(sys.argv) < 2:
        print(no_word_msg, file=sys.stderr)
        sys.exit(-1)
    parser = argparse.ArgumentParser(description='Get a french word\'s definition')
    parser.add_argument('--sql', dest='action', action='store_const',
                        const=get_def_sql_reg, default=get_def_sql,
                        help='search a definition using SQL regex, '
                        '_ to match a letter, %% to match a group of letters')
    parser.add_argument('-m', '--matching', dest='matching', action='store_true',
                        help='search the french words that match the regex')
    parser.add_argument('word', metavar='PATTERN', type=str, nargs='?',
                        help='the word or the pattern to match')
    parser.add_argument('-l', '--word-list', dest='wordList',
                        action='store', help='the filename of the word list',
                        default=f'{DIR_PATH}/assets/wordList')
    parser.add_argument('-d', '--dico', dest='dico',
                        action='store', help='the filename of the dictionnary',
                        default='dfr.db')
    parser.add_argument('-f', '--first', dest='first', action='store_true',
                        help='get only the first definition')
    parser.add_argument('-u', '--update', dest='update', type=str, nargs='?',
                        const=dico, help='update the database')
    arg = parser.parse_args()

    if arg.dico:
        dico = arg.dico

    if arg.update:
        os.chdir(DIR_PATH)
        command = ['/usr/bin/env', 'python',
                   f'{DIR_PATH}/download/download.py',
                   '--download', '--output', dico]
        if arg.wordList:
            command += ['--word-list', arg.wordList]
        sys.exit(subprocess.call(command))

    # Every remaining action needs a word; options alone are not enough.
    if arg.word is None:
        print(no_word_msg, file=sys.stderr)
        sys.exit(-1)

    # If the dictionary is not found as given, fall back to the assets folder
    if not exists(dico):
        fallback = f'{DIR_PATH}/assets/{dico}'
        if not exists(fallback):
            print('Error: No sqlite dictionnary', file=sys.stderr)
            print(f'Default directory is set to "{DIR_PATH}"', file=sys.stderr)
            sys.exit(1)
        dico = fallback

    if arg.matching:
        initWordList()
        ret = matching(arg.word)
        for word in ret:
            print(word)
        if not ret:
            sys.exit(1)
    elif arg.first:
        entries = arg.action(arg.word)
        if entries:
            first = entries[0]
            if first['def']:
                # Keep only the first definition, without its examples.
                first['def'] = [first['def'][0]]
                first['def'][0]['ex'] = []
            ui.show_terminal(first)
    else:
        res = arg.action(arg.word)
        for w in res:
            ui.show_terminal(w)
        if not res:
            print("Word not found")
            initWordList()
            listVar = didYouMean(arg.word)
            if len(listVar) == 1:
                print("Did you mean ", listVar[0])
            elif len(listVar) > 1:
                print("Did you mean ", ", ".join(listVar[:-1]), 'or', listVar[-1], '?')