aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author ache <ache@ache.one> 2022-01-06 03:54:18 +0100
committer ache <ache@ache.one> 2022-01-06 03:54:18 +0100
commit f6586d1862e6fe1acb617b6219901357e8ea6345 (patch)
tree 214ea64b71f25c4f6908b70b1e10c381a6bf9711
parent License stuffs (diff)
Ajout d'une correction orthographique
-rwxr-xr-x dfr.py 90
1 files changed, 84 insertions, 6 deletions
diff --git a/dfr.py b/dfr.py
index e664264..1ed5d40 100755
--- a/dfr.py
+++ b/dfr.py
@@ -11,6 +11,7 @@ Maybe extended to other languages later.
import sys
import argparse
import msgpack
+import gzip
import sqlite3
from os.path import exists
import os
@@ -27,6 +28,60 @@ else:
dico = 'dfr.db'
def initWordList():
    """Ensure ``arg.wordList`` refers to a word list before it is read.

    The bundled list at ``{DIR_PATH}/assets/wordList`` is (re)generated with
    ``create_wordlist()`` when no list was given at all, or when the bundled
    list was requested but its file does not exist.  In both cases
    ``arg.wordList`` is then set to the bundled path.  Any other value of
    ``arg.wordList`` is left untouched.
    """
    default_path = f'{DIR_PATH}/assets/wordList'

    # Nothing to do for a custom list, or for a bundled list already on disk.
    if arg.wordList:
        if arg.wordList != default_path or exists(default_path):
            return

    create_wordlist()
    arg.wordList = default_path
+
+
+
def didYouMean(word):
    """Suggest known words that are close to a misspelled *word*.

    Candidates are built by applying single-character edits (substitution,
    insertion, deletion) over a fixed lowercase alphabet, and kept when they
    appear in the gzip-compressed, newline-separated word list stored at
    ``arg.wordList``.  Words one edit away are tried first; words two edits
    away are only considered when the first pass finds nothing.

    Args:
        word: The word that could not be found.

    Returns:
        A list of distinct known words in generation order; empty when
        nothing within two edits is known.
    """
    alphabet = "abcdefghijklmnopqrstuvwxyzéèïœä"

    def single_edits(base):
        """Yield every string one edit away from *base* (may repeat)."""
        for pos, current in enumerate(base):
            head, tail = base[:pos], base[pos + 1:]
            for char in alphabet:
                if char != current:
                    yield head + char + tail  # substitution
                yield head + char + current + tail  # insertion before pos
        # Insertion after the last character: the positional loop above
        # never reaches that slot, and never runs at all for "".
        for char in alphabet:
            yield base + char
        for pos in range(len(base)):
            yield base[:pos] + base[pos + 1:]  # deletion

    def variants(start, max_edits):
        """Yield each distinct string within *max_edits* edits of *start*."""
        seen = {start}
        frontier = [start]
        yield start
        for _ in range(max_edits):
            reached = []
            for base in frontier:
                for candidate in single_edits(base):
                    if candidate not in seen:
                        seen.add(candidate)
                        reached.append(candidate)
                        yield candidate
            frontier = reached

    with gzip.open(arg.wordList, 'rb') as f:
        known = set(f.read().decode('utf-8').split('\n'))
    # A trailing newline in the list would otherwise make "" a suggestion.
    known.discard('')

    for max_edits in (1, 2):
        found = [w for w in variants(word, max_edits) if w in known]
        if found:
            return found
    return []
+
def get_def_sql(word):
with sqlite3.connect(dico) as con:
@@ -45,6 +100,16 @@ def get_def_sql(word):
}, res))
def create_wordlist():
    """Build the compressed word list from the dictionary database.

    Reads the ``word`` column of the ``entry`` table in the SQLite file
    ``dico`` and writes the distinct values, sorted, one per line (UTF-8,
    no trailing newline) to the gzip file ``{DIR_PATH}/assets/wordList``.
    The ``assets`` directory is created when it does not exist yet.
    """
    from contextlib import closing

    # sqlite3's own context manager only scopes a transaction; closing() is
    # what actually releases the connection.
    with closing(sqlite3.connect(dico)) as con:
        rows = con.execute('SELECT word FROM entry').fetchall()

    # Sorted so the generated file is reproducible; NULL words are skipped
    # because they cannot be joined into text.
    words = sorted({row[0] for row in rows if row[0] is not None})

    target = f"{DIR_PATH}/assets/wordList"
    os.makedirs(os.path.dirname(target), exist_ok=True)
    with gzip.open(target, 'wb') as out:
        out.write("\n".join(words).encode('utf-8'))
+
+
def get_def_sql_reg(word):
with sqlite3.connect(dico) as con:
cur = con.cursor()
@@ -73,9 +138,9 @@ def matching(word):
print(f'Error: Word list {arg.wordList} not found', file=sys.stderr)
return
- with open(arg.wordList, 'rb') as f:
- msgpackList = f.read()
- listWord = msgpack.unpackb(msgpackList, raw=False)
+ with gzip.open(arg.wordList, 'r') as f:
+ msgpackList = f.read().decode()
+ listWord = msgpackList.split('\n')
if word[0] != '/':
for w in listWord:
@@ -113,7 +178,7 @@ if __name__ == '__main__':
help='the word or the pattern to match')
parser.add_argument('-l', '--word-list', dest='wordList', action='store',
help='the filename of the word list',
- default=None)
+ default=f'{DIR_PATH}/assets/wordList')
parser.add_argument('-d', '--dico', dest='dico', action='store',
help='the filename of the dictionnary',
default='dfr.db')
@@ -149,6 +214,8 @@ if __name__ == '__main__':
dico = f'{DIR_PATH}/assets/{dico}'
if arg.matching:
+ initWordList()
+
ret = matching(arg.word)
for word in ret:
print(word)
@@ -157,11 +224,22 @@ if __name__ == '__main__':
else:
if arg.first:
a = arg.action(arg.word)
- if a :
+ if a:
if a[0]['def']:
a[0]['def'] = [a[0]['def'][0]]
a[0]['def'][0]['ex'] = []
ui.show_terminal(a[0])
else:
- for w in arg.action(arg.word):
+ res = arg.action(arg.word)
+ for w in res:
ui.show_terminal(w)
+ if not res:
+ print("Word not found")
+
+ initWordList()
+ listVar = didYouMean(arg.word)
+
+ if len(listVar) == 1:
+ print("Did you mean ", listVar[0])
+ elif len(listVar) > 1:
+ print("Did you mean ", ", ".join(listVar[:-1]), 'or', listVar[-1], '?')