From 6eae426a7ee632a40f916286b0fa88db916c8bbc Mon Sep 17 00:00:00 2001 From: ache Date: Tue, 24 Nov 2020 08:27:44 +0100 Subject: New instructions --- Makefile | 17 ++++++++++++----- README.md | 52 +++++++++++++++++++++++++++++++++++++++++++--------- dicofr.py | 5 +++-- download/download.py | 15 +++++++++++++-- 4 files changed, 71 insertions(+), 18 deletions(-) diff --git a/Makefile b/Makefile index 95d438f..89e3087 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,17 @@ - DIR_INSTALL_PATH=/usr/share/dicofr/ -DIR_BIN=/usr/bin/ +DIR_BIN=/usr/sbin/ install: mkdir -p ${DIR_INSTALL_PATH} - cp -u *.py *.sql ${DIR_INSTALL_PATH} - cp -u dicofr.py ${DIR_BIN}/dicofr - chmod +rw ${DIR_BIN}/dicofr + # Copy everything to ${DIR_INSTALL_PATH} + cp -u *.py ${DIR_INSTALL_PATH} + cp -r download ${DIR_INSTALL_PATH}/download + cp -r assets ${DIR_INSTALL_PATH}/assets + # + # Disabled DEBUG to production + sed -i 's/DEBUG = True/DEBUG = False/' ${DIR_INSTALL_PATH}/*.py + # Set permission and command + chmod +x ${DIR_INSTALL_PATH}/dicofr.py + ln -s ${DIR_INSTALL_PATH}/dicofr.py ${DIR_BIN}/dicofr + chmod +x ${DIR_BIN}/dicofr diff --git a/README.md b/README.md index dfa1c84..ec0ab7f 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ -Dicofr -====== +Dfr +=== An utility to create and query a French dictionary based on [Wiktionary archive dump](https://dumps.wikimedia.org/frwiktionary/20200601/). @@ -13,14 +13,46 @@ A bunch of **Python** scripts to transform wiktionary archive dump to MySQL data - With a simple WUI, flask based. - Regex support. +***[WUI]: Web User Interface + +Install +------- + +There is a Makefile to do that. + +~~~shell +$ git clone https://git.ache.one/dfr/ +$ cd dfr +$ make install +~~~ + +It will download a dump of wiktionary, create the sqlite database and install dfr. +Then delete the dump of wiktionary. + +You can uninstall everything with `uninstall`. 
+~~~shell +$ make uninstall +~~~ + How to create the database -------------------------- +You can use the script `download/download.py` to automate this process to create a sqlite database based on the latest available dump of wiktionary (fr) : + +~~~shell +$ python download/download.py -d -o datadb.sqlite +~~~ + +It will download the dump, and create the whole database. + + +But by hand, you can do the following. + First you have to download a wiktionary archive file. For example the file `frwiktionary-20200601-pages-articles.xml.bz2` witch is a full dump of the current version of every pages. For now you have to decompress it completely before you can treat it. -The use of [bz2](https://docs.python.org/3/library/bz2.html) may be considered in the future to make this step optional and thus reduce disk usage. +The use of [bz2](https://docs.python.org/3/library/bz2.html) may be considered in the future to make this step optional and thus reduce disk usage (it's now possible with `download.py`). ~~~shell $ bunzip2 frwiktionary-20200601-pages-articles.xml.bz2 @@ -31,12 +63,12 @@ This file is interesting for developers not for end users. It's a serialization of the internal used dictionary (python dictionary). ~~~shell -$ python dump2msgp.py -i frwiktionary-20200601-pages-articles.xml +$ python download/dump2msgp.py -i frwiktionary-20200601-pages-articles.xml ~~~ Then, you can create the SQLite database file. ~~~shell -$ python msgPack2sqlite_msgPack.py -i dicofr.msgpk +$ python download/msgPack2sqlite_msgPack.py -i dicofr.msgpk ~~~ You can then use `dicofr.py` to search a word from the CLI or use the WUI with the command: @@ -50,8 +82,8 @@ How to use it You can use the CLI. ~~~shell -$ dicofr -h -usage: dicofr [-h] [--sql] [--matching] PATTERN +$ dfr -h +usage: dfr [-h] [--sql] [--matching] PATTERN Get a french word's definition. 
@@ -67,7 +99,7 @@ optional arguments: For example ~~~shell -$ dicofr julien +$ dfr julien julien /ʒy.ljɛ̃/, adjectif (Chronologie) Qui est lié à Jules César et à sa décision d’instaurer l’alternance entre trois années de trois cent soixante-cinq jours et une année bissextile de trois cent soixante-six jours. @@ -77,7 +109,7 @@ $ dicofr julien ~~~ ~~~shell -$ dicofr -m /julien/ +$ dfr -m /julien/ julienois juliennette juliennoises @@ -104,6 +136,8 @@ Why only french ? Because that's the only language I'm able to tackle. I can't verify anything about others languages. +**On the way to support other languages.** + Feel free to contribute. How to contribute ? diff --git a/dicofr.py b/dicofr.py index 7e6888d..68bec87 100755 --- a/dicofr.py +++ b/dicofr.py @@ -17,9 +17,9 @@ from os.path import exists import ui -debug = False +DEBUG = False -if debug: +if DEBUG: DIR_PATH = 'assets/' else: DIR_PATH = '/usr/share/dicofr/assets/' @@ -109,6 +109,7 @@ if __name__ == '__main__': if not exists(dico): if not exists(f'{DIR_PATH}/{dico}'): print('Error: No sqlite dictionnary', file=sys.stderr) + print(f'Default directory is set to "{DIR_PATH}"', file=sys.stderr) exit(1) else: dico = f'{DIR_PATH}/{dico}' diff --git a/download/download.py b/download/download.py index a941b90..cf33314 100755 --- a/download/download.py +++ b/download/download.py @@ -61,7 +61,18 @@ if __name__ == '__main__': if download: print(f"Downloading the dump ({arg.dumpF})\nIt should take some time") - urllib.request.urlretrieve(URL_DUMP, arg.dumpF) + try: + urllib.request.urlretrieve(URL_DUMP, arg.dumpF) + except urllib.error.URLError: + print("Error: Unable to download from internet") + print(f"Check connection and source URL : ({ URL_DUMP })") + print("Exiting") + exit(-10) + except: + print("Download failed.") + print("Exiting") + exit(-1) + if not exists(arg.dumpF): print('Download failed.\nExiting.', file=sys.stderr) @@ -123,7 +134,7 @@ if __name__ == '__main__': print(f"Database { arg.outputF } created ! 
👏 🎉") except: print("Failed to extract database") - print(("Exiting (-3)") + print("Exiting (-3)") exit(-3) print(f"Removing temporary files") -- cgit v1.2.3