aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorache <ache@ache.one>2020-11-24 08:27:44 +0100
committerache <ache@ache.one>2020-11-24 08:27:44 +0100
commit6eae426a7ee632a40f916286b0fa88db916c8bbc (patch)
tree8265f650cecaa485d8a5c2a46286e4e1e94b9e1e
parentUpdate everything (diff)
New instructions
-rw-r--r--Makefile17
-rw-r--r--README.md52
-rwxr-xr-xdicofr.py5
-rwxr-xr-xdownload/download.py15
4 files changed, 71 insertions, 18 deletions
diff --git a/Makefile b/Makefile
index 95d438f..89e3087 100644
--- a/Makefile
+++ b/Makefile
@@ -1,10 +1,17 @@
-
DIR_INSTALL_PATH=/usr/share/dicofr/
-DIR_BIN=/usr/bin/
+DIR_BIN=/usr/sbin/
install:
mkdir -p ${DIR_INSTALL_PATH}
- cp -u *.py *.sql ${DIR_INSTALL_PATH}
- cp -u dicofr.py ${DIR_BIN}/dicofr
- chmod +rw ${DIR_BIN}/dicofr
+ # Copy everything to ${DIR_INSTALL_PATH}
+ cp -u *.py ${DIR_INSTALL_PATH}
+ cp -r download ${DIR_INSTALL_PATH}/download
+ cp -r assets ${DIR_INSTALL_PATH}/assets
+ #
+ # Disabled DEBUG to production
+ sed -i 's/DEBUG = True/DEBUG = False/' ${DIR_INSTALL_PATH}/*.py
+ # Set permission and command
+ chmod +x ${DIR_INSTALL_PATH}/dicofr.py
+ ln -s ${DIR_INSTALL_PATH}/dicofr.py ${DIR_BIN}/dicofr
+ chmod +x ${DIR_BIN}/dicofr
diff --git a/README.md b/README.md
index dfa1c84..ec0ab7f 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
-Dicofr
-======
+Dfr
+===
A utility to create and query a French dictionary based on a [Wiktionary archive dump](https://dumps.wikimedia.org/frwiktionary/20200601/).
@@ -13,14 +13,46 @@ A bunch of **Python** scripts to transform wiktionary archive dump to MySQL data
- With a simple WUI, flask based.
- Regex support.
+***[WUI]: Web User Interface
+
+Install
+-------
+
+There is a Makefile to do that.
+
+~~~shell
+$ git clone https://git.ache.one/dfr/
+$ cd dfr
+$ make install
+~~~
+
+It will download a dump of wiktionary, create the sqlite database and install dfr.
+It then deletes the downloaded Wiktionary dump.
+
+You can uninstall everything with `uninstall`.
+~~~shell
+$ make uninstall
+~~~
+
How to create the database
--------------------------
+You can use the script `download/download.py` to automate this process and create a SQLite database based on the latest available dump of Wiktionary (fr):
+
+~~~shell
+$ python download/download.py -d -o datadb.sqlite
+~~~
+
+It will download the dump, and create the whole database.
+
+
+But by hand, you can do the following.
+
First you have to download a wiktionary archive file.
For example the file `frwiktionary-20200601-pages-articles.xml.bz2`, which is a full dump of the current version of every page.
For now you have to decompress it completely before you can process it.
-The use of [bz2](https://docs.python.org/3/library/bz2.html) may be considered in the future to make this step optional and thus reduce disk usage.
+The use of [bz2](https://docs.python.org/3/library/bz2.html) may be considered in the future to make this step optional and thus reduce disk usage (it's now possible with `download.py`).
~~~shell
$ bunzip2 frwiktionary-20200601-pages-articles.xml.bz2
@@ -31,12 +63,12 @@ This file is interesting for developers not for end users.
It's a serialization of the dictionary used internally (a Python dictionary).
~~~shell
-$ python dump2msgp.py -i frwiktionary-20200601-pages-articles.xml
+$ python download/dump2msgp.py -i frwiktionary-20200601-pages-articles.xml
~~~
Then, you can create the SQLite database file.
~~~shell
-$ python msgPack2sqlite_msgPack.py -i dicofr.msgpk
+$ python download/msgPack2sqlite_msgPack.py -i dicofr.msgpk
~~~
You can then use `dicofr.py` to search a word from the CLI or use the WUI with the command:
@@ -50,8 +82,8 @@ How to use it
You can use the CLI.
~~~shell
-$ dicofr -h
-usage: dicofr [-h] [--sql] [--matching] PATTERN
+$ dfr -h
+usage: dfr [-h] [--sql] [--matching] PATTERN
Get a french word's definition.
@@ -67,7 +99,7 @@ optional arguments:
For example
~~~shell
-$ dicofr julien
+$ dfr julien
julien
/ʒy.ljɛ̃/, adjectif
(Chronologie) Qui est lié à Jules César et à sa décision d’instaurer l’alternance entre trois années de trois cent soixante-cinq jours et une année bissextile de trois cent soixante-six jours.
@@ -77,7 +109,7 @@ $ dicofr julien
~~~
~~~shell
-$ dicofr -m /julien/
+$ dfr -m /julien/
julienois
juliennette
juliennoises
@@ -104,6 +136,8 @@ Why only french ?
Because that's the only language I'm able to tackle.
I can't verify anything about other languages.
+**On the way to support other languages.**
+
Feel free to contribute.
How to contribute ?
diff --git a/dicofr.py b/dicofr.py
index 7e6888d..68bec87 100755
--- a/dicofr.py
+++ b/dicofr.py
@@ -17,9 +17,9 @@ from os.path import exists
import ui
-debug = False
+DEBUG = False
-if debug:
+if DEBUG:
DIR_PATH = 'assets/'
else:
DIR_PATH = '/usr/share/dicofr/assets/'
@@ -109,6 +109,7 @@ if __name__ == '__main__':
if not exists(dico):
if not exists(f'{DIR_PATH}/{dico}'):
print('Error: No sqlite dictionnary', file=sys.stderr)
+ print(f'Default directory is set to "{DIR_PATH}"', file=sys.stderr)
exit(1)
else:
dico = f'{DIR_PATH}/{dico}'
diff --git a/download/download.py b/download/download.py
index a941b90..cf33314 100755
--- a/download/download.py
+++ b/download/download.py
@@ -61,7 +61,18 @@ if __name__ == '__main__':
if download:
print(f"Downloading the dump ({arg.dumpF})\nIt should take some time")
- urllib.request.urlretrieve(URL_DUMP, arg.dumpF)
+ try:
+ urllib.request.urlretrieve(URL_DUMP, arg.dumpF)
+ except urllib.error.URLError:
+ print("Error: Unable to download from internet")
+ print(f"Check connection and source URL : ({ URL_DUMP })")
+ print("Exiting")
+ exit(-10)
+ except:
+ print("Download failed.")
+ print("Exiting")
+ exit(-1)
+
if not exists(arg.dumpF):
print('Download failed.\nExiting.', file=sys.stderr)
@@ -123,7 +134,7 @@ if __name__ == '__main__':
print(f"Database { arg.outputF } created ! 👏 🎉")
except:
print("Failed to extract database")
- print(("Exiting (-3)")
+ print("Exiting (-3)")
exit(-3)
print(f"Removing temporary files")