about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: ache <ache@ache.one> 2021-08-31 08:21:42 +0200
committer: ache <ache@ache.one> 2021-08-31 08:21:42 +0200
commit: eed13c1b587c292f86d4b302918418ce78637126 (patch)
tree: a2c008669214d2e9122c1d5b33c1bf079d9b4ecd
parent: Fix argument name (diff)
Create a new database
-rw-r--r-- Makefile 32
-rwxr-xr-x download/download.py 16
-rw-r--r-- download/dump2msgp.py 12
-rw-r--r-- download/msgPack2sqlite_msgPack.py 7
4 files changed, 52 insertions, 15 deletions
diff --git a/Makefile b/Makefile
index 72046f0..ab4165e 100644
--- a/Makefile
+++ b/Makefile
@@ -1,21 +1,33 @@
-DIR_INSTALL_PATH=/usr/share/dfr/
-DIR_BIN=/usr/sbin/
+DIR_INSTALL_PATH=/usr/share/dfr
+DIR_BIN=/usr/sbin
install:
+ # Testing root
+ @[ "$(shell id -u)" = 0 ] || echo "You *may need root* to run that target"
+ @[ ! "$(shell id -u)" = 0 ] || echo "You are root"
+
+ # Install to ${DIR_INSTALL_PATH}
mkdir -p ${DIR_INSTALL_PATH}
- # Copy everything to ${DIR_INSTALL_PATH}
- cp -u *.py ${DIR_INSTALL_PATH}
+ @ echo ''
+ # Copy code then assets
+ cp -u *.py ${DIR_INSTALL_PATH}/
cp -r download ${DIR_INSTALL_PATH}/download
cp -r assets ${DIR_INSTALL_PATH}/assets
+ @[ -f frwiktionary-latest-pages-meta-current.xml.bz2 ] && \
+ cp -u frwiktionary-latest-pages-meta-current.xml.bz2 ${DIR_INSTALL_PATH}/
#
- # Disabled DEBUG to production
+ # Disabled DEBUG in production
sed -i 's/DEBUG = True/DEBUG = False/' ${DIR_INSTALL_PATH}/*.py
- # Get the external assets
- cd ${DIR_INSTALL_PATH}; python ${DIR_INSTALL_PATH}/download/download.py -d -o "${DIR_INSTALL_PATH}/assets/dicofr.db"
+ # Get the external assets if needed
+ cd ${DIR_INSTALL_PATH}; python ${DIR_INSTALL_PATH}/download/download.py -d -o "${DIR_INSTALL_PATH}/assets/dfr.db"
# Set permission and install command
- chmod +x ${DIR_INSTALL_PATH}/dicofr.py || sudo chmod +x ${DIR_INSTALL_PATH}/dicofr.py
- ln -s ${DIR_INSTALL_PATH}/dicofr.py ${DIR_BIN}/dfr || sudo ln -s ${DIR_INSTALL_PATH}/dicofr.py ${DIR_BIN}/dfr
- chmod +x ${DIR_BIN}/dfr || sudo chmod +x ${DIR_BIN}/dfr
+ chmod +x ${DIR_INSTALL_PATH}/dicofr.py
+ # delete symlink if it exists
+ [ -h ${DIR_BIN}/dfr ] && unlink ${DIR_BIN}/dfr
+ # create it
+ ln -s ${DIR_INSTALL_PATH}/dicofr.py ${DIR_BIN}/dfr
+ chmod +x ${DIR_BIN}/dfr
+ @echo 'Enjoy dfr !'
uninstall:
rm -R ${DIR_INSTALL_PATH}
diff --git a/download/download.py b/download/download.py
index 18a60fe..b97bb8f 100755
--- a/download/download.py
+++ b/download/download.py
@@ -45,18 +45,22 @@ if __name__ == '__main__':
parser.add_argument('-d', '--download', dest='download', action='store_true',
help='to download the lastest dump')
+ download = True
+
arg = parser.parse_args()
- download = True
if not arg.wordList:
arg.wordList = arg.outputF + '.wordlist'
- if download and arg.dumpF:
+ if arg.download and arg.dumpF:
print('''Incompatible options '-i' and '-d'.''')
exit(1)
- elif download:
+ elif arg.download:
arg.dumpF = URL_DUMP[URL_DUMP.rindex('/') + 1:]
+ elif arg.dumpF:
+ download = False
+
if not arg.dumpF or not arg.dumpF.endswith('bz2'):
print('A bz2 dump file filename needed', file=sys.stderr)
@@ -69,6 +73,7 @@ if __name__ == '__main__':
download = False
if download:
+ print(download);
print(f'Downloading the dump ({arg.dumpF})\nIt should take some time')
try:
urllib.request.urlretrieve(URL_DUMP, arg.dumpF)
@@ -83,7 +88,10 @@ if __name__ == '__main__':
exit(-1)
if not exists(arg.dumpF):
- print('Download failed.\nExiting.', file=sys.stderr)
+ if download:
+ print('Download failed.\nExiting.', file=sys.stderr)
+ else:
+ print(f'Fichier { arg.dumpF } introuvable.\nArrêt.')
exit(-2)
decompress = False
diff --git a/download/dump2msgp.py b/download/dump2msgp.py
index 70b483c..c0186af 100644
--- a/download/dump2msgp.py
+++ b/download/dump2msgp.py
@@ -255,6 +255,7 @@ def extractAll(f, errorF, ignore):
hasForbidden = False
hasText = False
tf = None
+ isEnd = False
dict_ = dict()
@@ -271,12 +272,16 @@ def extractAll(f, errorF, ignore):
hasText = False
isFr = False
title = ""
+ isEnd = False
elif "</page>" in line:
tf = None
hasForbidden = False
hasText = False
isFr = False
title = ""
+ isEnd = False
+ if isEnd:
+ continue
if "<title>" in line:
title = line[line.find('>') + 1:]
@@ -308,7 +313,12 @@ def extractAll(f, errorF, ignore):
elif not hasForbidden and "== {{langue|" in line:
isFr = False
if not hasForbidden and isFr and tf:
- tf.write(line)
+ try:
+ ind = line.index('</text>')
+ tf.write(line[:ind])
+ isEnd = True
+ except:
+ tf.write(line)
return dict_
diff --git a/download/msgPack2sqlite_msgPack.py b/download/msgPack2sqlite_msgPack.py
index 38d34cd..c08efdb 100644
--- a/download/msgPack2sqlite_msgPack.py
+++ b/download/msgPack2sqlite_msgPack.py
@@ -1,4 +1,5 @@
import msgpack
+import os
import sys
import sqlite3
@@ -6,6 +7,12 @@ import argparse
def writeDB(outputF, data):
+ # Delete if exists
+ try:
+ os.remove(outputF)
+ except OSError:
+ pass
+
with sqlite3.connect(outputF) as con:
cur = con.cursor()
cur.execute('''CREATE TABLE IF NOT EXISTS entry (