From eed13c1b587c292f86d4b302918418ce78637126 Mon Sep 17 00:00:00 2001 From: ache Date: Tue, 31 Aug 2021 08:21:42 +0200 Subject: Create a new database --- Makefile | 32 ++++++++++++++++++++++---------- download/download.py | 16 ++++++++++++---- download/dump2msgp.py | 12 +++++++++++- download/msgPack2sqlite_msgPack.py | 7 +++++++ 4 files changed, 52 insertions(+), 15 deletions(-) diff --git a/Makefile b/Makefile index 72046f0..ab4165e 100644 --- a/Makefile +++ b/Makefile @@ -1,21 +1,33 @@ -DIR_INSTALL_PATH=/usr/share/dfr/ -DIR_BIN=/usr/sbin/ +DIR_INSTALL_PATH=/usr/share/dfr +DIR_BIN=/usr/sbin install: + # Testing root + @[ "$(shell id -u)" = 0 ] || echo "You *may need root* to run that target" + @[ ! "$(shell id -u)" = 0 ] || echo "You are root" + + # Install to ${DIR_INSTALL_PATH} mkdir -p ${DIR_INSTALL_PATH} - # Copy everything to ${DIR_INSTALL_PATH} - cp -u *.py ${DIR_INSTALL_PATH} + @ echo '' + # Copy code then assets + cp -u *.py ${DIR_INSTALL_PATH}/ cp -r download ${DIR_INSTALL_PATH}/download cp -r assets ${DIR_INSTALL_PATH}/assets + @[ -f frwiktionary-latest-pages-meta-current.xml.bz2 ] && \ + cp -u frwiktionary-latest-pages-meta-current.xml.bz2 ${DIR_INSTALL_PATH}/ # - # Disabled DEBUG to production + # Disabled DEBUG in production sed -i 's/DEBUG = True/DEBUG = False/' ${DIR_INSTALL_PATH}/*.py - # Get the external assets - cd ${DIR_INSTALL_PATH}; python ${DIR_INSTALL_PATH}/download/download.py -d -o "${DIR_INSTALL_PATH}/assets/dicofr.db" + # Get the external assets if needed + cd ${DIR_INSTALL_PATH}; python ${DIR_INSTALL_PATH}/download/download.py -d -o "${DIR_INSTALL_PATH}/assets/dfr.db" # Set permission and install command - chmod +x ${DIR_INSTALL_PATH}/dicofr.py || sudo chmod +x ${DIR_INSTALL_PATH}/dicofr.py - ln -s ${DIR_INSTALL_PATH}/dicofr.py ${DIR_BIN}/dfr || sudo ln -s ${DIR_INSTALL_PATH}/dicofr.py ${DIR_BIN}/dfr - chmod +x ${DIR_BIN}/dfr || sudo chmod +x ${DIR_BIN}/dfr + chmod +x ${DIR_INSTALL_PATH}/dicofr.py + # delete symlink if it exists + [ -h ${DIR_BIN}/dfr ] && unlink ${DIR_BIN}/dfr + # create it + ln -s ${DIR_INSTALL_PATH}/dicofr.py ${DIR_BIN}/dfr + chmod +x ${DIR_BIN}/dfr + @echo 'Enjoy dfr !' uninstall: rm -R ${DIR_INSTALL_PATH} diff --git a/download/download.py b/download/download.py index 18a60fe..b97bb8f 100755 --- a/download/download.py +++ b/download/download.py @@ -45,18 +45,22 @@ if __name__ == '__main__': parser.add_argument('-d', '--download', dest='download', action='store_true', help='to download the lastest dump') + download = True + arg = parser.parse_args() - download = True if not arg.wordList: arg.wordList = arg.outputF + '.wordlist' - if download and arg.dumpF: + if arg.download and arg.dumpF: print('''Incompatible options '-i' and '-d'.''') exit(1) - elif download: + elif arg.download: arg.dumpF = URL_DUMP[URL_DUMP.rindex('/') + 1:] + elif arg.dumpF: + download = False + if not arg.dumpF or not arg.dumpF.endswith('bz2'): print('A bz2 dump file filename needed', file=sys.stderr) @@ -69,6 +73,7 @@ if __name__ == '__main__': download = False if download: + print(download); print(f'Downloading the dump ({arg.dumpF})\nIt should take some time') try: urllib.request.urlretrieve(URL_DUMP, arg.dumpF) @@ -83,7 +88,10 @@ if __name__ == '__main__': exit(-1) if not exists(arg.dumpF): - print('Download failed.\nExiting.', file=sys.stderr) + if download: + print('Download failed.\nExiting.', file=sys.stderr) + else: + print(f'Fichier { arg.dumpF } introuvable.\nArrĂȘt.') exit(-2) decompress = False diff --git a/download/dump2msgp.py b/download/dump2msgp.py index 70b483c..c0186af 100644 --- a/download/dump2msgp.py +++ b/download/dump2msgp.py @@ -255,6 +255,7 @@ def extractAll(f, errorF, ignore): hasForbidden = False hasText = False tf = None + isEnd = False dict_ = dict() @@ -271,12 +272,16 @@ def extractAll(f, errorF, ignore): hasText = False isFr = False title = "" + isEnd = False elif "" in line: tf = None hasForbidden = False hasText = False isFr = False title = "" + isEnd = False + if isEnd: + continue if "" in line: title = line[line.find('>') + 1:] @@ -308,7 +313,12 @@ def extractAll(f, errorF, ignore): elif not hasForbidden and "== {{langue|" in line: isFr = False if not hasForbidden and isFr and tf: - tf.write(line) + try: + ind = line.index('</text>') + tf.write(line[:ind]) + isEnd = True + except: + tf.write(line) return dict_ diff --git a/download/msgPack2sqlite_msgPack.py b/download/msgPack2sqlite_msgPack.py index 38d34cd..c08efdb 100644 --- a/download/msgPack2sqlite_msgPack.py +++ b/download/msgPack2sqlite_msgPack.py @@ -1,4 +1,5 @@ import msgpack +import os import sys import sqlite3 @@ -6,6 +7,12 @@ import argparse def writeDB(outputF, data): + # Delete if exists + try: + os.remove(outputF) + except OSError: + pass + with sqlite3.connect(outputF) as con: cur = con.cursor() cur.execute('''CREATE TABLE IF NOT EXISTS entry ( -- cgit v1.2.3