From eed13c1b587c292f86d4b302918418ce78637126 Mon Sep 17 00:00:00 2001 From: ache Date: Tue, 31 Aug 2021 08:21:42 +0200 Subject: Create a new database --- download/dump2msgp.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'download/dump2msgp.py') diff --git a/download/dump2msgp.py b/download/dump2msgp.py index 70b483c..c0186af 100644 --- a/download/dump2msgp.py +++ b/download/dump2msgp.py @@ -255,6 +255,7 @@ def extractAll(f, errorF, ignore): hasForbidden = False hasText = False tf = None + isEnd = False dict_ = dict() @@ -271,12 +272,16 @@ def extractAll(f, errorF, ignore): hasText = False isFr = False title = "" + isEnd = False elif "" in line: tf = None hasForbidden = False hasText = False isFr = False title = "" + isEnd = False + if isEnd: + continue if "" in line: title = line[line.find('>') + 1:] @@ -308,7 +313,12 @@ def extractAll(f, errorF, ignore): elif not hasForbidden and "== {{langue|" in line: isFr = False if not hasForbidden and isFr and tf: - tf.write(line) + try: + ind = line.index('</text>') + tf.write(line[:ind]) + isEnd = True + except: + tf.write(line) return dict_ -- cgit v1.2.3-54-g00ecf