From 0ea5c54c7fefbe3b340a2edc1e6ea4c95c54c19c Mon Sep 17 00:00:00 2001 From: ache Date: Fri, 13 Jan 2023 03:07:16 +0100 Subject: Improove bot4chan --- bot4chan.py | 55 +++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 20 deletions(-) (limited to 'bot4chan.py') diff --git a/bot4chan.py b/bot4chan.py index f1ebbab..c351e7c 100755 --- a/bot4chan.py +++ b/bot4chan.py @@ -1,41 +1,56 @@ -#!/usr/bin/python -# -*-coding:utf-8 -* +#!/bin/env python import re import os import sys import tempfile -sujet = [] -sujetT = [] + +usage = """Usage: + $ bot4chan "URL_BOARD_4CHAN" ["DIRECTORY"] + + DIRECTORY is optional and by default is "dump\"""" + +# Use http instead of https to avoid the overhead of encryption +PROTOCOL = 'http' +REGEX = '' + + +if len(sys.argv) <= 1 or sys.argv[1] in ['-h', '--help']: + print(usage) + sys.exit(0) new_file, filename = tempfile.mkstemp() if not new_file: filename = "index.html" -os.system("wget " + sys.argv[1] + " -O " + filename + " -N -q") -os.system("sed -i 's/<\\/a>/<\\/a>\\n/g' " + filename) - -regex = '(File : |File)(.*?)' -regex = '' - dumpDir = "dump" - if len(sys.argv) >= 3: dumpDir = sys.argv[2] -os.system("mkdir -p " + dumpDir) +# Download the board +os.system(f"wget {sys.argv[1]} -O {filename} -N -q") +# The result is on a single line so we put line feed after every link to space markup out +# We use sed instead of python to not read the whole file +os.system(f"sed -i 's/<\\/a>/<\\/a>\\n/g' {filename}") + +# Create the output directory +os.system(f"mkdir -p {dumpDir}") + +# We look for a image link on each line since one line should have a uniq link with open(filename, 'r') as f: for line in f: - yes = re.search(regex, line) + yes = re.search(REGEX, line) if yes: - print(yes.group(1)) - sujetT.append([yes.group(1)]) - print("Téléchargement de : " + yes.group(1)) - os.system("wget " + 'http:' + yes.group(1) + " -N -q") - os.system("mv " + yes.group(1)[yes.group(1).rfind('/')+1:] + " " + - dumpDir) + image = yes.group(1) + print(f"Téléchargement de : {image}") + res = os.system(f"wget {PROTOCOL}:{image} -N -q") + if res != 0: + print(f"💀 Error downloading imgage : {image}") + continue + res = os.system(f"mv {image[image.rfind('/') + 1:]} {dumpDir}") + if res != 0: + print(f"💀 Error moving imgage to {dumpDir}") os.remove(filename) -- cgit v1.2.3