aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: ache <ache@ache.one> 2023-01-13 03:07:16 +0100
committer: ache <ache@ache.one> 2023-01-13 03:07:16 +0100
commit: 0ea5c54c7fefbe3b340a2edc1e6ea4c95c54c19c (patch)
tree: 0b861fe59321c5fd659d03af886628b4228a6c6a
parent: Make a script to download images (diff)
Improve bot4chan
-rwxr-xr-x bot4chan.py 55
1 files changed, 35 insertions, 20 deletions
diff --git a/bot4chan.py b/bot4chan.py
index f1ebbab..c351e7c 100755
--- a/bot4chan.py
+++ b/bot4chan.py
@@ -1,41 +1,56 @@
-#!/usr/bin/python
-# -*-coding:utf-8 -*
+#!/bin/env python
import re
import os
import sys
import tempfile
-sujet = []
-sujetT = []
+
+usage = """Usage:
+ $ bot4chan "URL_BOARD_4CHAN" ["DIRECTORY"]
+
+ DIRECTORY is optional and by default is "dump\""""
+
+# Use http instead of https to avoid the overhead of encryption
+PROTOCOL = 'http'
+REGEX = '<a class="fileThumb" href="(.*?)" target="_blank"( data-m)?>'
+
+
+if len(sys.argv) <= 1 or sys.argv[1] in ['-h', '--help']:
+ print(usage)
+ sys.exit(0)
new_file, filename = tempfile.mkstemp()
if not new_file:
filename = "index.html"
-os.system("wget " + sys.argv[1] + " -O " + filename + " -N -q")
-os.system("sed -i 's/<\\/a>/<\\/a>\\n/g' " + filename)
-
-regex = '<span class="filesize">(File : |File)<a href="(.*?)" \
- target="_blank">(.*?)</a>'
-regex = '<a class="fileThumb" href="(.*?)" target="_blank">'
-
dumpDir = "dump"
-
if len(sys.argv) >= 3:
dumpDir = sys.argv[2]
-os.system("mkdir -p " + dumpDir)
+# Download the board
+os.system(f"wget {sys.argv[1]} -O {filename} -N -q")
+# The result is on a single line, so we insert a line feed after every link to spread the markup out
+# We use sed instead of Python to avoid reading the whole file
+os.system(f"sed -i 's/<\\/a>/<\\/a>\\n/g' {filename}")
+
+# Create the output directory
+os.system(f"mkdir -p {dumpDir}")
+
+# We look for an image link on each line, since each line should have a unique link
with open(filename, 'r') as f:
for line in f:
- yes = re.search(regex, line)
+ yes = re.search(REGEX, line)
if yes:
- print(yes.group(1))
- sujetT.append([yes.group(1)])
- print("Téléchargement de : " + yes.group(1))
- os.system("wget " + 'http:' + yes.group(1) + " -N -q")
- os.system("mv " + yes.group(1)[yes.group(1).rfind('/')+1:] + " " +
- dumpDir)
+ image = yes.group(1)
+ print(f"Téléchargement de : {image}")
+ res = os.system(f"wget {PROTOCOL}:{image} -N -q")
+ if res != 0:
+ print(f"💀 Error downloading imgage : {image}")
+ continue
+ res = os.system(f"mv {image[image.rfind('/') + 1:]} {dumpDir}")
+ if res != 0:
+ print(f"💀 Error moving imgage to {dumpDir}")
os.remove(filename)