Improve bot4chan

author: ache <ache@ache.one> 2023-01-13 03:07:16 +0100
committer: ache <ache@ache.one> 2023-01-13 03:07:16 +0100
commit: 0ea5c54c7fefbe3b340a2edc1e6ea4c95c54c19c (patch)
tree: 0b861fe59321c5fd659d03af886628b4228a6c6a
parent: Make a script to download images (diff)
1 files changed, 35 insertions, 20 deletions
diff --git a/bot4chan.py b/bot4chan.py
index f1ebbab..c351e7c 100755
--- a/bot4chan.py
+++ b/bot4chan.py
@@ -1,41 +1,56 @@
-#!/usr/bin/python
-# -*-coding:utf-8 -*
+#!/bin/env python
 
 import re
 import os
 import sys
 import tempfile
 
-sujet = []
-sujetT = []
+
+usage = """Usage:
+    $ bot4chan "URL_BOARD_4CHAN" ["DIRECTORY"]
+
+    DIRECTORY is optional and by default is "dump\""""
+
+# Use http instead of https to avoid the overhead of encryption
+PROTOCOL = 'http'
+REGEX = '<a class="fileThumb" href="(.*?)" target="_blank"( data-m)?>'
+
+
+if len(sys.argv) <= 1 or sys.argv[1] in ['-h', '--help']:
+    print(usage)
+    sys.exit(0)
 
 new_file, filename = tempfile.mkstemp()
 if not new_file:
     filename = "index.html"
 
-os.system("wget " + sys.argv[1] + "  -O " + filename + " -N -q")
-os.system("sed -i 's/<\\/a>/<\\/a>\\n/g' " + filename)
-
-regex = '<span class="filesize">(File : |File)<a href="(.*?)" \
-         target="_blank">(.*?)</a>'
-regex = '<a class="fileThumb" href="(.*?)" target="_blank">'
-
 dumpDir = "dump"
-
 if len(sys.argv) >= 3:
     dumpDir = sys.argv[2]
 
-os.system("mkdir -p " + dumpDir)
+# Download the board
+os.system(f"wget {sys.argv[1]} -O {filename} -N -q")
+# The result is on a single line, so we insert a line feed after every link to spread the markup out.
+# We use sed instead of Python to avoid reading the whole file in Python.
+os.system(f"sed -i 's/<\\/a>/<\\/a>\\n/g' {filename}")
+
+# Create the output directory
+os.system(f"mkdir -p {dumpDir}")
+
+# We look for an image link on each line, since each line should contain a single link
 with open(filename, 'r') as f:
     for line in f:
-        yes = re.search(regex, line)
+        yes = re.search(REGEX, line)
         if yes:
-            print(yes.group(1))
-            sujetT.append([yes.group(1)])
-            print("Téléchargement de : " + yes.group(1))
-            os.system("wget " + 'http:' + yes.group(1) + " -N -q")
-            os.system("mv " + yes.group(1)[yes.group(1).rfind('/')+1:] + " " +
-                      dumpDir)
+            image = yes.group(1)
+            print(f"Téléchargement de : {image}")
+            res = os.system(f"wget {PROTOCOL}:{image} -N -q")
+            if res != 0:
+                print(f"💀 Error downloading imgage : {image}")
+                continue
+            res = os.system(f"mv {image[image.rfind('/') + 1:]} {dumpDir}")
+            if res != 0:
+                print(f"💀 Error moving imgage to {dumpDir}")
 
 
 os.remove(filename)
author	ache <ache@ache.one>	2023-01-13 03:07:16 +0100
committer	ache <ache@ache.one>	2023-01-13 03:07:16 +0100
commit	0ea5c54c7fefbe3b340a2edc1e6ea4c95c54c19c (patch)
tree	0b861fe59321c5fd659d03af886628b4228a6c6a
parent	Make a script to download images (diff)