aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile2
-rwxr-xr-xautoDHCP.py18
-rw-r--r--autoJPEG.py51
-rwxr-xr-xautoMMS.py10
-rwxr-xr-xbot4chan.py55
-rw-r--r--clip2file.py123
-rwxr-xr-xcoWifi4
-rwxr-xr-xdlr34.py302
-rwxr-xr-xdown.sh8
-rwxr-xr-xdown_imgs.py226
-rw-r--r--pyhttpd.py74
11 files changed, 837 insertions, 36 deletions
diff --git a/Makefile b/Makefile
index 3d497ce..483effd 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-INSTALL_LIST=autoDHCP.sh autoWall.sh cmount.sh coWifi.sh cumount.sh imgs2pdf.sh light2.sh toMp3.sh bot4chan.py track.py autoMMS.py
+INSTALL_LIST=autoDHCP.py autoWall.sh cmount.sh coWifi.sh cumount.sh imgs2pdf.sh light2.sh toMp3.sh bot4chan.py track.py autoMMS.py pyhttpd.py clip2file.py
install:
@if [ -z $(filter-out $@,$(MAKECMDGOALS)) ] ; then \
diff --git a/autoDHCP.py b/autoDHCP.py
index ce699c4..cdada36 100755
--- a/autoDHCP.py
+++ b/autoDHCP.py
@@ -9,21 +9,21 @@ import subprocess
import tempfile
-def guess_wifi():
+def guess_wifi() -> list[str]:
return [interface for interface in os.listdir('/sys/class/net/') if
exists(f'/sys/class/net/{interface}/wireless')]
-def guess_nowifi():
+def guess_nowifi() -> list[str]:
return [interface for interface in os.listdir('/sys/class/net/') if
not exists(f'/sys/class/net/{interface}/wireless')]
-def guess_interface():
+def guess_interface() -> list[str]:
return [interface for interface in os.listdir('/sys/class/net/')]
-def guess_internet():
+def guess_internet() -> list[str]:
com = subprocess.Popen(['ip', 'route', 'show'], stdout=subprocess.PIPE,
stderr=subprocess.STDOUT)
stdout, stderr = com.communicate()
@@ -39,7 +39,7 @@ def guess_internet():
return list(set(internet_i))
-def stripedLine(line):
+def stripedLine(line) -> str:
words = line.split(' ')
if 'dev' in words and words.index('dev'):
return words[words.index('dev') + 1]
@@ -47,10 +47,11 @@ def stripedLine(line):
return ''
-def menu(listEntry, strInput, refresh=None):
+def menu(listEntry, strInput, refresh=None) -> int:
if listEntry == []:
print(f"Error menu: Empty list\n({strInput})", file=sys.stderr)
- exit(1)
+ sys.exit(1)
+
c = -1
if len(listEntry) == 1:
c = 0
@@ -73,7 +74,7 @@ def menu(listEntry, strInput, refresh=None):
ip = '10.5.6.11'
netRange = '24'
-ssid = '🦄_🌈'
+ssid = '🦖'
password = 'chocoball'
hostapd_conf = '''
@@ -252,6 +253,7 @@ if __name__ == '__main__':
if arg.teth:
os.system('sysctl net.ipv4.ip_forward=1')
+ os.system('sysctl net.ipv6.conf.all.forwarding=1')
os.system(f'iptables -t nat -A POSTROUTING -o {inetInt} -j MASQUERADE')
os.system('iptables -A FORWARD -m conntrack --ctstate'
' RELATED,ESTABLISHED -j ACCEPT')
diff --git a/autoJPEG.py b/autoJPEG.py
new file mode 100644
index 0000000..6297dac
--- /dev/null
+++ b/autoJPEG.py
@@ -0,0 +1,51 @@
+from os import listdir
+from os.path import isfile, join
+
+
+ORIG_FILES = '/var/spool/sms/inbox/'
+DEST_FILES = '/var/spool/sms/mms/'
+
+JPEG_TAG = b'\xFF\xD8\xFF\xE0\x00\x10JFIF'
+
+
+def listMMS():
+ return sorted([f for f in listdir(ORIG_FILES) if
+ isfile(join(ORIG_FILES, f)) and f.endswith('.bin')])[::-1]
+
+def listNewMMS():
+ mmss = listMMS()
+
+ listFile = [f[:f.rfind('_')] for f in listdir(DEST_FILES) if
+ isfile(join(DEST_FILES, f)) and f.rfind('_') > 0]
+
+ for mms in mmss:
+ mmsTitle = mms[:-4]
+ if mmsTitle not in listFile:
+ yield mms
+
+
+def extractJPG(mmsFile, name):
+ i = 1
+
+ fileOut = None
+
+    with open(mmsFile, 'rb') as f:
+ for line in f:
+ if JPEG_TAG in line:
+ index = line.index(JPEG_TAG)
+ if fileOut is not None:
+ fileOut.write(line[:index])
+ fileOut.close()
+ fileOut = open(DEST_FILES + name + '_' + str(i) + '.jpg', 'wb')
+ i += 1
+ fileOut.write(line[index:])
+ else:
+ if fileOut is not None:
+ fileOut.write(line)
+
+
+if __name__ == "__main__":
+ for f in listNewMMS():
+ print(f)
+ extractJPG('./mmsFile3', 'romane2')
+ # listMMS = listMMS():
diff --git a/autoMMS.py b/autoMMS.py
index f691e19..34302de 100755
--- a/autoMMS.py
+++ b/autoMMS.py
@@ -17,13 +17,13 @@ if __name__ == "__main__":
with open(sys.argv[1],'r') as f:
for line in f:
if "Text00 = " in line:
- info = line[9:-1] # 9 = len('Text00 = ') ,strip the final '\n'
+ info = line.split(' = ')[1].strip() # 9 = len('Text00 = ') ,strip the final '\n'
elif line.startswith('Text'):
- info += line[9:-1]
+ info += line.split(' = ')[1].strip()
if line.startswith('DateTime'):
- date = line[9:-1]
- time = time.strptime(date, '%Y%m%dT%H%M%S')
- if info :
+ date = line.split(' = ')[1].strip()
+ timeInfo = time.strptime(date, '%Y%m%dT%H%M%S')
+ if info:
a = info
else:
print("Error : MMS invalide format, no data info found")
diff --git a/bot4chan.py b/bot4chan.py
index f1ebbab..c351e7c 100755
--- a/bot4chan.py
+++ b/bot4chan.py
@@ -1,41 +1,56 @@
-#!/usr/bin/python
-# -*-coding:utf-8 -*
+#!/bin/env python
import re
import os
import sys
import tempfile
-sujet = []
-sujetT = []
+
+usage = """Usage:
+ $ bot4chan "URL_BOARD_4CHAN" ["DIRECTORY"]
+
+ DIRECTORY is optional and by default is "dump\""""
+
+# Use http instead of https to avoid the overhead of encryption
+PROTOCOL = 'http'
+REGEX = '<a class="fileThumb" href="(.*?)" target="_blank"( data-m)?>'
+
+
+if len(sys.argv) <= 1 or sys.argv[1] in ['-h', '--help']:
+ print(usage)
+ sys.exit(0)
new_file, filename = tempfile.mkstemp()
if not new_file:
filename = "index.html"
-os.system("wget " + sys.argv[1] + " -O " + filename + " -N -q")
-os.system("sed -i 's/<\\/a>/<\\/a>\\n/g' " + filename)
-
-regex = '<span class="filesize">(File : |File)<a href="(.*?)" \
- target="_blank">(.*?)</a>'
-regex = '<a class="fileThumb" href="(.*?)" target="_blank">'
-
dumpDir = "dump"
-
if len(sys.argv) >= 3:
dumpDir = sys.argv[2]
-os.system("mkdir -p " + dumpDir)
+# Download the board
+os.system(f"wget {sys.argv[1]} -O {filename} -N -q")
+# The result is on a single line so we put line feed after every link to space markup out
+# We use sed instead of python to not read the whole file
+os.system(f"sed -i 's/<\\/a>/<\\/a>\\n/g' {filename}")
+
+# Create the output directory
+os.system(f"mkdir -p {dumpDir}")
+
+# We look for a image link on each line since one line should have a uniq link
with open(filename, 'r') as f:
for line in f:
- yes = re.search(regex, line)
+ yes = re.search(REGEX, line)
if yes:
- print(yes.group(1))
- sujetT.append([yes.group(1)])
- print("Téléchargement de : " + yes.group(1))
- os.system("wget " + 'http:' + yes.group(1) + " -N -q")
- os.system("mv " + yes.group(1)[yes.group(1).rfind('/')+1:] + " " +
- dumpDir)
+ image = yes.group(1)
+ print(f"Téléchargement de : {image}")
+ res = os.system(f"wget {PROTOCOL}:{image} -N -q")
+ if res != 0:
+            print(f"💀 Error downloading image : {image}")
+ continue
+ res = os.system(f"mv {image[image.rfind('/') + 1:]} {dumpDir}")
+ if res != 0:
+            print(f"💀 Error moving image to {dumpDir}")
os.remove(filename)
diff --git a/clip2file.py b/clip2file.py
new file mode 100644
index 0000000..d62b725
--- /dev/null
+++ b/clip2file.py
@@ -0,0 +1,123 @@
+#!/bin/env python
+
+import subprocess
+import datetime
+import sys
+import os
+import argparse
+
+
+"""
+ This program will write the clipboard to a file.
+ The format of the file is determined by the available targets and the
+ priority list.
+"""
+
+OUTPUT_DIR = "~"
+
+def get_clipboard_content(target):
+ # Get the clipboard contents.
+ try:
+ clipfile = subprocess.check_output(
+ ['xclip', '-o', '-t', target, '-selection', 'clipboard']
+ )
+ except subprocess.CalledProcessError as e:
+ print(f"Failed to get clipboard contents: {e}", file=sys.stderr)
+ sys.exit(e.returncode)
+ # Check if the clipboard contents are empty.
+ if not clipfile:
+ print("Clipboard is empty.", file=sys.stderr)
+ sys.exit(1)
+
+ return clipfile
+
+def get_available_targets():
+ # Get the lists of available targets.
+ # Executes `xclip -o -t TARGETS -selection clipboard` and get the output
+ # as a list of strings.
+ try:
+ output = subprocess.check_output(
+ ['xclip', '-o', '-t', 'TARGETS', '-selection', 'clipboard']
+ )
+ except subprocess.CalledProcessError as e:
+ print(f"xclip failed: {e}", file=sys.stderr)
+ sys.exit(e.returncode)
+
+ return output.decode('utf-8').split('\n')
+ # The output is a list of strings, each string is a target.
+
+def save_file(clipfile, filename, output_dir, ext):
+ # Write the clipboard contents to a file.
+    pathfile = os.path.expanduser(f"{output_dir}/{filename}.{ext}")
+
+ try:
+ with open(pathfile, 'wb') as f:
+ try:
+ f.write(clipfile)
+ except IOError as e:
+                print(f"Failed to write to file {filename} in {output_dir}: {e}", file=sys.stderr)
+ sys.exit(e.errno)
+ except IOError as e:
+        print(f"Failed to open file {filename} in {output_dir}: {e}", file=sys.stderr)
+
+
+def main(filename, output_dir):
+ # The priority of the target.
+ # The first target in the list is the most important.
+ # The final target will be the first of this list matching the available targets
+ priority = [
+ ("image/avif", "avif"),
+ ("image/png", "png"),
+ ("text/plain", "txt"),
+ ("UTF8_STRING", "txt"),
+ ]
+
+ targets = get_available_targets()
+
+ # Get the first target in the priority list that is available.
+ # If no target is found, the script exits.
+ target = None
+ ext = None
+ for f, e in priority:
+ if f in targets:
+ target, ext = f, e
+ break
+
+ if target is None or not isinstance(target, str):
+ print("No supported clipboard target found.", file=sys.stderr)
+ sys.exit(1)
+
+ clipfile = get_clipboard_content(target)
+
+ save_file(clipfile, filename, output_dir, ext)
+
+    print(f"Saved clipboard contents to {output_dir}/{filename}.{ext}")
+
+
+if __name__ == '__main__':
+ date = datetime.datetime.now().isoformat()
+ default_filename = f"{date}_clip"
+
+ parser = argparse.ArgumentParser(
+ description="Save clipboard contents to a file."
+ )
+ # Select the directory
+ parser.add_argument(
+ '-d', '--directory',
+ help="Directory to save the clipboard contents to.",
+ default=OUTPUT_DIR,
+ type=str
+ )
+ # Select the filename
+ parser.add_argument(
+ '-f', '--filename',
+ help="Filename to save the clipboard contents to.",
+ default=default_filename,
+ type=str
+ )
+
+ args = parser.parse_args()
+
+ main(args.filename, args.directory)
+
+
diff --git a/coWifi b/coWifi
index 5e17619..0f52fcc 100755
--- a/coWifi
+++ b/coWifi
@@ -26,8 +26,8 @@ function guess_wifi {
-if [ "$1" == 'off' ] ; then
- if [ -z "$2" ] ; then
+if [ "$1" == 'off' ] ; then
+ if [ -z "$2" ] ; then
interface=$(guess_wifi '')
else
interface=$(guess_wifi 'all')
diff --git a/dlr34.py b/dlr34.py
new file mode 100755
index 0000000..5045274
--- /dev/null
+++ b/dlr34.py
@@ -0,0 +1,302 @@
+#!/bin/env python
+
+import os
+import re
+import requests
+import sys
+import csv
+import lxml
+import json
+from urllib.parse import urlparse
+from pathlib import Path
+import lxml.html
+import argparse
+
+
+DEBUG_MODE = True
+dirOut = Path("r34")
+
+
+class Spinner(object):
+ def __init__(self):
+ self.SPINNER = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏']
+ self.SPINNER_status = 1
+
+ def __call__(self):
+ self.SPINNER_status = (self.SPINNER_status + 1) % len(self.SPINNER)
+ return self.SPINNER[self.SPINNER_status]
+
+
+quickSpinner = Spinner()
+
+
+def pretty_print_Request_header(req):
+ print(f"{req.method} {req.url}")
+
+ for k, v in req.headers.items():
+ print(f"{k}: {v}")
+
+
+def outputFileFromURL(url):
+ u = urlparse(url)
+ name = Path(u.path).name
+ alreadyExists = False
+
+ finalDest = dirOut.joinpath(Path(name))
+
+ i = 1
+
+ while finalDest.exists():
+ alreadyExists = True
+
+ s_name = Path(u.path).stem
+ s_suffix = Path(u.path).suffix
+
+ next_name = f"{s_name}_{i}{s_suffix}"
+ i += 1
+
+ finalDest = dirOut.joinpath(Path(next_name))
+
+ return alreadyExists, str(finalDest)
+
+
+def downloadURL(s, url, idLink, output, headers={}):
+ r = s.get(url, stream=True, headers=headers)
+
+ u = urlparse(url)
+
+ if r.ok:
+ if DEBUG_MODE:
+ print(f"\n{url} >> {output}")
+
+ print(f"\r⬇️ {Path(u.path).name}", end='')
+
+ try:
+ with open(output, "wb") as f:
+ print(f"\r⬇️ {quickSpinner()} {Path(u.path).name}", end='')
+ chunk_size = 4 * 1024
+
+ for chunk in r.iter_content(chunk_size=chunk_size):
+ f.write(chunk)
+ print(f"\rDownloaded {Path(u.path).name} - {idLink}")
+ except Exception as e:
+ print(f"\r☠️ error {Path(u.path).name} - {idLink}")
+ if DEBUG_MODE:
+ print(e)
+
+def downloadFromItems(items, s, u):
+ replace = {
+ 'mov480.': 'mov.',
+ 'mov720.': 'mov.',
+ 'mov256.': 'mov.',
+ 'pic256.': 'pic.',
+ 'pic480.': 'pic.',
+ 'pic720.': 'pic.',
+ 'picsmall.': 'pic.'
+ }
+
+ for item in items:
+ isDownloaded = False
+ isVideo = item['duration'] is not None
+ for file in item['imageLinks']:
+ url = file['url']
+ if isVideo and not url.endswith('.mp4'):
+ continue
+
+ for r,t in replace.items():
+ if r in url:
+ url = url.replace(r, t)
+ e, outputFile = outputFileFromURL(url)
+ if e:
+ print("Already downloaded")
+ return
+
+ headers = {"Referer": f"{u.scheme}://{u.hostname}/"}
+ headers['Accept'] = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"
+ headers['Accept-encoding'] = "gzip, deflate, br"
+ downloadURL(s, url, "???", outputFile, headers)
+ isDownloaded = True
+ break
+ if isDownloaded:
+ break
+
+
+def downloadImagesFromPlaylist(index, link):
+ s = requests.Session()
+
+ u = urlparse(link)
+ p = Path(u.path)
+ playlistId = p.stem
+
+ print(f"Link n°{index} - {playlistId}")
+
+ nbPage = 0
+ while True:
+ urlPlaylistFormat = f"https://rule34.world/api/playlist-item?playlistId={playlistId}&Skip={nbPage*60}&Take=60&DisableTotal=true"
+ resp = s.get(urlPlaylistFormat)
+ if resp.ok and resp.status_code == 200:
+ data = json.loads(resp.content)
+ if len(data['items']) > 0:
+ print(f"Ok ! {len(data['items'])}")
+ downloadFromItems(data['items'], s, u)
+ else:
+ break
+
+ nbPage += 1
+
+ return
+ try:
+ html = lxml.html.fromstring(resp.content)
+ boxes = html.cssselect("div.box a.boxInner")
+ for box in boxes:
+ url = box.attrib['href']
+ url = f"{u.scheme}://{u.hostname}{url}"
+ downloadImageFromPage(index, url)
+ except Exception as e:
+ print(f"\r☠️ error {Path(u.path).name} - {idLink}")
+ if DEBUG_MODE:
+ print(e)
+
+
+def downloadImagesFromUserboard(index, link):
+ s = requests.Session()
+
+ u = urlparse(link)
+ p = Path(u.path)
+ boardName = p.stem
+
+ print(f"Link n°{index} - {boardName}", end='')
+ resp = s.get(link)
+
+ findImagesRegex = re.compile('<img class="image" src="/images/t.png" data-src="(.+)\?width=300"')
+ if resp.ok and resp.status_code == 200:
+ for subIndex, urlImg in enumerate(findImagesRegex.findall(resp.content.decode('utf8'))):
+ match = re.search(r'(\d{7,})', urlImg)
+ if not match:
+ return
+
+ e, outputFile = outputFileFromURL(urlImg)
+ if e:
+ print("Already downloaded")
+ return
+
+ if DEBUG_MODE:
+ print(f"\n{urlImg} >> {outputFile}")
+
+ idLink = match.group(1)
+ print(f"Link n°{index}.{subIndex} - {boardName}>{idLink}", end='')
+
+ headers = {"Referer": f"{u.scheme}://{u.hostname}/"}
+ headers['Accept'] = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"
+ headers['Accept-encoding'] = "gzip, deflate, br"
+ downloadURL(s, urlImg, idLink, outputFile, headers)
+
+
+def downloadVideoFromSource(index, urlBase, idVid, urlVideo):
+ s = requests.Session()
+
+ print(f"Link n°{index} - Vidéo {idVid}", end='')
+ print(f"Link : {urlVideo} !", end='\n')
+
+
+ u = urlparse(urlBase)
+ urlVideo = urlVideo.replace('mov480', 'mov')
+
+ e, outputFile = outputFileFromURL(urlVideo)
+ if e:
+ print("Already downloaded")
+ return
+
+ headers = {"Referer": f"{u.scheme}://{u.hostname}/"}
+ headers['Accept'] = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"
+ headers['Accept-encoding'] = "gzip, deflate, br"
+
+ print(f"Download to {outputFile}")
+ downloadURL(s, urlVideo, idVid, outputFile, headers)
+
+
+def downloadImageFromPage(index, link):
+ s = requests.Session()
+ match = re.search(r'(\d{6,})', link)
+
+ if not match:
+ return
+
+ idLink = match.group(1)
+ print(f"Link n°{index} - {idLink}\n", end='')
+ resp = s.get(link)
+
+ if resp.ok and resp.status_code == 200:
+ try:
+ html = lxml.html.fromstring(resp.content)
+ imgs = html.cssselect("img.img.shadow-base")
+
+ if len(imgs) == 0:
+ vids = html.cssselect("video.video.shadow-base source")
+ if len(vids) > 0:
+ url = vids[0].attrib['src']
+
+ u = urlparse(link)
+ if url[0] == '/':
+ url = f"{u.scheme}://{u.hostname}{url}"
+
+ downloadVideoFromSource(index, link, idLink, url)
+ else:
+ img = imgs[0]
+
+ if 'src' in img.attrib:
+ url = img.attrib['src']
+ u = urlparse(link)
+ if url[0] == '/':
+ url = f"{u.scheme}://{u.hostname}{url}"
+
+ url = url.replace('picsmall', 'pic')
+
+ e, outputFile = outputFileFromURL(url)
+ if e:
+ print("Already downloaded")
+ return
+
+ headers = {"Referer": f"{u.scheme}://{u.hostname}/"}
+ headers['Accept'] = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"
+ headers['Accept-encoding'] = "gzip, deflate, br"
+ downloadURL(s, url, idLink, outputFile, headers)
+
+ except:
+ print(f"\r☠️ error {Path(u.path).name} - {idLink}")
+ if DEBUG_MODE:
+ print(e)
+ return
+
+
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("-d", "--directory", help="Output directory")
+ parser.add_argument('files', nargs=argparse.REMAINDER)
+ args = parser.parse_args()
+
+ if args.directory:
+ global dirOut
+ dirOut = Path(args.directory)
+
+ if not dirOut.exists():
+ print(f"Output dir \"{dirOut}\" doesn't exists")
+ return
+
+ if not dirOut.is_dir():
+ print(f"Output dir \"{dirOut}\" isn't a directory")
+ return
+
+ for file in args.files:
+ with open(file) as f_listLink:
+ for index, link in enumerate(f_listLink.read().split()):
+ if '/post/' in link:
+ downloadImageFromPage(index, link)
+ if '/user/' in link:
+ downloadImagesFromUserboard(index, link)
+ if '/playlists/' in link:
+ downloadImagesFromPlaylist(index, link)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/down.sh b/down.sh
new file mode 100755
index 0000000..c4de236
--- /dev/null
+++ b/down.sh
@@ -0,0 +1,8 @@
+#!/bin/env fish
+
+
+while read line
+ while not yt-dlp --abort-on-unavailable-fragment --no-playlist -R 2 "$line"
+ sleep 5m
+ end
+end < "$argv[1]"
diff --git a/down_imgs.py b/down_imgs.py
new file mode 100755
index 0000000..5d44b05
--- /dev/null
+++ b/down_imgs.py
@@ -0,0 +1,226 @@
+#!/bin/env python
+
+import os
+import re
+import requests
+import sys
+import csv
+import lxml
+from urllib.parse import urlparse
+from pathlib import Path
+import lxml.html
+import argparse
+
+
+DEBUG_MODE = False
+dirOut = Path("ss")
+
+
+class Spinner(object):
+ def __init__(self):
+ self.SPINNER = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏']
+ self.SPINNER_status = 1
+
+ def __call__(self):
+ self.SPINNER_status = (self.SPINNER_status + 1) % len(self.SPINNER)
+ return self.SPINNER[self.SPINNER_status]
+
+
+quickSpinner = Spinner()
+
+
+def pretty_print_Request_header(req):
+ print(f"{req.method} {req.url}")
+
+ for k, v in req.headers.items():
+ print(f"{k}: {v}")
+
+
+def outputFileFromURL(url):
+ u = urlparse(url)
+ name = Path(u.path).name
+
+ finalDest = dirOut.joinpath(Path(name))
+
+ i = 1
+
+ while finalDest.exists():
+ s_name = Path(u.path).stem
+ s_suffix = Path(u.path).suffix
+
+ next_name = f"{s_name}_{i}{s_suffix}"
+ i += 1
+
+ finalDest = dirOut.joinpath(Path(next_name))
+
+ return str(finalDest)
+
+
+def downloadURL(s, url, idLink, output, headers={}):
+ r = s.get(url, stream=True, headers=headers)
+
+ if DEBUG_MODE:
+ pretty_print_Request_header(r.request)
+
+ u = urlparse(url)
+
+ if r.ok:
+ print(f"\r⬇️ {Path(u.path).name}", end='')
+
+ try:
+ with open(output, "wb") as f:
+ print(f"\r⬇️ {quickSpinner()} {Path(u.path).name}", end='')
+ chunk_size = 4 * 1024
+
+ for chunk in r.iter_content(chunk_size=chunk_size):
+ f.write(chunk)
+ print(f"\rDownloaded {Path(u.path).name} - {idLink}")
+ except Exception as e:
+ print(f"\r☠️ error {Path(u.path).name} - {idLink}")
+ if DEBUG_MODE:
+ print(e)
+
+
+def downloadImagesFromUserboard(index, link):
+ s = requests.Session()
+
+ u = urlparse(link)
+ p = Path(u.path)
+ boardName = p.stem
+
+ print(f"Link n°{index} - {boardName}", end='')
+ resp = s.get(link)
+
+ if DEBUG_MODE:
+ pretty_print_Request_header(resp.request)
+
+ findImagesRegex = re.compile('<img class="image" src="/images/t.png" data-src="(.+)\?width=300"')
+ if resp.ok and resp.status_code == 200:
+ for subIndex, urlImg in enumerate(findImagesRegex.findall(resp.content.decode('utf8'))):
+ match = re.search(r'(\d{7,})', urlImg)
+ if not match:
+ return
+
+ outputFile = outputFileFromURL(urlImg)
+ if DEBUG_MODE:
+ print(f"\n{urlImg} >> {outputFile}")
+
+ idLink = match.group(1)
+ print(f"Link n°{index}.{subIndex} - {boardName}>{idLink}", end='')
+
+ headers = {"Referer": f"{u.scheme}://{u.hostname}/"}
+ headers['Accept'] = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"
+ headers['Accept-encoding'] = "gzip, deflate, br"
+ downloadURL(s, urlImg, idLink, outputFile, headers)
+
+
+def downloadVideoFromScript(index, urlBase, idVid, scriptElem):
+ s = requests.Session()
+
+ print(f"Link n°{index} - Vidéo {idVid}", end='')
+
+ urlRegex = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
+ scriptHTML = lxml.html.tostring(scriptElem).decode('utf8')
+ startsWithCode = 'player.updateSrc([\n'
+
+ if scriptHTML.index(startsWithCode) >= 0:
+ startIndex = scriptHTML.index(startsWithCode)
+ scriptHTML = scriptHTML[startIndex + len(startsWithCode):]
+
+ match = re.search(urlRegex, scriptHTML)
+ if not match:
+ print(f"No found in {scriptHTML}")
+ return
+
+ urlVideo = match.group(0)
+ print(f"Link : {urlVideo} !", end='\n')
+
+ if '\'' in urlVideo:
+ urlVideo = urlVideo[:urlVideo.index('\'')]
+
+ outputFile = outputFileFromURL(urlVideo)
+
+ u = urlparse(urlBase)
+ headers = {"Referer": f"{u.scheme}://{u.hostname}/"}
+ headers['Accept'] = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"
+ headers['Accept-encoding'] = "gzip, deflate, br"
+
+ print(f"Download to {outputFile}")
+ downloadURL(s, urlVideo, idVid, outputFile, headers)
+
+
+def downloadImageFromPage(index, link):
+ s = requests.Session()
+ match = re.search(r'(\d{7,})', link)
+
+ if not match:
+ return
+
+ idLink = match.group(1)
+ print(f"Link n°{index} - {idLink}", end='')
+ resp = s.get(link)
+
+ if resp.ok and resp.status_code == 200:
+ try:
+ html = lxml.html.fromstring(resp.content)
+ imgs = html.cssselect(".image_frame img")
+
+ if len(imgs) == 0:
+ vids = html.cssselect(".image_frame video")
+ vid_script = html.cssselect(".image_frame script")
+ if len(vids) > 0:
+ print(": it's a video !")
+ downloadVideoFromScript(index, link, idLink, vid_script[-1])
+ else:
+ img = imgs[0]
+
+ if DEBUG_MODE:
+ print(lxml.html.tostring(img))
+
+ if 'src' in img.attrib:
+ url = img.attrib['src']
+ outputFile = outputFileFromURL(url)
+
+ if DEBUG_MODE:
+ print(f"\n{url} >> {outputFile}")
+
+ u = urlparse(link)
+
+ headers = {"Referer": f"{u.scheme}://{u.hostname}/"}
+ headers['Accept'] = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"
+ headers['Accept-encoding'] = "gzip, deflate, br"
+ downloadURL(s, url, idLink, outputFile, headers)
+
+ except:
+ return
+
+
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("-d", "--directory", help="Output directory")
+ parser.add_argument('files', nargs=argparse.REMAINDER)
+ args = parser.parse_args()
+
+ if args.directory:
+ global dirOut
+ dirOut = Path(args.directory)
+
+ if not dirOut.exists():
+ print(f"Output dir \"{dirOut}\" doesn't exists")
+ return
+
+ if not dirOut.is_dir():
+ print(f"Output dir \"{dirOut}\" isn't a directory")
+ return
+
+ for file in args.files:
+ with open(file) as f_listLink:
+ for index, link in enumerate(f_listLink.read().split()):
+ if '/pin/' in link:
+ downloadImageFromPage(index, link)
+ if '/user/' in link:
+ downloadImagesFromUserboard(index, link)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/pyhttpd.py b/pyhttpd.py
new file mode 100644
index 0000000..78bd156
--- /dev/null
+++ b/pyhttpd.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python3
+"""Use this instead of `python3 -m http.server` when you need CORS"""
+
+import argparse
+from http.server import HTTPServer, SimpleHTTPRequestHandler
+from pathlib import Path
+
+
+class CORSRequestHandler(SimpleHTTPRequestHandler):
+ def end_headers(self):
+ self.send_header('Access-Control-Allow-Origin', '*')
+ self.send_header('Access-Control-Allow-Methods', '*')
+ self.send_header('Cache-Control', 'no-store, no-cache, must-revalidate')
+ return super(CORSRequestHandler, self).end_headers()
+
+ def do_POST(self):
+ # Redirect POST to the correct endpoint
+ if self.path.startswith('/like'):
+ self.send_response(307)
+ self.send_header('Location', f'http://localhost:3000{self.path}')
+ self.end_headers()
+
+
+ def do_GET(self):
+ # Redirect likes to the correct endpoint
+ if self.path.startswith('/like'):
+ self.send_response(302)
+ self.send_header('Location', f'http://localhost:3000{self.path}')
+ self.end_headers()
+ elif self.path == '/':
+ self.path = '/fr/index.html'
+ else:
+ path = Path('./' + self.path)
+ # Redirect to index
+
+ print(self.path)
+ print(path)
+ if path.is_dir():
+ self.path += '/index.html'
+ else:
+ # If it has not an extension add .html
+ if not path.suffix:
+ self.path += '.html'
+
+ print(self.path)
+ return super(CORSRequestHandler, self).do_GET()
+
+ def send_error(self, code, message=None):
+ if code == 404:
+ if self.path.endswith('/'):
+ pass
+ elif not self.path.endswith('.html'):
+ self.code = 304
+ self.redirect_to = self.path + '.html'
+ self.path += '.html'
+ self.end_headers()
+ SimpleHTTPRequestHandler.send_error(self, code, message)
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(description='Automaticaly set up a HTTP server')
+ parser.add_argument('-H', '--host', type=str, default='0.0.0.0', help='Host to serve on')
+ parser.add_argument('-p', '--port', type=int, default=8080, help='Port to serve on')
+ args = parser.parse_args()
+
+ host, port = args.host, args.port
+
+ # Print the server's host and port
+ print(f"Opening http://{host}:{port}", end='')
+
+ httpd = HTTPServer((host, port), CORSRequestHandler)
+ print(f"\rServed on http://{host}:{port}")
+
+ httpd.serve_forever()