diff options
-rw-r--r-- | Makefile | 2 | ||||
-rwxr-xr-x | autoDHCP.py | 18 | ||||
-rw-r--r-- | autoJPEG.py | 51 | ||||
-rwxr-xr-x | autoMMS.py | 10 | ||||
-rwxr-xr-x | bot4chan.py | 55 | ||||
-rw-r--r-- | clip2file.py | 123 | ||||
-rwxr-xr-x | coWifi | 4 | ||||
-rwxr-xr-x | dlr34.py | 302 | ||||
-rwxr-xr-x | down.sh | 8 | ||||
-rwxr-xr-x | down_imgs.py | 226 | ||||
-rw-r--r-- | pyhttpd.py | 74 |
11 files changed, 837 insertions, 36 deletions
@@ -1,4 +1,4 @@ -INSTALL_LIST=autoDHCP.sh autoWall.sh cmount.sh coWifi.sh cumount.sh imgs2pdf.sh light2.sh toMp3.sh bot4chan.py track.py autoMMS.py +INSTALL_LIST=autoDHCP.py autoWall.sh cmount.sh coWifi.sh cumount.sh imgs2pdf.sh light2.sh toMp3.sh bot4chan.py track.py autoMMS.py pyhttpd.py clip2file.py install: @if [ -z $(filter-out $@,$(MAKECMDGOALS)) ] ; then \ diff --git a/autoDHCP.py b/autoDHCP.py index ce699c4..cdada36 100755 --- a/autoDHCP.py +++ b/autoDHCP.py @@ -9,21 +9,21 @@ import subprocess import tempfile -def guess_wifi(): +def guess_wifi() -> list[str]: return [interface for interface in os.listdir('/sys/class/net/') if exists(f'/sys/class/net/{interface}/wireless')] -def guess_nowifi(): +def guess_nowifi() -> list[str]: return [interface for interface in os.listdir('/sys/class/net/') if not exists(f'/sys/class/net/{interface}/wireless')] -def guess_interface(): +def guess_interface() -> list[str]: return [interface for interface in os.listdir('/sys/class/net/')] -def guess_internet(): +def guess_internet() -> list[str]: com = subprocess.Popen(['ip', 'route', 'show'], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) stdout, stderr = com.communicate() @@ -39,7 +39,7 @@ def guess_internet(): return list(set(internet_i)) -def stripedLine(line): +def stripedLine(line) -> str: words = line.split(' ') if 'dev' in words and words.index('dev'): return words[words.index('dev') + 1] @@ -47,10 +47,11 @@ def stripedLine(line): return '' -def menu(listEntry, strInput, refresh=None): +def menu(listEntry, strInput, refresh=None) -> int: if listEntry == []: print(f"Error menu: Empty list\n({strInput})", file=sys.stderr) - exit(1) + sys.exit(1) + c = -1 if len(listEntry) == 1: c = 0 @@ -73,7 +74,7 @@ def menu(listEntry, strInput, refresh=None): ip = '10.5.6.11' netRange = '24' -ssid = '🦄_🌈' +ssid = '🦖' password = 'chocoball' hostapd_conf = ''' @@ -252,6 +253,7 @@ if __name__ == '__main__': if arg.teth: os.system('sysctl net.ipv4.ip_forward=1') + os.system('sysctl net.ipv6.conf.all.forwarding=1') os.system(f'iptables -t nat -A POSTROUTING -o {inetInt} -j MASQUERADE') os.system('iptables -A FORWARD -m conntrack --ctstate' ' RELATED,ESTABLISHED -j ACCEPT') diff --git a/autoJPEG.py b/autoJPEG.py new file mode 100644 index 0000000..6297dac --- /dev/null +++ b/autoJPEG.py @@ -0,0 +1,51 @@ +from os import listdir +from os.path import isfile, join + + +ORIG_FILES = '/var/spool/sms/inbox/' +DEST_FILES = '/var/spool/sms/mms/' + +JPEG_TAG = b'\xFF\xD8\xFF\xE0\x00\x10JFIF' + + +def listMMS(): + return sorted([f for f in listdir(ORIG_FILES) if + isfile(join(ORIG_FILES, f)) and f.endswith('.bin')])[::-1] + +def listNewMMS(): + mmss = listMMS() + + listFile = [f[:f.rfind('_')] for f in listdir(DEST_FILES) if + isfile(join(DEST_FILES, f)) and f.rfind('_') > 0] + + for mms in mmss: + mmsTitle = mms[:-4] + if mmsTitle not in listFile: + yield mms + + +def extractJPG(mmsFile, name): + i = 1 + + fileOut = None + + with open('mmsFile', 'rb') as f: + for line in f: + if JPEG_TAG in line: + index = line.index(JPEG_TAG) + if fileOut is not None: + fileOut.write(line[:index]) + fileOut.close() + fileOut = open(DEST_FILES + name + '_' + str(i) + '.jpg', 'wb') + i += 1 + fileOut.write(line[index:]) + else: + if fileOut is not None: + fileOut.write(line) + + +if __name__ == "__main__": + for f in listNewMMS(): + print(f) + extractJPG('./mmsFile3', 'romane2') + # listMMS = listMMS(): @@ -17,13 +17,13 @@ if __name__ == "__main__": with open(sys.argv[1],'r') as f: for line in f: if "Text00 = " in line: - info = line[9:-1] # 9 = len('Text00 = ') ,strip the final '\n' + info = line.split(' = ')[1].strip() # 9 = len('Text00 = ') ,strip the final '\n' elif line.startswith('Text'): - info += line[9:-1] + info += line.split(' = ')[1].strip() if line.startswith('DateTime'): - date = line[9:-1] - time = time.strptime(date, '%Y%m%dT%H%M%S') - if info : + date = line.split(' = ')[1].strip() + timeInfo = time.strptime(date, '%Y%m%dT%H%M%S') + if info: a = info else: print("Error : MMS invalide format, no data info found") diff --git a/bot4chan.py b/bot4chan.py index f1ebbab..c351e7c 100755 --- a/bot4chan.py +++ b/bot4chan.py @@ -1,41 +1,56 @@ -#!/usr/bin/python -# -*-coding:utf-8 -* +#!/bin/env python import re import os import sys import tempfile -sujet = [] -sujetT = [] + +usage = """Usage: + $ bot4chan "URL_BOARD_4CHAN" ["DIRECTORY"] + + DIRECTORY is optional and by default is "dump\"""" + +# Use http instead of https to avoid the overhead of encryption +PROTOCOL = 'http' +REGEX = '<a class="fileThumb" href="(.*?)" target="_blank"( data-m)?>' + + +if len(sys.argv) <= 1 or sys.argv[1] in ['-h', '--help']: + print(usage) + sys.exit(0) new_file, filename = tempfile.mkstemp() if not new_file: filename = "index.html" -os.system("wget " + sys.argv[1] + " -O " + filename + " -N -q") -os.system("sed -i 's/<\\/a>/<\\/a>\\n/g' " + filename) - -regex = '<span class="filesize">(File : |File)<a href="(.*?)" \ - target="_blank">(.*?)</a>' -regex = '<a class="fileThumb" href="(.*?)" target="_blank">' - dumpDir = "dump" - if len(sys.argv) >= 3: dumpDir = sys.argv[2] -os.system("mkdir -p " + dumpDir) +# Download the board +os.system(f"wget {sys.argv[1]} -O {filename} -N -q") +# The result is on a single line so we put line feed after every link to space markup out +# We use sed instead of python to not read the whole file +os.system(f"sed -i 's/<\\/a>/<\\/a>\\n/g' {filename}") + +# Create the output directory +os.system(f"mkdir -p {dumpDir}") + +# We look for a image link on each line since one line should have a uniq link with open(filename, 'r') as f: for line in f: - yes = re.search(regex, line) + yes = re.search(REGEX, line) if yes: - print(yes.group(1)) - sujetT.append([yes.group(1)]) - print("Téléchargement de : " + yes.group(1)) - os.system("wget " + 'http:' + yes.group(1) + " -N -q") - os.system("mv " + yes.group(1)[yes.group(1).rfind('/')+1:] + " " + - dumpDir) + image = yes.group(1) + print(f"Téléchargement de : {image}") + res = os.system(f"wget {PROTOCOL}:{image} -N -q") + if res != 0: + print(f"💀 Error downloading imgage : {image}") + continue + res = os.system(f"mv {image[image.rfind('/') + 1:]} {dumpDir}") + if res != 0: + print(f"💀 Error moving imgage to {dumpDir}") os.remove(filename) diff --git a/clip2file.py b/clip2file.py new file mode 100644 index 0000000..d62b725 --- /dev/null +++ b/clip2file.py @@ -0,0 +1,123 @@ +#!/bin/env python + +import subprocess +import datetime +import sys +import os +import argparse + + +""" + This program will write the clipboard to a file. + The format of the file is determined by the available targets and the + priority list. +""" + +OUTPUT_DIR = "~" + +def get_clipboard_content(target): + # Get the clipboard contents. + try: + clipfile = subprocess.check_output( + ['xclip', '-o', '-t', target, '-selection', 'clipboard'] + ) + except subprocess.CalledProcessError as e: + print(f"Failed to get clipboard contents: {e}", file=sys.stderr) + sys.exit(e.returncode) + # Check if the clipboard contents are empty. + if not clipfile: + print("Clipboard is empty.", file=sys.stderr) + sys.exit(1) + + return clipfile + +def get_available_targets(): + # Get the lists of available targets. + # Executes `xclip -o -t TARGETS -selection clipboard` and get the output + # as a list of strings. + try: + output = subprocess.check_output( + ['xclip', '-o', '-t', 'TARGETS', '-selection', 'clipboard'] + ) + except subprocess.CalledProcessError as e: + print(f"xclip failed: {e}", file=sys.stderr) + sys.exit(e.returncode) + + return output.decode('utf-8').split('\n') + # The output is a list of strings, each string is a target. + +def save_file(clipfile, filename, output_dir, ext): + # Write the clipboard contents to a file. + pathfile = os.path.expanduser(f"{output_dir}/{filename}.{ext}") + + try: + with open(pathfile, 'wb') as f: + try: + f.write(clipfile) + except IOError as e: + print(f"Failed to write to file {filename} in {output_dir}: {e}", file=sys.stderr) + sys.exit(e.errno) + except IOError as e: + print(f"Failed to open file {filename} in {output_dir}: {e}", file=sys.stderr) + + +def main(filename, output_dir): + # The priority of the target. + # The first target in the list is the most important. + # The final target will be the first of this list matching the available targets + priority = [ + ("image/avif", "avif"), + ("image/png", "png"), + ("text/plain", "txt"), + ("UTF8_STRING", "txt"), + ] + + targets = get_available_targets() + + # Get the first target in the priority list that is available. + # If no target is found, the script exits. + target = None + ext = None + for f, e in priority: + if f in targets: + target, ext = f, e + break + + if target is None or not isinstance(target, str): + print("No supported clipboard target found.", file=sys.stderr) + sys.exit(1) + + clipfile = get_clipboard_content(target) + + save_file(clipfile, filename, output_dir, ext) + + print(f"Saved clipboard contents to {output_dir}/{filename}.{ext}") + + +if __name__ == '__main__': + date = datetime.datetime.now().isoformat() + default_filename = f"{date}_clip" + + parser = argparse.ArgumentParser( + description="Save clipboard contents to a file." + ) + # Select the directory + parser.add_argument( + '-d', '--directory', + help="Directory to save the clipboard contents to.", + default=OUTPUT_DIR, + type=str + ) + # Select the filename + parser.add_argument( + '-f', '--filename', + help="Filename to save the clipboard contents to.", + default=default_filename, + type=str + ) + + args = parser.parse_args() + + main(args.filename, args.directory) + + @@ -26,8 +26,8 @@ function guess_wifi { -if [ "$1" == 'off' ] ; then - if [ -z "$2" ] ; then +if [ "$1" == 'off' ] ; then + if [ -z "$2" ] ; then interface=$(guess_wifi '') else interface=$(guess_wifi 'all') diff --git a/dlr34.py b/dlr34.py new file mode 100755 index 0000000..5045274 --- /dev/null +++ b/dlr34.py @@ -0,0 +1,302 @@ +#!/bin/env python + +import os +import re +import requests +import sys +import csv +import lxml +import json +from urllib.parse import urlparse +from pathlib import Path +import lxml.html +import argparse + + +DEBUG_MODE = True +dirOut = Path("r34") + + +class Spinner(object): + def __init__(self): + self.SPINNER = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'] + self.SPINNER_status = 1 + + def __call__(self): + self.SPINNER_status = (self.SPINNER_status + 1) % len(self.SPINNER) + return self.SPINNER[self.SPINNER_status] + + +quickSpinner = Spinner() + + +def pretty_print_Request_header(req): + print(f"{req.method} {req.url}") + + for k, v in req.headers.items(): + print(f"{k}: {v}") + + +def outputFileFromURL(url): + u = urlparse(url) + name = Path(u.path).name + alreadyExists = False + + finalDest = dirOut.joinpath(Path(name)) + + i = 1 + + while finalDest.exists(): + alreadyExists = True + + s_name = Path(u.path).stem + s_suffix = Path(u.path).suffix + + next_name = f"{s_name}_{i}{s_suffix}" + i += 1 + + finalDest = dirOut.joinpath(Path(next_name)) + + return alreadyExists, str(finalDest) + + +def downloadURL(s, url, idLink, output, headers={}): + r = s.get(url, stream=True, headers=headers) + + u = urlparse(url) + + if r.ok: + if DEBUG_MODE: + print(f"\n{url} >> {output}") + + print(f"\r⬇️ {Path(u.path).name}", end='') + + try: + with open(output, "wb") as f: + print(f"\r⬇️ {quickSpinner()} {Path(u.path).name}", end='') + chunk_size = 4 * 1024 + + for chunk in r.iter_content(chunk_size=chunk_size): + f.write(chunk) + print(f"\rDownloaded {Path(u.path).name} - {idLink}") + except Exception as e: + print(f"\r☠️ error {Path(u.path).name} - {idLink}") + if DEBUG_MODE: + print(e) + +def downloadFromItems(items, s, u): + replace = { + 'mov480.': 'mov.', + 'mov720.': 'mov.', + 'mov256.': 'mov.', + 'pic256.': 'pic.', + 'pic480.': 'pic.', + 'pic720.': 'pic.', + 'picsmall.': 'pic.' + } + + for item in items: + isDownloaded = False + isVideo = item['duration'] is not None + for file in item['imageLinks']: + url = file['url'] + if isVideo and not url.endswith('.mp4'): + continue + + for r,t in replace.items(): + if r in url: + url = url.replace(r, t) + e, outputFile = outputFileFromURL(url) + if e: + print("Already downloaded") + return + + headers = {"Referer": f"{u.scheme}://{u.hostname}/"} + headers['Accept'] = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8" + headers['Accept-encoding'] = "gzip, deflate, br" + downloadURL(s, url, "???", outputFile, headers) + isDownloaded = True + break + if isDownloaded: + break + + +def downloadImagesFromPlaylist(index, link): + s = requests.Session() + + u = urlparse(link) + p = Path(u.path) + playlistId = p.stem + + print(f"Link n°{index} - {playlistId}") + + nbPage = 0 + while True: + urlPlaylistFormat = f"https://rule34.world/api/playlist-item?playlistId={playlistId}&Skip={nbPage*60}&Take=60&DisableTotal=true" + resp = s.get(urlPlaylistFormat) + if resp.ok and resp.status_code == 200: + data = json.loads(resp.content) + if len(data['items']) > 0: + print(f"Ok ! {len(data['items'])}") + downloadFromItems(data['items'], s, u) + else: + break + + nbPage += 1 + + return + try: + html = lxml.html.fromstring(resp.content) + boxes = html.cssselect("div.box a.boxInner") + for box in boxes: + url = box.attrib['href'] + url = f"{u.scheme}://{u.hostname}{url}" + downloadImageFromPage(index, url) + except Exception as e: + print(f"\r☠️ error {Path(u.path).name} - {idLink}") + if DEBUG_MODE: + print(e) + + +def downloadImagesFromUserboard(index, link): + s = requests.Session() + + u = urlparse(link) + p = Path(u.path) + boardName = p.stem + + print(f"Link n°{index} - {boardName}", end='') + resp = s.get(link) + + findImagesRegex = re.compile('<img class="image" src="/images/t.png" data-src="(.+)\?width=300"') + if resp.ok and resp.status_code == 200: + for subIndex, urlImg in enumerate(findImagesRegex.findall(resp.content.decode('utf8'))): + match = re.search(r'(\d{7,})', urlImg) + if not match: + return + + e, outputFile = outputFileFromURL(urlImg) + if e: + print("Already downloaded") + return + + if DEBUG_MODE: + print(f"\n{urlImg} >> {outputFile}") + + idLink = match.group(1) + print(f"Link n°{index}.{subIndex} - {boardName}>{idLink}", end='') + + headers = {"Referer": f"{u.scheme}://{u.hostname}/"} + headers['Accept'] = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8" + headers['Accept-encoding'] = "gzip, deflate, br" + downloadURL(s, urlImg, idLink, outputFile, headers) + + +def downloadVideoFromSource(index, urlBase, idVid, urlVideo): + s = requests.Session() + + print(f"Link n°{index} - Vidéo {idVid}", end='') + print(f"Link : {urlVideo} !", end='\n') + + + u = urlparse(urlBase) + urlVideo = urlVideo.replace('mov480', 'mov') + + e, outputFile = outputFileFromURL(urlVideo) + if e: + print("Already downloaded") + return + + headers = {"Referer": f"{u.scheme}://{u.hostname}/"} + headers['Accept'] = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8" + headers['Accept-encoding'] = "gzip, deflate, br" + + print(f"Download to {outputFile}") + downloadURL(s, urlVideo, idVid, outputFile, headers) + + +def downloadImageFromPage(index, link): + s = requests.Session() + match = re.search(r'(\d{6,})', link) + + if not match: + return + + idLink = match.group(1) + print(f"Link n°{index} - {idLink}\n", end='') + resp = s.get(link) + + if resp.ok and resp.status_code == 200: + try: + html = lxml.html.fromstring(resp.content) + imgs = html.cssselect("img.img.shadow-base") + + if len(imgs) == 0: + vids = html.cssselect("video.video.shadow-base source") + if len(vids) > 0: + url = vids[0].attrib['src'] + + u = urlparse(link) + if url[0] == '/': + url = f"{u.scheme}://{u.hostname}{url}" + + downloadVideoFromSource(index, link, idLink, url) + else: + img = imgs[0] + + if 'src' in img.attrib: + url = img.attrib['src'] + u = urlparse(link) + if url[0] == '/': + url = f"{u.scheme}://{u.hostname}{url}" + + url = url.replace('picsmall', 'pic') + + e, outputFile = outputFileFromURL(url) + if e: + print("Already downloaded") + return + + headers = {"Referer": f"{u.scheme}://{u.hostname}/"} + headers['Accept'] = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8" + headers['Accept-encoding'] = "gzip, deflate, br" + downloadURL(s, url, idLink, outputFile, headers) + + except: + print(f"\r☠️ error {Path(u.path).name} - {idLink}") + if DEBUG_MODE: + print(e) + return + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("-d", "--directory", help="Output directory") + parser.add_argument('files', nargs=argparse.REMAINDER) + args = parser.parse_args() + + if args.directory: + global dirOut + dirOut = Path(args.directory) + + if not dirOut.exists(): + print(f"Output dir \"{dirOut}\" doesn't exists") + return + + if not dirOut.is_dir(): + print(f"Output dir \"{dirOut}\" isn't a directory") + return + + for file in args.files: + with open(file) as f_listLink: + for index, link in enumerate(f_listLink.read().split()): + if '/post/' in link: + downloadImageFromPage(index, link) + if '/user/' in link: + downloadImagesFromUserboard(index, link) + if '/playlists/' in link: + downloadImagesFromPlaylist(index, link) + + +if __name__ == "__main__": + main() @@ -0,0 +1,8 @@ +#!/bin/env fish + + +while read line + while not yt-dlp --abort-on-unavailable-fragment --no-playlist -R 2 "$line" + sleep 5m + end +end < "$argv[1]" diff --git a/down_imgs.py b/down_imgs.py new file mode 100755 index 0000000..5d44b05 --- /dev/null +++ b/down_imgs.py @@ -0,0 +1,226 @@ +#!/bin/env python + +import os +import re +import requests +import sys +import csv +import lxml +from urllib.parse import urlparse +from pathlib import Path +import lxml.html +import argparse + + +DEBUG_MODE = False +dirOut = Path("ss") + + +class Spinner(object): + def __init__(self): + self.SPINNER = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'] + self.SPINNER_status = 1 + + def __call__(self): + self.SPINNER_status = (self.SPINNER_status + 1) % len(self.SPINNER) + return self.SPINNER[self.SPINNER_status] + + +quickSpinner = Spinner() + + +def pretty_print_Request_header(req): + print(f"{req.method} {req.url}") + + for k, v in req.headers.items(): + print(f"{k}: {v}") + + +def outputFileFromURL(url): + u = urlparse(url) + name = Path(u.path).name + + finalDest = dirOut.joinpath(Path(name)) + + i = 1 + + while finalDest.exists(): + s_name = Path(u.path).stem + s_suffix = Path(u.path).suffix + + next_name = f"{s_name}_{i}{s_suffix}" + i += 1 + + finalDest = dirOut.joinpath(Path(next_name)) + + return str(finalDest) + + +def downloadURL(s, url, idLink, output, headers={}): + r = s.get(url, stream=True, headers=headers) + + if DEBUG_MODE: + pretty_print_Request_header(r.request) + + u = urlparse(url) + + if r.ok: + print(f"\r⬇️ {Path(u.path).name}", end='') + + try: + with open(output, "wb") as f: + print(f"\r⬇️ {quickSpinner()} {Path(u.path).name}", end='') + chunk_size = 4 * 1024 + + for chunk in r.iter_content(chunk_size=chunk_size): + f.write(chunk) + print(f"\rDownloaded {Path(u.path).name} - {idLink}") + except Exception as e: + print(f"\r☠️ error {Path(u.path).name} - {idLink}") + if DEBUG_MODE: + print(e) + + +def downloadImagesFromUserboard(index, link): + s = requests.Session() + + u = urlparse(link) + p = Path(u.path) + boardName = p.stem + + print(f"Link n°{index} - {boardName}", end='') + resp = s.get(link) + + if DEBUG_MODE: + pretty_print_Request_header(resp.request) + + findImagesRegex = re.compile('<img class="image" src="/images/t.png" data-src="(.+)\?width=300"') + if resp.ok and resp.status_code == 200: + for subIndex, urlImg in enumerate(findImagesRegex.findall(resp.content.decode('utf8'))): + match = re.search(r'(\d{7,})', urlImg) + if not match: + return + + outputFile = outputFileFromURL(urlImg) + if DEBUG_MODE: + print(f"\n{urlImg} >> {outputFile}") + + idLink = match.group(1) + print(f"Link n°{index}.{subIndex} - {boardName}>{idLink}", end='') + + headers = {"Referer": f"{u.scheme}://{u.hostname}/"} + headers['Accept'] = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8" + headers['Accept-encoding'] = "gzip, deflate, br" + downloadURL(s, urlImg, idLink, outputFile, headers) + + +def downloadVideoFromScript(index, urlBase, idVid, scriptElem): + s = requests.Session() + + print(f"Link n°{index} - Vidéo {idVid}", end='') + + urlRegex = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+' + scriptHTML = lxml.html.tostring(scriptElem).decode('utf8') + startsWithCode = 'player.updateSrc([\n' + + if scriptHTML.index(startsWithCode) >= 0: + startIndex = scriptHTML.index(startsWithCode) + scriptHTML = scriptHTML[startIndex + len(startsWithCode):] + + match = re.search(urlRegex, scriptHTML) + if not match: + print(f"No found in {scriptHTML}") + return + + urlVideo = match.group(0) + print(f"Link : {urlVideo} !", end='\n') + + if '\'' in urlVideo: + urlVideo = urlVideo[:urlVideo.index('\'')] + + outputFile = outputFileFromURL(urlVideo) + + u = urlparse(urlBase) + headers = {"Referer": f"{u.scheme}://{u.hostname}/"} + headers['Accept'] = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8" + headers['Accept-encoding'] = "gzip, deflate, br" + + print(f"Download to {outputFile}") + downloadURL(s, urlVideo, idVid, outputFile, headers) + + +def downloadImageFromPage(index, link): + s = requests.Session() + match = re.search(r'(\d{7,})', link) + + if not match: + return + + idLink = match.group(1) + print(f"Link n°{index} - {idLink}", end='') + resp = s.get(link) + + if resp.ok and resp.status_code == 200: + try: + html = lxml.html.fromstring(resp.content) + imgs = html.cssselect(".image_frame img") + + if len(imgs) == 0: + vids = html.cssselect(".image_frame video") + vid_script = html.cssselect(".image_frame script") + if len(vids) > 0: + print(": it's a video !") + downloadVideoFromScript(index, link, idLink, vid_script[-1]) + else: + img = imgs[0] + + if DEBUG_MODE: + print(lxml.html.tostring(img)) + + if 'src' in img.attrib: + url = img.attrib['src'] + outputFile = outputFileFromURL(url) + + if DEBUG_MODE: + print(f"\n{url} >> {outputFile}") + + u = urlparse(link) + + headers = {"Referer": f"{u.scheme}://{u.hostname}/"} + headers['Accept'] = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8" + headers['Accept-encoding'] = "gzip, deflate, br" + downloadURL(s, url, idLink, outputFile, headers) + + except: + return + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("-d", "--directory", help="Output directory") + parser.add_argument('files', nargs=argparse.REMAINDER) + args = parser.parse_args() + + if args.directory: + global dirOut + dirOut = Path(args.directory) + + if not dirOut.exists(): + print(f"Output dir \"{dirOut}\" doesn't exists") + return + + if not dirOut.is_dir(): + print(f"Output dir \"{dirOut}\" isn't a directory") + return + + for file in args.files: + with open(file) as f_listLink: + for index, link in enumerate(f_listLink.read().split()): + if '/pin/' in link: + downloadImageFromPage(index, link) + if '/user/' in link: + downloadImagesFromUserboard(index, link) + + +if __name__ == "__main__": + main() diff --git a/pyhttpd.py b/pyhttpd.py new file mode 100644 index 0000000..78bd156 --- /dev/null +++ b/pyhttpd.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python3 +"""Use this instead of `python3 -m http.server` when you need CORS""" + +import argparse +from http.server import HTTPServer, SimpleHTTPRequestHandler +from pathlib import Path + + +class CORSRequestHandler(SimpleHTTPRequestHandler): + def end_headers(self): + self.send_header('Access-Control-Allow-Origin', '*') + self.send_header('Access-Control-Allow-Methods', '*') + self.send_header('Cache-Control', 'no-store, no-cache, must-revalidate') + return super(CORSRequestHandler, self).end_headers() + + def do_POST(self): + # Redirect POST to the correct endpoint + if self.path.startswith('/like'): + self.send_response(307) + self.send_header('Location', f'http://localhost:3000{self.path}') + self.end_headers() + + + def do_GET(self): + # Redirect likes to the correct endpoint + if self.path.startswith('/like'): + self.send_response(302) + self.send_header('Location', f'http://localhost:3000{self.path}') + self.end_headers() + elif self.path == '/': + self.path = '/fr/index.html' + else: + path = Path('./' + self.path) + # Redirect to index + + print(self.path) + print(path) + if path.is_dir(): + self.path += '/index.html' + else: + # If it has not an extension add .html + if not path.suffix: + self.path += '.html' + + print(self.path) + return super(CORSRequestHandler, self).do_GET() + + def send_error(self, code, message=None): + if code == 404: + if self.path.endswith('/'): + pass + elif not self.path.endswith('.html'): + self.code = 304 + self.redirect_to = self.path + '.html' + self.path += '.html' + self.end_headers() + SimpleHTTPRequestHandler.send_error(self, code, message) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Automaticaly set up a HTTP server') + parser.add_argument('-H', '--host', type=str, default='0.0.0.0', help='Host to serve on') + parser.add_argument('-p', '--port', type=int, default=8080, help='Port to serve on') + args = parser.parse_args() + + host, port = args.host, args.port + + # Print the server's host and port + print(f"Opening http://{host}:{port}", end='') + + httpd = HTTPServer((host, port), CORSRequestHandler) + print(f"\rServed on http://{host}:{port}") + + httpd.serve_forever() |