婆罗门
精华
|
战斗力 鹅
|
回帖 0
注册时间 2012-1-30
|
本帖最后由 革萌 于 2021-1-12 22:12 编辑
基本上Cookie那一行换成自己的就行
- nas_url = "http://192.168.50.136:8888/transfers/action"
这一行需要改成自己的tixati的webui地址,session.verify视自己的网络情况决定是否需要。(感谢一楼提醒)
解释一下哈 这个脚本是需要随便写个守护进程来周期性运行的,比如每小时执行一次,自动把最下面url列表里的url抓一遍,把抓到的种子丢到下载器去下载。
不过我跑了一个月,下了几百G完全不想看的图集,最后硬盘还坏了
比如把python代码存成ex.py的话,随便写个bat
rem Run the scraper once an hour, forever.
:loop
python ex.py
rem "sleep" is not a built-in cmd.exe command; "timeout" waits 3600 seconds instead.
timeout /t 3600 /nobreak >nul
goto loop
复制代码
忘了说,下载用的是tixati,这个bt下载器带一个web ui,换成aria2也是没啥问题的,只是需要改一下提交下载任务的代码
- import requests
- import bs4
- import re
- import sqlite3
- from datetime import datetime
- from requests.packages.urllib3.util.retry import Retry
- from requests.adapters import HTTPAdapter
def get_time():
    """Return the current local date and time as ``MM/DD/YYYY, HH:MM:SS``."""
    return datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
# Open (creating it if needed) the SQLite database file used by this script.
conn = sqlite3.connect('exhentai.db')
c = conn.cursor()

# One row per gallery found on a listing page, keyed by gallery id.
c.execute("""CREATE TABLE IF NOT EXISTS gallerys (
    name TEXT NOT NULL,
    url TEXT NOT NULL,
    gid TEXT PRIMARY KEY NOT NULL,
    tor_url TEXT,
    modified_date TEXT
)""")

# One row per torrent listed for a gallery, keyed by torrent hash.
c.execute("""CREATE TABLE IF NOT EXISTS gallerys_tor (
    gid TEXT NOT NULL,
    tor_url TEXT,
    hash TEXT PRIMARY KEY NOT NULL,
    filename TEXT,
    seeds INTEGER ,
    file_size REAL,
    upload_date TEXT,
    modified_date TEXT
)""")

# One row per torrent already submitted to the downloader, keyed by hash.
c.execute("""CREATE TABLE IF NOT EXISTS tor_task (
    hash TEXT PRIMARY KEY NOT NULL,
    url TEXT,
    status TEXT,
    modified_date TEXT
)""")
# Request headers sent with every exhentai.org request.
# The "Cookie" entry ships with empty values and must be filled in with the
# user's own cookie before the script is run.
headers = {
    "Connection": "keep-alive",
    "Cache-Control": "max-age=0",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    "Sec-Fetch-Site": "same-origin",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-User": "?1",
    "Sec-Fetch-Dest": "document",
    "Referer": "https://exhentai.org/",
    "Accept-Language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7",
    "Cookie": "ipb_member_id=; ipb_pass_hash=; igneous=; sl=dm_1; sk=; u=",
}
# Shared HTTP session used for every request the script makes.
session = requests.Session()
# NOTE(review): requests.Session does not document a ``keep_alive`` attribute,
# so this assignment appears to have no effect — confirm before relying on it.
session.keep_alive = False
# CA bundle file used for TLS verification; whether it is needed depends on
# the local network setup.
session.verify = 'GotoXCA.crt'
# Retry failed connection attempts (up to 30) with a 0.5 backoff factor.
retry = Retry(connect=30, backoff_factor=0.5)
adapter = HTTPAdapter(max_retries=retry)
session.mount('http://', adapter)
session.mount('https://', adapter)
def addTask(tor_file_url, nas_url="http://192.168.50.136:8888/transfers/action"):
    """Download a .torrent file and submit it to the downloader's web UI.

    The torrent hash is parsed out of ``tor_file_url``. If that hash is
    already present in the ``tor_task`` table nothing is done. Otherwise the
    .torrent file is fetched, posted to ``nas_url`` and, when the downloader
    answers with HTTP 200, recorded in ``tor_task``.

    Args:
        tor_file_url: Direct URL of the .torrent file on exhentai.org.
        nas_url: Endpoint of the downloader's web UI that accepts the upload.
    """
    print("下载种子 " + tor_file_url)
    hash_match = re.search(
        r"https://exhentai\.org/torrent/\d+/([\da-z]+)\.torrent\?p=[\da-z-]+",
        tor_file_url,
    )
    if hash_match is None:
        # Without a hash the task cannot be de-duplicated, so skip it rather
        # than crash on ``None.group``.
        print("unrecognized torrent url, skipped: " + tor_file_url)
        return
    info_hash = hash_match.group(1)

    # Parameterized query: the hash comes from scraped page content.
    c.execute("select count(1) from tor_task where hash = ?", (info_hash,))
    if c.fetchone()[0] > 0:
        return

    r = session.get(tor_file_url, headers=headers)
    if r.status_code != 200:
        return

    files = {'metafile': r.content}
    data = {'noautostart': "0", "addmetafile": "Add"}
    add_res = session.post(nas_url, files=files, data=data)
    # Compare the status code, not the Response object: ``add_res == 200`` is
    # always False, which meant no task was ever recorded as added.
    if add_res.status_code == 200:
        print("添加任务成功")
        c.execute(
            "insert into tor_task (hash,url,status,modified_date) VALUES(?, ?, ?,?) ",
            (info_hash, tor_file_url, "Add", get_time()),
        )
        conn.commit()
def processTor(tor_page_url):
    """Scrape a gallery's torrent list page and queue its newest torrent.

    Every torrent that can be parsed from the page is upserted into the
    ``gallerys_tor`` table. The torrent with the latest "Posted" time is then
    passed to ``addTask``. Entries that do not have the expected layout are
    skipped, and nothing is queued when the page lists no usable torrent.

    Args:
        tor_page_url: URL of the ``gallerytorrents.php`` page of a gallery.
    """
    print("process "+tor_page_url)
    r = session.get(tor_page_url, headers=headers)
    if r.status_code != 200:
        return

    gid_match = re.search(
        r"https://exhentai\.org/gallerytorrents\.php\?gid=(\d+)", tor_page_url
    )
    if gid_match is None:
        print("unrecognized torrent page url, skipped: " + tor_page_url)
        return
    gid_1 = gid_match.group(1)

    doc = bs4.BeautifulSoup(r.text, features="html.parser")
    torlist = []
    for f in doc.select("form"):
        rows = f.select("div table tr")
        cells = f.select("div table tr td")
        # The torrent link is read from the third row and the seed count from
        # the fourth cell, so anything smaller is not a torrent entry.
        if len(cells) <= 3 or len(rows) <= 2:
            continue
        link = rows[2].select_one("td a")
        if link is None:
            continue

        matches = (
            re.search(r"document\.location='([^']+)", link.get("onclick", "")),
            re.search(r"Posted:\s([\d\s:-]+)", cells[0].text),
            re.search(r"Size:\s(.+)", cells[1].text),
            re.search(r"Seeds:\s(\d+)", cells[3].text),
        )
        if any(m is None for m in matches):
            continue
        tor_url, tor_upload_date, tor_size, tor_seed = (m.group(1) for m in matches)
        # The date pattern also matches whitespace, so trim what it captured.
        tor_upload_date = tor_upload_date.strip()

        hash_match = re.search(
            r"https://exhentai\.org/torrent/\d+/([\da-z]+)\.torrent\?p=[\da-z-]+",
            tor_url,
        )
        if hash_match is None:
            continue
        try:
            uploaded_at = datetime.strptime(tor_upload_date, "%Y-%m-%d %H:%M")
        except ValueError:
            # An unparseable date cannot be ordered against the others.
            continue

        torlist.append({"upload_time": uploaded_at, "tor_url": tor_url})
        c.execute(
            "replace into gallerys_tor (gid,tor_url,seeds,file_size,upload_date,modified_date,hash,filename) VALUES(?, ?, ?, ?,?,?,?,?) ",
            (gid_1, tor_url, tor_seed, tor_size, tor_upload_date, get_time(),
             hash_match.group(1), link.text),
        )
    conn.commit()

    # Guard against pages with no usable torrent: indexing [-1] on an empty
    # list raised IndexError.
    if not torlist:
        return
    newest = sorted(torlist, key=lambda i: i["upload_time"])[-1]
    addTask(newest["tor_url"])
def processPage(page_url):
    """Scrape one gallery listing page and record every gallery on it.

    For each ``.gl1t`` entry the gallery name, URL and id are extracted. When
    the entry has a torrent link, that torrent page is handed to
    ``processTor``. The gallery is then upserted into the ``gallerys`` table.
    Entries missing the expected elements are skipped so that one malformed
    entry does not abort the whole page.

    Args:
        page_url: URL of the listing page to scrape.
    """
    print("处理页面 "+page_url)
    r = session.get(page_url, headers=headers)
    if r.status_code != 200:
        return

    doc = bs4.BeautifulSoup(r.text, features="html.parser")
    for file_entry in doc.select(".gl1t"):
        thumb = file_entry.select_one(".gl3t a img")
        link = file_entry.select_one(".gl3t a")
        if thumb is None or link is None:
            continue
        name = thumb.get('title')
        url = link.get('href')
        if name is None or url is None:
            continue
        gid_match = re.search(r"https://exhentai\.org/g/(\d+)", url)
        if gid_match is None:
            continue
        gid = gid_match.group(1)

        # An empty tor_url records that the gallery has no torrent link.
        tor_url = ""
        tor_url_a = file_entry.select_one(".gldown a")
        if tor_url_a is not None:
            popup = re.search(r"return\spopUp\('([^']+)", tor_url_a.get('onclick', ''))
            if popup is not None:
                tor_url = popup.group(1)
                processTor(tor_url)

        c.execute(
            "replace into gallerys (name, url, gid, tor_url, modified_date) VALUES(?, ?, ?, ?,?) ",
            (name, url, gid, tor_url, get_time()),
        )
        conn.commit()
def processTask(url_arr):
    """Run ``processPage`` on every URL in ``url_arr``, in order."""
    for url in url_arr:
        processPage(url)
# Highest "watched" page number to scrape; pages are visited from this one
# down to page 1, followed by the unnumbered first page.
_WATCHED_LAST_PAGE = 11

watch_urls = [
    f"https://exhentai.org/watched?page={page}"
    for page in range(_WATCHED_LAST_PAGE, 0, -1)
]
watch_urls.append("https://exhentai.org/watched")

pop_urls = [
    "https://exhentai.org/popular",
]
if __name__ == "__main__":
    # Scrape the popular page first, then the watched pages.
    processTask(pop_urls)
    processTask(watch_urls)
复制代码
|
|