阅读量:0
import os

import requests
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor  # ProcessPoolExecutor unused but kept

# Browser-like headers so the site serves the normal HTML pages instead of
# an anti-bot block page.
HEADERS = {
    'authority': 'www.umei.cc',
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'accept-language': 'zh-CN,zh;q=0.9',
    'cache-control': 'max-age=0',
    'referer': 'https://www.umei.cc/bizhitupian/',
    'sec-ch-ua': '"Chromium";v="122", "Not(A:Brand";v="24", "Google Chrome";v="122"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'document',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'same-origin',
    'sec-fetch-user': '?1',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.6261.95 Safari/537.36',
}

BASE_URL = 'https://www.umei.cc/'


def down(url):
    """Download every photo linked from one gallery-list page.

    Fetches *url* (a list page), follows each ``div.title a`` link to the
    photo's detail page, takes the last ``div.big-pic a img`` element as the
    full-size image, and saves it to ``img/<title>.jpg``.

    Fixes vs. the original: the original passed an undefined ``cookies``
    name to every request (NameError inside the worker thread, silently
    swallowed by the executor); files are now written with ``with`` so the
    handle is closed; the ``img`` directory is created if missing; and a
    failure on one gallery no longer aborts the rest of the page.
    """
    os.makedirs('img', exist_ok=True)
    response = requests.get(url, headers=HEADERS)
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'lxml')
    for link in soup.select('div.title a'):
        # href looks like ".../?id=<path>"; the original kept only the part
        # after the last "=" and re-rooted it on the site base URL.
        page_url = BASE_URL + link['href'].split('=')[-1]
        name = link.text
        print(page_url, name)
        try:
            detail = requests.get(url=page_url, headers=HEADERS)
            detail.encoding = 'utf-8'
            detail_soup = BeautifulSoup(detail.text, 'lxml')
            # The last <img> under div.big-pic is the full-size picture.
            img_src = detail_soup.select('div.big-pic a img')[-1].get('src')
            img = requests.get(url=img_src, headers=HEADERS)
            with open(f'img/{name}.jpg', 'wb') as fh:
                fh.write(img.content)
        except (requests.RequestException, IndexError, OSError) as exc:
            # Best-effort crawl: one bad gallery must not kill the worker.
            print(f'skip {page_url}: {exc}')


if __name__ == "__main__":
    # 216 list pages; the work is I/O-bound, so a small thread pool is enough.
    with ThreadPoolExecutor(10) as pool:
        for i in range(1, 217):
            page = f'https://www.umei.cc/meinvtupian/rentiyishu/index_{i}.htm'
            pool.submit(down, page)