# -*- encoding:utf-8 -*-

'''
@Author : dingjiawen
@Date : 2024/03/27 14:00
@Usage :
@Desc : Crawl the detail page of a specified chapter from Qidian
'''
import os
import re

import requests
from pyquery import PyQuery as pq

# Characters that are illegal (or path-significant) in file names on common
# file systems, plus ASCII control characters.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def _safe_filename(name):
    """Return *name* with file-system-unsafe characters replaced by '_'."""
    cleaned = _UNSAFE_FILENAME_CHARS.sub('_', name).strip().rstrip('. ')
    return cleaned or 'untitled'


def crawel_detail(book_id, chatpter_id, *, timeout=10.0, output_dir="./output"):
    """Download one Qidian chapter page and save its text to a file.

    The page at ``https://www.qidian.com/chapter/{book_id}/{chatpter_id}/``
    is fetched, the first ``.title`` element is used as the chapter title,
    and the text of every ``.content p`` element is written, one per line,
    to ``{output_dir}/{title}.txt`` (UTF-8).

    Args:
        book_id: Book identifier used in the chapter URL and the Referer.
        chatpter_id: Chapter identifier used in the chapter URL.
        timeout: Seconds to wait for the server before giving up.
        output_dir: Directory the text file is written to; created if it
            does not exist.

    Raises:
        requests.RequestException: On network failure, timeout, or a
            non-2xx HTTP status.
        ValueError: If the page contains no usable ``.title`` text.
    """
    url = f'https://www.qidian.com/chapter/{book_id}/{chatpter_id}/'

    # NOTE: "Accept-Encoding" is intentionally not set here. The original
    # value advertised "br, zstd", which requests can only decode when the
    # optional brotli/zstandard packages are installed; leaving it out lets
    # requests advertise only the encodings it can actually decode.
    # NOTE(review): the Cookie below looks like a captured browser session and
    # will presumably expire -- confirm whether it is still required.
    header = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Cookie": "newstatisticUUID=1669693998_1518827460; _csrfToken=9VUvhprKzOz80xLUYXqgOzIcm011iQ9DfhwSyUD2; Hm_lvt_f00f67093ce2f38f215010b699629083=1701842016; supportwebp=true; supportWebp=true; _ga=GA1.1.67407022.1669694001; _ga_FZMMH98S83=GS1.1.1701860853.4.1.1701860855.0.0.0; _ga_PFYW0QLV3P=GS1.1.1701860853.4.1.1701860855.0.0.0; fu=923381569; traffic_utm_referer=https%3A%2F%2Fcn.bing.com%2F; trkf=1; w_tsfp=ltvgWVEE2utBvS0Q6KvslUKvEj87Z2R7xFw0D+M9Os09AaYjU5yD14d5vdfldCyCt5Mxutrd9MVxYnGAU9QkfxcSQs+Qb5tH1VPHx8NlntdKRQJtA5KOD1McdbpzvTJCL24LIRDu3mt3ItRJmONgj14K5yZ137ZlCa8hbMFbixsAqOPFm/97DxvSliPXAHGHM3wLc+6C6rgv8LlSgWyEtBu/eRlhAcxD0EaT3CwfCHoi9BPNc+lYNx+oJ8arTe9Gvy/hk2upNdLxiEox60I3sB49AtX02TXKL3ZEIAtrZViygr4ke66rNuYluTEZXL5TWwpN/FxC9qdk605dDi69YCeHAq555FZRF/pYrMuocHqW0JPrcltbvN4px1kl9g==",
        "Host": "www.qidian.com",
        "Pragma": "no-cache",
        "Referer": f"https://www.qidian.com/book/{book_id}/",
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "same-origin",
        "Sec-Fetch-User": "?1",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
        "sec-ch-ua": "\"Chromium\";v=\"122\", \"Not(A:Brand\";v=\"24\", \"Google Chrome\";v=\"122\"",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "Windows"
    }

    # A timeout keeps the call from hanging forever; raise_for_status stops
    # an error page from being parsed as if it were a chapter.
    response = requests.get(url, headers=header, timeout=timeout)
    response.raise_for_status()

    doc = pq(response.text)

    # Indexing a PyQuery selection yields the underlying element; its .text
    # is only the text directly inside the tag and may be None.
    title_nodes = doc('.title')
    title = (title_nodes[0].text or '').strip() if title_nodes else ''
    if not title:
        raise ValueError(f"no chapter title found at {url}")

    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, f"{_safe_filename(title)}.txt")

    with open(output_path, 'w', encoding='utf-8') as file:
        # Write each paragraph on its own line, appending a newline.
        file.writelines(f"{line.text()}\n" for line in doc('.content p').items())

    print(f"爬取成功【{title}】")


if __name__ == '__main__':
    book_id = 1031940621
    chapter_id = 705235484
    crawel_detail(book_id=book_id, chatpter_id=chapter_id)