# -*- encoding:utf-8 -*-

'''
@Author : dingjiawen
@Date : 2024/03/27 15:33
@Usage :
@Desc : Production crawl of a Qidian book: fetch the book page, collect the
        chapter links and hand every (book_id, chapter_id) pair to
        crawel_detail.
'''
import requests
from pyquery import PyQuery as pq
from crawel_detail import crawel_detail

# Observing the site's requests shows there are two ways to obtain chapter_id:
# the ajax category endpoint (kept below for reference) or the chapter links
# embedded in the book page itself. This script uses the book page.
# url = 'https://www.qidian.com/ajax/book/category?_csrfToken={_csrfToken}&bookId={bookId}&w_tsfp={w_tsfp}'
url = 'https://www.qidian.com/book/1031940621/'

# Seconds to wait for the server before giving up. Without an explicit
# timeout, requests waits indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10

# Request headers sent with the book-page request.
# NOTE(review): the Cookie value hard-codes what look like session / CSRF
# tokens in source control -- consider loading it from the environment.
# NOTE(review): Accept-Encoding advertises 'br' and 'zstd'; requests only
# decodes those when the optional brotli / zstandard packages are installed --
# confirm the runtime has them, otherwise response.text may be undecoded bytes.
header = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'Accept-Encoding': 'gzip, deflate, br, zstd',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Cookie': '_yep_uuid=59404ab0-2696-4162-b763-1256a5ca1dca; e1=%7B%22l6%22%3A%221%22%2C%22l1%22%3A%22%22%2C%22pid%22%3A%22qd_P_xiangqing%22%2C%22eid%22%3A%22%22%7D; e2=%7B%22l6%22%3A%221%22%2C%22l1%22%3A%22%22%2C%22pid%22%3A%22qd_P_xiangqing%22%2C%22eid%22%3A%22%22%7D; newstatisticUUID=1669693998_1518827460; _csrfToken=9VUvhprKzOz80xLUYXqgOzIcm011iQ9DfhwSyUD2; Hm_lvt_f00f67093ce2f38f215010b699629083=1701842016; supportwebp=true; supportWebp=true; _ga=GA1.1.67407022.1669694001; _ga_FZMMH98S83=GS1.1.1701860853.4.1.1701860855.0.0.0; _ga_PFYW0QLV3P=GS1.1.1701860853.4.1.1701860855.0.0.0; fu=923381569; trkf=1; traffic_utm_referer=https%3A//cn.bing.com/; w_tsfp=ltvgWVEE2utBvS0Q6KvslUKvEj87Z2R7xFw0D+M9Os09AaYjV5iM2IZ+utfldCyCt5Mxutrd9MVxYnGAU9QgexgdRcSYb5tH1VPHx8NlntdKRQJtA5KOD1McdbpzvTJCL24LIRDu3mt3ItRJmONgj14K5yZ137ZlCa8hbMFbixsAqOPFm/97DxvSliPXAHGHM3wLc+6C6rgv8LlSgW2DugDuLi11A7lD2UGS0yoeG3pV8w2pJbsDal7wcpK9Uv8wrTPzwjn3apCs2RYj4VA3sB49AtX02TXKL3ZEIAtrZUqukO18Lv3wdaN4qzsLX/hITghGqlkd5usw+EBJWXnsZSOLAf8r4wEEQ/JcrZ6+NA==',
    'Host': 'www.qidian.com',
    'Pragma': 'no-cache',
    # TODO: Referer is the anti-hotlinking check -- it names the page one
    # level above the current request so that visiting this page looks
    # legitimate.
    'Referer': 'https://www.qidian.com/all/',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-User': '?1',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
    'sec-ch-ua': '"Chromium";v="122", "Not(A:Brand";v="24", "Google Chrome";v="122"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
}

response = requests.get(url, headers=header, timeout=REQUEST_TIMEOUT)
# Fail loudly on 4xx/5xx instead of parsing an error page and silently
# crawling nothing.
response.raise_for_status()
doc = pq(response.text)

# Kept for backward compatibility; currently never populated.
result = []

for item in doc('.chapter-item a').items():
    href = item.attr('href')
    if not href:
        # Anchor without an href attribute: nothing to parse.
        continue

    strs = href.split('/')
    if len(strs) < 3:
        # Too few path segments to contain both a book id and a chapter id.
        continue

    # The ids are read from the second-to-last (chapter) and third-to-last
    # (book) '/'-separated segments of the link.
    chapter_id, book_id = strs[-2], strs[-3]

    # 'chatpter_id' (sic) is the keyword this call has always used; it is
    # presumably the parameter name declared by crawel_detail -- verify
    # against that module before renaming.
    crawel_detail(book_id, chatpter_id=chapter_id)