# -*- encoding:utf-8 -*-
"""
@Author : dingjiawen
@Date   : 2024/03/27 14:00
@Usage  :
@Desc   : Crawl the specified detail page of a specified chapter on Qidian.

What the executable part of this file does: it reads a locally saved HTML
page (``./test1.html``), selects every ``.chapter-item a`` link and collects
``(book_id, chapter_id)`` tuples from the link targets into ``result``.
"""
from typing import Optional, Tuple

import requests  # not used by the code below; kept for the drafted crawler
from pyquery import PyQuery as pq


def _split_chapter_href(href: Optional[str]) -> Optional[Tuple[str, str]]:
    """Return ``(book_id, chapter_id)`` taken from a chapter link.

    The ids are read positionally from the ``/``-separated href: the
    third-from-last segment is the book id and the second-from-last is the
    chapter id.
    NOTE(review): this presumably expects hrefs shaped like
    ``.../<book_id>/<chapter_id>/`` (with a trailing slash) -- confirm
    against the real page.

    Returns ``None`` when the link cannot be parsed (missing/empty ``href``
    or fewer than three segments) instead of raising.
    """
    if not href:
        # Anchors without an href yield None from ``attr``; nothing to parse.
        return None
    segments = href.split('/')
    if len(segments) < 3:
        return None
    return segments[-3], segments[-2]


with open("./test1.html", 'r', encoding='utf-8') as file:
    content = file.read()

doc = pq(content)

# List of (book_id, chapter_id) tuples, in document order.
result = []
for item in doc('.chapter-item a').items():
    ids = _split_chapter_href(item.attr('href'))
    if ids is not None:
        result.append(ids)

# NOTE: unfinished drafts kept from the original file; they are not executed.
# title = doc('.chapter-item')
# with open(f"./output/{title.text}.txt", 'w', encoding='utf-8') as file:
#     for line in doc('.content p').items():
#         file.write(line.text() + '\n')  # write each line, followed by a newline

# if __name__ == '__main__':
#     book_id = 1031940621
#     chapter_id = 705235484
#     crawel_detail(book_id=book_id, chatpter_id=chapter_id)