34 lines
864 B
Python
34 lines
864 B
Python
# -*- encoding:utf-8 -*-
|
|
|
|
'''
@Author : dingjiawen
@Date : 2024/03/27 14:00
@Usage :
@Desc : Crawl the specified detail page of a specified chapter on Qidian
'''
|
|
|
|
import requests
|
|
from pyquery import PyQuery as pq
|
|
|
|
# Read the locally saved page and collect one (book_id, chapter_id) tuple
# for every link found under a '.chapter-item' element.
with open("./test1.html", 'r', encoding='utf-8') as file:
    content = file.read()

doc = pq(content)
result = []
for item in doc('.chapter-item a').items():
    href = item.attr('href')
    # Anchors without an href (or with an empty one) cannot be parsed;
    # skip them instead of crashing on .split().
    if not href:
        continue
    strs = href.split('/')
    # The negative indexing below needs at least three '/'-separated parts.
    if len(strs) < 3:
        continue
    # NOTE(review): assumes hrefs end with ".../<book_id>/<chapter_id>/"
    # (trailing slash), so the last split element is empty — confirm.
    chapter_id, book_id = strs[-2], strs[-3]
    result.append((book_id, chapter_id))
|
|
|
|
|
|
# title = doc('.chapter-item')
|
|
# with open(f"./output/{title.text}.txt", 'w', encoding='utf-8') as file:
|
|
# for line in doc('.content p').items():
|
|
# file.write(line.text() + '\n') # write each line, appending a trailing newline
|
|
|
|
|
|
# if __name__ == '__main__':
|
|
# book_id = 1031940621
|
|
# chapter_id = 705235484
|
|
# crawel_detail(book_id=book_id, chatpter_id=chapter_id)
|