25 lines
504 B
Python
25 lines
504 B
Python
# -*- encoding:utf-8 -*-
|
|
|
|
'''
@Author : dingjiawen
@Date : 2024/03/27 14:49
@Usage :
@Desc : Try using PyQuery to scrape test.html
'''
|
|
import os

from pyquery import PyQuery as pq
|
|
|
|
# Scrape ./test.html: take the text of the first ``.title`` element as the
# output file name and write every ``.content p`` paragraph to
# ./output/<title>.txt, one paragraph per line.
# All paths are relative to the current working directory.
with open("./test.html", 'r', encoding='utf-8') as f:
    html_content = f.read()

doc = pq(html_content)

# Use PyQuery's text() on the first match instead of indexing the raw element
# and reading its ``.text`` attribute: indexing raises IndexError when nothing
# matches, and the attribute can be None when the title wraps its text in
# child tags. Splitting and re-joining collapses any whitespace/newlines so
# the result is usable as a single-line file name.
title_text = ' '.join((doc('.title').eq(0).text() or '').split())

# Replace path separators and characters that are rejected in file names on
# common platforms, so the title cannot break the path or leave ./output.
unsafe_chars = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})
safe_title = title_text.translate(unsafe_chars) or 'untitled'  # fallback when no title text is found

# open(..., 'w') does not create missing parent directories.
os.makedirs("./output", exist_ok=True)

with open(f"./output/{safe_title}.txt", 'w', encoding='utf-8') as file:
    for line in doc('.content p').items():
        file.write(line.text() + '\n')  # write each paragraph followed by a newline
|
|
|
|
|
|
|
|
|
|
|