self_example/Spider/spider_practice/起点/test.py

25 lines
504 B
Python

# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2024/03/27 14:49
@Usage :
@Desc :尝试使用PyQuery爬取test.html
'''
import re
from pathlib import Path

from pyquery import PyQuery as pq
# Read the saved page ./test.html and write the text of every <p> under
# `.content` to ./output/<title>.txt, one paragraph per line.
with open("./test.html", 'r', encoding='utf-8') as f:
    html_content = f.read()

doc = pq(html_content)

# Take the full text of the first `.title` match. Indexing the selection with
# [0] raises IndexError when nothing matches, and the raw lxml `.text`
# attribute is None when the title text sits inside a child element, so go
# through PyQuery's `.text()` instead and fall back to an empty string.
title_nodes = doc('.title')
title_text = (title_nodes.eq(0).text() or "") if title_nodes else ""

# The title comes from page content: replace path separators, characters that
# common filesystems reject, and control characters before using it as a file
# name. An empty result falls back to a fixed name so the content is still saved.
safe_title = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', title_text).strip() or "untitled"

# open(..., 'w') does not create missing parent directories, so make sure the
# output directory exists first.
output_dir = Path("./output")
output_dir.mkdir(parents=True, exist_ok=True)

with open(output_dir / f"{safe_title}.txt", 'w', encoding='utf-8') as file:
    # Write each paragraph followed by a newline.
    file.writelines(f"{paragraph.text()}\n" for paragraph in doc('.content p').items())