self_example/Spider/Chapter04_数据存储/saveText.py

39 lines
1011 B
Python

#-*- encoding:utf-8 -*-
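'''
@Author : dingjiawen
@Date : 2023/12/6 14:08
@Usage : python saveText.py
@Desc : Scrape the movie cards from https://ssr1.scrape.center/ and save
        each movie's name, categories, release date and score to movies.txt
'''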
import requests
from pyquery import PyQuery as pq
import re
# Fetch the page at `url`, extract every `.el-card` movie entry and write
# its name, categories, release date and score to movies.txt (UTF-8).
url = 'https://ssr1.scrape.center/'

# A timeout keeps the script from hanging forever on a stalled connection;
# raise_for_status() stops us from parsing (and saving) an HTTP error page.
response = requests.get(url, timeout=10)
response.raise_for_status()
html = response.text

doc = pq(html)
items = doc('.el-card').items()

# Raw string avoids the invalid "\d" escape-sequence warning; the pattern is
# compiled once here instead of being searched twice per movie in the loop.
date_pattern = re.compile(r'\d{4}-\d{2}-\d{2}')

# The context manager guarantees the file is flushed and closed even if
# parsing one of the cards raises part-way through the loop.
with open('movies.txt', 'w', encoding='utf-8') as file:
    for item in items:
        # Name
        name = item.find('a > h2').text()
        file.write(f'名称: {name}\n')

        # Categories (written as a Python list repr, e.g. ['a', 'b']).
        # A distinct loop name avoids shadowing the outer `item`.
        categories = [
            span.text()
            for span in item.find('.categories button span').items()
        ]
        file.write(f'类别: {categories}\n')

        # Release date: take the first YYYY-MM-DD found in the info line
        # that contains "上映"; None when the card has no such date.
        # `or ''` guards against text() yielding None for an empty selection.
        published_text = item.find('.info:contains(上映)').text()
        date_match = date_pattern.search(published_text or '')
        published_at = date_match.group(0) if date_match else None
        file.write(f'上映时间: {published_at}\n')

        # Score
        score = item.find('p.score').text()
        file.write(f'评分: {score}\n')

        # Separator line between movies
        file.write(f'{"=" * 50}\n')