# -*- encoding: utf-8 -*-
'''
@Author : dingjiawen
@Date   : 2024/03/26 20:32
@Usage  : Scrape the movie list pages with Python
@Desc   : Reverse analysis has established the token generation logic:
    - put `/api/movie` into a list;
    - append the current timestamp to the list;
    - join the list items with commas;
    - SHA1-hash the joined string;
    - join the hash digest and the timestamp again;
    - Base64-encode the joined result.
'''
import base64
import hashlib
import time
from typing import Any, List

import requests

BASE_URL = 'https://spa6.scrape.center'
INDEX_URL = BASE_URL + '/api/movie?limit={limit}&offset={offset}&token={token}'
DETAIL_URL = BASE_URL + '/api/movie/{id}?token={token}'
SECRET = 'ef34#teuq0btua#(-57w1q5o5--j@98xygimlyfxs*-!i-0-mb'
MAX_PAGE = 10
LIMIT = 10


def generate_token(args: List[Any]) -> str:
    # Token = Base64("<sha1 of 'path,timestamp'>,<timestamp>")
    cur_time = str(int(time.time()))
    args.append(cur_time)
    sign = hashlib.sha1(','.join(args).encode('utf-8')).hexdigest()
    return base64.b64encode(','.join([sign, cur_time]).encode('utf-8')).decode('utf-8')


# Crawl the list pages (for now only the first MAX_PAGE pages)
for page in range(MAX_PAGE):
    token = generate_token(['/api/movie'])
    offset = page * LIMIT
    index_url = INDEX_URL.format(limit=LIMIT, offset=offset, token=token)
    response = requests.get(index_url)
    result = response.json()
    print('response:', result)

    # Crawl the detail page of every movie on this list page
    for movie in result['results']:
        movie_id = movie['id']
        # The detail API expects the numeric id prefixed with SECRET and Base64-encoded
        encrypt_id = base64.b64encode((SECRET + str(movie_id)).encode('utf-8')).decode('utf-8')
        detail_token = generate_token([f'/api/movie/{encrypt_id}'])
        detail_url = DETAIL_URL.format(id=encrypt_id, token=detail_token)
        response = requests.get(detail_url)
        print(response.json())
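
# ---------------------------------------------------------------------------
# Minimal sketch (added as an illustration, not part of the original flow):
# decoding a freshly generated token shows the structure described in the
# module docstring, i.e. "<sha1 hexdigest>,<unix timestamp>". The names
# demo_token / demo_encrypt_id below are introduced here for the demo only.
# ---------------------------------------------------------------------------
demo_token = generate_token(['/api/movie'])
decoded = base64.b64decode(demo_token).decode('utf-8')
sign, timestamp = decoded.split(',')
print('token decodes to:', decoded)  # e.g. '9a3f...c1,1711456320'
assert len(sign) == 40 and timestamp.isdigit()

# The detail path follows the same idea: Base64(SECRET + numeric id).
demo_encrypt_id = base64.b64encode((SECRET + '1').encode('utf-8')).decode('utf-8')
print('detail path for id=1:', f'/api/movie/{demo_encrypt_id}')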