self_example/Spider/Chapter11_JavaScript逆向/JS逆向实战/token_decode.py

# -*- encoding:utf-8 -*-

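'''
@Author : dingjiawen
@Date : 2024/03/26 20:32
@Usage : Crawl the list pages using Python
@Desc : After reverse-engineering the site's JavaScript, the token generation logic is essentially confirmed:
            - Put `/api/movie` into a list;
            - Append the current timestamp to the list;
            - Join the list items with commas;
            - Compute the SHA1 hex digest of the joined string;
            - Join the digest and the timestamp with a comma;
            - Base64-encode the joined result.
'''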

import hashlib
import time
import base64
from typing import Any, List

import requests

# Root of the target site; both endpoint templates below are built from it.
BASE_URL = 'https://spa6.scrape.center'
# List endpoint template; filled in with str.format(limit=..., offset=..., token=...).
INDEX_URL = f"{BASE_URL}/api/movie?limit={{limit}}&offset={{offset}}&token={{token}}"
# Detail endpoint template; filled in with str.format(id=..., token=...).
DETAIL_URL = f"{BASE_URL}/api/movie/{{id}}?token={{token}}"
# Prefix that is concatenated with a movie id before Base64-encoding it for the detail URL.
SECRET = 'ef34#teuq0btua#(-57w1q5o5--j@98xygimlyfxs*-!i-0-mb'
# Number of list pages to request.
MAX_PAGE = 10
# Page size: sent as the `limit` query parameter and used to compute each page's offset.
LIMIT = 10


def generate_token(args: List[Any]) -> str:
    """Build the request token for the given signing arguments.

    The token is the Base64 encoding of ``"<sha1 hex digest>,<timestamp>"``,
    where the SHA1 digest is computed over the comma-joined ``args`` followed
    by the current Unix timestamp in whole seconds.

    Args:
        args: Values to sign, e.g. ``['/api/movie']``. Each item is converted
            with ``str`` before joining. The list itself is not modified.

    Returns:
        The Base64-encoded token as a ``str``.
    """
    cur_time = str(int(time.time()))
    # Build a fresh list instead of appending to ``args``: the caller's list
    # stays untouched, so it can be reused without accumulating timestamps.
    # ``str(...)`` lets non-string items (allowed by ``List[Any]``) be joined.
    parts = [str(arg) for arg in args]
    parts.append(cur_time)
    sign = hashlib.sha1(','.join(parts).encode('utf-8')).hexdigest()
    return base64.b64encode(f'{sign},{cur_time}'.encode('utf-8')).decode('utf-8')


# Crawl the list pages, then the detail page of every movie found on each one.
REQUEST_TIMEOUT = 10  # seconds; without a timeout requests.get may block indefinitely

for page in range(MAX_PAGE):
    offset = page * LIMIT
    # The list endpoint is signed with the bare API path.
    token = generate_token(['/api/movie'])
    url = INDEX_URL.format(limit=LIMIT, offset=offset, token=token)
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Fail loudly on an HTTP error instead of trying to parse an error body as JSON.
    response.raise_for_status()
    result = response.json()  # decode once and reuse for both printing and iteration
    print('response:', result)

    # Crawl the detail pages.
    for movie in result['results']:
        movie_id = movie['id']
        # The detail endpoint is addressed by base64(SECRET + id), not by the raw id.
        encrypt_id = base64.b64encode((SECRET + str(movie_id)).encode('utf-8')).decode('utf-8')
        # The detail token is signed over the path that contains the encrypted id.
        detail_token = generate_token([f'/api/movie/{encrypt_id}'])
        detail_url = DETAIL_URL.format(id=encrypt_id, token=detail_token)
        detail_response = requests.get(detail_url, timeout=REQUEST_TIMEOUT)
        detail_response.raise_for_status()
        print(detail_response.json())