self_example/Spider/Chapter11_JavaScript逆向/JS逆向实战/token_decode.py

56 lines
1.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- encoding:utf-8 -*-
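'''
@Author : dingjiawen
@Date : 2024/03/26 20:32
@Usage : Crawl the movie list pages (and their detail pages) with Python.
@Desc : Reverse-engineering the site's JavaScript has essentially pinned down how the token is generated:
- put `/api/movie` into a list;
- append the current timestamp to the list;
- join the list items with commas;
- hash the joined string with SHA1 (hex digest);
- join the hash and the timestamp with a comma again;
- Base64-encode the joined result.
'''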
import hashlib
import time
import base64
from typing import Any, List
import requests
# Root of the target site; every endpoint below is built on top of it.
BASE_URL = 'https://spa6.scrape.center'

# URL templates that are later filled in with str.format(). The doubled
# braces keep the placeholders literal inside the f-strings.
INDEX_URL = f"{BASE_URL}/api/movie?limit={{limit}}&offset={{offset}}&token={{token}}"
DETAIL_URL = f"{BASE_URL}/api/movie/{{id}}?token={{token}}"

# Fixed prefix combined with a movie id before Base64-encoding it.
SECRET = 'ef34#teuq0btua#(-57w1q5o5--j@98xygimlyfxs*-!i-0-mb'

# Paging parameters: how many list pages to walk and how many items per page.
MAX_PAGE = 10
LIMIT = 10
def generate_token(args: List[Any]) -> str:
    """Build a time-stamped request token from ``args``.

    The token is computed as::

        timestamp = str(int(time.time()))
        sign      = sha1(",".join(args + [timestamp])).hexdigest()
        token     = base64(sign + "," + timestamp)

    Args:
        args: Values to sign (for example an API path). Each item is
            converted with ``str`` before joining. The list itself is
            not modified.

    Returns:
        The Base64-encoded token as a ``str``.
    """
    cur_time = str(int(time.time()))
    # Build a fresh list rather than appending to ``args``: mutating the
    # caller's list would leak the timestamp into it and corrupt any reuse.
    parts = [str(arg) for arg in args]
    parts.append(cur_time)
    sign = hashlib.sha1(','.join(parts).encode('utf-8')).hexdigest()
    return base64.b64encode(f'{sign},{cur_time}'.encode('utf-8')).decode('utf-8')
# Crawl the list pages and, for each movie on a page, its detail page.
# Seconds to wait for the server before giving up; without a timeout
# requests.get() can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10

for page in range(MAX_PAGE):
    offset = page * LIMIT
    url = INDEX_URL.format(
        limit=LIMIT,
        offset=offset,
        token=generate_token(['/api/movie']),
    )
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Fail with a clear HTTP error here instead of a confusing KeyError on
    # 'results' further down when the list request is rejected.
    response.raise_for_status()
    # Decode the JSON body once and reuse it for printing and iteration.
    result = response.json()
    print('response:', result)

    # Crawl the detail page of every movie on this list page.
    for movie in result['results']:
        movie_id = movie['id']
        # The detail endpoint is addressed by Base64(SECRET + id), and the
        # token is signed over that same encrypted path.
        encrypt_id = base64.b64encode((SECRET + str(movie_id)).encode('utf-8')).decode('utf-8')
        detail_url = DETAIL_URL.format(
            id=encrypt_id,
            token=generate_token([f'/api/movie/{encrypt_id}']),
        )
        detail_response = requests.get(detail_url, timeout=REQUEST_TIMEOUT)
        print(detail_response.json())