self_example/Spider/Chapter11_JavaScript逆向/JS逆向实战/自己的提前尝试/main.py

53 lines
1.3 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2024/03/26 18:59
@Usage :
@Desc :使用playwright跳过加密逻辑爬取网站
'''
from playwright.sync_api import sync_playwright
import time
import requests
# Site root; also the page that is opened in the browser below.
BASE_URL = 'https://spa6.scrape.center/'
# URL template for the movie-list API. BASE_URL already ends with "/", so the
# trailing slash is stripped before appending the path; otherwise the result
# would contain a double slash ("...scrape.center//api/movie").
INDEX_URL = BASE_URL.rstrip('/') + "/api/movie?limit={limit}&offset={offset}&token={token}"
# Number of list pages to request.
MAX_PAGE = 10
# Page size, sent as ``limit`` and used as the step for ``offset``.
LIMIT = 10

# Start the Playwright driver and open one Chromium page that stays alive for
# the rest of the script.
content = sync_playwright().start()
browser = content.chromium.launch()
page = browser.new_page()
# Serve the local file "chunk.js" in place of the site's script chunk.
# Note: the route pattern needs the leading "**".
page.route(
    "**/js/chunk-19c920f8.c3a1129d.js",
    lambda route: route.fulfill(path='chunk.js'),
)
page.goto(BASE_URL, wait_until='networkidle')
def get_token():
    """Return the token for the movie-list API, or ``None`` on failure.

    The token is obtained by calling ``window.encrypt`` with the path
    ``/api/movie`` inside the module-level Playwright ``page``.

    NOTE(review): ``window.encrypt`` is presumably exposed by the local
    ``chunk.js`` that is served in place of the site's script -- confirm.

    Returns:
        The value returned by ``window.encrypt``, or ``None`` when the
        evaluation raises a Playwright error (the error is printed).
    """
    # Imported locally so this function does not rely on a new file-level import.
    from playwright.sync_api import Error as PlaywrightError

    try:
        # Pass the path as an evaluate() argument instead of splicing it into
        # the JavaScript source, so no quoting/escaping issues can arise.
        return page.evaluate('(path) => window.encrypt(path)', '/api/movie')
    except PlaywrightError as e:
        # Report the failure immediately; the former 100-second sleep only
        # stalled every failed call before the error became visible.
        print(e)
        return None
def get_key():
    """Unimplemented placeholder; takes no arguments and returns ``None``.

    NOTE(review): presumably intended to produce the detail-page key that the
    note at the end of this script says could not be reversed -- confirm.
    """
    return None
# Fetch every list page through the HTTP API, requesting a fresh token from the
# browser page for each request.
try:
    for i in range(MAX_PAGE):
        offset = LIMIT * i
        token = get_token()
        if token is None:
            # get_token() has already printed the error; do not send a
            # request with the literal text "None" as its token.
            print('skip offset %d: no token' % offset)
            continue
        result = requests.get(
            INDEX_URL.format(limit=LIMIT, offset=offset, token=token),
            timeout=30,  # never hang forever on a stalled connection
        )
        print(result.text)
finally:
    # Always shut down the browser and the Playwright driver, even when a
    # request fails, so no Chromium process is left behind.
    browser.close()
    content.stop()
# At this point the list pages can basically be scraped, but the logic that
# produces the detail-page key could not be reversed: neither keyword searches
# nor anything else managed to hit a breakpoint.