20240325爬虫更新JS逆向
This commit is contained in:
parent
4a1d6ad72b
commit
50902dd83a
|
|
@ -30,7 +30,7 @@ def scrape_api(url):
|
|||
response = requests.get(url)
|
||||
if response.status_code == 200:
|
||||
return response.json()
|
||||
logging.error('get invalid status code %s while scraping %s',
|
||||
logging.error('get invalid status codes %s while scraping %s',
|
||||
response.status_code, url)
|
||||
except requests.RequestException:
|
||||
logging.error('error occurred while scraping %s', url, exc_info=True)
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ with sync_playwright() as p:
|
|||
page.wait_for_load_state('networkidle')
|
||||
browser.close()
|
||||
|
||||
获取页面源代码
|
||||
# 获取页面源代码
|
||||
with sync_playwright() as p:
|
||||
browser = p.chromium.launch(headless=False)
|
||||
page = browser.new_page()
|
||||
|
|
|
|||
|
|
@ -0,0 +1,8 @@
|
|||
# -*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2024/03/21 13:48
|
||||
@Usage :
|
||||
@Desc :
|
||||
'''
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,22 @@
|
|||
const CryptoJS = require("./crypto")
|
||||
|
||||
/**
 * Build the encrypted token for a player record.
 *
 * The plaintext is the Base64 encoding of the UTF-8 name followed directly
 * (no separators) by the birthday, height and weight fields. It is encrypted
 * with DES in ECB mode using PKCS#7 padding and a fixed key.
 *
 * @param {object} player - Record providing `name`, `birthday`, `height` and `weight`.
 * @returns {string} String form of the CryptoJS cipher result.
 */
function getToken(player) {
    const secretKey = CryptoJS.enc.Utf8.parse("fipFfVsZsTda94hJNKJfLoaqyqMZFFimwLt");
    const {name: playerName, birthday, height, weight} = player;

    // Only the name is Base64-encoded; the other fields are appended verbatim.
    const nameBytes = CryptoJS.enc.Utf8.parse(playerName);
    const encodedName = CryptoJS.enc.Base64.stringify(nameBytes);
    const plaintext = `${encodedName}${birthday}${height}${weight}`;

    const cipherOptions = {
        mode: CryptoJS.mode.ECB,
        padding: CryptoJS.pad.Pkcs7,
    };
    const cipherResult = CryptoJS.DES.encrypt(plaintext, secretKey, cipherOptions);
    return cipherResult.toString();
}
|
||||
|
||||
// Sample record used to exercise getToken when this script is run directly.
// `image` is part of the record but is not read by getToken.
const player = {
    name: "凯文-杜兰特",
    image: "durant.png",
    birthday: "1988-09-29",
    height: "208cm",
    weight: "108.9KG",
};

const sampleToken = getToken(player);
console.log(sampleToken);
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
const CryptoJS = require("./crypto")
|
||||
const express = require("express")
|
||||
const app = express();
|
||||
const port = 3000;
|
||||
app.use(express.json());
|
||||
|
||||
/**
 * Build the encrypted token for a player record.
 *
 * The plaintext is the Base64 encoding of the UTF-8 name followed directly
 * (no separators) by the birthday, height and weight fields. It is encrypted
 * with DES in ECB mode using PKCS#7 padding and a fixed key.
 *
 * @param {object} player - Record providing `name`, `birthday`, `height` and `weight`.
 * @returns {string} String form of the CryptoJS cipher result.
 */
function getToken(player) {
    const secretKey = CryptoJS.enc.Utf8.parse("fipFfVsZsTda94hJNKJfLoaqyqMZFFimwLt");
    const {name: playerName, birthday, height, weight} = player;

    // Only the name is Base64-encoded; the other fields are appended verbatim.
    const encodedName = CryptoJS.enc.Base64.stringify(CryptoJS.enc.Utf8.parse(playerName));
    const plaintext = `${encodedName}${birthday}${height}${weight}`;

    const cipherResult = CryptoJS.DES.encrypt(plaintext, secretKey, {
        mode: CryptoJS.mode.ECB,
        padding: CryptoJS.pad.Pkcs7,
    });
    return cipherResult.toString();
}
|
||||
|
||||
// POST / — expects a JSON player record and responds with its token as text.
// Responds 400 when a field that getToken reads is absent, instead of
// silently encrypting the literal text "undefined".
app.post("/", (req, res) => {
    const data = req.body;
    const requiredFields = ["name", "birthday", "height", "weight"];
    const missingFields = requiredFields.filter(
        (field) => data == null || data[field] == null
    );
    if (missingFields.length > 0) {
        res.status(400).send(`missing required field(s): ${missingFields.join(", ")}`);
        return;
    }
    console.log(data);
    res.send(getToken(data));
});
|
||||
|
||||
// Log the port once the server has started listening.
function reportListening() {
    console.log(`Example app listening on port ${port}!`);
}

app.listen(port, reportListening);
|
||||
|
|
@ -0,0 +1,22 @@
|
|||
# -*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2024/03/21 14:23
|
||||
@Usage :
|
||||
@Desc :使用python调用Node的服务
|
||||
'''
|
||||
|
||||
import requests
|
||||
|
||||
# Sample player record posted to the local Node service as JSON.
data = {
    "name": "凯文-杜兰特",
    "image": "durant.png",
    "birthday": "1988-09-29",
    "height": "208cm",
    "weight": "108.9KG"
}

url = 'http://localhost:3000'
# A timeout keeps the script from hanging forever if the service is not running.
response = requests.post(url, json=data, timeout=10)
# Fail loudly on a non-2xx answer rather than printing an error page as a token.
response.raise_for_status()
print(response.text)
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
{
|
||||
"presets": [
|
||||
"@babel/preset-env"
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
import {parse} from "@babel/parser";
import fs from "fs";
import CodeGenerator from "@babel/generator";

// Round-trip demo: read a source file, parse it into an AST, then generate
// code back from that AST and print it.
const source = fs.readFileSync("codes/code1.js", "utf8");
const tree = parse(source);

// retainLines asks the generator to keep output on the original line numbers.
const generated = CodeGenerator.default(tree, {retainLines: true});
console.log(generated.code);
|
||||
|
|
@ -0,0 +1,52 @@
|
|||
import {parse} from "@babel/parser"
|
||||
import CodeGenerator from "@babel/generator"
|
||||
import fs from "fs"
|
||||
import {traverse} from "@babel/core";
|
||||
|
||||
|
||||
const code = fs.readFileSync("codes/code1.js", "utf-8");
|
||||
let ast = parse(code)
|
||||
// traverse(ast, {
|
||||
// enter(path) {
|
||||
// let node = path.node
|
||||
// if (node.type === "NumericLiteral" && node.value === 3) {
|
||||
// node.value = 5;
|
||||
// }
|
||||
// if (node.type === "StringLiteral" && node.value === "hello") {
|
||||
// node.value = "hi";
|
||||
// }
|
||||
// },
|
||||
// });
|
||||
|
||||
// traverse(ast, {
|
||||
// NumericLiteral(path) {
|
||||
//
|
||||
// if (path.node.value === 3) {
|
||||
// path.node.value = 5;
|
||||
// }
|
||||
//
|
||||
// },
|
||||
// StringLiteral(path) {
|
||||
// if (path.node.value === "hello") {
|
||||
// path.node.value = "hi";
|
||||
// }
|
||||
// },
|
||||
// });
|
||||
|
||||
// Strip every `console.log(...)` call from the AST.
traverse(ast, {
    CallExpression(path) {
        const {callee} = path.node;
        // Plain calls such as `foo()` have an Identifier callee with no
        // `object`/`property`; reading `callee.object.name` on them would throw.
        // Computed access (`console[log]`) is skipped because the property is
        // then an arbitrary expression, not the method name.
        if (callee.type !== "MemberExpression" || callee.computed) {
            return;
        }
        if (callee.object.name === "console" && callee.property.name === "log") {
            path.remove();
        }
    },
});
|
||||
|
||||
// Generate source code back from the transformed AST and print it.
// The option name is `retainLines`; an unknown key such as the former
// `retainLinesL` is ignored by the generator, so it had no effect.
const {code: output} = CodeGenerator.default(ast, {
    retainLines: true,
});
console.log(output)
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
const a = 3;
|
||||
let string = "hello";
|
||||
for (let i = 0; i < a; i++) {
|
||||
string += "world";
|
||||
}
|
||||
console.log("string", string);
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,17 @@
|
|||
{
|
||||
"name": "learn-ast",
|
||||
"version": "1.0.0",
|
||||
"description": "学习ast",
|
||||
"main": "index.js",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"test": "echo \"Error: no test specified\" && exit 1"
|
||||
},
|
||||
"author": "",
|
||||
"license": "ISC",
|
||||
"devDependencies": {
|
||||
"@babel/cli": "^7.24.1",
|
||||
"@babel/core": "^7.24.3",
|
||||
"@babel/preset-env": "^7.24.3"
|
||||
}
|
||||
}
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
//变量混淆
|
||||
|
||||
// const code = `
|
||||
// const codes = `
|
||||
// let x = '1'+1
|
||||
// console.log('x',x)
|
||||
// `
|
||||
|
|
|
|||
|
|
@ -0,0 +1,8 @@
|
|||
# -*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2024/03/21 15:55
|
||||
@Usage :
|
||||
@Desc :
|
||||
'''
|
||||
|
|
@ -0,0 +1,204 @@
|
|||
(window["webpackJsonp"] = window["webpackJsonp"] || []).push([["chunk-10192a00"], {
|
||||
"5a19": function(t, a, e) {},
|
||||
"79ee": function(t, a, e) {},
|
||||
ca9c: function(t, a, e) {
|
||||
"use strict";
|
||||
var s = e("5a19")
|
||||
, n = e.n(s);
|
||||
n.a
|
||||
},
|
||||
d504: function(t, a, e) {
|
||||
"use strict";
|
||||
e.r(a);
|
||||
var s = function() {
|
||||
var t = this
|
||||
, a = t.$createElement
|
||||
, e = t._self._c || a;
|
||||
return e("div", {
|
||||
attrs: {
|
||||
id: "index"
|
||||
}
|
||||
}, [e("el-row", {
|
||||
directives: [{
|
||||
name: "loading",
|
||||
rawName: "v-loading",
|
||||
value: t.loading,
|
||||
expression: "loading"
|
||||
}]
|
||||
}, [e("el-col", {
|
||||
attrs: {
|
||||
span: 18,
|
||||
offset: 3
|
||||
}
|
||||
}, t._l(t.movies, (function(a) {
|
||||
return e("el-card", {
|
||||
key: a.name,
|
||||
staticClass: "item m-t",
|
||||
attrs: {
|
||||
shadow: "hover"
|
||||
}
|
||||
}, [e("el-row", [e("el-col", {
|
||||
attrs: {
|
||||
xs: 8,
|
||||
sm: 6,
|
||||
md: 4
|
||||
}
|
||||
}, [e("router-link", {
|
||||
attrs: {
|
||||
to: {
|
||||
name: "detail",
|
||||
params: {
|
||||
key: t.transfer(a.id)
|
||||
}
|
||||
}
|
||||
}
|
||||
}, [e("img", {
|
||||
staticClass: "cover",
|
||||
attrs: {
|
||||
src: a.cover
|
||||
}
|
||||
})])], 1), e("el-col", {
|
||||
staticClass: "p-h",
|
||||
attrs: {
|
||||
xs: 9,
|
||||
sm: 13,
|
||||
md: 16
|
||||
}
|
||||
}, [e("router-link", {
|
||||
staticClass: "name",
|
||||
attrs: {
|
||||
to: {
|
||||
name: "detail",
|
||||
params: {
|
||||
key: t.transfer(a.id)
|
||||
}
|
||||
}
|
||||
}
|
||||
}, [e("h2", {
|
||||
staticClass: "m-b-sm"
|
||||
}, [t._v(t._s(a.name) + " - " + t._s(a.alias))])]), e("div", {
|
||||
staticClass: "categories"
|
||||
}, t._l(a.categories, (function(a) {
|
||||
return e("el-button", {
|
||||
key: a,
|
||||
staticClass: "category",
|
||||
attrs: {
|
||||
size: "mini",
|
||||
type: "primary"
|
||||
}
|
||||
}, [t._v(t._s(a) + "\n ")])
|
||||
}
|
||||
)), 1), e("div", {
|
||||
staticClass: "m-v-sm info"
|
||||
}, [e("span", [t._v(t._s(a.regions.join("、")))]), e("span", [t._v(" / ")]), e("span", [t._v(t._s(a.minute) + " 分钟")])]), e("div", {
|
||||
staticClass: "m-v-sm info"
|
||||
}, [e("span", [t._v(t._s(a.published_at) + " 上映")])])], 1), e("el-col", {
|
||||
attrs: {
|
||||
xs: 5,
|
||||
sm: 5,
|
||||
md: 4
|
||||
}
|
||||
}, [e("p", {
|
||||
staticClass: "score m-t-md m-b-n-sm"
|
||||
}, [t._v(t._s(a.score.toFixed(1)))]), e("p", [e("el-rate", {
|
||||
attrs: {
|
||||
value: a.score / 2,
|
||||
disabled: "",
|
||||
max: 5,
|
||||
"text-color": "#ff9900"
|
||||
}
|
||||
})], 1)])], 1)], 1)
|
||||
}
|
||||
)), 1)], 1), e("el-row", [e("el-col", {
|
||||
attrs: {
|
||||
span: 10,
|
||||
offset: 11
|
||||
}
|
||||
}, [e("div", {
|
||||
staticClass: "pagination m-v-lg"
|
||||
}, [e("el-pagination", {
|
||||
attrs: {
|
||||
background: "",
|
||||
"current-page": t.page,
|
||||
"page-size": t.limit,
|
||||
layout: "total, prev, pager, next",
|
||||
total: t.total
|
||||
},
|
||||
on: {
|
||||
"current-change": t.onPageChange,
|
||||
"update:currentPage": function(a) {
|
||||
t.page = a
|
||||
},
|
||||
"update:current-page": function(a) {
|
||||
t.page = a
|
||||
}
|
||||
}
|
||||
})], 1)])], 1)], 1)
|
||||
}
|
||||
, n = []
|
||||
, i = e("7d92")
|
||||
, r = e("3e22")
|
||||
, o = {
|
||||
name: "Index",
|
||||
components: {},
|
||||
data: function() {
|
||||
return {
|
||||
loading: !1,
|
||||
total: null,
|
||||
page: parseInt(this.$route.params.page || 1),
|
||||
limit: 10,
|
||||
movies: null
|
||||
}
|
||||
},
|
||||
mounted: function() {
|
||||
this.onFetchData()
|
||||
},
|
||||
methods: {
|
||||
transfer: r["a"],
|
||||
onPageChange: function(t) {
|
||||
this.$router.push({
|
||||
name: "indexPage",
|
||||
params: {
|
||||
page: t
|
||||
}
|
||||
}),
|
||||
this.onFetchData()
|
||||
},
|
||||
onFetchData: function() {
|
||||
var t = this;
|
||||
this.loading = !0;
|
||||
var a = (this.page - 1) * this.limit
|
||||
, e = Object(i["a"])(this.$store.state.url.index, a);
|
||||
window.encrypt = Object(i["a"]);
|
||||
this.$axios.get(this.$store.state.url.index, {
|
||||
params: {
|
||||
limit: this.limit,
|
||||
offset: a,
|
||||
token: e
|
||||
}
|
||||
}).then((function(a) {
|
||||
var e = a.data
|
||||
, s = e.results
|
||||
, n = e.count;
|
||||
t.loading = !1,
|
||||
t.movies = s,
|
||||
t.total = n
|
||||
}
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
, l = o
|
||||
, c = (e("ca9c"),
|
||||
e("e93d"),
|
||||
e("2877"))
|
||||
, u = Object(c["a"])(l, s, n, !1, null, "8a85e5c6", null);
|
||||
a["default"] = u.exports
|
||||
},
|
||||
e93d: function(t, a, e) {
|
||||
"use strict";
|
||||
var s = e("79ee")
|
||||
, n = e.n(s);
|
||||
n.a
|
||||
}
|
||||
}]);
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
# -*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2024/03/21 15:57
|
||||
@Usage :
|
||||
@Desc :使用playwright自动一个浏览器,执行js
|
||||
'''
|
||||
import requests
|
||||
from playwright.sync_api import sync_playwright
|
||||
import time
|
||||
import os
|
||||
|
||||
# Site root without a trailing slash: INDEX_URL appends a path that starts
# with "/", so a trailing slash here would produce "//api/movie".
BASE_URL = 'https://spa2.scrape.center'
# Index API template; filled in with limit, offset and the generated token.
INDEX_URL = BASE_URL + "/api/movie?limit={limit}&offset={offset}&token={token}"
# Number of index pages to request.
MAX_PAGE = 10
# Number of entries requested per page.
LIMIT = 10
|
||||
|
||||
# Start Playwright explicitly (no `with` block) so the browser and page stay
# available to the code further down in this script.
context = sync_playwright().start()
browser = context.chromium.launch(headless=False, devtools=True)
page = browser.new_page()


def _serve_local_chunk(route):
    """Answer the intercepted request with the local file chunk.js."""
    route.fulfill(path='chunk.js')


# Note: the URL pattern needs the leading ** so that it matches the full URL.
page.route("**/js/chunk-10192a00.243cb8b7.js", _serve_local_chunk)
page.goto(BASE_URL, wait_until='networkidle')
|
||||
|
||||
|
||||
def get_token(offset):
    """Return the token for the given page offset.

    Calls ``window.encrypt('/api/movie', <offset>)`` in the open page and
    returns its result. The offset is passed as a string, as before.

    Any error raised by the page evaluation propagates to the caller. The
    previous version caught it, slept for 100 seconds and then failed with
    an unrelated UnboundLocalError on ``return result``.

    :param offset: index offset of the page being requested.
    :return: whatever ``window.encrypt`` returns in the page.
    """
    # Arguments travel through evaluate's `arg` parameter instead of being
    # spliced into the JavaScript source text.
    return page.evaluate(
        '([path, offset]) => window.encrypt(path, offset)',
        ['/api/movie', str(offset)],
    )
|
||||
|
||||
|
||||
# Request every index page with a freshly generated token, then always shut
# down the browser and the Playwright driver, even if a request fails.
try:
    for i in range(MAX_PAGE):
        offset = i * LIMIT
        token = get_token(offset)
        print(token)
        index_url = INDEX_URL.format(limit=LIMIT, offset=offset, token=token)
        # Timeout so a stalled connection cannot hang the whole run.
        response = requests.get(index_url, timeout=10)
        # Surface HTTP errors (e.g. a rejected token) before parsing JSON.
        response.raise_for_status()
        print('response', response.json())
finally:
    browser.close()
    context.stop()
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
# -*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2024/03/21 15:57
|
||||
@Usage :
|
||||
@Desc :使用playwright自动一个浏览器,执行js
|
||||
'''
|
||||
import requests
|
||||
from playwright.sync_api import sync_playwright
|
||||
import time
|
||||
import os
|
||||
|
||||
# Site root (no trailing slash; INDEX_URL appends a path starting with "/").
BASE_URL = 'https://spa2.scrape.center'
# Index API template; filled in with limit, offset and the generated token.
INDEX_URL = BASE_URL + "/api/movie?limit={limit}&offset={offset}&token={token}"
MAX_PAGE = 10
LIMIT = 10

# Start Playwright explicitly (no `with` block) so the browser and page stay
# available to the code further down in this script.
context = sync_playwright().start()
browser = context.chromium.launch()

page = browser.new_page()

# Note: the pattern needs the leading ** to match the full request URL.
# Without it the request is not intercepted and the local chunk.js is never
# served.
page.route(
    "**/js/chunk-10192a00.243cb8b7.js",
    lambda route: route.fulfill(path='chunk.js')
)
# Wait for the network to go idle so the replaced chunk has had a chance to
# load and run before get_token() looks for window.encrypt.
page.goto(BASE_URL, wait_until='networkidle')
|
||||
|
||||
|
||||
def get_token(offset):
    """Return the token for the given page offset.

    Calls ``window.encrypt('/api/movie', <offset>)`` in the open page and
    returns its result. The offset is passed as a string, as before.

    :param offset: index offset of the page being requested.
    :return: whatever ``window.encrypt`` returns in the page.
    """
    # Arguments travel through evaluate's `arg` parameter instead of being
    # spliced into the JavaScript source text.
    return page.evaluate(
        '([path, offset]) => window.encrypt(path, offset)',
        ['/api/movie', str(offset)],
    )
|
||||
|
||||
# Request every index page with a freshly generated token, then always shut
# down the browser and the Playwright driver, even if a request fails.
try:
    for i in range(MAX_PAGE):
        offset = i * LIMIT
        token = get_token(offset)
        print(token)
        index_url = INDEX_URL.format(limit=LIMIT, offset=offset, token=token)
        # Timeout so a stalled connection cannot hang the whole run.
        response = requests.get(index_url, timeout=10)
        # Surface HTTP errors (e.g. a rejected token) before parsing JSON.
        response.raise_for_status()
        print('response', response.json())
finally:
    browser.close()
    context.stop()
|
||||
|
|
@ -37,7 +37,7 @@ def scrape_page(url):
|
|||
response = requests.get(url)
|
||||
if response.status_code == 200:
|
||||
return response.text
|
||||
logging.error('get invalid status code %s while scraping %s', response.status_code, url)
|
||||
logging.error('get invalid status codes %s while scraping %s', response.status_code, url)
|
||||
except requests.RequestException:
|
||||
logging.error('error occurred while scraping %s', url, exc_info=True)
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue