20240325爬虫更新JS逆向

This commit is contained in:
markilue 2024-03-25 12:12:47 +08:00
parent 4a1d6ad72b
commit 50902dd83a
19 changed files with 7408 additions and 4 deletions

View File

@ -30,7 +30,7 @@ def scrape_api(url):
response = requests.get(url) response = requests.get(url)
if response.status_code == 200: if response.status_code == 200:
return response.json() return response.json()
logging.error('get invalid status code %s while scraping %s', logging.error('get invalid status codes %s while scraping %s',
response.status_code, url) response.status_code, url)
except requests.RequestException: except requests.RequestException:
logging.error('error occurred while scraping %s', url, exc_info=True) logging.error('error occurred while scraping %s', url, exc_info=True)

View File

@ -31,7 +31,7 @@ with sync_playwright() as p:
page.wait_for_load_state('networkidle') page.wait_for_load_state('networkidle')
browser.close() browser.close()
获取页面源代码 # 获取页面源代码
with sync_playwright() as p: with sync_playwright() as p:
browser = p.chromium.launch(headless=False) browser = p.chromium.launch(headless=False)
page = browser.new_page() page = browser.new_page()

View File

@ -0,0 +1,8 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2024/03/21 13:48
@Usage :
@Desc :
'''

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,22 @@
const CryptoJS = require("./crypto")
/**
 * Build the token for a player record.
 *
 * The plaintext is the Base64 form of the UTF-8 encoded name followed by the
 * birthday, height and weight fields, concatenated without separators. It is
 * encrypted with CryptoJS DES (ECB mode, PKCS#7 padding) under a fixed key.
 *
 * @param {{name: string, birthday: string, height: string, weight: string}} player
 *   Player record; other properties are ignored.
 * @returns {string} The string form of the CryptoJS cipher result.
 */
function getToken(player) {
  const desKey = CryptoJS.enc.Utf8.parse("fipFfVsZsTda94hJNKJfLoaqyqMZFFimwLt");
  const nameWords = CryptoJS.enc.Utf8.parse(player.name);
  const encodedName = CryptoJS.enc.Base64.stringify(nameWords);
  const plaintext = `${encodedName}${player.birthday}${player.height}${player.weight}`;
  const cipherOptions = {
    mode: CryptoJS.mode.ECB,
    padding: CryptoJS.pad.Pkcs7,
  };
  return CryptoJS.DES.encrypt(plaintext, desKey, cipherOptions).toString();
}
// Sample player record used to exercise getToken from the command line.
const player = {
  name: "凯文-杜兰特",
  image: "durant.png",
  birthday: "1988-09-29",
  height: "208cm",
  weight: "108.9KG",
};
// Print the token computed for the sample record.
const sampleToken = getToken(player);
console.log(sampleToken);

View File

@ -0,0 +1,26 @@
const CryptoJS = require("./crypto")
const express = require("express")
// Express application instance; the route and listener below are attached to it.
const app = express();
// TCP port passed to app.listen.
const port = 3000;
// Install the JSON body parser so handlers can read the payload from req.body.
app.use(express.json());
/**
 * Compute the token for a player record.
 *
 * Plaintext = Base64(UTF-8 bytes of name) + birthday + height + weight, with
 * no separators, encrypted with CryptoJS DES in ECB mode with PKCS#7 padding
 * under a fixed key.
 *
 * @param {{name: string, birthday: string, height: string, weight: string}} player
 *   Player record; properties other than these four are ignored.
 * @returns {string} The string form of the CryptoJS cipher result.
 */
function getToken(player) {
  const { enc, mode, pad, DES } = CryptoJS;
  const key = enc.Utf8.parse("fipFfVsZsTda94hJNKJfLoaqyqMZFFimwLt");
  const { name, birthday, height, weight } = player;
  const base64Name = enc.Base64.stringify(enc.Utf8.parse(name));
  const message = `${base64Name}${birthday}${height}${weight}`;
  const cipher = DES.encrypt(message, key, { mode: mode.ECB, padding: pad.Pkcs7 });
  return cipher.toString();
}
// POST / — compute the token for the player record sent as the JSON body.
// Responds 400 when the body is absent or lacks any field getToken reads;
// without this check such a request would either crash the handler or yield
// a token built from the literal text "undefined".
app.post("/", (req, res) => {
  const data = req.body;
  console.log(data);
  const requiredFields = ["name", "birthday", "height", "weight"];
  // `== null` deliberately matches both null and undefined.
  const missing = requiredFields.filter((field) => data?.[field] == null);
  if (missing.length > 0) {
    res.status(400).send(`missing field(s): ${missing.join(", ")}`);
    return;
  }
  res.send(getToken(data));
});
// Log a banner once the server has started listening.
function reportListening() {
  console.log(`Example app listening on port ${port}!`);
}
// Start the HTTP server on the configured port.
app.listen(port, reportListening);

View File

@ -0,0 +1,22 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2024/03/21 14:23
@Usage :
@Desc :使用python调用Node的服务
'''
import requests
# Address of the local Node service that computes the token.
url = 'http://localhost:3000'
# Sample player record, sent to the service as the JSON request body.
data = {
    "name": "凯文-杜兰特",
    "image": "durant.png",
    "birthday": "1988-09-29",
    "height": "208cm",
    "weight": "108.9KG",
}
# POST the record and print the response body.
response = requests.post(url, json=data)
print(response.text)

View File

@ -0,0 +1,5 @@
{
"presets": [
"@babel/preset-env"
]
}

View File

@ -0,0 +1,17 @@
import {parse} from "@babel/parser";
import fs from "fs";
// Load the sample program and parse it into a Babel AST.
const SAMPLE_PATH = "codes/code1.js";
const code = fs.readFileSync(SAMPLE_PATH, "utf8");
let ast = parse(code);
// console.log(ast)
// console.log(ast.program.body)
import CodeGenerator from "@babel/generator";
// Turn the AST back into source text, keeping statements on their original
// lines (retainLines), and print the result.
const generated = CodeGenerator.default(ast, { retainLines: true });
const output = generated.code;
console.log(output);

View File

@ -0,0 +1,52 @@
import {parse} from "@babel/parser"
import CodeGenerator from "@babel/generator"
import fs from "fs"
import {traverse} from "@babel/core";
// Read the sample program as text and parse it into a Babel AST.
const code = fs.readFileSync("codes/code1.js", { encoding: "utf-8" });
let ast = parse(code);
// traverse(ast, {
// enter(path) {
// let node = path.node
// if (node.type === "NumericLiteral" && node.value === 3) {
// node.value = 5;
// }
// if (node.type === "StringLiteral" && node.value === "hello") {
// node.value = "hi";
// }
// },
// });
// traverse(ast, {
// NumericLiteral(path) {
//
// if (path.node.value === 3) {
// path.node.value = 5;
// }
//
// },
// StringLiteral(path) {
// if (path.node.value === "hello") {
// path.node.value = "hi";
// }
// },
// });
// Remove every `console.log(...)` call from the AST.
traverse(ast, {
  CallExpression(path) {
    const { callee } = path.node;
    // Only member calls carry `object`/`property`. A plain call such as
    // `foo()` has an Identifier callee, so reading `callee.object.name` on it
    // would throw a TypeError and abort the whole traversal.
    if (callee.type !== "MemberExpression") {
      return;
    }
    if (callee.object.name === "console" && callee.property.name === "log") {
      path.remove();
    }
  },
});
// Regenerate source text from the transformed AST and print it.
// The option is `retainLines`; the previous spelling `retainLinesL` was an
// unknown key, so the generator never applied it.
const {code: output} = CodeGenerator.default(ast, {
  retainLines: true,
});
console.log(output);

View File

@ -0,0 +1,6 @@
// Small sample program: starts from "hello", appends "world" `a` times,
// then logs the resulting string.
// Number of times "world" is appended.
const a = 3;
// Accumulator that the loop below extends.
let string = "hello";
for (let i = 0; i < a; i++) {
string += "world";
}
// Prints the label "string" followed by the final value.
console.log("string", string);

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,17 @@
{
"name": "learn-ast",
"version": "1.0.0",
"description": "学习ast",
"main": "index.js",
"type": "module",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "",
"license": "ISC",
"devDependencies": {
"@babel/cli": "^7.24.1",
"@babel/core": "^7.24.3",
"@babel/preset-env": "^7.24.3"
}
}

View File

@ -1,6 +1,6 @@
//变量混淆 //变量混淆
// const code = ` // const codes = `
// let x = '1'+1 // let x = '1'+1
// console.log('x',x) // console.log('x',x)
// ` // `

View File

@ -0,0 +1,8 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2024/03/21 15:55
@Usage :
@Desc :
'''

View File

@ -0,0 +1,204 @@
// Webpack JSONP chunk "chunk-10192a00": pushes a map of module id -> factory
// onto window.webpackJsonp. In each factory `a` is used as the exports object
// and `e` as the require function (see module "d504"); `t` is presumably the
// module object — confirm against the webpack runtime.
(window["webpackJsonp"] = window["webpackJsonp"] || []).push([["chunk-10192a00"], {
// Modules "5a19" and "79ee" have empty factories (presumably placeholders for
// extracted styles — not verifiable from this chunk).
"5a19": function(t, a, e) {},
"79ee": function(t, a, e) {},
// Module "ca9c": requires "5a19" and reads e.n(s).a; nothing is exported.
ca9c: function(t, a, e) {
"use strict";
var s = e("5a19")
, n = e.n(s);
n.a
},
// Module "d504": the "Index" component (render function + options); the
// assembled component is exported as a["default"].
d504: function(t, a, e) {
"use strict";
e.r(a);
// Render function. Inside it `t` is the component instance (`this`) and `e`
// is the element factory (t._self._c, falling back to t.$createElement).
var s = function() {
var t = this
, a = t.$createElement
, e = t._self._c || a;
// Root <div id="index"> holding the movie list row and the pagination row.
return e("div", {
attrs: {
id: "index"
}
}, [e("el-row", {
directives: [{
name: "loading",
rawName: "v-loading",
value: t.loading,
expression: "loading"
}]
}, [e("el-col", {
attrs: {
span: 18,
offset: 3
}
// One <el-card> per entry of t.movies; inside the callback `a` is the movie.
}, t._l(t.movies, (function(a) {
return e("el-card", {
key: a.name,
staticClass: "item m-t",
attrs: {
shadow: "hover"
}
}, [e("el-row", [e("el-col", {
attrs: {
xs: 8,
sm: 6,
md: 4
}
// Cover image linking to the "detail" route; the route key is
// t.transfer(a.id).
}, [e("router-link", {
attrs: {
to: {
name: "detail",
params: {
key: t.transfer(a.id)
}
}
}
}, [e("img", {
staticClass: "cover",
attrs: {
src: a.cover
}
})])], 1), e("el-col", {
staticClass: "p-h",
attrs: {
xs: 9,
sm: 13,
md: 16
}
// Title ("name - alias") linking to the same "detail" route.
}, [e("router-link", {
staticClass: "name",
attrs: {
to: {
name: "detail",
params: {
key: t.transfer(a.id)
}
}
}
}, [e("h2", {
staticClass: "m-b-sm"
}, [t._v(t._s(a.name) + " - " + t._s(a.alias))])]), e("div", {
staticClass: "categories"
// One button per category; here `a` shadows the movie and is the category.
}, t._l(a.categories, (function(a) {
return e("el-button", {
key: a,
staticClass: "category",
attrs: {
size: "mini",
type: "primary"
}
}, [t._v(t._s(a) + "\n ")])
}
)), 1), e("div", {
staticClass: "m-v-sm info"
// Info lines: regions joined with "、", running time, and release date.
}, [e("span", [t._v(t._s(a.regions.join("、")))]), e("span", [t._v(" / ")]), e("span", [t._v(t._s(a.minute) + " 分钟")])]), e("div", {
staticClass: "m-v-sm info"
}, [e("span", [t._v(t._s(a.published_at) + " 上映")])])], 1), e("el-col", {
attrs: {
xs: 5,
sm: 5,
md: 4
}
// Score shown with one decimal, plus a 5-star rating fed with score / 2.
}, [e("p", {
staticClass: "score m-t-md m-b-n-sm"
}, [t._v(t._s(a.score.toFixed(1)))]), e("p", [e("el-rate", {
attrs: {
value: a.score / 2,
disabled: "",
max: 5,
"text-color": "#ff9900"
}
})], 1)])], 1)], 1)
}
)), 1)], 1), e("el-row", [e("el-col", {
attrs: {
span: 10,
offset: 11
}
}, [e("div", {
staticClass: "pagination m-v-lg"
// Pagination bound to t.page / t.limit / t.total; page changes call
// t.onPageChange and the update events write back to t.page.
}, [e("el-pagination", {
attrs: {
background: "",
"current-page": t.page,
"page-size": t.limit,
layout: "total, prev, pager, next",
total: t.total
},
on: {
"current-change": t.onPageChange,
"update:currentPage": function(a) {
t.page = a
},
"update:current-page": function(a) {
t.page = a
}
}
})], 1)])], 1)], 1)
}
// n: empty array handed to the component factory together with the render
// function. i: module "7d92", whose export "a" builds the request token.
// r: module "3e22", whose export "a" is used as the `transfer` method.
, n = []
, i = e("7d92")
, r = e("3e22")
// Component options for "Index".
, o = {
name: "Index",
components: {},
data: function() {
return {
loading: !1,
total: null,
// Current page comes from the route param "page", defaulting to 1.
page: parseInt(this.$route.params.page || 1),
limit: 10,
movies: null
}
},
// Load the first page of data as soon as the component is mounted.
mounted: function() {
this.onFetchData()
},
methods: {
transfer: r["a"],
// Navigate to route "indexPage" for page t, then reload the data.
onPageChange: function(t) {
this.$router.push({
name: "indexPage",
params: {
page: t
}
}),
this.onFetchData()
},
// Fetch one page of movies: compute the offset, build the token from the
// index URL and that offset, then GET the index URL with limit/offset/token.
onFetchData: function() {
var t = this;
this.loading = !0;
var a = (this.page - 1) * this.limit
, e = Object(i["a"])(this.$store.state.url.index, a);
// Publishes the token function on window so code outside the bundle can call
// window.encrypt(url, offset). NOTE(review): this looks like a manual patch
// added to the original chunk — confirm.
window.encrypt = Object(i["a"]);
this.$axios.get(this.$store.state.url.index, {
params: {
limit: this.limit,
offset: a,
token: e
}
}).then((function(a) {
// Store the response: results -> movies, count -> total; clear the loading flag.
var e = a.data
, s = e.results
, n = e.count;
t.loading = !1,
t.movies = s,
t.total = n
}
))
}
}
}
// Requires "ca9c" and "e93d" for their side effects, then passes the options,
// render function and `n` to export "a" of module "2877" (presumably the
// vue-loader component normalizer — confirm); "8a85e5c6" is passed as an id.
, l = o
, c = (e("ca9c"),
e("e93d"),
e("2877"))
, u = Object(c["a"])(l, s, n, !1, null, "8a85e5c6", null);
a["default"] = u.exports
},
// Module "e93d": requires "79ee" and reads e.n(s).a; nothing is exported.
e93d: function(t, a, e) {
"use strict";
var s = e("79ee")
, n = e.n(s);
n.a
}
}]);

View File

@ -0,0 +1,54 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2024/03/21 15:57
@Usage :
@Desc : Use Playwright to drive a browser and execute JS inside the page
'''
import requests
from playwright.sync_api import sync_playwright
import time
import os
# Site root. It must not end with a slash: INDEX_URL appends a path that
# already starts with "/", so a trailing slash here produced
# "https://spa2.scrape.center//api/movie".
BASE_URL = 'https://spa2.scrape.center'
# Index API URL template; limit, offset and token are filled in per request.
INDEX_URL = BASE_URL + "/api/movie?limit={limit}&offset={offset}&token={token}"
# Number of index pages to fetch, and the page size sent as `limit`.
MAX_PAGE = 10
LIMIT = 10

# Start Playwright and open a headed Chromium window with devtools.
context = sync_playwright().start()
browser = context.chromium.launch(devtools=True, headless=False)
page = browser.new_page()
# Answer requests for the site's chunk script with the local file chunk.js
# (presumably the patched copy that sets window.encrypt — confirm).
# Note: the URL pattern needs the leading "**".
page.route(
    "**/js/chunk-10192a00.243cb8b7.js",
    lambda route: route.fulfill(path='chunk.js')
)
# Wait for the network to go idle before the page is used.
page.goto(BASE_URL, wait_until='networkidle')
def get_token(offset):
    """Return the request token for the given offset.

    Calls ``window.encrypt("/api/movie", "<offset>")`` inside the loaded page
    (the offset is interpolated as a quoted string) and returns its result.

    :param offset: offset of the index page the token is for.
    :return: whatever ``window.encrypt`` returns in the page.
    :raises Exception: re-raises any error from ``page.evaluate``. Previously
        the handler fell through to ``return result`` with ``result`` unbound,
        so the real error was masked by an ``UnboundLocalError``.
    """
    # page.wait_for_function('window.encrypt !== undefined')
    try:
        return page.evaluate('''() => {
            console.log('window',window)
            console.log('encrypt',window.encrypt)
            return window.encrypt("%s","%s")
        }''' % ('/api/movie', offset))
    except Exception as e:
        print(e)
        # Pause kept from the original handler, presumably to leave time to
        # inspect the headed browser before the script dies.
        time.sleep(100)
        raise
# Fetch MAX_PAGE index pages: get a token from the browser for each offset,
# then request the index API with it and print the JSON response.
try:
    for i in range(MAX_PAGE):
        offset = i * LIMIT
        token = get_token(offset)
        print(token)
        index_url = INDEX_URL.format(limit=LIMIT, offset=offset, token=token)
        response = requests.get(index_url)
        print('response', response.json())
finally:
    # Always release the browser and the Playwright driver, even when a
    # request fails; otherwise they are left running.
    browser.close()
    context.stop()

View File

@ -0,0 +1,44 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2024/03/21 15:57
@Usage :
@Desc : Use Playwright to drive a browser and execute JS inside the page
'''
import requests
from playwright.sync_api import sync_playwright
import time
import os
# Site root (no trailing slash; INDEX_URL appends a path starting with "/").
BASE_URL = 'https://spa2.scrape.center'
# Index API URL template; limit, offset and token are filled in per request.
INDEX_URL = BASE_URL + "/api/movie?limit={limit}&offset={offset}&token={token}"
# Number of index pages to fetch, and the page size sent as `limit`.
MAX_PAGE = 10
LIMIT = 10

# Start Playwright and open a Chromium page with the default launch options.
context = sync_playwright().start()
browser = context.chromium.launch()
page = browser.new_page()
# Answer requests for the site's chunk script with the local file chunk.js
# (presumably the patched copy that sets window.encrypt — confirm).
# Note: the URL pattern needs the leading "**"; a bare "/js/..." glob is
# compared against the full request URL and never matches.
page.route(
    "**/js/chunk-10192a00.243cb8b7.js",
    lambda route: route.fulfill(path='chunk.js')
)
# Wait for the network to go idle so the chunk has loaded and run before
# get_token calls window.encrypt.
page.goto(BASE_URL, wait_until='networkidle')
def get_token(offset):
    """Return the request token for the given offset.

    Evaluates ``window.encrypt("/api/movie", "<offset>")`` in the loaded page
    (the offset is interpolated as a quoted string) and returns the result.
    """
    script = (
        '() => {\n'
        'return window.encrypt("%s","%s")\n'
        '}'
    ) % ('/api/movie', offset)
    return page.evaluate(script)
# Fetch MAX_PAGE index pages: get a token from the browser for each offset,
# then request the index API with it and print the JSON response.
try:
    for i in range(MAX_PAGE):
        offset = i * LIMIT
        token = get_token(offset)
        print(token)
        index_url = INDEX_URL.format(limit=LIMIT, offset=offset, token=token)
        response = requests.get(index_url)
        print('response', response.json())
finally:
    # Always release the browser and the Playwright driver, even when a
    # request fails; otherwise they are left running.
    browser.close()
    context.stop()

View File

@ -37,7 +37,7 @@ def scrape_page(url):
response = requests.get(url) response = requests.get(url)
if response.status_code == 200: if response.status_code == 200:
return response.text return response.text
logging.error('get invalid status code %s while scraping %s', response.status_code, url) logging.error('get invalid status codes %s while scraping %s', response.status_code, url)
except requests.RequestException: except requests.RequestException:
logging.error('error occurred while scraping %s', url, exc_info=True) logging.error('error occurred while scraping %s', url, exc_info=True)