20240327爬虫更新JS逆向

This commit is contained in:
markilue 2024-03-31 11:31:46 +08:00
parent 50902dd83a
commit 4a5c4125df
39 changed files with 2098 additions and 4450 deletions

View File

@ -0,0 +1,8 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2024/03/26 20:30
@Usage :
@Desc :
'''

View File

@ -0,0 +1,27 @@
// Pagination state mirroring the site's index-page component.
var page = 0
var limit = 10
// Expose Node's `global` as `window` so the browser-targeted code below runs in Node.
window = global
// NOTE(review): deobfuscation stalls here — _0x34777a is the (unknown)
// obfuscated webpack module loader, so this file cannot run stand-alone yet.
function encrypt(args) {
    // Load a side-effect module, then fetch what appear to be a hash library
    // ('3452', exposes SHA1) and a Base64 codec ('27ae') — TODO confirm.
    _0x34777a('6b54');
    var _0x189cbb = _0x34777a('3452')
    , _0x358b1f = _0x34777a('27ae')['Base64'];
    // Collect every argument, then append the current unix time in seconds.
    for (var _0x5da681 = Math['round'](new Date()['getTime']() / 0x3e8)['toString'](), _0x2a83dd = arguments['length'], _0x31a891 = new Array(_0x2a83dd), _0x596a02 = 0x0; _0x596a02 < _0x2a83dd; _0x596a02++)
        _0x31a891[_0x596a02] = arguments[_0x596a02];
    _0x31a891['push'](_0x5da681);
    // token = Base64( SHA1(args.join(',') + ',' + ts) + ',' + ts )
    var _0xf7c3c7 = _0x189cbb['SHA1'](_0x31a891['join'](','))['toString'](_0x189cbb['enc']['Hex'])
    , _0x3c8435 = [_0xf7c3c7, _0x5da681]['join'](',')
    , _0x104b5b = _0x358b1f['encode'](_0x3c8435);
    return _0x104b5b;
}
// Wrapper matching the site's call site; the offset computation is unused here.
function generate_token(args) {
    var _0x422986 = (page - 0x1) * limit, _0x263439 = encrypt(args);
    return _0x263439;
}
console.log(generate_token('api/movie'))

View File

@ -0,0 +1,55 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2024/03/26 20:32
@Usage : 使用Python实现列表页的爬取
@Desc : 使用逆向分析之后已经基本确定了token的生成逻辑
- `/api/movie `放到一个列表里
- 在列表中加人当前时间戳;
- 将列表内容用逗号拼接;
- 将拼接的结果进行 SHA1 编码;
- 将编码的结果和时间戳再次拼接;
- 将拼接后的结果进行 Base64 编码
'''
import hashlib
import time
import base64
from typing import Any, List
import requests
BASE_URL = 'https://spa6.scrape.center'
INDEX_URL = BASE_URL + "/api/movie?limit={limit}&offset={offset}&token={token}"
DETAIL_URL = BASE_URL + "/api/movie/{id}?token={token}"
SECRET = 'ef34#teuq0btua#(-57w1q5o5--j@98xygimlyfxs*-!i-0-mb'
MAX_PAGE = 10
LIMIT = 10
def generate_token(args: List[Any]) -> str:
    """Build the spa6 API token.

    Recipe (recovered by reverse engineering):
      1. append the current unix timestamp (seconds) to *args*;
      2. join with commas and SHA1 the result (hex digest);
      3. join the digest and the timestamp with a comma;
      4. Base64-encode that string.

    Unlike the first draft, this does NOT mutate the caller's *args* list.

    :param args: request path components to sign, e.g. ``['/api/movie']``.
    :return: the Base64 token string expected by the ``token=`` query param.
    """
    cur_time = str(int(time.time()))
    parts = [*args, cur_time]  # copy — leave the caller's list untouched
    sign = hashlib.sha1(','.join(parts).encode('utf-8')).hexdigest()
    return base64.b64encode(','.join([sign, cur_time]).encode('utf-8')).decode('utf-8')
# Crawl the listing pages (only the first MAX_PAGE pages are fetched here).
for i in range(MAX_PAGE):
    # Fresh signing payload per request.
    args = ['/api/movie']
    token = generate_token(args)
    OFFSET = i * LIMIT
    url = INDEX_URL.format(limit=LIMIT, offset=OFFSET, token=token)
    response = requests.get(url)
    print('response:', response.json())
    result = response.json()
    # Crawl each movie's detail page from the listing results.
    for movie in result['results']:
        # NOTE(review): `id` shadows the builtin of the same name.
        id = movie['id']
        # The detail route hides the numeric id as Base64(SECRET + id).
        encrypt_id = base64.b64encode((SECRET + str(id)).encode('utf-8')).decode('utf-8')
        # The token is signed over the encrypted detail path, not the plain one.
        args = [f'/api/movie/{encrypt_id}']
        detail_url = DETAIL_URL.format(id=encrypt_id, token=generate_token(args))
        response = requests.get(detail_url)
        print(response.json())

View File

@ -0,0 +1,8 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2024/03/26 20:30
@Usage :
@Desc :
'''

View File

@ -0,0 +1,201 @@
(window['webpackJsonp'] = window['webpackJsonp'] || [])['push']([['chunk-19c920f8'], {
'5a19': function(_0x1588d2, _0x49ff45, _0x493500) {},
'c6bf': function(_0x1ff78d, _0x1a7aa3, _0x6392a5) {},
'ca9c': function(_0x34ea17, _0x1d01c8, _0x1a974c) {
'use strict';
var _0x116bc9 = _0x1a974c('5a19')
, _0x14ee23 = _0x1a974c['n'](_0x116bc9);
_0x14ee23['a'];
},
'd504': function(_0x4c4705, _0x3c93b9, _0x4c22a6) {
'use strict';
_0x4c22a6['r'](_0x3c93b9);
var _0x4b4f78 = function() {
var _0x1dc0eb = this
, _0x559ed0 = _0x1dc0eb['$createElement']
, _0x28c6bc = _0x1dc0eb['_self']['_c'] || _0x559ed0;
return _0x28c6bc('div', {
'attrs': {
'id': 'index'
}
}, [_0x28c6bc('el-row', {
'directives': [{
'name': 'loading',
'rawName': 'v-loading',
'value': _0x1dc0eb['loading'],
'expression': 'loading'
}]
}, [_0x28c6bc('el-col', {
'attrs': {
'span': 0x12,
'offset': 0x3
}
}, _0x1dc0eb['_l'](_0x1dc0eb['movies'], function(_0x1355ed) {
return _0x28c6bc('el-card', {
'key': _0x1355ed['name'],
'staticClass': 'item\x20m-t',
'attrs': {
'shadow': 'hover'
}
}, [_0x28c6bc('el-row', [_0x28c6bc('el-col', {
'attrs': {
'xs': 0x8,
'sm': 0x6,
'md': 0x4
}
}, [_0x28c6bc('router-link', {
'attrs': {
'to': {
'name': 'detail',
'params': {
'key': _0x1dc0eb['transfer'](_0x1355ed['id'])
}
}
}
}, [_0x28c6bc('img', {
'staticClass': 'cover',
'attrs': {
'src': _0x1355ed['cover']
}
})])], 0x1), _0x28c6bc('el-col', {
'staticClass': 'p-h',
'attrs': {
'xs': 0x9,
'sm': 0xd,
'md': 0x10
}
}, [_0x28c6bc('router-link', {
'staticClass': 'name',
'attrs': {
'to': {
'name': 'detail',
'params': {
'key': _0x1dc0eb['transfer'](_0x1355ed['id'])
}
}
}
}, [_0x28c6bc('h2', {
'staticClass': 'm-b-sm'
}, [_0x1dc0eb['_v'](_0x1dc0eb['_s'](_0x1355ed['name']) + '\x20-\x20' + _0x1dc0eb['_s'](_0x1355ed['alias']))])]), _0x28c6bc('div', {
'staticClass': 'categories'
}, _0x1dc0eb['_l'](_0x1355ed['categories'], function(_0x3f20be) {
return _0x28c6bc('el-button', {
'key': _0x3f20be,
'staticClass': 'category',
'attrs': {
'size': 'mini',
'type': 'primary'
}
}, [_0x1dc0eb['_v'](_0x1dc0eb['_s'](_0x3f20be) + '\x0a\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20')]);
}), 0x1), _0x28c6bc('div', {
'staticClass': 'm-v-sm\x20info'
}, [_0x28c6bc('span', [_0x1dc0eb['_v'](_0x1dc0eb['_s'](_0x1355ed['regions']['join']('、')))]), _0x28c6bc('span', [_0x1dc0eb['_v']('\x20/\x20')]), _0x28c6bc('span', [_0x1dc0eb['_v'](_0x1dc0eb['_s'](_0x1355ed['minute']) + '\x20分钟')])]), _0x28c6bc('div', {
'staticClass': 'm-v-sm\x20info'
}, [_0x28c6bc('span', [_0x1dc0eb['_v'](_0x1dc0eb['_s'](_0x1355ed['published_at']) + '\x20上映')])])], 0x1), _0x28c6bc('el-col', {
'attrs': {
'xs': 0x5,
'sm': 0x5,
'md': 0x4
}
}, [_0x28c6bc('p', {
'staticClass': 'score\x20m-t-md\x20m-b-n-sm'
}, [_0x1dc0eb['_v'](_0x1dc0eb['_s'](_0x1355ed['score']['toFixed'](0x1)))]), _0x28c6bc('p', [_0x28c6bc('el-rate', {
'attrs': {
'value': _0x1355ed['score'] / 0x2,
'disabled': '',
'max': 0x5,
'text-color': '#ff9900'
}
})], 0x1)])], 0x1)], 0x1);
}), 0x1)], 0x1), _0x28c6bc('el-row', [_0x28c6bc('el-col', {
'attrs': {
'span': 0xa,
'offset': 0xb
}
}, [_0x28c6bc('div', {
'staticClass': 'pagination\x20m-v-lg'
}, [_0x28c6bc('el-pagination', {
'attrs': {
'background': '',
'current-page': _0x1dc0eb['page'],
'page-size': _0x1dc0eb['limit'],
'layout': 'total,\x20prev,\x20pager,\x20next',
'total': _0x1dc0eb['total']
},
'on': {
'current-change': _0x1dc0eb['onPageChange'],
'update:currentPage': function(_0x241449) {
_0x1dc0eb['page'] = _0x241449;
},
'update:current-page': function(_0x240a84) {
_0x1dc0eb['page'] = _0x240a84;
}
}
})], 0x1)])], 0x1)], 0x1);
}
, _0x33c195 = []
, _0x2fa7bd = _0x4c22a6('7d92')
, _0x49ecf1 = _0x4c22a6('3e22')
, _0x4d1fd7 = {
'name': 'Index',
'components': {},
'data': function() {
return {
'loading': !0x1,
'total': null,
'page': parseInt(this['$route']['params']['page'] || 0x1),
'limit': 0xa,
'movies': null
};
},
'mounted': function() {
this['onFetchData']();
},
'methods': {
'transfer': _0x49ecf1['a'],
'onPageChange': function(_0x12422f) {
this['$router']['push']({
'name': 'indexPage',
'params': {
'page': _0x12422f
}
}),
this['onFetchData']();
},
'onFetchData': function() {
var _0xd5d754 = this;
this['loading'] = !0x0;
var _0x422986 = (this['page'] - 0x1) * this['limit']
, _0x263439 = Object(_0x2fa7bd['a'])(this['$store']['state']['url']['index']);
window.encrypt=Object(_0x2fa7bd['a']);
this['$axios']['get'](this['$store']['state']['url']['index'], {
'params': {
'limit': this['limit'],
'offset': _0x422986,
'token': _0x263439
}
})['then'](function(_0x464186) {
var _0x148e87 = _0x464186['data']
, _0x2f29ad = _0x148e87['results']
, _0x4829b0 = _0x148e87['count'];
_0xd5d754['loading'] = !0x1,
_0xd5d754['movies'] = _0x2f29ad,
_0xd5d754['total'] = _0x4829b0;
});
}
}
}
, _0x15f73f = _0x4d1fd7
, _0x3a0944 = (_0x4c22a6('ca9c'),
_0x4c22a6('eb45'),
_0x4c22a6('2877'))
, _0x5b3502 = Object(_0x3a0944['a'])(_0x15f73f, _0x4b4f78, _0x33c195, !0x1, null, '724ecf3b', null);
_0x3c93b9['default'] = _0x5b3502['exports'];
},
'eb45': function(_0x5e6316, _0x331917, _0x1ca927) {
'use strict';
var _0x26eff7 = _0x1ca927('c6bf')
, _0x3af8d4 = _0x1ca927['n'](_0x26eff7);
_0x3af8d4['a'];
}
}]);

View File

@ -0,0 +1,174 @@
{
"count": 102,
"results": [
{
"id": 1,
"name": "霸王别姬",
"alias": "Farewell My Concubine",
"cover": "https://p0.meituan.net/movie/ce4da3e03e655b5b88ed31b5cd7896cf62472.jpg@464w_644h_1e_1c",
"categories": [
"剧情",
"爱情"
],
"published_at": "1993-07-26",
"minute": 171,
"score": 9.5,
"regions": [
"中国内地",
"中国香港"
]
},
{
"id": 2,
"name": "这个杀手不太冷",
"alias": "Léon",
"cover": "https://p1.meituan.net/movie/6bea9af4524dfbd0b668eaa7e187c3df767253.jpg@464w_644h_1e_1c",
"categories": [
"剧情",
"动作",
"犯罪"
],
"published_at": "1994-09-14",
"minute": 110,
"score": 9.5,
"regions": [
"法国"
]
},
{
"id": 3,
"name": "肖申克的救赎",
"alias": "The Shawshank Redemption",
"cover": "https://p0.meituan.net/movie/283292171619cdfd5b240c8fd093f1eb255670.jpg@464w_644h_1e_1c",
"categories": [
"剧情",
"犯罪"
],
"published_at": "1994-09-10",
"minute": 142,
"score": 9.5,
"regions": [
"美国"
]
},
{
"id": 4,
"name": "泰坦尼克号",
"alias": "Titanic",
"cover": "https://p1.meituan.net/movie/b607fba7513e7f15eab170aac1e1400d878112.jpg@464w_644h_1e_1c",
"categories": [
"剧情",
"爱情",
"灾难"
],
"published_at": "1998-04-03",
"minute": 194,
"score": 9.5,
"regions": [
"美国"
]
},
{
"id": 5,
"name": "罗马假日",
"alias": "Roman Holiday",
"cover": "https://p0.meituan.net/movie/289f98ceaa8a0ae737d3dc01cd05ab052213631.jpg@464w_644h_1e_1c",
"categories": [
"剧情",
"喜剧",
"爱情"
],
"published_at": "1953-08-20",
"minute": 118,
"score": 9.5,
"regions": [
"美国"
]
},
{
"id": 6,
"name": "唐伯虎点秋香",
"alias": "Flirting Scholar",
"cover": "https://p0.meituan.net/movie/da64660f82b98cdc1b8a3804e69609e041108.jpg@464w_644h_1e_1c",
"categories": [
"喜剧",
"爱情",
"古装"
],
"published_at": "1993-07-01",
"minute": 102,
"score": 9.5,
"regions": [
"中国香港"
]
},
{
"id": 7,
"name": "乱世佳人",
"alias": "Gone with the Wind",
"cover": "https://p0.meituan.net/movie/223c3e186db3ab4ea3bb14508c709400427933.jpg@464w_644h_1e_1c",
"categories": [
"剧情",
"爱情",
"历史",
"战争"
],
"published_at": "1939-12-15",
"minute": 238,
"score": 9.5,
"regions": [
"美国"
]
},
{
"id": 8,
"name": "喜剧之王",
"alias": "The King of Comedy",
"cover": "https://p0.meituan.net/movie/1f0d671f6a37f9d7b015e4682b8b113e174332.jpg@464w_644h_1e_1c",
"categories": [
"剧情",
"喜剧",
"爱情"
],
"published_at": "1999-02-13",
"minute": 85,
"score": 9.5,
"regions": [
"中国香港"
]
},
{
"id": 9,
"name": "楚门的世界",
"alias": "The Truman Show",
"cover": "https://p0.meituan.net/movie/8959888ee0c399b0fe53a714bc8a5a17460048.jpg@464w_644h_1e_1c",
"categories": [
"剧情",
"科幻"
],
"published_at": null,
"minute": 103,
"score": 9.0,
"regions": [
"美国"
]
},
{
"id": 10,
"name": "狮子王",
"alias": "The Lion King",
"cover": "https://p0.meituan.net/movie/27b76fe6cf3903f3d74963f70786001e1438406.jpg@464w_644h_1e_1c",
"categories": [
"动画",
"歌舞",
"冒险"
],
"published_at": "1995-07-15",
"minute": 89,
"score": 9.0,
"regions": [
"美国"
]
}
]
}

View File

@ -0,0 +1,53 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2024/03/26 18:59
@Usage :
@Desc :使用playwright跳过加密逻辑爬取网站
'''
from playwright.sync_api import sync_playwright
import time
import requests
BASE_URL= 'https://spa6.scrape.center/'
INDEX_URL = BASE_URL + "/api/movie?limit={limit}&offset={offset}&token={token}"
MAX_PAGE = 10
LIMIT = 10
content = sync_playwright().start()
browser = content.chromium.launch()
page = browser.new_page()
# 注意这里路径需要加上**
page.route(
"**/js/chunk-19c920f8.c3a1129d.js",
lambda route: route.fulfill(path='chunk.js')
)
page.goto(BASE_URL, wait_until='networkidle')
def get_token():
    """Mint an API token by calling ``window.encrypt`` inside the patched page.

    The route handler above swaps the site's chunk for a local ``chunk.js``
    that exposes the token function on ``window``.

    :return: the token string, or ``None`` when evaluation fails (e.g. the
        patched chunk has not installed ``window.encrypt`` yet).
    """
    # page.wait_for_function('window.encrypt !== undefined')
    try:
        result = page.evaluate('''() => {
console.log('window',window)
console.log('encrypt',window.encrypt)
return window.encrypt("%s")
}''' % ('/api/movie'))
        return result
    except Exception as e:
        # Bug fix: the original slept 100 s *before* printing the error and
        # then fell off the end with `result` unbound, implicitly returning
        # None. Report the failure immediately and return None explicitly.
        print(e)
        return None
def get_key():
    # TODO: the detail-page `key` generation has not been reverse-engineered yet.
    pass

# Fetch every listing page using tokens minted inside the live browser page.
for i in range(MAX_PAGE):
    offset = LIMIT*i
    result = requests.get(INDEX_URL.format(limit=LIMIT,offset=offset,token=get_token()))
    print(result.text)
# NOTE(review, translated): listing pages crawl fine at this point, but the
# detail-page `key` logic could not be located in the obfuscated bundle —
# no breakpoint on the obvious keywords ever triggered.

View File

@ -0,0 +1,9 @@
// Token core lifted verbatim from the obfuscated bundle for study.
// NOTE(review): _0x189cbb (hash library with SHA1) and _0x358b1f (Base64
// codec) are free variables resolved in the bundle's own scope — this
// snippet does not run stand-alone.
function _0x456254() {
    // Gather all arguments, then append the current unix timestamp (seconds).
    for (var _0x5da681 = Math['round'](new Date()['getTime']() / 0x3e8)['toString'](), _0x2a83dd = arguments['length'], _0x31a891 = new Array(_0x2a83dd), _0x596a02 = 0x0; _0x596a02 < _0x2a83dd; _0x596a02++)
        _0x31a891[_0x596a02] = arguments[_0x596a02];
    _0x31a891['push'](_0x5da681);
    // token = Base64( SHA1(joined args + ts) + ',' + ts )
    var _0xf7c3c7 = _0x189cbb['SHA1'](_0x31a891['join'](','))['toString'](_0x189cbb['enc']['Hex'])
    , _0x3c8435 = [_0xf7c3c7, _0x5da681]['join'](',')
    , _0x104b5b = _0x358b1f['encode'](_0x3c8435);
    return _0x104b5b;
}

View File

@ -0,0 +1,10 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2024/03/27 12:26
@Usage :
@Desc : Probe a property of Base64: print the encoded length of a captured
        detail-page key (Base64 of the 51-byte SECRET + movie id string).
'''
sample_key = 'ZWYzNCN0ZXVxMGJ0dWEjKC01N3cxcTVvNS0takA5OHh5Z2ltbHlmeHMqLSFpLTAtbWIx'
print(len(sample_key))

View File

@ -0,0 +1,14 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2024/03/26 12:54
@Usage :
@Desc :使用pywasm执行wasm文件
'''
import pywasm
# Load the captured WebAssembly module and probe its exported `encrypt`
# with two sample integers to observe the signing function's output.
runtime = pywasm.load('./Wasm.wasm')
result = runtime.exec('encrypt', [1, 2])
print(result)

View File

@ -0,0 +1,2 @@
const a = parseInt(Math.round((new Date).getTime() / 1e3).toString());
console.log(a)

View File

@ -0,0 +1,24 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2024/03/26 13:08
@Usage :
@Desc :使用pywasm库完成爬取https://spa14.scrape.center/
'''
import time
import pywasm
import requests
baseurl = 'https://spa14.scrape.center/api/movie/?limit={limit}&offset={offset}&sign={sign}'
MAX_PAGE = 10
limit = 10
# The site computes request signatures inside a wasm module; run it locally.
runtime = pywasm.load('./Wasm.wasm')
# Debug leftovers: inspect the timestamp value/type fed to encrypt().
print(time.time())
print(type(time.time()))
for i in range(MAX_PAGE):
    offset = i * limit
    # sign = encrypt(offset, unix_seconds), as exported by the wasm module.
    sign = runtime.exec('encrypt', [offset, int(time.time())])
    result = requests.get(baseurl.format(limit=limit, offset=offset, sign=sign))
    print(result.text)

View File

@ -0,0 +1,28 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2024/03/26 13:08
@Usage :
@Desc :使用wasmer库完成爬取https://spa14.scrape.center/
'''
import time
from wasmer import engine, Store, Module, Instance
from wasmer_compiler_cranelift import Compiler
import requests
baseurl = 'https://spa14.scrape.center/api/movie/?limit={limit}&offset={offset}&sign={sign}'
MAX_PAGE = 10
limit = 10
# JIT-compile the captured wasm module with Cranelift and instantiate it.
store = Store(engine.JIT(Compiler))
module = Module(store, open('Wasm.wasm', 'rb').read())
instance = Instance(module)
for i in range(MAX_PAGE):
    offset = i * limit
    # sign = encrypt(offset, unix_seconds), exported by the wasm module.
    sign = instance.exports.encrypt(offset, int(time.time()))
    result = requests.get(baseurl.format(limit=limit, offset=offset, sign=sign))
    print(result.text)

View File

@ -0,0 +1,17 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2024/03/26 12:54
@Usage :
@Desc : Execute the wasm file with the wasmer library (counterpart of the
        pywasm variant; the original header said "pywasm" by copy-paste).
'''
from wasmer import engine, Store, Module, Instance
from wasmer_compiler_cranelift import Compiler
# JIT-compile and instantiate the captured module, then probe `encrypt`.
store = Store(engine.JIT(Compiler))
module = Module(store, open('Wasm.wasm', 'rb').read())
instance = Instance(module)
result = instance.exports.encrypt(1, 2)
print(result)

View File

@ -0,0 +1,5 @@
{
"presets": [
"@babel/preset-env"
]
}

View File

@ -0,0 +1,27 @@
//@Author : dingjiawen
//@Date : 2024/03/25 15:26
//@Usage :
//@Desc : Restore code1.js via AST constant folding: evaluate every
//        statically-computable expression and replace it with its value.
import {traverse,types} from "@babel/core";
import {parse} from "@babel/parser";
import CodeGenerator from "@babel/generator";
// import * as types from "@babel/types";
import fs from "fs";
const code =fs.readFileSync("code/code1.js","utf-8");
let ast =parse(code)
traverse(ast,{
    // Fold unary/binary/conditional/call expressions Babel can evaluate.
    "UnaryExpression|BinaryExpression|ConditionalExpression|CallExpression":(path) =>{
        const {confident,value} = path.evaluate();
        // Skip unbounded results — valueToNode(Infinity) emits ugly code.
        if(value==Infinity ||value==-Infinity) return;
        // Substitute only when Babel is confident the evaluation is exact.
        confident&&path.replaceWith(types.valueToNode(value));
    },
});
const {code:output} = CodeGenerator.default(ast);
console.log(output)

View File

@ -0,0 +1,27 @@
//@Author : dingjiawen
//@Date : 2024/03/25 15:26
//@Usage :
//@Desc : Restore code2.js: decode \uXXXX / \xXX escaped string literals.
//        (Header fixed — it previously said code1.js by copy-paste.)
import {traverse, types} from "@babel/core";
import {parse} from "@babel/parser";
import CodeGenerator from "@babel/generator";
// import * as types from "@babel/types";
import fs from "fs";
const code = fs.readFileSync("code/code2.js", "utf-8");
let ast = parse(code)
traverse(ast, {
    "StringLiteral"({node}) {
        // The regex matches \u or \x escape sequences (Unicode / hex escapes);
        // `g` = global, `i` = case-insensitive. When one is present, replace the
        // raw source text with the decoded value so the generator prints a
        // readable literal.
        if (node.extra && /\\[ux]/gi.test(node.extra.raw)) {
            node.extra.raw = node.extra.rawValue;
        }
    },
});
const {code: output} = CodeGenerator.default(ast);
console.log(output)

View File

@ -0,0 +1,41 @@
//@Author : dingjiawen
//@Date : 2024/03/25 15:26
//@Usage :
//@Desc :使用AST技术还原code1.js
import {traverse, types} from "@babel/core";
import {parse} from "@babel/parser";
import CodeGenerator from "@babel/generator";
// import * as types from "@babel/types";
import fs from "fs";
const code = fs.readFileSync("code/code3.js", "utf-8");
let ast = parse(code)
traverse(ast, {
IfStatement(path) {
let {consequent, alternate} = path.node;
let testpath = path.get("test");
const evaluateTest = testpath.evaluateTruthy();
if (evaluateTest === true) {
if (types.isBlockStatement(consequent)) {
consequent = consequent.body;
}
path.replaceWithMultiple(consequent);
} else if (evaluateTest === false) {
if (alternate != null) {
if (types.isBlockStatement(alternate)) {
alternate = alternate.body;
}
path.replaceWithMultiple(alternate);
} else {
path.remove();
}
}
},
});
const {code: output} = CodeGenerator.default(ast);
console.log(output)

View File

@ -0,0 +1,54 @@
//@Author : dingjiawen
//@Date : 2024/03/25 15:26
//@Usage :
//@Desc :使用AST技术还原code1.js
import {traverse, types} from "@babel/core";
import {parse} from "@babel/parser";
import CodeGenerator from "@babel/generator";
// import * as types from "@babel/types";
import fs from "fs";
//从系统盘读取文件 code4.js
const code = fs.readFileSync("code/code4.js", "utf-8");
let ast = parse(code)
traverse(ast, {
WhileStatement(path) {
const {node, scope} = path;
const {test, body} = node;
let switchNode = body.body[0];
let {discriminant, cases} = switchNode;
let {object, property} = discriminant;
let arrName = object.name;
let binding = scope.getBinding(arrName);
let {init} = binding.path.node;
object = init.callee.object;
property = init.callee.property;
let argument = init.arguments[0].value;
let arrayFlow = object.value[property.name](argument);
let resultBody = [];
arrayFlow.forEach((index) => {
let switchCase = cases.filter((c) => c.test.value == index)[0];
let caseBody = switchCase.consequent;
if (types.isContinueStatement(caseBody[caseBody.length - 1])) {
caseBody.pop();
}
resultBody = resultBody.concat(caseBody);
}
);
path.replaceWithMultiple(resultBody);
},
});
const {code: output} = CodeGenerator.default(ast);
console.log(output)

View File

@ -0,0 +1,4 @@
const a = !![];
const b = "abc" == "bcd";
const c = (1 << 3) | 2;
const d = parseInt("5" + "0");

View File

@ -0,0 +1 @@
const strings = ["\x68\x65\x6c\x6c\x6f", "\x77\x6f\x72\x6c\x64"];

View File

@ -0,0 +1,23 @@
const _0x16c18d = function () {
if (!![[]]) {
console.log("hello world");
} else {
console.log("this");
console.log("is");
console.log("dead");
console.log("code");
}
};
const _0x1f7292 = function () {
if ("xmv2nOdfy2N".charAt(4) !== String.fromCharCode(110)) {
console.log("this");
console.log("is");
console.log("dead");
console.log("code");
} else {
console.log("nice to meet you");
}
};
_0x16c18d();
_0x1f7292();

View File

@ -0,0 +1,16 @@
const s = "3|1|2".split("|");
let x = 0;
while (true) {
switch (s[x++]) {
case "1":
const a = 1;
continue;
case "2":
const b = 3;
continue;
case "3":
const c = 0;
continue;
}
break;
}

View File

@ -0,0 +1,18 @@
{
"name": "learn-ast",
"version": "1.0.0",
"description": "学习ast",
"main": "index.js",
"type": "module",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "",
"license": "ISC",
"devDependencies": {
"@babel/cli": "^7.14.8",
"@babel/core": "^7.15.0",
"@babel/node": "^7.14.9",
"@babel/preset-env": "^7.15.0"
}
}

View File

@ -1,3 +1,4 @@
//usage:讲述@babel/parse和generate
import {parse} from "@babel/parser"; import {parse} from "@babel/parser";
import fs from "fs"; import fs from "fs";

View File

@ -1,3 +1,4 @@
//usage:讲述@babel/traverse
import {parse} from "@babel/parser" import {parse} from "@babel/parser"
import CodeGenerator from "@babel/generator" import CodeGenerator from "@babel/generator"
import fs from "fs" import fs from "fs"

View File

@ -0,0 +1,31 @@
//usage: demonstrates @babel/types — build `const b = a + 1` and insert it
//after the existing declaration of `a`.
import {traverse,types} from "@babel/core";
import {parse} from "@babel/parser";
import CodeGenerator from "@babel/generator";
// import * as t from "@babel/types";
const code = "const a = 1";
let ast = parse(code);
traverse(ast, {
    VariableDeclaration(path) {
        // (Fix: removed an unused `const a = types.identifier("a")` local.)
        // Build the initializer expression `a + 1`.
        let init = types.binaryExpression(
            "+",
            types.identifier("a"),
            types.numericLiteral(1)
        );
        // Assemble `const b = a + 1;`.
        let declarator = types.variableDeclarator(types.identifier("b"), init);
        let declaration = types.variableDeclaration("const", [declarator]);
        path.insertAfter(declaration);
        // Stop traversing so the freshly inserted declaration is not visited.
        path.stop();
    },
}
);
const output = CodeGenerator.default(ast, {
    retainLines: true,
}).code;
console.log(output);

File diff suppressed because it is too large Load Diff

View File

@ -10,8 +10,12 @@
"author": "", "author": "",
"license": "ISC", "license": "ISC",
"devDependencies": { "devDependencies": {
"@babel/cli": "^7.24.1", "@babel/cli": "^7.14.8",
"@babel/core": "^7.24.3", "@babel/core": "^7.15.0",
"@babel/preset-env": "^7.24.3" "@babel/node": "^7.10.5",
"@babel/preset-env": "^7.15.0"
},
"dependencies": {
"javascript-obfuscator": "^2.15.4"
} }
} }

View File

@ -0,0 +1,21 @@
import * as t from '@babel/types';
// Build the AST for `const b = a + 1` bottom-up from individual nodes.
// Identifier `a`
const idA = t.identifier("a");
// Numeric literal 1
const literalOne = t.numericLiteral(1);
// Binary expression `a + 1`
const binaryExpression = t.binaryExpression("+", idA, literalOne);
// Identifier `b`
const idB = t.identifier("b");
// Declarator `b = a + 1`
const variableDeclarator = t.variableDeclarator(idB, binaryExpression);
// Declaration `const b = a + 1;`
const variableDeclaration = t.variableDeclaration("const", [variableDeclarator]);
console.log(variableDeclaration);

View File

@ -19,10 +19,8 @@ LIMIT = 10
context = sync_playwright().start() context = sync_playwright().start()
browser = context.chromium.launch(devtools=True, headless=False) browser = context.chromium.launch(devtools=True, headless=False)
page = browser.new_page() page = browser.new_page()
# 注意这里路径需要加上** # 注意这里路径需要加上**
page.route( page.route(
"**/js/chunk-10192a00.243cb8b7.js", "**/js/chunk-10192a00.243cb8b7.js",
@ -39,10 +37,10 @@ def get_token(offset):
console.log('encrypt',window.encrypt) console.log('encrypt',window.encrypt)
return window.encrypt("%s","%s") return window.encrypt("%s","%s")
}''' % ('/api/movie', offset)) }''' % ('/api/movie', offset))
return result
except Exception as e: except Exception as e:
time.sleep(100) time.sleep(100)
print(e) print(e)
return result
for i in range(MAX_PAGE): for i in range(MAX_PAGE):

View File

@ -0,0 +1,8 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2024/03/27 13:59
@Usage :
@Desc :
'''

View File

@ -0,0 +1,8 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2024/03/27 13:59
@Usage :
@Desc :
'''

View File

@ -0,0 +1,52 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2024/03/27 14:00
@Usage :
@Desc :爬取起点 指定章节的指定详情页
'''
import requests
from pyquery import PyQuery as pq
def crawel_detail(book_id, chatpter_id):
    """Fetch one Qidian chapter page and dump its paragraphs to
    ./output/<chapter title>.txt.

    NOTE(review): the misspelled names ("crawel", "chatpter_id") are kept
    because sibling scripts import and call this function by these names.

    :param book_id: numeric book id from the chapter URL.
    :param chatpter_id: numeric chapter id from the chapter URL.
    """
    url = f'https://www.qidian.com/chapter/{book_id}/{chatpter_id}/'
    # Browser-captured request headers. The hard-coded Cookie is session-bound
    # and will expire — refresh it from a live session when requests fail.
    header = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "Accept-Encoding": "gzip, deflate, br, zstd",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Cookie": "newstatisticUUID=1669693998_1518827460; _csrfToken=9VUvhprKzOz80xLUYXqgOzIcm011iQ9DfhwSyUD2; Hm_lvt_f00f67093ce2f38f215010b699629083=1701842016; supportwebp=true; supportWebp=true; _ga=GA1.1.67407022.1669694001; _ga_FZMMH98S83=GS1.1.1701860853.4.1.1701860855.0.0.0; _ga_PFYW0QLV3P=GS1.1.1701860853.4.1.1701860855.0.0.0; fu=923381569; traffic_utm_referer=https%3A%2F%2Fcn.bing.com%2F; trkf=1; w_tsfp=ltvgWVEE2utBvS0Q6KvslUKvEj87Z2R7xFw0D+M9Os09AaYjU5yD14d5vdfldCyCt5Mxutrd9MVxYnGAU9QkfxcSQs+Qb5tH1VPHx8NlntdKRQJtA5KOD1McdbpzvTJCL24LIRDu3mt3ItRJmONgj14K5yZ137ZlCa8hbMFbixsAqOPFm/97DxvSliPXAHGHM3wLc+6C6rgv8LlSgWyEtBu/eRlhAcxD0EaT3CwfCHoi9BPNc+lYNx+oJ8arTe9Gvy/hk2upNdLxiEox60I3sB49AtX02TXKL3ZEIAtrZViygr4ke66rNuYluTEZXL5TWwpN/FxC9qdk605dDi69YCeHAq555FZRF/pYrMuocHqW0JPrcltbvN4px1kl9g==",
        "Host": "www.qidian.com",
        "Pragma": "no-cache",
        # Referer acts as the site's anti-leech check: the request must look
        # like it navigated from the book's own page.
        "Referer": f"https://www.qidian.com/book/{book_id}/",
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "same-origin",
        "Sec-Fetch-User": "?1",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
        "sec-ch-ua": "\"Chromium\";v=\"122\", \"Not(A:Brand\";v=\"24\", \"Google Chrome\";v=\"122\"",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "Windows"
    }
    response = requests.get(url, headers=header)
    doc = pq(response.text)
    # First element matching .title is the chapter heading (an lxml element,
    # hence `.text` below rather than pyquery's `.text()`).
    title = doc('.title')[0]
    # NOTE(review): the title is used verbatim as a filename — characters that
    # are invalid in paths would make this open() fail; confirm before reuse.
    with open(f"./output/{title.text}.txt", 'w', encoding='utf-8') as file:
        for line in doc('.content p').items():
            file.write(line.text() + '\n')  # one paragraph per line, newline-terminated
    print(f"爬取成功【{title.text}")
if __name__ == '__main__':
    book_id = 1031940621
    chapter_id = 705235484
    crawel_detail(book_id=book_id, chatpter_id=chapter_id)

View File

@ -0,0 +1,33 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2024/03/27 14:00
@Usage :
@Desc :爬取起点 指定章节的指定详情页
'''
import requests
from pyquery import PyQuery as pq
# Parse a locally saved chapter-list page and collect (book_id, chapter_id)
# pairs from the chapter links.
with open(f"./test1.html", 'r', encoding='utf-8') as file:
    content = file.read()
doc = pq(content)
result =[]
# Each chapter link's href ends in /<book_id>/<chapter_id>/, so splitting on
# '/' puts the ids at positions -3 and -2.
for item in doc('.chapter-item a').items():
    strs = item.attr('href').split('/')
    chapter_id, book_id = strs[-2], strs[-3]
    result.append((book_id,chapter_id))
# Leftover experiment kept for reference (detail-page extraction draft):
# title = doc('.chapter-item')
# with open(f"./output/{title.text}.txt", 'w', encoding='utf-8') as file:
#     for line in doc('.content p').items():
#         file.write(line.text() + '\n')
# if __name__ == '__main__':
#     book_id = 1031940621
#     chapter_id = 705235484
#     crawel_detail(book_id=book_id, chatpter_id=chapter_id)

View File

@ -0,0 +1,50 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2024/03/27 15:33
@Usage :
@Desc : 正式爬取起点书籍
'''
import requests
from pyquery import PyQuery as pq
from crawel_detail import crawel_detail
'''
观察请求可以发现有两种方式获得chapter_id
'''
# url = 'https://www.qidian.com/ajax/book/category?_csrfToken={_csrfToken}&bookId={bookId}&w_tsfp={w_tsfp}'
url = 'https://www.qidian.com/book/1031940621/'
header = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'Accept-Encoding': 'gzip, deflate, br, zstd',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Cache-Control': 'no-cache',
'Connection': 'keep-alive',
'Cookie': '_yep_uuid=59404ab0-2696-4162-b763-1256a5ca1dca; e1=%7B%22l6%22%3A%221%22%2C%22l1%22%3A%22%22%2C%22pid%22%3A%22qd_P_xiangqing%22%2C%22eid%22%3A%22%22%7D; e2=%7B%22l6%22%3A%221%22%2C%22l1%22%3A%22%22%2C%22pid%22%3A%22qd_P_xiangqing%22%2C%22eid%22%3A%22%22%7D; newstatisticUUID=1669693998_1518827460; _csrfToken=9VUvhprKzOz80xLUYXqgOzIcm011iQ9DfhwSyUD2; Hm_lvt_f00f67093ce2f38f215010b699629083=1701842016; supportwebp=true; supportWebp=true; _ga=GA1.1.67407022.1669694001; _ga_FZMMH98S83=GS1.1.1701860853.4.1.1701860855.0.0.0; _ga_PFYW0QLV3P=GS1.1.1701860853.4.1.1701860855.0.0.0; fu=923381569; trkf=1; traffic_utm_referer=https%3A//cn.bing.com/; w_tsfp=ltvgWVEE2utBvS0Q6KvslUKvEj87Z2R7xFw0D+M9Os09AaYjV5iM2IZ+utfldCyCt5Mxutrd9MVxYnGAU9QgexgdRcSYb5tH1VPHx8NlntdKRQJtA5KOD1McdbpzvTJCL24LIRDu3mt3ItRJmONgj14K5yZ137ZlCa8hbMFbixsAqOPFm/97DxvSliPXAHGHM3wLc+6C6rgv8LlSgW2DugDuLi11A7lD2UGS0yoeG3pV8w2pJbsDal7wcpK9Uv8wrTPzwjn3apCs2RYj4VA3sB49AtX02TXKL3ZEIAtrZUqukO18Lv3wdaN4qzsLX/hITghGqlkd5usw+EBJWXnsZSOLAf8r4wEEQ/JcrZ6+NA==',
'Host': 'www.qidian.com',
'Pragma': 'no-cache',
# TODO refer是防盗链即访问当前请求的上一级确保访问当前页面是合理的
'Referer': 'https://www.qidian.com/all/',
'Sec-Fetch-Dest': 'document',
'Sec-Fetch-Mode': 'navigate',
'Sec-Fetch-Site': 'same-origin',
'Sec-Fetch-User': '?1',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
'sec-ch-ua': '"Chromium";v="122", "Not(A:Brand";v="24", "Google Chrome";v="122"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
}
response = requests.get(url, headers=header)
doc = pq(response.text)
result =[]
# Each chapter link's href ends in /<book_id>/<chapter_id>/, so splitting on
# '/' puts the ids at positions -3 and -2; crawl every chapter in page order.
for item in doc('.chapter-item a').items():
    strs = item.attr('href').split('/')
    chapter_id, book_id = strs[-2], strs[-3]
    # result.append((book_id,chapter_id))
    crawel_detail(book_id,chatpter_id=chapter_id)

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,24 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2024/03/27 14:49
@Usage :
@Desc :尝试使用PyQuery爬取test.html
'''
from pyquery import PyQuery as pq
# Parse a locally saved chapter page and dump its paragraphs to a text file
# named after the chapter title.
with open("./test.html", 'r', encoding='utf-8') as f:
    html_content = f.read()
doc = pq(html_content)
# First .title element is the chapter heading (an lxml element, hence `.text`).
title = doc('.title')[0]
with open(f"./output/{title.text}.txt", 'w', encoding='utf-8') as file:
    for line in doc('.content p').items():
        file.write(line.text() + '\n')  # one paragraph per line, newline-terminated