20240327爬虫更新JS逆向

This commit is contained in:
markilue 2024-03-31 11:31:46 +08:00
parent 50902dd83a
commit 4a5c4125df
39 changed files with 2098 additions and 4450 deletions

View File

@ -0,0 +1,8 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2024/03/26 20:30
@Usage :
@Desc :
'''

View File

@ -0,0 +1,27 @@
// Pagination state mirroring the site's index-page component.
var page = 0
var limit = 10
// Expose Node's `global` as `window` so the browser-targeted code below runs in Node.
window = global
// NOTE(review): deobfuscation stalls here — _0x34777a is the (unknown)
// obfuscated webpack module loader, so this file cannot run stand-alone yet.
function encrypt(args) {
    // Load a side-effect module, then fetch what appear to be a hash library
    // ('3452', exposes SHA1) and a Base64 codec ('27ae') — TODO confirm.
    _0x34777a('6b54');
    var _0x189cbb = _0x34777a('3452')
    , _0x358b1f = _0x34777a('27ae')['Base64'];
    // Collect every argument, then append the current unix time in seconds.
    for (var _0x5da681 = Math['round'](new Date()['getTime']() / 0x3e8)['toString'](), _0x2a83dd = arguments['length'], _0x31a891 = new Array(_0x2a83dd), _0x596a02 = 0x0; _0x596a02 < _0x2a83dd; _0x596a02++)
        _0x31a891[_0x596a02] = arguments[_0x596a02];
    _0x31a891['push'](_0x5da681);
    // token = Base64( SHA1(args.join(',') + ',' + ts) + ',' + ts )
    var _0xf7c3c7 = _0x189cbb['SHA1'](_0x31a891['join'](','))['toString'](_0x189cbb['enc']['Hex'])
    , _0x3c8435 = [_0xf7c3c7, _0x5da681]['join'](',')
    , _0x104b5b = _0x358b1f['encode'](_0x3c8435);
    return _0x104b5b;
}
// Wrapper matching the site's call site; the offset computation is unused here.
function generate_token(args) {
    var _0x422986 = (page - 0x1) * limit, _0x263439 = encrypt(args);
    return _0x263439;
}
console.log(generate_token('api/movie'))

View File

@ -0,0 +1,55 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2024/03/26 20:32
@Usage : 使用Python实现列表页的爬取
@Desc : 使用逆向分析之后已经基本确定了token的生成逻辑
- `/api/movie `放到一个列表里
- 在列表中加人当前时间戳;
- 将列表内容用逗号拼接;
- 将拼接的结果进行 SHA1 编码;
- 将编码的结果和时间戳再次拼接;
- 将拼接后的结果进行 Base64 编码
'''
import hashlib
import time
import base64
from typing import Any, List
import requests
BASE_URL = 'https://spa6.scrape.center'
INDEX_URL = BASE_URL + "/api/movie?limit={limit}&offset={offset}&token={token}"
DETAIL_URL = BASE_URL + "/api/movie/{id}?token={token}"
SECRET = 'ef34#teuq0btua#(-57w1q5o5--j@98xygimlyfxs*-!i-0-mb'
MAX_PAGE = 10
LIMIT = 10
def generate_token(args: List[Any]) -> str:
    """Build the spa6 API token.

    Recipe (recovered by reverse engineering):
      1. append the current unix timestamp (seconds) to *args*;
      2. join with commas and SHA1 the result (hex digest);
      3. join the digest and the timestamp with a comma;
      4. Base64-encode that string.

    Unlike the first draft, this does NOT mutate the caller's *args* list.

    :param args: request path components to sign, e.g. ``['/api/movie']``.
    :return: the Base64 token string expected by the ``token=`` query param.
    """
    cur_time = str(int(time.time()))
    parts = [*args, cur_time]  # copy — leave the caller's list untouched
    sign = hashlib.sha1(','.join(parts).encode('utf-8')).hexdigest()
    return base64.b64encode(','.join([sign, cur_time]).encode('utf-8')).decode('utf-8')
# Crawl the listing pages (only the first MAX_PAGE pages are fetched here).
for i in range(MAX_PAGE):
    # Fresh signing payload per request.
    args = ['/api/movie']
    token = generate_token(args)
    OFFSET = i * LIMIT
    url = INDEX_URL.format(limit=LIMIT, offset=OFFSET, token=token)
    response = requests.get(url)
    print('response:', response.json())
    result = response.json()
    # Crawl each movie's detail page from the listing results.
    for movie in result['results']:
        # NOTE(review): `id` shadows the builtin of the same name.
        id = movie['id']
        # The detail route hides the numeric id as Base64(SECRET + id).
        encrypt_id = base64.b64encode((SECRET + str(id)).encode('utf-8')).decode('utf-8')
        # The token is signed over the encrypted detail path, not the plain one.
        args = [f'/api/movie/{encrypt_id}']
        detail_url = DETAIL_URL.format(id=encrypt_id, token=generate_token(args))
        response = requests.get(detail_url)
        print(response.json())

View File

@ -0,0 +1,8 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2024/03/26 20:30
@Usage :
@Desc :
'''

View File

@ -0,0 +1,201 @@
(window['webpackJsonp'] = window['webpackJsonp'] || [])['push']([['chunk-19c920f8'], {
'5a19': function(_0x1588d2, _0x49ff45, _0x493500) {},
'c6bf': function(_0x1ff78d, _0x1a7aa3, _0x6392a5) {},
'ca9c': function(_0x34ea17, _0x1d01c8, _0x1a974c) {
'use strict';
var _0x116bc9 = _0x1a974c('5a19')
, _0x14ee23 = _0x1a974c['n'](_0x116bc9);
_0x14ee23['a'];
},
'd504': function(_0x4c4705, _0x3c93b9, _0x4c22a6) {
'use strict';
_0x4c22a6['r'](_0x3c93b9);
var _0x4b4f78 = function() {
var _0x1dc0eb = this
, _0x559ed0 = _0x1dc0eb['$createElement']
, _0x28c6bc = _0x1dc0eb['_self']['_c'] || _0x559ed0;
return _0x28c6bc('div', {
'attrs': {
'id': 'index'
}
}, [_0x28c6bc('el-row', {
'directives': [{
'name': 'loading',
'rawName': 'v-loading',
'value': _0x1dc0eb['loading'],
'expression': 'loading'
}]
}, [_0x28c6bc('el-col', {
'attrs': {
'span': 0x12,
'offset': 0x3
}
}, _0x1dc0eb['_l'](_0x1dc0eb['movies'], function(_0x1355ed) {
return _0x28c6bc('el-card', {
'key': _0x1355ed['name'],
'staticClass': 'item\x20m-t',
'attrs': {
'shadow': 'hover'
}
}, [_0x28c6bc('el-row', [_0x28c6bc('el-col', {
'attrs': {
'xs': 0x8,
'sm': 0x6,
'md': 0x4
}
}, [_0x28c6bc('router-link', {
'attrs': {
'to': {
'name': 'detail',
'params': {
'key': _0x1dc0eb['transfer'](_0x1355ed['id'])
}
}
}
}, [_0x28c6bc('img', {
'staticClass': 'cover',
'attrs': {
'src': _0x1355ed['cover']
}
})])], 0x1), _0x28c6bc('el-col', {
'staticClass': 'p-h',
'attrs': {
'xs': 0x9,
'sm': 0xd,
'md': 0x10
}
}, [_0x28c6bc('router-link', {
'staticClass': 'name',
'attrs': {
'to': {
'name': 'detail',
'params': {
'key': _0x1dc0eb['transfer'](_0x1355ed['id'])
}
}
}
}, [_0x28c6bc('h2', {
'staticClass': 'm-b-sm'
}, [_0x1dc0eb['_v'](_0x1dc0eb['_s'](_0x1355ed['name']) + '\x20-\x20' + _0x1dc0eb['_s'](_0x1355ed['alias']))])]), _0x28c6bc('div', {
'staticClass': 'categories'
}, _0x1dc0eb['_l'](_0x1355ed['categories'], function(_0x3f20be) {
return _0x28c6bc('el-button', {
'key': _0x3f20be,
'staticClass': 'category',
'attrs': {
'size': 'mini',
'type': 'primary'
}
}, [_0x1dc0eb['_v'](_0x1dc0eb['_s'](_0x3f20be) + '\x0a\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20')]);
}), 0x1), _0x28c6bc('div', {
'staticClass': 'm-v-sm\x20info'
}, [_0x28c6bc('span', [_0x1dc0eb['_v'](_0x1dc0eb['_s'](_0x1355ed['regions']['join']('、')))]), _0x28c6bc('span', [_0x1dc0eb['_v']('\x20/\x20')]), _0x28c6bc('span', [_0x1dc0eb['_v'](_0x1dc0eb['_s'](_0x1355ed['minute']) + '\x20分钟')])]), _0x28c6bc('div', {
'staticClass': 'm-v-sm\x20info'
}, [_0x28c6bc('span', [_0x1dc0eb['_v'](_0x1dc0eb['_s'](_0x1355ed['published_at']) + '\x20上映')])])], 0x1), _0x28c6bc('el-col', {
'attrs': {
'xs': 0x5,
'sm': 0x5,
'md': 0x4
}
}, [_0x28c6bc('p', {
'staticClass': 'score\x20m-t-md\x20m-b-n-sm'
}, [_0x1dc0eb['_v'](_0x1dc0eb['_s'](_0x1355ed['score']['toFixed'](0x1)))]), _0x28c6bc('p', [_0x28c6bc('el-rate', {
'attrs': {
'value': _0x1355ed['score'] / 0x2,
'disabled': '',
'max': 0x5,
'text-color': '#ff9900'
}
})], 0x1)])], 0x1)], 0x1);
}), 0x1)], 0x1), _0x28c6bc('el-row', [_0x28c6bc('el-col', {
'attrs': {
'span': 0xa,
'offset': 0xb
}
}, [_0x28c6bc('div', {
'staticClass': 'pagination\x20m-v-lg'
}, [_0x28c6bc('el-pagination', {
'attrs': {
'background': '',
'current-page': _0x1dc0eb['page'],
'page-size': _0x1dc0eb['limit'],
'layout': 'total,\x20prev,\x20pager,\x20next',
'total': _0x1dc0eb['total']
},
'on': {
'current-change': _0x1dc0eb['onPageChange'],
'update:currentPage': function(_0x241449) {
_0x1dc0eb['page'] = _0x241449;
},
'update:current-page': function(_0x240a84) {
_0x1dc0eb['page'] = _0x240a84;
}
}
})], 0x1)])], 0x1)], 0x1);
}
, _0x33c195 = []
, _0x2fa7bd = _0x4c22a6('7d92')
, _0x49ecf1 = _0x4c22a6('3e22')
, _0x4d1fd7 = {
'name': 'Index',
'components': {},
'data': function() {
return {
'loading': !0x1,
'total': null,
'page': parseInt(this['$route']['params']['page'] || 0x1),
'limit': 0xa,
'movies': null
};
},
'mounted': function() {
this['onFetchData']();
},
'methods': {
'transfer': _0x49ecf1['a'],
'onPageChange': function(_0x12422f) {
this['$router']['push']({
'name': 'indexPage',
'params': {
'page': _0x12422f
}
}),
this['onFetchData']();
},
'onFetchData': function() {
var _0xd5d754 = this;
this['loading'] = !0x0;
var _0x422986 = (this['page'] - 0x1) * this['limit']
, _0x263439 = Object(_0x2fa7bd['a'])(this['$store']['state']['url']['index']);
window.encrypt=Object(_0x2fa7bd['a']);
this['$axios']['get'](this['$store']['state']['url']['index'], {
'params': {
'limit': this['limit'],
'offset': _0x422986,
'token': _0x263439
}
})['then'](function(_0x464186) {
var _0x148e87 = _0x464186['data']
, _0x2f29ad = _0x148e87['results']
, _0x4829b0 = _0x148e87['count'];
_0xd5d754['loading'] = !0x1,
_0xd5d754['movies'] = _0x2f29ad,
_0xd5d754['total'] = _0x4829b0;
});
}
}
}
, _0x15f73f = _0x4d1fd7
, _0x3a0944 = (_0x4c22a6('ca9c'),
_0x4c22a6('eb45'),
_0x4c22a6('2877'))
, _0x5b3502 = Object(_0x3a0944['a'])(_0x15f73f, _0x4b4f78, _0x33c195, !0x1, null, '724ecf3b', null);
_0x3c93b9['default'] = _0x5b3502['exports'];
},
'eb45': function(_0x5e6316, _0x331917, _0x1ca927) {
'use strict';
var _0x26eff7 = _0x1ca927('c6bf')
, _0x3af8d4 = _0x1ca927['n'](_0x26eff7);
_0x3af8d4['a'];
}
}]);

View File

@ -0,0 +1,174 @@
{
"count": 102,
"results": [
{
"id": 1,
"name": "霸王别姬",
"alias": "Farewell My Concubine",
"cover": "https://p0.meituan.net/movie/ce4da3e03e655b5b88ed31b5cd7896cf62472.jpg@464w_644h_1e_1c",
"categories": [
"剧情",
"爱情"
],
"published_at": "1993-07-26",
"minute": 171,
"score": 9.5,
"regions": [
"中国内地",
"中国香港"
]
},
{
"id": 2,
"name": "这个杀手不太冷",
"alias": "Léon",
"cover": "https://p1.meituan.net/movie/6bea9af4524dfbd0b668eaa7e187c3df767253.jpg@464w_644h_1e_1c",
"categories": [
"剧情",
"动作",
"犯罪"
],
"published_at": "1994-09-14",
"minute": 110,
"score": 9.5,
"regions": [
"法国"
]
},
{
"id": 3,
"name": "肖申克的救赎",
"alias": "The Shawshank Redemption",
"cover": "https://p0.meituan.net/movie/283292171619cdfd5b240c8fd093f1eb255670.jpg@464w_644h_1e_1c",
"categories": [
"剧情",
"犯罪"
],
"published_at": "1994-09-10",
"minute": 142,
"score": 9.5,
"regions": [
"美国"
]
},
{
"id": 4,
"name": "泰坦尼克号",
"alias": "Titanic",
"cover": "https://p1.meituan.net/movie/b607fba7513e7f15eab170aac1e1400d878112.jpg@464w_644h_1e_1c",
"categories": [
"剧情",
"爱情",
"灾难"
],
"published_at": "1998-04-03",
"minute": 194,
"score": 9.5,
"regions": [
"美国"
]
},
{
"id": 5,
"name": "罗马假日",
"alias": "Roman Holiday",
"cover": "https://p0.meituan.net/movie/289f98ceaa8a0ae737d3dc01cd05ab052213631.jpg@464w_644h_1e_1c",
"categories": [
"剧情",
"喜剧",
"爱情"
],
"published_at": "1953-08-20",
"minute": 118,
"score": 9.5,
"regions": [
"美国"
]
},
{
"id": 6,
"name": "唐伯虎点秋香",
"alias": "Flirting Scholar",
"cover": "https://p0.meituan.net/movie/da64660f82b98cdc1b8a3804e69609e041108.jpg@464w_644h_1e_1c",
"categories": [
"喜剧",
"爱情",
"古装"
],
"published_at": "1993-07-01",
"minute": 102,
"score": 9.5,
"regions": [
"中国香港"
]
},
{
"id": 7,
"name": "乱世佳人",
"alias": "Gone with the Wind",
"cover": "https://p0.meituan.net/movie/223c3e186db3ab4ea3bb14508c709400427933.jpg@464w_644h_1e_1c",
"categories": [
"剧情",
"爱情",
"历史",
"战争"
],
"published_at": "1939-12-15",
"minute": 238,
"score": 9.5,
"regions": [
"美国"
]
},
{
"id": 8,
"name": "喜剧之王",
"alias": "The King of Comedy",
"cover": "https://p0.meituan.net/movie/1f0d671f6a37f9d7b015e4682b8b113e174332.jpg@464w_644h_1e_1c",
"categories": [
"剧情",
"喜剧",
"爱情"
],
"published_at": "1999-02-13",
"minute": 85,
"score": 9.5,
"regions": [
"中国香港"
]
},
{
"id": 9,
"name": "楚门的世界",
"alias": "The Truman Show",
"cover": "https://p0.meituan.net/movie/8959888ee0c399b0fe53a714bc8a5a17460048.jpg@464w_644h_1e_1c",
"categories": [
"剧情",
"科幻"
],
"published_at": null,
"minute": 103,
"score": 9.0,
"regions": [
"美国"
]
},
{
"id": 10,
"name": "狮子王",
"alias": "The Lion King",
"cover": "https://p0.meituan.net/movie/27b76fe6cf3903f3d74963f70786001e1438406.jpg@464w_644h_1e_1c",
"categories": [
"动画",
"歌舞",
"冒险"
],
"published_at": "1995-07-15",
"minute": 89,
"score": 9.0,
"regions": [
"美国"
]
}
]
}

View File

@ -0,0 +1,53 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2024/03/26 18:59
@Usage :
@Desc :使用playwright跳过加密逻辑爬取网站
'''
from playwright.sync_api import sync_playwright
import time
import requests
BASE_URL= 'https://spa6.scrape.center/'
INDEX_URL = BASE_URL + "/api/movie?limit={limit}&offset={offset}&token={token}"
MAX_PAGE = 10
LIMIT = 10
content = sync_playwright().start()
browser = content.chromium.launch()
page = browser.new_page()
# 注意这里路径需要加上**
page.route(
"**/js/chunk-19c920f8.c3a1129d.js",
lambda route: route.fulfill(path='chunk.js')
)
page.goto(BASE_URL, wait_until='networkidle')
def get_token():
    """Mint an API token by calling ``window.encrypt`` inside the patched page.

    The route handler above swaps the site's chunk for a local ``chunk.js``
    that exposes the token function on ``window``.

    :return: the token string, or ``None`` when evaluation fails (e.g. the
        patched chunk has not installed ``window.encrypt`` yet).
    """
    # page.wait_for_function('window.encrypt !== undefined')
    try:
        result = page.evaluate('''() => {
console.log('window',window)
console.log('encrypt',window.encrypt)
return window.encrypt("%s")
}''' % ('/api/movie'))
        return result
    except Exception as e:
        # Bug fix: the original slept 100 s *before* printing the error and
        # then fell off the end with `result` unbound, implicitly returning
        # None. Report the failure immediately and return None explicitly.
        print(e)
        return None
def get_key():
    # TODO: the detail-page `key` generation has not been reverse-engineered yet.
    pass

# Fetch every listing page using tokens minted inside the live browser page.
for i in range(MAX_PAGE):
    offset = LIMIT*i
    result = requests.get(INDEX_URL.format(limit=LIMIT,offset=offset,token=get_token()))
    print(result.text)
# NOTE(review, translated): listing pages crawl fine at this point, but the
# detail-page `key` logic could not be located in the obfuscated bundle —
# no breakpoint on the obvious keywords ever triggered.

View File

@ -0,0 +1,9 @@
// Token core lifted verbatim from the obfuscated bundle for study.
// NOTE(review): _0x189cbb (hash library with SHA1) and _0x358b1f (Base64
// codec) are free variables resolved in the bundle's own scope — this
// snippet does not run stand-alone.
function _0x456254() {
    // Gather all arguments, then append the current unix timestamp (seconds).
    for (var _0x5da681 = Math['round'](new Date()['getTime']() / 0x3e8)['toString'](), _0x2a83dd = arguments['length'], _0x31a891 = new Array(_0x2a83dd), _0x596a02 = 0x0; _0x596a02 < _0x2a83dd; _0x596a02++)
        _0x31a891[_0x596a02] = arguments[_0x596a02];
    _0x31a891['push'](_0x5da681);
    // token = Base64( SHA1(joined args + ts) + ',' + ts )
    var _0xf7c3c7 = _0x189cbb['SHA1'](_0x31a891['join'](','))['toString'](_0x189cbb['enc']['Hex'])
    , _0x3c8435 = [_0xf7c3c7, _0x5da681]['join'](',')
    , _0x104b5b = _0x358b1f['encode'](_0x3c8435);
    return _0x104b5b;
}

View File

@ -0,0 +1,10 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2024/03/27 12:26
@Usage :
@Desc : Probe a property of Base64: print the encoded length of a captured
        detail-page key (Base64 of the 51-byte SECRET + movie id string).
'''
sample_key = 'ZWYzNCN0ZXVxMGJ0dWEjKC01N3cxcTVvNS0takA5OHh5Z2ltbHlmeHMqLSFpLTAtbWIx'
print(len(sample_key))

View File

@ -0,0 +1,14 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2024/03/26 12:54
@Usage :
@Desc :使用pywasm执行wasm文件
'''
import pywasm
# Load the captured WebAssembly module and probe its exported `encrypt`
# with two sample integers to observe the signing function's output.
runtime = pywasm.load('./Wasm.wasm')
result = runtime.exec('encrypt', [1, 2])
print(result)

View File

@ -0,0 +1,2 @@
const a = parseInt(Math.round((new Date).getTime() / 1e3).toString());
console.log(a)

View File

@ -0,0 +1,24 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2024/03/26 13:08
@Usage :
@Desc :使用pywasm库完成爬取https://spa14.scrape.center/
'''
import time
import pywasm
import requests
baseurl = 'https://spa14.scrape.center/api/movie/?limit={limit}&offset={offset}&sign={sign}'
MAX_PAGE = 10
limit = 10
# The site computes request signatures inside a wasm module; run it locally.
runtime = pywasm.load('./Wasm.wasm')
# Debug leftovers: inspect the timestamp value/type fed to encrypt().
print(time.time())
print(type(time.time()))
for i in range(MAX_PAGE):
    offset = i * limit
    # sign = encrypt(offset, unix_seconds), as exported by the wasm module.
    sign = runtime.exec('encrypt', [offset, int(time.time())])
    result = requests.get(baseurl.format(limit=limit, offset=offset, sign=sign))
    print(result.text)

View File

@ -0,0 +1,28 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2024/03/26 13:08
@Usage :
@Desc :使用wasmer库完成爬取https://spa14.scrape.center/
'''
import time
from wasmer import engine, Store, Module, Instance
from wasmer_compiler_cranelift import Compiler
import requests
baseurl = 'https://spa14.scrape.center/api/movie/?limit={limit}&offset={offset}&sign={sign}'
MAX_PAGE = 10
limit = 10
# JIT-compile the captured wasm module with Cranelift and instantiate it.
store = Store(engine.JIT(Compiler))
module = Module(store, open('Wasm.wasm', 'rb').read())
instance = Instance(module)
for i in range(MAX_PAGE):
    offset = i * limit
    # sign = encrypt(offset, unix_seconds), exported by the wasm module.
    sign = instance.exports.encrypt(offset, int(time.time()))
    result = requests.get(baseurl.format(limit=limit, offset=offset, sign=sign))
    print(result.text)

View File

@ -0,0 +1,17 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2024/03/26 12:54
@Usage :
@Desc : Execute the wasm file with the wasmer library (counterpart of the
        pywasm variant; the original header said "pywasm" by copy-paste).
'''
from wasmer import engine, Store, Module, Instance
from wasmer_compiler_cranelift import Compiler
# JIT-compile and instantiate the captured module, then probe `encrypt`.
store = Store(engine.JIT(Compiler))
module = Module(store, open('Wasm.wasm', 'rb').read())
instance = Instance(module)
result = instance.exports.encrypt(1, 2)
print(result)

View File

@ -0,0 +1,5 @@
{
"presets": [
"@babel/preset-env"
]
}

View File

@ -0,0 +1,27 @@
//@Author : dingjiawen
//@Date : 2024/03/25 15:26
//@Usage :
//@Desc : Restore code1.js via AST constant folding: evaluate every
//        statically-computable expression and replace it with its value.
import {traverse,types} from "@babel/core";
import {parse} from "@babel/parser";
import CodeGenerator from "@babel/generator";
// import * as types from "@babel/types";
import fs from "fs";
const code =fs.readFileSync("code/code1.js","utf-8");
let ast =parse(code)
traverse(ast,{
    // Fold unary/binary/conditional/call expressions Babel can evaluate.
    "UnaryExpression|BinaryExpression|ConditionalExpression|CallExpression":(path) =>{
        const {confident,value} = path.evaluate();
        // Skip unbounded results — valueToNode(Infinity) emits ugly code.
        if(value==Infinity ||value==-Infinity) return;
        // Substitute only when Babel is confident the evaluation is exact.
        confident&&path.replaceWith(types.valueToNode(value));
    },
});
const {code:output} = CodeGenerator.default(ast);
console.log(output)

View File

@ -0,0 +1,27 @@
//@Author : dingjiawen
//@Date : 2024/03/25 15:26
//@Usage :
//@Desc : Restore code2.js: decode \uXXXX / \xXX escaped string literals.
//        (Header fixed — it previously said code1.js by copy-paste.)
import {traverse, types} from "@babel/core";
import {parse} from "@babel/parser";
import CodeGenerator from "@babel/generator";
// import * as types from "@babel/types";
import fs from "fs";
const code = fs.readFileSync("code/code2.js", "utf-8");
let ast = parse(code)
traverse(ast, {
    "StringLiteral"({node}) {
        // The regex matches \u or \x escape sequences (Unicode / hex escapes);
        // `g` = global, `i` = case-insensitive. When one is present, replace the
        // raw source text with the decoded value so the generator prints a
        // readable literal.
        if (node.extra && /\\[ux]/gi.test(node.extra.raw)) {
            node.extra.raw = node.extra.rawValue;
        }
    },
});
const {code: output} = CodeGenerator.default(ast);
console.log(output)

View File

@ -0,0 +1,41 @@
//@Author : dingjiawen
//@Date : 2024/03/25 15:26
//@Usage :
//@Desc :使用AST技术还原code1.js
import {traverse, types} from "@babel/core";
import {parse} from "@babel/parser";
import CodeGenerator from "@babel/generator";
// import * as types from "@babel/types";
import fs from "fs";
const code = fs.readFileSync("code/code3.js", "utf-8");
let ast = parse(code)
traverse(ast, {
IfStatement(path) {
let {consequent, alternate} = path.node;
let testpath = path.get("test");
const evaluateTest = testpath.evaluateTruthy();
if (evaluateTest === true) {
if (types.isBlockStatement(consequent)) {
consequent = consequent.body;
}
path.replaceWithMultiple(consequent);
} else if (evaluateTest === false) {
if (alternate != null) {
if (types.isBlockStatement(alternate)) {
alternate = alternate.body;
}
path.replaceWithMultiple(alternate);
} else {
path.remove();
}
}
},
});
const {code: output} = CodeGenerator.default(ast);
console.log(output)

View File

@ -0,0 +1,54 @@
//@Author : dingjiawen
//@Date : 2024/03/25 15:26
//@Usage :
//@Desc :使用AST技术还原code1.js
import {traverse, types} from "@babel/core";
import {parse} from "@babel/parser";
import CodeGenerator from "@babel/generator";
// import * as types from "@babel/types";
import fs from "fs";
//从系统盘读取文件 code4.js
const code = fs.readFileSync("code/code4.js", "utf-8");
let ast = parse(code)
traverse(ast, {
WhileStatement(path) {
const {node, scope} = path;
const {test, body} = node;
let switchNode = body.body[0];
let {discriminant, cases} = switchNode;
let {object, property} = discriminant;
let arrName = object.name;
let binding = scope.getBinding(arrName);
let {init} = binding.path.node;
object = init.callee.object;
property = init.callee.property;
let argument = init.arguments[0].value;
let arrayFlow = object.value[property.name](argument);
let resultBody = [];
arrayFlow.forEach((index) => {
let switchCase = cases.filter((c) => c.test.value == index)[0];
let caseBody = switchCase.consequent;
if (types.isContinueStatement(caseBody[caseBody.length - 1])) {
caseBody.pop();
}
resultBody = resultBody.concat(caseBody);
}
);
path.replaceWithMultiple(resultBody);
},
});
const {code: output} = CodeGenerator.default(ast);
console.log(output)

View File

@ -0,0 +1,4 @@
const a = !![];
const b = "abc" == "bcd";
const c = (1 << 3) | 2;
const d = parseInt("5" + "0");

View File

@ -0,0 +1 @@
const strings = ["\x68\x65\x6c\x6c\x6f", "\x77\x6f\x72\x6c\x64"];

View File

@ -0,0 +1,23 @@
const _0x16c18d = function () {
if (!![[]]) {
console.log("hello world");
} else {
console.log("this");
console.log("is");
console.log("dead");
console.log("code");
}
};
const _0x1f7292 = function () {
if ("xmv2nOdfy2N".charAt(4) !== String.fromCharCode(110)) {
console.log("this");
console.log("is");
console.log("dead");
console.log("code");
} else {
console.log("nice to meet you");
}
};
_0x16c18d();
_0x1f7292();

View File

@ -0,0 +1,16 @@
const s = "3|1|2".split("|");
let x = 0;
while (true) {
switch (s[x++]) {
case "1":
const a = 1;
continue;
case "2":
const b = 3;
continue;
case "3":
const c = 0;
continue;
}
break;
}

View File

@ -0,0 +1,18 @@
{
"name": "learn-ast",
"version": "1.0.0",
"description": "学习ast",
"main": "index.js",
"type": "module",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "",
"license": "ISC",
"devDependencies": {
"@babel/cli": "^7.14.8",
"@babel/core": "^7.15.0",
"@babel/node": "^7.14.9",
"@babel/preset-env": "^7.15.0"
}
}

View File

@ -1,3 +1,4 @@
//usage:讲述@babel/parse和generate
import {parse} from "@babel/parser"; import {parse} from "@babel/parser";
import fs from "fs"; import fs from "fs";

View File

@ -1,3 +1,4 @@
//usage:讲述@babel/traverse
import {parse} from "@babel/parser" import {parse} from "@babel/parser"
import CodeGenerator from "@babel/generator" import CodeGenerator from "@babel/generator"
import fs from "fs" import fs from "fs"

View File

@ -0,0 +1,31 @@
//usage: demonstrates @babel/types — build `const b = a + 1` and insert it
//after the existing declaration of `a`.
import {traverse,types} from "@babel/core";
import {parse} from "@babel/parser";
import CodeGenerator from "@babel/generator";
// import * as t from "@babel/types";
const code = "const a = 1";
let ast = parse(code);
traverse(ast, {
    VariableDeclaration(path) {
        // (Fix: removed an unused `const a = types.identifier("a")` local.)
        // Build the initializer expression `a + 1`.
        let init = types.binaryExpression(
            "+",
            types.identifier("a"),
            types.numericLiteral(1)
        );
        // Assemble `const b = a + 1;`.
        let declarator = types.variableDeclarator(types.identifier("b"), init);
        let declaration = types.variableDeclaration("const", [declarator]);
        path.insertAfter(declaration);
        // Stop traversing so the freshly inserted declaration is not visited.
        path.stop();
    },
}
);
const output = CodeGenerator.default(ast, {
    retainLines: true,
}).code;
console.log(output);

File diff suppressed because it is too large Load Diff

View File

@ -10,8 +10,12 @@
"author": "", "author": "",
"license": "ISC", "license": "ISC",
"devDependencies": { "devDependencies": {
"@babel/cli": "^7.24.1", "@babel/cli": "^7.14.8",
"@babel/core": "^7.24.3", "@babel/core": "^7.15.0",
"@babel/preset-env": "^7.24.3" "@babel/node": "^7.10.5",
"@babel/preset-env": "^7.15.0"
},
"dependencies": {
"javascript-obfuscator": "^2.15.4"
} }
} }

View File

@ -0,0 +1,21 @@
import * as t from '@babel/types';
// Build the AST for `const b = a + 1` bottom-up from individual nodes.
// Identifier `a`
const idA = t.identifier("a");
// Numeric literal 1
const literalOne = t.numericLiteral(1);
// Binary expression `a + 1`
const binaryExpression = t.binaryExpression("+", idA, literalOne);
// Identifier `b`
const idB = t.identifier("b");
// Declarator `b = a + 1`
const variableDeclarator = t.variableDeclarator(idB, binaryExpression);
// Declaration `const b = a + 1;`
const variableDeclaration = t.variableDeclaration("const", [variableDeclarator]);
console.log(variableDeclaration);

View File

@ -19,10 +19,8 @@ LIMIT = 10
context = sync_playwright().start() context = sync_playwright().start()
browser = context.chromium.launch(devtools=True, headless=False) browser = context.chromium.launch(devtools=True, headless=False)
page = browser.new_page() page = browser.new_page()
# 注意这里路径需要加上** # 注意这里路径需要加上**
page.route( page.route(
"**/js/chunk-10192a00.243cb8b7.js", "**/js/chunk-10192a00.243cb8b7.js",
@ -39,10 +37,10 @@ def get_token(offset):
console.log('encrypt',window.encrypt) console.log('encrypt',window.encrypt)
return window.encrypt("%s","%s") return window.encrypt("%s","%s")
}''' % ('/api/movie', offset)) }''' % ('/api/movie', offset))
return result
except Exception as e: except Exception as e:
time.sleep(100) time.sleep(100)
print(e) print(e)
return result
for i in range(MAX_PAGE): for i in range(MAX_PAGE):

View File

@ -0,0 +1,8 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2024/03/27 13:59
@Usage :
@Desc :
'''

View File

@ -0,0 +1,8 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2024/03/27 13:59
@Usage :
@Desc :
'''

View File

@ -0,0 +1,52 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2024/03/27 14:00
@Usage :
@Desc :爬取起点 指定章节的指定详情页
'''
import requests
from pyquery import PyQuery as pq
def crawel_detail(book_id, chatpter_id):
    """Fetch one Qidian chapter page and dump its paragraphs to
    ./output/<chapter title>.txt.

    NOTE(review): the misspelled names ("crawel", "chatpter_id") are kept
    because sibling scripts import and call this function by these names.

    :param book_id: numeric book id from the chapter URL.
    :param chatpter_id: numeric chapter id from the chapter URL.
    """
    url = f'https://www.qidian.com/chapter/{book_id}/{chatpter_id}/'
    # Browser-captured request headers. The hard-coded Cookie is session-bound
    # and will expire — refresh it from a live session when requests fail.
    header = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "Accept-Encoding": "gzip, deflate, br, zstd",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Cookie": "newstatisticUUID=1669693998_1518827460; _csrfToken=9VUvhprKzOz80xLUYXqgOzIcm011iQ9DfhwSyUD2; Hm_lvt_f00f67093ce2f38f215010b699629083=1701842016; supportwebp=true; supportWebp=true; _ga=GA1.1.67407022.1669694001; _ga_FZMMH98S83=GS1.1.1701860853.4.1.1701860855.0.0.0; _ga_PFYW0QLV3P=GS1.1.1701860853.4.1.1701860855.0.0.0; fu=923381569; traffic_utm_referer=https%3A%2F%2Fcn.bing.com%2F; trkf=1; w_tsfp=ltvgWVEE2utBvS0Q6KvslUKvEj87Z2R7xFw0D+M9Os09AaYjU5yD14d5vdfldCyCt5Mxutrd9MVxYnGAU9QkfxcSQs+Qb5tH1VPHx8NlntdKRQJtA5KOD1McdbpzvTJCL24LIRDu3mt3ItRJmONgj14K5yZ137ZlCa8hbMFbixsAqOPFm/97DxvSliPXAHGHM3wLc+6C6rgv8LlSgWyEtBu/eRlhAcxD0EaT3CwfCHoi9BPNc+lYNx+oJ8arTe9Gvy/hk2upNdLxiEox60I3sB49AtX02TXKL3ZEIAtrZViygr4ke66rNuYluTEZXL5TWwpN/FxC9qdk605dDi69YCeHAq555FZRF/pYrMuocHqW0JPrcltbvN4px1kl9g==",
        "Host": "www.qidian.com",
        "Pragma": "no-cache",
        # Referer acts as the site's anti-leech check: the request must look
        # like it navigated from the book's own page.
        "Referer": f"https://www.qidian.com/book/{book_id}/",
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "same-origin",
        "Sec-Fetch-User": "?1",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
        "sec-ch-ua": "\"Chromium\";v=\"122\", \"Not(A:Brand\";v=\"24\", \"Google Chrome\";v=\"122\"",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "Windows"
    }
    response = requests.get(url, headers=header)
    doc = pq(response.text)
    # First element matching .title is the chapter heading (an lxml element,
    # hence `.text` below rather than pyquery's `.text()`).
    title = doc('.title')[0]
    # NOTE(review): the title is used verbatim as a filename — characters that
    # are invalid in paths would make this open() fail; confirm before reuse.
    with open(f"./output/{title.text}.txt", 'w', encoding='utf-8') as file:
        for line in doc('.content p').items():
            file.write(line.text() + '\n')  # one paragraph per line, newline-terminated
    print(f"爬取成功【{title.text}")
if __name__ == '__main__':
    book_id = 1031940621
    chapter_id = 705235484
    crawel_detail(book_id=book_id, chatpter_id=chapter_id)

View File

@ -0,0 +1,33 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2024/03/27 14:00
@Usage :
@Desc :爬取起点 指定章节的指定详情页
'''
import requests
from pyquery import PyQuery as pq
# Parse a locally saved chapter-list page and collect (book_id, chapter_id)
# pairs from the chapter links.
with open(f"./test1.html", 'r', encoding='utf-8') as file:
    content = file.read()
doc = pq(content)
result =[]
# Each chapter link's href ends in /<book_id>/<chapter_id>/, so splitting on
# '/' puts the ids at positions -3 and -2.
for item in doc('.chapter-item a').items():
    strs = item.attr('href').split('/')
    chapter_id, book_id = strs[-2], strs[-3]
    result.append((book_id,chapter_id))
# Leftover experiment kept for reference (detail-page extraction draft):
# title = doc('.chapter-item')
# with open(f"./output/{title.text}.txt", 'w', encoding='utf-8') as file:
#     for line in doc('.content p').items():
#         file.write(line.text() + '\n')
# if __name__ == '__main__':
#     book_id = 1031940621
#     chapter_id = 705235484
#     crawel_detail(book_id=book_id, chatpter_id=chapter_id)

View File

@ -0,0 +1,50 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2024/03/27 15:33
@Usage :
@Desc : 正式爬取起点书籍
'''
import requests
from pyquery import PyQuery as pq
from crawel_detail import crawel_detail
'''
观察请求可以发现有两种方式获得chapter_id
'''
# url = 'https://www.qidian.com/ajax/book/category?_csrfToken={_csrfToken}&bookId={bookId}&w_tsfp={w_tsfp}'
url = 'https://www.qidian.com/book/1031940621/'
header = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'Accept-Encoding': 'gzip, deflate, br, zstd',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Cache-Control': 'no-cache',
'Connection': 'keep-alive',
'Cookie': '_yep_uuid=59404ab0-2696-4162-b763-1256a5ca1dca; e1=%7B%22l6%22%3A%221%22%2C%22l1%22%3A%22%22%2C%22pid%22%3A%22qd_P_xiangqing%22%2C%22eid%22%3A%22%22%7D; e2=%7B%22l6%22%3A%221%22%2C%22l1%22%3A%22%22%2C%22pid%22%3A%22qd_P_xiangqing%22%2C%22eid%22%3A%22%22%7D; newstatisticUUID=1669693998_1518827460; _csrfToken=9VUvhprKzOz80xLUYXqgOzIcm011iQ9DfhwSyUD2; Hm_lvt_f00f67093ce2f38f215010b699629083=1701842016; supportwebp=true; supportWebp=true; _ga=GA1.1.67407022.1669694001; _ga_FZMMH98S83=GS1.1.1701860853.4.1.1701860855.0.0.0; _ga_PFYW0QLV3P=GS1.1.1701860853.4.1.1701860855.0.0.0; fu=923381569; trkf=1; traffic_utm_referer=https%3A//cn.bing.com/; w_tsfp=ltvgWVEE2utBvS0Q6KvslUKvEj87Z2R7xFw0D+M9Os09AaYjV5iM2IZ+utfldCyCt5Mxutrd9MVxYnGAU9QgexgdRcSYb5tH1VPHx8NlntdKRQJtA5KOD1McdbpzvTJCL24LIRDu3mt3ItRJmONgj14K5yZ137ZlCa8hbMFbixsAqOPFm/97DxvSliPXAHGHM3wLc+6C6rgv8LlSgW2DugDuLi11A7lD2UGS0yoeG3pV8w2pJbsDal7wcpK9Uv8wrTPzwjn3apCs2RYj4VA3sB49AtX02TXKL3ZEIAtrZUqukO18Lv3wdaN4qzsLX/hITghGqlkd5usw+EBJWXnsZSOLAf8r4wEEQ/JcrZ6+NA==',
'Host': 'www.qidian.com',
'Pragma': 'no-cache',
# TODO refer是防盗链即访问当前请求的上一级确保访问当前页面是合理的
'Referer': 'https://www.qidian.com/all/',
'Sec-Fetch-Dest': 'document',
'Sec-Fetch-Mode': 'navigate',
'Sec-Fetch-Site': 'same-origin',
'Sec-Fetch-User': '?1',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
'sec-ch-ua': '"Chromium";v="122", "Not(A:Brand";v="24", "Google Chrome";v="122"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
}
response = requests.get(url, headers=header)
doc = pq(response.text)
result =[]
# Each chapter link's href ends in /<book_id>/<chapter_id>/, so splitting on
# '/' puts the ids at positions -3 and -2; crawl every chapter in page order.
for item in doc('.chapter-item a').items():
    strs = item.attr('href').split('/')
    chapter_id, book_id = strs[-2], strs[-3]
    # result.append((book_id,chapter_id))
    crawel_detail(book_id,chatpter_id=chapter_id)

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,24 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2024/03/27 14:49
@Usage :
@Desc :尝试使用PyQuery爬取test.html
'''
from pyquery import PyQuery as pq
# Parse a locally saved chapter page and dump its paragraphs to a text file
# named after the chapter title.
with open("./test.html", 'r', encoding='utf-8') as f:
    html_content = f.read()
doc = pq(html_content)
# First .title element is the chapter heading (an lxml element, hence `.text`).
title = doc('.title')[0]
with open(f"./output/{title.text}.txt", 'w', encoding='utf-8') as file:
    for line in doc('.content p').items():
        file.write(line.text() + '\n')  # one paragraph per line, newline-terminated