Crawler update
parent 408fcea992
commit 945a4c120c
@@ -0,0 +1,38 @@
# -*- encoding:utf-8 -*-

'''
@Author : dingjiawen
@Date : 2023/12/15 13:23
@Usage :
@Desc : Paid Kuaidaili proxy service
'''

from proxypool.crawlers.base import BasePaidCrawler
from proxypool.schemas.proxy import Proxy
import json

BASE_URL = 'https://dps.kdlapi.com/api/getdps/?secret_id=oimi28znnx51x79f3r0d&num=10&signature=25zjft23etaeswom3ipa56bsyqnne347&pt=1&format=json&sep=1'


class KuaidailiPaidCrawler(BasePaidCrawler):
    """
    kuaidaili crawler, https://www.kuaidaili.com/
    """
    urls = [BASE_URL]

    def parse(self, response):
        """
        parse the JSON response to get proxies
        :return: generator of Proxy objects
        """
        response = json.loads(response)
        for proxy in response["data"]["proxy_list"]:
            ip = proxy.split(":")[0]
            port = proxy.split(":")[1]
            yield Proxy(host=ip, port=port)


if __name__ == '__main__':
    crawler = KuaidailiPaidCrawler()
    for proxy in crawler.crawl():
        print(proxy)
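
For reference, below is a minimal sketch of the response shape this parser assumes, inferred only from the access pattern in parse() above (a top-level "data" object whose "proxy_list" holds "ip:port" strings). The sample addresses are made up and any other fields the real Kuaidaili API may return are omitted; the snippet feeds such a payload straight into parse(), mirroring the __main__ block:

import json

# Hypothetical payload; only the "data" -> "proxy_list" structure and the
# "ip:port" string format are taken from the parse() implementation above.
sample = json.dumps({
    "data": {
        "proxy_list": [
            "113.0.0.1:8888",
            "114.0.0.2:9999",
        ]
    }
})

crawler = KuaidailiPaidCrawler()  # class defined in the file above
for proxy in crawler.parse(sample):
    print(proxy)  # one Proxy per entry, e.g. 113.0.0.1:8888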