爬虫更新
This commit is contained in:
parent
408fcea992
commit
945a4c120c
|
|
@ -0,0 +1,38 @@
|
|||
# -*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/15 13:23
|
||||
@Usage :
|
||||
@Desc    : Paid Kuaidaili proxy provider (kuaidaili.com)
|
||||
'''
|
||||
|
||||
from proxypool.crawlers.base import BasePaidCrawler
|
||||
from proxypool.schemas.proxy import Proxy
|
||||
import json
|
||||
|
||||
# Paid Kuaidaili dps API endpoint: returns up to 10 proxies as JSON ("ip:port" strings).
# SECURITY NOTE(review): secret_id and signature are hard-coded credentials committed to
# source control — move them to environment variables / config and rotate the keys.
BASE_URL = 'https://dps.kdlapi.com/api/getdps/?secret_id=oimi28znnx51x79f3r0d&num=10&signature=25zjft23etaeswom3ipa56bsyqnne347&pt=1&format=json&sep=1'
|
||||
|
||||
|
||||
class KuaidailiPaidCrawler(BasePaidCrawler):
    """
    Kuaidaili paid-proxy crawler, https://www.kuaidaili.com/

    Fetches the paid dps API endpoint (``BASE_URL``) and yields every
    proxy listed in its JSON response.
    """
    # URLs fetched by the base crawler; responses are passed to parse().
    urls = [BASE_URL]

    def parse(self, response):
        """
        Parse the JSON API response (not HTML) and yield proxies.

        :param response: raw response body, a JSON string shaped like
            ``{"data": {"proxy_list": ["ip:port", ...]}}``
        :return: generator of Proxy objects
        """
        data = json.loads(response)
        for entry in data["data"]["proxy_list"]:
            # Each entry is "ip:port"; partition splits on the first colon only
            # and, unlike split(...)[1], does not raise on a malformed entry.
            host, _, port = entry.partition(":")
            yield Proxy(host=host, port=port)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test: run the crawler and print every proxy it yields.
    paid_crawler = KuaidailiPaidCrawler()
    for fetched_proxy in paid_crawler.crawl():
        print(fetched_proxy)
|
||||
Loading…
Reference in New Issue