# -*- encoding:utf-8 -*-

'''
@Author : dingjiawen
@Date : 2023/12/15 13:23
@Usage :
@Desc : Crawler for the paid Kuaidaili proxy API
'''

import json

from proxypool.crawlers.base import BasePaidCrawler
from proxypool.schemas.proxy import Proxy

# NOTE(review): this URL carries the account's secret_id and signature as
# query parameters. Credentials committed to source control should be rotated
# and loaded from configuration instead -- confirm with the account owner.
BASE_URL = 'https://dps.kdlapi.com/api/getdps/?secret_id=oimi28znnx51x79f3r0d&num=10&signature=25zjft23etaeswom3ipa56bsyqnne347&pt=1&format=json&sep=1'


class KuaidailiPaidCrawler(BasePaidCrawler):
    """
    Crawler for the paid kuaidaili API, https://www.kuaidaili.com/

    The endpoint in ``urls`` is requested with ``format=json``, so ``parse``
    decodes a JSON document rather than scraping HTML.
    """
    urls = [BASE_URL]

    def parse(self, response):
        """
        Decode the JSON body of the API response and yield its proxies.

        The proxies are read from ``data.proxy_list``, where every entry is
        expected to look like ``"host:port"``. Payloads without that list
        (presumably what the API sends on errors -- TODO confirm) yield
        nothing instead of raising, and entries that are not strings or do
        not contain both a host and a port are skipped.

        :param response: JSON text of the API response
        :return: generator of ``Proxy`` objects; ``port`` is passed on as the
            string found in the payload
        :raises json.JSONDecodeError: if ``response`` is not valid JSON
        """
        payload = json.loads(response)

        # Walk down to data.proxy_list defensively: any level may be missing
        # or have an unexpected type when the request was not successful.
        data = payload.get("data") if isinstance(payload, dict) else None
        proxy_list = data.get("proxy_list") if isinstance(data, dict) else None
        if not isinstance(proxy_list, list):
            return

        for entry in proxy_list:
            if not isinstance(entry, str):
                continue
            # Split once; only the first two fields (host, port) are used,
            # any further ":"-separated fields are ignored.
            parts = entry.split(":")
            if len(parts) < 2 or not parts[0] or not parts[1]:
                continue
            yield Proxy(host=parts[0], port=parts[1])


if __name__ == '__main__':
    # Manual smoke test: run the crawler and print every proxy it returns.
    crawler = KuaidailiPaidCrawler()
    for proxy in crawler.crawl():
        print(proxy)