diff --git a/Spider/Chapter09_代理的使用/代理池的维护/ProxyPool/proxypool/crawlers/private/kuaidaili_paid.py b/Spider/Chapter09_代理的使用/代理池的维护/ProxyPool/proxypool/crawlers/private/kuaidaili_paid.py
new file mode 100644
index 0000000..dce213e
--- /dev/null
+++ b/Spider/Chapter09_代理的使用/代理池的维护/ProxyPool/proxypool/crawlers/private/kuaidaili_paid.py
@@ -0,0 +1,39 @@
+# -*- encoding:utf-8 -*-
+
+'''
+@Author : dingjiawen
+@Date : 2023/12/15 13:23
+@Usage :
+@Desc : Paid Kuaidaili proxy
+'''
+
+from proxypool.crawlers.base import BasePaidCrawler
+from proxypool.schemas.proxy import Proxy
+import json
+
+BASE_URL = 'https://dps.kdlapi.com/api/getdps/?secret_id=oimi28znnx51x79f3r0d&num=10&signature=25zjft23etaeswom3ipa56bsyqnne347&pt=1&format=json&sep=1'
+
+
+class KuaidailiPaidCrawler(BasePaidCrawler):
+    """
+    Kuaidaili paid proxy crawler, https://www.kuaidaili.com/
+    """
+    urls = [BASE_URL]
+
+    def parse(self, response):
+        """
+        parse the JSON API response to get proxies
+        :param response: response text returned by the Kuaidaili getdps API
+        :return: generator of Proxy objects
+        """
+        data = json.loads(response)
+        # each item in proxy_list is an 'ip:port' string
+        for proxy in data["data"]["proxy_list"]:
+            host, port = proxy.split(":")
+            yield Proxy(host=host, port=int(port))
+
+
+if __name__ == '__main__':
+    crawler = KuaidailiPaidCrawler()
+    for proxy in crawler.crawl():
+        print(proxy)
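
For quick verification of the parse() logic above: BASE_URL requests format=json, and the method expects a payload shaped like {"data": {"proxy_list": ["ip:port", ...]}}. The standalone sketch below replays the same splitting logic against a made-up sample payload, so it can be checked without spending paid API quota; SAMPLE_RESPONSE, SimpleProxy, and the addresses used are illustrative assumptions, not part of the ProxyPool codebase or real Kuaidaili output.

import json
from collections import namedtuple

# lightweight stand-in for proxypool.schemas.proxy.Proxy (hypothetical, for this sketch only)
SimpleProxy = namedtuple('SimpleProxy', ['host', 'port'])

# made-up payload mirroring the JSON shape that parse() above expects; not real Kuaidaili output
SAMPLE_RESPONSE = json.dumps({
    "data": {
        "proxy_list": [
            "203.0.113.10:8080",  # TEST-NET addresses, illustrative only
            "203.0.113.11:3128",
        ]
    }
})


def parse(response):
    # same splitting logic as KuaidailiPaidCrawler.parse
    data = json.loads(response)
    for proxy in data["data"]["proxy_list"]:
        host, port = proxy.split(":")
        yield SimpleProxy(host=host, port=int(port))


if __name__ == '__main__':
    for proxy in parse(SAMPLE_RESPONSE):
        print(f'{proxy.host}:{proxy.port}')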