# self_example/Spider/Chapter06_异步爬虫/asynchttp/aiohttpTest2.py
#
# 43 lines
# 942 B
# Python

# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 16:57
@Usage : Concurrency limiting with a semaphore, so the target site is not overwhelmed by too many simultaneous requests
@Desc :
@Reference: https://github.dev/Python3WebSpider/AsyncTest
'''
import aiohttp
import asyncio
# Page requested by every scrape_api() call.
URL = 'https://www.baidu.com/'
# Capacity handed to the semaphore below, i.e. the cap on simultaneous requests.
CONCURRENCY = 5

# Shared gate that scrape_api() acquires around each request.
semaphore = asyncio.Semaphore(CONCURRENCY)
# Placeholder for the shared aiohttp.ClientSession; main() assigns the real one.
session = None
async def scrape_api():
    """Fetch ``URL`` once and return the response body as text.

    The request is made while holding the module-level ``semaphore``, so the
    number of requests in flight is bounded by that semaphore's capacity.
    Uses the module-level ``session``, which must already be set to an open
    client session when this coroutine runs.

    Returns:
        The decoded body of the response.
    """
    async with semaphore:
        # Printed only after a semaphore slot has been obtained.
        print('scraping', URL)
        async with session.get(URL) as resp:
            body = await resp.text()
        return body
async def main():
    """Run 10000 ``scrape_api()`` requests over one shared client session.

    Creates the ``aiohttp.ClientSession`` and stores it in the module-level
    ``session`` so that ``scrape_api()`` can use it, schedules all requests,
    waits for them to finish, and closes the session afterwards.

    Raises:
        Exception: the first exception raised by any request is propagated
            by ``asyncio.gather``; the session is still closed in that case.
    """
    global session
    session = aiohttp.ClientSession()
    try:
        scrape_index_tasks = [
            asyncio.ensure_future(scrape_api()) for _ in range(10000)
        ]
        # gather() returns only once every task has finished, so no further
        # wait on the same tasks is needed.
        await asyncio.gather(*scrape_index_tasks)
    finally:
        # Close the session even when a request failed, so its connections
        # are not leaked.
        await session.close()
if __name__ == '__main__':
    # Script entry point: drive main() to completion on the event loop
    # returned by asyncio.get_event_loop().
    # NOTE(review): asyncio.run(main()) was left disabled by the author --
    # presumably because the module-level semaphore is created before any
    # loop is running; confirm that before switching to asyncio.run().
    event_loop = asyncio.get_event_loop()
    event_loop.run_until_complete(main())