43 lines
942 B
Python
43 lines
942 B
Python
# -*- encoding:utf-8 -*-
|
|
|
|
'''
@Author : dingjiawen
@Date : 2023/12/6 16:57
@Usage : Concurrency limiting with a semaphore, so the target site is not crashed by too many simultaneous requests
@Desc :
@Reference: https://github.dev/Python3WebSpider/AsyncTest
'''
|
|
|
|
import aiohttp
|
|
import asyncio
|
|
|
|
# Request target and the size of the semaphore that throttles requests.
URL = 'https://www.baidu.com/'
CONCURRENCY = 5

# Acquired by scrape_api() around each request; created with CONCURRENCY slots.
semaphore = asyncio.Semaphore(CONCURRENCY)
# Placeholder for the shared aiohttp.ClientSession; main() assigns the real one.
session = None
|
|
|
|
|
|
async def scrape_api():
    """Fetch URL once through the shared session while holding a semaphore slot.

    Returns:
        The response body as text.
    """
    # The module-level semaphore was built with CONCURRENCY slots, so only
    # that many coroutines can be inside this section at the same time.
    async with semaphore:
        print('scraping', URL)
        async with session.get(URL) as response:
            body = await response.text()
    return body
|
|
|
|
|
|
async def main():
    """Open the shared session, run 10000 scrape_api() tasks, then close it.

    The session is stored in the module-level ``session`` name because
    scrape_api() reads it from there.

    Raises:
        Whatever exception the first failing scrape task raised; it is
        propagated by asyncio.gather after cleanup has run.
    """
    global session

    session = aiohttp.ClientSession()
    scrape_index_tasks = [asyncio.ensure_future(scrape_api()) for _ in range(10000)]
    try:
        # gather() returns only once every task has finished, so no second
        # wait on the same tasks is needed afterwards.
        await asyncio.gather(*scrape_index_tasks)
    finally:
        # If gather() raised, other tasks may still be running. Cancel them
        # (a no-op for tasks that already finished) and let them settle so
        # none is left using the session while it is being closed.
        for task in scrape_index_tasks:
            task.cancel()
        await asyncio.gather(*scrape_index_tasks, return_exceptions=True)
        # Always release the connection pool, even on failure.
        await session.close()
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # asyncio.run(main()) is deliberately not used: the module-level semaphore
    # is constructed at import time, and on Python < 3.10 asyncio primitives
    # bind to the event loop that is current at construction. Reusing that
    # loop keeps the script working on those versions.
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        # Newer Python versions no longer create a loop implicitly here;
        # create and install one explicitly instead of crashing.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
    loop.run_until_complete(main())
|