self_example/Spider/Chapter06_异步爬虫/asynchttp/aiohttpTest2.py

# -*- encoding:utf-8 -*-

'''
@Author : dingjiawen
@Date : 2023/12/6 16:57
@Usage : Limit concurrency with a semaphore so that too many simultaneous requests do not overwhelm the target site
@Desc :
@Reference: https://github.dev/Python3WebSpider/AsyncTest
'''

import aiohttp
import asyncio

# Page that every scrape task requests.
URL = 'https://www.baidu.com/'

# Upper bound on how many scrape tasks may hold the semaphore at once.
CONCURRENCY = 5

# Shared HTTP session; stays None until main() assigns a ClientSession.
session = None

# Gate acquired by each scrape task before it issues its request.
semaphore = asyncio.Semaphore(CONCURRENCY)


async def scrape_api():
    """Fetch URL through the shared session and return the body as text.

    The module-level semaphore is acquired before the request is issued
    and is held until the body has been read.
    """
    await semaphore.acquire()
    try:
        print('scraping', URL)
        async with session.get(URL) as resp:
            body = await resp.text()
        return body
    finally:
        semaphore.release()


async def main():
    """Run 10000 scrape_api() tasks over one shared aiohttp session.

    The session is stored in the module-level ``session`` so that
    scrape_api() can use it. It is always closed before this coroutine
    returns or raises.

    Raises:
        Exception: the first exception raised by any scrape task is
            propagated by ``asyncio.gather``.
    """
    global session

    session = aiohttp.ClientSession()
    scrape_index_tasks = []
    try:
        scrape_index_tasks = [asyncio.ensure_future(scrape_api()) for _ in range(10000)]
        # gather() already waits for every task; no extra asyncio.wait() needed.
        await asyncio.gather(*scrape_index_tasks)
    finally:
        try:
            # On failure or cancellation some tasks may still be running.
            # Stop them before the session they depend on goes away.
            unfinished = [task for task in scrape_index_tasks if not task.done()]
            for task in unfinished:
                task.cancel()
            if unfinished:
                # return_exceptions=True drains the cancelled tasks without
                # masking the exception that brought us here.
                await asyncio.gather(*unfinished, return_exceptions=True)
        finally:
            await session.close()


if __name__ == '__main__':
    # Drive main() to completion on the default event loop.
    # NOTE(review): asyncio.run(main()) is the alternative entry point, but it
    # starts a fresh loop; confirm the module-level semaphore works with a
    # fresh loop on the targeted Python version before switching.
    event_loop = asyncio.get_event_loop()
    event_loop.run_until_complete(main())