Here is an example that uses concurrent.futures.ThreadPoolExecutor to read multiple URLs. This is just one approach, though; you could also use multiprocessing (a rough sketch follows the first example's output) or asyncio/aiohttp (see the edit at the end), etc.
from concurrent.futures import ThreadPoolExecutor

import requests


def get_from_api(tpl):
    session, url = tpl
    resp = session.get(url, stream=True)

    # just for example: count the lines of the streamed response
    count_lines = 0
    for line in resp.iter_lines():
        count_lines += 1

    return url, count_lines


def main():
    api_urls = [
        "https://google.com",
        "https://yahoo.com",
        "https://facebook.com",
        "https://instagram.com",
        # ...etc.
    ]

    # one shared requests session, two worker threads fetching concurrently
    with ThreadPoolExecutor(max_workers=2) as pool, requests.session() as session:
        for url, count_lines in pool.map(
            get_from_api, ((session, url) for url in api_urls)
        ):
            print(url, count_lines)


if __name__ == "__main__":
    main()
Prints:
https://google.com 17
https://yahoo.com 648
https://facebook.com 26
https://instagram.com 50
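For the multiprocessing alternative mentioned at the top, a minimal sketch could look like the following (this is my own sketch, not part of the original example; each worker uses a plain requests.get, since a requests session cannot be shared across processes):

from multiprocessing import Pool

import requests


def get_from_api(url):
    # each worker process makes its own request; connections are not
    # shared across process boundaries
    resp = requests.get(url, stream=True)

    count_lines = 0
    for line in resp.iter_lines():
        count_lines += 1

    return url, count_lines


def main():
    api_urls = [
        "https://google.com",
        "https://yahoo.com",
        "https://facebook.com",
        "https://instagram.com",
    ]

    with Pool(processes=2) as pool:
        for url, count_lines in pool.map(get_from_api, api_urls):
            print(url, count_lines)


if __name__ == "__main__":
    main()

Since this job is I/O-bound, threads (or asyncio) are usually the better fit; processes mainly pay off when you also do CPU-heavy work on each response.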
Edit: using asyncio/aiohttp:
import asyncio

# Streaming API:
# https://docs.aiohttp.org/en/stable/streams.html#streaming-api
import aiohttp


async def fetch(session, url):
    while True:
        async with session.get(url) as response:
            reader = response.content
            cnt = 0
            async for line in reader:
                cnt += 1
            print(f"{url}: {cnt} lines read")
        await asyncio.sleep(3)


async def main():
    urls = [
        "https://google.com",  # Replace with actual URLs
        "https://facebook.com",
    ]

    async with aiohttp.ClientSession() as session:
        tasks = {asyncio.create_task(fetch(session, url)) for url in urls}

        # this loops indefinitely, re-polling each URL every 3 seconds:
        await asyncio.gather(*tasks)


if __name__ == "__main__":
    asyncio.run(main())
Prints:
https://google.com: 17 lines read
https://facebook.com: 26 lines read
https://google.com: 655 lines read
https://facebook.com: 26 lines read
...
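The asyncio.gather call above never returns because each fetch coroutine loops forever. If you only want to poll for a bounded time, one option (my own sketch, not part of the original answer; the 30-second limit is an arbitrary choice) is to wrap the gather in asyncio.wait_for, which cancels the tasks on timeout:

import asyncio

import aiohttp


async def fetch(session, url):
    # same endless polling loop as above
    while True:
        async with session.get(url) as response:
            cnt = 0
            async for line in response.content:
                cnt += 1
            print(f"{url}: {cnt} lines read")
        await asyncio.sleep(3)


async def main():
    urls = [
        "https://google.com",
        "https://facebook.com",
    ]

    async with aiohttp.ClientSession() as session:
        tasks = [asyncio.create_task(fetch(session, url)) for url in urls]
        try:
            # stop polling after 30 seconds; wait_for cancels the tasks
            await asyncio.wait_for(asyncio.gather(*tasks), timeout=30)
        except asyncio.TimeoutError:
            print("polling window elapsed")


if __name__ == "__main__":
    asyncio.run(main())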