import asyncio import aiohttp import aiofiles from bs4 import BeautifulSoup import csv import time # 替换为新浪财经股票列表API(示例) STOCK_LIST_API = "https://finance.sina.com.cn/stock/sl/stock_list.html" HEADERS = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" } async def fetch(session, url): """异步获取网页内容""" async with session.get(url, headers=HEADERS) as response: return await response.text() async def parse_stock_data(html): """解析股票数据(示例:仅提取名称和价格)""" soup = BeautifulSoup(html, "html.parser") stock_name = soup.select_one(".stock-name").text.strip() if soup.select_one(".stock-name") else "N/A" stock_price = soup.select_one(".price").text.strip() if soup.select_one(".price") else "N/A" return {"name": stock_name, "price": stock_price} async def save_to_csv(data, filename="stocks.csv"): """异步写入CSV""" async with aiofiles.open(filename, mode="a", encoding="utf-8", newline="") as f: writer = csv.writer(f) await writer.writerow([data["name"], data["price"]]) async def crawl_stock(stock_code, session): """爬取单只股票数据""" url = f"https://finance.sina.com.cn/realstock/company/{stock_code}/nc.shtml" try: html = await fetch(session, url) data = await parse_stock_data(html) await save_to_csv(data) print(f"爬取成功:{stock_code} - {data['name']}") except Exception as e: print(f"爬取失败:{stock_code} - {str(e)}") async def main(): """主协程:并发爬取多个股票""" stock_codes = ["sh600000", "sh601318", "sz000001"] # 示例股票代码(可扩展) # 使用uvloop加速(仅限Unix系统) try: import uvloop asyncio.set_event_loop_policy(uvloop.EventLoopPolicy()) except ImportError: pass # 创建aiohttp会话 async with aiohttp.ClientSession() as session: tasks = [crawl_stock(code, session) for code in stock_codes] await asyncio.gather(*tasks) if __name__ == "__main__": start_time = time.time() asyncio.run(main()) print(f"爬取完成,耗时:{time.time() - start_time:.2f}秒")四、性能优化策略
若有收获,就点个赞吧