AI 股票分析系统搭建：Python 爬虫数据采集实战

AI 股票分析系统搭建：Python 爬虫数据采集实战 | 极客日志

# 克隆项目代码到你的工作空间
git clone https://github.com/ZhuLinsen/daily_stock_analysis.git
# 进入项目文件夹
cd daily_stock_analysis

pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple

pip install beautifulsoup4 lxml

import requests
from bs4 import BeautifulSoup
import time

def get_stock_price(stock_code):
    """
    Fetch the latest quote for one A-share stock from the Sina Finance quote endpoint.

    :param stock_code: exchange-prefixed stock code, e.g. 'sh600519' (Shanghai)
        or 'sz000001' (Shenzhen)
    :return: ``(stock_name, current_price)``, where the price is the raw string
        taken from the response; ``(None, None)`` when the request fails, the
        status code is not 200, or the payload cannot be parsed
    """
    url = f'https://hq.sinajs.cn/list={stock_code}'
    # Browser-like headers so the request resembles a normal page visit
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
        'Referer': 'https://finance.sina.com.cn'
    }
    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException as e:
        print(f"抓取过程中出现错误：{e}")
        return None, None

    if response.status_code != 200:
        print(f"请求失败，状态码：{response.status_code}")
        return None, None

    # The response body is decoded as GBK
    response.encoding = 'gbk'
    # Expected payload shape: var hq_str_sh600519="<name>,<f1>,<f2>,<price>,...";
    quoted_parts = response.text.split('"')
    data_list = quoted_parts[1].split(',') if len(quoted_parts) > 1 else []
    # The price is read from index 3, so at least four fields must be present.
    if len(data_list) < 4:
        print("解析数据失败")
        return None, None

    stock_name = data_list[0]
    current_price = data_list[3]
    print(f"股票 {stock_name}({stock_code}) 当前价格：{current_price} 元")
    return stock_name, current_price

# Try it out: fetch the share price of Kweichow Moutai
if __name__ == "__main__":
    # Kweichow Moutai's code on the Shanghai Stock Exchange is sh600519
    name, price = get_stock_price('sh600519')
    # get_stock_price returns (None, None) on failure, so only report a truthy price
    if price:
        print(f"成功获取到 {name} 的价格：{price}")

sh600519
sz000858
sz300750

def batch_get_prices(stock_file='my_stocks.txt', delay=1):
    """
    Read stock codes from a text file and fetch the price of each one.

    Codes may be listed one per line or separated by whitespace on a line;
    blank lines are ignored. Stocks whose lookup fails are left out of the
    result.

    :param stock_file: path of the UTF-8 text file holding the stock codes
    :param delay: seconds to wait between two consecutive requests
    :return: list of ``{'code', 'name', 'price'}`` dicts for the stocks that
        were fetched successfully; an empty list if the file does not exist
    """
    try:
        # Read all codes up front so the file is closed before any network call.
        with open(stock_file, 'r', encoding='utf-8') as f:
            stock_codes = [code for line in f for code in line.split()]
    except FileNotFoundError:
        print(f"文件 {stock_file} 不存在")
        return []

    print(f"开始批量获取 {len(stock_codes)} 只股票数据...")
    stock_data = []
    for index, code in enumerate(stock_codes):
        # Throttle between every pair of requests, whether or not the previous
        # one succeeded; no wait is needed before the first request.
        if index:
            time.sleep(delay)
        name, price = get_stock_price(code)
        if name and price:
            stock_data.append({
                'code': code,
                'name': name,
                'price': price
            })

    print("\n=== 自选股行情汇总 ===")
    for item in stock_data:
        print(f"{item['name']}({item['code']}): {item['price']}")
    return stock_data

# Run the batch fetch
if __name__ == "__main__":
    # With no argument the codes are read from the default file 'my_stocks.txt'
    my_portfolio = batch_get_prices()

# 每天上午 9 点 30 分（开盘后）和下午 3 点（收盘后）各运行一次爬虫脚本
30 9 * * 1-5 /usr/bin/python3 /你的路径/stock_spider.py >> /你的路径/stock.log 2>&1
0 15 * * 1-5 /usr/bin/python3 /你的路径/stock_spider.py >> /你的路径/stock.log 2>&1

def get_stock_price_enhanced(stock_code):
    """
    Fetch one stock quote with a timeout, format checks and a price sanity check.

    :param stock_code: exchange-prefixed stock code, e.g. 'sh600519'
    :return: ``(stock_name, current_price, status)``. ``status`` is '成功' on
        success and '价格异常' when the price falls outside the sanity range
        (name and price are still returned in that case). On failure name and
        price are ``None`` and ``status`` is '格式错误', '超时', '解析错误' or
        '未知错误'; a non-200 HTTP response yields ``(None, None, None)``.
    """
    url = f'https://hq.sinajs.cn/list={stock_code}'
    # Same browser-like headers (including Referer) that get_stock_price sends
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
        'Referer': 'https://finance.sina.com.cn'
    }
    try:
        response = requests.get(url, headers=headers, timeout=10)  # 10-second timeout
        response.encoding = 'gbk'
        if response.status_code != 200:
            print(f"警告：请求{stock_code}失败，状态码{response.status_code}")
            # A retry or an error-log entry could be added here
            return None, None, None
        data_str = response.text
        if 'hq_str' not in data_str:
            print(f"警告：{stock_code}返回的数据格式异常")
            return None, None, '格式错误'
        # Expected payload shape: var hq_str_sh600519="<name>,<f1>,<f2>,<price>,...";
        quoted_parts = data_str.split('"')
        data_list = quoted_parts[1].split(',') if len(quoted_parts) > 1 else []
        # Name is field 0 and price is field 3, so four fields are the minimum.
        if len(data_list) < 4:
            print(f"警告：{stock_code}返回的数据格式异常")
            return None, None, '格式错误'
        stock_name = data_list[0]
        current_price = data_list[3]
        # float() raises ValueError on a non-numeric price; handled below
        price_float = float(current_price)
        # Sanity check: reject prices below 0.01 or above 100000
        if price_float < 0.01 or price_float > 100000:
            print(f"警告：{stock_code}价格{price_float}异常，可能数据有误")
            return stock_name, current_price, '价格异常'
        return stock_name, current_price, '成功'
    except requests.exceptions.Timeout:
        print(f"错误：获取{stock_code}超时")
        return None, None, '超时'
    except ValueError as e:
        print(f"错误：解析{stock_code}价格时出错：{e}")
        return None, None, '解析错误'
    except Exception as e:
        print(f"未知错误：{e}")
        return None, None, '未知错误'

[
  {
    "symbol": "600519.SH",
    "name": "贵州茅台",
    "current_price": 1850.50,
    "change": "0.85%",
    "update_time": "2024-01-01 15:00:00"
  }
]

import json
from datetime import datetime

def save_to_json(data_list, filename='stock_data.json'):
    """
    Write the collected stock records to a JSON file.

    Each input record is converted to a dict with the keys ``symbol``,
    ``name``, ``current_price`` and ``update_time``.

    :param data_list: iterable of dicts with the keys 'code' (exchange-prefixed,
        e.g. 'sh600519'), 'name' and 'price'
    :param filename: path of the JSON file to write (UTF-8, overwritten)
    :raises ValueError: if a record's 'price' cannot be converted to float
    """
    # One timestamp for the whole batch so every record of a save agrees.
    update_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    output_data = []
    for item in data_list:
        code = item['code']
        # Choose the suffix first and append it afterwards: writing
        # `digits + '.SH' if ... else '.SZ'` binds as `(digits + '.SH') if ... else '.SZ'`
        # and would drop the digits for every non-Shanghai code.
        suffix = '.SH' if code.startswith('sh') else '.SZ'
        # Strip only a leading exchange prefix, never a match inside the code.
        digits = code[2:] if code.startswith(('sh', 'sz')) else code
        output_data.append({
            "symbol": digits + suffix,
            "name": item['name'],
            "current_price": float(item['price']),
            "update_time": update_time
        })
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(output_data, f, ensure_ascii=False, indent=2)
    print(f"数据已保存至 {filename}")

cp .env.example .env

# 1. 设置你的自选股列表 (用逗号分隔)
STOCK_LIST=600519,000858,300750
# 2. 设置 AI 模型 (二选一)
# 如果你有 Google Gemini 的 API Key（免费申请）
GEMINI_API_KEY=your_gemini_api_key_here
# 或者，如果你用 DeepSeek、通义千问等兼容 OpenAI 的模型
OPENAI_API_KEY=your_openai_api_key
OPENAI_BASE_URL=https://api.deepseek.com/v1
OPENAI_MODEL=deepseek-chat
# 3. 设置通知方式 (比如企业微信)
WECHAT_WEBHOOK_URL=your_wechat_webhook_url

import json
import os
from datetime import datetime, timedelta

def load_custom_stock_data(json_file='stock_data.json'):
    """
    Load the stock data that the crawler saved to a local JSON file.

    :param json_file: path of the JSON file to read
    :return: the parsed data when the file exists and holds a non-empty
        value; ``None`` when the file is missing, the content is empty, or
        reading/parsing raises any exception
    """
    file_missing = not os.path.exists(json_file)
    if file_missing:
        print(f"自定义数据文件 {json_file} 不存在，将使用默认数据源。")
        return None

    try:
        with open(json_file, 'r', encoding='utf-8') as handle:
            records = json.load(handle)
        if not records:
            return None
        # Timestamp of the first record; read as the hook for a freshness
        # check that has not been implemented yet.
        latest_time = records[0].get('update_time')
        print(f"成功从自定义文件加载 {len(records)} 条股票数据。")
        return records
    except Exception as err:
        print(f"加载自定义数据失败：{err}")
        return None

# 然后，在项目主逻辑中，可以这样调用：
# custom_data = load_custom_stock_data()
# if custom_data:
#     # 使用自定义数据
#     process_data(custom_data)
# else:
#     # 回退到默认数据源
#     default_data = get_default_data()

python main.py

[AI 决策仪表盘] 🟢 买入 | 贵州茅台 (600519) 缩量回踩 MA5 支撑，乖离率 1.2% 处于最佳买点
💰 建议：买入 1800 | 止损 1750 | 目标 1900
多头排列 乖离安全 量能配合
--- 生成时间：18:00

AI 股票分析系统搭建：Python 爬虫数据采集实战

AI 股票分析系统搭建：Python 爬虫数据采集实战

1. 准备工作：认识你的 AI 分析师

2. 环境搭建：在云端环境安家

2.1 创建你的工作空间

2.2 把代码'搬'到云端

2.3 安装必需的'零件'

3. 核心实战：用 Python 爬虫抓取股票数据

3.1 爬虫初体验：抓取单只股票实时行情

3.2 进阶任务：批量抓取你的自选股列表

3.3 让数据'活'起来：定时自动运行

4. 数据清洗与整合：喂给 AI'干净'的粮食

4.1 清洗数据：处理异常和缺失

4.2 整合数据：生成 AI 喜欢的格式

5. 连接与测试：让你的爬虫为 AI 服务

5.1 配置 AI 分析系统

5.2 修改数据源指向我们的爬虫

5.3 运行与验收

更多推荐文章

相关免费在线工具

AI 股票分析系统搭建：Python 爬虫数据采集实战

AI 股票分析系统搭建：Python 爬虫数据采集实战

1. 准备工作：认识你的 AI 分析师

2. 环境搭建：在云端环境安家

2.1 创建你的工作空间

2.2 把代码'搬'到云端

2.3 安装必需的'零件'

3. 核心实战：用 Python 爬虫抓取股票数据

3.1 爬虫初体验：抓取单只股票实时行情

3.2 进阶任务：批量抓取你的自选股列表

3.3 让数据'活'起来：定时自动运行

4. 数据清洗与整合：喂给 AI'干净'的粮食

4.1 清洗数据：处理异常和缺失

4.2 整合数据：生成 AI 喜欢的格式

5. 连接与测试：让你的爬虫为 AI 服务

5.1 配置 AI 分析系统

5.2 修改数据源指向我们的爬虫

5.3 运行与验收

微信扫一扫，关注极客日志

更多推荐文章

相关免费在线工具