import os
from openai import OpenAI

# Read the API key from the environment (never hard-code secrets).
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

chat_messages = [{"role": "user", "content": "解释量子计算,简短点。"}]
response = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=chat_messages,
)

# The assistant's reply is in the first choice of the response.
print(response.choices[0].message.content)
输出示例:量子计算利用量子比特进行并行计算,能解决经典计算机难题,如因子分解。
2.2 Anthropic (Claude) 示例
import os
from anthropic import Anthropic

# API key is read from the environment.
client = Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])

response = client.messages.create(
    model="claude-3-5-sonnet-20241022",
    max_tokens=100,
    messages=[
        {"role": "user", "content": "写一首关于 AI 的短诗。"},
    ],
)

# Claude returns a list of content blocks; the first one carries the text.
print(response.content[0].text)
2.3 Groq 示例
import os
from groq import Groq

# Groq client authenticated via environment variable.
client = Groq(api_key=os.environ["GROQ_API_KEY"])

question = {"role": "user", "content": "Python 如何处理异常?"}
response = client.chat.completions.create(
    model="llama3-70b-8192",
    messages=[question],
)

# Print the model's answer from the first completion choice.
print(response.choices[0].message.content)
2.4 Hugging Face Inference API 示例
import os
from huggingface_hub import InferenceClient

# Token is read from the environment.
client = InferenceClient(token=os.environ["HF_TOKEN"])

# `text_generation` takes the PROMPT as its first positional argument;
# the model id is a keyword-only parameter, and the output-length cap is
# `max_new_tokens`. (The original snippet passed the model id positionally
# as the prompt and used a nonexistent `max_tokens` parameter.)
response = client.text_generation(
    "描述 Hugging Face 的作用。",
    model="meta-llama/Llama-3.2-3B-Instruct",
    max_new_tokens=50,
)
print(response)
通用提示:用 try-except 处理错误(如限速、密钥无效)。例如 except Exception as e: print(e)。
3. 高级技巧
3.1 流式响应(Streaming)
实时输出(如聊天 App)。
# Stream the completion so tokens print as they arrive
# (NOTE: `client` is the OpenAI client created in the earlier snippet).
stream = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "讲个笑话。"}],
    stream=True,
)

for chunk in stream:
    delta_text = chunk.choices[0].delta.content
    if delta_text:  # skip chunks with no content (e.g. the final/empty deltas)
        print(delta_text, end="")
import os

from litellm import completion

# Placeholder keys — replace with real values or export them beforehand.
os.environ["OPENAI_API_KEY"] = "..."
os.environ["ANTHROPIC_API_KEY"] = "..."

# Call OpenAI.
response = completion(model="gpt-4o-mini", messages=[{"role": "user", "content": "Hi!"}])

# Switch to Claude — same call, only the model name changes.
response = completion(model="claude-3-5-sonnet-20241022", messages=[{"role": "user", "content": "Hi!"}])

# Prints the Claude reply (the second call overwrote the first response).
print(response.choices[0].message.content)
优势:成本跟踪、负载均衡、日志。
5. 集成框架:LangChain(链式/代理开发)
构建复杂应用。
import os  # was missing: os.environ below would raise NameError standalone

from langchain_groq import ChatGroq
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Groq-hosted Llama 3 as the chain's LLM; key read from the environment.
llm = ChatGroq(model="llama3-70b-8192", api_key=os.environ["GROQ_API_KEY"])

# Prompt template with a single {topic} slot.
prompt = PromptTemplate(input_variables=["topic"], template="解释 {topic}。")

# NOTE(review): LLMChain / .run() are deprecated in newer LangChain releases
# (prefer `prompt | llm`); kept here to match the tutorial's API surface.
chain = LLMChain(llm=llm, prompt=prompt)
print(chain.run("区块链"))
6. 性能优化与最佳实践
| 优化点 | 做法示例 | 收益参考 |
| --- | --- | --- |
| 限速处理 | 用 time.sleep() 或 retry 库 | 避免封禁 |
| 成本控制 | 计算 tokens(tiktoken 库),用 LiteLLM 跟踪 | 节省 20-50% |
| 错误重试 | pip install tenacity;@retry 装饰器 | 鲁棒性 ↑ |
| 监控/日志 | 用 Langfuse 集成(e.g., Groq) | 追踪使用 |
| 安全 | 环境变量存密钥;输入过滤防注入 | — |
| 批量处理 | batch API(OpenAI 支持) | 5-10x 快 |
常见错误:401(密钥错)、429(限速)、超时(加 timeout=30)。
本地运行:用 Ollama/Hugging Face Transformers 跑开源模型(无 API 费)。例如 from transformers import pipeline; pipe = pipeline("text-generation", model="meta-llama/Llama-3.2-3B")。
7. 完整项目实战:Streamlit 聊天机器人
用 Groq 构建 Web UI。
app.py:
import os

import streamlit as st
from groq import Groq

client = Groq(api_key=os.environ["GROQ_API_KEY"])

st.title("Groq 聊天机器人")

# Persist the conversation across Streamlit reruns.
# (Original had `if"messages"notin ...` — a SyntaxError — and lost all
# block indentation; reconstructed below.)
if "messages" not in st.session_state:
    st.session_state.messages = []

# Replay the chat history accumulated so far.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

prompt = st.chat_input("说点什么?")
if prompt:
    # Record and display the user's turn.
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Stream the assistant's reply token by token into the chat bubble.
    with st.chat_message("assistant"):
        stream = client.chat.completions.create(
            model="mixtral-8x7b-32768",
            messages=[
                {"role": m["role"], "content": m["content"]}
                for m in st.session_state.messages
            ],
            stream=True,
        )
        response = st.write_stream(
            chunk.choices[0].delta.content or "" for chunk in stream
        )

    # Save the full assistant reply so the next rerun replays it.
    st.session_state.messages.append({"role": "assistant", "content": "".join(response)})