Java 调用 ChatGPT API 实战：SDK 选型与生产环境避坑指南

Java 调用 ChatGPT API 实战：SDK 选型与生产环境避坑指南 | 极客日志

/**
 * Spring configuration that builds the {@link WebClient} used to call the OpenAI REST API.
 *
 * <p>The client uses a dedicated Reactor Netty connection pool and sends JSON with a bearer
 * token on every request. The API key is read once, when the bean is created, and baked into
 * the default {@code Authorization} header.
 */
@Configuration
public class OpenAIConfig {
    /**
     * Creates the OpenAI {@link WebClient}.
     *
     * @return a client bound to {@code https://api.openai.com/v1}
     * @throws IllegalStateException if the {@code OPENAI_API_KEY} environment variable is
     *         missing or blank
     */
    @Bean
    public WebClient openAIWebClient() {
        // Dedicated pool: at most 100 connections, callers wait up to 30s for a free one,
        // and connections idle for more than 5 minutes are evicted.
        ConnectionProvider connectionProvider = ConnectionProvider.builder("openai-pool")
            .maxConnections(100)
            .pendingAcquireTimeout(Duration.ofSeconds(30))
            .maxIdleTime(Duration.ofMinutes(5))
            .build();
        // 60s response timeout and 30s TCP connect timeout.
        HttpClient httpClient = HttpClient.create(connectionProvider)
            .responseTimeout(Duration.ofSeconds(60))
            .option(ChannelOption.CONNECT_TIMEOUT_MILLIS, 30000);
        return WebClient.builder()
            .baseUrl("https://api.openai.com/v1")
            .clientConnector(new ReactorClientHttpConnector(httpClient))
            .defaultHeader(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_JSON_VALUE)
            .defaultHeader(HttpHeaders.AUTHORIZATION, "Bearer " + getApiKey())
            .build();
    }

    /**
     * Reads the API key from the {@code OPENAI_API_KEY} environment variable.
     *
     * <p>Fails fast instead of returning {@code null}: a missing variable would otherwise be
     * concatenated into the header as the literal text {@code "Bearer null"} and only surface
     * later as an authentication error on the first request.
     *
     * @return the API key with surrounding whitespace removed
     * @throws IllegalStateException if the variable is unset or blank
     */
    private String getApiKey() {
        String apiKey = System.getenv("OPENAI_API_KEY");
        if (apiKey == null || apiKey.trim().isEmpty()) {
            throw new IllegalStateException("Environment variable OPENAI_API_KEY is not set");
        }
        // A stray trailing newline or space would make the header value invalid.
        return apiKey.trim();
    }
}

public class OpenAIService {
    private final WebClient webClient;
    private final Retry retry;

    public OpenAIService(WebClient webClient) {
        this.webClient = webClient;
        // 配置重试策略：最多重试 3 次，使用指数退避
        this.retry = Retry.backoff(3, Duration.ofSeconds(1))
            .maxBackoff(Duration.ofSeconds(10))
            .jitter(0.5) // 添加随机抖动，避免多个客户端同时重试
            .filter(this::shouldRetry)
            .onRetryExhaustedThrow((retryBackoffSpec, retrySignal) -> {
                throw new ServiceUnavailableException("OpenAI 服务暂时不可用");
            });
    }

    private boolean shouldRetry(Throwable throwable) {
        // 只对网络错误和 429（限流）进行重试
        if (throwable instanceof WebClientResponseException) {
            WebClientResponseException ex = (WebClientResponseException) throwable;
            return ex.getStatusCode() == HttpStatus.TOO_MANY_REQUESTS || ex.getStatusCode().is5xxServerError();
        }
        return throwable instanceof IOException;
    }

    public Mono<String> chatCompletion(ChatRequest request) {
        return webClient.post()
            .uri("/chat/completions")
            .bodyValue(request)
            .retrieve()
            .bodyToMono(String.class)
            .retryWhen(retry)
            .timeout(Duration.ofSeconds(30));
    }
}

/**
 * Sends a chat-completion request with streaming enabled and emits the text fragments
 * carried by each server-sent event.
 *
 * @param request the base request; a copy with {@code stream=true} is sent
 * @return a flux of content fragments, one per received data event (possibly empty strings)
 */
public Flux<String> streamChatCompletion(ChatRequest request) {
    return webClient.post()
        .uri("/chat/completions")
        .bodyValue(request.toBuilder()
            .stream(true) // enable streaming responses
            .build())
        .accept(MediaType.TEXT_EVENT_STREAM) // accept SSE
        .retrieve()
        .bodyToFlux(String.class)
        // parseSSEEvent returns null for "[DONE]" and for non-data lines. Flux.map must
        // never return null (it fails the stream with a NullPointerException before any
        // later filter can run), so drop those events inside handle() instead.
        .<String>handle((event, sink) -> {
            String json = parseSSEEvent(event);
            if (json != null) {
                sink.next(json);
            }
        })
        .map(this::extractContent)
        .doOnError(this::handleStreamError);
}

/**
 * Extracts the JSON payload from one server-sent event.
 *
 * <p>Accepts both a raw SSE line ({@code data: {...}} or {@code data:{...}}; the space after
 * the colon is optional in the SSE format) and a payload whose {@code data:} prefix has
 * already been removed by the HTTP message reader, in which case the event is the bare JSON
 * object.
 *
 * @param event one event as delivered by the response stream
 * @return the payload text, or {@code null} when the event is the {@code [DONE]} end marker
 *         or is not a data event
 */
private String parseSSEEvent(String event) {
    if (event == null) {
        return null;
    }
    // SSE format: data: {"choices":[{"delta":{"content":"Hello"}}]}
    String payload = event.trim();
    if (payload.startsWith("data:")) {
        payload = payload.substring("data:".length()).trim();
    } else if (!payload.startsWith("{")) {
        // Neither a data line nor a bare JSON object (e.g. "event:", ":" comments,
        // or an unprefixed "[DONE]" marker): nothing to parse.
        return null;
    }
    if ("[DONE]".equals(payload)) {
        return null; // end of stream
    }
    return payload;
}

/**
 * Pulls the incremental text out of one streamed chat-completion chunk.
 *
 * <p>Reads {@code choices[0].delta.content} from the given JSON document.
 *
 * @param json the JSON payload of a single stream event
 * @return the content text, or an empty string when the JSON cannot be parsed, has no
 *         choices, or carries no content
 */
private String extractContent(String json) {
    final JsonNode root;
    try {
        root = objectMapper.readTree(json);
    } catch (JsonProcessingException e) {
        // A malformed chunk is logged and skipped rather than failing the stream.
        log.warn("Failed to parse SSE JSON: {}", json, e);
        return "";
    }

    JsonNode choiceList = root.path("choices");
    if (!choiceList.isArray() || choiceList.size() == 0) {
        return "";
    }
    return choiceList.get(0)
        .path("delta")
        .path("content")
        .asText("");
}

@Component
public class OpenAIMetrics {
    private final MeterRegistry meterRegistry;
    private final Timer apiCallTimer;
    private final Counter errorCounter;

    public OpenAIMetrics(MeterRegistry meterRegistry) {
        this.meterRegistry = meterRegistry;
        this.apiCallTimer = Timer.builder("openai.api.call.duration")
            .description("OpenAI API 调用耗时")
            .tag("service", "chatgpt")
            .register(meterRegistry);
        this.errorCounter = Counter.builder("openai.api.errors")
            .description("OpenAI API 调用错误次数")
            .tag("service", "chatgpt")
            .register(meterRegistry);
    }

    public <T> Mono<T> monitor(Mono<T> apiCall, String endpoint) {
        return Mono.defer(() -> {
            long start = System.nanoTime();
            return apiCall
                .doOnSuccess(response -> apiCallTimer.record(System.nanoTime() - start, TimeUnit.NANOSECONDS))
                .doOnError(error -> {
                    errorCounter.increment();
                    apiCallTimer.record(System.nanoTime() - start, TimeUnit.NANOSECONDS);
                });
        });
    }
}

/**
 * Supplies the OpenAI API key from Vault, caching it in memory and re-reading it once the
 * cached value is at least five minutes old.
 *
 * <p>Reads are lock-free on the fast path (volatile fields); refreshes are serialized, and
 * a caller that waited for the lock re-checks freshness so that only one Vault read happens
 * per expiry.
 */
@Component
public class ApiKeyManager {
    /** How long a cached key is served before Vault is consulted again. */
    private static final Duration REFRESH_INTERVAL = Duration.ofMinutes(5);

    private final VaultTemplate vaultTemplate;
    private volatile String cachedApiKey;
    private volatile Instant lastRefreshTime;

    /**
     * @param vaultTemplate Vault client used to read the secret
     * @throws IllegalArgumentException if {@code vaultTemplate} is {@code null}
     */
    public ApiKeyManager(VaultTemplate vaultTemplate) {
        // The final field must be assigned; without this constructor the class
        // does not compile.
        if (vaultTemplate == null) {
            throw new IllegalArgumentException("vaultTemplate must not be null");
        }
        this.vaultTemplate = vaultTemplate;
    }

    /**
     * Returns the current API key, refreshing it from Vault first if the cache is empty
     * or stale.
     *
     * @return the API key
     * @throws IllegalStateException if Vault returns no secret or the secret has no
     *         {@code key} entry
     */
    public String getApiKey() {
        if (isStale()) {
            refreshApiKey();
        }
        return cachedApiKey;
    }

    /** Returns whether the cache is empty or at least {@link #REFRESH_INTERVAL} old. */
    private boolean isStale() {
        Instant refreshedAt = lastRefreshTime;
        if (cachedApiKey == null || refreshedAt == null) {
            return true;
        }
        // Compare full durations: toMinutes() truncates, so "toMinutes() > 5" would only
        // trigger after six minutes.
        return Duration.between(refreshedAt, Instant.now()).compareTo(REFRESH_INTERVAL) >= 0;
    }

    /**
     * Re-reads the key from Vault and updates the cache.
     *
     * @throws IllegalStateException if the secret or its {@code key} entry is missing
     */
    private synchronized void refreshApiKey() {
        // Another thread may have refreshed while this one waited for the lock.
        if (!isStale()) {
            return;
        }
        VaultResponse response = vaultTemplate.read("secret/data/openai/api-key");
        if (response == null || response.getData() == null) {
            throw new IllegalStateException("No secret found at secret/data/openai/api-key");
        }
        // NOTE(review): the path looks like a KV v2 path, where the payload may be nested
        // under a "data" entry - confirm that "key" really is a top-level entry here.
        Object key = response.getData().get("key");
        if (key == null) {
            throw new IllegalStateException(
                "Secret at secret/data/openai/api-key has no 'key' entry");
        }
        cachedApiKey = key.toString();
        lastRefreshTime = Instant.now();
    }
}

@Configuration
public class CircuitBreakerConfig {
    @Bean
    public CircuitBreaker openAICircuitBreaker() {
        CircuitBreakerConfig config = CircuitBreakerConfig.custom()
            .failureRateThreshold(50) // 失败率阈值 50%
            .waitDurationInOpenState(Duration.ofSeconds(30)) // 半开状态等待时间
            .slidingWindowType(SlidingWindowType.COUNT_BASED)
            .slidingWindowSize(10) // 最近 10 次调用
            .minimumNumberOfCalls(5) // 最少 5 次调用才开始计算
            .permittedNumberOfCallsInHalfOpenState(3) // 半开状态允许的调用数
            .recordExceptions(IOException.class, TimeoutException.class)
            .ignoreExceptions(BusinessException.class) // 业务异常不触发熔断
            .build();
        return CircuitBreaker.of("openai", config);
    }

    @Bean
    public Bulkhead openAIBulkhead() {
        BulkheadConfig config = BulkheadConfig.custom()
            .maxConcurrentCalls(20) // 最大并发调用数
            .maxWaitDuration(Duration.ofSeconds(1)) // 等待超时时间
            .build();
        return Bulkhead.of("openai", config);
    }
}

<!-- logback-spring.xml: asynchronous wrapper around the FILE appender -->
<!-- The class attribute is required; without it logback cannot instantiate the appender. -->
<appender name="ASYNC" class="ch.qos.logback.classic.AsyncAppender">
    <!-- Capacity of the in-memory event queue. -->
    <queueSize>1024</queueSize>
    <!-- 0 = never discard events by level when the queue is filling up. -->
    <discardingThreshold>0</discardingThreshold>
    <!-- Capture caller data (class/method/line); this adds overhead per log event. -->
    <includeCallerData>true</includeCallerData>
    <appender-ref ref="FILE"/>
</appender>

// Structured completion log: the fixed message plus key/value arguments for the endpoint,
// duration, token usage and outcome.
// NOTE(review): kv(...) is presumably a structured-argument helper from the logging
// encoder in use - confirm against the file's imports.
log.info(
    "OpenAI API 调用完成",
    kv("endpoint", "/chat/completions"),
    kv("duration_ms", duration),
    kv("tokens_used", tokens),
    kv("success", true));

/**
 * JSON payload for a chat-completion request.
 *
 * <p>Fields that are {@code null} are omitted from the serialized JSON. Properties without
 * a dedicated field are kept in a catch-all map and written back as top-level JSON
 * properties.
 */
@JsonInclude(JsonInclude.Include.NON_NULL)
public class ChatRequest {
    /** Model identifier; defaults to {@code gpt-3.5-turbo}. */
    @JsonProperty("model")
    private String model = "gpt-3.5-turbo";

    /** Conversation messages. */
    @JsonProperty("messages")
    private List<ChatMessage> messages;

    /** Sampling temperature; defaults to 0.7. */
    @JsonProperty("temperature")
    private Double temperature = 0.7;

    /** Whether a streamed response is requested; defaults to {@code false}. */
    @JsonProperty("stream")
    private Boolean stream = false;

    /** Special field {@code function_call}; typed as Object so any JSON shape is accepted. */
    @JsonProperty("function_call")
    private Object functionCall;

    /** Special field {@code logit_bias}: an integer-to-integer map. */
    @JsonProperty("logit_bias")
    private Map<Integer, Integer> logitBias;

    /** Catch-all storage for unknown properties; exposed only via the any-getter below. */
    @JsonIgnore
    private Map<String, Object> additionalProperties = new HashMap<>();

    /**
     * Returns the catch-all properties, which are serialized as top-level JSON properties.
     *
     * @return the live map of additional properties
     */
    @JsonAnyGetter
    public Map<String, Object> getAdditionalProperties() {
        return this.additionalProperties;
    }

    /**
     * Stores a property that has no dedicated field.
     *
     * @param name  the JSON property name
     * @param value the property value
     */
    @JsonAnySetter
    public void setAdditionalProperty(String name, Object value) {
        this.additionalProperties.put(name, value);
    }
}

/**
 * Retries a call once after the server-requested delay when it fails with HTTP 429.
 *
 * <p>The wait is taken from the {@code Retry-After} response header (fixed delay, not
 * exponential backoff). Any other error, including a 429 on the retried call, is propagated
 * unchanged.
 *
 * @param apiCall the call to protect; it is re-subscribed for the retry
 * @return a mono that emits the call's result, retrying once on rate limiting
 */
public Mono<String> handleRateLimit(Mono<String> apiCall) {
    return apiCall.onErrorResume(WebClientResponseException.class, ex -> {
        if (ex.getStatusCode().value() != HttpStatus.TOO_MANY_REQUESTS.value()) {
            return Mono.error(ex);
        }
        // Take the retry delay from the response header.
        Duration waitTime = parseRetryAfter(ex.getHeaders().getFirst("Retry-After"));
        log.warn("被限流，等待 {} 秒后重试", waitTime.getSeconds());
        return Mono.delay(waitTime)
            .then(Mono.defer(() -> apiCall));
    });
}

/**
 * Converts a {@code Retry-After} header value into a wait duration.
 *
 * <p>The header may carry either a number of seconds or an HTTP date. A missing, negative
 * or unparseable value falls back to one second instead of failing the whole call with a
 * {@link NumberFormatException}; a date in the past yields a zero wait.
 *
 * @param retryAfter the raw header value, possibly {@code null}
 * @return the time to wait before retrying; never negative
 */
private Duration parseRetryAfter(String retryAfter) {
    final Duration fallback = Duration.ofSeconds(1);
    if (retryAfter == null || retryAfter.trim().isEmpty()) {
        return fallback;
    }
    String value = retryAfter.trim();
    try {
        long seconds = Long.parseLong(value);
        return seconds >= 0 ? Duration.ofSeconds(seconds) : fallback;
    } catch (NumberFormatException notDeltaSeconds) {
        // Not a plain number of seconds; try the HTTP-date form below.
    }
    try {
        java.time.Instant retryAt = java.time.ZonedDateTime
            .parse(value, java.time.format.DateTimeFormatter.RFC_1123_DATE_TIME)
            .toInstant();
        Duration untilRetry = Duration.between(java.time.Instant.now(), retryAt);
        return untilRetry.isNegative() ? Duration.ZERO : untilRetry;
    } catch (java.time.format.DateTimeParseException unparseable) {
        return fallback;
    }
}

/**
 * Reads a streamed response body as UTF-8 text, one string per received buffer.
 *
 * <p>A multi-byte UTF-8 character can be split across two network buffers. Decoding each
 * buffer on its own would turn both halves into replacement characters, so a single
 * stateful decoder is used per subscription and incomplete trailing bytes are carried over
 * to the next buffer. Each buffer is released after its bytes have been copied.
 *
 * @param response the response whose body is consumed
 * @return a flux of decoded, non-empty text chunks
 */
public Flux<String> readStreamResponse(ClientResponse response) {
    // defer(): decoder and carry-over state must be private to each subscription.
    return Flux.defer(() -> {
        java.nio.charset.CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder()
            .onMalformedInput(java.nio.charset.CodingErrorAction.REPLACE)
            .onUnmappableCharacter(java.nio.charset.CodingErrorAction.REPLACE);
        // Bytes of an incomplete character left over from the previous buffer.
        java.nio.ByteBuffer[] carry = {java.nio.ByteBuffer.allocate(0)};

        return response.bodyToFlux(DataBuffer.class)
            .map(dataBuffer -> {
                byte[] chunk;
                try {
                    chunk = new byte[dataBuffer.readableByteCount()];
                    dataBuffer.read(chunk);
                } finally {
                    // Buffers may be pooled; release each one once its bytes are copied.
                    org.springframework.core.io.buffer.DataBufferUtils.release(dataBuffer);
                }
                return decodeChunk(decoder, carry, chunk);
            })
            // A stream that ends in the middle of a character yields one replacement
            // character, matching what decoding the truncated bytes would produce.
            .concatWith(Flux.defer(() -> carry[0].hasRemaining()
                ? Flux.just("\uFFFD")
                : Flux.<String>empty()))
            .filter(text -> !text.isEmpty());
    });
}

/**
 * Decodes the carried-over bytes plus {@code chunk}, and stores any incomplete trailing
 * character back into {@code carry[0]} for the next call.
 */
private String decodeChunk(java.nio.charset.CharsetDecoder decoder,
                           java.nio.ByteBuffer[] carry,
                           byte[] chunk) {
    java.nio.ByteBuffer input =
        java.nio.ByteBuffer.allocate(carry[0].remaining() + chunk.length);
    input.put(carry[0]);
    input.put(chunk);
    input.flip();

    // UTF-8 never decodes to more chars than it has bytes.
    CharBuffer output = CharBuffer.allocate(input.remaining());
    // endOfInput=false: an incomplete trailing sequence is left unread in the input.
    decoder.decode(input, output, false);

    java.nio.ByteBuffer rest = java.nio.ByteBuffer.allocate(input.remaining());
    rest.put(input);
    rest.flip();
    carry[0] = rest;

    output.flip();
    return output.toString();
}

Java 调用 ChatGPT API 实战：SDK 选型与生产环境避坑指南

1. 背景痛点：Java 调用 ChatGPT API 的常见问题

2. 技术选型：主流 HTTP 客户端的对比

3. 核心实现：基于 WebClient 的完整方案

3.1 基础配置

3.2 带指数退避的自动重试机制

3.3 SSE 流式响应处理

3.4 API 调用监控

4. 生产环境考量

4.1 令牌管理的安全实践

4.2 基于 Resilience4j 的熔断配置

4.3 异步日志记录

5. 避坑指南

5.1 JSON 序列化问题

5.2 处理 HTTP 429 状态码

5.3 流式响应中的 UTF-8 编码陷阱

总结与思考

更多推荐文章

相关免费在线工具

Java 调用 ChatGPT API 实战：SDK 选型与生产环境避坑指南

1. 背景痛点：Java 调用 ChatGPT API 的常见问题

2. 技术选型：主流 HTTP 客户端的对比

3. 核心实现：基于 WebClient 的完整方案

3.1 基础配置

3.2 带指数退避的自动重试机制

3.3 SSE 流式响应处理

3.4 API 调用监控

4. 生产环境考量

4.1 令牌管理的安全实践

4.2 基于 Resilience4j 的熔断配置

4.3 异步日志记录

5. 避坑指南

5.1 JSON 序列化问题

5.2 处理 HTTP 429 状态码

5.3 流式响应中的 UTF-8 编码陷阱

总结与思考

微信扫一扫，关注极客日志

更多推荐文章

相关免费在线工具