import http from 'http';
import https from 'https';
interface EndpointConfig {
interface EndpointMetrics {
interface EndpointState extends EndpointConfig {
metrics: EndpointMetrics;
consecutiveFailures: number;
export class AdaptiveLLMBalancer {
private endpoints: EndpointState[];
private readonly HEALTH_CHECK_INTERVAL = 30000; // 30 seconds
private readonly LATENCY_THRESHOLD = 2000; // 2 seconds
private readonly ERROR_THRESHOLD = 0.1; // 10% error rate
constructor(configs: EndpointConfig[]) {
this.endpoints = configs.map(config => ({
metrics: { latency: 0, throughput: 0, errorRate: 0, lastUpdated: 0 },
// Start background health monitoring
this.startHealthMonitoring();
private startHealthMonitoring(): void {
this.endpoints.forEach(endpoint => {
this.performHealthCheck(endpoint);
}, this.HEALTH_CHECK_INTERVAL);
private async performHealthCheck(endpoint: EndpointState): Promise<void> {
const startTime = Date.now();
const response = await this.makeRequest(endpoint, {
messages: [{ role: 'user', content: 'ping' }],
const latency = Date.now() - startTime;
endpoint.metrics.latency = latency;
endpoint.metrics.lastUpdated = Date.now();
// Update health status based on latency
if (latency > this.LATENCY_THRESHOLD) {
console.warn(`High latency detected for ${endpoint.url}: ${latency}ms`);
if (endpoint.totalRequests > 0) {
endpoint.metrics.errorRate = endpoint.failedRequests / endpoint.totalRequests;
if (endpoint.metrics.errorRate > this.ERROR_THRESHOLD) {
endpoint.isHealthy = false;
console.warn(`High error rate for ${endpoint.url}: ${(endpoint.metrics.errorRate * 100).toFixed(2)}%`);
console.error(`Health check failed for ${endpoint.url}:`, error);
endpoint.isHealthy = false;
endpoint.consecutiveFailures++;
private makeRequest(endpoint: EndpointState, payload: any): Promise<any> {
return new Promise((resolve, reject) => {
const startTime = Date.now();
const data = JSON.stringify(payload);
const url = new URL(endpoint.url);
port: url.port || (url.protocol === 'https:' ? 443 : 80),
path: `${url.pathname}/v1/chat/completions`,
'Content-Type': 'application/json',
'Content-Length': Buffer.byteLength(data),
'Authorization': `Bearer ${endpoint.apiKey}`
const protocol = url.protocol === 'https:' ? https : http;
const req = protocol.request(options, (res) => {
res.on('data', (chunk) => body += chunk);
const latency = Date.now() - startTime;
if (res.statusCode === 200) {
endpoint.totalRequests++;
endpoint.consecutiveFailures = 0;
endpoint.isHealthy = true;
// Update latency metric (EWMA)
const alpha = 0.3; // Exponential smoothing factor
endpoint.metrics.latency =
alpha * latency + (1 - alpha) * endpoint.metrics.latency;
resolve(JSON.parse(body));
reject(new Error(`Invalid JSON response: ${e}`));
} else if (res.statusCode === 429) {
const retryAfter = parseInt(res.headers['retry-after'] || '60');
endpoint.retryAfter = Date.now() + (retryAfter * 1000);
endpoint.failedRequests++;
endpoint.consecutiveFailures++;
reject(new Error(`Rate limited. Retry after ${retryAfter}s`));
endpoint.failedRequests++;
endpoint.consecutiveFailures++;
if (res.statusCode >= 500) {
endpoint.isHealthy = false;
reject(new Error(`HTTP ${res.statusCode}: ${body}`));
req.on('error', (error) => {
endpoint.failedRequests++;
endpoint.consecutiveFailures++;
req.on('timeout', () => {
endpoint.failedRequests++;
endpoint.consecutiveFailures++;
reject(new Error('Request timeout'));
public async routeRequest(payload: any, maxRetries: number = 3): Promise<any> {
// Filter available endpoints
const available = this.endpoints.filter(ep => {
if (!ep.isHealthy) return false;
if (ep.retryAfter && Date.now() < ep.retryAfter) return false;
if (available.length === 0) {
// Fallback to any endpoint
return this.makeRequest(this.endpoints[0], payload);
// Sort by adaptive score (priority * latency factor)
const scored = available.map(ep => {
const latencyFactor = Math.max(1, ep.metrics.latency / 1000);
const errorFactor = 1 + (ep.metrics.errorRate * 10);
const score = ep.priority * latencyFactor * errorFactor;
return { endpoint: ep, score };
}).sort((a, b) => a.score - b.score);
// Try endpoints in order of score
for (const { endpoint } of scored) {
return await this.makeRequest(endpoint, payload);
console.warn(`Request to ${endpoint.url} failed:`, error.message);
// Continue to next endpoint
throw new Error('All available endpoints failed');
public getMetrics(): any {
return this.endpoints.map(ep => ({
totalRequests: ep.totalRequests,
failedRequests: ep.failedRequests,
consecutiveFailures: ep.consecutiveFailures
const balancer = new AdaptiveLLMBalancer([
url: 'https://api.openai.com',
apiKey: process.env.OPENAI_API_KEY || 'sk-proj-...'
url: 'https://api.anthropic.com',
apiKey: process.env.ANTHROPIC_API_KEY || 'sk-ant-...'
messages: [{ role: 'user', content: 'Explain quantum computing' }]
balancer.routeRequest(payload)
.then(response => console.log('Success:', response))
.catch(error => console.error('Failed:', error.message));
// Periodic metrics logging
console.log('\n=== Endpoint Metrics ===');
console.log(JSON.stringify(balancer.getMetrics(), null, 2));