import { Anthropic } from '@anthropic-ai/sdk';
// Price-table entries keyed by model name; each entry carries an `input` and an `output` rate.
// NOTE(review): the enclosing declaration is not visible in this excerpt — presumably the
// `PRICING` object that the cost calculations index by `config.model`; confirm.
'claude-3-5-sonnet': { input: 3.0, output: 15.0 }, // per 1M tokens
// NOTE(review): no unit comment on this entry; presumably also per 1M tokens — confirm.
'gpt-4o': { input: 5.0, output: 15.0 },
// Anthropic SDK client; `messages.create` is invoked on it when a request is executed.
private client: Anthropic;
// Wraps request execution through its `call(...)` method.
// `CircuitBreaker` is declared or imported outside the visible lines.
private circuitBreaker: CircuitBreaker;
// Token accounting, used through `check(...)`, `spend(...)` and `remaining`.
// `TokenBudget` is declared or imported outside the visible lines.
private tokenBudget: TokenBudget;
// Builds the guard's collaborators from `config`, which is kept as a private parameter property.
// Besides `maxTokensPerRequest`, other methods read `config.model` and `config.maxRetries`;
// their declarations in this type literal are not visible in this excerpt.
constructor(private config: {
// Forwarded to the `TokenBudget` constructor below.
maxTokensPerRequest: number;
// Created with no arguments.
// NOTE(review): presumably the SDK falls back to its default/environment credentials — confirm.
this.client = new Anthropic();
this.circuitBreaker = new CircuitBreaker();
// NOTE(review): the budget is seeded with the per-request limit, yet actual usage is later
// deducted via `spend(...)`; whether `TokenBudget` resets per request is not visible here.
this.tokenBudget = new TokenBudget(config.maxTokensPerRequest);
// Tail of the request method's signature. The method name and parameter list are not visible in
// this excerpt; the body reads `prompt` and `context`. The method resolves to a result object:
// `success`, the `cost` attributed to the call, and an optional `error` message.
): Promise<{ success: boolean; cost: number; error?: string }> {
// Step 1: Pre-validation
const validation = await this.validate(prompt, context);
if (!validation.isValid) {
// Returned before the API call below is reached, so the reported cost is 0.
return { success: false, cost: 0, error: validation.reason };
// Budget gate: checks the token *estimate* produced by validation, not real usage.
const estimatedTokens = validation.estimatedTokens;
if (!this.tokenBudget.check(estimatedTokens)) {
error: `Token budget exceeded: ${this.tokenBudget.remaining} remaining`
// Step 3: Execute with intelligent retry
// Nesting order: the circuit breaker's `call` wraps the retry helper, which wraps the API call,
// so the breaker sees one outcome per request rather than one per attempt.
const result = await this.circuitBreaker.call(async () => {
return await this.intelligentRetry(async () => {
const response = await this.client.messages.create({
model: this.config.model,
// NOTE(review): only `prompt` is visibly sent as message content; `context` is validated
// above but no visible line forwards it — confirm against the full file.
{ role: 'user', content: prompt }
// Actual token counts as reported on the response's `usage` object.
const inputTokens = response.usage.input_tokens;
const outputTokens = response.usage.output_tokens;
// NOTE(review): the cast silences the key check; if `config.model` is not a key of `PRICING`,
// `pricing` is undefined at runtime and the cost expression below would throw — confirm
// how `config.model` is constrained.
const pricing = PRICING[this.config.model as keyof typeof PRICING];
// Cost = tokens / 1,000,000 * rate, computed separately for input and output tokens.
(inputTokens / 1_000_000) * pricing.input +
(outputTokens / 1_000_000) * pricing.output
// Deduct the real usage (input + output) from the budget, unlike the estimate used by the gate.
this.tokenBudget.spend(inputTokens + outputTokens);
content: response.content,
// `config.maxRetries` is passed to the retry helper as its attempt limit.
}, this.config.maxRetries);
return { success: true, cost: result.cost };
// Log failed call cost (still burned tokens)
// The figure is an estimate derived from the error via `calculateFailedCallCost`, not measured usage.
const failedCost = this.calculateFailedCallCost(error);
console.error(`Failed call cost: ${failedCost.toFixed(4)}`, {
/**
 * Pre-flight check run before any API call: estimates the request size and screens the prompt.
 *
 * @param prompt - The user prompt; it contributes to the size estimate and is pattern-screened.
 * @param context - Additional strings; they contribute to the size estimate only (no visible
 *   line screens them for malicious patterns).
 * @returns On success, `{ isValid: true, estimatedTokens, reason: "" }`. The failure objects are
 *   only partly visible in this excerpt (their `reason` lines); callers read `isValid`,
 *   `reason` and `estimatedTokens` from the result.
 */
private async validate(prompt: string, context: string[]) {
// Rough size heuristic: `length / 4`, rounded up, for the prompt and for each context entry.
const estimatedTokens = Math.ceil(prompt.length / 4) +
context.reduce((sum, c) => sum + Math.ceil(c.length / 4), 0);
// Reject requests whose estimate exceeds the hard-coded 200,000-token ceiling.
if (estimatedTokens > 200000) {
reason: `Context exceeds 200K token limit (estimated: ${estimatedTokens})`
// Screen the prompt text against the known injection phrases.
if (this.containsMaliciousPattern(prompt)) {
reason: "Content violates safety policies"
// Both checks passed; `reason` is an empty string rather than omitted.
return { isValid: true, estimatedTokens, reason: "" };
/**
 * Reports whether `input` contains any of the known prompt-injection phrases.
 *
 * Matching is case-insensitive and unanchored, so a phrase is detected anywhere in the text.
 * JavaScript regular expressions do not accept a leading inline `(?i)` group; a literal written
 * that way is a syntax error ("Invalid group"), so case-insensitivity is expressed with the
 * `i` flag instead.
 *
 * @param input - Text to scan.
 * @returns `true` when at least one pattern matches, otherwise `false`.
 */
private containsMaliciousPattern(input: string): boolean {
  const patterns = [
    /jailbreak|system prompt override/i,
    /ignore previous instructions/i,
  ];
  // None of the patterns uses the `g` or `y` flag, so `test` keeps no state between calls.
  return patterns.some(p => p.test(input));
}
// Runs `operation` up to `maxRetries` times, classifying failures by error type and waiting with
// exponential backoff plus jitter between attempts. Resolves with the first successful result.
// The `maxRetries` parameter, the `try`/`catch` lines and the bodies of the classification
// branches are not visible in this excerpt.
private async intelligentRetry<T>(
operation: () => Promise<T>,
// Error types considered worth retrying; the list entries are not visible in this excerpt.
const retryableErrors = [
// Error types that should not be retried; two entries are visible.
const nonRetryableErrors = [
'content_policy_violation',
'context_length_exceeded',
// Attempts are numbered from 1 through `maxRetries` inclusive.
for (let attempt = 1; attempt <= maxRetries; attempt++) {
// A successful attempt returns immediately and ends the loop.
return await operation();
// The type is read from the nested `error.error.type` field; a missing or falsy value becomes 'unknown'.
const errorType = error?.error?.type || 'unknown';
// Fail fast on non-retryable errors
if (nonRetryableErrors.includes(errorType)) {
// Log but don't retry unknown errors
if (!retryableErrors.includes(errorType)) {
console.warn(`Unknown error type: ${errorType}`);
// Exponential backoff with jitter
// Base delay is 2^attempt seconds (2000 ms for attempt 1), plus up to 1000 ms of random
// jitter, capped at 10000 ms in total.
const baseDelay = Math.pow(2, attempt) * 1000;
const jitter = Math.random() * 1000;
const delay = Math.min(baseDelay + jitter, 10000);
// The jitter is not rounded, so the logged delay is usually fractional.
console.log(`Retry ${attempt}/${maxRetries} after ${delay}ms delay`);
// NOTE(review): no visible guard skips this wait after the final attempt — confirm.
await new Promise(resolve => setTimeout(resolve, delay));
// Raised when no attempt returned; the message does not include the last underlying error.
throw new Error(`Max retries (${maxRetries}) exceeded`);
// Estimates the cost of a failed call from its error type, using fixed token guesses per type
// and the configured model's rates (tokens / 1,000,000 * rate for input and for output).
// `error` is read through `error.error.type`; the result is used by the caller for logging.
// The `switch`, `break`, `default` and `return` lines are not visible in this excerpt.
private calculateFailedCallCost(error: any): number {
// Failed calls still consume tokens before failing
// Estimate based on typical failure patterns
// NOTE(review): the cast silences the key check; if `config.model` is not a key of `PRICING`,
// `pricing` is undefined and the rate arithmetic below would throw — confirm.
const pricing = PRICING[this.config.model as keyof typeof PRICING];
// A missing or falsy error type is treated as 'unknown'.
const errorType = error?.error?.type || 'unknown';
let estimatedInputTokens: number;
let estimatedOutputTokens: number;
// The token figures below are hard-coded estimates, not values measured from the failed call.
case 'context_length_exceeded':
estimatedInputTokens = 150000; // Full context processed
estimatedOutputTokens = 0;
case 'content_policy_violation':
estimatedInputTokens = 500; // Initial check
estimatedOutputTokens = 200; // Partial generation
case 'rate_limit_exceeded':
estimatedInputTokens = 100; // Minimal processing
estimatedOutputTokens = 0;
// NOTE(review): presumably the fallback branch for any other error type — its label is not visible.
estimatedInputTokens = 1000; // Typical partial processing
estimatedOutputTokens = 500;
// Same per-million pricing formula as the one applied to successful calls.
(estimatedInputTokens / 1_000_000) * pricing.input +
(estimatedOutputTokens / 1_000_000) * pricing.output
// Usage example: configure a guard, run one request, and report its outcome.
// Other config fields (e.g. `maxRetries`) and the closing lines are not visible in this excerpt.
const guard = new LLMCostGuard({
// Must match a key of the price table for cost calculation to work.
model: 'claude-3-5-sonnet',
maxTokensPerRequest: 50000,
// First argument is the prompt; second is the list of context strings.
const result = await guard.generate(
"Analyze this customer feedback and provide sentiment analysis",
["Previous conversation context..."]
// NOTE(review): presumably the two log lines below sit in the success / failure branches of a
// check on `result.success`; the branching lines are not visible. Cost is printed to 4 decimals.
console.log(`Success! Cost: ${result.cost.toFixed(4)}`);
console.error(`Failed: ${result.error} | Cost burned: ${result.cost.toFixed(4)}`);