from typing import List, Dict, Any, Optional

import openai
import tiktoken
class CostOptimizedPromptEngineer:
    """Production-ready prompt engineering for cost reduction.

    Combines: concise instructions, conditional CoT, caching, and
    template standardization.
    """

    # Tasks that need reasoning (matched as case-insensitive substrings of the task)
    REASONING_TASKS = ["calculate", "analyze", "compare", "determine", "solve", "debug", "plan"]

    def __init__(self, model: str = "gpt-4.1-mini"):
        # Tokenizer matched to the target model so token counts reflect real billing.
        self.encoding = tiktoken.encoding_for_model(model)
        self.client = openai.OpenAI()

    def count_tokens(self, text: str) -> int:
        """Accurate token counting using tiktoken."""
        return len(self.encoding.encode(text))

    def needs_reasoning(self, task: str) -> bool:
        """Determine if task requires CoT (chain-of-thought)."""
        task_lower = task.lower()
        return any(rt in task_lower for rt in self.REASONING_TASKS)

    def build_optimized_prompt(
        self,
        task: str,
        context: Optional[str] = None,
        examples: Optional[List[Dict[str, str]]] = None,
        max_examples: int = 2,
    ) -> str:
        """Build cost-optimized prompt following OpenAI best practices.

        Structure: Objective -> Context -> Format -> Examples -> Instructions

        Args:
            task: What the model should do; also decides whether CoT is enabled.
            context: Optional supporting text; omitted entirely when falsy.
            examples: Optional few-shot examples with 'input'/'output' keys.
            max_examples: Cap on examples included, to bound prompt cost.

        Returns:
            The assembled prompt as a single newline-joined string.
        """
        parts = [f"Task: {task}", ""]
        needs_cot = self.needs_reasoning(task)

        # Context (only if needed)
        if context:
            parts.extend(["Context:", context, ""])

        # Output format: allow brief reasoning only for tasks that need it.
        if needs_cot:
            parts.extend(["Output Format:", "Brief reasoning, then final answer", ""])
        else:
            parts.extend(["Output Format:", "Direct answer without reasoning", ""])

        # Examples (limited for cost)
        if examples:
            parts.append("Examples:")
            for ex in examples[:max_examples]:
                parts.extend([
                    f"Input: {ex['input']}",
                    f"Output: {ex['output']}",
                    "",
                ])

        parts.append("Instructions:")
        parts.append("- Be concise and direct")
        parts.append("- Use only provided context")
        parts.append("- No explanatory text")
        if needs_cot:
            parts.append("- Think briefly before answering")
        else:
            parts.append("- Answer directly without reasoning")

        return "\n".join(parts)

    def optimize_existing_prompt(self, prompt: str) -> str:
        """Remove fluff from existing prompts."""
        fluff_words = ["please", "kindly", "could you", "I would like", "it would be great if"]
        optimized = prompt
        for word in fluff_words:
            # Strip both lowercase and sentence-initial capitalized forms.
            optimized = optimized.replace(word, "")
            optimized = optimized.replace(word.capitalize(), "")
        # Remove extra whitespace (blank lines and per-line padding)
        return "\n".join(line.strip() for line in optimized.split("\n") if line.strip())

    def estimate_cost_savings(
        self,
        original_prompt: str,
        optimized_prompt: str,
        estimated_output_tokens: int = 50,
    ) -> Dict[str, Any]:
        """Calculate cost savings (GPT-4.1-mini pricing).

        Returns a dict of token counts, per-1k-request costs, and savings.
        """
        input_cost = 0.40   # $ per 1M input tokens
        output_cost = 1.60  # $ per 1M output tokens

        orig_input = self.count_tokens(original_prompt)
        opt_input = self.count_tokens(optimized_prompt)

        # Estimate: simple tasks need ~20 output tokens, complex need ~100
        needs_cot = self.needs_reasoning(original_prompt)
        output_tokens = 100 if needs_cot else estimated_output_tokens

        orig_cost = (orig_input * input_cost + output_tokens * output_cost) / 1_000_000
        opt_cost = (opt_input * input_cost + output_tokens * output_cost) / 1_000_000

        return {
            "original_tokens": orig_input,
            "optimized_tokens": opt_input,
            "token_reduction": orig_input - opt_input,
            "reduction_percent": ((orig_input - opt_input) / orig_input * 100) if orig_input > 0 else 0,
            "original_cost_per_1k": orig_cost * 1000,
            "optimized_cost_per_1k": opt_cost * 1000,
            "savings_per_1k": (orig_cost - opt_cost) * 1000,
            "savings_percent": ((orig_cost - opt_cost) / orig_cost * 100) if orig_cost > 0 else 0,
        }
# Example: Optimizing a RAG prompt
if __name__ == "__main__":
    engineer = CostOptimizedPromptEngineer()

    # Original verbose prompt (typical RAG use case)
    original = """Please could you kindly analyze the following context and question?
I would like you to provide a thorough and comprehensive answer that takes into account
all the relevant information from the context. Make sure to explain your reasoning
step by step and provide citations where appropriate. The context is provided below
and the question is at the end. Please be as detailed as possible.

Context: [Retrieved documents]

Question: What is the capital of France?"""

    # Build the cost-optimized replacement prompt.
    optimized = engineer.build_optimized_prompt(
        task="Answer the question using only the provided context",
        context="[Retrieved documents]\nQuestion: What is the capital of France?",
        examples=[
            {"input": "Context: Paris is in France. Q: What is France's capital?", "output": "Paris"}
        ],
    )

    metrics = engineer.estimate_cost_savings(original, optimized)

    print("=== Cost Optimization Results ===")
    print(f"Original tokens: {metrics['original_tokens']}")
    print(f"Optimized tokens: {metrics['optimized_tokens']}")
    print(f"Token reduction: {metrics['token_reduction']} ({metrics['reduction_percent']:.1f}%)")
    print(f"Cost per 1k requests: ${metrics['original_cost_per_1k']:.2f} → ${metrics['optimized_cost_per_1k']:.2f}")
    print(f"Savings per 1k requests: ${metrics['savings_per_1k']:.2f} ({metrics['savings_percent']:.1f}%)")
    print("\n=== Optimized Prompt ===")
    print(optimized)