The Cortex Router plugin provides intelligent, multi-tier routing that automatically selects the optimal model based on request content. This eliminates the need to manually choose providers and models for each request.
"""Basic auto-routing: let the Cortex Router choose a model for one request."""
from openai import OpenAI

# Point the OpenAI SDK at the local Cortex gateway.
client = OpenAI(
    base_url="http://localhost:18080/v1",
    api_key="sk-test-123",
)

# Use "auto" or "cortex" to trigger intelligent routing.
user_message = {"role": "user", "content": "Write a Python function to sort a list"}
completion = client.chat.completions.create(
    model="auto",  # Let Cortex Router decide
    messages=[user_message],
)
# → Automatically routes to coding model
print(completion.choices[0].message.content)
"""Coding-intent prompts: each of these is auto-routed to the coding model."""
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:18080/v1",
    api_key="sk-test-123",
)

# Prompts the router classifies as code-related (the broken factorial is
# deliberately buggy — it is the code the model is asked to debug).
CODING_QUERIES = (
    "Write a binary search algorithm in Go",
    "Debug this Python code: def factorial(n): return n * factorial(n)",
    "Explain how async/await works in JavaScript",
    "Generate unit tests for this function",
)

for query in CODING_QUERIES:
    completion = client.chat.completions.create(
        model="auto",
        messages=[{"role": "user", "content": query}],
    )
    print(f"Query: {query[:50]}...")
    print(f"Response: {completion.choices[0].message.content[:100]}...\n")
# Analytical prompts: these route to the reasoning model.
REASONING_QUERIES = (
    "Solve this logic puzzle: If all bloops are razzies...",
    "What are the ethical implications of AI in healthcare?",
    "Prove that the square root of 2 is irrational",
    "Analyze the pros and cons of remote work",
)

for query in REASONING_QUERIES:
    completion = client.chat.completions.create(
        model="cortex",  # Explicit Cortex routing
        messages=[{"role": "user", "content": query}],
    )
    print(f"Reasoning: {completion.choices[0].message.content[:100]}...\n")
# Short, simple prompts: these route to the fast model.
FAST_QUERIES = (
    "What is 2+2?",
    "Define recursion",
    "What is the capital of France?",
    "Translate 'hello' to Spanish",
)

for query in FAST_QUERIES:
    completion = client.chat.completions.create(
        model="auto",
        messages=[{"role": "user", "content": query}],
    )
    print(f"Fast response: {completion.choices[0].message.content}\n")
# Prompts containing PII: the router keeps these on local models.
SENSITIVE_QUERIES = (
    "My email is john@example.com and I need help with...",
    "Analyze this customer data: SSN 123-45-6789",
    "Review this API key: sk_live_1234567890abcdef",
)

for query in SENSITIVE_QUERIES:
    print("🔒 Detected sensitive data -> routing to local model")
    completion = client.chat.completions.create(
        model="auto",
        messages=[{"role": "user", "content": query}],
    )
    # → Automatically routes to ollama:llama3.2 (local)
"""Skill matching: Cortex augments prompts with domain expertise before routing."""
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:18080/v1",
    api_key="sk-test-123",
)


def _route(prompt):
    # Send one user prompt through the Cortex Router ("auto" model).
    return client.chat.completions.create(
        model="auto",
        messages=[{"role": "user", "content": prompt}],
    )


# These match to specialized skills.
completion = _route("Write a Kubernetes deployment YAML")
# → Matches "kubernetes" skill → augments prompt with K8s expertise

completion = _route("Optimize this Go code for concurrency")
# → Matches "golang" skill → adds Go best practices to context
# Name a provider-qualified model to bypass Cortex routing entirely.
direct_messages = [{"role": "user", "content": "Hello"}]
completion = client.chat.completions.create(
    model="geminicli:gemini-2.5-pro",  # Explicit provider — no auto-routing
    messages=direct_messages,
)