Multi-Model Workflows
Track complex AI workflows involving multiple models and steps.
Basic Multi-Step Pattern
Track different models in a single session:
const session = await Lumina.session.start()

// Step 1: classify with a small, fast model
const classifyTurn = session.turn()
try {
  const category = await classifyTurn.wrapLLM(
    async () => {
      const res = await openai.chat.completions.create({
        model: 'gpt-4o-mini',
        messages: [{ role: 'user', content: `Classify: ${userMessage}` }],
      })
      return res.choices[0].message.content
    },
    { model: 'gpt-4o-mini', prompt_id: 'classify_v1' }
  )
  classifyTurn.annotate({ step: 'classification', category })
  await classifyTurn.finish()

  // Step 2: generate with the larger model, conditioned on the category
  const genTurn = session.turn()
  const completion = await genTurn.wrapLLM(
    async () =>
      openai.chat.completions.create({
        model: 'gpt-4o',
        messages: [
          { role: 'system', content: `Category: ${category}` },
          { role: 'user', content: userMessage }
        ],
      }),
    { model: 'gpt-4o', prompt_id: 'generate_v1' }
  )
  genTurn.setMessages([
    { role: 'user', content: userMessage },
    { role: 'assistant', content: completion.choices[0].message.content || '' }
  ])
  genTurn.annotate({ step: 'generation', category })
  await genTurn.finish()
} catch (error) {
  // Errors tracked per turn
  console.error('Workflow error:', error)
}

Router Pattern
Route to different models based on query complexity:
// Route a query to a model tier chosen by an LLM complexity rating.
async function routeQuery(userMessage: string) {
  const session = await Lumina.session.start()

  // Step 1: ask a small model to rate the query's complexity.
  const analysisTurn = session.turn()
  const complexity = await analysisTurn.wrapLLM(
    async () => {
      const response = await openai.chat.completions.create({
        model: 'gpt-4o-mini',
        messages: [{
          role: 'user',
          content: `Rate query complexity (simple/medium/complex): ${userMessage}`
        }],
      })
      return response.choices[0].message.content
    },
    { model: 'gpt-4o-mini', prompt_id: 'complexity_router' }
  )
  analysisTurn.annotate({ step: 'routing', complexity })
  await analysisTurn.finish()

  // Normalize before comparing: `content` is string | null and completions
  // often carry trailing whitespace or different casing — a strict ===
  // against the raw string would silently fall through to the cheapest model.
  const rating = (complexity ?? '').trim().toLowerCase()
  const model = rating === 'complex' ? 'gpt-4o' :
    rating === 'medium' ? 'gpt-4o-mini' :
      'gpt-3.5-turbo'

  // Step 2: answer with the routed model.
  const responseTurn = session.turn()
  const response = await responseTurn.wrapLLM(
    async () => {
      return await openai.chat.completions.create({
        model,
        messages: [{ role: 'user', content: userMessage }],
      })
    },
    { model, prompt_id: 'routed_response' }
  )
  responseTurn.setMessages([
    { role: 'user', content: userMessage },
    { role: 'assistant', content: response.choices[0].message.content || '' }
  ])
  responseTurn.annotate({
    step: 'response',
    routedTo: model,
    complexity
  })
  await responseTurn.finish()
  return response.choices[0].message.content
}

Sequential Chain
Chain multiple LLM calls in sequence:
const session = await Lumina.session.start()

// Step 1: Extract entities
const extractTurn = session.turn()
const entities = await extractTurn.wrapLLM(
  async () => {
    const response = await openai.chat.completions.create({
      model: 'gpt-4o-mini',
      messages: [{
        role: 'user',
        content: `Extract entities from: ${userMessage}`
      }],
    })
    // `content` is string | null under strict typing; fall back to an
    // empty JSON array so JSON.parse never receives null and downstream
    // `.length` / `.map` calls stay safe.
    return JSON.parse(response.choices[0].message.content ?? '[]')
  },
  { model: 'gpt-4o-mini', prompt_id: 'extract_entities' }
)
extractTurn.annotate({
  step: 'extraction',
  entitiesFound: entities.length
})
await extractTurn.finish()

// Step 2: Enrich entities via a (non-LLM) tool call
const enrichTurn = session.turn()
const enrichedEntities = await enrichTurn.recordTool(
  'entity_enrichment',
  async () => {
    // Lookups are independent of each other, so run them in parallel.
    return await Promise.all(
      entities.map(e => knowledgeGraph.lookup(e))
    )
  },
  { type: 'lookup', target: 'knowledge_graph', version: 'v1' }
)
enrichTurn.annotate({
  step: 'enrichment',
  entitiesEnriched: enrichedEntities.length
})
await enrichTurn.finish()

// Step 3: Generate the final response grounded in the enriched entities
const generateTurn = session.turn()
const response = await generateTurn.wrapLLM(
  async () => {
    return await openai.chat.completions.create({
      model: 'gpt-4o',
      messages: [
        {
          role: 'system',
          content: `Entities: ${JSON.stringify(enrichedEntities)}`
        },
        { role: 'user', content: userMessage }
      ],
    })
  },
  { model: 'gpt-4o', prompt_id: 'generate_with_entities' }
)
generateTurn.setMessages([
  { role: 'user', content: userMessage },
  { role: 'assistant', content: response.choices[0].message.content || '' }
])
generateTurn.annotate({
  step: 'generation',
  entitiesUsed: enrichedEntities.length
})
await generateTurn.finish()

Parallel Execution
Execute multiple LLM calls in parallel and combine results:
const session = await Lumina.session.start()

// Run one tracked turn per perspective. Bookkeeping (setMessages /
// annotate / finish) happens AFTER wrapLLM resolves — the original
// example inlined it inside the wrapped function, so its duration was
// attributed to the LLM call itself, and the same code was duplicated
// three times.
const runPerspective = async (perspective: number) => {
  const turn = session.turn()
  const content = await turn.wrapLLM(
    async () => {
      const res = await openai.chat.completions.create({
        model: 'gpt-4o',
        messages: [{ role: 'user', content: `Perspective ${perspective}: ${userMessage}` }],
      })
      return res.choices[0].message.content
    },
    { model: 'gpt-4o', prompt_id: `perspective_${perspective}` }
  )
  turn.setMessages([
    { role: 'user', content: userMessage },
    { role: 'assistant', content: content || '' }
  ])
  turn.annotate({ perspective })
  await turn.finish()
  return content
}

// Execute the three perspectives in parallel
const [response1, response2, response3] = await Promise.all([
  runPerspective(1),
  runPerspective(2),
  runPerspective(3),
])

// Synthesize results in a final turn
const synthesisTurn = session.turn()
const synthesis = await synthesisTurn.wrapLLM(
  async () => {
    return await openai.chat.completions.create({
      model: 'gpt-4o',
      messages: [{
        role: 'user',
        content: `Synthesize these perspectives:\n1. ${response1}\n2. ${response2}\n3. ${response3}`
      }],
    })
  },
  { model: 'gpt-4o', prompt_id: 'synthesize' }
)
synthesisTurn.setMessages([
  { role: 'user', content: userMessage },
  { role: 'assistant', content: synthesis.choices[0].message.content || '' }
])
synthesisTurn.annotate({
  step: 'synthesis',
  perspectivesCount: 3
})
await synthesisTurn.finish()

Multi-Provider Workflow
Use different providers for different steps:
const session = await Lumina.session.start()

// Step 1: OpenAI for classification
const classifyTurn = session.turn()
const category = await classifyTurn.wrapLLM(
  async () => {
    const response = await openai.chat.completions.create({
      model: 'gpt-4o-mini',
      messages: [{ role: 'user', content: `Classify: ${userMessage}` }],
    })
    return response.choices[0].message.content
  },
  { model: 'gpt-4o-mini', prompt_id: 'classify', provider: 'openai' }
)
classifyTurn.annotate({ step: 'classification', category, provider: 'openai' })
await classifyTurn.finish()

// Step 2: Anthropic for generation
const generateTurn = session.turn()
const response = await generateTurn.wrapLLM(
  async () => {
    return await anthropic.messages.create({
      model: 'claude-3-5-sonnet-20241022',
      max_tokens: 1024,
      messages: [{
        role: 'user',
        content: `Category: ${category}\n\n${userMessage}`
      }],
    })
  },
  { model: 'claude-3-5-sonnet-20241022', prompt_id: 'generate', provider: 'anthropic' }
)
// Anthropic content blocks are a discriminated union (text / tool_use);
// only text blocks carry `.text`, so guard instead of assuming index 0
// is a text block.
const firstBlock = response.content[0]
generateTurn.setMessages([
  { role: 'user', content: userMessage },
  { role: 'assistant', content: firstBlock?.type === 'text' ? firstBlock.text : '' }
])
generateTurn.annotate({
  step: 'generation',
  category,
  provider: 'anthropic'
})
await generateTurn.finish()

Best Practices
1. Use One Session for Related Turns
// NOTE(review): illustrative fragment — the ✅ and ❌ variants both declare
// `turn1`, so they would not compile side by side in one scope.
// ✅ Good: One session for entire workflow
const session = await Lumina.session.start()
const turn1 = session.turn()
const turn2 = session.turn()
// ❌ Bad: New session for each turn — related turns end up in separate sessions
const session1 = await Lumina.session.start()
const turn1 = session1.turn()
const session2 = await Lumina.session.start() // Don't do this
const turn2 = session2.turn()

2. Annotate Each Step
// Attach step metadata to the turn so multi-step sessions can be filtered
// and grouped during analysis. Fragment: `turn` and `category` come from
// the surrounding workflow (see the examples above on this page).
turn.annotate({
step: 'classification',
category,
model: 'gpt-4o-mini'
})

3. Track Latency Per Step
// Measure wall-clock latency around the wrapped call and record it on
// the turn for per-step analysis.
const stepStart = Date.now()
await turn.wrapLLM(/* ... */)
turn.annotate({
  stepLatencyMs: Date.now() - stepStart
})

4. Handle Errors Per Turn
try {
  await turn.wrapLLM(/* ... */)
} catch (error) {
  // `catch` bindings are `unknown` under strict TS — narrow before
  // reading `.message`, and stringify anything that isn't an Error.
  turn.annotate({ error: error instanceof Error ? error.message : String(error) })
} finally {
  await turn.finish() // Always finish
}Next Steps
- Error Handling - Robust error patterns
- RAG - Retrieval-augmented generation
- Best Practices - General SDK best practices