Error Handling & Retries

Robust error handling patterns for production AI applications.

Basic Error Handling

Always use try/finally blocks to ensure finish() is called:

const turn = session.turn()
 
try {
  const response = await turn.wrapLLM(
    async () => await openai.chat.completions.create({
      model: 'gpt-4o',
      messages: [{ role: 'user', content: userMessage }],
    }),
    { model: 'gpt-4o', prompt_id: 'chat_v1' }
  )
 
  turn.setMessages([
    { role: 'user', content: userMessage },
    { role: 'assistant', content: response.choices[0].message.content || '' }
  ])
 
} catch (error) {
  // Error is automatically captured by wrapLLM
  console.error('LLM call failed:', error)
  throw error
} finally {
  await turn.finish()  // Always called, even on error
}
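
If this pattern appears throughout your codebase, the guarantee can be factored into a small helper. This is a sketch; the withTurn name and the structural types are assumptions, not SDK exports:

// Sketch: run fn against a fresh turn and guarantee finish() runs
// no matter how fn exits. Hypothetical helper, not part of the SDK.
async function withTurn<T, U extends { finish(): Promise<void> }>(
  session: { turn(): U },
  fn: (turn: U) => Promise<T>
): Promise<T> {
  const turn = session.turn()
  try {
    return await fn(turn)
  } finally {
    await turn.finish()
  }
}

// Usage:
// const reply = await withTurn(session, async (turn) => { /* wrapLLM, setMessages */ })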

Retry Logic

Implement exponential backoff for transient failures:

async function robustLLMCall(userMessage: string, maxRetries = 3) {
  const session = await Lumina.session.start()
  const turn = session.turn()
 
  for (let attempt = 0; attempt < maxRetries; attempt++) {
    try {
      const response = await turn.wrapLLM(
        async () => {
          return await openai.chat.completions.create({
            model: 'gpt-4o',
            messages: [{ role: 'user', content: userMessage }],
          })
        },
        { model: 'gpt-4o', prompt_id: 'chat_v1' }
      )
 
      turn.setMessages([
        { role: 'user', content: userMessage },
        { role: 'assistant', content: response.choices[0].message.content || '' }
      ])
 
      // Annotate success
      turn.annotate({
        success: true,
        attemptNumber: attempt + 1
      })
 
      await turn.finish()
      return response
 
    } catch (error) {
      // Log error in turn
      turn.annotate({
        error: {
          message: error.message,
          type: error.constructor.name,
          attempt: attempt + 1
        }
      })
 
      // Last attempt - give up
      if (attempt === maxRetries - 1) {
        await turn.finish()
        throw error
      }
 
      // Exponential backoff
      await new Promise(resolve => setTimeout(resolve, 2 ** attempt * 1000))
    }
  }
}
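
A common refinement to the backoff above is random jitter, so that a fleet of clients that failed at the same moment does not retry in lockstep. A minimal sketch of "full jitter" (sleep a random duration up to the exponential cap):

// Sketch: full-jitter backoff. Instead of sleeping exactly the
// exponential cap, sleep a random duration between 0 and the cap.
function backoffDelay(attempt: number, baseMs = 1000): number {
  const cap = 2 ** attempt * baseMs
  return Math.random() * cap
}

// In the retry loop above, replace the fixed backoff with:
// await new Promise(resolve => setTimeout(resolve, backoffDelay(attempt)))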

Error Classification

Classify errors for better analytics:

function classifyError(error: any): string {
  if (error.status === 429) return 'rate_limit'
  if (error.status === 401) return 'auth_error'
  if (error.status >= 500) return 'server_error'  // 500, 502, 503, ...
  if (error.code === 'ECONNREFUSED') return 'connection_error'
  if (error.message?.includes('timeout')) return 'timeout'
  return 'unknown'
}
 
try {
  const response = await turn.wrapLLM(/* ... */)
} catch (error) {
  turn.annotate({
    error: {
      type: classifyError(error),
      message: error.message,
      status: error.status,
      retryable: [429, 500, 503].includes(error.status)
    }
  })
  throw error
} finally {
  await turn.finish()
}
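
The inline retryable check above can be factored into a helper; it is also the isRetryable used under Best Practices below. The exact set of statuses and error codes here is an assumption; tune it for your provider:

// Sketch: decide whether an error is worth retrying.
// The retryable set here is an assumption; adjust for your provider.
function isRetryable(error: any): boolean {
  if ([429, 500, 502, 503].includes(error.status)) return true
  if (error.code === 'ECONNREFUSED' || error.code === 'ETIMEDOUT') return true
  return error.message?.includes('timeout') ?? false
}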

Graceful Degradation

Provide fallback responses when LLM fails:

const turn = session.turn()
 
try {
  const response = await turn.wrapLLM(
    async () => await openai.chat.completions.create({
      model: 'gpt-4o',
      messages: [{ role: 'user', content: userMessage }],
    }),
    { model: 'gpt-4o', prompt_id: 'chat_v1' }
  )
 
  const assistantMessage = response.choices[0].message.content || ''
 
  turn.setMessages([
    { role: 'user', content: userMessage },
    { role: 'assistant', content: assistantMessage }
  ])
 
  await turn.finish()
  return assistantMessage
 
} catch (error) {
  // Provide fallback response
  const fallbackMessage = "I'm having trouble processing your request right now. Please try again."
 
  turn.setMessages([
    { role: 'user', content: userMessage },
    { role: 'assistant', content: fallbackMessage }
  ])
 
  turn.annotate({
    fallback: true,
    error: error.message
  })
 
  await turn.finish()
  return fallbackMessage
}
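
A canned message is the last resort. A gentler degradation step is to retry with a smaller model first; in this sketch the fallback chain (ending in 'gpt-4o-mini') is an assumption, so substitute whatever lower tier your application supports:

// Sketch: try the primary model, then a cheaper fallback model,
// before giving up. Returns null so the caller can use the canned message.
async function completeWithModelFallback(userMessage: string) {
  for (const model of ['gpt-4o', 'gpt-4o-mini']) {  // fallback chain is an assumption
    try {
      return await openai.chat.completions.create({
        model,
        messages: [{ role: 'user', content: userMessage }],
      })
    } catch (error) {
      console.warn(`Model ${model} failed, trying next fallback`, error)
    }
  }
  return null
}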

Circuit Breaker Pattern

Prevent cascading failures with a circuit breaker: after a run of failures it rejects calls immediately for a cooldown period, instead of piling more load onto a struggling upstream:

class CircuitBreaker {
  private failures = 0
  private lastFailureTime = 0
  private readonly threshold = 5
  private readonly timeout = 60000  // 1 minute
 
  async execute<T>(fn: () => Promise<T>): Promise<T> {
    // Circuit open - reject immediately
    if (this.isOpen()) {
      throw new Error('Circuit breaker is open')
    }
 
    try {
      const result = await fn()
      this.onSuccess()
      return result
    } catch (error) {
      this.onFailure()
      throw error
    }
  }
 
  isOpen(): boolean {  // public, so callers can record breaker state
    if (this.failures < this.threshold) return false
    return Date.now() - this.lastFailureTime < this.timeout
  }
 
  private onSuccess() {
    this.failures = 0
  }
 
  private onFailure() {
    this.failures++
    this.lastFailureTime = Date.now()
  }
}
 
const breaker = new CircuitBreaker()
 
const turn = session.turn()
try {
  const response = await breaker.execute(async () => {
    return await turn.wrapLLM(
      async () => await openai.chat.completions.create({
        model: 'gpt-4o',
        messages: [{ role: 'user', content: userMessage }],
      }),
      { model: 'gpt-4o', prompt_id: 'chat_v1' }
    )
  })
 
  turn.setMessages([
    { role: 'user', content: userMessage },
    { role: 'assistant', content: response.choices[0].message.content || '' }
  ])
 
} catch (error) {
  turn.annotate({
    circuitBreakerOpen: breaker.isOpen(),
    error: error.message
  })
  throw error
} finally {
  await turn.finish()
}
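
Note that a breaker only helps if its state is shared across requests; a fresh instance per request never accumulates failures. One way to share it is a module-level registry keyed per upstream (the keying scheme here is an assumption):

// Sketch: one breaker per upstream dependency, shared across requests.
const breakers = new Map<string, CircuitBreaker>()

function breakerFor(key: string): CircuitBreaker {
  let breaker = breakers.get(key)
  if (!breaker) {
    breaker = new CircuitBreaker()
    breakers.set(key, breaker)
  }
  return breaker
}

// Usage: const breaker = breakerFor('openai:gpt-4o')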

Timeout Handling

Set timeouts for LLM calls:

async function withTimeout<T>(
  promise: Promise<T>,
  timeoutMs: number
): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined
  const timeout = new Promise<T>((_, reject) => {
    timer = setTimeout(() => reject(new Error('Timeout')), timeoutMs)
  })
  try {
    return await Promise.race([promise, timeout])
  } finally {
    clearTimeout(timer)  // don't leave the timer holding the event loop open
  }
}
 
const turn = session.turn()
try {
  const response = await turn.wrapLLM(
    async () => {
      return await withTimeout(
        openai.chat.completions.create({
          model: 'gpt-4o',
          messages: [{ role: 'user', content: userMessage }],
        }),
        30000  // 30 second timeout
      )
    },
    { model: 'gpt-4o', prompt_id: 'chat_v1' }
  )
 
  turn.setMessages([
    { role: 'user', content: userMessage },
    { role: 'assistant', content: response.choices[0].message.content || '' }
  ])
 
} catch (error) {
  turn.annotate({
    timeout: error.message === 'Timeout',
    error: error.message
  })
  throw error
} finally {
  await turn.finish()
}
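
Depending on your client library, a hand-rolled race may be unnecessary. Recent versions of the openai Node SDK accept per-request options, including a timeout; a sketch, assuming openai v4 or later:

// Sketch: request-level timeout via the openai SDK's own request
// options (openai-node v4+); the SDK throws if the timeout elapses.
const response = await openai.chat.completions.create(
  {
    model: 'gpt-4o',
    messages: [{ role: 'user', content: userMessage }],
  },
  { timeout: 30_000 }  // milliseconds
)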

Best Practices

1. Always Use try/finally

// ✅ Good: Guaranteed finish()
try {
  await turn.wrapLLM(/* ... */)
} finally {
  await turn.finish()
}
 
// ❌ Bad: finish() might not be called
await turn.wrapLLM(/* ... */)
await turn.finish()  // Skipped on error

2. Annotate Errors with Context

turn.annotate({
  error: {
    type: error.constructor.name,
    message: error.message,
    status: error.status,
    attempt: attemptNumber,
    userMessage: userMessage.substring(0, 100),  // First 100 chars
  }
})

3. Classify Errors for Analytics

turn.annotate({
  errorType: classifyError(error),
  retryable: isRetryable(error),
})

4. Implement Exponential Backoff

// ✅ Good: Exponential backoff
await new Promise(resolve => setTimeout(resolve, 2 ** attempt * 1000))
 
// ❌ Bad: Fixed delay
await new Promise(resolve => setTimeout(resolve, 1000))

Next Steps

  • Streaming - Handle streaming responses
  • RAG - Retrieval-augmented generation patterns
  • Best Practices - General SDK best practices