# Understanding workflow execution, monitoring, and compensation
# Workflow execution definition: the triggering event, the activities to run,
# and the compensation activities with their strategy.
# NOTE(review): nesting was reconstructed from a flattened single-line source;
# confirm `conditions` belongs under `trigger` against the consumer's schema.
execution:
  workflow: high_priority_ticket
  trigger:
    # Quoted so the embedded colon can never be read as a mapping separator.
    event: "ticket:created"
    conditions:
      priority: high
  activities:
    - name: notify_team
    - name: assign_agent
    - name: set_sla
  compensation:
    strategy: reverse
    activities:
      - name: revert_assignment
      - name: clear_sla
# Example state record for one workflow instance.
# NOTE(review): nesting was reconstructed from a flattened single-line source;
# confirm that `ticket_id` and `assigned_agent` both live under `variables`.
state:
  id: wf_123
  status: running
  # Timestamps stay quoted so parsers keep them as strings, not date objects.
  started_at: "2024-01-01T10:00:00Z"
  current_activity: assign_agent
  variables:
    ticket_id: "T-123"
    assigned_agent: "A-456"
# Example state record for a single activity, with its input and output.
# NOTE(review): nesting was reconstructed from a flattened single-line source;
# confirm the `input` / `output` membership against the consumer's schema.
activity_state:
  id: act_789
  name: assign_agent
  status: completed
  # Timestamps stay quoted so parsers keep them as strings, not date objects.
  started_at: "2024-01-01T10:00:05Z"
  completed_at: "2024-01-01T10:00:06Z"
  input:
    ticket_id: "T-123"
  output:
    success: true
    agent_id: "A-456"
# Example monitoring snapshot: workflow counts, average duration, and load.
monitoring:
  active_workflows: 25
  completed_today: 150
  failed_today: 3
  average_duration: "45s"
  current_load: "medium"
# Example aggregate metrics.
# NOTE(review): units are not stated in this file; execution_time values are
# presumably seconds and the rates percentages — confirm with the producer.
# NOTE(review): nesting was reconstructed from a flattened single-line source;
# only avg/p95/p99 are assumed to belong to `execution_time`.
metrics:
  execution_time:
    avg: 45
    p95: 120
    p99: 180
  success_rate: 99.5
  error_rate: 0.5
  throughput: 100
# Logging configuration: level, the components listed for logging, and the
# type of each field in a log record.
# NOTE(review): nesting was reconstructed from a flattened single-line source;
# `format` is assumed to be a sibling of `components`.
logging:
  level: info
  components:
    - workflow_engine
    - activity_executor
    - state_manager
  format:
    timestamp: string
    workflow_id: string
    activity: string
    message: string
# Error handling policy for activities and for whole workflows.
# The source declared `error_handling` twice at the top level; duplicate keys
# are invalid YAML and most parsers silently keep only the last one, so both
# sub-sections are merged under a single key here.
# NOTE(review): nesting was reconstructed from a flattened single-line source;
# confirm `backoff` sits under `retry` and `fallback` under `activity_error`.
error_handling:
  activity_error:
    retry:
      max_attempts: 3
      backoff:
        initial: 1s
        multiplier: 2
    fallback:
      activity: skip
      notify: true
  workflow_error:
    strategy: compensate
    notification:
      channels: [slack, email]
      recipients: [workflow_admin]
# Global compensation settings: strategy, mode, timeout, and notification.
compensation:
  strategy: reverse
  mode: automatic
  timeout: 5m
  notification: true
# Maps a forward activity to the activity that compensates it, with params.
# NOTE(review): nesting was reconstructed from a flattened single-line source;
# confirm the shape against the consumer's schema.
compensation_activities:
  assign_agent:
    activity: unassign_agent
    params:
      # Template expressions are quoted so they always load as strings.
      ticket_id: "${workflow.ticket_id}"
      agent_id: "${workflow.assigned_agent}"
# Concurrency limits and queuing settings.
# NOTE(review): nesting was reconstructed from a flattened single-line source;
# `timeout` is assumed to belong to `queuing`.
concurrency:
  max_workflows: 1000
  max_activities: 100
  queuing:
    strategy: fifo
    timeout: 30s
# CPU and memory settings, each with a limit and a request value.
resources:
  cpu:
    limit: 4
    request: 2
  memory:
    limit: "2Gi"
    request: "1Gi"
# Saga definition: a list of steps, each naming an activity and, where given,
# its compensation.
# The last step (`set_sla`) declares no compensation in the source.
saga:
  steps:
    - activity: create_ticket
      compensation: delete_ticket
    - activity: assign_agent
      compensation: unassign_agent
    - activity: set_sla
# Circuit breaker settings: threshold, timeout, reset, and monitoring.
# NOTE(review): nesting was reconstructed from a flattened single-line source;
# `enabled` and `metrics` are assumed to belong to `monitoring`.
circuit_breaker:
  threshold: 5
  timeout: 60s
  reset: 300s
  monitoring:
    enabled: true
    metrics: [error_rate, latency]
# Bulkhead settings: concurrency and queue-size caps, a timeout, and a
# fallback activity.
# NOTE(review): nesting was reconstructed from a flattened single-line source;
# confirm the `fallback` shape against the consumer's schema.
bulkhead:
  max_concurrent: 10
  max_queue_size: 100
  timeout: 30s
  fallback:
    activity: degrade_service
# Tracing settings: on/off switch, sampling rate, and the components listed.
tracing:
  enabled: true
  sampling_rate: 0.1
  components:
    - workflow_engine
    - activity_executor
    - state_manager