Files
pos-system/microservices/.agent/skills/observability/references/REFERENCE.md
Ho Ngoc Hai 76d75c753b Migrate
2026-05-23 18:37:02 +07:00

16 KiB

Observability - Detailed Reference

Detailed configurations và examples cho Observability stack trong GoodGo.

Table of Contents

  1. Serilog Configuration
  2. OpenTelemetry Setup
  3. Prometheus & Grafana
  4. Health Checks
  5. Loki Logging
  6. Alerting

Serilog Configuration

Complete Program.cs Setup

/// <summary>
/// EN: Complete Serilog configuration for microservice.
/// VI: Serilog configuration đầy đủ cho microservice.
/// </summary>

using Serilog;
using Serilog.Events;
using Serilog.Formatting.Json;
using Serilog.Sinks.Grafana.Loki;

// Bootstrap logger: a console-only logger that exists before the host is built,
// so exceptions thrown during startup are still reported. It is replaced by the
// fully configured logger once UseSerilog runs.
Log.Logger = new LoggerConfiguration()
    .MinimumLevel.Override("Microsoft", LogEventLevel.Information)
    .Enrich.FromLogContext()
    .WriteTo.Console()
    .CreateBootstrapLogger();

try
{
    var builder = WebApplication.CreateBuilder(args);

    // Build the final logger from configuration plus a few code-defined enrichers
    // and the Loki sink.
    //
    // The console sink is intentionally NOT added here: ReadFrom.Configuration
    // already adds the "Console" entry from the "Serilog:WriteTo" section of
    // appsettings.json, and adding it again in code would print every event twice.
    builder.Host.UseSerilog((context, services, configuration) => configuration
        .ReadFrom.Configuration(context.Configuration)
        .ReadFrom.Services(services)
        .Enrich.FromLogContext()
        .Enrich.WithProperty("Application", "IamService")
        .Enrich.WithProperty("Environment", context.HostingEnvironment.EnvironmentName)
        .Enrich.WithMachineName()
        .Enrich.WithThreadId()
        .WriteTo.GrafanaLoki(
            // Fail with an explicit message instead of passing null to the sink.
            context.Configuration["Loki:Endpoint"]
                ?? throw new InvalidOperationException("Configuration value 'Loki:Endpoint' is missing."),
            labels: new[]
            {
                new LokiLabel { Key = "app", Value = "iam-service" },
                new LokiLabel { Key = "env", Value = context.HostingEnvironment.EnvironmentName }
            }));

    // ... rest of configuration

    var app = builder.Build();

    // One summary log event per HTTP request, enriched with request details.
    app.UseSerilogRequestLogging(options =>
    {
        options.EnrichDiagnosticContext = (diagnosticContext, httpContext) =>
        {
            diagnosticContext.Set("RequestHost", httpContext.Request.Host.Value);
            diagnosticContext.Set("RequestScheme", httpContext.Request.Scheme);
            diagnosticContext.Set("UserAgent", httpContext.Request.Headers.UserAgent.ToString());

            if (httpContext.User.Identity?.IsAuthenticated == true)
            {
                // Prefer the raw "sub" claim. Fall back to NameIdentifier because JWT
                // handlers that map inbound claims rename "sub" to that claim type.
                var userId =
                    httpContext.User.FindFirst("sub")?.Value
                    ?? httpContext.User.FindFirst(System.Security.Claims.ClaimTypes.NameIdentifier)?.Value;

                diagnosticContext.Set("UserId", userId);
            }
        };
    });

    app.Run();
}
catch (Exception ex)
{
    Log.Fatal(ex, "Application terminated unexpectedly");

    // The exception is handled here, so without this the process would exit with
    // code 0 and a supervisor (Docker, Kubernetes, systemd) would see a clean exit.
    Environment.ExitCode = 1;
}
finally
{
    // Flush buffered events (e.g. batched network sinks) before the process exits.
    Log.CloseAndFlush();
}

appsettings.json for Serilog

{
  "Serilog": {
    "Using": ["Serilog.Sinks.Console", "Serilog.Sinks.Seq"],
    "MinimumLevel": {
      "Default": "Information",
      "Override": {
        "Microsoft": "Warning",
        "Microsoft.Hosting.Lifetime": "Information",
        "Microsoft.EntityFrameworkCore": "Warning",
        "System": "Warning",
        "Grpc": "Warning"
      }
    },
    "WriteTo": [
      {
        "Name": "Console",
        "Args": {
          "formatter": "Serilog.Formatting.Json.JsonFormatter, Serilog"
        }
      },
      {
        "Name": "Seq",
        "Args": {
          "serverUrl": "http://seq:5341",
          "apiKey": ""
        }
      }
    ],
    "Enrich": [
      "FromLogContext",
      "WithMachineName",
      "WithThreadId",
      "WithEnvironmentName"
    ],
    "Properties": {
      "Application": "IamService"
    }
  }
}

OpenTelemetry Setup

Complete OpenTelemetry Configuration

/// <summary>
/// Configures OpenTelemetry tracing and metrics for the service: resource
/// attributes, ASP.NET Core / HttpClient / EF Core instrumentation, custom
/// sources and meters, an OTLP trace exporter and a Prometheus metrics exporter.
/// </summary>

builder.Services.AddOpenTelemetry()
    .ConfigureResource(resource => resource
        .AddService(
            serviceName: "iam-service",
            serviceVersion: typeof(Program).Assembly.GetName().Version?.ToString() ?? "1.0.0",
            serviceInstanceId: Environment.MachineName)
        .AddAttributes(new[]
        {
            new KeyValuePair<string, object>("deployment.environment",
                builder.Environment.EnvironmentName),
            new KeyValuePair<string, object>("host.name", Environment.MachineName)
        }))
    .WithTracing(tracing =>
    {
        tracing
            // Incoming requests. Health and metrics endpoints are excluded so
            // probes and scrapes do not produce traces.
            .AddAspNetCoreInstrumentation(options =>
            {
                options.RecordException = true;
                options.Filter = ctx =>
                    !ctx.Request.Path.StartsWithSegments("/health") &&
                    !ctx.Request.Path.StartsWithSegments("/metrics");
            })
            // Outgoing HttpClient calls. Requests to a "/health" path are excluded,
            // mirroring the incoming filter above. The path is inspected rather than
            // the host name: matching on the host never filters health probes and
            // would silently drop traces for any dependency whose host name happens
            // to contain "health".
            .AddHttpClientInstrumentation(options =>
            {
                options.RecordException = true;
                options.FilterHttpRequestMessage = req =>
                {
                    var uri = req.RequestUri;

                    // No URI, or a relative one (AbsolutePath would throw): keep the trace.
                    if (uri is null || !uri.IsAbsoluteUri)
                    {
                        return true;
                    }

                    var path = uri.AbsolutePath;
                    var isHealthProbe =
                        path.Equals("/health", StringComparison.OrdinalIgnoreCase) ||
                        path.StartsWith("/health/", StringComparison.OrdinalIgnoreCase);

                    return !isHealthProbe;
                };
            })
            // Entity Framework Core commands.
            // NOTE(review): these options attach the SQL text to spans; confirm that
            // statement text is acceptable to export to the tracing backend.
            .AddEntityFrameworkCoreInstrumentation(options =>
            {
                options.SetDbStatementForText = true;
                options.SetDbStatementForStoredProcedure = true;
            })
            // Custom activity sources; names must match the ActivitySource names
            // used in application code.
            .AddSource("GoodGo.Iam")
            .AddSource("GoodGo.Orders")
            // Export traces over OTLP/gRPC to the endpoint in "Otlp:Endpoint".
            .AddOtlpExporter(options =>
            {
                options.Endpoint = new Uri(builder.Configuration["Otlp:Endpoint"]!);
                options.Protocol = OtlpExportProtocol.Grpc;
            });
    })
    .WithMetrics(metrics =>
    {
        metrics
            .AddAspNetCoreInstrumentation()
            .AddHttpClientInstrumentation()
            .AddRuntimeInstrumentation()
            // Custom meters.
            .AddMeter("GoodGo.Iam")
            .AddMeter("GoodGo.Orders")
            // Expose metrics in Prometheus text format.
            .AddPrometheusExporter();
    });

// Map the HTTP endpoint that Prometheus scrapes.
app.MapPrometheusScrapingEndpoint();

Custom Activity Source

/// <summary>
/// Order-processing service that demonstrates custom tracing: one root span per
/// order with a child span for each processing step.
/// </summary>
public class OrderService
{
    // The source name must be registered with the tracer via AddSource("GoodGo.Orders"),
    // otherwise StartActivity returns null and no spans are produced.
    private static readonly ActivitySource ActivitySource = new("GoodGo.Orders");
    private readonly ILogger<OrderService> _logger;

    /// <summary>
    /// Creates the service.
    /// </summary>
    /// <param name="logger">
    /// Logger used to report processing failures. Optional so that existing
    /// parameterless construction keeps working; a no-op logger is used when
    /// omitted, which guarantees the error path never dereferences a null logger.
    /// </param>
    public OrderService(ILogger<OrderService>? logger = null)
    {
        _logger = logger ?? Microsoft.Extensions.Logging.Abstractions.NullLogger<OrderService>.Instance;
    }

    /// <summary>
    /// Validates, checks inventory for, and persists an order, tracing each step.
    /// </summary>
    /// <param name="cmd">The order to create; its user id and item count are tagged on the root span.</param>
    /// <param name="ct">Cancellation token passed to every step.</param>
    /// <returns>The persisted order.</returns>
    /// <remarks>
    /// Any exception from a step marks the root span as failed, is recorded on the
    /// span and logged, and is then rethrown unchanged.
    /// </remarks>
    public async Task<Order> ProcessOrderAsync(CreateOrderCommand cmd, CancellationToken ct)
    {
        // Root span for the whole operation. StartActivity returns null when nothing
        // is listening, hence the null-conditional calls throughout.
        using var activity = ActivitySource.StartActivity(
            "ProcessOrder",
            ActivityKind.Internal);

        activity?.SetTag("user.id", cmd.UserId);
        activity?.SetTag("order.items_count", cmd.Items.Count);

        try
        {
            // Child span: validation.
            using (var validateActivity = ActivitySource.StartActivity("ValidateOrder"))
            {
                await ValidateOrderAsync(cmd, ct);
                validateActivity?.SetTag("validation.result", "success");
            }

            // Child span: inventory check.
            using (var inventoryActivity = ActivitySource.StartActivity("CheckInventory"))
            {
                await CheckInventoryAsync(cmd.Items, ct);
            }

            // Child span: persistence.
            Order order;
            using (var persistActivity = ActivitySource.StartActivity("PersistOrder"))
            {
                order = await SaveOrderAsync(cmd, ct);
                persistActivity?.SetTag("order.id", order.Id.ToString());
            }

            activity?.SetTag("order.id", order.Id.ToString());
            activity?.SetTag("order.total", order.TotalAmount);
            activity?.SetStatus(ActivityStatusCode.Ok);

            return order;
        }
        catch (Exception ex)
        {
            activity?.SetStatus(ActivityStatusCode.Error, ex.Message);
            activity?.RecordException(ex);

            _logger.LogError(ex, "Failed to process order for user {UserId}", cmd.UserId);
            throw;
        }
    }
}

Prometheus & Grafana

Docker Compose for Observability Stack

# infra/observability/docker-compose.yml
version: "3.8"

services:
  # ===================================
  # PROMETHEUS
  # ===================================
  # Metrics scraper and store. Scrape and alert configuration is bind-mounted
  # from ./prometheus; time-series data is kept in the prometheus_data volume.
  prometheus:
    image: prom/prometheus:v2.47.0
    container_name: prometheus
    command:
      - "--config.file=/etc/prometheus/prometheus.yml"
      - "--storage.tsdb.path=/prometheus"
      # Enables the HTTP lifecycle endpoints (e.g. POST /-/reload).
      - "--web.enable-lifecycle"
    volumes:
      - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
      - ./prometheus/alerts:/etc/prometheus/alerts
      - prometheus_data:/prometheus
      # Needed by the docker_sd_configs job in prometheus.yml, which connects to
      # unix:///var/run/docker.sock; without this mount discovery finds nothing.
      # Mounted read-only.
      # NOTE(review): the image runs as a non-root user; confirm that user can
      # read the socket on the target host (otherwise add `user:` / `group_add:`).
      - /var/run/docker.sock:/var/run/docker.sock:ro
    ports:
      - "9090:9090"
    networks:
      - goodgo-network

  # ===================================
  # GRAFANA
  # ===================================
  # Dashboards UI. Provisioning files and dashboards are bind-mounted from
  # ./grafana; Grafana's own state is kept in the grafana_data volume.
  grafana:
    image: grafana/grafana:10.1.0
    container_name: grafana
    environment:
      # Admin credentials can be overridden from the shell or an .env file.
      # The defaults keep local development working unchanged; set
      # GRAFANA_ADMIN_PASSWORD anywhere port 3000 is reachable by others.
      - GF_SECURITY_ADMIN_USER=${GRAFANA_ADMIN_USER:-admin}
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin}
      - GF_USERS_ALLOW_SIGN_UP=false
    volumes:
      - ./grafana/provisioning:/etc/grafana/provisioning
      - ./grafana/dashboards:/var/lib/grafana/dashboards
      - grafana_data:/var/lib/grafana
    ports:
      - "3000:3000"
    networks:
      - goodgo-network
    # Start order only; Compose does not wait for these services to be ready.
    depends_on:
      - prometheus
      - loki

  # ===================================
  # LOKI (Log aggregation)
  # ===================================
  # Runs Loki with the configuration file bind-mounted from
  # ./loki/loki-config.yml. The loki_data named volume is mounted at /loki and
  # container port 3100 is published on the host.
  loki:
    image: grafana/loki:2.9.0
    container_name: loki
    command: -config.file=/etc/loki/loki-config.yml
    volumes:
      - ./loki/loki-config.yml:/etc/loki/loki-config.yml
      - loki_data:/loki
    ports:
      - "3100:3100"
    networks:
      - goodgo-network

  # ===================================
  # TEMPO (Distributed tracing)
  # ===================================
  # Runs Tempo with the configuration file bind-mounted from
  # ./tempo/tempo-config.yml. The tempo_data named volume is mounted at
  # /var/tempo. Only the two OTLP ingestion ports are published on the host.
  tempo:
    image: grafana/tempo:2.2.0
    container_name: tempo
    command: -config.file=/etc/tempo/tempo-config.yml
    volumes:
      - ./tempo/tempo-config.yml:/etc/tempo/tempo-config.yml
      - tempo_data:/var/tempo
    ports:
      - "4317:4317"   # OTLP gRPC
      - "4318:4318"   # OTLP HTTP
    networks:
      - goodgo-network

# Named volumes used by the services above for their persistent data.
volumes:
  prometheus_data:
  grafana_data:
  loki_data:
  tempo_data:

# The network is declared external: Compose expects it to exist already and
# does not create it, so it must be created before this stack is started.
networks:
  goodgo-network:
    external: true

Prometheus Configuration

# infra/observability/prometheus/prometheus.yml
# Prometheus server configuration: global intervals, alert rule files and the
# scrape jobs for Prometheus itself, the GoodGo containers and Traefik.
global:
  scrape_interval: 15s
  evaluation_interval: 15s

# The Alertmanager target list is empty, so no Alertmanager is configured here.
alerting:
  alertmanagers:
    - static_configs:
        - targets: []

# Alert rules are loaded from every .yml file in this directory.
rule_files:
  - /etc/prometheus/alerts/*.yml

scrape_configs:
  # Prometheus self-monitoring
  - job_name: "prometheus"
    static_configs:
      - targets: ["localhost:9090"]

  # GoodGo services, discovered through the Docker API (not scraped through
  # Traefik): containers on goodgo-network that carry the label
  # traefik.enable=true are kept as targets.
  - job_name: "goodgo-services"
    docker_sd_configs:
      # Requires the Docker socket to be reachable from the Prometheus process.
      - host: unix:///var/run/docker.sock
        filters:
          - name: network
            values: ["goodgo-network"]
    relabel_configs:
      # Container name without its leading "/" becomes the "container" label.
      - source_labels: [__meta_docker_container_name]
        regex: /(.*)
        target_label: container
      # Compose service name becomes the "service" label.
      - source_labels: [__meta_docker_container_label_com_docker_compose_service]
        target_label: service
      # Keep the discovered host but force the scrape port to 8080.
      # NOTE(review): presumably the port every service listens on - confirm.
      - source_labels: [__address__]
        regex: (.+):.*
        replacement: ${1}:8080
        target_label: __address__
      # Drop every container whose traefik.enable label is not exactly "true".
      - source_labels: [__meta_docker_container_label_traefik_enable]
        regex: "true"
        action: keep

  # Traefik metrics
  - job_name: "traefik"
    static_configs:
      - targets: ["traefik:8080"]

Grafana Dashboard (JSON)

{
  "dashboard": {
    "title": "GoodGo Services Overview",
    "panels": [
      {
        "title": "Request Rate",
        "description": "Requests per second over the last 5 minutes, summed per service so each service is a single series.",
        "type": "graph",
        "targets": [
          {
            "expr": "sum by (service) (rate(http_server_request_duration_seconds_count[5m]))",
            "legendFormat": "{{service}}"
          }
        ]
      },
      {
        "title": "Error Rate",
        "description": "Requests per second that returned a 5xx status over the last 5 minutes, summed per service.",
        "type": "graph",
        "targets": [
          {
            "expr": "sum by (service) (rate(http_server_request_duration_seconds_count{http_response_status_code=~\"5..\"}[5m]))",
            "legendFormat": "{{service}} - 5xx"
          }
        ]
      },
      {
        "title": "Request Duration P99",
        "description": "99th percentile request duration per service. Buckets are summed by (le, service) before histogram_quantile so the percentile is computed across all routes and instances of a service.",
        "type": "graph",
        "targets": [
          {
            "expr": "histogram_quantile(0.99, sum by (le, service) (rate(http_server_request_duration_seconds_bucket[5m])))",
            "legendFormat": "{{service}}"
          }
        ]
      }
    ]
  }
}

Health Checks

Comprehensive Health Check Configuration

/// <summary>
/// Registers the service's health checks and maps the liveness, readiness and
/// full-report endpoints.
/// </summary>

var healthChecks = builder.Services.AddHealthChecks();

// PostgreSQL: a failure marks the service Unhealthy.
healthChecks.AddNpgSql(
    connectionString: builder.Configuration.GetConnectionString("DefaultConnection")!,
    name: "postgresql",
    failureStatus: HealthStatus.Unhealthy,
    tags: new[] { "db", "ready", "critical" });

// Redis cache: a failure only degrades the service.
healthChecks.AddRedis(
    redisConnectionString: builder.Configuration["Redis:ConnectionString"]!,
    name: "redis",
    failureStatus: HealthStatus.Degraded,
    tags: new[] { "cache", "ready" });

// External HTTP dependency (payment service health URL).
healthChecks.AddUrlGroup(
    new Uri(builder.Configuration["Services:Payment:HealthUrl"]!),
    name: "payment-service",
    failureStatus: HealthStatus.Degraded,
    tags: new[] { "external", "ready" });

// Disk space on "/".
healthChecks.AddDiskStorageHealthCheck(
    setup: options => options.AddDrive("/", 1024),
    name: "disk-space",
    failureStatus: HealthStatus.Degraded,
    tags: new[] { "infrastructure" });

// Memory allocated by the process.
healthChecks.AddProcessAllocatedMemoryHealthCheck(
    maximumMegabytesAllocated: 500,
    name: "memory",
    tags: new[] { "infrastructure" });

// Liveness: the predicate excludes every registered check, so the endpoint only
// shows that the application is able to answer requests.
var livenessOptions = new HealthCheckOptions
{
    Predicate = _ => false,
    ResponseWriter = WriteMinimalResponse
};
app.MapHealthChecks("/health/live", livenessOptions);

// Readiness: only the checks tagged "ready".
var readinessOptions = new HealthCheckOptions
{
    Predicate = hc => hc.Tags.Contains("ready"),
    ResponseWriter = WriteDetailedResponse
};
app.MapHealthChecks("/health/ready", readinessOptions);

// Full report: no predicate, so every registered check runs.
var fullReportOptions = new HealthCheckOptions
{
    ResponseWriter = WriteDetailedResponse
};
app.MapHealthChecks("/health", fullReportOptions);

// Health check response writers

/// <summary>
/// Writes a JSON body containing only the overall status of the report.
/// </summary>
/// <param name="context">HTTP context whose response receives the body.</param>
/// <param name="report">Health report to summarise.</param>
/// <returns>The task that completes when the body has been written.</returns>
static Task WriteMinimalResponse(HttpContext context, HealthReport report)
{
    var response = context.Response;
    response.ContentType = "application/json";

    var body = JsonSerializer.Serialize(new { status = report.Status.ToString() });
    return response.WriteAsync(body);
}

/// <summary>
/// Writes a JSON body with the overall status, the total duration in
/// milliseconds, and one entry per health check (name, status, duration,
/// description, tags and data).
/// </summary>
/// <param name="context">HTTP context whose response receives the body.</param>
/// <param name="report">Health report to serialise.</param>
/// <returns>The task that completes when the body has been written.</returns>
static Task WriteDetailedResponse(HttpContext context, HealthReport report)
{
    context.Response.ContentType = "application/json";

    // Project each check into a flat shape; still evaluated lazily during serialisation.
    var entries =
        from pair in report.Entries
        let check = pair.Value
        select new
        {
            name = pair.Key,
            status = check.Status.ToString(),
            duration = check.Duration.TotalMilliseconds,
            description = check.Description,
            tags = check.Tags,
            data = check.Data
        };

    var payload = new
    {
        status = report.Status.ToString(),
        totalDuration = report.TotalDuration.TotalMilliseconds,
        entries
    };

    return context.Response.WriteAsJsonAsync(payload);
}

Resources / Tài Nguyên