# Observability - Detailed Reference

Detailed configurations và examples cho Observability stack trong GoodGo.

## Table of Contents

1. [Serilog Configuration](#serilog-configuration)
2. [OpenTelemetry Setup](#opentelemetry-setup)
3. [Prometheus & Grafana](#prometheus--grafana)
4. [Health Checks](#health-checks)
5. [Loki Logging](#loki-logging)
6. [Alerting](#alerting)

---

## Serilog Configuration

### Complete Program.cs Setup

```csharp
/// <summary>
/// EN: Complete Serilog configuration for microservice.
/// VI: Serilog configuration đầy đủ cho microservice.
/// </summary>
using Serilog;
using Serilog.Events;
using Serilog.Sinks.Grafana.Loki;

// EN: Configure Serilog bootstrap logger for startup errors
// VI: Cấu hình Serilog bootstrap logger cho lỗi startup
Log.Logger = new LoggerConfiguration()
    .MinimumLevel.Override("Microsoft", LogEventLevel.Information)
    .Enrich.FromLogContext()
    .WriteTo.Console()
    .CreateBootstrapLogger();

try
{
    var builder = WebApplication.CreateBuilder(args);

    // EN: Configure Serilog from configuration
    // VI: Cấu hình Serilog từ configuration
    //
    // NOTE: The JSON console sink is declared in the "Serilog:WriteTo" section of
    // appsettings.json and is picked up by ReadFrom.Configuration. Sinks are additive,
    // so it must not be added again here or every event is written to stdout twice.
    builder.Host.UseSerilog((context, services, configuration) => configuration
        .ReadFrom.Configuration(context.Configuration)
        .ReadFrom.Services(services)
        .Enrich.FromLogContext()
        .Enrich.WithProperty("Application", "IamService")
        .Enrich.WithProperty("Environment", context.HostingEnvironment.EnvironmentName)
        .Enrich.WithMachineName()
        .Enrich.WithThreadId()
        .WriteTo.GrafanaLoki(
            context.Configuration["Loki:Endpoint"]!,
            labels: new[]
            {
                new LokiLabel { Key = "app", Value = "iam-service" },
                new LokiLabel { Key = "env", Value = context.HostingEnvironment.EnvironmentName }
            }));

    // ... rest of configuration

    var app = builder.Build();

    // EN: Add Serilog request logging middleware
    // VI: Thêm Serilog request logging middleware
    app.UseSerilogRequestLogging(options =>
    {
        options.EnrichDiagnosticContext = (diagnosticContext, httpContext) =>
        {
            diagnosticContext.Set("RequestHost", httpContext.Request.Host.Value);
            diagnosticContext.Set("RequestScheme", httpContext.Request.Scheme);
            diagnosticContext.Set("UserAgent", httpContext.Request.Headers.UserAgent.ToString());

            if (httpContext.User.Identity?.IsAuthenticated == true)
            {
                diagnosticContext.Set("UserId", httpContext.User.FindFirst("sub")?.Value);
            }
        };
    });

    app.Run();
}
catch (Exception ex)
{
    Log.Fatal(ex, "Application terminated unexpectedly");

    // Report the failure to the process supervisor (Docker, Kubernetes, systemd);
    // without this the process exits with code 0 after a fatal startup error.
    Environment.ExitCode = 1;
}
finally
{
    Log.CloseAndFlush();
}
```

### appsettings.json for Serilog

```json
{
  "Serilog": {
    "Using": ["Serilog.Sinks.Console", "Serilog.Sinks.Seq"],
    "MinimumLevel": {
      "Default": "Information",
      "Override": {
        "Microsoft": "Warning",
        "Microsoft.Hosting.Lifetime": "Information",
        "Microsoft.EntityFrameworkCore": "Warning",
        "System": "Warning",
        "Grpc": "Warning"
      }
    },
    "WriteTo": [
      {
        "Name": "Console",
        "Args": {
          "formatter": "Serilog.Formatting.Json.JsonFormatter, Serilog"
        }
      },
      {
        "Name": "Seq",
        "Args": {
          "serverUrl": "http://seq:5341",
          "apiKey": ""
        }
      }
    ],
    "Enrich": [
      "FromLogContext",
      "WithMachineName",
      "WithThreadId",
      "WithEnvironmentName"
    ],
    "Properties": {
      "Application": "IamService"
    }
  }
}
```

> **Note:** Sinks declared under `WriteTo` are added on top of the sinks configured in
> `Program.cs`. Declare each sink in exactly one place. The `Seq` sink targets
> `http://seq:5341`, which is not part of the Docker Compose stack below; remove the entry
> if logs are shipped to Loki only.

---

## OpenTelemetry Setup

### Complete OpenTelemetry Configuration

```csharp
/// <summary>
/// EN: Configure OpenTelemetry for tracing and metrics.
/// VI: Cấu hình OpenTelemetry cho tracing và metrics.
/// </summary>
builder.Services.AddOpenTelemetry()
    .ConfigureResource(resource => resource
        .AddService(
            serviceName: "iam-service",
            serviceVersion: typeof(Program).Assembly.GetName().Version?.ToString() ?? "1.0.0",
            serviceInstanceId: Environment.MachineName)
        .AddAttributes(new[]
        {
            new KeyValuePair<string, object>("deployment.environment", builder.Environment.EnvironmentName),
            new KeyValuePair<string, object>("host.name", Environment.MachineName)
        }))
    .WithTracing(tracing =>
    {
        tracing
            // EN: ASP.NET Core instrumentation
            .AddAspNetCoreInstrumentation(options =>
            {
                options.RecordException = true;
                // Skip health probes and Prometheus scrapes so they do not produce traces.
                options.Filter = ctx =>
                    !ctx.Request.Path.StartsWithSegments("/health") &&
                    !ctx.Request.Path.StartsWithSegments("/metrics");
            })
            // EN: HTTP client instrumentation
            .AddHttpClientInstrumentation(options =>
            {
                options.RecordException = true;
                // Requests without a RequestUri are traced (the null case falls back to true).
                options.FilterHttpRequestMessage = req =>
                    !req.RequestUri?.Host.Contains("health") ?? true;
            })
            // EN: Entity Framework instrumentation
            .AddEntityFrameworkCoreInstrumentation(options =>
            {
                options.SetDbStatementForText = true;
                options.SetDbStatementForStoredProcedure = true;
            })
            // EN: Custom activity sources
            .AddSource("GoodGo.Iam")
            .AddSource("GoodGo.Orders")
            // EN: Export to OTLP (Jaeger/Tempo)
            .AddOtlpExporter(options =>
            {
                options.Endpoint = new Uri(builder.Configuration["Otlp:Endpoint"]!);
                options.Protocol = OtlpExportProtocol.Grpc;
            });
    })
    .WithMetrics(metrics =>
    {
        metrics
            .AddAspNetCoreInstrumentation()
            .AddHttpClientInstrumentation()
            .AddRuntimeInstrumentation()
            // EN: Custom meters
            .AddMeter("GoodGo.Iam")
            .AddMeter("GoodGo.Orders")
            // EN: Prometheus exporter
            .AddPrometheusExporter();
    });

// EN: Map Prometheus scraping endpoint
app.MapPrometheusScrapingEndpoint();
```

### Custom Activity Source

```csharp
/// <summary>
/// EN: Service with custom tracing.
/// VI: Service với tracing tùy chỉnh.
/// </summary>
public class OrderService
{
    // The source name must match an AddSource("GoodGo.Orders") registration,
    // otherwise StartActivity returns null and no spans are recorded.
    private static readonly ActivitySource ActivitySource = new("GoodGo.Orders");

    private readonly ILogger<OrderService> _logger;

    /// <summary>
    /// Creates the service with the logger used to report failed orders.
    /// </summary>
    /// <param name="logger">Logger supplied by dependency injection.</param>
    public OrderService(ILogger<OrderService> logger)
    {
        _logger = logger;
    }

    /// <summary>
    /// Validates, checks inventory for and persists an order, emitting one parent span
    /// ("ProcessOrder") and one child span per step.
    /// </summary>
    /// <param name="cmd">The order to create.</param>
    /// <param name="ct">Token forwarded to every asynchronous step.</param>
    /// <returns>The persisted order.</returns>
    /// <remarks>
    /// Any exception is recorded on the parent span, logged and rethrown unchanged.
    /// </remarks>
    public async Task<Order> ProcessOrderAsync(CreateOrderCommand cmd, CancellationToken ct)
    {
        // EN: Create root span for order processing
        // VI: Tạo root span cho xử lý order
        using var activity = ActivitySource.StartActivity(
            "ProcessOrder",
            ActivityKind.Internal);

        activity?.SetTag("user.id", cmd.UserId);
        activity?.SetTag("order.items_count", cmd.Items.Count);

        try
        {
            // EN: Child span: Validate
            // VI: Child span: Xác thực
            using (var validateActivity = ActivitySource.StartActivity("ValidateOrder"))
            {
                await ValidateOrderAsync(cmd, ct);
                validateActivity?.SetTag("validation.result", "success");
            }

            // EN: Child span: Check inventory
            // VI: Child span: Kiểm tra tồn kho
            using (var inventoryActivity = ActivitySource.StartActivity("CheckInventory"))
            {
                await CheckInventoryAsync(cmd.Items, ct);
            }

            // EN: Child span: Persist
            // VI: Child span: Lưu trữ
            Order order;
            using (var persistActivity = ActivitySource.StartActivity("PersistOrder"))
            {
                order = await SaveOrderAsync(cmd, ct);
                persistActivity?.SetTag("order.id", order.Id.ToString());
            }

            activity?.SetTag("order.id", order.Id.ToString());
            activity?.SetTag("order.total", order.TotalAmount);
            activity?.SetStatus(ActivityStatusCode.Ok);

            return order;
        }
        catch (Exception ex)
        {
            activity?.SetStatus(ActivityStatusCode.Error, ex.Message);
            activity?.RecordException(ex);
            _logger.LogError(ex, "Failed to process order for user {UserId}", cmd.UserId);
            throw;
        }
    }

    // ValidateOrderAsync, CheckInventoryAsync and SaveOrderAsync are omitted from this example.
}
```

---

## Prometheus & Grafana

### Docker Compose for Observability Stack

```yaml
# infra/observability/docker-compose.yml
version: "3.8"

services:
  # ===================================
  # PROMETHEUS
  # ===================================
  prometheus:
    image: prom/prometheus:v2.47.0
    container_name: prometheus
    command:
      - "--config.file=/etc/prometheus/prometheus.yml"
      - "--storage.tsdb.path=/prometheus"
      - "--web.enable-lifecycle"
    volumes:
      - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
      - ./prometheus/alerts:/etc/prometheus/alerts
      - prometheus_data:/prometheus
      # Required by docker_sd_configs in prometheus.yml. The container user must be
      # allowed to read the socket (e.g. add the docker group via group_add, or run
      # as root in local development only).
      - /var/run/docker.sock:/var/run/docker.sock:ro
    ports:
      - "9090:9090"
    networks:
      - goodgo-network

  # ===================================
  # GRAFANA
  # ===================================
  grafana:
    image: grafana/grafana:10.1.0
    container_name: grafana
    environment:
      # Defaults are for local development only; override both variables elsewhere.
      - GF_SECURITY_ADMIN_USER=${GRAFANA_ADMIN_USER:-admin}
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin}
      - GF_USERS_ALLOW_SIGN_UP=false
    volumes:
      - ./grafana/provisioning:/etc/grafana/provisioning
      - ./grafana/dashboards:/var/lib/grafana/dashboards
      - grafana_data:/var/lib/grafana
    ports:
      - "3000:3000"
    networks:
      - goodgo-network
    depends_on:
      - prometheus
      - loki

  # ===================================
  # LOKI (Log aggregation)
  # ===================================
  loki:
    image: grafana/loki:2.9.0
    container_name: loki
    command: -config.file=/etc/loki/loki-config.yml
    volumes:
      - ./loki/loki-config.yml:/etc/loki/loki-config.yml
      - loki_data:/loki
    ports:
      - "3100:3100"
    networks:
      - goodgo-network

  # ===================================
  # TEMPO (Distributed tracing)
  # ===================================
  tempo:
    image: grafana/tempo:2.2.0
    container_name: tempo
    command: -config.file=/etc/tempo/tempo-config.yml
    volumes:
      - ./tempo/tempo-config.yml:/etc/tempo/tempo-config.yml
      - tempo_data:/var/tempo
    ports:
      - "4317:4317" # OTLP gRPC
      - "4318:4318" # OTLP HTTP
    networks:
      - goodgo-network

volumes:
  prometheus_data:
  grafana_data:
  loki_data:
  tempo_data:

networks:
  goodgo-network:
    external: true
```

### Prometheus Configuration

```yaml
# infra/observability/prometheus/prometheus.yml
global:
  scrape_interval: 15s
  evaluation_interval: 15s

alerting:
  alertmanagers:
    - static_configs:
        - targets: []

rule_files:
  - /etc/prometheus/alerts/*.yml

scrape_configs:
  # EN: Prometheus self-monitoring
  - job_name: "prometheus"
    static_configs:
      - targets: ["localhost:9090"]

  # EN: GoodGo Services via Traefik
  - job_name: "goodgo-services"
    docker_sd_configs:
      - host: unix:///var/run/docker.sock
        filters:
          - name: network
            values: ["goodgo-network"]
    relabel_configs:
      - source_labels: [__meta_docker_container_name]
        regex: /(.*)
        target_label: container
      - source_labels: [__meta_docker_container_label_com_docker_compose_service]
        target_label: service
      # Scrape every discovered container on port 8080, whatever port was discovered.
      - source_labels: [__address__]
        regex: (.+):.*
        replacement: ${1}:8080
        target_label: __address__
      # Only containers labelled traefik.enable=true are scraped.
      - source_labels: [__meta_docker_container_label_traefik_enable]
        regex: "true"
        action: keep

  # EN: Traefik metrics
  - job_name: "traefik"
    static_configs:
      - targets: ["traefik:8080"]
```

### Grafana Dashboard (JSON)

```json
{
  "dashboard": {
    "title": "GoodGo Services Overview",
    "panels": [
      {
        "title": "Request Rate",
        "type": "graph",
        "targets": [
          {
            "expr": "sum by (service) (rate(http_server_request_duration_seconds_count[5m]))",
            "legendFormat": "{{service}}"
          }
        ]
      },
      {
        "title": "Error Rate",
        "type": "graph",
        "targets": [
          {
            "expr": "sum by (service) (rate(http_server_request_duration_seconds_count{http_response_status_code=~\"5..\"}[5m]))",
            "legendFormat": "{{service}} - 5xx"
          }
        ]
      },
      {
        "title": "Request Duration P99",
        "type": "graph",
        "targets": [
          {
            "expr": "histogram_quantile(0.99, sum by (le, service) (rate(http_server_request_duration_seconds_bucket[5m])))",
            "legendFormat": "{{service}}"
          }
        ]
      }
    ]
  }
}
```

---

## Health Checks

### Comprehensive Health Check Configuration

```csharp
/// <summary>
/// EN: Configure all health checks.
/// VI: Cấu hình tất cả health checks.
/// </summary>
builder.Services.AddHealthChecks()
    // EN: Database
    .AddNpgSql(
        connectionString: builder.Configuration.GetConnectionString("DefaultConnection")!,
        name: "postgresql",
        failureStatus: HealthStatus.Unhealthy,
        tags: new[] { "db", "ready", "critical" })
    // EN: Redis
    .AddRedis(
        redisConnectionString: builder.Configuration["Redis:ConnectionString"]!,
        name: "redis",
        failureStatus: HealthStatus.Degraded,
        tags: new[] { "cache", "ready" })
    // EN: External HTTP dependency
    .AddUrlGroup(
        new Uri(builder.Configuration["Services:Payment:HealthUrl"]!),
        name: "payment-service",
        failureStatus: HealthStatus.Degraded,
        tags: new[] { "external", "ready" })
    // EN: Disk space
    .AddDiskStorageHealthCheck(
        setup: options => options.AddDrive("/", 1024),
        name: "disk-space",
        failureStatus: HealthStatus.Degraded,
        tags: new[] { "infrastructure" })
    // EN: Memory
    .AddProcessAllocatedMemoryHealthCheck(
        maximumMegabytesAllocated: 500,
        name: "memory",
        tags: new[] { "infrastructure" });

// EN: Map endpoints
// Liveness: the predicate excludes every registered check, so this endpoint only
// confirms that the process can serve HTTP requests.
app.MapHealthChecks("/health/live", new HealthCheckOptions
{
    Predicate = _ => false,
    ResponseWriter = WriteMinimalResponse
});

// Readiness: runs only the checks tagged "ready".
app.MapHealthChecks("/health/ready", new HealthCheckOptions
{
    Predicate = hc => hc.Tags.Contains("ready"),
    ResponseWriter = WriteDetailedResponse
});

// Full report: runs every registered check.
app.MapHealthChecks("/health", new HealthCheckOptions
{
    ResponseWriter = WriteDetailedResponse
});

// EN: Health check response writers

// Writes only the aggregate status, e.g. {"status":"Healthy"}.
static Task WriteMinimalResponse(HttpContext context, HealthReport report)
{
    // WriteAsJsonAsync sets the JSON content type itself.
    return context.Response.WriteAsJsonAsync(new { status = report.Status.ToString() });
}

// Writes the aggregate status plus one entry per executed check.
static Task WriteDetailedResponse(HttpContext context, HealthReport report)
{
    var result = new
    {
        status = report.Status.ToString(),
        totalDuration = report.TotalDuration.TotalMilliseconds,
        entries = report.Entries.Select(e => new
        {
            name = e.Key,
            status = e.Value.Status.ToString(),
            duration = e.Value.Duration.TotalMilliseconds,
            description = e.Value.Description,
            tags = e.Value.Tags,
            data = e.Value.Data
        })
    };

    return context.Response.WriteAsJsonAsync(result);
}
```

---

## Loki Logging

Application logs are shipped to Loki by the `Serilog.Sinks.Grafana.Loki` sink configured in
[Serilog Configuration](#serilog-configuration). The sink reads its endpoint from the
`Loki:Endpoint` configuration key and attaches the labels `app` and `env` to every event.

The Loki container is defined in the
[Docker Compose file](#docker-compose-for-observability-stack): it listens on port `3100`
and loads its configuration from `./loki/loki-config.yml`.

---

## Alerting

Prometheus loads alert rules from `/etc/prometheus/alerts/*.yml`, which the Docker Compose
file mounts from `./prometheus/alerts`.

The `alerting.alertmanagers` target list in `prometheus.yml` is empty, so alerts are
evaluated but no notifications are sent until an Alertmanager target is added.

---

## Resources / Tài Nguyên

- [OpenTelemetry .NET](https://opentelemetry.io/docs/instrumentation/net/)
- [Serilog Documentation](https://serilog.net/)
- [Prometheus Documentation](https://prometheus.io/docs/)
- [Grafana Dashboards](https://grafana.com/grafana/dashboards/)
- [Loki Documentation](https://grafana.com/docs/loki/)