Aula 4 - Módulo 6: Sistemas completos de monitoramento para aplicações tRPC
Aplicações tRPC distribuídas precisam de observabilidade para diagnosticar problemas, otimizar a performance e garantir confiabilidade em produção.
Alertas inteligentes permitem detectar e resolver problemas antes que impactem os usuários, mantendo o SLA e a qualidade do serviço.
Métricas detalhadas fornecem insights sobre uso, performance e comportamento dos usuários, direcionando decisões técnicas e de produto.
Logs estruturados e tracing distribuído aceleram drasticamente o processo de debugging e resolução de incidentes.
Logging:
Registro estruturado de eventos e erros
Metrics:
Métricas numéricas de performance e uso
Tracing:
Rastreamento de requests através de serviços
Alerting:
Notificações automáticas para anomalias
// 📁 server/logging/winston-config.ts
import winston from 'winston';
import { format } from 'winston';
/**
 * Format shared by every transport: adds a timestamp, expands Error objects
 * (including their stack) and serializes the entry as JSON.
 *
 * Colorizing is intentionally NOT part of this shared format: it rewrites
 * `info.level` with terminal escape codes, which only makes sense for the
 * console and would leak into any transport that re-serializes the entry.
 */
const logFormat = format.combine(
  format.timestamp(),
  format.errors({ stack: true }),
  format.json()
);

/**
 * Console-only format: colorizes the level first, then renders the
 * human-readable `level: message {meta}` line.
 */
const consoleFormat = format.combine(
  format.colorize(),
  format.simple()
);

/**
 * Application-wide logger.
 *
 * - Level comes from `LOG_LEVEL`, falling back to `info` when unset or empty.
 * - Every entry carries `service: 'trpc-api'`.
 * - `logs/error.log` receives entries at `error` level only.
 * - `logs/combined.log` receives every entry the logger level lets through.
 * - The console receives a colorized, human-readable rendering.
 */
export const logger = winston.createLogger({
  level: process.env.LOG_LEVEL || 'info',
  format: logFormat,
  defaultMeta: { service: 'trpc-api' },
  transports: [
    new winston.transports.File({
      filename: 'logs/error.log',
      level: 'error'
    }),
    new winston.transports.File({
      filename: 'logs/combined.log'
    }),
    new winston.transports.Console({
      format: consoleFormat
    })
  ]
});
// tRPC procedure logging middleware
/**
 * Normalizes anything that was thrown or reported as an error into the
 * fields written to the log, so non-Error values do not break logging.
 */
const describeFailure = (error: unknown): { error: string; stack?: string } =>
  error instanceof Error
    ? { error: error.message, stack: error.stack }
    : { error: String(error) };

/**
 * tRPC middleware that logs the start, the outcome and the duration (in
 * milliseconds) of every procedure call.
 *
 * tRPC resolves `next()` with a result envelope and signals a failed
 * procedure through `ok: false` instead of rejecting, so the outcome is read
 * from the envelope. A rejected `next()` is still logged and re-thrown.
 *
 * @param opts Middleware options; `path`, `type` and `next` are read from it.
 * @returns Whatever `next()` resolved with, untouched.
 * @throws Re-throws any error raised by `next()` after logging it.
 */
export const loggingMiddleware = async (opts: any) => {
  const { path, type, next } = opts;
  const start = Date.now();
  logger.info(`tRPC ${type} started: ${path}`);

  const logFailure = (error: unknown): void => {
    logger.error(`tRPC ${type} failed: ${path}`, {
      ...describeFailure(error),
      duration: Date.now() - start
    });
  };

  let result;
  try {
    result = await next();
  } catch (error) {
    logFailure(error);
    throw error;
  }

  // Only an explicit `ok: false` counts as a failure, so results that do not
  // use the envelope shape keep being logged as completed.
  if (result != null && result.ok === false) {
    logFailure(result.error);
  } else {
    logger.info(`tRPC ${type} completed: ${path}`, { duration: Date.now() - start });
  }
  return result;
};
// 📁 server/metrics/prometheus.ts
import { register, Counter, Histogram, Gauge } from 'prom-client';
// Custom metrics for tRPC
// Upper bounds, in seconds, of the request-duration histogram buckets.
const requestDurationBuckets = [0.1, 0.3, 0.5, 0.7, 1, 3, 5, 7, 10];

// Counts every tRPC request, labelled by method, procedure and status.
const requestsTotal = new Counter({
  name: 'trpc_requests_total',
  help: 'Total number of tRPC requests',
  labelNames: ['method', 'procedure', 'status']
});

// Distribution of request durations in seconds, labelled by method and procedure.
const requestDuration = new Histogram({
  name: 'trpc_request_duration_seconds',
  help: 'Duration of tRPC requests in seconds',
  labelNames: ['method', 'procedure'],
  buckets: requestDurationBuckets
});

// Gauge for the number of currently open WebSocket connections.
const activeConnections = new Gauge({
  name: 'trpc_websocket_connections_active',
  help: 'Number of active WebSocket connections'
});

/** Prometheus metrics exposed by the tRPC server. */
export const trpcMetrics = {
  requestsTotal,
  requestDuration,
  activeConnections
};
// Middleware for automatic metrics collection
/**
 * tRPC middleware that records one request count and one duration
 * observation (in seconds) for every procedure call.
 *
 * tRPC resolves `next()` with a result envelope and signals a failed
 * procedure through `ok: false` instead of rejecting, so the `status` label
 * is derived from the envelope. A rejected `next()` is counted as an error
 * and re-thrown.
 *
 * @param opts Middleware options; `path`, `type` and `next` are read from it.
 * @returns Whatever `next()` resolved with, untouched.
 * @throws Re-throws any error raised by `next()` after recording it.
 */
export const metricsMiddleware = async (opts: any) => {
  const { path, type } = opts;
  const startTime = Date.now();

  // Single place that writes both metrics so the two outcomes cannot drift apart.
  const record = (status: 'success' | 'error'): void => {
    const duration = (Date.now() - startTime) / 1000;
    trpcMetrics.requestsTotal.inc({ method: type, procedure: path, status });
    trpcMetrics.requestDuration.observe({ method: type, procedure: path }, duration);
  };

  let result;
  try {
    result = await opts.next();
  } catch (error) {
    record('error');
    throw error;
  }

  // Only an explicit `ok: false` counts as a failure, so results that do not
  // use the envelope shape keep being counted as successes.
  record(result != null && result.ok === false ? 'error' : 'success');
  return result;
};
// 📁 server/alerts/alert-manager.ts
/** How urgent a triggered alert is. */
type AlertSeverity = 'low' | 'medium' | 'high' | 'critical';

/** Predicate evaluated against a metrics snapshot; `true` means the rule fires. */
type AlertCondition = (metrics: any) => boolean;

/** Declarative description of a single alert. */
interface AlertRule {
  /** Name identifying the rule. */
  name: string;
  /** Decides whether the rule fires for the given metrics. */
  condition: AlertCondition;
  /** Urgency attached to the alert. */
  severity: AlertSeverity;
  /** Cooldown, in minutes. */
  cooldown: number;
}
/** Delivers a triggered alert to an external channel (Slack, PagerDuty, etc.). */
type AlertNotifier = (rule: AlertRule, metrics: any) => void | Promise<void>;

/**
 * Evaluates alert rules against metrics snapshots and delivers the ones that
 * fire, honouring each rule's cooldown (expressed in minutes).
 */
class AlertManager {
  private readonly rules: AlertRule[] = [];
  // Timestamp (ms since epoch) of the last delivery attempt, keyed by rule name.
  private readonly lastAlerts = new Map<string, number>();
  private readonly notifier: AlertNotifier;

  /**
   * @param notifier Delivery callback. Defaults to writing the alert to the
   *   console so the manager is usable without any external integration.
   */
  constructor(
    notifier: AlertNotifier = (rule, metrics) => {
      console.warn(`[alert:${rule.severity}] ${rule.name}`, metrics);
    }
  ) {
    this.notifier = notifier;
  }

  /** Registers a rule; rules are evaluated in registration order. */
  addRule(rule: AlertRule): void {
    this.rules.push(rule);
  }

  /** Evaluates every registered rule against `metrics` and triggers those that match. */
  checkAlerts(metrics: any): void {
    for (const rule of this.rules) {
      if (rule.condition(metrics)) {
        this.triggerAlert(rule, metrics);
      }
    }
  }

  /** Delivers the alert unless the same rule already fired within its cooldown window. */
  private triggerAlert(rule: AlertRule, metrics: any): void {
    const now = Date.now();
    const lastAlert = this.lastAlerts.get(rule.name);
    if (lastAlert !== undefined && now - lastAlert < rule.cooldown * 60 * 1000) {
      return; // Still in cooldown
    }
    this.lastAlerts.set(rule.name, now);
    // Fire-and-forget is safe here: sendAlert never rejects.
    void this.sendAlert(rule, metrics);
  }

  /**
   * Hands the alert to the notifier. Delivery failures are logged instead of
   * propagated so a broken channel cannot surface as an unhandled rejection.
   */
  private async sendAlert(rule: AlertRule, metrics: any): Promise<void> {
    try {
      await this.notifier(rule, metrics);
    } catch (error) {
      console.error(`Failed to deliver alert "${rule.name}"`, error);
    }
  }
}
// Example alert rules
// Fraction of failed requests above which the error-rate rule fires (5%).
const MAX_ERROR_RATE = 0.05;
// Average response time above which the latency rule fires; the original
// note calls this "2 seconds", so the unit is presumably milliseconds.
const MAX_AVG_RESPONSE_TIME = 2000;

const highErrorRate: AlertRule = {
  name: 'High Error Rate',
  condition(snapshot) {
    return snapshot.errorRate > MAX_ERROR_RATE;
  },
  severity: 'critical',
  cooldown: 5
};

const slowResponseTime: AlertRule = {
  name: 'Slow Response Time',
  condition(snapshot) {
    return snapshot.avgResponseTime > MAX_AVG_RESPONSE_TIME;
  },
  severity: 'high',
  cooldown: 10
};

/** Example alert rules. */
export const alertRules: AlertRule[] = [highErrorRate, slowResponseTime];
{
"dashboard": {
"title": "tRPC Application Metrics",
"panels": [
{
"title": "Request Rate",
"type": "graph",
"targets": [
{
"expr": "rate(trpc_requests_total[5m])",
"legendFormat": "{{procedure}}"
}
]
},
{
"title": "Error Rate",
"type": "stat",
"targets": [
{
"expr": "sum(rate(trpc_requests_total{status=\"error\"}[5m])) / sum(rate(trpc_requests_total[5m]))",
"legendFormat": "Error Rate"
}
]
},
{
"title": "Response Time",
"type": "graph",
"targets": [
{
"expr": "histogram_quantile(0.95, rate(trpc_request_duration_seconds_bucket[5m]))",
"legendFormat": "95th percentile"
}
]
}
]
}
}
Na próxima aula, vamos explorar Documentação e API Design, criando documentação automática, contratos OpenAPI e guias de desenvolvimento para equipes.