chore: add CPU metrics; reduce metrics export frequency and trace sampling rate (#6693)

This commit is contained in:
Ryo
2026-04-01 11:24:50 +08:00
committed by GitHub
parent 04d2caa81a
commit 529e29e02e
10 changed files with 63 additions and 16 deletions

View File

@@ -5,6 +5,7 @@ import type {
ObservableGauge
} from '@opentelemetry/api';
import { getMeter } from '@fastgpt-sdk/otel/metrics';
import { cpus } from 'os';
type RuntimeMetricAttributes = Record<string, never>;
@@ -15,6 +16,9 @@ type RuntimeObservableSet = {
processMemoryHeapTotal: ObservableGauge<RuntimeMetricAttributes>;
processMemoryExternal: ObservableGauge<RuntimeMetricAttributes>;
processMemoryArrayBuffers: ObservableGauge<RuntimeMetricAttributes>;
processCpuUser: ObservableGauge<RuntimeMetricAttributes>;
processCpuSystem: ObservableGauge<RuntimeMetricAttributes>;
processCpuUtilization: ObservableGauge<RuntimeMetricAttributes>;
processUptime: ObservableGauge<RuntimeMetricAttributes>;
};
@@ -25,6 +29,9 @@ let runtimeMeter: Meter | undefined;
let runtimeObservables: Observable<RuntimeMetricAttributes>[] = [];
let runtimeMetricsCallback: BatchObservableCallback<RuntimeMetricAttributes> | undefined;
let previousCpuUsage: NodeJS.CpuUsage | undefined;
let previousCpuTimestamp: number | undefined;
function createRuntimeObservables(): RuntimeObservableSet {
const meter = getMeter('fastgpt.runtime');
@@ -50,6 +57,18 @@ function createRuntimeObservables(): RuntimeObservableSet {
description: 'Memory allocated for ArrayBuffer and SharedArrayBuffer instances',
unit: 'By'
}),
processCpuUser: meter.createObservableGauge(`${prefix}.cpu.user`, {
description: 'Cumulative user CPU time of the current process',
unit: 'us'
}),
processCpuSystem: meter.createObservableGauge(`${prefix}.cpu.system`, {
description: 'Cumulative system CPU time of the current process',
unit: 'us'
}),
processCpuUtilization: meter.createObservableGauge(`${prefix}.cpu.utilization`, {
description: 'CPU utilization ratio of the current process (0~1, across all cores)',
unit: '1'
}),
processUptime: meter.createObservableGauge(`${prefix}.uptime`, {
description: 'Process uptime',
unit: 's'
@@ -69,6 +88,9 @@ export function startRuntimeMetrics() {
observables.processMemoryHeapTotal,
observables.processMemoryExternal,
observables.processMemoryArrayBuffers,
observables.processCpuUser,
observables.processCpuSystem,
observables.processCpuUtilization,
observables.processUptime
];
runtimeMetricsCallback = (result) => {
@@ -79,6 +101,29 @@ export function startRuntimeMetrics() {
result.observe(observables.processMemoryHeapTotal, memoryUsage.heapTotal);
result.observe(observables.processMemoryExternal, memoryUsage.external);
result.observe(observables.processMemoryArrayBuffers, memoryUsage.arrayBuffers);
const currentCpuUsage = process.cpuUsage();
const currentTimestamp = Date.now();
result.observe(observables.processCpuUser, currentCpuUsage.user);
result.observe(observables.processCpuSystem, currentCpuUsage.system);
if (previousCpuUsage && previousCpuTimestamp) {
const elapsedUs = (currentTimestamp - previousCpuTimestamp) * 1000;
if (elapsedUs > 0) {
const cpuDeltaUs =
currentCpuUsage.user -
previousCpuUsage.user +
(currentCpuUsage.system - previousCpuUsage.system);
const coreCount = cpus().length || 1;
const utilization = cpuDeltaUs / (elapsedUs * coreCount);
result.observe(observables.processCpuUtilization, Math.min(1, Math.max(0, utilization)));
}
}
previousCpuUsage = currentCpuUsage;
previousCpuTimestamp = currentTimestamp;
result.observe(observables.processUptime, process.uptime());
};
@@ -95,4 +140,6 @@ export function stopRuntimeMetrics() {
runtimeMeter = undefined;
runtimeObservables = [];
runtimeMetricsCallback = undefined;
previousCpuUsage = undefined;
previousCpuTimestamp = undefined;
}

View File

@@ -29,7 +29,7 @@ export type ActiveSpanOptions = {
attributes?: Record<string, unknown>;
};
const DEFAULT_PRODUCTION_TRACING_SAMPLE_RATIO = 0.05;
const DEFAULT_PRODUCTION_TRACING_SAMPLE_RATIO = 0.01;
const DEFAULT_NON_PRODUCTION_TRACING_SAMPLE_RATIO = 1;
function getDefaultTracingSampleRatio() {

View File

@@ -36,7 +36,7 @@ export const env = createEnv({
LOG_OTEL_URL: z.url().optional(),
METRICS_ENABLE_OTEL: BoolSchema.default(false),
METRICS_EXPORT_INTERVAL: z.coerce.number().int().positive().default(15000),
METRICS_EXPORT_INTERVAL: z.coerce.number().int().positive().default(30000),
METRICS_OTEL_SERVICE_NAME: z.string().default('fastgpt-client'),
METRICS_OTEL_URL: z.url().optional(),

12
pnpm-lock.yaml generated
View File

@@ -34,8 +34,8 @@ catalogs:
specifier: 0.1.2
version: 0.1.2
'@fastgpt-sdk/otel':
specifier: 0.1.0
version: 0.1.0
specifier: 0.1.2
version: 0.1.2
'@fastgpt-sdk/storage':
specifier: 0.6.15
version: 0.6.15
@@ -248,7 +248,7 @@ importers:
version: 11.7.2
'@fastgpt-sdk/otel':
specifier: 'catalog:'
version: 0.1.0
version: 0.1.2
'@fastgpt-sdk/sandbox-adapter':
specifier: ^0.0.34
version: 0.0.34
@@ -2728,8 +2728,8 @@ packages:
resolution: {integrity: sha512-nt1qCq7frcRiR+406vEERWC1vEPVIKPUGH/ZRP/mlBxvNJp1RycWQT8RhK7/tHmW6xPNZoRL/q2WfhM4Q+L7eg==}
engines: {node: '>=20', pnpm: '>=9'}
'@fastgpt-sdk/otel@0.1.0':
resolution: {integrity: sha512-wpZUcpoU4u1/UxC8R0KU7spZ1Ku2FbGerp4eBeOCgCDaiJGUbm3P7d1D97wzrPIK3QUgWj2g8N6dS0vrlryp2Q==}
'@fastgpt-sdk/otel@0.1.2':
resolution: {integrity: sha512-niJXceEdyZSWN/syE9aRlTb6Gk6Posez9j4eiuyoQWgF0wGH1IWDbC2c0tlSglZQn6ONY90HzyRsSLOGlpd9KQ==}
engines: {node: '>=20', pnpm: '>=9'}
'@fastgpt-sdk/plugin@0.3.8':
@@ -13724,7 +13724,7 @@ snapshots:
'@opentelemetry/sdk-logs': 0.203.0(@opentelemetry/api@1.9.0)
'@opentelemetry/semantic-conventions': 1.39.0
'@fastgpt-sdk/otel@0.1.0':
'@fastgpt-sdk/otel@0.1.2':
dependencies:
'@logtape/logtape': 2.0.2
'@logtape/pretty': 2.0.2(@logtape/logtape@2.0.2)

View File

@@ -23,7 +23,7 @@ catalog:
'@emotion/react': ^11
'@emotion/styled': ^11
'@fastgpt-sdk/logger': 0.1.2
'@fastgpt-sdk/otel': 0.1.0
'@fastgpt-sdk/otel': 0.1.2
'@fastgpt-sdk/storage': 0.6.15
'@modelcontextprotocol/sdk': ^1
'@types/lodash': ^4

View File

@@ -1,7 +1,7 @@
{
"name": "@fastgpt-sdk/otel",
"private": false,
"version": "0.1.0",
"version": "0.1.2",
"description": "FastGPT SDK for OpenTelemetry observability",
"type": "module",
"main": "./dist/index.mjs",

View File

@@ -50,7 +50,7 @@ export function createLoggerOptionsFromEnv(
parseStringEnv(env.LOG_OTEL_URL) ??
options.defaultOtelUrl ??
'http://localhost:4318/v1/logs',
level: parseLogLevel(env.LOG_OTEL_LEVEL, options.defaultOtelLevel ?? 'info')
level: parseLogLevel(env.LOG_OTEL_LEVEL, options.defaultOtelLevel ?? 'warning')
}
: false,
sensitiveProperties: options.sensitiveProperties

View File

@@ -10,7 +10,7 @@ import {
import { OTLPLogExporter } from '@opentelemetry/exporter-logs-otlp-http';
import type { Resource } from '@opentelemetry/resources';
import { defaultResource, resourceFromAttributes } from '@opentelemetry/resources';
import { LoggerProvider, SimpleLogRecordProcessor } from '@opentelemetry/sdk-logs';
import { BatchLogRecordProcessor, LoggerProvider } from '@opentelemetry/sdk-logs';
import { ATTR_SERVICE_NAME } from '@opentelemetry/semantic-conventions';
import { inspect as nodeInspect } from 'util';
import { mapLevelToSeverityNumber } from './helpers';
@@ -88,7 +88,7 @@ async function initializeLoggerProvider(
const otlpExporter = new OTLPLogExporter(options.otlpExporterConfig);
const loggerProvider = new LoggerProvider({
resource,
processors: [new SimpleLogRecordProcessor(otlpExporter)]
processors: [new BatchLogRecordProcessor(otlpExporter)]
});
return loggerProvider;

View File

@@ -47,14 +47,14 @@ function normalizeMetricsOptions(options?: false | MetricsOptions) {
if (options === false) {
return {
enabled: false,
exportIntervalMillis: 15000
exportIntervalMillis: 30000
};
}
return {
enabled: options?.enabled ?? false,
serviceName: options?.serviceName,
exportIntervalMillis: options?.exportIntervalMillis ?? 15000,
exportIntervalMillis: options?.exportIntervalMillis ?? 30000,
otlpExporterConfig: {
url: options?.url,
headers: options?.headers

View File

@@ -40,7 +40,7 @@ export function createMetricsOptionsFromEnv(
options.defaultMetricsUrl,
exportIntervalMillis: parsePositiveNumberEnv(
env.METRICS_EXPORT_INTERVAL ?? env.OTEL_METRIC_EXPORT_INTERVAL,
options.defaultExportIntervalMillis ?? 15000
options.defaultExportIntervalMillis ?? 30000
)
}
: false