chore: add CPU metrics, reduce metrics and trace reporting frequency (#6693)

This commit is contained in:
Ryo
2026-04-01 11:24:50 +08:00
committed by GitHub
parent 04d2caa81a
commit 529e29e02e
10 changed files with 63 additions and 16 deletions
@@ -5,6 +5,7 @@ import type {
ObservableGauge ObservableGauge
} from '@opentelemetry/api'; } from '@opentelemetry/api';
import { getMeter } from '@fastgpt-sdk/otel/metrics'; import { getMeter } from '@fastgpt-sdk/otel/metrics';
import { cpus } from 'os';
type RuntimeMetricAttributes = Record<string, never>; type RuntimeMetricAttributes = Record<string, never>;
@@ -15,6 +16,9 @@ type RuntimeObservableSet = {
processMemoryHeapTotal: ObservableGauge<RuntimeMetricAttributes>; processMemoryHeapTotal: ObservableGauge<RuntimeMetricAttributes>;
processMemoryExternal: ObservableGauge<RuntimeMetricAttributes>; processMemoryExternal: ObservableGauge<RuntimeMetricAttributes>;
processMemoryArrayBuffers: ObservableGauge<RuntimeMetricAttributes>; processMemoryArrayBuffers: ObservableGauge<RuntimeMetricAttributes>;
processCpuUser: ObservableGauge<RuntimeMetricAttributes>;
processCpuSystem: ObservableGauge<RuntimeMetricAttributes>;
processCpuUtilization: ObservableGauge<RuntimeMetricAttributes>;
processUptime: ObservableGauge<RuntimeMetricAttributes>; processUptime: ObservableGauge<RuntimeMetricAttributes>;
}; };
@@ -25,6 +29,9 @@ let runtimeMeter: Meter | undefined;
let runtimeObservables: Observable<RuntimeMetricAttributes>[] = []; let runtimeObservables: Observable<RuntimeMetricAttributes>[] = [];
let runtimeMetricsCallback: BatchObservableCallback<RuntimeMetricAttributes> | undefined; let runtimeMetricsCallback: BatchObservableCallback<RuntimeMetricAttributes> | undefined;
let previousCpuUsage: NodeJS.CpuUsage | undefined;
let previousCpuTimestamp: number | undefined;
function createRuntimeObservables(): RuntimeObservableSet { function createRuntimeObservables(): RuntimeObservableSet {
const meter = getMeter('fastgpt.runtime'); const meter = getMeter('fastgpt.runtime');
@@ -50,6 +57,18 @@ function createRuntimeObservables(): RuntimeObservableSet {
description: 'Memory allocated for ArrayBuffer and SharedArrayBuffer instances', description: 'Memory allocated for ArrayBuffer and SharedArrayBuffer instances',
unit: 'By' unit: 'By'
}), }),
processCpuUser: meter.createObservableGauge(`${prefix}.cpu.user`, {
description: 'Cumulative user CPU time of the current process',
unit: 'us'
}),
processCpuSystem: meter.createObservableGauge(`${prefix}.cpu.system`, {
description: 'Cumulative system CPU time of the current process',
unit: 'us'
}),
processCpuUtilization: meter.createObservableGauge(`${prefix}.cpu.utilization`, {
description: 'CPU utilization ratio of the current process (0~1, across all cores)',
unit: '1'
}),
processUptime: meter.createObservableGauge(`${prefix}.uptime`, { processUptime: meter.createObservableGauge(`${prefix}.uptime`, {
description: 'Process uptime', description: 'Process uptime',
unit: 's' unit: 's'
@@ -69,6 +88,9 @@ export function startRuntimeMetrics() {
observables.processMemoryHeapTotal, observables.processMemoryHeapTotal,
observables.processMemoryExternal, observables.processMemoryExternal,
observables.processMemoryArrayBuffers, observables.processMemoryArrayBuffers,
observables.processCpuUser,
observables.processCpuSystem,
observables.processCpuUtilization,
observables.processUptime observables.processUptime
]; ];
runtimeMetricsCallback = (result) => { runtimeMetricsCallback = (result) => {
@@ -79,6 +101,29 @@ export function startRuntimeMetrics() {
result.observe(observables.processMemoryHeapTotal, memoryUsage.heapTotal); result.observe(observables.processMemoryHeapTotal, memoryUsage.heapTotal);
result.observe(observables.processMemoryExternal, memoryUsage.external); result.observe(observables.processMemoryExternal, memoryUsage.external);
result.observe(observables.processMemoryArrayBuffers, memoryUsage.arrayBuffers); result.observe(observables.processMemoryArrayBuffers, memoryUsage.arrayBuffers);
const currentCpuUsage = process.cpuUsage();
const currentTimestamp = Date.now();
result.observe(observables.processCpuUser, currentCpuUsage.user);
result.observe(observables.processCpuSystem, currentCpuUsage.system);
if (previousCpuUsage && previousCpuTimestamp) {
const elapsedUs = (currentTimestamp - previousCpuTimestamp) * 1000;
if (elapsedUs > 0) {
const cpuDeltaUs =
currentCpuUsage.user -
previousCpuUsage.user +
(currentCpuUsage.system - previousCpuUsage.system);
const coreCount = cpus().length || 1;
const utilization = cpuDeltaUs / (elapsedUs * coreCount);
result.observe(observables.processCpuUtilization, Math.min(1, Math.max(0, utilization)));
}
}
previousCpuUsage = currentCpuUsage;
previousCpuTimestamp = currentTimestamp;
result.observe(observables.processUptime, process.uptime()); result.observe(observables.processUptime, process.uptime());
}; };
@@ -95,4 +140,6 @@ export function stopRuntimeMetrics() {
runtimeMeter = undefined; runtimeMeter = undefined;
runtimeObservables = []; runtimeObservables = [];
runtimeMetricsCallback = undefined; runtimeMetricsCallback = undefined;
previousCpuUsage = undefined;
previousCpuTimestamp = undefined;
} }
+1 -1
View File
@@ -29,7 +29,7 @@ export type ActiveSpanOptions = {
attributes?: Record<string, unknown>; attributes?: Record<string, unknown>;
}; };
const DEFAULT_PRODUCTION_TRACING_SAMPLE_RATIO = 0.05; const DEFAULT_PRODUCTION_TRACING_SAMPLE_RATIO = 0.01;
const DEFAULT_NON_PRODUCTION_TRACING_SAMPLE_RATIO = 1; const DEFAULT_NON_PRODUCTION_TRACING_SAMPLE_RATIO = 1;
function getDefaultTracingSampleRatio() { function getDefaultTracingSampleRatio() {
+1 -1
View File
@@ -36,7 +36,7 @@ export const env = createEnv({
LOG_OTEL_URL: z.url().optional(), LOG_OTEL_URL: z.url().optional(),
METRICS_ENABLE_OTEL: BoolSchema.default(false), METRICS_ENABLE_OTEL: BoolSchema.default(false),
METRICS_EXPORT_INTERVAL: z.coerce.number().int().positive().default(15000), METRICS_EXPORT_INTERVAL: z.coerce.number().int().positive().default(30000),
METRICS_OTEL_SERVICE_NAME: z.string().default('fastgpt-client'), METRICS_OTEL_SERVICE_NAME: z.string().default('fastgpt-client'),
METRICS_OTEL_URL: z.url().optional(), METRICS_OTEL_URL: z.url().optional(),
+6 -6
View File
@@ -34,8 +34,8 @@ catalogs:
specifier: 0.1.2 specifier: 0.1.2
version: 0.1.2 version: 0.1.2
'@fastgpt-sdk/otel': '@fastgpt-sdk/otel':
specifier: 0.1.0 specifier: 0.1.2
version: 0.1.0 version: 0.1.2
'@fastgpt-sdk/storage': '@fastgpt-sdk/storage':
specifier: 0.6.15 specifier: 0.6.15
version: 0.6.15 version: 0.6.15
@@ -248,7 +248,7 @@ importers:
version: 11.7.2 version: 11.7.2
'@fastgpt-sdk/otel': '@fastgpt-sdk/otel':
specifier: 'catalog:' specifier: 'catalog:'
version: 0.1.0 version: 0.1.2
'@fastgpt-sdk/sandbox-adapter': '@fastgpt-sdk/sandbox-adapter':
specifier: ^0.0.34 specifier: ^0.0.34
version: 0.0.34 version: 0.0.34
@@ -2728,8 +2728,8 @@ packages:
resolution: {integrity: sha512-nt1qCq7frcRiR+406vEERWC1vEPVIKPUGH/ZRP/mlBxvNJp1RycWQT8RhK7/tHmW6xPNZoRL/q2WfhM4Q+L7eg==} resolution: {integrity: sha512-nt1qCq7frcRiR+406vEERWC1vEPVIKPUGH/ZRP/mlBxvNJp1RycWQT8RhK7/tHmW6xPNZoRL/q2WfhM4Q+L7eg==}
engines: {node: '>=20', pnpm: '>=9'} engines: {node: '>=20', pnpm: '>=9'}
'@fastgpt-sdk/otel@0.1.0': '@fastgpt-sdk/otel@0.1.2':
resolution: {integrity: sha512-wpZUcpoU4u1/UxC8R0KU7spZ1Ku2FbGerp4eBeOCgCDaiJGUbm3P7d1D97wzrPIK3QUgWj2g8N6dS0vrlryp2Q==} resolution: {integrity: sha512-niJXceEdyZSWN/syE9aRlTb6Gk6Posez9j4eiuyoQWgF0wGH1IWDbC2c0tlSglZQn6ONY90HzyRsSLOGlpd9KQ==}
engines: {node: '>=20', pnpm: '>=9'} engines: {node: '>=20', pnpm: '>=9'}
'@fastgpt-sdk/plugin@0.3.8': '@fastgpt-sdk/plugin@0.3.8':
@@ -13724,7 +13724,7 @@ snapshots:
'@opentelemetry/sdk-logs': 0.203.0(@opentelemetry/api@1.9.0) '@opentelemetry/sdk-logs': 0.203.0(@opentelemetry/api@1.9.0)
'@opentelemetry/semantic-conventions': 1.39.0 '@opentelemetry/semantic-conventions': 1.39.0
'@fastgpt-sdk/otel@0.1.0': '@fastgpt-sdk/otel@0.1.2':
dependencies: dependencies:
'@logtape/logtape': 2.0.2 '@logtape/logtape': 2.0.2
'@logtape/pretty': 2.0.2(@logtape/logtape@2.0.2) '@logtape/pretty': 2.0.2(@logtape/logtape@2.0.2)
+1 -1
View File
@@ -23,7 +23,7 @@ catalog:
'@emotion/react': ^11 '@emotion/react': ^11
'@emotion/styled': ^11 '@emotion/styled': ^11
'@fastgpt-sdk/logger': 0.1.2 '@fastgpt-sdk/logger': 0.1.2
'@fastgpt-sdk/otel': 0.1.0 '@fastgpt-sdk/otel': 0.1.2
'@fastgpt-sdk/storage': 0.6.15 '@fastgpt-sdk/storage': 0.6.15
'@modelcontextprotocol/sdk': ^1 '@modelcontextprotocol/sdk': ^1
'@types/lodash': ^4 '@types/lodash': ^4
+1 -1
View File
@@ -1,7 +1,7 @@
{ {
"name": "@fastgpt-sdk/otel", "name": "@fastgpt-sdk/otel",
"private": false, "private": false,
"version": "0.1.0", "version": "0.1.2",
"description": "FastGPT SDK for OpenTelemetry observability", "description": "FastGPT SDK for OpenTelemetry observability",
"type": "module", "type": "module",
"main": "./dist/index.mjs", "main": "./dist/index.mjs",
+1 -1
View File
@@ -50,7 +50,7 @@ export function createLoggerOptionsFromEnv(
parseStringEnv(env.LOG_OTEL_URL) ?? parseStringEnv(env.LOG_OTEL_URL) ??
options.defaultOtelUrl ?? options.defaultOtelUrl ??
'http://localhost:4318/v1/logs', 'http://localhost:4318/v1/logs',
level: parseLogLevel(env.LOG_OTEL_LEVEL, options.defaultOtelLevel ?? 'info') level: parseLogLevel(env.LOG_OTEL_LEVEL, options.defaultOtelLevel ?? 'warning')
} }
: false, : false,
sensitiveProperties: options.sensitiveProperties sensitiveProperties: options.sensitiveProperties
+2 -2
View File
@@ -10,7 +10,7 @@ import {
import { OTLPLogExporter } from '@opentelemetry/exporter-logs-otlp-http'; import { OTLPLogExporter } from '@opentelemetry/exporter-logs-otlp-http';
import type { Resource } from '@opentelemetry/resources'; import type { Resource } from '@opentelemetry/resources';
import { defaultResource, resourceFromAttributes } from '@opentelemetry/resources'; import { defaultResource, resourceFromAttributes } from '@opentelemetry/resources';
import { LoggerProvider, SimpleLogRecordProcessor } from '@opentelemetry/sdk-logs'; import { BatchLogRecordProcessor, LoggerProvider } from '@opentelemetry/sdk-logs';
import { ATTR_SERVICE_NAME } from '@opentelemetry/semantic-conventions'; import { ATTR_SERVICE_NAME } from '@opentelemetry/semantic-conventions';
import { inspect as nodeInspect } from 'util'; import { inspect as nodeInspect } from 'util';
import { mapLevelToSeverityNumber } from './helpers'; import { mapLevelToSeverityNumber } from './helpers';
@@ -88,7 +88,7 @@ async function initializeLoggerProvider(
const otlpExporter = new OTLPLogExporter(options.otlpExporterConfig); const otlpExporter = new OTLPLogExporter(options.otlpExporterConfig);
const loggerProvider = new LoggerProvider({ const loggerProvider = new LoggerProvider({
resource, resource,
processors: [new SimpleLogRecordProcessor(otlpExporter)] processors: [new BatchLogRecordProcessor(otlpExporter)]
}); });
return loggerProvider; return loggerProvider;
+2 -2
View File
@@ -47,14 +47,14 @@ function normalizeMetricsOptions(options?: false | MetricsOptions) {
if (options === false) { if (options === false) {
return { return {
enabled: false, enabled: false,
exportIntervalMillis: 15000 exportIntervalMillis: 30000
}; };
} }
return { return {
enabled: options?.enabled ?? false, enabled: options?.enabled ?? false,
serviceName: options?.serviceName, serviceName: options?.serviceName,
exportIntervalMillis: options?.exportIntervalMillis ?? 15000, exportIntervalMillis: options?.exportIntervalMillis ?? 30000,
otlpExporterConfig: { otlpExporterConfig: {
url: options?.url, url: options?.url,
headers: options?.headers headers: options?.headers
+1 -1
View File
@@ -40,7 +40,7 @@ export function createMetricsOptionsFromEnv(
options.defaultMetricsUrl, options.defaultMetricsUrl,
exportIntervalMillis: parsePositiveNumberEnv( exportIntervalMillis: parsePositiveNumberEnv(
env.METRICS_EXPORT_INTERVAL ?? env.OTEL_METRIC_EXPORT_INTERVAL, env.METRICS_EXPORT_INTERVAL ?? env.OTEL_METRIC_EXPORT_INTERVAL,
options.defaultExportIntervalMillis ?? 15000 options.defaultExportIntervalMillis ?? 30000
) )
} }
: false : false