mirror of
https://github.com/percona/percona-toolkit.git
synced 2025-09-01 18:25:59 +00:00

* Add final newlines and remove trailing spaces * Replace TAB characters with spaces * Fix indent
1215 lines
43 KiB
Perl
1215 lines
43 KiB
Perl
# This program is copyright 2008-2011 Percona Ireland Ltd.
|
|
# Feedback and improvements are welcome.
|
|
#
|
|
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
|
|
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
|
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify it under
|
|
# the terms of the GNU General Public License as published by the Free Software
|
|
# Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
|
|
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
|
|
# licenses.
|
|
#
|
|
# You should have received a copy of the GNU General Public License along with
|
|
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
|
|
# Place, Suite 330, Boston, MA 02111-1307 USA.
|
|
# ###########################################################################
|
|
# EventAggregator package
|
|
# ###########################################################################
|
|
{
|
|
# Package: EventAggregator
|
|
# EventAggregator aggregates event values and calculates basic statistics.
|
|
package EventAggregator;
|
|
|
|
use strict;
|
|
use warnings FATAL => 'all';
|
|
use English qw(-no_match_vars);
|
|
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
|
|
|
|
use List::Util qw(min max);
|
|
use Data::Dumper;
|
|
$Data::Dumper::Indent = 1;
|
|
$Data::Dumper::Sortkeys = 1;
|
|
$Data::Dumper::Quotekeys = 0;
|
|
|
|
# ###########################################################################
|
|
# Set up some constants for bucketing values. It is impossible to keep all
|
|
# values seen in memory, but putting them into logarithmically scaled buckets
|
|
# and just incrementing the bucket each time works, although it is imprecise.
|
|
# See http://code.google.com/p/maatkit/wiki/EventAggregatorInternals.
|
|
# ###########################################################################
|
|
use constant BUCK_SIZE => 1.05;
|
|
use constant BASE_LOG => log(BUCK_SIZE);
|
|
use constant BASE_OFFSET => abs(1 - log(0.000001) / BASE_LOG); # 284.1617969
|
|
use constant NUM_BUCK => 1000;
|
|
use constant MIN_BUCK => .000001;
|
|
|
|
# Used in buckets_of() to map buckets of log10 to log1.05 buckets.
|
|
my @buck_vals = map { bucket_value($_); } (0..NUM_BUCK-1);
|
|
|
|
# Sub: new
|
|
#
|
|
# Parameters:
|
|
# %args - Arguments
|
|
#
|
|
# Required Arguments:
|
|
# groupby - Attribute to group/aggregate classes by.
|
|
# worst - Attribute which defines the worst event in a class.
|
|
# Samples of the worst attribute are saved.
|
|
#
|
|
# Optional Arguments:
|
|
# attributes - Hashref of attributes to aggregate. Keys are attribute
|
|
# names used in the EventAggregator object and values are
|
|
# attribute names used to get values from events.
|
|
# Multiple attrib names in the arrayref specify alternate
|
|
# attrib names in the event. Example:
|
|
# Fruit => ['apple', 'orange']. An attrib called "Fruit"
|
|
# will be created using the event's "apple" value or,
|
|
# if that attrib doesn't exist, its "orange" value.
|
|
#                       If this option isn't specified, then all attributes
#                       are auto-detected and aggregated.
|
|
# ignore_attributes - Arrayref of auto-detected attributes to ignore.
|
|
# This does not apply to the attributes specified
|
|
# with the optional attributes option above.
|
|
# unroll_limit - If this many events have been processed and some
|
|
# handlers haven't been generated yet (due to lack
|
|
# of sample data), unroll the loop anyway. (default 1000)
|
|
# attrib_limit - Sanity limit for attribute values. If the value
|
|
# exceeds the limit, use the last-seen for this class;
|
|
# if none, then 0.
|
|
# type_for - Hashref of attribute=>type pairs. See $type in
|
|
# <make_handler()> for the list of types.
|
|
#
|
|
# Returns:
|
|
# EventAggregator object
|
|
# Sub: new
#   Create a new EventAggregator.  Dies unless both required arguments,
#   groupby and worst, are given.  If no attributes hashref is supplied,
#   attribute auto-detection is enabled (detect_attribs).
sub new {
   my ( $class, %args ) = @_;
   foreach my $arg ( qw(groupby worst) ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my $attributes = $args{attributes} || {};
   my $self = {
      groupby        => $args{groupby},
      # Auto-detect attributes only when none were given explicitly.
      detect_attribs => scalar keys %$attributes == 0 ? 1 : 0,
      all_attribs    => [ keys %$attributes ],
      # Attributes to skip during auto-detection.  The groupby attribute
      # is never aggregated, so it is filtered out of every map below.
      ignore_attribs => {
         map  { $_ => $args{attributes}->{$_} }
         grep { $_ ne $args{groupby} }
         @{$args{ignore_attributes}}
      },
      attributes     => {
         map  { $_ => $args{attributes}->{$_} }
         grep { $_ ne $args{groupby} }
         keys %$attributes
      },
      # Pre-built subs that find an attribute's alternate name in an event
      # (e.g. Fruit => ['apple', 'orange']); see make_alt_attrib().
      alt_attribs    => {
         map  { $_ => make_alt_attrib(@{$args{attributes}->{$_}}) }
         grep { $_ ne $args{groupby} }
         keys %$attributes
      },
      worst          => $args{worst},
      # After this many events, handler loops are unrolled even if some
      # handlers could not be generated yet for lack of sample data.
      unroll_limit   => $ENV{PT_QUERY_DIGEST_CHECK_ATTRIB_LIMIT} || 1000,
      attrib_limit   => $args{attrib_limit},
      result_classes => {},
      result_globals => {},
      result_samples => {},
      class_metrics  => {},
      global_metrics => {},
      n_events       => 0,
      unrolled_loops => undef,
      # Copy (not alias) the caller's type map so later changes are local.
      type_for       => { %{$args{type_for} || { Query_time => 'num' }} },
   };
   return bless $self, $class;
}
|
|
|
|
# Delete all collected data, but don't delete things like the generated
|
|
# subroutines. Resetting aggregated data is an interesting little exercise.
|
|
# The generated functions that do aggregation have private namespaces with
|
|
# references to some of the data. Thus, they will not necessarily do as
|
|
# expected if the stored data is simply wiped out. Instead, it needs to be
|
|
# zeroed out without replacing the actual objects.
|
|
# Wipe all aggregated data in place.  The generated handler subs hold
# references to these containers in their closures, so the containers must
# be emptied rather than replaced with fresh ones.
sub reset_aggregated_data {
   my ( $self ) = @_;

   # Empty every per-class attribute store without replacing the hashrefs.
   for my $class_store ( values %{$self->{result_classes}} ) {
      %$_ = () for values %$class_store;
   }

   # Same for the global per-attribute stores.
   %$_ = () for values %{$self->{result_globals}};

   # And the saved worst-sample events.
   %{$self->{result_samples}} = ();

   $self->{n_events} = 0;
}
|
|
|
|
# Aggregate an event hashref's properties. Code is built on the fly to do this,
|
|
# based on the values being passed in. After code is built for every attribute
|
|
# (or 50 events are seen and we decide to give up) the little bits of code get
|
|
# unrolled into a whole subroutine to handle events. For that reason, you can't
|
|
# re-use an instance.
|
|
# Sub: aggregate
#   Aggregate one event hashref.  For the first unroll_limit events this
#   runs a generic per-attribute loop, creating handler subs on demand;
#   after that the handlers are fused into one unrolled sub which handles
#   all subsequent events.  Events without the groupby attribute are ignored.
sub aggregate {
   my ( $self, $event ) = @_;

   my $group_by = $event->{$self->{groupby}};
   return unless defined $group_by;

   $self->{n_events}++;
   PTDEBUG && _d('Event', $self->{n_events});

   # Run only unrolled loops if available.
   return $self->{unrolled_loops}->($self, $event, $group_by)
      if $self->{unrolled_loops};

   # For the first unroll_limit events, auto-detect new attribs and
   # run attrib handlers.
   if ( $self->{n_events} <= $self->{unroll_limit} ) {

      $self->add_new_attributes($event) if $self->{detect_attribs};

      ATTRIB:
      foreach my $attrib ( keys %{$self->{attributes}} ) {

         # Attrib auto-detection can add a lot of attributes which some events
         # may or may not have.  Aggregating a nonexistent attrib is wasteful,
         # so we check that the attrib or one of its alternates exists.  If
         # one does, then we leave attrib alone because the handler sub will
         # also check alternates.
         if ( !exists $event->{$attrib} ) {
            PTDEBUG && _d("attrib doesn't exist in event:", $attrib);
            my $alt_attrib = $self->{alt_attribs}->{$attrib}->($event);
            PTDEBUG && _d('alt attrib:', $alt_attrib);
            next ATTRIB unless $alt_attrib;
         }

         # The value of the attribute ( $group_by ) may be an arrayref.
         GROUPBY:
         foreach my $val ( ref $group_by ? @$group_by : ($group_by) ) {
            # ||= autovivifies the per-class and global stores on first use.
            my $class_attrib  = $self->{result_classes}->{$val}->{$attrib} ||= {};
            my $global_attrib = $self->{result_globals}->{$attrib} ||= {};
            my $samples       = $self->{result_samples};
            my $handler       = $self->{handlers}->{ $attrib };
            if ( !$handler ) {
               # Generate the handler lazily; it may return nothing if the
               # attribute's type cannot be determined from this event.
               $handler = $self->make_handler(
                  event      => $event,
                  attribute  => $attrib,
                  alternates => $self->{attributes}->{$attrib},
                  worst      => $self->{worst} eq $attrib,
               );
               $self->{handlers}->{$attrib} = $handler;
            }
            next GROUPBY unless $handler;
            $samples->{$val} ||= $event; # Initialize to the first event.
            $handler->($event, $class_attrib, $global_attrib, $samples, $group_by);
         }
      }
   }
   else {
      # After unroll_limit events, unroll the loops.
      $self->_make_unrolled_loops($event);
      # Run unrolled loops here once.  Next time, they'll be ran
      # before this if-else.
      $self->{unrolled_loops}->($self, $event, $group_by);
   }

   return;
}
|
|
|
|
# Sub: _make_unrolled_loops
#   Fuse all per-attribute handler code fragments (saved in
#   $self->{unrolled_for}) into a single eval'd subroutine and store it in
#   $self->{unrolled_loops}.  Dies if the generated code fails to compile.
#   NOTE(review): this is deliberate string eval of code generated only from
#   attribute names, not from event values.
sub _make_unrolled_loops {
   my ( $self, $event ) = @_;

   my $group_by = $event->{$self->{groupby}};

   # All attributes have handlers, so let's combine them into one faster sub.
   # Start by getting direct handles to the location of each data store and
   # thing that would otherwise be looked up via hash keys.
   my @attrs   = grep { $self->{handlers}->{$_} } keys %{$self->{attributes}};
   my $globs   = $self->{result_globals}; # Global stats for each
   my $samples = $self->{result_samples};

   # Now the tricky part -- must make sure only the desired variables from
   # the outer scope are re-used, and any variables that should have their
   # own scope are declared within the subroutine.
   my @lines = (
      'my ( $self, $event, $group_by ) = @_;',
      'my ($val, $class, $global, $idx);',
      # The groupby value may be an arrayref; the loop shape is decided now,
      # based on this first post-limit event, and baked into the sub.
      (ref $group_by ? ('foreach my $group_by ( @$group_by ) {') : ()),
      # Create and get each attribute's storage
      'my $temp = $self->{result_classes}->{ $group_by }
         ||= { map { $_ => { } } @attrs };',
      '$samples->{$group_by} ||= $event;', # Always start with the first.
   );
   foreach my $i ( 0 .. $#attrs ) {
      # Access through array indexes, it's faster than hash lookups
      push @lines, (
         '$class = $temp->{\'' . $attrs[$i] . '\'};',
         '$global = $globs->{\'' . $attrs[$i] . '\'};',
         $self->{unrolled_for}->{$attrs[$i]},
      );
   }
   if ( ref $group_by ) {
      push @lines, '}'; # Close the loop opened above
   }
   @lines = map { s/^/ /gm; $_ } @lines; # Indent for debugging
   unshift @lines, 'sub {';
   push @lines, '}';

   # Make the subroutine.
   my $code = join("\n", @lines);
   PTDEBUG && _d('Unrolled subroutine:', @lines);
   my $sub = eval $code;
   die $EVAL_ERROR if $EVAL_ERROR;
   $self->{unrolled_loops} = $sub;

   return;
}
|
|
|
|
# Return the aggregated results.
|
|
# Return the aggregated results: a hashref with the per-class stores
# (classes), the global per-attribute stores (globals), and the saved
# worst-sample events (samples).  The returned refs alias live data.
sub results {
   my ( $self ) = @_;
   my %results = (
      classes => $self->{result_classes},
      globals => $self->{result_globals},
      samples => $self->{result_samples},
   );
   return \%results;
}
|
|
|
|
# Replace this object's result stores wholesale with the classes/globals/
# samples structures from a results() hashref (e.g. restored from disk).
sub set_results {
   my ( $self, $results ) = @_;
   @{$self}{qw(result_classes result_globals result_samples)}
      = @{$results}{qw(classes globals samples)};
   return;
}
|
|
|
|
# Return the computed statistical metrics (see
# calculate_statistical_metrics()): per-class metrics under "classes" and
# global metrics under "globals".  The returned refs alias live data.
sub stats {
   my ( $self ) = @_;
   my %metrics = (
      classes => $self->{class_metrics},
      globals => $self->{global_metrics},
   );
   return \%metrics;
}
|
|
|
|
# Return the attributes that this object is tracking, and their data types, as
|
|
# a hashref of name => type.
|
|
# Return the attributes this object tracks and their detected data types,
# as a hashref of attribute name => type ('num', 'bool' or 'string').
sub attributes {
   my ( $self ) = @_;
   my $types = $self->{type_for};
   return $types;
}
|
|
|
|
# Replace the attribute => type map wholesale (the inverse of attributes()).
sub set_attribute_types {
   my $self         = shift;
   my $attrib_types = shift;
   $self->{type_for} = $attrib_types;
   return;
}
|
|
|
|
# Returns the type of the attribute (as decided by the aggregation process,
|
|
# which inspects the values).
|
|
# Return the detected data type ('num', 'bool', 'string') for one attribute,
# or undef if the attribute has not been typed yet.
sub type_for {
   my ( $self, $attrib ) = @_;
   my $types = $self->{type_for};
   return $types->{$attrib};
}
|
|
|
|
# Sub: make_handler
|
|
# Make an attribute handler subroutine for <aggregate()>. Each attribute
|
|
# needs a handler to keep track of the min and max values, worst sample, etc.
|
|
# Handlers differ depending on the type of attribute (num, bool or string).
|
|
#
|
|
# Parameters:
|
|
# %args - Arguments
|
|
#
|
|
# Required Arguments:
|
|
# event - Event hashref
|
|
# attribute - Attribute name
|
|
#
|
|
# Optional Arguments:
|
|
# alternates - Arrayref of alternate names for the attribute
|
|
# worst - Keep a sample of the attribute's worst value (default no)
|
|
#
|
|
# Returns:
|
|
# A subroutine that can aggregate the attribute.
|
|
# Sub: make_handler
#   Build and return an attribute-handler subroutine for aggregate().  The
#   attribute's type (num, bool, string) is detected from the sample event's
#   value, then handler code is generated as text, eval'd, and returned.
#   The raw code fragment is also saved in $self->{unrolled_for}{$attrib}
#   for later fusion by _make_unrolled_loops().
#
#   Returns nothing (undef) if the attribute's value cannot be obtained or
#   is undefined; dies if the generated handler fails to compile.
#
#   Fix: the generated unique-value counter statement was missing its
#   terminating semicolon, so when a bool/string attribute was also the
#   "worst" attribute the joined handler source became
#   "++$class->{unq}->{$val}\nif (...) { ... }" -- a syntax error in the
#   string eval.  The semicolon is now emitted.
sub make_handler {
   my ( $self, %args ) = @_;
   my @required_args = qw(event attribute);
   foreach my $arg ( @required_args ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my ($event, $attrib) = @args{@required_args};

   # Get a sample value; _get_value() may die (e.g. bad alternates).
   my $val;
   eval { $val = $self->_get_value(%args); };
   if ( $EVAL_ERROR ) {
      PTDEBUG && _d("Cannot make", $attrib, "handler:", $EVAL_ERROR);
      return;
   }
   return unless defined $val; # can't determine type if it's undef

   # Ripped off from Regexp::Common::number and modified.
   my $float_re = qr{[+-]?(?:(?=\d|[.])\d+(?:[.])\d{0,})(?:E[+-]?\d+)?}i;
   # An explicit type (from type_for) wins; _crc attribs are forced to
   # string; otherwise guess from the sample value.
   my $type = $self->type_for($attrib)           ? $self->type_for($attrib)
            : $attrib =~ m/_crc$/                ? 'string'
            : $val    =~ m/^(?:\d+|$float_re)$/o ? 'num'
            : $val    =~ m/^(?:Yes|No)$/         ? 'bool'
            :                                      'string';
   PTDEBUG && _d('Type for', $attrib, 'is', $type, '(sample:', $val, ')');
   $self->{type_for}->{$attrib} = $type;

   # #########################################################################
   # Begin creating the handler subroutine by writing lines of code.
   # #########################################################################
   my @lines;

   # Some attrib types don't need us to track sum, unq or all--and some do.
   my %track = (
      sum => $type =~ m/num|bool/    ? 1 : 0, # sum of values
      unq => $type =~ m/bool|string/ ? 1 : 0, # count of unique values seen
      all => $type eq 'num'          ? 1 : 0, # all values in bucketed list
   );

   # First, do any transformations to the value if needed.  Right now,
   # it's just bool type attribs that need to be transformed (Yes/No -> 1/0).
   my $trf = ($type eq 'bool') ? q{(($val || '') eq 'Yes') ? 1 : 0}
           :                     undef;
   if ( $trf ) {
      push @lines, q{$val = } . $trf . ';';
   }

   # Handle broken Query_time like 123.124345.8382 (issue 234).
   if ( $attrib eq 'Query_time' ) {
      push @lines, (
         '$val =~ s/^(\d+(?:\.\d+)?).*/$1/;',
         '$event->{\''.$attrib.'\'} = $val;',
      );
   }

   # Make sure the value is constrained to legal limits.  If it's out of
   # bounds, just use the last-seen value for it.
   if ( $type eq 'num' && $self->{attrib_limit} ) {
      push @lines, (
         "if ( \$val > $self->{attrib_limit} ) {",
         ' $val = $class->{last} ||= 0;',
         '}',
         '$class->{last} = $val;',
      );
   }

   # Update values for this attrib in the class and global stores.  We write
   # code for each store.  The placeholder word PLACE is replaced with either
   # $class or $global at the end of the loop.
   my $lt = $type eq 'num' ? '<' : 'lt';
   my $gt = $type eq 'num' ? '>' : 'gt';
   foreach my $place ( qw($class $global) ) {
      my @tmp; # hold lines until PLACE placeholder is replaced

      # Count of any and all values seen for this attribute; e.g.
      # class->Query_time->cnt is the number of queries in the class.
      push @tmp, '++PLACE->{cnt};';

      # CRC attribs are bucketed in 1k buckets by % 1_000.  Convert early
      # so min/max agree with what unq will contain.
      if ( $attrib =~ m/_crc$/ ) {
         push @tmp, '$val = $val % 1_000;';
      }

      # Track min, max and sum of values.  Min and max for strings is
      # mostly used for timestamps; min ts is earliest and max ts is latest.
      push @tmp, (
         'PLACE->{min} = $val if !defined PLACE->{min} || $val '
            . $lt . ' PLACE->{min};',
      );
      push @tmp, (
         'PLACE->{max} = $val if !defined PLACE->{max} || $val '
            . $gt . ' PLACE->{max};',
      );
      if ( $track{sum} ) {
         push @tmp, 'PLACE->{sum} += $val;';
      }

      # Save all values in a bucketed list.  See bucket_idx() below.
      if ( $track{all} ) {
         push @tmp, (
            'exists PLACE->{all} or PLACE->{all} = {};',
            '++PLACE->{all}->{ EventAggregator::bucket_idx($val) };',
         );
      }

      # Replace PLACE with current variable, $class or $global.
      push @lines, map { s/PLACE/$place/g; $_ } @tmp;
   }

   # We only save unique and worst values for the class, not globally.
   # NOTE: the trailing semicolon inside the string is required -- the next
   # generated fragment may be an "if (...) { ... }" block (see fix above).
   if ( $track{unq} ) {
      push @lines, '++$class->{unq}->{$val};';
   }

   if ( $args{worst} ) {
      my $op = $type eq 'num' ? '>=' : 'ge';
      push @lines, (
         'if ( $val ' . $op . ' ($class->{max} || 0) ) {',
         '   $samples->{$group_by} = $event;',
         '}',
      );
   }

   # Make the core code.  This part is saved for later, as part of an
   # "unrolled" subroutine.
   my @unrolled = (
      # Get $val from primary attrib name.
      "\$val = \$event->{'$attrib'};",

      # Get $val from alternate attrib names.
      ( map  { "\$val = \$event->{'$_'} unless defined \$val;" }
        grep { $_ ne $attrib } @{$args{alternates}}
      ),

      # Execute the code lines, if $val is defined.
      'defined $val && do {',
      @lines,
      '};',
   );
   $self->{unrolled_for}->{$attrib} = join("\n", @unrolled);

   # Finally, make a complete subroutine by wrapping the core code inside
   # a "sub { ... }" template.
   my @code = (
      'sub {',
      # Get args and define all variables.
      'my ( $event, $class, $global, $samples, $group_by ) = @_;',
      'my ($val, $idx);',

      # Core code from above.
      $self->{unrolled_for}->{$attrib},

      'return;',
      '}',
   );
   $self->{code_for}->{$attrib} = join("\n", @code);
   PTDEBUG && _d($attrib, 'handler code:', $self->{code_for}->{$attrib});
   my $sub = eval $self->{code_for}->{$attrib};
   if ( $EVAL_ERROR ) {
      die "Failed to compile $attrib handler code: $EVAL_ERROR";
   }

   return $sub;
}
|
|
|
|
# Sub: bucket_idx
|
|
# Return the bucket number for the given value. Buck numbers are zero-indexed,
|
|
# so although there are 1,000 buckets (NUM_BUCK), 999 is the greatest idx.
|
|
#
|
|
# Notice that this sub is not a class method, so either call it
|
|
# from inside this module like bucket_idx() or outside this module
|
|
# like EventAggregator::bucket_idx().
|
|
#
|
|
# The bucketed list works this way: each range of values from MIN_BUCK in
|
|
# increments of BUCK_SIZE (that is 5%) we consider a bucket. We keep NUM_BUCK
|
|
# buckets. The upper end of the range is more than 1.5e15 so it should be big
|
|
# enough for almost anything. The buckets are accessed by log base BUCK_SIZE,
|
|
# so floor(log(N)/log(BUCK_SIZE)). The smallest bucket's index is -284. We
|
|
# shift all values up 284 so we have values from 0 to 999 that can be used as
|
|
# array indexes. A value that falls into a bucket simply increments the array
|
|
# entry. We do NOT use POSIX::floor() because it is too expensive.
|
|
#
|
|
# This eliminates the need to keep and sort all values to calculate median,
|
|
# standard deviation, 95th percentile, etc. So memory usage is bounded by
|
|
# the number of distinct aggregated values, not the number of events.
|
|
#
|
|
# TODO: could export this by default to avoid having to specify package::.
|
|
#
|
|
# Parameters:
|
|
# $val - Numeric value to bucketize
|
|
#
|
|
# Returns:
|
|
# Bucket number (0 to NUM_BUCK-1) for the value
|
|
# Map a numeric value to its zero-indexed log-1.05 bucket (0..NUM_BUCK-1).
# Values below MIN_BUCK collapse into bucket 0; values past the top of the
# range are clamped into the last bucket.  Not a method -- call as
# bucket_idx() inside this package or EventAggregator::bucket_idx() outside.
sub bucket_idx {
   my ( $value ) = @_;

   # Everything smaller than the first bucket's lower bound goes to 0.
   return 0 if $value < MIN_BUCK;

   # floor(log_BUCK_SIZE(value)) shifted up by BASE_OFFSET; int() truncation
   # is used instead of POSIX::floor() because it is cheaper and the operand
   # is non-negative here.
   my $bucket  = int(BASE_OFFSET + log($value) / BASE_LOG);
   my $max_idx = NUM_BUCK - 1;
   return $bucket > $max_idx ? $max_idx : $bucket;
}
|
|
|
|
# Sub: bucket_value
|
|
# Return the value corresponding to the given bucket. The value of each
|
|
# bucket is the first value that it covers. So the value of bucket 1 is
|
|
# 0.000001000 because it covers [0.000001000, 0.000001050).
|
|
#
|
|
# Notice that this sub is not a class method, so either call it
|
|
# from inside this module like bucket_idx() or outside this module
|
|
# like EventAggregator::bucket_value().
|
|
#
|
|
# TODO: could export this by default to avoid having to specific package::.
|
|
#
|
|
# Parameters:
|
|
# $bucket - Bucket number (0 to NUM_BUCK-1)
|
|
#
|
|
# Returns:
|
|
# Numeric value corresponding to the bucket
|
|
# Inverse of bucket_idx(): return the representative (lowest) value covered
# by a bucket.  Bucket 0 represents 0; any index outside 0..NUM_BUCK-1 is
# fatal.  Not a method -- callable as EventAggregator::bucket_value().
sub bucket_value {
   my ( $bucket ) = @_;

   # The zero bucket holds all values below MIN_BUCK.
   return 0 if $bucket == 0;

   die "Invalid bucket: $bucket" if $bucket < 0 || $bucket > (NUM_BUCK-1);

   # Buckets are shifted up by one to make room for the zero bucket, so
   # undo the shift before exponentiating.
   return MIN_BUCK * (BUCK_SIZE ** ($bucket - 1));
}
|
|
|
|
# Map the 1,000 base 1.05 buckets to 8 base 10 buckets. Returns an array
|
|
# of 1,000 buckets, the value of each represents its index in an 8 bucket
|
|
# base 10 array. For example: base 10 bucket 0 represents vals (0, 0.000010),
|
|
# and base 1.05 buckets 0..47 represent vals (0, 0.000010401). So the first
|
|
# 48 elements of the returned array will have 0 as their values.
|
|
# TODO: right now it's hardcoded to buckets of 10, in the future maybe not.
|
|
{
   # Memoized map from the 1,000 base-1.05 buckets to 8 base-10 buckets;
   # computed on first call to buckets_of() and returned as-is thereafter.
   my @buck_tens;
   sub buckets_of {
      return @buck_tens if @buck_tens;

      # To make a more precise map, we first set the starting values for
      # each of the 8 base 10 buckets: 0, then 10**1..10**7 times MIN_BUCK.
      my $start_bucket  = 0;
      my @base10_starts = (0);
      map { push @base10_starts, (10**$_)*MIN_BUCK } (1..7);

      # Then find the base 1.05 buckets that correspond to each
      # base 10 bucket.  The last value in each bucket's range belongs
      # to the next bucket, so $next_bucket-1 represents the real last
      # base 1.05 bucket in which the base 10 bucket's range falls.
      for my $base10_bucket ( 0..($#base10_starts-1) ) {
         my $next_bucket = bucket_idx( $base10_starts[$base10_bucket+1] );
         PTDEBUG && _d('Base 10 bucket', $base10_bucket, 'maps to',
            'base 1.05 buckets', $start_bucket, '..', $next_bucket-1);
         for my $base1_05_bucket ($start_bucket..($next_bucket-1)) {
            $buck_tens[$base1_05_bucket] = $base10_bucket;
         }
         $start_bucket = $next_bucket;
      }

      # Map all remaining base 1.05 buckets to base 10 bucket 7 which
      # is for vals > 10.
      map { $buck_tens[$_] = 7 } ($start_bucket..(NUM_BUCK-1));

      return @buck_tens;
   }
}
|
|
|
|
# Calculate 95%, stddev and median for numeric attributes in the
|
|
# global and classes stores that have all values (1k buckets).
|
|
# Save the metrics in global_metrics and class_metrics.
|
|
# Sub: calculate_statistical_metrics
#   Compute 95th percentile, stddev and median for every numeric attribute
#   that kept a bucketed value list ({all}), both globally and per class.
#   Results are stored in $self->{global_metrics} and $self->{class_metrics}
#   (retrievable via stats()); nothing is returned.
sub calculate_statistical_metrics {
   my ( $self, %args ) = @_;
   my $classes        = $self->{result_classes};
   my $globals        = $self->{result_globals};
   my $class_metrics  = $self->{class_metrics};
   my $global_metrics = $self->{global_metrics};
   PTDEBUG && _d('Calculating statistical_metrics');
   foreach my $attrib ( keys %$globals ) {
      # Only attribs with a bucketed list (numeric attribs) have metrics.
      if ( exists $globals->{$attrib}->{all} ) {
         $global_metrics->{$attrib}
            = $self->_calc_metrics(
               $globals->{$attrib}->{all},
               $globals->{$attrib},
            );
      }

      foreach my $class ( keys %$classes ) {
         if ( exists $classes->{$class}->{$attrib}->{all} ) {
            $class_metrics->{$class}->{$attrib}
               = $self->_calc_metrics(
                  $classes->{$class}->{$attrib}->{all},
                  $classes->{$class}->{$attrib}
               );
         }
      }
   }

   return;
}
|
|
|
|
# Given a hashref of vals, returns a hashref with the following
|
|
# statistical metrics:
|
|
#
|
|
# pct_95 => top bucket value in the 95th percentile
|
|
# cutoff => How many values fall into the 95th percentile
|
|
# stddev => of all values
|
|
# median => of all values
|
|
#
|
|
# The vals hashref represents the buckets as per the above (see the comments
|
|
# at the top of this file). $args should contain cnt, min and max properties.
|
|
# Sub: _calc_metrics
#   Given a sparse hashref of bucket-index => count ($vals) and the attrib's
#   summary store ($args, with cnt/min/max), return a hashref with:
#     pct_95 => top bucket value within the 95th percentile
#     cutoff => how many values fall into the 95th percentile
#     stddev => of all values
#     median => of all values
#   Values are approximate because they come from the bucketed list; the
#   1- and 2-value cases are special-cased to return exact numbers.
sub _calc_metrics {
   my ( $self, $vals, $args ) = @_;
   my $statistical_metrics = {
      pct_95 => 0,
      stddev => 0,
      median => 0,
      cutoff => undef,
   };

   # These cases might happen when there is nothing to get from the event, for
   # example, processlist sniffing doesn't gather Rows_examined, so $args won't
   # have {cnt} or other properties.
   return $statistical_metrics
      unless defined $vals && %$vals && $args->{cnt};

   # Return accurate metrics for some cases.
   my $n_vals = $args->{cnt};
   if ( $n_vals == 1 || $args->{max} == $args->{min} ) {
      # All values identical: median and pct_95 are that value, stddev 0.
      my $v      = $args->{max} || 0;
      my $bucket = int(6 + ( log($v > 0 ? $v : MIN_BUCK) / log(10)));
      $bucket = $bucket > 7 ? 7 : $bucket < 0 ? 0 : $bucket;
      return {
         pct_95 => $v,
         stddev => 0,
         median => $v,
         cutoff => $n_vals,
      };
   }
   elsif ( $n_vals == 2 ) {
      # NOTE(review): this loop computes $bucket but discards it; it appears
      # vestigial (kept for parity with the original) -- confirm upstream.
      foreach my $v ( $args->{min}, $args->{max} ) {
         my $bucket = int(6 + ( log($v && $v > 0 ? $v : MIN_BUCK) / log(10)));
         $bucket = $bucket > 7 ? 7 : $bucket < 0 ? 0 : $bucket;
      }
      my $v    = $args->{max} || 0;
      my $mean = (($args->{min} || 0) + $v) / 2;
      return {
         pct_95 => $v,
         stddev => sqrt((($v - $mean) ** 2) *2),
         median => $mean,
         cutoff => $n_vals,
      };
   }

   # Determine cutoff point for 95% if there are at least 10 vals.  Cutoff
   # serves also for the number of vals left in the 95%.  E.g. with 50 vals
   # the cutoff is 47 which means there are 47 vals: 0..46.  $cutoff is NOT
   # an array index.
   my $cutoff = $n_vals >= 10 ? int ( $n_vals * 0.95 ) : $n_vals;
   $statistical_metrics->{cutoff} = $cutoff;

   # Calculate the standard deviation and median of all values.
   my $total_left = $n_vals;
   my $top_vals   = $n_vals - $cutoff; # vals > 95th
   my $sum_excl   = 0;
   my $sum        = 0;
   my $sumsq      = 0;
   my $mid        = int($n_vals / 2);
   my $median     = 0;
   my $prev       = NUM_BUCK-1; # Used for getting median when $cutoff is odd
   my $bucket_95  = 0; # top bucket in 95th

   PTDEBUG && _d('total vals:', $total_left, 'top vals:', $top_vals, 'mid:', $mid);

   # In ancient times we kept an array of 1k buckets for each numeric
   # attrib.  Each such array cost 32_300 bytes of memory (that's not
   # a typo; yes, it was verified).  But measurements showed that only
   # 1% of the buckets were used on average, meaning 99% of 32_300 was
   # wasted.  Now we store only the used buckets in a hashref which we
   # map to a 1k bucket array for processing, so we don't have to tinker
   # with the delicate code below.
   # http://code.google.com/p/maatkit/issues/detail?id=866
   my @buckets = map { 0 } (0..NUM_BUCK-1);
   map { $buckets[$_] = $vals->{$_} } keys %$vals;
   $vals = \@buckets; # repoint vals from given hashref to our array

   # Walk buckets from largest to smallest, accumulating counts until the
   # 95th-percentile boundary and the median position are located.
   BUCKET:
   for my $bucket ( reverse 0..(NUM_BUCK-1) ) {
      my $val = $vals->[$bucket];
      next BUCKET unless $val;

      $total_left -= $val;
      $sum_excl   += $val;
      # First bucket at which more than the top 5% has been excluded is
      # the top bucket of the 95th percentile.
      $bucket_95 = $bucket if !$bucket_95 && $sum_excl > $top_vals;

      if ( !$median && $total_left <= $mid ) {
         # Even count straddling two buckets averages this bucket's value
         # with the previously-seen (next larger) bucket's value.
         $median = (($cutoff % 2) || ($val > 1)) ? $buck_vals[$bucket]
                 : ($buck_vals[$bucket] + $buck_vals[$prev]) / 2;
      }

      $sum   += $val * $buck_vals[$bucket];
      $sumsq += $val * ($buck_vals[$bucket]**2);
      $prev   = $bucket;
   }

   # Variance via E[x^2] - E[x]^2; clamp stddev to half the observed range
   # to keep bucketing error from inflating it.
   my $var      = $sumsq/$n_vals - ( ($sum/$n_vals) ** 2 );
   my $stddev   = $var > 0 ? sqrt($var) : 0;
   my $maxstdev = (($args->{max} || 0) - ($args->{min} || 0)) / 2;
   $stddev = $stddev > $maxstdev ? $maxstdev : $stddev;

   PTDEBUG && _d('sum:', $sum, 'sumsq:', $sumsq, 'stddev:', $stddev,
      'median:', $median, 'prev bucket:', $prev,
      'total left:', $total_left, 'sum excl', $sum_excl,
      'bucket 95:', $bucket_95, $buck_vals[$bucket_95]);

   $statistical_metrics->{stddev} = $stddev;
   $statistical_metrics->{pct_95} = $buck_vals[$bucket_95];
   $statistical_metrics->{median} = $median;

   return $statistical_metrics;
}
|
|
|
|
# Return a hashref of the metrics for some attribute, pre-digested.
|
|
# %args is:
|
|
# attrib => the attribute to report on
|
|
# where => the value of the fingerprint for the attrib
|
|
# Return a pre-digested hashref of metrics for one attribute of one class.
# %args:
#   attrib => the attribute to report on
#   where  => the value of the fingerprint for the attrib
sub metrics {
   my ( $self, %args ) = @_;
   foreach my $arg ( qw(attrib where) ) {
      die "I need a $arg argument" unless defined $args{$arg};
   }
   my ($attrib, $where) = @args{qw(attrib where)};

   my $results     = $self->results();
   my $all_metrics = $self->stats();
   my $store       = $results->{classes}->{$where}->{$attrib};
   my $global_cnt  = $results->{globals}->{$attrib}->{cnt};
   my $class_stats = $all_metrics->{classes}->{$where}->{$attrib};

   my %digest = (
      cnt    => $store->{cnt},
      pct    => $global_cnt && $store->{cnt} ? $store->{cnt} / $global_cnt : 0,
      sum    => $store->{sum},
      min    => $store->{min},
      max    => $store->{max},
      avg    => $store->{sum} && $store->{cnt} ? $store->{sum} / $store->{cnt} : 0,
      median => $class_stats->{median} || 0,
      pct_95 => $class_stats->{pct_95} || 0,
      stddev => $class_stats->{stddev} || 0,
   );
   return \%digest;
}
|
|
|
|
# Find the top N or top % event keys, in sorted order, optionally including
|
|
# outliers (ol_...) that are notable for some reason. %args looks like this:
|
|
#
|
|
# attrib order-by attribute (usually Query_time)
|
|
# orderby order-by aggregate expression (should be numeric, usually sum)
|
|
# total include events whose summed attribs are <= this number...
|
|
# count ...or this many events, whichever is less...
|
|
# ol_attrib ...or events where the 95th percentile of this attribute...
|
|
# ol_limit ...is greater than this value, AND...
|
|
# ol_freq ...the event occurred at least this many times.
|
|
# The return value is two arrayref. The first is a list of arrayrefs of the
|
|
# chosen (top) events. Each arrayref is the event key and an explanation of
|
|
# why it was included (top|outlier). The second is a list of the non-top
|
|
# event keys.
|
|
# Sub: top_events
#   Find the top N or top % event keys, in sorted order, optionally including
#   outliers (ol_...) that are notable for some reason.  %args:
#     attrib    - order-by attribute (usually Query_time)
#     orderby   - order-by aggregate expression (numeric, usually sum)
#     total     - include events whose summed attribs are <= this number...
#     count     - ...or this many events, whichever is less...
#     ol_attrib - ...or events where the 95th percentile of this attribute...
#     ol_limit  - ...is greater than this value, AND...
#     ol_freq   - ...the event occurred at least this many times.
#   Returns two arrayrefs: chosen events as [key, why, rank] triples, and
#   the remaining (non-top) events in the same form.
sub top_events {
   my ( $self, %args ) = @_;
   my $classes = $self->{result_classes};
   my @sorted = reverse sort { # Sorted list of $groupby values
      # Tie-break equal orderby values by key so the order is stable.
      ($classes->{$a}->{$args{attrib}}->{$args{orderby}}
         == $classes->{$b}->{$args{attrib}}->{$args{orderby}})
      ? $a cmp $b
      : $classes->{$a}->{$args{attrib}}->{$args{orderby}}
        <=> $classes->{$b}->{$args{attrib}}->{$args{orderby}}
   } grep {
      # Defensive programming: skip classes without the orderby value.
      defined $classes->{$_}->{$args{attrib}}->{$args{orderby}}
   } keys %$classes;

   my @chosen; # top events
   my @other;  # other events (< top)
   my ($total, $count) = (0, 0);
   foreach my $groupby ( @sorted ) {
      # Events that fall into the top criterion for some reason
      if (
         (!$args{total} || $total < $args{total} )
         && ( !$args{count} || $count < $args{count} )
      ) {
         push @chosen, [$groupby, 'top', $count+1];
      }

      # Events that are notable outliers
      elsif ( $args{ol_attrib} && (!$args{ol_freq}
          || $classes->{$groupby}->{$args{ol_attrib}}->{cnt} >= $args{ol_freq})
      ) {
         my $stats = $self->{class_metrics}->{$groupby}->{$args{ol_attrib}};
         if ( ($stats->{pct_95} || 0) >= $args{ol_limit} ) {
            push @chosen, [$groupby, 'outlier', $count+1];
         }
         else {
            push @other, [$groupby, 'misc', $count+1];
         }
      }

      # Events not in the top criterion
      else {
         push @other, [$groupby, 'misc', $count+1];
      }

      # Running totals drive the total/count criteria for the next event.
      $total += $classes->{$groupby}->{$args{attrib}}->{$args{orderby}};
      $count++;
   }
   return \@chosen, \@other;
}
|
|
|
|
# Adds all new attributes in $event to $self->{attributes}.
|
|
# Sub: add_new_attributes
#   Auto-detect attributes: register every key of $event that is not the
#   groupby attribute, not already registered, and not ignored.  Each new
#   attribute gets itself as its only (primary) name, an alt-attrib lookup
#   sub, and an entry in all_attribs.  Returns nothing.
#
#   Fix: the original used map in void context purely for side effects;
#   rewritten with grep + foreach (same behavior, idiomatic control flow).
sub add_new_attributes {
   my ( $self, $event ) = @_;
   return unless $event;

   # Candidate keys: everything in the event we aren't already tracking
   # or deliberately ignoring, excluding the groupby attribute itself.
   my @new_attribs = grep {
      $_ ne $self->{groupby}
      && !exists $self->{attributes}->{$_}
      && !exists $self->{ignore_attribs}->{$_}
   } keys %$event;

   foreach my $attrib ( @new_attribs ) {
      $self->{attributes}->{$attrib}  = [$attrib];
      $self->{alt_attribs}->{$attrib} = make_alt_attrib($attrib);
      push @{$self->{all_attribs}}, $attrib;
      PTDEBUG && _d('Added new attribute:', $attrib);
   }

   return;
}
|
|
|
|
# Returns an arrayref of all the attributes that were either given
|
|
# explicitly to new() or that were auto-detected.
|
|
# Return an arrayref of every attribute name known to this object: those
# given explicitly to new() plus any that were auto-detected since.
sub get_attributes {
   my ( $self ) = @_;
   my $attribs = $self->{all_attribs};
   return $attribs;
}
|
|
|
|
# Return how many events have been passed to aggregate() so far.
sub events_processed {
   my ( $self ) = @_;
   my $count = $self->{n_events};
   return $count;
}
|
|
|
|
# Sub: make_alt_attrib
#   Given a primary attribute name followed by its alternate names, build
#   and return a sub that, given an event hashref, returns the first
#   alternate name present in the event (or undef).  If there are no
#   alternates, returns a no-op sub.  The lookup sub is generated as code
#   text and eval'd; dies if the generated code fails to compile.
sub make_alt_attrib {
   my ( @attribs ) = @_;

   my $attrib = shift @attribs;   # Primary attribute.
   return sub {} unless @attribs; # No alternates.

   my @lines;
   push @lines, 'sub { my ( $event ) = @_; my $alt_attrib;';
   # One existence check per alternate; first match wins because of the
   # "!defined $alt_attrib" guard.
   push @lines, map {
      "\$alt_attrib = '$_' if !defined \$alt_attrib "
         . "&& exists \$event->{'$_'};"
   } @attribs;
   push @lines, 'return $alt_attrib; }';
   PTDEBUG && _d('alt attrib sub for', $attrib, ':', @lines);
   my $sub = eval join("\n", @lines);
   die if $EVAL_ERROR;
   return $sub;
}
|
|
|
|
# Sub: merge
#   Merge/add the given list of EventAggregator objects into a single,
#   newly created EventAggregator.  All input objects must share the same
#   groupby and worst attributes, else the sum would be nonsensical.
#
# Parameters:
#   @ea_objs - List of EventAggregator objects to merge
#
# Returns:
#   A new EventAggregator object containing the summed results, or an
#   empty list if no objects were given.
sub merge {
   my ( @ea_objs ) = @_;
   PTDEBUG && _d('Merging', scalar @ea_objs, 'ea');
   return unless scalar @ea_objs;

   # If all the ea don't have the same groupby and worst then adding
   # them will produce a nonsensical result.  (Maybe not if worst
   # differs but certainly if groupby differs).  And while checking this...
   my $ea1   = shift @ea_objs;
   my $r1    = $ea1->results;
   my $worst = $ea1->{worst}; # for merging, finding worst sample

   # ...get all attributes and their types to properly initialize the
   # returned ea obj;
   my %attrib_types = %{ $ea1->attributes() };

   foreach my $ea ( @ea_objs ) {
      die "EventAggregator objects have different groupby: "
         . "$ea1->{groupby} and $ea->{groupby}"
         unless $ea1->{groupby} eq $ea->{groupby};
      die "EventAggregator objects have different worst: "
         . "$ea1->{worst} and $ea->{worst}"
         unless $ea1->{worst} eq $ea->{worst};

      # Collect attribs seen only in later ea objs; first seen type wins.
      my $attrib_types = $ea->attributes();
      map {
         $attrib_types{$_} = $attrib_types->{$_}
            unless exists $attrib_types{$_};
      } keys %$attrib_types;
   }

   # First, deep copy the first ea obj.  Do not shallow copy, do deep copy
   # so the returned ea is truly its own obj and does not point to data
   # structs in one of the given ea.
   my $r_merged = {
      classes => {},
      globals => _deep_copy_attribs($r1->{globals}),
      samples => {},
   };
   map {
      $r_merged->{classes}->{$_}
         = _deep_copy_attribs($r1->{classes}->{$_});

      # Samples are flat hashes of scalars, so a hash-slice copy suffices.
      @{$r_merged->{samples}->{$_}}{keys %{$r1->{samples}->{$_}}}
         = values %{$r1->{samples}->{$_}};
   } keys %{$r1->{classes}};

   # Then, merge/add the other eas.  r1* is the eventual return val.
   # r2* is the current ea being merged/added into r1*.
   for my $i ( 0..$#ea_objs ) {
      PTDEBUG && _d('Merging ea obj', ($i + 1));
      my $r2 = $ea_objs[$i]->results;

      # Descend into each class (e.g. unique query/fingerprint), each
      # attribute (e.g. Query_time, etc.), and then each attribute
      # value (e.g. min, max, etc.).  If either a class or attrib is
      # missing in one of the results, deep copy the extant class/attrib;
      # if both exist, add/merge the results.
      eval {
         CLASS:
         foreach my $class ( keys %{$r2->{classes}} ) {
            my $r1_class = $r_merged->{classes}->{$class};
            my $r2_class = $r2->{classes}->{$class};

            if ( $r1_class && $r2_class ) {
               # Class exists in both results.  Add/merge all their attributes.
               CLASS_ATTRIB:
               foreach my $attrib ( keys %$r2_class ) {
                  PTDEBUG && _d('merge', $attrib);
                  if ( $r1_class->{$attrib} && $r2_class->{$attrib} ) {
                     _add_attrib_vals($r1_class->{$attrib}, $r2_class->{$attrib});
                  }
                  elsif ( !$r1_class->{$attrib} ) {
                     PTDEBUG && _d('copy', $attrib);
                     $r1_class->{$attrib} =
                        _deep_copy_attrib_vals($r2_class->{$attrib})
                  }
               }
            }
            elsif ( !$r1_class ) {
               # Class is missing in r1; deep copy it from r2.
               PTDEBUG && _d('copy class');
               $r_merged->{classes}->{$class} = _deep_copy_attribs($r2_class);
            }

            # Update the worst sample if either the r2 sample is worse than
            # the r1 or there's no such sample in r1.
            my $new_worst_sample;
            if ( $r_merged->{samples}->{$class} && $r2->{samples}->{$class} ) {
               # Numeric compare; assumes the worst attrib (e.g. Query_time)
               # is numeric — TODO confirm for non-numeric worst attribs.
               if ( $r2->{samples}->{$class}->{$worst}
                    > $r_merged->{samples}->{$class}->{$worst} ) {
                  $new_worst_sample = $r2->{samples}->{$class}
               }
            }
            elsif ( !$r_merged->{samples}->{$class} ) {
               $new_worst_sample = $r2->{samples}->{$class};
            }
            # Events don't have references to other data structs
            # so we don't have to worry about doing a deep copy.
            if ( $new_worst_sample ) {
               PTDEBUG && _d('New worst sample:', $worst, '=',
                  $new_worst_sample->{$worst}, 'item:', substr($class, 0, 100));
               my %new_sample;
               @new_sample{keys %$new_worst_sample}
                  = values %$new_worst_sample;
               $r_merged->{samples}->{$class} = \%new_sample;
            }
         }
      };
      if ( $EVAL_ERROR ) {
         warn "Error merging class/sample: $EVAL_ERROR";
      }

      # Same as above but for the global attribs/vals.
      eval {
         # NOTE(review): this label attaches to the PTDEBUG statement, not
         # the foreach below, so "next GLOBAL_ATTRIB" would not work here.
         GLOBAL_ATTRIB:
         PTDEBUG && _d('Merging global attributes');
         foreach my $attrib ( keys %{$r2->{globals}} ) {
            my $r1_global = $r_merged->{globals}->{$attrib};
            my $r2_global = $r2->{globals}->{$attrib};

            if ( $r1_global && $r2_global ) {
               # Global attrib exists in both results.  Add/merge all its values.
               PTDEBUG && _d('merge', $attrib);
               _add_attrib_vals($r1_global, $r2_global);
            }
            elsif ( !$r1_global ) {
               # Global attrib is missing in r1; deep copy it from r2 global.
               PTDEBUG && _d('copy', $attrib);
               $r_merged->{globals}->{$attrib}
                  = _deep_copy_attrib_vals($r2_global);
            }
         }
      };
      if ( $EVAL_ERROR ) {
         warn "Error merging globals: $EVAL_ERROR";
      }
   }

   # Create a new EventAggregator obj, initialize it with the summed results,
   # and return it.
   # NOTE(review): indirect object syntax; EventAggregator->new(...) would
   # be the safer modern form.
   my $ea_merged = new EventAggregator(
      groupby    => $ea1->{groupby},
      worst      => $ea1->{worst},
      attributes => { map { $_=>[$_] } keys %attrib_types },
   );
   $ea_merged->set_results($r_merged);
   $ea_merged->set_attribute_types(\%attrib_types);
   return $ea_merged;
}
|
|
|
|
# Sub: _add_attrib_vals
#   Add/merge the attribute values in vals2 into vals1, in place.  Both
#   hashrefs must describe the same attribute (caller's responsibility),
#   so they carry the same statistic keys (min, max, cnt, sum, unq, etc.).
#
# Parameters:
#   $vals1 - Hashref of attribute values; modified in place
#   $vals2 - Hashref of attribute values to merge into $vals1
#
# Returns:
#   Nothing; $vals1 is updated in place.
sub _add_attrib_vals {
   my ( $vals1, $vals2 ) = @_;

   # Only numeric attributes have a "sum" value, so its presence tells us
   # whether min/max compare numerically or stringwise.  Invariant for the
   # whole loop because no top-level keys are added below.
   my $is_num = exists $vals1->{sum} ? 1 : 0;

   STAT:
   foreach my $stat ( keys %$vals1 ) {
      my $v1 = $vals1->{$stat};
      my $v2 = $vals2->{$stat};

      if ( !ref($v1) && !ref($v2) ) {
         # Scalar stat (min, max, cnt, sum); should never be undef.
         die "undefined $stat value" unless defined $v1 && defined $v2;

         if ( $stat eq 'max' ) {
            $vals1->{$stat} = $is_num
               ? ( $v1 >  $v2 ? $v1 : $v2 )
               : ( $v1 gt $v2 ? $v1 : $v2 );
         }
         elsif ( $stat eq 'min' ) {
            $vals1->{$stat} = $is_num
               ? ( $v1 <  $v2 ? $v1 : $v2 )
               : ( $v1 lt $v2 ? $v1 : $v2 );
         }
         else {
            # cnt, sum, etc. simply accumulate.
            $vals1->{$stat} += $v2;
         }
      }
      elsif ( ref($v1) eq 'ARRAY' && ref($v2) eq 'ARRAY' ) {
         # Arrayref stat: the 1k value buckets; add element-wise.
         # Should never be empty.
         die "Empty $stat arrayref" unless @$v1 && @$v2;
         for my $bucket ( 0 .. $#$v1 ) {
            $vals1->{$stat}->[$bucket] += $v2->[$bucket];
         }
      }
      elsif ( ref($v1) eq 'HASH' && ref($v2) eq 'HASH' ) {
         # Hashref stat: probably unique string occurrence counts.
         # Should never be empty.
         die "Empty $stat hashref" unless %$v1 and %$v2;
         $vals1->{$stat}->{$_} += $v2->{$_} for keys %$v2;
      }
      else {
         # Mismatched types; this shouldn't happen.
         PTDEBUG && _d('vals1:', Dumper($vals1));
         PTDEBUG && _d('vals2:', Dumper($vals2));
         die "$stat type mismatch";
      }
   }

   return;
}
|
|
|
|
# These _deep_copy_* subs only go 1 level deep because, so far,
# no ea data struct has a ref any deeper.
#
# Sub: _deep_copy_attribs
#   Deep copy a { attribute => values } hashref, delegating each
#   attribute's value struct to _deep_copy_attrib_vals().
#
# Parameters:
#   $attribs - Hashref of attribute names to their value structs
#
# Returns:
#   New hashref; shares no top-level structure with $attribs.
sub _deep_copy_attribs {
   my ( $attribs ) = @_;
   my %copy = map { $_ => _deep_copy_attrib_vals($attribs->{$_}) }
              keys %$attribs;
   return \%copy;
}
|
|
|
|
# Sub: _deep_copy_attrib_vals
#   Deep copy (1 level deep) a single attribute's value struct: a hashref
#   whose values are scalars, arrayrefs (value buckets), or hashrefs
#   (unique string counts).  Non-hashref input is returned as-is.
#
# Parameters:
#   $vals - Hashref of stat name => scalar|arrayref|hashref (or a scalar)
#
# Returns:
#   A copy sharing no arrayref/hashref values with $vals.
#
# Raises:
#   Dies on a ref type other than ARRAY or HASH.
sub _deep_copy_attrib_vals {
   my ( $vals ) = @_;
   my $copy;
   if ( ref $vals eq 'HASH' ) {
      $copy = {};
      foreach my $val ( keys %$vals ) {
         # BUG FIX: was "ref $val", but $val is a hash KEY (always a plain
         # string), so the deep-copy branches below were dead code and every
         # arrayref/hashref value was shallow-copied (aliased).  Later
         # in-place merging then mutated the source ea's data.  We must
         # inspect the VALUE's ref type.
         if ( my $ref_type = ref $vals->{$val} ) {
            if ( $ref_type eq 'ARRAY' ) {
               # Fresh arrayref with the same elements (scalars only, so a
               # one-level copy is a full copy here).
               $copy->{$val} = [ @{ $vals->{$val} } ];
            }
            elsif ( $ref_type eq 'HASH' ) {
               # Fresh hashref with the same key/value pairs.
               $copy->{$val} = { %{ $vals->{$val} } };
            }
            else {
               die "I don't know how to deep copy a $ref_type reference";
            }
         }
         else {
            $copy->{$val} = $vals->{$val};
         }
      }
   }
   else {
      # Plain scalar (or undef): nothing to copy.
      $copy = $vals;
   }
   return $copy;
}
|
|
|
|
# Sub: _get_value
#   Get the value of the attribute (or one of its alternatives) from the event.
#   Undef is a valid value.  If the attrib or none of its alternatives exist
#   in the event, then this sub dies.
#
# Parameters:
#   %args - Arguments
#
# Required Arguments:
#   event     - Event hashref
#   attribute - Attribute name
#
# Optional Arguments:
#   alternates - Arrayref of alternate attribute names
#
# Returns:
#   Value of attribute in the event, possibly undef.  Returns an empty
#   list/undef without dying if event or attribute is missing/false.
sub _get_value {
   my ( $self, %args ) = @_;
   my ($event, $attrib, $alts) = @args{qw(event attribute alternates)};
   return unless $event && $attrib;

   my $value;
   if ( exists $event->{$attrib} ) {
      $value = $event->{$attrib};
   }
   elsif ( $alts ) {
      # Fall back to the first alternate attribute present in the event.
      # Track "found" separately because undef is a legitimate value.
      my $found_value = 0;
      foreach my $alt_attrib ( @$alts ) {
         if ( exists $event->{$alt_attrib} ) {
            $value       = $event->{$alt_attrib};
            $found_value = 1;
            last;
         }
      }
      die "Event does not have attribute $attrib or any of its alternates"
         unless $found_value;
   }
   else {
      # BUG FIX: error message typo "alterantes" corrected.
      die "Event does not have attribute $attrib and there are no alternates";
   }

   return $value;
}
|
|
|
|
# Sub: _d
#   Debug printer: writes the caller's package and line, the PID, and the
#   given values to STDERR as a "# "-prefixed line.  Embedded newlines are
#   re-prefixed with "# " so multi-line values stay comment-formatted;
#   undef values print as the literal string "undef".
sub _d {
   my ($package, undef, $line) = caller 0;
   my @formatted;
   foreach my $arg ( @_ ) {
      # Work on a copy so the caller's arguments are never modified.
      my $str = defined $arg ? $arg : 'undef';
      $str =~ s/\n/\n# /g;
      push @formatted, $str;
   }
   print STDERR "# $package:$line $PID ", join(' ', @formatted), "\n";
}
|
|
|
|
1;
|
|
}
|
|
# ###########################################################################
|
|
# End EventAggregator package
|
|
# ###########################################################################
|