From a23729c5332ae8f43dd6ee18030c4c3cf9d0a9f5 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Fri, 22 Feb 2013 11:51:21 -0700 Subject: [PATCH] Remove unused modules like SysLogParser and PgLogParser. Start re-writing the docs. Remove some options to indicate where to simplify the tool more. --- bin/pt-query-digest | 1539 ++----------------------------------------- 1 file changed, 58 insertions(+), 1481 deletions(-) diff --git a/bin/pt-query-digest b/bin/pt-query-digest index 5e670dde..a3a3defd 100755 --- a/bin/pt-query-digest +++ b/bin/pt-query-digest @@ -27,8 +27,6 @@ BEGIN { Processlist TcpdumpParser MySQLProtocolParser - SysLogParser - PgLogParser SlowLogParser SlowLogWriter EventAggregator @@ -40,8 +38,6 @@ BEGIN { TableParser QueryReview Daemon - MemcachedProtocolParser - MemcachedEvent BinaryLogParser GeneralLogParser RawLogParser @@ -4824,617 +4820,6 @@ sub _d { # End MySQLProtocolParser package # ########################################################################### -# ########################################################################### -# SysLogParser package -# This package is a copy without comments from the original. The original -# with comments and its test file can be found in the Bazaar repository at, -# lib/SysLogParser.pm -# t/lib/SysLogParser.t -# See https://launchpad.net/percona-toolkit for more information. -# ########################################################################### -{ -package SysLogParser; - -use strict; -use warnings FATAL => 'all'; -use English qw(-no_match_vars); -use constant PTDEBUG => $ENV{PTDEBUG} || 0; - -my $syslog_regex = qr{\A.*\w+\[\d+\]: \[(\d+)-(\d+)\] (.*)\Z}; - -sub new { - my ( $class ) = @_; - my $self = {}; - return bless $self, $class; -} - -sub parse_event { - my ( $self, %args ) = @_; - my ( $next_event, $tell, $is_syslog ) = $self->generate_wrappers(%args); - return $next_event->(); -} - -sub generate_wrappers { - my ( $self, %args ) = @_; - - if ( ($self->{sanity} || '') ne "$args{next_event}" ){ - PTDEBUG && _d("Clearing and recreating internal state"); - @{$self}{qw(next_event tell is_syslog)} = $self->make_closures(%args); - $self->{sanity} = "$args{next_event}"; - } - - return @{$self}{qw(next_event tell is_syslog)}; -} - -sub make_closures { - my ( $self, %args ) = @_; - - my $next_event = $args{'next_event'}; - my $tell = $args{'tell'}; - my $new_event_test = $args{'misc'}->{'new_event_test'}; - my $line_filter = $args{'misc'}->{'line_filter'}; - - my $test_line = $next_event->(); - PTDEBUG && _d('Read first sample/test line:', $test_line); - - if ( defined $test_line && $test_line =~ m/$syslog_regex/o ) { - - PTDEBUG && _d('This looks like a syslog line, PTDEBUG prefix=LLSP'); - - my ($msg_nr, $line_nr, $content) = $test_line =~ m/$syslog_regex/o; - my @pending = ($test_line); - my $last_msg_nr = $msg_nr; - my $pos_in_log = 0; - - my $new_next_event = sub { - PTDEBUG && _d('LLSP: next_event()'); - - PTDEBUG && _d('LLSP: Current virtual $fh position:', $pos_in_log); - my $new_pos = 0; - - my @arg_lines; - - my $line; - LINE: - while ( - defined($line = shift @pending) - || do { - eval { $new_pos = -1; $new_pos = $tell->() }; - defined($line = $next_event->()); - } - ) { - PTDEBUG && _d('LLSP: Line:', $line); - - ($msg_nr, $line_nr, $content) = $line =~ m/$syslog_regex/o; - if ( !$msg_nr ) { - die "Can't parse line: $line"; - } - - elsif ( $msg_nr != $last_msg_nr ) { - PTDEBUG && _d('LLSP: $msg_nr', $last_msg_nr, '=>', $msg_nr); - $last_msg_nr = $msg_nr; - last LINE; - } - - elsif ( @arg_lines && $new_event_test && $new_event_test->($content) ) { - PTDEBUG && _d('LLSP: $new_event_test matches'); - last LINE; - } - - $content =~ s/#(\d{3})/chr(oct($1))/ge; - $content =~ s/\^I/\t/g; - if ( $line_filter ) { - PTDEBUG && _d('LLSP: applying $line_filter'); - $content = $line_filter->($content); - } - - push @arg_lines, $content; - } - PTDEBUG && _d('LLSP: Exited while-loop after finding a complete entry'); - - my $psql_log_event = @arg_lines ? join('', @arg_lines) : undef; - PTDEBUG && _d('LLSP: Final log entry:', $psql_log_event); - - if ( defined $line ) { - PTDEBUG && _d('LLSP: Saving $line:', $line); - @pending = $line; - PTDEBUG && _d('LLSP: $pos_in_log:', $pos_in_log, '=>', $new_pos); - $pos_in_log = $new_pos; - } - else { - PTDEBUG && _d('LLSP: EOF reached'); - @pending = (); - $last_msg_nr = 0; - } - - return $psql_log_event; - }; - - my $new_tell = sub { - PTDEBUG && _d('LLSP: tell()', $pos_in_log); - return $pos_in_log; - }; - - return ($new_next_event, $new_tell, 1); - } - - else { - - PTDEBUG && _d('Plain log, or we are at EOF; PTDEBUG prefix=PLAIN'); - - my @pending = defined $test_line ? ($test_line) : (); - - my $new_next_event = sub { - PTDEBUG && _d('PLAIN: next_event(); @pending:', scalar @pending); - return @pending ? shift @pending : $next_event->(); - }; - my $new_tell = sub { - PTDEBUG && _d('PLAIN: tell(); @pending:', scalar @pending); - return @pending ? 0 : $tell->(); - }; - return ($new_next_event, $new_tell, 0); - } -} - -sub _d { - my ($package, undef, $line) = caller 0; - @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } - map { defined $_ ? $_ : 'undef' } - @_; - print STDERR "# $package:$line $PID ", join(' ', @_), "\n"; -} - -1; -} -# ########################################################################### -# End SysLogParser package -# ########################################################################### - -# ########################################################################### -# PgLogParser package -# This package is a copy without comments from the original. The original -# with comments and its test file can be found in the Bazaar repository at, -# lib/PgLogParser.pm -# t/lib/PgLogParser.t -# See https://launchpad.net/percona-toolkit for more information. -# ########################################################################### -{ -package PgLogParser; - -use strict; -use warnings FATAL => 'all'; -use English qw(-no_match_vars); -use constant PTDEBUG => $ENV{PTDEBUG} || 0; - -use Data::Dumper; -$Data::Dumper::Indent = 1; -$Data::Dumper::Sortkeys = 1; -$Data::Dumper::Quotekeys = 0; - -my $log_line_regex = qr{ - (LOG|DEBUG|CONTEXT|WARNING|ERROR|FATAL|PANIC|HINT - |DETAIL|NOTICE|STATEMENT|INFO|LOCATION) - :\s\s+ - }x; - -my %attrib_name_for = ( - u => 'user', - d => 'db', - r => 'host', # With port - h => 'host', - p => 'Process_id', - t => 'ts', - m => 'ts', # With milliseconds - i => 'Query_type', - c => 'Session_id', - l => 'Line_no', - s => 'Session_id', - v => 'Vrt_trx_id', - x => 'Trx_id', -); - -sub new { - my ( $class ) = @_; - my $self = { - pending => [], - is_syslog => undef, - next_event => undef, - 'tell' => undef, - }; - return bless $self, $class; -} - -sub parse_event { - my ( $self, %args ) = @_; - my @required_args = qw(next_event tell); - foreach my $arg ( @required_args ) { - die "I need a $arg argument" unless $args{$arg}; - } - - my ( $next_event, $tell, $is_syslog ) = $self->generate_wrappers(%args); - - my @properties = (); - - my ($pos_in_log, $line, $was_pending) = $self->get_line(); - my $new_pos; - - my @arg_lines; - - my $done; - - my $got_duration; - - if ( !$was_pending && (!defined $line || $line !~ m/$log_line_regex/o) ) { - PTDEBUG && _d('Skipping lines until I find a header'); - my $found_header; - LINE: - while ( - eval { - ($new_pos, $line) = $self->get_line(); - defined $line; - } - ) { - if ( $line =~ m/$log_line_regex/o ) { - $pos_in_log = $new_pos; - last LINE; - } - else { - PTDEBUG && _d('Line was not a header, will fetch another'); - } - } - PTDEBUG && _d('Found a header line, now at pos_in_line', $pos_in_log); - } - - my $first_line; - - my $line_type; - - LINE: - while ( !$done && defined $line ) { - - chomp $line unless $is_syslog; - - if ( (($line_type) = $line =~ m/$log_line_regex/o) && $line_type ne 'LOG' ) { - - if ( @arg_lines ) { - PTDEBUG && _d('Found a non-LOG line, exiting loop'); - last LINE; - } - - else { - $first_line ||= $line; - - if ( my ($e) = $line =~ m/ERROR:\s+(\S.*)\Z/s ) { - push @properties, 'Error_msg', $e; - PTDEBUG && _d('Found an error msg, saving and continuing'); - ($new_pos, $line) = $self->get_line(); - next LINE; - } - - elsif ( my ($s) = $line =~ m/STATEMENT:\s+(\S.*)\Z/s ) { - push @properties, 'arg', $s, 'cmd', 'Query'; - PTDEBUG && _d('Found a statement, finishing up event'); - $done = 1; - last LINE; - } - - else { - PTDEBUG && _d("I don't know what to do with this line"); - } - } - - } - - if ( - $line =~ m{ - Address\sfamily\snot\ssupported\sby\sprotocol - |archived\stransaction\slog\sfile - |autovacuum:\sprocessing\sdatabase - |checkpoint\srecord\sis\sat - |checkpoints\sare\soccurring\stoo\sfrequently\s\( - |could\snot\sreceive\sdata\sfrom\sclient - |database\ssystem\sis\sready - |database\ssystem\sis\sshut\sdown - |database\ssystem\swas\sshut\sdown - |incomplete\sstartup\spacket - |invalid\slength\sof\sstartup\spacket - |next\sMultiXactId: - |next\stransaction\sID: - |received\ssmart\sshutdown\srequest - |recycled\stransaction\slog\sfile - |redo\srecord\sis\sat - |removing\sfile\s" - |removing\stransaction\slog\sfile\s" - |shutting\sdown - |transaction\sID\swrap\slimit\sis - }x - ) { - PTDEBUG && _d('Skipping this line because it matches skip-pattern'); - ($new_pos, $line) = $self->get_line(); - next LINE; - } - - $first_line ||= $line; - - if ( $line !~ m/$log_line_regex/o && @arg_lines ) { - - if ( !$is_syslog ) { - $line =~ s/\A\t?/\n/; - } - - push @arg_lines, $line; - PTDEBUG && _d('This was a continuation line'); - } - - elsif ( - my ( $sev, $label, $rest ) - = $line =~ m/$log_line_regex(.+?):\s+(.*)\Z/so - ) { - PTDEBUG && _d('Line is case 1 or case 3'); - - if ( @arg_lines ) { - $done = 1; - PTDEBUG && _d('There are saved @arg_lines, we are done'); - - if ( $label eq 'duration' && $rest =~ m/[0-9.]+\s+\S+\Z/ ) { - if ( $got_duration ) { - PTDEBUG && _d('Discarding line, duration already found'); - } - else { - push @properties, 'Query_time', $self->duration_to_secs($rest); - PTDEBUG && _d("Line's duration is for previous event:", $rest); - } - } - else { - $self->pending($new_pos, $line); - PTDEBUG && _d('Deferred line'); - } - } - - elsif ( $label =~ m/\A(?:duration|statement|query)\Z/ ) { - PTDEBUG && _d('Case 1: start a multi-line event'); - - if ( $label eq 'duration' ) { - - if ( - (my ($dur, $stmt) - = $rest =~ m/([0-9.]+ \S+)\s+(?:statement|query): *(.*)\Z/s) - ) { - push @properties, 'Query_time', $self->duration_to_secs($dur); - $got_duration = 1; - push @arg_lines, $stmt; - PTDEBUG && _d('Duration + statement'); - } - - else { - $first_line = undef; - ($pos_in_log, $line) = $self->get_line(); - PTDEBUG && _d('Line applies to event we never saw, discarding'); - next LINE; - } - } - else { - push @arg_lines, $rest; - PTDEBUG && _d('Putting onto @arg_lines'); - } - } - - else { - $done = 1; - PTDEBUG && _d('Line is case 3, event is done'); - - if ( @arg_lines ) { - $self->pending($new_pos, $line); - PTDEBUG && _d('There was @arg_lines, putting line to pending'); - } - - else { - PTDEBUG && _d('No need to defer, process event from this line now'); - push @properties, 'cmd', 'Admin', 'arg', $label; - - if ( $label =~ m/\A(?:dis)?connection(?: received| authorized)?\Z/ ) { - push @properties, $self->get_meta($rest); - } - - else { - die "I don't understand line $line"; - } - - } - } - - } - - else { - die "I don't understand line $line"; - } - - if ( !$done ) { - ($new_pos, $line) = $self->get_line(); - } - } # LINE - - if ( !defined $line ) { - PTDEBUG && _d('Line not defined, at EOF; calling oktorun(0) if exists'); - $args{oktorun}->(0) if $args{oktorun}; - if ( !@arg_lines ) { - PTDEBUG && _d('No saved @arg_lines either, we are all done'); - return undef; - } - } - - if ( $line_type && $line_type ne 'LOG' ) { - PTDEBUG && _d('Line is not a LOG line'); - - if ( $line_type eq 'ERROR' ) { - PTDEBUG && _d('Line is ERROR'); - - if ( @arg_lines ) { - PTDEBUG && _d('There is @arg_lines, will peek ahead one line'); - my ( $temp_pos, $temp_line ) = $self->get_line(); - my ( $type, $msg ); - if ( - defined $temp_line - && ( ($type, $msg) = $temp_line =~ m/$log_line_regex(.*)/o ) - && ( $type ne 'STATEMENT' || $msg eq $arg_lines[-1] ) - ) { - PTDEBUG && _d('Error/statement line pertain to current event'); - push @properties, 'Error_msg', $line =~ m/ERROR:\s*(\S.*)\Z/s; - if ( $type ne 'STATEMENT' ) { - PTDEBUG && _d('Must save peeked line, it is a', $type); - $self->pending($temp_pos, $temp_line); - } - } - elsif ( defined $temp_line && defined $type ) { - PTDEBUG && _d('Error/statement line are a new event'); - $self->pending($new_pos, $line); - $self->pending($temp_pos, $temp_line); - } - else { - PTDEBUG && _d("Unknown line", $line); - } - } - } - else { - PTDEBUG && _d("Unknown line", $line); - } - } - - if ( $done || @arg_lines ) { - PTDEBUG && _d('Making event'); - - push @properties, 'pos_in_log', $pos_in_log; - - if ( @arg_lines ) { - PTDEBUG && _d('Assembling @arg_lines: ', scalar @arg_lines); - push @properties, 'arg', join('', @arg_lines), 'cmd', 'Query'; - } - - if ( $first_line ) { - if ( my ($ts) = $first_line =~ m/([0-9-]{10} [0-9:.]{8,12})/ ) { - PTDEBUG && _d('Getting timestamp', $ts); - push @properties, 'ts', $ts; - } - - if ( my ($meta) = $first_line =~ m/(.*?)[A-Z]{3,}: / ) { - PTDEBUG && _d('Found a meta-data chunk:', $meta); - push @properties, $self->get_meta($meta); - } - } - - PTDEBUG && _d('Properties of event:', Dumper(\@properties)); - my $event = { @properties }; - $event->{bytes} = length($event->{arg} || ''); - return $event; - } - -} - -sub get_meta { - my ( $self, $meta ) = @_; - my @properties; - foreach my $set ( $meta =~ m/(\w+=[^, ]+)/g ) { - my ($key, $val) = split(/=/, $set); - if ( $key && $val ) { - if ( my $prop = $attrib_name_for{lc substr($key, 0, 1)} ) { - push @properties, $prop, $val; - } - else { - PTDEBUG && _d('Bad meta key', $set); - } - } - else { - PTDEBUG && _d("Can't figure out meta from", $set); - } - } - return @properties; -} - -sub get_line { - my ( $self ) = @_; - my ($pos, $line, $was_pending) = $self->pending; - if ( ! defined $line ) { - PTDEBUG && _d('Got nothing from pending, trying the $fh'); - my ( $next_event, $tell) = @{$self}{qw(next_event tell)}; - eval { - $pos = $tell->(); - $line = $next_event->(); - }; - if ( PTDEBUG && $EVAL_ERROR ) { - _d($EVAL_ERROR); - } - } - - PTDEBUG && _d('Got pos/line:', $pos, $line); - return ($pos, $line); -} - -sub pending { - my ( $self, $val, $pos_in_log ) = @_; - my $was_pending; - PTDEBUG && _d('In sub pending, val:', $val); - if ( $val ) { - push @{$self->{pending}}, [$val, $pos_in_log]; - } - elsif ( @{$self->{pending}} ) { - ($val, $pos_in_log) = @{ shift @{$self->{pending}} }; - $was_pending = 1; - } - PTDEBUG && _d('Return from pending:', $val, $pos_in_log); - return ($val, $pos_in_log, $was_pending); -} - -sub generate_wrappers { - my ( $self, %args ) = @_; - - if ( ($self->{sanity} || '') ne "$args{next_event}" ){ - PTDEBUG && _d("Clearing and recreating internal state"); - eval { require SysLogParser; }; # Required for tests to work. - my $sl = new SysLogParser(); - - $args{misc}->{new_event_test} = sub { - my ( $content ) = @_; - return unless defined $content; - return $content =~ m/$log_line_regex/o; - }; - - $args{misc}->{line_filter} = sub { - my ( $content ) = @_; - $content =~ s/\A\t/\n/; - return $content; - }; - - @{$self}{qw(next_event tell is_syslog)} = $sl->make_closures(%args); - $self->{sanity} = "$args{next_event}"; - } - - return @{$self}{qw(next_event tell is_syslog)}; -} - -sub duration_to_secs { - my ( $self, $str ) = @_; - PTDEBUG && _d('Duration:', $str); - my ( $num, $suf ) = split(/\s+/, $str); - my $factor = $suf eq 'ms' ? 1000 - : $suf eq 'sec' ? 1 - : die("Unknown suffix '$suf'"); - return $num / $factor; -} - -sub _d { - my ($package, undef, $line) = caller 0; - @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } - map { defined $_ ? $_ : 'undef' } - @_; - print STDERR "# $package:$line $PID ", join(' ', @_), "\n"; -} - -1; -} -# ########################################################################### -# End PgLogParser package -# ########################################################################### - # ########################################################################### # SlowLogParser package # This package is a copy without comments from the original. The original @@ -9693,522 +9078,6 @@ sub _d { # End Daemon package # ########################################################################### -# ########################################################################### -# MemcachedProtocolParser package -# This package is a copy without comments from the original. The original -# with comments and its test file can be found in the Bazaar repository at, -# lib/MemcachedProtocolParser.pm -# t/lib/MemcachedProtocolParser.t -# See https://launchpad.net/percona-toolkit for more information. -# ########################################################################### -{ -package MemcachedProtocolParser; - -use strict; -use warnings FATAL => 'all'; -use English qw(-no_match_vars); - -use Data::Dumper; -$Data::Dumper::Indent = 1; -$Data::Dumper::Sortkeys = 1; -$Data::Dumper::Quotekeys = 0; - -use constant PTDEBUG => $ENV{PTDEBUG} || 0; - -sub new { - my ( $class, %args ) = @_; - - my $self = { - server => $args{server}, - port => $args{port} || '11211', - sessions => {}, - o => $args{o}, - }; - return bless $self, $class; -} - -sub parse_event { - my ( $self, %args ) = @_; - my @required_args = qw(event); - foreach my $arg ( @required_args ) { - die "I need a $arg argument" unless $args{$arg}; - } - my $packet = @args{@required_args}; - - if ( $packet->{data_len} == 0 ) { - PTDEBUG && _d('No TCP data'); - $args{stats}->{no_tcp_data}++ if $args{stats}; - return; - } - - my $src_host = "$packet->{src_host}:$packet->{src_port}"; - my $dst_host = "$packet->{dst_host}:$packet->{dst_port}"; - - if ( my $server = $self->{server} ) { # Watch only the given server. - $server .= ":$self->{port}"; - if ( $src_host ne $server && $dst_host ne $server ) { - PTDEBUG && _d('Packet is not to or from', $server); - $args{stats}->{not_watched_server}++ if $args{stats}; - return; - } - } - - my $packet_from; - my $client; - if ( $src_host =~ m/:$self->{port}$/ ) { - $packet_from = 'server'; - $client = $dst_host; - } - elsif ( $dst_host =~ m/:$self->{port}$/ ) { - $packet_from = 'client'; - $client = $src_host; - } - else { - warn 'Packet is not to or from memcached server: ', Dumper($packet); - return; - } - PTDEBUG && _d('Client:', $client); - - if ( !exists $self->{sessions}->{$client} ) { - PTDEBUG && _d('New session'); - $self->{sessions}->{$client} = { - client => $client, - state => undef, - raw_packets => [], - }; - }; - my $session = $self->{sessions}->{$client}; - - push @{$session->{raw_packets}}, $packet->{raw_packet}; - - $packet->{data} = pack('H*', $packet->{data}); - my $event; - if ( $packet_from eq 'server' ) { - $event = $self->_packet_from_server($packet, $session, %args); - } - elsif ( $packet_from eq 'client' ) { - $event = $self->_packet_from_client($packet, $session, %args); - } - else { - $args{stats}->{unknown_packet_origin}++ if $args{stats}; - die 'Packet origin unknown'; - } - - PTDEBUG && _d('Done with packet; event:', Dumper($event)); - $args{stats}->{events_parsed}++ if $args{stats}; - return $event; -} - -sub _packet_from_server { - my ( $self, $packet, $session, %args ) = @_; - die "I need a packet" unless $packet; - die "I need a session" unless $session; - - PTDEBUG && _d('Packet is from server; client state:', $session->{state}); - - my $data = $packet->{data}; - - if ( !$session->{state} ) { - PTDEBUG && _d('Ignoring mid-stream server response'); - $args{stats}->{ignored_midstream_server_response}++ if $args{stats}; - return; - } - - if ( $session->{state} eq 'awaiting reply' ) { - PTDEBUG && _d('State is awaiting reply'); - my ($line1, $rest) = $packet->{data} =~ m/\A(.*?)\r\n(.*)?/s; - if ( !$line1 ) { - $args{stats}->{unknown_server_data}++ if $args{stats}; - die "Unknown memcached data from server"; - } - - my @vals = $line1 =~ m/(\S+)/g; - $session->{res} = shift @vals; - PTDEBUG && _d('Result of last', $session->{cmd}, 'cmd:', $session->{res}); - - if ( $session->{cmd} eq 'incr' || $session->{cmd} eq 'decr' ) { - PTDEBUG && _d('It is an incr or decr'); - if ( $session->{res} !~ m/\D/ ) { # It's an integer, not an error - PTDEBUG && _d('Got a value for the incr/decr'); - $session->{val} = $session->{res}; - $session->{res} = ''; - } - } - elsif ( $session->{res} eq 'VALUE' ) { - PTDEBUG && _d('It is the result of a "get"'); - my ($key, $flags, $bytes) = @vals; - defined $session->{flags} or $session->{flags} = $flags; - defined $session->{bytes} or $session->{bytes} = $bytes; - - if ( $rest && $bytes ) { - PTDEBUG && _d('There is a value'); - if ( length($rest) > $bytes ) { - PTDEBUG && _d('Got complete response'); - $session->{val} = substr($rest, 0, $bytes); - } - else { - PTDEBUG && _d('Got partial response, saving for later'); - push @{$session->{partial}}, [ $packet->{seq}, $rest ]; - $session->{gathered} += length($rest); - $session->{state} = 'partial recv'; - return; # Prevent firing an event. - } - } - } - elsif ( $session->{res} eq 'END' ) { - PTDEBUG && _d('Got an END without any data, firing NOT_FOUND'); - $session->{res} = 'NOT_FOUND'; - } - elsif ( $session->{res} !~ m/STORED|DELETED|NOT_FOUND/ ) { - PTDEBUG && _d('Unknown result'); - } - else { - $args{stats}->{unknown_server_response}++ if $args{stats}; - } - } - else { # Should be 'partial recv' - PTDEBUG && _d('Session state: ', $session->{state}); - push @{$session->{partial}}, [ $packet->{seq}, $data ]; - $session->{gathered} += length($data); - PTDEBUG && _d('Gathered', $session->{gathered}, 'bytes in', - scalar(@{$session->{partial}}), 'packets from server'); - if ( $session->{gathered} >= $session->{bytes} + 2 ) { # Done. - PTDEBUG && _d('End of partial response, preparing event'); - my $val = join('', - map { $_->[1] } - sort { $a->[0] <=> $b->[0] } - @{$session->{partial}}); - $session->{val} = substr($val, 0, $session->{bytes}); - } - else { - PTDEBUG && _d('Partial response continues, no action'); - return; # Prevent firing event. - } - } - - PTDEBUG && _d('Creating event, deleting session'); - my $event = make_event($session, $packet); - delete $self->{sessions}->{$session->{client}}; # memcached is stateless! - $session->{raw_packets} = []; # Avoid keeping forever - return $event; -} - -sub _packet_from_client { - my ( $self, $packet, $session, %args ) = @_; - die "I need a packet" unless $packet; - die "I need a session" unless $session; - - PTDEBUG && _d('Packet is from client; state:', $session->{state}); - - my $event; - if ( ($session->{state} || '') =~m/awaiting reply|partial recv/ ) { - PTDEBUG && _d("Expected data from the client, looks like interrupted"); - $session->{res} = 'INTERRUPTED'; - $event = make_event($session, $packet); - my $client = $session->{client}; - delete @{$session}{keys %$session}; - $session->{client} = $client; - } - - my ($line1, $val); - my ($cmd, $key, $flags, $exptime, $bytes); - - if ( !$session->{state} ) { - PTDEBUG && _d('Session state: ', $session->{state}); - ($line1, $val) = $packet->{data} =~ m/\A(.*?)\r\n(.+)?/s; - if ( !$line1 ) { - PTDEBUG && _d('Unknown memcached data from client, skipping packet'); - $args{stats}->{unknown_client_data}++ if $args{stats}; - return; - } - - my @vals = $line1 =~ m/(\S+)/g; - $cmd = lc shift @vals; - PTDEBUG && _d('$cmd is a ', $cmd); - if ( $cmd eq 'set' || $cmd eq 'add' || $cmd eq 'replace' ) { - ($key, $flags, $exptime, $bytes) = @vals; - $session->{bytes} = $bytes; - } - elsif ( $cmd eq 'get' ) { - ($key) = @vals; - if ( $val ) { - PTDEBUG && _d('Multiple cmds:', $val); - $val = undef; - } - } - elsif ( $cmd eq 'delete' ) { - ($key) = @vals; # TODO: handle the - if ( $val ) { - PTDEBUG && _d('Multiple cmds:', $val); - $val = undef; - } - } - elsif ( $cmd eq 'incr' || $cmd eq 'decr' ) { - ($key) = @vals; - } - else { - PTDEBUG && _d("Don't know how to handle", $cmd, "command"); - $args{stats}->{unknown_client_command}++ if $args{stats}; - return; - } - - @{$session}{qw(cmd key flags exptime)} - = ($cmd, $key, $flags, $exptime); - $session->{host} = $packet->{src_host}; - $session->{pos_in_log} = $packet->{pos_in_log}; - $session->{ts} = $packet->{ts}; - } - else { - PTDEBUG && _d('Session state: ', $session->{state}); - $val = $packet->{data}; - } - - $session->{state} = 'awaiting reply'; # Assume we got the whole packet - if ( $val ) { - if ( $session->{bytes} + 2 == length($val) ) { # +2 for the \r\n - PTDEBUG && _d('Complete send'); - $val =~ s/\r\n\Z//; # We got the whole thing. - $session->{val} = $val; - } - else { # We apparently did NOT get the whole thing. - PTDEBUG && _d('Partial send, saving for later'); - push @{$session->{partial}}, - [ $packet->{seq}, $val ]; - $session->{gathered} += length($val); - PTDEBUG && _d('Gathered', $session->{gathered}, 'bytes in', - scalar(@{$session->{partial}}), 'packets from client'); - if ( $session->{gathered} >= $session->{bytes} + 2 ) { # Done. - PTDEBUG && _d('Message looks complete now, saving value'); - $val = join('', - map { $_->[1] } - sort { $a->[0] <=> $b->[0] } - @{$session->{partial}}); - $val =~ s/\r\n\Z//; - $session->{val} = $val; - } - else { - PTDEBUG && _d('Message not complete'); - $val = '[INCOMPLETE]'; - $session->{state} = 'partial send'; - } - } - } - - return $event; -} - -sub make_event { - my ( $session, $packet ) = @_; - my $event = { - cmd => $session->{cmd}, - key => $session->{key}, - val => $session->{val} || '', - res => $session->{res}, - ts => $session->{ts}, - host => $session->{host}, - flags => $session->{flags} || 0, - exptime => $session->{exptime} || 0, - bytes => $session->{bytes} || 0, - Query_time => timestamp_diff($session->{ts}, $packet->{ts}), - pos_in_log => $session->{pos_in_log}, - }; - return $event; -} - -sub _get_errors_fh { - my ( $self ) = @_; - my $errors_fh = $self->{errors_fh}; - return $errors_fh if $errors_fh; - - my $o = $self->{o}; - if ( $o && $o->has('tcpdump-errors') && $o->got('tcpdump-errors') ) { - my $errors_file = $o->get('tcpdump-errors'); - PTDEBUG && _d('tcpdump-errors file:', $errors_file); - open $errors_fh, '>>', $errors_file - or die "Cannot open tcpdump-errors file $errors_file: $OS_ERROR"; - } - - $self->{errors_fh} = $errors_fh; - return $errors_fh; -} - -sub _d { - my ($package, undef, $line) = caller 0; - @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } - map { defined $_ ? $_ : 'undef' } - @_; - print STDERR "# $package:$line $PID ", join(' ', @_), "\n"; -} - -sub timestamp_diff { - my ( $start, $end ) = @_; - my $sd = substr($start, 0, 11, ''); - my $ed = substr($end, 0, 11, ''); - my ( $sh, $sm, $ss ) = split(/:/, $start); - my ( $eh, $em, $es ) = split(/:/, $end); - my $esecs = ($eh * 3600 + $em * 60 + $es); - my $ssecs = ($sh * 3600 + $sm * 60 + $ss); - if ( $sd eq $ed ) { - return sprintf '%.6f', $esecs - $ssecs; - } - else { # Assume only one day boundary has been crossed, no DST, etc - return sprintf '%.6f', ( 86_400 - $ssecs ) + $esecs; - } -} - -1; -} -# ########################################################################### -# End MemcachedProtocolParser package -# ########################################################################### - -# ########################################################################### -# MemcachedEvent package -# This package is a copy without comments from the original. The original -# with comments and its test file can be found in the Bazaar repository at, -# lib/MemcachedEvent.pm -# t/lib/MemcachedEvent.t -# See https://launchpad.net/percona-toolkit for more information. -# ########################################################################### -{ -package MemcachedEvent; - -use strict; -use warnings FATAL => 'all'; -use English qw(-no_match_vars); -use constant PTDEBUG => $ENV{PTDEBUG} || 0; - -use Data::Dumper; -$Data::Dumper::Indent = 1; -$Data::Dumper::Sortkeys = 1; -$Data::Dumper::Quotekeys = 0; - -my %cmds = map { $_ => 1 } qw( - set - add - replace - append - prepend - cas - get - gets - delete - incr - decr -); - -my %cmd_handler_for = ( - set => \&handle_storage_cmd, - add => \&handle_storage_cmd, - replace => \&handle_storage_cmd, - append => \&handle_storage_cmd, - prepend => \&handle_storage_cmd, - cas => \&handle_storage_cmd, - get => \&handle_retr_cmd, - gets => \&handle_retr_cmd, -); - -sub new { - my ( $class, %args ) = @_; - my $self = {}; - return bless $self, $class; -} - -sub parse_event { - my ( $self, %args ) = @_; - my $event = $args{event}; - return unless $event; - - if ( !$event->{cmd} || !$event->{key} ) { - PTDEBUG && _d('Event has no cmd or key:', Dumper($event)); - return; - } - - if ( !$cmds{$event->{cmd}} ) { - PTDEBUG && _d("Don't know how to handle cmd:", $event->{cmd}); - return; - } - - $event->{arg} = "$event->{cmd} $event->{key}"; - $event->{fingerprint} = $self->fingerprint($event->{arg}); - $event->{key_print} = $self->fingerprint($event->{key}); - - map { $event->{"Memc_$_"} = 'No' } keys %cmds; - $event->{"Memc_$event->{cmd}"} = 'Yes'; # Got this cmd. - $event->{Memc_error} = 'No'; # A handler may change this. - $event->{Memc_miss} = 'No'; - if ( $event->{res} ) { - $event->{Memc_miss} = 'Yes' if $event->{res} eq 'NOT_FOUND'; - } - else { - PTDEBUG && _d('Event has no res:', Dumper($event)); - } - - if ( $cmd_handler_for{$event->{cmd}} ) { - return $cmd_handler_for{$event->{cmd}}->($event); - } - - return $event; -} - -sub fingerprint { - my ( $self, $val ) = @_; - $val =~ s/[0-9A-Fa-f]{16,}|\d+/?/g; - return $val; -} - -sub handle_storage_cmd { - my ( $event ) = @_; - - if ( !$event->{res} ) { - PTDEBUG && _d('No result for event:', Dumper($event)); - return; - } - - $event->{'Memc_Not_Stored'} = $event->{res} eq 'NOT_STORED' ? 'Yes' : 'No'; - $event->{'Memc_Exists'} = $event->{res} eq 'EXISTS' ? 'Yes' : 'No'; - - return $event; -} - -sub handle_retr_cmd { - my ( $event ) = @_; - - if ( !$event->{res} ) { - PTDEBUG && _d('No result for event:', Dumper($event)); - return; - } - - $event->{'Memc_error'} = $event->{res} eq 'INTERRUPTED' ? 'Yes' : 'No'; - - return $event; -} - - -sub handle_delete { - my ( $event ) = @_; - return $event; -} - -sub handle_incr_decr_cmd { - my ( $event ) = @_; - return $event; -} - -sub _d { - my ($package, undef, $line) = caller 0; - @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } - map { defined $_ ? $_ : 'undef' } - @_; - print STDERR "# $package:$line $PID ", join(' ', @_), "\n"; -} - -1; -} -# ########################################################################### -# End MemcachedEvent package -# ########################################################################### - # ########################################################################### # BinaryLogParser package # This package is a copy without comments from the original. The original @@ -13793,7 +12662,6 @@ sub main { } $o->set('order-by', \@orderby); - my $run_time_mode = lc $o->get('run-time-mode'); my $run_time_interval; eval { @@ -14143,10 +13011,6 @@ sub main { binlog => ['BinaryLogParser'], genlog => ['GeneralLogParser'], tcpdump => ['TcpdumpParser','MySQLProtocolParser'], - memcached => ['TcpdumpParser','MemcachedProtocolParser', - 'MemcachedEvent'], - http => ['TcpdumpParser','HTTPProtocolParser'], - pglog => ['PgLogParser'], rawlog => ['RawLogParser'], ); my $type = $o->get('type'); @@ -14455,37 +13319,6 @@ sub main { } if ( PTDEBUG ) { - # Print statistics about internal counters. This option is mostly for - # development and debugging. The statistics report is printed for each - # iteration after all other reports, even if no events are processed or - # C<--no-report> is specified. The statistics report looks like: - - # No events processed. - - # Statistic Count %/Events - # ================================================ ====== ======== - # events_read 142030 100.00 - # events_parsed 50430 35.51 - # events_aggregated 0 0.00 - # ignored_midstream_server_response 18111 12.75 - # no_tcp_data 91600 64.49 - # pipeline_restarted_after_MemcachedProtocolParser 142030 100.00 - # pipeline_restarted_after_TcpdumpParser 1 0.00 - # unknown_client_command 1 0.00 - # unknown_client_data 32318 22.75 - - # The first column is the internal counter name; the second column is counter's - # count; and the third column is the count as a percentage of C. - - # In this case, it shows why no events were processed/aggregated: 100% of events - # were rejected by the C. Of those, 35.51% were data - # packets, but of these 12.75% of ignored mid-stream server response, one was - # an unknown client command, and 22.75% were unknown client data. The other - # 64.49% were TCP control packets (probably most ACKs). - - # Since pt-query-digest is complex, you will probably need someone familiar - # with its code to decipher the statistics report. - if ( keys %stats ) { my $report = new ReportFormatter( line_width => 74, @@ -15439,38 +14272,36 @@ if ( !caller ) { exit main(@ARGV); } =head1 NAME -pt-query-digest - Analyze query execution logs and generate a query report, filter, replay, or transform queries for MySQL, PostgreSQL, memcached, and more. +pt-query-digest - Analyze MySQL queries from logs, processlist, and tcpdump. =head1 SYNOPSIS -Usage: pt-query-digest [OPTION...] [FILE] +Usage: pt-query-digest [OPTIONS] [FILES] [DSN] -pt-query-digest parses and analyzes MySQL log files. With no FILE, or when -FILE is -, it read standard input. +pt-query-digest analyzes queries from MySQL slow, general, and binary log +files. It can also analyze queries from C and MySQL +protocol data from tcpdump. By default, queries are grouped by fingerprint +and reported in descending order of query time (i.e. the slowest queries +first). If no C are given, the tool reads C. The optional +C is used for certain options like L<"--since"> and L<"--until">. -Analyze, aggregate, and report on a slow query log: +Report the slowest queries from C: - pt-query-digest /path/to/slow.log + pt-query-digest slow.log -Review a slow log, saving results to the test.query_review table in a MySQL -server running on host1. See L<"--review"> for more on reviewing queries: +Report the slowest queries from the processlist on host1: - pt-query-digest --review h=host1 --review-table test.query_review - --history-table test.query_history /path/to/slow.log + pt-query-digest --processlist h=host1 -Print the structure of events so you can construct a complex L<"--filter">: +Capture MySQL protocol data with tcppdump, then report the slowest queries: - pt-query-digest /path/to/slow.log --no-report \ - --filter 'print Dumper($event)' + tcpdump -s 65535 -x -nn -q -tttt -i any -c 1000 port 3306 > mysql.tcp.txt -Watch SHOW FULL PROCESSLIST and output a log in slow query log format: + pt-query-digest --type tcpdump mysql.tcp.txt - pt-query-digest --processlist h=host1 --print --no-report +Save query data from C to host2 for later review and trend analysis: -The default aggregation and analysis is CPU and memory intensive. Disable it if -you don't need the default report: - - pt-query-digest --no-report + pt-query-digest --review h=host2 --no-report slow.log =head1 RISKS @@ -15511,9 +14342,6 @@ this is that you can keep track of changes to your server's queries and avoid repeated work. You can also save other information with the queries, such as comments, issue numbers in your ticketing system, and so on. -Note that this is a work in *very* active progress and you should expect -incompatible changes in the future. - =head1 ATTRIBUTES pt-query-digest works on events, which are a collection of key/value pairs @@ -15544,25 +14372,12 @@ automatically appears in the output: Attributes created this way can be specified for L<"--order-by"> or any option that requires an attribute. -=head2 memcached - -memcached events have additional attributes related to the memcached protocol: -cmd, key, res (result) and val. Also, boolean attributes are created for -the various commands, misses and errors: Memc_CMD where CMD is a memcached -command (get, set, delete, etc.), Memc_error and Memc_miss. - -These attributes are no different from slow log attributes, so you can use them -with L<"--[no]report">, L<"--group-by">, in a L<"--filter">, etc. - -See the memcached section of L<"ATTRIBUTES REFERENCE"> for a list of -memcached-specific attributes. - =head1 OUTPUT -The default output is a query analysis report. The L<"--[no]report"> option -controls whether or not this report is printed. Sometimes you may wish to -parse all the queries but suppress the report, for example when using -L<"--print"> or L<"--review">. +The default L<"--output"> is a query analysis report. The L<"--[no]report"> +option controls whether or not this report is printed. Sometimes you may +want to parse all the queries but suppress the report, for example when using +L<"--review">. There is one paragraph for each class of query analyzed. A "class" of queries all have the same value for the L<"--group-by"> attribute which is @@ -15620,10 +14435,14 @@ select the reviewed query's details from the database with a query like C