pqd: Remove --type pglog, memcached, and http

This commit is contained in:
Brian Fraser
2013-02-25 12:13:35 -03:00
parent 45a21acc0c
commit 8e449a1c83
75 changed files with 12 additions and 224362 deletions

View File

@@ -1,242 +0,0 @@
# This program is copyright 2009-2011 Percona Ireland Ltd.
# Feedback and improvements are welcome.
#
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
# licenses.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
# Place, Suite 330, Boston, MA 02111-1307 USA.
# ###########################################################################
# HTTPProtocolParser package
# ###########################################################################
{
# Package: HTTPProtocolParser
# HTTPProtocolParser parses HTTP traffic from tcpdump files.
package HTTPProtocolParser;
use base 'ProtocolParser';
use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
# server is the "host:port" of the sever being watched. It's auto-guessed if
# not specified.
sub new {
my ( $class, %args ) = @_;
my $self = $class->SUPER::new(
%args,
port => 80,
);
return $self;
}
# Handles a packet from the server given the state of the session. Returns an
# event if one was ready to be created, otherwise returns nothing.
sub _packet_from_server {
my ( $self, $packet, $session, $misc ) = @_;
die "I need a packet" unless $packet;
die "I need a session" unless $session;
PTDEBUG && _d('Packet is from server; client state:', $session->{state});
# If there's no session state, then we're catching a server response
# mid-stream.
if ( !$session->{state} ) {
PTDEBUG && _d('Ignoring mid-stream server response');
return;
}
if ( $session->{out_of_order} ) {
# We're waiting for the header so we can get the content length.
# Once we know this, we can determine how many out of order packets
# we need to complete the request, then order them and re-process.
my ($line1, $content);
if ( !$session->{have_header} ) {
($line1, $content) = $self->_parse_header(
$session, $packet->{data}, $packet->{data_len});
}
if ( $line1 ) {
$session->{have_header} = 1;
$packet->{content_len} = length $content;
PTDEBUG && _d('Got out of order header with',
$packet->{content_len}, 'bytes of content');
}
my $have_len = $packet->{content_len} || $packet->{data_len};
map { $have_len += $_->{data_len} }
@{$session->{packets}};
$session->{have_all_packets}
= 1 if $session->{attribs}->{bytes}
&& $have_len >= $session->{attribs}->{bytes};
PTDEBUG && _d('Have', $have_len, 'of', $session->{attribs}->{bytes});
return;
}
# Assume that the server is returning only one value.
# TODO: make it handle multiple.
if ( $session->{state} eq 'awaiting reply' ) {
# Save this early because we may return early if the packets
# are being received out of order. Also, save it only once
# in case we re-process packets if they're out of order.
$session->{start_reply} = $packet->{ts} unless $session->{start_reply};
# Get first line of header and first chunk of contents/data.
my ($line1, $content) = $self->_parse_header($session, $packet->{data},
$packet->{data_len});
# The reponse, when in order, is text header followed by data.
# If there's no line1, then we didn't get the text header first
# which means we're getting the response in out of order packets.
if ( !$line1 ) {
$session->{out_of_order} = 1; # alert parent
$session->{have_all_packets} = 0;
return;
}
# First line should be: version code phrase
# E.g.: HTTP/1.1 200 OK
my ($version, $code, $phrase) = $line1 =~ m/(\S+)/g;
$session->{attribs}->{Status_code} = $code;
PTDEBUG && _d('Status code for last', $session->{attribs}->{arg},
'request:', $session->{attribs}->{Status_code});
my $content_len = $content ? length $content : 0;
PTDEBUG && _d('Got', $content_len, 'bytes of content');
if ( $session->{attribs}->{bytes}
&& $content_len < $session->{attribs}->{bytes} ) {
$session->{data_len} = $session->{attribs}->{bytes};
$session->{buff} = $content;
$session->{buff_left} = $session->{attribs}->{bytes} - $content_len;
PTDEBUG && _d('Contents not complete,', $session->{buff_left},
'bytes left');
$session->{state} = 'recving content';
return;
}
}
elsif ( $session->{state} eq 'recving content' ) {
if ( $session->{buff} ) {
PTDEBUG && _d('Receiving content,', $session->{buff_left},
'bytes left');
return;
}
PTDEBUG && _d('Contents received');
}
else {
# TODO:
warn "Server response in unknown state";
return;
}
PTDEBUG && _d('Creating event, deleting session');
$session->{end_reply} = $session->{ts_max} || $packet->{ts};
my $event = $self->make_event($session, $packet);
delete $self->{sessions}->{$session->{client}}; # http is stateless!
return $event;
}
# Handles a packet from the client given the state of the session.
sub _packet_from_client {
my ( $self, $packet, $session, $misc ) = @_;
die "I need a packet" unless $packet;
die "I need a session" unless $session;
PTDEBUG && _d('Packet is from client; state:', $session->{state});
my $event;
if ( ($session->{state} || '') =~ m/awaiting / ) {
PTDEBUG && _d('More client headers:', $packet->{data});
return;
}
if ( !$session->{state} ) {
$session->{state} = 'awaiting reply';
my ($line1, undef) = $self->_parse_header($session, $packet->{data}, $packet->{data_len});
# First line should be: request page version
# E.g.: GET /foo.html HTTP/1.1
my ($request, $page, $version) = $line1 =~ m/(\S+)/g;
if ( !$request || !$page ) {
PTDEBUG && _d("Didn't get a request or page:", $request, $page);
return;
}
$request = lc $request;
my $vh = $session->{attribs}->{Virtual_host} || '';
my $arg = "$request $vh$page";
PTDEBUG && _d('arg:', $arg);
if ( $request eq 'get' || $request eq 'post' ) {
@{$session->{attribs}}{qw(arg)} = ($arg);
}
else {
PTDEBUG && _d("Don't know how to handle a", $request, "request");
return;
}
$session->{start_request} = $packet->{ts};
$session->{attribs}->{host} = $packet->{src_host};
$session->{attribs}->{pos_in_log} = $packet->{pos_in_log};
$session->{attribs}->{ts} = $packet->{ts};
}
else {
# TODO:
die "Probably multiple GETs from client before a server response?";
}
return $event;
}
sub _parse_header {
my ( $self, $session, $data, $len, $no_recurse ) = @_;
die "I need data" unless $data;
my ($header, $content) = split(/\r\n\r\n/, $data);
my ($line1, $header_vals) = $header =~ m/\A(\S+ \S+ .+?)\r\n(.+)?/s;
PTDEBUG && _d('HTTP header:', $line1);
return unless $line1;
if ( !$header_vals ) {
PTDEBUG && _d('No header vals');
return $line1, undef;
}
my @headers;
foreach my $val ( split(/\r\n/, $header_vals) ) {
last unless $val;
# Capture and save any useful header values.
PTDEBUG && _d('HTTP header:', $val);
if ( $val =~ m/^Content-Length/i ) {
($session->{attribs}->{bytes}) = $val =~ /: (\d+)/;
PTDEBUG && _d('Saved Content-Length:', $session->{attribs}->{bytes});
}
if ( $val =~ m/Content-Encoding/i ) {
($session->{compressed}) = $val =~ /: (\w+)/;
PTDEBUG && _d('Saved Content-Encoding:', $session->{compressed});
}
if ( $val =~ m/^Host/i ) {
# The "host" attribute is already taken, so we call this "domain".
($session->{attribs}->{Virtual_host}) = $val =~ /: (\S+)/;
PTDEBUG && _d('Saved Host:', ($session->{attribs}->{Virtual_host}));
}
}
return $line1, $content;
}
sub _d {
my ($package, undef, $line) = caller 0;
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
map { defined $_ ? $_ : 'undef' }
@_;
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
}
1;
}
# ###########################################################################
# End HTTPProtocolParser package
# ###########################################################################

View File

@@ -1,216 +0,0 @@
# This program is copyright 2009-2011 Percona Ireland Ltd.
# Feedback and improvements are welcome.
#
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
# licenses.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
# Place, Suite 330, Boston, MA 02111-1307 USA.
# ###########################################################################
# MemcachedEvent package
# ###########################################################################
{
# Package: MemcachedEvent
# MemcachedEvent creates events from <MemcachedProtocolParser> data.
# Since memcached is not strictly MySQL stuff, we have to
# fabricate MySQL-like query events from memcached.
#
# See http://code.sixapart.com/svn/memcached/trunk/server/doc/protocol.txt
# for information about the memcached protocol.
package MemcachedEvent;
use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
use Data::Dumper;
$Data::Dumper::Indent = 1;
$Data::Dumper::Sortkeys = 1;
$Data::Dumper::Quotekeys = 0;
# cmds that we know how to handle.
my %cmds = map { $_ => 1 } qw(
set
add
replace
append
prepend
cas
get
gets
delete
incr
decr
);
my %cmd_handler_for = (
set => \&handle_storage_cmd,
add => \&handle_storage_cmd,
replace => \&handle_storage_cmd,
append => \&handle_storage_cmd,
prepend => \&handle_storage_cmd,
cas => \&handle_storage_cmd,
get => \&handle_retr_cmd,
gets => \&handle_retr_cmd,
);
sub new {
my ( $class, %args ) = @_;
my $self = {};
return bless $self, $class;
}
# Given an event from MemcachedProtocolParser, returns an event
# more suitable for mk-query-digest.
sub parse_event {
my ( $self, %args ) = @_;
my $event = $args{event};
return unless $event;
if ( !$event->{cmd} || !$event->{key} ) {
PTDEBUG && _d('Event has no cmd or key:', Dumper($event));
return;
}
if ( !$cmds{$event->{cmd}} ) {
PTDEBUG && _d("Don't know how to handle cmd:", $event->{cmd});
return;
}
# For a normal event, arg is the query. For memcached, the "query" is
# essentially the cmd and key, so this becomes arg. E.g.: "set mk_key".
$event->{arg} = "$event->{cmd} $event->{key}";
$event->{fingerprint} = $self->fingerprint($event->{arg});
$event->{key_print} = $self->fingerprint($event->{key});
# Set every cmd so that aggregated totals will be correct. If we only
# set cmd that we get, then all cmds will show as 100% in the report.
# This will create a lot of 0% cmds, but --[no]zero-bool will remove them.
# Think of events in a Percona-patched log: the attribs like Full_scan are
# present for every event.
map { $event->{"Memc_$_"} = 'No' } keys %cmds;
$event->{"Memc_$event->{cmd}"} = 'Yes'; # Got this cmd.
$event->{Memc_error} = 'No'; # A handler may change this.
$event->{Memc_miss} = 'No';
if ( $event->{res} ) {
$event->{Memc_miss} = 'Yes' if $event->{res} eq 'NOT_FOUND';
}
else {
# This normally happens with incr and decr cmds.
PTDEBUG && _d('Event has no res:', Dumper($event));
}
# Handle special results, errors, etc. The handler should return the
# event on success, or nothing on failure.
if ( $cmd_handler_for{$event->{cmd}} ) {
return $cmd_handler_for{$event->{cmd}}->($event);
}
return $event;
}
# Replace things that look like placeholders with a ?
sub fingerprint {
my ( $self, $val ) = @_;
$val =~ s/[0-9A-Fa-f]{16,}|\d+/?/g;
return $val;
}
# Possible results for storage cmds:
# - "STORED\r\n", to indicate success.
#
# - "NOT_STORED\r\n" to indicate the data was not stored, but not
# because of an error. This normally means that either that the
# condition for an "add" or a "replace" command wasn't met, or that the
# item is in a delete queue (see the "delete" command below).
#
# - "EXISTS\r\n" to indicate that the item you are trying to store with
# a "cas" command has been modified since you last fetched it.
#
# - "NOT_FOUND\r\n" to indicate that the item you are trying to store
# with a "cas" command did not exist or has been deleted.
sub handle_storage_cmd {
my ( $event ) = @_;
# There should be a result for any storage cmd.
if ( !$event->{res} ) {
PTDEBUG && _d('No result for event:', Dumper($event));
return;
}
$event->{'Memc_Not_Stored'} = $event->{res} eq 'NOT_STORED' ? 'Yes' : 'No';
$event->{'Memc_Exists'} = $event->{res} eq 'EXISTS' ? 'Yes' : 'No';
return $event;
}
# Technically, the only results for a retrieval cmd are the values requested.
# "If some of the keys appearing in a retrieval request are not sent back
# by the server in the item list this means that the server does not
# hold items with such keys (because they were never stored, or stored
# but deleted to make space for more items, or expired, or explicitly
# deleted by a client)."
# Contrary to this, MemcacedProtocolParser will set res='VALUE' on
# success, res='NOT_FOUND' on failure, or res='INTERRUPTED' if the get
# didn't finish.
sub handle_retr_cmd {
my ( $event ) = @_;
# There should be a result for any retr cmd.
if ( !$event->{res} ) {
PTDEBUG && _d('No result for event:', Dumper($event));
return;
}
$event->{'Memc_error'} = $event->{res} eq 'INTERRUPTED' ? 'Yes' : 'No';
return $event;
}
# handle_delete() and handle_incr_decr_cmd() are stub subs in case we
# need them later.
# Possible results for a delete cmd:
# - "DELETED\r\n" to indicate success
#
# - "NOT_FOUND\r\n" to indicate that the item with this key was not
# found.
sub handle_delete {
my ( $event ) = @_;
return $event;
}
# Possible results for an incr or decr cmd:
# - "NOT_FOUND\r\n" to indicate the item with this value was not found
#
# - <value>\r\n , where <value> is the new value of the item's data,
# after the increment/decrement operation was carried out.
# On success, MemcachedProtocolParser sets res='' and val=the new val.
# On failure, res=the result and val=''.
sub handle_incr_decr_cmd {
my ( $event ) = @_;
return $event;
}
sub _d {
my ($package, undef, $line) = caller 0;
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
map { defined $_ ? $_ : 'undef' }
@_;
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
}
1;
}
# ###########################################################################
# End MemcachedEvent package
# ###########################################################################

View File

@@ -1,424 +0,0 @@
# This program is copyright 2007-2011 Percona Ireland Ltd.
# Feedback and improvements are welcome.
#
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
# licenses.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
# Place, Suite 330, Boston, MA 02111-1307 USA.
# ###########################################################################
# MemcachedProtocolParser package
# ###########################################################################
{
# Package: MemcachedProtocolParser
# MemcachedProtocolParser parses memcached events from tcpdump files.
package MemcachedProtocolParser;
use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use Data::Dumper;
$Data::Dumper::Indent = 1;
$Data::Dumper::Sortkeys = 1;
$Data::Dumper::Quotekeys = 0;
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
sub new {
my ( $class, %args ) = @_;
my $self = {
server => $args{server},
port => $args{port} || '11211',
sessions => {},
o => $args{o},
};
return bless $self, $class;
}
# The packet arg should be a hashref from TcpdumpParser::parse_event().
# misc is a placeholder for future features.
sub parse_event {
my ( $self, %args ) = @_;
my @required_args = qw(event);
foreach my $arg ( @required_args ) {
die "I need a $arg argument" unless $args{$arg};
}
my $packet = @args{@required_args};
# Return early if there's no TCP data. These are usually ACK packets, but
# they could also be FINs in which case, we should close and delete the
# client's session.
# TODO: It seems we don't handle FIN here? So I moved this code block here.
if ( $packet->{data_len} == 0 ) {
PTDEBUG && _d('No TCP data');
$args{stats}->{no_tcp_data}++ if $args{stats};
return;
}
my $src_host = "$packet->{src_host}:$packet->{src_port}";
my $dst_host = "$packet->{dst_host}:$packet->{dst_port}";
if ( my $server = $self->{server} ) { # Watch only the given server.
$server .= ":$self->{port}";
if ( $src_host ne $server && $dst_host ne $server ) {
PTDEBUG && _d('Packet is not to or from', $server);
$args{stats}->{not_watched_server}++ if $args{stats};
return;
}
}
# Auto-detect the server by looking for port 11211
my $packet_from;
my $client;
if ( $src_host =~ m/:$self->{port}$/ ) {
$packet_from = 'server';
$client = $dst_host;
}
elsif ( $dst_host =~ m/:$self->{port}$/ ) {
$packet_from = 'client';
$client = $src_host;
}
else {
warn 'Packet is not to or from memcached server: ', Dumper($packet);
return;
}
PTDEBUG && _d('Client:', $client);
# Get the client's session info or create a new session if the
# client hasn't been seen before.
if ( !exists $self->{sessions}->{$client} ) {
PTDEBUG && _d('New session');
$self->{sessions}->{$client} = {
client => $client,
state => undef,
raw_packets => [],
# ts -- wait for ts later.
};
};
my $session = $self->{sessions}->{$client};
# Save raw packets to dump later in case something fails.
push @{$session->{raw_packets}}, $packet->{raw_packet};
# Finally, parse the packet and maybe create an event.
$packet->{data} = pack('H*', $packet->{data});
my $event;
if ( $packet_from eq 'server' ) {
$event = $self->_packet_from_server($packet, $session, %args);
}
elsif ( $packet_from eq 'client' ) {
$event = $self->_packet_from_client($packet, $session, %args);
}
else {
# Should not get here.
$args{stats}->{unknown_packet_origin}++ if $args{stats};
die 'Packet origin unknown';
}
PTDEBUG && _d('Done with packet; event:', Dumper($event));
$args{stats}->{events_parsed}++ if $args{stats};
return $event;
}
# Handles a packet from the server given the state of the session. Returns an
# event if one was ready to be created, otherwise returns nothing.
sub _packet_from_server {
my ( $self, $packet, $session, %args ) = @_;
die "I need a packet" unless $packet;
die "I need a session" unless $session;
PTDEBUG && _d('Packet is from server; client state:', $session->{state});
my $data = $packet->{data};
# If there's no session state, then we're catching a server response
# mid-stream.
if ( !$session->{state} ) {
PTDEBUG && _d('Ignoring mid-stream server response');
$args{stats}->{ignored_midstream_server_response}++ if $args{stats};
return;
}
# Assume that the server is returning only one value. TODO: make it
# handle multi-gets.
if ( $session->{state} eq 'awaiting reply' ) {
PTDEBUG && _d('State is awaiting reply');
# \r\n == 0d0a
my ($line1, $rest) = $packet->{data} =~ m/\A(.*?)\r\n(.*)?/s;
if ( !$line1 ) {
$args{stats}->{unknown_server_data}++ if $args{stats};
die "Unknown memcached data from server";
}
# Split up the first line into its parts.
my @vals = $line1 =~ m/(\S+)/g;
$session->{res} = shift @vals;
PTDEBUG && _d('Result of last', $session->{cmd}, 'cmd:', $session->{res});
if ( $session->{cmd} eq 'incr' || $session->{cmd} eq 'decr' ) {
PTDEBUG && _d('It is an incr or decr');
if ( $session->{res} !~ m/\D/ ) { # It's an integer, not an error
PTDEBUG && _d('Got a value for the incr/decr');
$session->{val} = $session->{res};
$session->{res} = '';
}
}
elsif ( $session->{res} eq 'VALUE' ) {
PTDEBUG && _d('It is the result of a "get"');
my ($key, $flags, $bytes) = @vals;
defined $session->{flags} or $session->{flags} = $flags;
defined $session->{bytes} or $session->{bytes} = $bytes;
# Get the value from the $rest.
# TODO: there might be multiple responses
if ( $rest && $bytes ) {
PTDEBUG && _d('There is a value');
if ( length($rest) > $bytes ) {
PTDEBUG && _d('Got complete response');
$session->{val} = substr($rest, 0, $bytes);
}
else {
PTDEBUG && _d('Got partial response, saving for later');
push @{$session->{partial}}, [ $packet->{seq}, $rest ];
$session->{gathered} += length($rest);
$session->{state} = 'partial recv';
return; # Prevent firing an event.
}
}
}
elsif ( $session->{res} eq 'END' ) {
# Technically NOT_FOUND is an error, and this isn't an error it's just
# a NULL, but what it really means is the value isn't found.
PTDEBUG && _d('Got an END without any data, firing NOT_FOUND');
$session->{res} = 'NOT_FOUND';
}
elsif ( $session->{res} !~ m/STORED|DELETED|NOT_FOUND/ ) {
# Not really sure what else would get us here... want to make a note
# and not have an uncaught condition.
PTDEBUG && _d('Unknown result');
}
else {
$args{stats}->{unknown_server_response}++ if $args{stats};
}
}
else { # Should be 'partial recv'
PTDEBUG && _d('Session state: ', $session->{state});
push @{$session->{partial}}, [ $packet->{seq}, $data ];
$session->{gathered} += length($data);
PTDEBUG && _d('Gathered', $session->{gathered}, 'bytes in',
scalar(@{$session->{partial}}), 'packets from server');
if ( $session->{gathered} >= $session->{bytes} + 2 ) { # Done.
PTDEBUG && _d('End of partial response, preparing event');
my $val = join('',
map { $_->[1] }
# Sort in proper sequence because TCP might reorder them.
sort { $a->[0] <=> $b->[0] }
@{$session->{partial}});
$session->{val} = substr($val, 0, $session->{bytes});
}
else {
PTDEBUG && _d('Partial response continues, no action');
return; # Prevent firing event.
}
}
PTDEBUG && _d('Creating event, deleting session');
my $event = make_event($session, $packet);
delete $self->{sessions}->{$session->{client}}; # memcached is stateless!
$session->{raw_packets} = []; # Avoid keeping forever
return $event;
}
# Handles a packet from the client given the state of the session.
sub _packet_from_client {
my ( $self, $packet, $session, %args ) = @_;
die "I need a packet" unless $packet;
die "I need a session" unless $session;
PTDEBUG && _d('Packet is from client; state:', $session->{state});
my $event;
if ( ($session->{state} || '') =~m/awaiting reply|partial recv/ ) {
# Whoa, we expected something from the server, not the client. Fire an
# INTERRUPTED with what we've got, and create a new session.
PTDEBUG && _d("Expected data from the client, looks like interrupted");
$session->{res} = 'INTERRUPTED';
$event = make_event($session, $packet);
my $client = $session->{client};
delete @{$session}{keys %$session};
$session->{client} = $client;
}
my ($line1, $val);
my ($cmd, $key, $flags, $exptime, $bytes);
if ( !$session->{state} ) {
PTDEBUG && _d('Session state: ', $session->{state});
# Split up the first line into its parts.
($line1, $val) = $packet->{data} =~ m/\A(.*?)\r\n(.+)?/s;
if ( !$line1 ) {
PTDEBUG && _d('Unknown memcached data from client, skipping packet');
$args{stats}->{unknown_client_data}++ if $args{stats};
return;
}
# TODO: handle <cas unique> and [noreply]
my @vals = $line1 =~ m/(\S+)/g;
$cmd = lc shift @vals;
PTDEBUG && _d('$cmd is a ', $cmd);
if ( $cmd eq 'set' || $cmd eq 'add' || $cmd eq 'replace' ) {
($key, $flags, $exptime, $bytes) = @vals;
$session->{bytes} = $bytes;
}
elsif ( $cmd eq 'get' ) {
($key) = @vals;
if ( $val ) {
PTDEBUG && _d('Multiple cmds:', $val);
$val = undef;
}
}
elsif ( $cmd eq 'delete' ) {
($key) = @vals; # TODO: handle the <queue_time>
if ( $val ) {
PTDEBUG && _d('Multiple cmds:', $val);
$val = undef;
}
}
elsif ( $cmd eq 'incr' || $cmd eq 'decr' ) {
($key) = @vals;
}
else {
PTDEBUG && _d("Don't know how to handle", $cmd, "command");
$args{stats}->{unknown_client_command}++ if $args{stats};
return;
}
@{$session}{qw(cmd key flags exptime)}
= ($cmd, $key, $flags, $exptime);
$session->{host} = $packet->{src_host};
$session->{pos_in_log} = $packet->{pos_in_log};
$session->{ts} = $packet->{ts};
}
else {
PTDEBUG && _d('Session state: ', $session->{state});
$val = $packet->{data};
}
# Handle the rest of the packet. It might not be the whole value that was
# sent, for example for a big set(). We need to look at the number of bytes
# and see if we got it all.
$session->{state} = 'awaiting reply'; # Assume we got the whole packet
if ( $val ) {
if ( $session->{bytes} + 2 == length($val) ) { # +2 for the \r\n
PTDEBUG && _d('Complete send');
$val =~ s/\r\n\Z//; # We got the whole thing.
$session->{val} = $val;
}
else { # We apparently did NOT get the whole thing.
PTDEBUG && _d('Partial send, saving for later');
push @{$session->{partial}},
[ $packet->{seq}, $val ];
$session->{gathered} += length($val);
PTDEBUG && _d('Gathered', $session->{gathered}, 'bytes in',
scalar(@{$session->{partial}}), 'packets from client');
if ( $session->{gathered} >= $session->{bytes} + 2 ) { # Done.
PTDEBUG && _d('Message looks complete now, saving value');
$val = join('',
map { $_->[1] }
# Sort in proper sequence because TCP might reorder them.
sort { $a->[0] <=> $b->[0] }
@{$session->{partial}});
$val =~ s/\r\n\Z//;
$session->{val} = $val;
}
else {
PTDEBUG && _d('Message not complete');
$val = '[INCOMPLETE]';
$session->{state} = 'partial send';
}
}
}
return $event;
}
# The event is not yet suitable for mk-query-digest. It lacks, for example,
# an arg and fingerprint attribute. The event should be passed to
# MemcachedEvent::make_event() to transform it.
sub make_event {
my ( $session, $packet ) = @_;
my $event = {
cmd => $session->{cmd},
key => $session->{key},
val => $session->{val} || '',
res => $session->{res},
ts => $session->{ts},
host => $session->{host},
flags => $session->{flags} || 0,
exptime => $session->{exptime} || 0,
bytes => $session->{bytes} || 0,
Query_time => timestamp_diff($session->{ts}, $packet->{ts}),
pos_in_log => $session->{pos_in_log},
};
return $event;
}
sub _get_errors_fh {
my ( $self ) = @_;
my $errors_fh = $self->{errors_fh};
return $errors_fh if $errors_fh;
# Errors file isn't open yet; try to open it.
my $o = $self->{o};
if ( $o && $o->has('tcpdump-errors') && $o->got('tcpdump-errors') ) {
my $errors_file = $o->get('tcpdump-errors');
PTDEBUG && _d('tcpdump-errors file:', $errors_file);
open $errors_fh, '>>', $errors_file
or die "Cannot open tcpdump-errors file $errors_file: $OS_ERROR";
}
$self->{errors_fh} = $errors_fh;
return $errors_fh;
}
sub _d {
my ($package, undef, $line) = caller 0;
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
map { defined $_ ? $_ : 'undef' }
@_;
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
}
# Returns the difference between two tcpdump timestamps. TODO: this is in
# MySQLProtocolParser too, best to factor it out somewhere common.
sub timestamp_diff {
my ( $start, $end ) = @_;
my $sd = substr($start, 0, 11, '');
my $ed = substr($end, 0, 11, '');
my ( $sh, $sm, $ss ) = split(/:/, $start);
my ( $eh, $em, $es ) = split(/:/, $end);
my $esecs = ($eh * 3600 + $em * 60 + $es);
my $ssecs = ($sh * 3600 + $sm * 60 + $ss);
if ( $sd eq $ed ) {
return sprintf '%.6f', $esecs - $ssecs;
}
else { # Assume only one day boundary has been crossed, no DST, etc
return sprintf '%.6f', ( 86_400 - $ssecs ) + $esecs;
}
}
1;
}
# ###########################################################################
# End MemcachedProtocolParser package
# ###########################################################################

View File

@@ -1,669 +0,0 @@
# This program is copyright 2010-2011 Baron Schwartz, 2011 Percona Ireland Ltd.
# Feedback and improvements are welcome.
#
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
# licenses.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
# Place, Suite 330, Boston, MA 02111-1307 USA.
# ###########################################################################
# PgLogParser package
# ###########################################################################
{
# Package: PgLogParser
# PgLogParser parses Postgres logs.
package PgLogParser;
use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
use Data::Dumper;
$Data::Dumper::Indent = 1;
$Data::Dumper::Sortkeys = 1;
$Data::Dumper::Quotekeys = 0;
# This regex is partially inspired by one from pgfouine. But there is no
# documentation on the last capture in that regex, so I omit that. (TODO: that
# actually seems to be for CSV logging.)
# (?:[0-9XPFDBLA]{2}[0-9A-Z]{3}:[\s]+)?
# Here I constrain to match at least two spaces after the severity level,
# because the source code tells me to. I believe this is controlled in elog.c:
# appendStringInfo(&buf, "%s: ", error_severity(edata->elevel));
my $log_line_regex = qr{
(LOG|DEBUG|CONTEXT|WARNING|ERROR|FATAL|PANIC|HINT
|DETAIL|NOTICE|STATEMENT|INFO|LOCATION)
:\s\s+
}x;
# The following are taken right from the comments in postgresql.conf for
# log_line_prefix.
my %attrib_name_for = (
u => 'user',
d => 'db',
r => 'host', # With port
h => 'host',
p => 'Process_id',
t => 'ts',
m => 'ts', # With milliseconds
i => 'Query_type',
c => 'Session_id',
l => 'Line_no',
s => 'Session_id',
v => 'Vrt_trx_id',
x => 'Trx_id',
);
# This class's data structure is a hashref with some statefulness: pending
# lines. This is necessary because we sometimes don't know whether the event is
# complete until we read the next line or even several lines, so we store these.
#
# Another bit of data that's stored in $self is some code to automatically
# translate syslog into plain log format.
sub new {
my ( $class ) = @_;
my $self = {
pending => [],
is_syslog => undef,
next_event => undef,
'tell' => undef,
};
return bless $self, $class;
}
# This method accepts an iterator that contains an open log filehandle. It
# reads events from the filehandle by calling the iterator, and returns the
# events.
#
# Each event is a hashref of attribute => value pairs like:
# my $event = {
# ts => '', # Timestamp
# arg => '', # Argument to the command
# other attributes...
# };
#
# The log format is ideally prefixed with the following:
#
# * timestamp with microseconds
# * session ID, user, database
#
# The format I'd like to see is something like this:
#
# 2010-02-08 15:31:48.685 EST c=4b7074b4.985,u=user,D=database LOG:
#
# However, pgfouine supports user=user, db=database format. And I think
# it should be reasonable to grab pretty much any name=value properties out, and
# handle them based on the lower-cased first character of $name, to match the
# special values that are possible to give for log_line_prefix. For example, %u
# = user, so anything starting with a 'u' should be interpreted as a user.
#
# In general the log format is rather flexible, and we don't know by looking at
# any given line whether it's the last line in the event. So we often have to
# read a line and then decide what to do with the previous line we saw. Thus we
# use 'pending' when necessary but we try to do it as little as possible,
# because it's double work to defer and re-parse lines; and we try to defer as
# soon as possible so we don't have to do as much work.
#
# There are 3 categories of lines in a log file, referred to in the code as case
# 1/2/3:
#
# - Those that start a possibly multi-line event
# - Those that can continue one
# - Those that are neither the start nor the continuation, and thus must be the
# end.
#
# In cases 1 and 3, we have to check whether information from previous lines has
# been accumulated. If it has, we defer the current line and create the event.
# Otherwise we keep going, looking for more lines for the event that begins with
# the current line. Processing the lines is easiest if we arrange the cases in
# this order: 2, 1, 3.
#
# The term "line" is to be interpreted loosely here. Logs that are in syslog
# format might have multi-line "lines" that are handled by the generated
# $next_event closure and given back to the main while-loop with newlines in
# them. Therefore, regexes that match "the rest of the line" generally need the
# /s flag.
sub parse_event {
my ( $self, %args ) = @_;
my @required_args = qw(next_event tell);
foreach my $arg ( @required_args ) {
die "I need a $arg argument" unless $args{$arg};
}
# The subroutine references that wrap the filehandle operations.
my ( $next_event, $tell, $is_syslog ) = $self->generate_wrappers(%args);
# These are the properties for the log event, which will later be used to
# create an event hash ref.
my @properties = ();
# Holds the current line being processed, and its position in the log as a
# byte offset from the beginning. In some cases we'll have to reset this
# position later. We'll also have to take a wait-and-see attitude towards
# the $pos_in_log, so we use $new_pos to record where we're working in the
# log, and $pos_in_log to record where the beginning of the current event
# started.
my ($pos_in_log, $line, $was_pending) = $self->get_line();
my $new_pos;
# Sometimes we need to accumulate some lines and then join them together.
# This is used for that.
my @arg_lines;
# This is used to signal that an entire event has been found, and thus exit
# the while loop.
my $done;
# This is used to signal that an event's duration has already been found.
# See the sample file pg-syslog-001.txt and the test for it.
my $got_duration;
# Before we start, we read and discard lines until we get one with a header.
# The only thing we can really count on is that a header line should have
# the header in it. But, we only do this if we aren't in the middle of an
# ongoing event, whose first line was pending.
if ( !$was_pending && (!defined $line || $line !~ m/$log_line_regex/o) ) {
PTDEBUG && _d('Skipping lines until I find a header');
my $found_header;
LINE:
while (
eval {
($new_pos, $line) = $self->get_line();
defined $line;
}
) {
if ( $line =~ m/$log_line_regex/o ) {
$pos_in_log = $new_pos;
last LINE;
}
else {
PTDEBUG && _d('Line was not a header, will fetch another');
}
}
PTDEBUG && _d('Found a header line, now at pos_in_line', $pos_in_log);
}
# We need to keep the line that begins the event we're parsing.
my $first_line;
# This is for holding the type of the log line, which is important for
# choosing the right code to run.
my $line_type;
# Parse each line.
LINE:
while ( !$done && defined $line ) {
# Throw away the newline ending.
chomp $line unless $is_syslog;
# This while loop works with LOG lines. Other lines, such as ERROR and
# so forth, need to be handled outside this loop. The exception is when
# there's nothing in progress in @arg_lines, and the non-LOG line might
# just be something we can get relevant info from.
if ( (($line_type) = $line =~ m/$log_line_regex/o) && $line_type ne 'LOG' ) {
# There's something in progress, so we abort the loop and let it be
# handled specially.
if ( @arg_lines ) {
PTDEBUG && _d('Found a non-LOG line, exiting loop');
last LINE;
}
# There's nothing in @arg_lines, so we save what info we can and keep
# on going.
else {
$first_line ||= $line;
# Handle ERROR and STATEMENT lines...
if ( my ($e) = $line =~ m/ERROR:\s+(\S.*)\Z/s ) {
push @properties, 'Error_msg', $e;
PTDEBUG && _d('Found an error msg, saving and continuing');
($new_pos, $line) = $self->get_line();
next LINE;
}
elsif ( my ($s) = $line =~ m/STATEMENT:\s+(\S.*)\Z/s ) {
push @properties, 'arg', $s, 'cmd', 'Query';
PTDEBUG && _d('Found a statement, finishing up event');
$done = 1;
last LINE;
}
else {
PTDEBUG && _d("I don't know what to do with this line");
}
}
}
# The log isn't just queries. It also has status and informational lines
# in it. We ignore these, but if we see one that's not recognized, we
# warn. These types of things are better off in mk-error-log.
if (
$line =~ m{
Address\sfamily\snot\ssupported\sby\sprotocol
|archived\stransaction\slog\sfile
|autovacuum:\sprocessing\sdatabase
|checkpoint\srecord\sis\sat
|checkpoints\sare\soccurring\stoo\sfrequently\s\(
|could\snot\sreceive\sdata\sfrom\sclient
|database\ssystem\sis\sready
|database\ssystem\sis\sshut\sdown
|database\ssystem\swas\sshut\sdown
|incomplete\sstartup\spacket
|invalid\slength\sof\sstartup\spacket
|next\sMultiXactId:
|next\stransaction\sID:
|received\ssmart\sshutdown\srequest
|recycled\stransaction\slog\sfile
|redo\srecord\sis\sat
|removing\sfile\s"
|removing\stransaction\slog\sfile\s"
|shutting\sdown
|transaction\sID\swrap\slimit\sis
}x
) {
# We get the next line to process and skip the rest of the loop.
PTDEBUG && _d('Skipping this line because it matches skip-pattern');
($new_pos, $line) = $self->get_line();
next LINE;
}
# Possibly reset $first_line, depending on whether it was determined to be
# junk and unset.
$first_line ||= $line;
# Case 2: non-header lines, optionally starting with a TAB, are a
# continuation of the previous line.
if ( $line !~ m/$log_line_regex/o && @arg_lines ) {
if ( !$is_syslog ) {
# We need to translate tabs to newlines. Weirdly, some logs (see
# samples/pg-log-005.txt) have newlines without a leading tab.
# Maybe it's an older log format.
$line =~ s/\A\t?/\n/;
}
# Save the remainder.
push @arg_lines, $line;
PTDEBUG && _d('This was a continuation line');
}
# Cases 1 and 3: These lines start with some optional meta-data, and then
# the $log_line_regex followed by the line's log message. The message can be
# of the form "label: text....". Examples:
# LOG: duration: 1.565 ms
# LOG: statement: SELECT ....
# LOG: duration: 1.565 ms statement: SELECT ....
# In the above examples, the $label is duration, statement, and duration.
elsif (
my ( $sev, $label, $rest )
= $line =~ m/$log_line_regex(.+?):\s+(.*)\Z/so
) {
PTDEBUG && _d('Line is case 1 or case 3');
# This is either a case 1 or case 3. If there's previously gathered
# data in @arg_lines, it doesn't matter which -- we have to create an
# event (a Query event), and we're $done. This is case 0xdeadbeef.
if ( @arg_lines ) {
$done = 1;
PTDEBUG && _d('There are saved @arg_lines, we are done');
# We shouldn't modify @properties based on $line, because $line
# doesn't have anything to do with the stuff in @properties, which
# is all related to the previous line(s). However, there is one
# case in which the line could be part of the event: when it's a
# plain 'duration' line. This happens when the statement is logged
# on one line, and then the duration is logged afterwards. If this
# is true, then we alter @properties, and we do NOT defer the current
# line.
if ( $label eq 'duration' && $rest =~ m/[0-9.]+\s+\S+\Z/ ) {
if ( $got_duration ) {
# Just discard the line.
PTDEBUG && _d('Discarding line, duration already found');
}
else {
push @properties, 'Query_time', $self->duration_to_secs($rest);
PTDEBUG && _d("Line's duration is for previous event:", $rest);
}
}
else {
# We'll come back to this line later.
$self->pending($new_pos, $line);
PTDEBUG && _d('Deferred line');
}
}
# Here we test for case 1, lines that can start a multi-line event.
elsif ( $label =~ m/\A(?:duration|statement|query)\Z/ ) {
PTDEBUG && _d('Case 1: start a multi-line event');
# If it's a duration, then there might be a statement later on the
# same line and the duration applies to that.
if ( $label eq 'duration' ) {
if (
(my ($dur, $stmt)
= $rest =~ m/([0-9.]+ \S+)\s+(?:statement|query): *(.*)\Z/s)
) {
# It does, so we'll pull out the Query_time etc now, rather
# than doing it later, when we might end up in the case above
# (case 0xdeadbeef).
push @properties, 'Query_time', $self->duration_to_secs($dur);
$got_duration = 1;
push @arg_lines, $stmt;
PTDEBUG && _d('Duration + statement');
}
else {
# The duration line is just junk. It's the line after a
# statement, but we never saw the statement (else we'd have
# fallen into 0xdeadbeef above). Discard this line and adjust
# pos_in_log. See t/samples/pg-log-002.txt for an example.
$first_line = undef;
($pos_in_log, $line) = $self->get_line();
PTDEBUG && _d('Line applies to event we never saw, discarding');
next LINE;
}
}
else {
# This isn't a duration line, it's a statement or query. Put it
# onto @arg_lines for later and keep going.
push @arg_lines, $rest;
PTDEBUG && _d('Putting onto @arg_lines');
}
}
# Here is case 3, lines that can't be in case 1 or 2. These surely
# terminate any event that's been accumulated, and if there isn't any
# such, then we just create an event without the overhead of deferring.
else {
$done = 1;
PTDEBUG && _d('Line is case 3, event is done');
# Again, if there's previously gathered data in @arg_lines, we have
# to defer the current line (not touching @properties) and revisit it.
if ( @arg_lines ) {
$self->pending($new_pos, $line);
PTDEBUG && _d('There was @arg_lines, putting line to pending');
}
# Otherwise we can parse the line and put it into @properties.
else {
PTDEBUG && _d('No need to defer, process event from this line now');
push @properties, 'cmd', 'Admin', 'arg', $label;
# For some kinds of log lines, we can grab extra meta-data out of
# the end of the line.
# LOG: connection received: host=[local]
if ( $label =~ m/\A(?:dis)?connection(?: received| authorized)?\Z/ ) {
push @properties, $self->get_meta($rest);
}
else {
die "I don't understand line $line";
}
}
}
}
# If the line isn't case 1, 2, or 3 I don't know what it is.
else {
die "I don't understand line $line";
}
# We get the next line to process.
if ( !$done ) {
($new_pos, $line) = $self->get_line();
}
} # LINE
# If we're at the end of the file, we finish and tell the caller we're done.
if ( !defined $line ) {
PTDEBUG && _d('Line not defined, at EOF; calling oktorun(0) if exists');
$args{oktorun}->(0) if $args{oktorun};
if ( !@arg_lines ) {
PTDEBUG && _d('No saved @arg_lines either, we are all done');
return undef;
}
}
# If we got kicked out of the while loop because of a non-LOG line, we handle
# that line here.
if ( $line_type && $line_type ne 'LOG' ) {
PTDEBUG && _d('Line is not a LOG line');
# ERROR lines come in a few flavors. See t/samples/pg-log-006.txt,
# t/samples/pg-syslog-002.txt, and t/samples/pg-syslog-007.txt for some
# examples. The rules seem to be this: if the ERROR is followed by a
# STATEMENT, and the STATEMENT's statement matches the query in
# @arg_lines, then the STATEMENT message is redundant. (This can be
# caused by various combos of configuration options in postgresql.conf).
# However, if the ERROR's STATEMENT line doesn't match what's in
# @arg_lines, then the ERROR actually starts a new event. If the ERROR is
# followed by another LOG event, then the ERROR also starts a new event.
if ( $line_type eq 'ERROR' ) {
PTDEBUG && _d('Line is ERROR');
# If there's already a statement in processing, then put aside the
# current line, and peek ahead.
if ( @arg_lines ) {
PTDEBUG && _d('There is @arg_lines, will peek ahead one line');
my ( $temp_pos, $temp_line ) = $self->get_line();
my ( $type, $msg );
if (
defined $temp_line
&& ( ($type, $msg) = $temp_line =~ m/$log_line_regex(.*)/o )
&& ( $type ne 'STATEMENT' || $msg eq $arg_lines[-1] )
) {
# Looks like the whole thing is pertaining to the current event
# in progress. Add the error message to the event.
PTDEBUG && _d('Error/statement line pertain to current event');
push @properties, 'Error_msg', $line =~ m/ERROR:\s*(\S.*)\Z/s;
if ( $type ne 'STATEMENT' ) {
PTDEBUG && _d('Must save peeked line, it is a', $type);
$self->pending($temp_pos, $temp_line);
}
}
elsif ( defined $temp_line && defined $type ) {
# Looks like the current and next line are about a new event.
# Put them into pending.
PTDEBUG && _d('Error/statement line are a new event');
$self->pending($new_pos, $line);
$self->pending($temp_pos, $temp_line);
}
else {
PTDEBUG && _d("Unknown line", $line);
}
}
}
else {
PTDEBUG && _d("Unknown line", $line);
}
}
# If $done is true, then some of the above code decided that the full
# event has been found. If we reached the end of the file, then we might
# also have something in @arg_lines, although we didn't find the "line after"
# that signals the event was done. In either case we return an event. This
# should be the only 'return' statement in this block of code.
if ( $done || @arg_lines ) {
PTDEBUG && _d('Making event');
# Finish building the event.
push @properties, 'pos_in_log', $pos_in_log;
# Statement/query lines will be in @arg_lines.
if ( @arg_lines ) {
PTDEBUG && _d('Assembling @arg_lines: ', scalar @arg_lines);
push @properties, 'arg', join('', @arg_lines), 'cmd', 'Query';
}
if ( $first_line ) {
# Handle some meta-data: a timestamp, with optional milliseconds.
if ( my ($ts) = $first_line =~ m/([0-9-]{10} [0-9:.]{8,12})/ ) {
PTDEBUG && _d('Getting timestamp', $ts);
push @properties, 'ts', $ts;
}
# Find meta-data embedded in the log line prefix, in name=value format.
if ( my ($meta) = $first_line =~ m/(.*?)[A-Z]{3,}: / ) {
PTDEBUG && _d('Found a meta-data chunk:', $meta);
push @properties, $self->get_meta($meta);
}
}
# Dump info about what we've found, but don't dump $event; want to see
# full dump of all properties, and after it's been cast into a hash,
# duplicated keys will be gone.
PTDEBUG && _d('Properties of event:', Dumper(\@properties));
my $event = { @properties };
$event->{bytes} = length($event->{arg} || '');
return $event;
}
}
# Parses key=value meta-data from the $meta string, and returns a list of event
# attribute names and values.
sub get_meta {
my ( $self, $meta ) = @_;
my @properties;
foreach my $set ( $meta =~ m/(\w+=[^, ]+)/g ) {
my ($key, $val) = split(/=/, $set);
if ( $key && $val ) {
# The first letter of the name, lowercased, determines the
# meaning of the item.
if ( my $prop = $attrib_name_for{lc substr($key, 0, 1)} ) {
push @properties, $prop, $val;
}
else {
PTDEBUG && _d('Bad meta key', $set);
}
}
else {
PTDEBUG && _d("Can't figure out meta from", $set);
}
}
return @properties;
}
# This subroutine abstracts the process and source of getting a line of text and
# its position in the log file. It might get the line of text from the log; it
# might get it from the @pending array. It also does infinite loop checking
# TODO.
sub get_line {
my ( $self ) = @_;
my ($pos, $line, $was_pending) = $self->pending;
if ( ! defined $line ) {
PTDEBUG && _d('Got nothing from pending, trying the $fh');
my ( $next_event, $tell) = @{$self}{qw(next_event tell)};
eval {
$pos = $tell->();
$line = $next_event->();
};
if ( PTDEBUG && $EVAL_ERROR ) {
_d($EVAL_ERROR);
}
}
PTDEBUG && _d('Got pos/line:', $pos, $line);
return ($pos, $line);
}
# This subroutine defers and retrieves a line/pos pair. If you give it an
# argument it'll set the stored value. If not, it'll get one if there is one
# and return it.
sub pending {
my ( $self, $val, $pos_in_log ) = @_;
my $was_pending;
PTDEBUG && _d('In sub pending, val:', $val);
if ( $val ) {
push @{$self->{pending}}, [$val, $pos_in_log];
}
elsif ( @{$self->{pending}} ) {
($val, $pos_in_log) = @{ shift @{$self->{pending}} };
$was_pending = 1;
}
PTDEBUG && _d('Return from pending:', $val, $pos_in_log);
return ($val, $pos_in_log, $was_pending);
}
# This subroutine manufactures subroutines to automatically translate incoming
# syslog format into standard log format, to keep the main parse_event free from
# having to think about that. For documentation on how this works, see
# SysLogParser.pm.
sub generate_wrappers {
my ( $self, %args ) = @_;
# Reset everything, just in case some cruft was left over from a previous use
# of this object. The object has stateful closures. If this isn't done,
# then they'll keep reading from old filehandles. The sanity check is based
# on the memory address of the closure!
if ( ($self->{sanity} || '') ne "$args{next_event}" ){
PTDEBUG && _d("Clearing and recreating internal state");
eval { require SysLogParser; }; # Required for tests to work.
my $sl = new SysLogParser();
# We need a special item in %args for syslog parsing. (This might not be
# a syslog log file...) See the test for t/samples/pg-syslog-002.txt for
# an example of when this is needed.
$args{misc}->{new_event_test} = sub {
my ( $content ) = @_;
return unless defined $content;
return $content =~ m/$log_line_regex/o;
};
# The TAB at the beginning of the line indicates that there's a newline
# at the end of the previous line.
$args{misc}->{line_filter} = sub {
my ( $content ) = @_;
$content =~ s/\A\t/\n/;
return $content;
};
@{$self}{qw(next_event tell is_syslog)} = $sl->make_closures(%args);
$self->{sanity} = "$args{next_event}";
}
# Return the wrapper functions!
return @{$self}{qw(next_event tell is_syslog)};
}
# This subroutine converts various formats to seconds. Examples:
# 10.870 ms
sub duration_to_secs {
my ( $self, $str ) = @_;
PTDEBUG && _d('Duration:', $str);
my ( $num, $suf ) = split(/\s+/, $str);
my $factor = $suf eq 'ms' ? 1000
: $suf eq 'sec' ? 1
: die("Unknown suffix '$suf'");
return $num / $factor;
}
sub _d {
my ($package, undef, $line) = caller 0;
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
map { defined $_ ? $_ : 'undef' }
@_;
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
}
1;
}
# ###########################################################################
# End PgLogParser package
# ###########################################################################

View File

@@ -481,9 +481,7 @@ sub query_report {
}
my $log_type = $args{log_type} || '';
my $mark = $log_type eq 'memcached'
|| $log_type eq 'http'
|| $log_type eq 'pglog' ? '' : '\G';
my $mark = '\G';
if ( $item =~ m/^(?:[\(\s]*select|insert|replace)/ ) {
if ( $item =~ m/^(?:insert|replace)/ ) { # No EXPLAIN

View File

@@ -1,259 +0,0 @@
# This program is copyright 2010-2011 Baron Schwartz, 2011 Percona Ireland Ltd.
# Feedback and improvements are welcome.
#
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
# licenses.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
# Place, Suite 330, Boston, MA 02111-1307 USA.
# ###########################################################################
# SysLogParser package
# ###########################################################################
{
# Package: SysLogParser
# SysLogParser parses events from syslogs.
package SysLogParser;
use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
# This regex matches the message number, line number, and content of a syslog
# message:
# 2008 Jan 9 16:16:34 hostname postgres[30059]: [13-2] ...content...
my $syslog_regex = qr{\A.*\w+\[\d+\]: \[(\d+)-(\d+)\] (.*)\Z};
# This class generates currying functions that wrap around a standard
# log-parser's next_event() and tell() function pointers. The wrappers behave
# the same way, except that they'll return entire syslog events, instead of
# lines at a time. To use it, do the following:
#
# sub parse_event {
# my ($self, %args) = @_;
# my ($next_event, $tell, $is_syslog) = SysLogParser::make_closures(%args);
# # ... write your code to use the $next_event and $tell here...
# }
#
# If the log isn't in syslog format, $is_syslog will be false and you'll get
# back simple wrappers around the $next_event and $tell functions. (They still
# have to be wrapped, because to find out whether the log is in syslog format,
# the first line has to be examined.)
sub new {
my ( $class ) = @_;
my $self = {};
return bless $self, $class;
}
# This method is here so that SysLogParser can be used and tested in its own
# right. However, its ability to generate wrapper functions probably means that
# it should be used as a translation layer, not directly. You can use this code
# as an example of how to integrate this into other packages.
sub parse_event {
my ( $self, %args ) = @_;
my ( $next_event, $tell, $is_syslog ) = $self->generate_wrappers(%args);
return $next_event->();
}
# This is an example of how a class can seamlessly put a syslog translation
# layer underneath itself.
sub generate_wrappers {
my ( $self, %args ) = @_;
# Reset everything, just in case some cruft was left over from a previous use
# of this object. The object has stateful closures. If this isn't done,
# then they'll keep reading from old filehandles. The sanity check is based
# on the memory address of the closure!
if ( ($self->{sanity} || '') ne "$args{next_event}" ){
PTDEBUG && _d("Clearing and recreating internal state");
@{$self}{qw(next_event tell is_syslog)} = $self->make_closures(%args);
$self->{sanity} = "$args{next_event}";
}
# Return the wrapper functions!
return @{$self}{qw(next_event tell is_syslog)};
}
# Make the closures! The $args{misc}->{new_event_test} is an optional
# subroutine reference, which tells the wrapper when to consider a line part of
# a new event, in syslog format, even when it's technically the same syslog
# event. See the test for samples/pg-syslog-002.txt for an example. This
# argument should be passed in via the call to parse_event(). Ditto for
# 'line_filter', which is some processing code to run on every line of content
# in an event.
sub make_closures {
my ( $self, %args ) = @_;
# The following variables will be referred to in the manufactured
# subroutines, making them proper closures.
my $next_event = $args{'next_event'};
my $tell = $args{'tell'};
my $new_event_test = $args{'misc'}->{'new_event_test'};
my $line_filter = $args{'misc'}->{'line_filter'};
# The first thing to do is get a line from the log and see if it's from
# syslog.
my $test_line = $next_event->();
PTDEBUG && _d('Read first sample/test line:', $test_line);
# If it's syslog, we have to generate a moderately elaborate wrapper
# function.
if ( defined $test_line && $test_line =~ m/$syslog_regex/o ) {
# Within syslog-parsing subroutines, we'll use LLSP (low-level syslog
# parser) as a PTDEBUG line prefix.
PTDEBUG && _d('This looks like a syslog line, PTDEBUG prefix=LLSP');
# Grab the interesting bits out of the test line, and save the result.
my ($msg_nr, $line_nr, $content) = $test_line =~ m/$syslog_regex/o;
my @pending = ($test_line);
my $last_msg_nr = $msg_nr;
my $pos_in_log = 0;
# Generate the subroutine for getting a full log message without syslog
# breaking it across multiple lines.
my $new_next_event = sub {
PTDEBUG && _d('LLSP: next_event()');
# Keeping the pos_in_log variable right is a bit tricky! In general,
# we have to tell() the filehandle before trying to read from it,
# getting the position before the data we've just read. The simple
# rule is that when we push something onto @pending, which we almost
# always do, then $pos_in_log should point to the beginning of that
# saved content in the file.
PTDEBUG && _d('LLSP: Current virtual $fh position:', $pos_in_log);
my $new_pos = 0;
# @arg_lines is where we store up the content we're about to return.
# It contains $content; @pending contains a single saved $line.
my @arg_lines;
# Here we actually examine lines until we have found a complete event.
my $line;
LINE:
while (
defined($line = shift @pending)
|| do {
# Save $new_pos, because when we hit EOF we can't $tell->()
# anymore.
eval { $new_pos = -1; $new_pos = $tell->() };
defined($line = $next_event->());
}
) {
PTDEBUG && _d('LLSP: Line:', $line);
# Parse the line.
($msg_nr, $line_nr, $content) = $line =~ m/$syslog_regex/o;
if ( !$msg_nr ) {
die "Can't parse line: $line";
}
# The message number has changed -- thus, new message.
elsif ( $msg_nr != $last_msg_nr ) {
PTDEBUG && _d('LLSP: $msg_nr', $last_msg_nr, '=>', $msg_nr);
$last_msg_nr = $msg_nr;
last LINE;
}
# Or, the caller gave us a custom new_event_test and it is true --
# thus, also new message.
elsif ( @arg_lines && $new_event_test && $new_event_test->($content) ) {
PTDEBUG && _d('LLSP: $new_event_test matches');
last LINE;
}
# Otherwise it's part of the current message; put it onto the list
# of lines pending. We have to translate characters that syslog has
# munged. Some translate TAB into the literal characters '^I' and
# some, rsyslog on Debian anyway, seem to translate all whitespace
# control characters into an octal string representing the character
# code.
# Example: #011FROM pg_catalog.pg_class c
$content =~ s/#(\d{3})/chr(oct($1))/ge;
$content =~ s/\^I/\t/g;
if ( $line_filter ) {
PTDEBUG && _d('LLSP: applying $line_filter');
$content = $line_filter->($content);
}
push @arg_lines, $content;
}
PTDEBUG && _d('LLSP: Exited while-loop after finding a complete entry');
# Mash the pending stuff together to return it.
my $psql_log_event = @arg_lines ? join('', @arg_lines) : undef;
PTDEBUG && _d('LLSP: Final log entry:', $psql_log_event);
# Save the new content into @pending for the next time. $pos_in_log
# must also be updated to whatever $new_pos is.
if ( defined $line ) {
PTDEBUG && _d('LLSP: Saving $line:', $line);
@pending = $line;
PTDEBUG && _d('LLSP: $pos_in_log:', $pos_in_log, '=>', $new_pos);
$pos_in_log = $new_pos;
}
else {
# We hit the end of the file.
PTDEBUG && _d('LLSP: EOF reached');
@pending = ();
$last_msg_nr = 0;
}
return $psql_log_event;
};
# Create the closure for $tell->();
my $new_tell = sub {
PTDEBUG && _d('LLSP: tell()', $pos_in_log);
return $pos_in_log;
};
return ($new_next_event, $new_tell, 1);
}
# This is either at EOF already, or it's not syslog format.
else {
# Within plain-log-parsing subroutines, we'll use PLAIN as a PTDEBUG
# line prefix.
PTDEBUG && _d('Plain log, or we are at EOF; PTDEBUG prefix=PLAIN');
# The @pending array is really only needed to return the one line we
# already read as a test. Too bad we can't just push it back onto the
# log. TODO: maybe we can test whether the filehandle is seekable and
# seek back to the start, then just return the unwrapped functions?
my @pending = defined $test_line ? ($test_line) : ();
my $new_next_event = sub {
PTDEBUG && _d('PLAIN: next_event(); @pending:', scalar @pending);
return @pending ? shift @pending : $next_event->();
};
my $new_tell = sub {
PTDEBUG && _d('PLAIN: tell(); @pending:', scalar @pending);
return @pending ? 0 : $tell->();
};
return ($new_next_event, $new_tell, 0);
}
}
sub _d {
my ($package, undef, $line) = caller 0;
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
map { defined $_ ? $_ : 'undef' }
@_;
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
}
1;
}
# ###########################################################################
# End SysLogParser package
# ###########################################################################

View File

@@ -172,10 +172,7 @@ sub _parse_packet {
sub port_number {
my ( $self, $port ) = @_;
return unless $port;
return $port eq 'memcached' ? 11211
: $port eq 'http' ? 80
: $port eq 'mysql' ? 3306
: $port;
return $port eq 'mysql' ? 3306 : $port;
}
sub _d {