mirror of
https://github.com/percona/percona-toolkit.git
synced 2025-09-11 13:40:07 +00:00
pqd: Remove --type pglog, memcached, and http
This commit is contained in:
@@ -1,242 +0,0 @@
|
||||
# This program is copyright 2009-2011 Percona Ireland Ltd.
|
||||
# Feedback and improvements are welcome.
|
||||
#
|
||||
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
|
||||
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
||||
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the GNU General Public License as published by the Free Software
|
||||
# Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
|
||||
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
|
||||
# licenses.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along with
|
||||
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
|
||||
# Place, Suite 330, Boston, MA 02111-1307 USA.
|
||||
# ###########################################################################
|
||||
# HTTPProtocolParser package
|
||||
# ###########################################################################
|
||||
{
|
||||
# Package: HTTPProtocolParser
|
||||
# HTTPProtocolParser parses HTTP traffic from tcpdump files.
|
||||
package HTTPProtocolParser;
|
||||
use base 'ProtocolParser';
|
||||
|
||||
use strict;
|
||||
use warnings FATAL => 'all';
|
||||
use English qw(-no_match_vars);
|
||||
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
|
||||
|
||||
# server is the "host:port" of the sever being watched. It's auto-guessed if
|
||||
# not specified.
|
||||
sub new {
|
||||
my ( $class, %args ) = @_;
|
||||
my $self = $class->SUPER::new(
|
||||
%args,
|
||||
port => 80,
|
||||
);
|
||||
return $self;
|
||||
}
|
||||
|
||||
# Handles a packet from the server given the state of the session. Returns an
|
||||
# event if one was ready to be created, otherwise returns nothing.
|
||||
sub _packet_from_server {
|
||||
my ( $self, $packet, $session, $misc ) = @_;
|
||||
die "I need a packet" unless $packet;
|
||||
die "I need a session" unless $session;
|
||||
|
||||
PTDEBUG && _d('Packet is from server; client state:', $session->{state});
|
||||
|
||||
# If there's no session state, then we're catching a server response
|
||||
# mid-stream.
|
||||
if ( !$session->{state} ) {
|
||||
PTDEBUG && _d('Ignoring mid-stream server response');
|
||||
return;
|
||||
}
|
||||
|
||||
if ( $session->{out_of_order} ) {
|
||||
# We're waiting for the header so we can get the content length.
|
||||
# Once we know this, we can determine how many out of order packets
|
||||
# we need to complete the request, then order them and re-process.
|
||||
my ($line1, $content);
|
||||
if ( !$session->{have_header} ) {
|
||||
($line1, $content) = $self->_parse_header(
|
||||
$session, $packet->{data}, $packet->{data_len});
|
||||
}
|
||||
if ( $line1 ) {
|
||||
$session->{have_header} = 1;
|
||||
$packet->{content_len} = length $content;
|
||||
PTDEBUG && _d('Got out of order header with',
|
||||
$packet->{content_len}, 'bytes of content');
|
||||
}
|
||||
my $have_len = $packet->{content_len} || $packet->{data_len};
|
||||
map { $have_len += $_->{data_len} }
|
||||
@{$session->{packets}};
|
||||
$session->{have_all_packets}
|
||||
= 1 if $session->{attribs}->{bytes}
|
||||
&& $have_len >= $session->{attribs}->{bytes};
|
||||
PTDEBUG && _d('Have', $have_len, 'of', $session->{attribs}->{bytes});
|
||||
return;
|
||||
}
|
||||
|
||||
# Assume that the server is returning only one value.
|
||||
# TODO: make it handle multiple.
|
||||
if ( $session->{state} eq 'awaiting reply' ) {
|
||||
|
||||
# Save this early because we may return early if the packets
|
||||
# are being received out of order. Also, save it only once
|
||||
# in case we re-process packets if they're out of order.
|
||||
$session->{start_reply} = $packet->{ts} unless $session->{start_reply};
|
||||
|
||||
# Get first line of header and first chunk of contents/data.
|
||||
my ($line1, $content) = $self->_parse_header($session, $packet->{data},
|
||||
$packet->{data_len});
|
||||
|
||||
# The reponse, when in order, is text header followed by data.
|
||||
# If there's no line1, then we didn't get the text header first
|
||||
# which means we're getting the response in out of order packets.
|
||||
if ( !$line1 ) {
|
||||
$session->{out_of_order} = 1; # alert parent
|
||||
$session->{have_all_packets} = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
# First line should be: version code phrase
|
||||
# E.g.: HTTP/1.1 200 OK
|
||||
my ($version, $code, $phrase) = $line1 =~ m/(\S+)/g;
|
||||
$session->{attribs}->{Status_code} = $code;
|
||||
PTDEBUG && _d('Status code for last', $session->{attribs}->{arg},
|
||||
'request:', $session->{attribs}->{Status_code});
|
||||
|
||||
my $content_len = $content ? length $content : 0;
|
||||
PTDEBUG && _d('Got', $content_len, 'bytes of content');
|
||||
if ( $session->{attribs}->{bytes}
|
||||
&& $content_len < $session->{attribs}->{bytes} ) {
|
||||
$session->{data_len} = $session->{attribs}->{bytes};
|
||||
$session->{buff} = $content;
|
||||
$session->{buff_left} = $session->{attribs}->{bytes} - $content_len;
|
||||
PTDEBUG && _d('Contents not complete,', $session->{buff_left},
|
||||
'bytes left');
|
||||
$session->{state} = 'recving content';
|
||||
return;
|
||||
}
|
||||
}
|
||||
elsif ( $session->{state} eq 'recving content' ) {
|
||||
if ( $session->{buff} ) {
|
||||
PTDEBUG && _d('Receiving content,', $session->{buff_left},
|
||||
'bytes left');
|
||||
return;
|
||||
}
|
||||
PTDEBUG && _d('Contents received');
|
||||
}
|
||||
else {
|
||||
# TODO:
|
||||
warn "Server response in unknown state";
|
||||
return;
|
||||
}
|
||||
|
||||
PTDEBUG && _d('Creating event, deleting session');
|
||||
$session->{end_reply} = $session->{ts_max} || $packet->{ts};
|
||||
my $event = $self->make_event($session, $packet);
|
||||
delete $self->{sessions}->{$session->{client}}; # http is stateless!
|
||||
return $event;
|
||||
}
|
||||
|
||||
# Handles a packet from the client given the state of the session.
|
||||
sub _packet_from_client {
|
||||
my ( $self, $packet, $session, $misc ) = @_;
|
||||
die "I need a packet" unless $packet;
|
||||
die "I need a session" unless $session;
|
||||
|
||||
PTDEBUG && _d('Packet is from client; state:', $session->{state});
|
||||
|
||||
my $event;
|
||||
if ( ($session->{state} || '') =~ m/awaiting / ) {
|
||||
PTDEBUG && _d('More client headers:', $packet->{data});
|
||||
return;
|
||||
}
|
||||
|
||||
if ( !$session->{state} ) {
|
||||
$session->{state} = 'awaiting reply';
|
||||
my ($line1, undef) = $self->_parse_header($session, $packet->{data}, $packet->{data_len});
|
||||
# First line should be: request page version
|
||||
# E.g.: GET /foo.html HTTP/1.1
|
||||
my ($request, $page, $version) = $line1 =~ m/(\S+)/g;
|
||||
if ( !$request || !$page ) {
|
||||
PTDEBUG && _d("Didn't get a request or page:", $request, $page);
|
||||
return;
|
||||
}
|
||||
$request = lc $request;
|
||||
my $vh = $session->{attribs}->{Virtual_host} || '';
|
||||
my $arg = "$request $vh$page";
|
||||
PTDEBUG && _d('arg:', $arg);
|
||||
|
||||
if ( $request eq 'get' || $request eq 'post' ) {
|
||||
@{$session->{attribs}}{qw(arg)} = ($arg);
|
||||
}
|
||||
else {
|
||||
PTDEBUG && _d("Don't know how to handle a", $request, "request");
|
||||
return;
|
||||
}
|
||||
|
||||
$session->{start_request} = $packet->{ts};
|
||||
$session->{attribs}->{host} = $packet->{src_host};
|
||||
$session->{attribs}->{pos_in_log} = $packet->{pos_in_log};
|
||||
$session->{attribs}->{ts} = $packet->{ts};
|
||||
}
|
||||
else {
|
||||
# TODO:
|
||||
die "Probably multiple GETs from client before a server response?";
|
||||
}
|
||||
|
||||
return $event;
|
||||
}
|
||||
|
||||
sub _parse_header {
|
||||
my ( $self, $session, $data, $len, $no_recurse ) = @_;
|
||||
die "I need data" unless $data;
|
||||
my ($header, $content) = split(/\r\n\r\n/, $data);
|
||||
my ($line1, $header_vals) = $header =~ m/\A(\S+ \S+ .+?)\r\n(.+)?/s;
|
||||
PTDEBUG && _d('HTTP header:', $line1);
|
||||
return unless $line1;
|
||||
|
||||
if ( !$header_vals ) {
|
||||
PTDEBUG && _d('No header vals');
|
||||
return $line1, undef;
|
||||
}
|
||||
my @headers;
|
||||
foreach my $val ( split(/\r\n/, $header_vals) ) {
|
||||
last unless $val;
|
||||
# Capture and save any useful header values.
|
||||
PTDEBUG && _d('HTTP header:', $val);
|
||||
if ( $val =~ m/^Content-Length/i ) {
|
||||
($session->{attribs}->{bytes}) = $val =~ /: (\d+)/;
|
||||
PTDEBUG && _d('Saved Content-Length:', $session->{attribs}->{bytes});
|
||||
}
|
||||
if ( $val =~ m/Content-Encoding/i ) {
|
||||
($session->{compressed}) = $val =~ /: (\w+)/;
|
||||
PTDEBUG && _d('Saved Content-Encoding:', $session->{compressed});
|
||||
}
|
||||
if ( $val =~ m/^Host/i ) {
|
||||
# The "host" attribute is already taken, so we call this "domain".
|
||||
($session->{attribs}->{Virtual_host}) = $val =~ /: (\S+)/;
|
||||
PTDEBUG && _d('Saved Host:', ($session->{attribs}->{Virtual_host}));
|
||||
}
|
||||
}
|
||||
return $line1, $content;
|
||||
}
|
||||
|
||||
sub _d {
|
||||
my ($package, undef, $line) = caller 0;
|
||||
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
|
||||
map { defined $_ ? $_ : 'undef' }
|
||||
@_;
|
||||
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
|
||||
}
|
||||
|
||||
1;
|
||||
}
|
||||
# ###########################################################################
|
||||
# End HTTPProtocolParser package
|
||||
# ###########################################################################
|
@@ -1,216 +0,0 @@
|
||||
# This program is copyright 2009-2011 Percona Ireland Ltd.
|
||||
# Feedback and improvements are welcome.
|
||||
#
|
||||
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
|
||||
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
||||
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the GNU General Public License as published by the Free Software
|
||||
# Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
|
||||
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
|
||||
# licenses.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along with
|
||||
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
|
||||
# Place, Suite 330, Boston, MA 02111-1307 USA.
|
||||
# ###########################################################################
|
||||
# MemcachedEvent package
|
||||
# ###########################################################################
|
||||
{
|
||||
# Package: MemcachedEvent
|
||||
# MemcachedEvent creates events from <MemcachedProtocolParser> data.
|
||||
# Since memcached is not strictly MySQL stuff, we have to
|
||||
# fabricate MySQL-like query events from memcached.
|
||||
#
|
||||
# See http://code.sixapart.com/svn/memcached/trunk/server/doc/protocol.txt
|
||||
# for information about the memcached protocol.
|
||||
package MemcachedEvent;
|
||||
|
||||
use strict;
|
||||
use warnings FATAL => 'all';
|
||||
use English qw(-no_match_vars);
|
||||
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
|
||||
|
||||
use Data::Dumper;
|
||||
$Data::Dumper::Indent = 1;
|
||||
$Data::Dumper::Sortkeys = 1;
|
||||
$Data::Dumper::Quotekeys = 0;
|
||||
|
||||
# cmds that we know how to handle.
|
||||
my %cmds = map { $_ => 1 } qw(
|
||||
set
|
||||
add
|
||||
replace
|
||||
append
|
||||
prepend
|
||||
cas
|
||||
get
|
||||
gets
|
||||
delete
|
||||
incr
|
||||
decr
|
||||
);
|
||||
|
||||
my %cmd_handler_for = (
|
||||
set => \&handle_storage_cmd,
|
||||
add => \&handle_storage_cmd,
|
||||
replace => \&handle_storage_cmd,
|
||||
append => \&handle_storage_cmd,
|
||||
prepend => \&handle_storage_cmd,
|
||||
cas => \&handle_storage_cmd,
|
||||
get => \&handle_retr_cmd,
|
||||
gets => \&handle_retr_cmd,
|
||||
);
|
||||
|
||||
sub new {
|
||||
my ( $class, %args ) = @_;
|
||||
my $self = {};
|
||||
return bless $self, $class;
|
||||
}
|
||||
|
||||
# Given an event from MemcachedProtocolParser, returns an event
|
||||
# more suitable for mk-query-digest.
|
||||
sub parse_event {
|
||||
my ( $self, %args ) = @_;
|
||||
my $event = $args{event};
|
||||
return unless $event;
|
||||
|
||||
if ( !$event->{cmd} || !$event->{key} ) {
|
||||
PTDEBUG && _d('Event has no cmd or key:', Dumper($event));
|
||||
return;
|
||||
}
|
||||
|
||||
if ( !$cmds{$event->{cmd}} ) {
|
||||
PTDEBUG && _d("Don't know how to handle cmd:", $event->{cmd});
|
||||
return;
|
||||
}
|
||||
|
||||
# For a normal event, arg is the query. For memcached, the "query" is
|
||||
# essentially the cmd and key, so this becomes arg. E.g.: "set mk_key".
|
||||
$event->{arg} = "$event->{cmd} $event->{key}";
|
||||
$event->{fingerprint} = $self->fingerprint($event->{arg});
|
||||
$event->{key_print} = $self->fingerprint($event->{key});
|
||||
|
||||
# Set every cmd so that aggregated totals will be correct. If we only
|
||||
# set cmd that we get, then all cmds will show as 100% in the report.
|
||||
# This will create a lot of 0% cmds, but --[no]zero-bool will remove them.
|
||||
# Think of events in a Percona-patched log: the attribs like Full_scan are
|
||||
# present for every event.
|
||||
map { $event->{"Memc_$_"} = 'No' } keys %cmds;
|
||||
$event->{"Memc_$event->{cmd}"} = 'Yes'; # Got this cmd.
|
||||
$event->{Memc_error} = 'No'; # A handler may change this.
|
||||
$event->{Memc_miss} = 'No';
|
||||
if ( $event->{res} ) {
|
||||
$event->{Memc_miss} = 'Yes' if $event->{res} eq 'NOT_FOUND';
|
||||
}
|
||||
else {
|
||||
# This normally happens with incr and decr cmds.
|
||||
PTDEBUG && _d('Event has no res:', Dumper($event));
|
||||
}
|
||||
|
||||
# Handle special results, errors, etc. The handler should return the
|
||||
# event on success, or nothing on failure.
|
||||
if ( $cmd_handler_for{$event->{cmd}} ) {
|
||||
return $cmd_handler_for{$event->{cmd}}->($event);
|
||||
}
|
||||
|
||||
return $event;
|
||||
}
|
||||
|
||||
# Replace things that look like placeholders with a ?
|
||||
sub fingerprint {
|
||||
my ( $self, $val ) = @_;
|
||||
$val =~ s/[0-9A-Fa-f]{16,}|\d+/?/g;
|
||||
return $val;
|
||||
}
|
||||
|
||||
# Possible results for storage cmds:
|
||||
# - "STORED\r\n", to indicate success.
|
||||
#
|
||||
# - "NOT_STORED\r\n" to indicate the data was not stored, but not
|
||||
# because of an error. This normally means that either that the
|
||||
# condition for an "add" or a "replace" command wasn't met, or that the
|
||||
# item is in a delete queue (see the "delete" command below).
|
||||
#
|
||||
# - "EXISTS\r\n" to indicate that the item you are trying to store with
|
||||
# a "cas" command has been modified since you last fetched it.
|
||||
#
|
||||
# - "NOT_FOUND\r\n" to indicate that the item you are trying to store
|
||||
# with a "cas" command did not exist or has been deleted.
|
||||
sub handle_storage_cmd {
|
||||
my ( $event ) = @_;
|
||||
|
||||
# There should be a result for any storage cmd.
|
||||
if ( !$event->{res} ) {
|
||||
PTDEBUG && _d('No result for event:', Dumper($event));
|
||||
return;
|
||||
}
|
||||
|
||||
$event->{'Memc_Not_Stored'} = $event->{res} eq 'NOT_STORED' ? 'Yes' : 'No';
|
||||
$event->{'Memc_Exists'} = $event->{res} eq 'EXISTS' ? 'Yes' : 'No';
|
||||
|
||||
return $event;
|
||||
}
|
||||
|
||||
# Technically, the only results for a retrieval cmd are the values requested.
|
||||
# "If some of the keys appearing in a retrieval request are not sent back
|
||||
# by the server in the item list this means that the server does not
|
||||
# hold items with such keys (because they were never stored, or stored
|
||||
# but deleted to make space for more items, or expired, or explicitly
|
||||
# deleted by a client)."
|
||||
# Contrary to this, MemcacedProtocolParser will set res='VALUE' on
|
||||
# success, res='NOT_FOUND' on failure, or res='INTERRUPTED' if the get
|
||||
# didn't finish.
|
||||
sub handle_retr_cmd {
|
||||
my ( $event ) = @_;
|
||||
|
||||
# There should be a result for any retr cmd.
|
||||
if ( !$event->{res} ) {
|
||||
PTDEBUG && _d('No result for event:', Dumper($event));
|
||||
return;
|
||||
}
|
||||
|
||||
$event->{'Memc_error'} = $event->{res} eq 'INTERRUPTED' ? 'Yes' : 'No';
|
||||
|
||||
return $event;
|
||||
}
|
||||
|
||||
# handle_delete() and handle_incr_decr_cmd() are stub subs in case we
|
||||
# need them later.
|
||||
|
||||
# Possible results for a delete cmd:
|
||||
# - "DELETED\r\n" to indicate success
|
||||
#
|
||||
# - "NOT_FOUND\r\n" to indicate that the item with this key was not
|
||||
# found.
|
||||
sub handle_delete {
|
||||
my ( $event ) = @_;
|
||||
return $event;
|
||||
}
|
||||
|
||||
# Possible results for an incr or decr cmd:
|
||||
# - "NOT_FOUND\r\n" to indicate the item with this value was not found
|
||||
#
|
||||
# - <value>\r\n , where <value> is the new value of the item's data,
|
||||
# after the increment/decrement operation was carried out.
|
||||
# On success, MemcachedProtocolParser sets res='' and val=the new val.
|
||||
# On failure, res=the result and val=''.
|
||||
sub handle_incr_decr_cmd {
|
||||
my ( $event ) = @_;
|
||||
return $event;
|
||||
}
|
||||
|
||||
sub _d {
|
||||
my ($package, undef, $line) = caller 0;
|
||||
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
|
||||
map { defined $_ ? $_ : 'undef' }
|
||||
@_;
|
||||
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
|
||||
}
|
||||
|
||||
1;
|
||||
}
|
||||
# ###########################################################################
|
||||
# End MemcachedEvent package
|
||||
# ###########################################################################
|
@@ -1,424 +0,0 @@
|
||||
# This program is copyright 2007-2011 Percona Ireland Ltd.
|
||||
# Feedback and improvements are welcome.
|
||||
#
|
||||
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
|
||||
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
||||
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the GNU General Public License as published by the Free Software
|
||||
# Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
|
||||
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
|
||||
# licenses.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along with
|
||||
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
|
||||
# Place, Suite 330, Boston, MA 02111-1307 USA.
|
||||
# ###########################################################################
|
||||
# MemcachedProtocolParser package
|
||||
# ###########################################################################
|
||||
{
|
||||
# Package: MemcachedProtocolParser
|
||||
# MemcachedProtocolParser parses memcached events from tcpdump files.
|
||||
package MemcachedProtocolParser;
|
||||
|
||||
use strict;
|
||||
use warnings FATAL => 'all';
|
||||
use English qw(-no_match_vars);
|
||||
|
||||
use Data::Dumper;
|
||||
$Data::Dumper::Indent = 1;
|
||||
$Data::Dumper::Sortkeys = 1;
|
||||
$Data::Dumper::Quotekeys = 0;
|
||||
|
||||
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
|
||||
|
||||
sub new {
|
||||
my ( $class, %args ) = @_;
|
||||
|
||||
my $self = {
|
||||
server => $args{server},
|
||||
port => $args{port} || '11211',
|
||||
sessions => {},
|
||||
o => $args{o},
|
||||
};
|
||||
return bless $self, $class;
|
||||
}
|
||||
|
||||
# The packet arg should be a hashref from TcpdumpParser::parse_event().
|
||||
# misc is a placeholder for future features.
|
||||
sub parse_event {
|
||||
my ( $self, %args ) = @_;
|
||||
my @required_args = qw(event);
|
||||
foreach my $arg ( @required_args ) {
|
||||
die "I need a $arg argument" unless $args{$arg};
|
||||
}
|
||||
my $packet = @args{@required_args};
|
||||
|
||||
# Return early if there's no TCP data. These are usually ACK packets, but
|
||||
# they could also be FINs in which case, we should close and delete the
|
||||
# client's session.
|
||||
# TODO: It seems we don't handle FIN here? So I moved this code block here.
|
||||
if ( $packet->{data_len} == 0 ) {
|
||||
PTDEBUG && _d('No TCP data');
|
||||
$args{stats}->{no_tcp_data}++ if $args{stats};
|
||||
return;
|
||||
}
|
||||
|
||||
my $src_host = "$packet->{src_host}:$packet->{src_port}";
|
||||
my $dst_host = "$packet->{dst_host}:$packet->{dst_port}";
|
||||
|
||||
if ( my $server = $self->{server} ) { # Watch only the given server.
|
||||
$server .= ":$self->{port}";
|
||||
if ( $src_host ne $server && $dst_host ne $server ) {
|
||||
PTDEBUG && _d('Packet is not to or from', $server);
|
||||
$args{stats}->{not_watched_server}++ if $args{stats};
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
# Auto-detect the server by looking for port 11211
|
||||
my $packet_from;
|
||||
my $client;
|
||||
if ( $src_host =~ m/:$self->{port}$/ ) {
|
||||
$packet_from = 'server';
|
||||
$client = $dst_host;
|
||||
}
|
||||
elsif ( $dst_host =~ m/:$self->{port}$/ ) {
|
||||
$packet_from = 'client';
|
||||
$client = $src_host;
|
||||
}
|
||||
else {
|
||||
warn 'Packet is not to or from memcached server: ', Dumper($packet);
|
||||
return;
|
||||
}
|
||||
PTDEBUG && _d('Client:', $client);
|
||||
|
||||
# Get the client's session info or create a new session if the
|
||||
# client hasn't been seen before.
|
||||
if ( !exists $self->{sessions}->{$client} ) {
|
||||
PTDEBUG && _d('New session');
|
||||
$self->{sessions}->{$client} = {
|
||||
client => $client,
|
||||
state => undef,
|
||||
raw_packets => [],
|
||||
# ts -- wait for ts later.
|
||||
};
|
||||
};
|
||||
my $session = $self->{sessions}->{$client};
|
||||
|
||||
# Save raw packets to dump later in case something fails.
|
||||
push @{$session->{raw_packets}}, $packet->{raw_packet};
|
||||
|
||||
# Finally, parse the packet and maybe create an event.
|
||||
$packet->{data} = pack('H*', $packet->{data});
|
||||
my $event;
|
||||
if ( $packet_from eq 'server' ) {
|
||||
$event = $self->_packet_from_server($packet, $session, %args);
|
||||
}
|
||||
elsif ( $packet_from eq 'client' ) {
|
||||
$event = $self->_packet_from_client($packet, $session, %args);
|
||||
}
|
||||
else {
|
||||
# Should not get here.
|
||||
$args{stats}->{unknown_packet_origin}++ if $args{stats};
|
||||
die 'Packet origin unknown';
|
||||
}
|
||||
|
||||
PTDEBUG && _d('Done with packet; event:', Dumper($event));
|
||||
$args{stats}->{events_parsed}++ if $args{stats};
|
||||
return $event;
|
||||
}
|
||||
|
||||
# Handles a packet from the server given the state of the session. Returns an
|
||||
# event if one was ready to be created, otherwise returns nothing.
|
||||
sub _packet_from_server {
|
||||
my ( $self, $packet, $session, %args ) = @_;
|
||||
die "I need a packet" unless $packet;
|
||||
die "I need a session" unless $session;
|
||||
|
||||
PTDEBUG && _d('Packet is from server; client state:', $session->{state});
|
||||
|
||||
my $data = $packet->{data};
|
||||
|
||||
# If there's no session state, then we're catching a server response
|
||||
# mid-stream.
|
||||
if ( !$session->{state} ) {
|
||||
PTDEBUG && _d('Ignoring mid-stream server response');
|
||||
$args{stats}->{ignored_midstream_server_response}++ if $args{stats};
|
||||
return;
|
||||
}
|
||||
|
||||
# Assume that the server is returning only one value. TODO: make it
|
||||
# handle multi-gets.
|
||||
if ( $session->{state} eq 'awaiting reply' ) {
|
||||
PTDEBUG && _d('State is awaiting reply');
|
||||
# \r\n == 0d0a
|
||||
my ($line1, $rest) = $packet->{data} =~ m/\A(.*?)\r\n(.*)?/s;
|
||||
if ( !$line1 ) {
|
||||
$args{stats}->{unknown_server_data}++ if $args{stats};
|
||||
die "Unknown memcached data from server";
|
||||
}
|
||||
|
||||
# Split up the first line into its parts.
|
||||
my @vals = $line1 =~ m/(\S+)/g;
|
||||
$session->{res} = shift @vals;
|
||||
PTDEBUG && _d('Result of last', $session->{cmd}, 'cmd:', $session->{res});
|
||||
|
||||
if ( $session->{cmd} eq 'incr' || $session->{cmd} eq 'decr' ) {
|
||||
PTDEBUG && _d('It is an incr or decr');
|
||||
if ( $session->{res} !~ m/\D/ ) { # It's an integer, not an error
|
||||
PTDEBUG && _d('Got a value for the incr/decr');
|
||||
$session->{val} = $session->{res};
|
||||
$session->{res} = '';
|
||||
}
|
||||
}
|
||||
elsif ( $session->{res} eq 'VALUE' ) {
|
||||
PTDEBUG && _d('It is the result of a "get"');
|
||||
my ($key, $flags, $bytes) = @vals;
|
||||
defined $session->{flags} or $session->{flags} = $flags;
|
||||
defined $session->{bytes} or $session->{bytes} = $bytes;
|
||||
|
||||
# Get the value from the $rest.
|
||||
# TODO: there might be multiple responses
|
||||
if ( $rest && $bytes ) {
|
||||
PTDEBUG && _d('There is a value');
|
||||
if ( length($rest) > $bytes ) {
|
||||
PTDEBUG && _d('Got complete response');
|
||||
$session->{val} = substr($rest, 0, $bytes);
|
||||
}
|
||||
else {
|
||||
PTDEBUG && _d('Got partial response, saving for later');
|
||||
push @{$session->{partial}}, [ $packet->{seq}, $rest ];
|
||||
$session->{gathered} += length($rest);
|
||||
$session->{state} = 'partial recv';
|
||||
return; # Prevent firing an event.
|
||||
}
|
||||
}
|
||||
}
|
||||
elsif ( $session->{res} eq 'END' ) {
|
||||
# Technically NOT_FOUND is an error, and this isn't an error it's just
|
||||
# a NULL, but what it really means is the value isn't found.
|
||||
PTDEBUG && _d('Got an END without any data, firing NOT_FOUND');
|
||||
$session->{res} = 'NOT_FOUND';
|
||||
}
|
||||
elsif ( $session->{res} !~ m/STORED|DELETED|NOT_FOUND/ ) {
|
||||
# Not really sure what else would get us here... want to make a note
|
||||
# and not have an uncaught condition.
|
||||
PTDEBUG && _d('Unknown result');
|
||||
}
|
||||
else {
|
||||
$args{stats}->{unknown_server_response}++ if $args{stats};
|
||||
}
|
||||
}
|
||||
else { # Should be 'partial recv'
|
||||
PTDEBUG && _d('Session state: ', $session->{state});
|
||||
push @{$session->{partial}}, [ $packet->{seq}, $data ];
|
||||
$session->{gathered} += length($data);
|
||||
PTDEBUG && _d('Gathered', $session->{gathered}, 'bytes in',
|
||||
scalar(@{$session->{partial}}), 'packets from server');
|
||||
if ( $session->{gathered} >= $session->{bytes} + 2 ) { # Done.
|
||||
PTDEBUG && _d('End of partial response, preparing event');
|
||||
my $val = join('',
|
||||
map { $_->[1] }
|
||||
# Sort in proper sequence because TCP might reorder them.
|
||||
sort { $a->[0] <=> $b->[0] }
|
||||
@{$session->{partial}});
|
||||
$session->{val} = substr($val, 0, $session->{bytes});
|
||||
}
|
||||
else {
|
||||
PTDEBUG && _d('Partial response continues, no action');
|
||||
return; # Prevent firing event.
|
||||
}
|
||||
}
|
||||
|
||||
PTDEBUG && _d('Creating event, deleting session');
|
||||
my $event = make_event($session, $packet);
|
||||
delete $self->{sessions}->{$session->{client}}; # memcached is stateless!
|
||||
$session->{raw_packets} = []; # Avoid keeping forever
|
||||
return $event;
|
||||
}
|
||||
|
||||
# Handles a packet from the client given the state of the session.
|
||||
sub _packet_from_client {
|
||||
my ( $self, $packet, $session, %args ) = @_;
|
||||
die "I need a packet" unless $packet;
|
||||
die "I need a session" unless $session;
|
||||
|
||||
PTDEBUG && _d('Packet is from client; state:', $session->{state});
|
||||
|
||||
my $event;
|
||||
if ( ($session->{state} || '') =~m/awaiting reply|partial recv/ ) {
|
||||
# Whoa, we expected something from the server, not the client. Fire an
|
||||
# INTERRUPTED with what we've got, and create a new session.
|
||||
PTDEBUG && _d("Expected data from the client, looks like interrupted");
|
||||
$session->{res} = 'INTERRUPTED';
|
||||
$event = make_event($session, $packet);
|
||||
my $client = $session->{client};
|
||||
delete @{$session}{keys %$session};
|
||||
$session->{client} = $client;
|
||||
}
|
||||
|
||||
my ($line1, $val);
|
||||
my ($cmd, $key, $flags, $exptime, $bytes);
|
||||
|
||||
if ( !$session->{state} ) {
|
||||
PTDEBUG && _d('Session state: ', $session->{state});
|
||||
# Split up the first line into its parts.
|
||||
($line1, $val) = $packet->{data} =~ m/\A(.*?)\r\n(.+)?/s;
|
||||
if ( !$line1 ) {
|
||||
PTDEBUG && _d('Unknown memcached data from client, skipping packet');
|
||||
$args{stats}->{unknown_client_data}++ if $args{stats};
|
||||
return;
|
||||
}
|
||||
|
||||
# TODO: handle <cas unique> and [noreply]
|
||||
my @vals = $line1 =~ m/(\S+)/g;
|
||||
$cmd = lc shift @vals;
|
||||
PTDEBUG && _d('$cmd is a ', $cmd);
|
||||
if ( $cmd eq 'set' || $cmd eq 'add' || $cmd eq 'replace' ) {
|
||||
($key, $flags, $exptime, $bytes) = @vals;
|
||||
$session->{bytes} = $bytes;
|
||||
}
|
||||
elsif ( $cmd eq 'get' ) {
|
||||
($key) = @vals;
|
||||
if ( $val ) {
|
||||
PTDEBUG && _d('Multiple cmds:', $val);
|
||||
$val = undef;
|
||||
}
|
||||
}
|
||||
elsif ( $cmd eq 'delete' ) {
|
||||
($key) = @vals; # TODO: handle the <queue_time>
|
||||
if ( $val ) {
|
||||
PTDEBUG && _d('Multiple cmds:', $val);
|
||||
$val = undef;
|
||||
}
|
||||
}
|
||||
elsif ( $cmd eq 'incr' || $cmd eq 'decr' ) {
|
||||
($key) = @vals;
|
||||
}
|
||||
else {
|
||||
PTDEBUG && _d("Don't know how to handle", $cmd, "command");
|
||||
$args{stats}->{unknown_client_command}++ if $args{stats};
|
||||
return;
|
||||
}
|
||||
|
||||
@{$session}{qw(cmd key flags exptime)}
|
||||
= ($cmd, $key, $flags, $exptime);
|
||||
$session->{host} = $packet->{src_host};
|
||||
$session->{pos_in_log} = $packet->{pos_in_log};
|
||||
$session->{ts} = $packet->{ts};
|
||||
}
|
||||
else {
|
||||
PTDEBUG && _d('Session state: ', $session->{state});
|
||||
$val = $packet->{data};
|
||||
}
|
||||
|
||||
# Handle the rest of the packet. It might not be the whole value that was
|
||||
# sent, for example for a big set(). We need to look at the number of bytes
|
||||
# and see if we got it all.
|
||||
$session->{state} = 'awaiting reply'; # Assume we got the whole packet
|
||||
if ( $val ) {
|
||||
if ( $session->{bytes} + 2 == length($val) ) { # +2 for the \r\n
|
||||
PTDEBUG && _d('Complete send');
|
||||
$val =~ s/\r\n\Z//; # We got the whole thing.
|
||||
$session->{val} = $val;
|
||||
}
|
||||
else { # We apparently did NOT get the whole thing.
|
||||
PTDEBUG && _d('Partial send, saving for later');
|
||||
push @{$session->{partial}},
|
||||
[ $packet->{seq}, $val ];
|
||||
$session->{gathered} += length($val);
|
||||
PTDEBUG && _d('Gathered', $session->{gathered}, 'bytes in',
|
||||
scalar(@{$session->{partial}}), 'packets from client');
|
||||
if ( $session->{gathered} >= $session->{bytes} + 2 ) { # Done.
|
||||
PTDEBUG && _d('Message looks complete now, saving value');
|
||||
$val = join('',
|
||||
map { $_->[1] }
|
||||
# Sort in proper sequence because TCP might reorder them.
|
||||
sort { $a->[0] <=> $b->[0] }
|
||||
@{$session->{partial}});
|
||||
$val =~ s/\r\n\Z//;
|
||||
$session->{val} = $val;
|
||||
}
|
||||
else {
|
||||
PTDEBUG && _d('Message not complete');
|
||||
$val = '[INCOMPLETE]';
|
||||
$session->{state} = 'partial send';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return $event;
|
||||
}
|
||||
|
||||
# The event is not yet suitable for mk-query-digest. It lacks, for example,
|
||||
# an arg and fingerprint attribute. The event should be passed to
|
||||
# MemcachedEvent::make_event() to transform it.
|
||||
sub make_event {
|
||||
my ( $session, $packet ) = @_;
|
||||
my $event = {
|
||||
cmd => $session->{cmd},
|
||||
key => $session->{key},
|
||||
val => $session->{val} || '',
|
||||
res => $session->{res},
|
||||
ts => $session->{ts},
|
||||
host => $session->{host},
|
||||
flags => $session->{flags} || 0,
|
||||
exptime => $session->{exptime} || 0,
|
||||
bytes => $session->{bytes} || 0,
|
||||
Query_time => timestamp_diff($session->{ts}, $packet->{ts}),
|
||||
pos_in_log => $session->{pos_in_log},
|
||||
};
|
||||
return $event;
|
||||
}
|
||||
|
||||
sub _get_errors_fh {
|
||||
my ( $self ) = @_;
|
||||
my $errors_fh = $self->{errors_fh};
|
||||
return $errors_fh if $errors_fh;
|
||||
|
||||
# Errors file isn't open yet; try to open it.
|
||||
my $o = $self->{o};
|
||||
if ( $o && $o->has('tcpdump-errors') && $o->got('tcpdump-errors') ) {
|
||||
my $errors_file = $o->get('tcpdump-errors');
|
||||
PTDEBUG && _d('tcpdump-errors file:', $errors_file);
|
||||
open $errors_fh, '>>', $errors_file
|
||||
or die "Cannot open tcpdump-errors file $errors_file: $OS_ERROR";
|
||||
}
|
||||
|
||||
$self->{errors_fh} = $errors_fh;
|
||||
return $errors_fh;
|
||||
}
|
||||
|
||||
sub _d {
|
||||
my ($package, undef, $line) = caller 0;
|
||||
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
|
||||
map { defined $_ ? $_ : 'undef' }
|
||||
@_;
|
||||
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
|
||||
}
|
||||
|
||||
# Returns the difference between two tcpdump timestamps. TODO: this is in
|
||||
# MySQLProtocolParser too, best to factor it out somewhere common.
|
||||
sub timestamp_diff {
|
||||
my ( $start, $end ) = @_;
|
||||
my $sd = substr($start, 0, 11, '');
|
||||
my $ed = substr($end, 0, 11, '');
|
||||
my ( $sh, $sm, $ss ) = split(/:/, $start);
|
||||
my ( $eh, $em, $es ) = split(/:/, $end);
|
||||
my $esecs = ($eh * 3600 + $em * 60 + $es);
|
||||
my $ssecs = ($sh * 3600 + $sm * 60 + $ss);
|
||||
if ( $sd eq $ed ) {
|
||||
return sprintf '%.6f', $esecs - $ssecs;
|
||||
}
|
||||
else { # Assume only one day boundary has been crossed, no DST, etc
|
||||
return sprintf '%.6f', ( 86_400 - $ssecs ) + $esecs;
|
||||
}
|
||||
}
|
||||
|
||||
1;
|
||||
}
|
||||
# ###########################################################################
|
||||
# End MemcachedProtocolParser package
|
||||
# ###########################################################################
|
@@ -1,669 +0,0 @@
|
||||
# This program is copyright 2010-2011 Baron Schwartz, 2011 Percona Ireland Ltd.
|
||||
# Feedback and improvements are welcome.
|
||||
#
|
||||
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
|
||||
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
||||
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the GNU General Public License as published by the Free Software
|
||||
# Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
|
||||
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
|
||||
# licenses.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along with
|
||||
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
|
||||
# Place, Suite 330, Boston, MA 02111-1307 USA.
|
||||
# ###########################################################################
|
||||
# PgLogParser package
|
||||
# ###########################################################################
|
||||
{
|
||||
# Package: PgLogParser
|
||||
# PgLogParser parses Postgres logs.
|
||||
package PgLogParser;
|
||||
|
||||
use strict;
|
||||
use warnings FATAL => 'all';
|
||||
use English qw(-no_match_vars);
|
||||
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
|
||||
|
||||
use Data::Dumper;
|
||||
$Data::Dumper::Indent = 1;
|
||||
$Data::Dumper::Sortkeys = 1;
|
||||
$Data::Dumper::Quotekeys = 0;
|
||||
|
||||
# This regex is partially inspired by one from pgfouine. But there is no
|
||||
# documentation on the last capture in that regex, so I omit that. (TODO: that
|
||||
# actually seems to be for CSV logging.)
|
||||
# (?:[0-9XPFDBLA]{2}[0-9A-Z]{3}:[\s]+)?
|
||||
# Here I constrain to match at least two spaces after the severity level,
|
||||
# because the source code tells me to. I believe this is controlled in elog.c:
|
||||
# appendStringInfo(&buf, "%s: ", error_severity(edata->elevel));
|
||||
my $log_line_regex = qr{
|
||||
(LOG|DEBUG|CONTEXT|WARNING|ERROR|FATAL|PANIC|HINT
|
||||
|DETAIL|NOTICE|STATEMENT|INFO|LOCATION)
|
||||
:\s\s+
|
||||
}x;
|
||||
|
||||
# The following are taken right from the comments in postgresql.conf for
|
||||
# log_line_prefix.
|
||||
my %attrib_name_for = (
|
||||
u => 'user',
|
||||
d => 'db',
|
||||
r => 'host', # With port
|
||||
h => 'host',
|
||||
p => 'Process_id',
|
||||
t => 'ts',
|
||||
m => 'ts', # With milliseconds
|
||||
i => 'Query_type',
|
||||
c => 'Session_id',
|
||||
l => 'Line_no',
|
||||
s => 'Session_id',
|
||||
v => 'Vrt_trx_id',
|
||||
x => 'Trx_id',
|
||||
);
|
||||
|
||||
# This class's data structure is a hashref with some statefulness: pending
|
||||
# lines. This is necessary because we sometimes don't know whether the event is
|
||||
# complete until we read the next line or even several lines, so we store these.
|
||||
#
|
||||
# Another bit of data that's stored in $self is some code to automatically
|
||||
# translate syslog into plain log format.
|
||||
sub new {
|
||||
my ( $class ) = @_;
|
||||
my $self = {
|
||||
pending => [],
|
||||
is_syslog => undef,
|
||||
next_event => undef,
|
||||
'tell' => undef,
|
||||
};
|
||||
return bless $self, $class;
|
||||
}
|
||||
|
||||
# This method accepts an iterator that contains an open log filehandle. It
|
||||
# reads events from the filehandle by calling the iterator, and returns the
|
||||
# events.
|
||||
#
|
||||
# Each event is a hashref of attribute => value pairs like:
|
||||
# my $event = {
|
||||
# ts => '', # Timestamp
|
||||
# arg => '', # Argument to the command
|
||||
# other attributes...
|
||||
# };
|
||||
#
|
||||
# The log format is ideally prefixed with the following:
|
||||
#
|
||||
# * timestamp with microseconds
|
||||
# * session ID, user, database
|
||||
#
|
||||
# The format I'd like to see is something like this:
|
||||
#
|
||||
# 2010-02-08 15:31:48.685 EST c=4b7074b4.985,u=user,D=database LOG:
|
||||
#
|
||||
# However, pgfouine supports user=user, db=database format. And I think
|
||||
# it should be reasonable to grab pretty much any name=value properties out, and
|
||||
# handle them based on the lower-cased first character of $name, to match the
|
||||
# special values that are possible to give for log_line_prefix. For example, %u
|
||||
# = user, so anything starting with a 'u' should be interpreted as a user.
|
||||
#
|
||||
# In general the log format is rather flexible, and we don't know by looking at
|
||||
# any given line whether it's the last line in the event. So we often have to
|
||||
# read a line and then decide what to do with the previous line we saw. Thus we
|
||||
# use 'pending' when necessary but we try to do it as little as possible,
|
||||
# because it's double work to defer and re-parse lines; and we try to defer as
|
||||
# soon as possible so we don't have to do as much work.
|
||||
#
|
||||
# There are 3 categories of lines in a log file, referred to in the code as case
|
||||
# 1/2/3:
|
||||
#
|
||||
# - Those that start a possibly multi-line event
|
||||
# - Those that can continue one
|
||||
# - Those that are neither the start nor the continuation, and thus must be the
|
||||
# end.
|
||||
#
|
||||
# In cases 1 and 3, we have to check whether information from previous lines has
|
||||
# been accumulated. If it has, we defer the current line and create the event.
|
||||
# Otherwise we keep going, looking for more lines for the event that begins with
|
||||
# the current line. Processing the lines is easiest if we arrange the cases in
|
||||
# this order: 2, 1, 3.
|
||||
#
|
||||
# The term "line" is to be interpreted loosely here. Logs that are in syslog
|
||||
# format might have multi-line "lines" that are handled by the generated
|
||||
# $next_event closure and given back to the main while-loop with newlines in
|
||||
# them. Therefore, regexes that match "the rest of the line" generally need the
|
||||
# /s flag.
|
||||
sub parse_event {
|
||||
my ( $self, %args ) = @_;
|
||||
my @required_args = qw(next_event tell);
|
||||
foreach my $arg ( @required_args ) {
|
||||
die "I need a $arg argument" unless $args{$arg};
|
||||
}
|
||||
|
||||
# The subroutine references that wrap the filehandle operations.
|
||||
my ( $next_event, $tell, $is_syslog ) = $self->generate_wrappers(%args);
|
||||
|
||||
# These are the properties for the log event, which will later be used to
|
||||
# create an event hash ref.
|
||||
my @properties = ();
|
||||
|
||||
# Holds the current line being processed, and its position in the log as a
|
||||
# byte offset from the beginning. In some cases we'll have to reset this
|
||||
# position later. We'll also have to take a wait-and-see attitude towards
|
||||
# the $pos_in_log, so we use $new_pos to record where we're working in the
|
||||
# log, and $pos_in_log to record where the beginning of the current event
|
||||
# started.
|
||||
my ($pos_in_log, $line, $was_pending) = $self->get_line();
|
||||
my $new_pos;
|
||||
|
||||
# Sometimes we need to accumulate some lines and then join them together.
|
||||
# This is used for that.
|
||||
my @arg_lines;
|
||||
|
||||
# This is used to signal that an entire event has been found, and thus exit
|
||||
# the while loop.
|
||||
my $done;
|
||||
|
||||
# This is used to signal that an event's duration has already been found.
|
||||
# See the sample file pg-syslog-001.txt and the test for it.
|
||||
my $got_duration;
|
||||
|
||||
# Before we start, we read and discard lines until we get one with a header.
|
||||
# The only thing we can really count on is that a header line should have
|
||||
# the header in it. But, we only do this if we aren't in the middle of an
|
||||
# ongoing event, whose first line was pending.
|
||||
if ( !$was_pending && (!defined $line || $line !~ m/$log_line_regex/o) ) {
|
||||
PTDEBUG && _d('Skipping lines until I find a header');
|
||||
my $found_header;
|
||||
LINE:
|
||||
while (
|
||||
eval {
|
||||
($new_pos, $line) = $self->get_line();
|
||||
defined $line;
|
||||
}
|
||||
) {
|
||||
if ( $line =~ m/$log_line_regex/o ) {
|
||||
$pos_in_log = $new_pos;
|
||||
last LINE;
|
||||
}
|
||||
else {
|
||||
PTDEBUG && _d('Line was not a header, will fetch another');
|
||||
}
|
||||
}
|
||||
PTDEBUG && _d('Found a header line, now at pos_in_line', $pos_in_log);
|
||||
}
|
||||
|
||||
# We need to keep the line that begins the event we're parsing.
|
||||
my $first_line;
|
||||
|
||||
# This is for holding the type of the log line, which is important for
|
||||
# choosing the right code to run.
|
||||
my $line_type;
|
||||
|
||||
# Parse each line.
|
||||
LINE:
|
||||
while ( !$done && defined $line ) {
|
||||
|
||||
# Throw away the newline ending.
|
||||
chomp $line unless $is_syslog;
|
||||
|
||||
# This while loop works with LOG lines. Other lines, such as ERROR and
|
||||
# so forth, need to be handled outside this loop. The exception is when
|
||||
# there's nothing in progress in @arg_lines, and the non-LOG line might
|
||||
# just be something we can get relevant info from.
|
||||
if ( (($line_type) = $line =~ m/$log_line_regex/o) && $line_type ne 'LOG' ) {
|
||||
|
||||
# There's something in progress, so we abort the loop and let it be
|
||||
# handled specially.
|
||||
if ( @arg_lines ) {
|
||||
PTDEBUG && _d('Found a non-LOG line, exiting loop');
|
||||
last LINE;
|
||||
}
|
||||
|
||||
# There's nothing in @arg_lines, so we save what info we can and keep
|
||||
# on going.
|
||||
else {
|
||||
$first_line ||= $line;
|
||||
|
||||
# Handle ERROR and STATEMENT lines...
|
||||
if ( my ($e) = $line =~ m/ERROR:\s+(\S.*)\Z/s ) {
|
||||
push @properties, 'Error_msg', $e;
|
||||
PTDEBUG && _d('Found an error msg, saving and continuing');
|
||||
($new_pos, $line) = $self->get_line();
|
||||
next LINE;
|
||||
}
|
||||
|
||||
elsif ( my ($s) = $line =~ m/STATEMENT:\s+(\S.*)\Z/s ) {
|
||||
push @properties, 'arg', $s, 'cmd', 'Query';
|
||||
PTDEBUG && _d('Found a statement, finishing up event');
|
||||
$done = 1;
|
||||
last LINE;
|
||||
}
|
||||
|
||||
else {
|
||||
PTDEBUG && _d("I don't know what to do with this line");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
# The log isn't just queries. It also has status and informational lines
|
||||
# in it. We ignore these, but if we see one that's not recognized, we
|
||||
# warn. These types of things are better off in mk-error-log.
|
||||
if (
|
||||
$line =~ m{
|
||||
Address\sfamily\snot\ssupported\sby\sprotocol
|
||||
|archived\stransaction\slog\sfile
|
||||
|autovacuum:\sprocessing\sdatabase
|
||||
|checkpoint\srecord\sis\sat
|
||||
|checkpoints\sare\soccurring\stoo\sfrequently\s\(
|
||||
|could\snot\sreceive\sdata\sfrom\sclient
|
||||
|database\ssystem\sis\sready
|
||||
|database\ssystem\sis\sshut\sdown
|
||||
|database\ssystem\swas\sshut\sdown
|
||||
|incomplete\sstartup\spacket
|
||||
|invalid\slength\sof\sstartup\spacket
|
||||
|next\sMultiXactId:
|
||||
|next\stransaction\sID:
|
||||
|received\ssmart\sshutdown\srequest
|
||||
|recycled\stransaction\slog\sfile
|
||||
|redo\srecord\sis\sat
|
||||
|removing\sfile\s"
|
||||
|removing\stransaction\slog\sfile\s"
|
||||
|shutting\sdown
|
||||
|transaction\sID\swrap\slimit\sis
|
||||
}x
|
||||
) {
|
||||
# We get the next line to process and skip the rest of the loop.
|
||||
PTDEBUG && _d('Skipping this line because it matches skip-pattern');
|
||||
($new_pos, $line) = $self->get_line();
|
||||
next LINE;
|
||||
}
|
||||
|
||||
# Possibly reset $first_line, depending on whether it was determined to be
|
||||
# junk and unset.
|
||||
$first_line ||= $line;
|
||||
|
||||
# Case 2: non-header lines, optionally starting with a TAB, are a
|
||||
# continuation of the previous line.
|
||||
if ( $line !~ m/$log_line_regex/o && @arg_lines ) {
|
||||
|
||||
if ( !$is_syslog ) {
|
||||
# We need to translate tabs to newlines. Weirdly, some logs (see
|
||||
# samples/pg-log-005.txt) have newlines without a leading tab.
|
||||
# Maybe it's an older log format.
|
||||
$line =~ s/\A\t?/\n/;
|
||||
}
|
||||
|
||||
# Save the remainder.
|
||||
push @arg_lines, $line;
|
||||
PTDEBUG && _d('This was a continuation line');
|
||||
}
|
||||
|
||||
# Cases 1 and 3: These lines start with some optional meta-data, and then
|
||||
# the $log_line_regex followed by the line's log message. The message can be
|
||||
# of the form "label: text....". Examples:
|
||||
# LOG: duration: 1.565 ms
|
||||
# LOG: statement: SELECT ....
|
||||
# LOG: duration: 1.565 ms statement: SELECT ....
|
||||
# In the above examples, the $label is duration, statement, and duration.
|
||||
elsif (
|
||||
my ( $sev, $label, $rest )
|
||||
= $line =~ m/$log_line_regex(.+?):\s+(.*)\Z/so
|
||||
) {
|
||||
PTDEBUG && _d('Line is case 1 or case 3');
|
||||
|
||||
# This is either a case 1 or case 3. If there's previously gathered
|
||||
# data in @arg_lines, it doesn't matter which -- we have to create an
|
||||
# event (a Query event), and we're $done. This is case 0xdeadbeef.
|
||||
if ( @arg_lines ) {
|
||||
$done = 1;
|
||||
PTDEBUG && _d('There are saved @arg_lines, we are done');
|
||||
|
||||
# We shouldn't modify @properties based on $line, because $line
|
||||
# doesn't have anything to do with the stuff in @properties, which
|
||||
# is all related to the previous line(s). However, there is one
|
||||
# case in which the line could be part of the event: when it's a
|
||||
# plain 'duration' line. This happens when the statement is logged
|
||||
# on one line, and then the duration is logged afterwards. If this
|
||||
# is true, then we alter @properties, and we do NOT defer the current
|
||||
# line.
|
||||
if ( $label eq 'duration' && $rest =~ m/[0-9.]+\s+\S+\Z/ ) {
|
||||
if ( $got_duration ) {
|
||||
# Just discard the line.
|
||||
PTDEBUG && _d('Discarding line, duration already found');
|
||||
}
|
||||
else {
|
||||
push @properties, 'Query_time', $self->duration_to_secs($rest);
|
||||
PTDEBUG && _d("Line's duration is for previous event:", $rest);
|
||||
}
|
||||
}
|
||||
else {
|
||||
# We'll come back to this line later.
|
||||
$self->pending($new_pos, $line);
|
||||
PTDEBUG && _d('Deferred line');
|
||||
}
|
||||
}
|
||||
|
||||
# Here we test for case 1, lines that can start a multi-line event.
|
||||
elsif ( $label =~ m/\A(?:duration|statement|query)\Z/ ) {
|
||||
PTDEBUG && _d('Case 1: start a multi-line event');
|
||||
|
||||
# If it's a duration, then there might be a statement later on the
|
||||
# same line and the duration applies to that.
|
||||
if ( $label eq 'duration' ) {
|
||||
|
||||
if (
|
||||
(my ($dur, $stmt)
|
||||
= $rest =~ m/([0-9.]+ \S+)\s+(?:statement|query): *(.*)\Z/s)
|
||||
) {
|
||||
# It does, so we'll pull out the Query_time etc now, rather
|
||||
# than doing it later, when we might end up in the case above
|
||||
# (case 0xdeadbeef).
|
||||
push @properties, 'Query_time', $self->duration_to_secs($dur);
|
||||
$got_duration = 1;
|
||||
push @arg_lines, $stmt;
|
||||
PTDEBUG && _d('Duration + statement');
|
||||
}
|
||||
|
||||
else {
|
||||
# The duration line is just junk. It's the line after a
|
||||
# statement, but we never saw the statement (else we'd have
|
||||
# fallen into 0xdeadbeef above). Discard this line and adjust
|
||||
# pos_in_log. See t/samples/pg-log-002.txt for an example.
|
||||
$first_line = undef;
|
||||
($pos_in_log, $line) = $self->get_line();
|
||||
PTDEBUG && _d('Line applies to event we never saw, discarding');
|
||||
next LINE;
|
||||
}
|
||||
}
|
||||
else {
|
||||
# This isn't a duration line, it's a statement or query. Put it
|
||||
# onto @arg_lines for later and keep going.
|
||||
push @arg_lines, $rest;
|
||||
PTDEBUG && _d('Putting onto @arg_lines');
|
||||
}
|
||||
}
|
||||
|
||||
# Here is case 3, lines that can't be in case 1 or 2. These surely
|
||||
# terminate any event that's been accumulated, and if there isn't any
|
||||
# such, then we just create an event without the overhead of deferring.
|
||||
else {
|
||||
$done = 1;
|
||||
PTDEBUG && _d('Line is case 3, event is done');
|
||||
|
||||
# Again, if there's previously gathered data in @arg_lines, we have
|
||||
# to defer the current line (not touching @properties) and revisit it.
|
||||
if ( @arg_lines ) {
|
||||
$self->pending($new_pos, $line);
|
||||
PTDEBUG && _d('There was @arg_lines, putting line to pending');
|
||||
}
|
||||
|
||||
# Otherwise we can parse the line and put it into @properties.
|
||||
else {
|
||||
PTDEBUG && _d('No need to defer, process event from this line now');
|
||||
push @properties, 'cmd', 'Admin', 'arg', $label;
|
||||
|
||||
# For some kinds of log lines, we can grab extra meta-data out of
|
||||
# the end of the line.
|
||||
# LOG: connection received: host=[local]
|
||||
if ( $label =~ m/\A(?:dis)?connection(?: received| authorized)?\Z/ ) {
|
||||
push @properties, $self->get_meta($rest);
|
||||
}
|
||||
|
||||
else {
|
||||
die "I don't understand line $line";
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
# If the line isn't case 1, 2, or 3 I don't know what it is.
|
||||
else {
|
||||
die "I don't understand line $line";
|
||||
}
|
||||
|
||||
# We get the next line to process.
|
||||
if ( !$done ) {
|
||||
($new_pos, $line) = $self->get_line();
|
||||
}
|
||||
} # LINE
|
||||
|
||||
# If we're at the end of the file, we finish and tell the caller we're done.
|
||||
if ( !defined $line ) {
|
||||
PTDEBUG && _d('Line not defined, at EOF; calling oktorun(0) if exists');
|
||||
$args{oktorun}->(0) if $args{oktorun};
|
||||
if ( !@arg_lines ) {
|
||||
PTDEBUG && _d('No saved @arg_lines either, we are all done');
|
||||
return undef;
|
||||
}
|
||||
}
|
||||
|
||||
# If we got kicked out of the while loop because of a non-LOG line, we handle
|
||||
# that line here.
|
||||
if ( $line_type && $line_type ne 'LOG' ) {
|
||||
PTDEBUG && _d('Line is not a LOG line');
|
||||
|
||||
# ERROR lines come in a few flavors. See t/samples/pg-log-006.txt,
|
||||
# t/samples/pg-syslog-002.txt, and t/samples/pg-syslog-007.txt for some
|
||||
# examples. The rules seem to be this: if the ERROR is followed by a
|
||||
# STATEMENT, and the STATEMENT's statement matches the query in
|
||||
# @arg_lines, then the STATEMENT message is redundant. (This can be
|
||||
# caused by various combos of configuration options in postgresql.conf).
|
||||
# However, if the ERROR's STATEMENT line doesn't match what's in
|
||||
# @arg_lines, then the ERROR actually starts a new event. If the ERROR is
|
||||
# followed by another LOG event, then the ERROR also starts a new event.
|
||||
if ( $line_type eq 'ERROR' ) {
|
||||
PTDEBUG && _d('Line is ERROR');
|
||||
|
||||
# If there's already a statement in processing, then put aside the
|
||||
# current line, and peek ahead.
|
||||
if ( @arg_lines ) {
|
||||
PTDEBUG && _d('There is @arg_lines, will peek ahead one line');
|
||||
my ( $temp_pos, $temp_line ) = $self->get_line();
|
||||
my ( $type, $msg );
|
||||
if (
|
||||
defined $temp_line
|
||||
&& ( ($type, $msg) = $temp_line =~ m/$log_line_regex(.*)/o )
|
||||
&& ( $type ne 'STATEMENT' || $msg eq $arg_lines[-1] )
|
||||
) {
|
||||
# Looks like the whole thing is pertaining to the current event
|
||||
# in progress. Add the error message to the event.
|
||||
PTDEBUG && _d('Error/statement line pertain to current event');
|
||||
push @properties, 'Error_msg', $line =~ m/ERROR:\s*(\S.*)\Z/s;
|
||||
if ( $type ne 'STATEMENT' ) {
|
||||
PTDEBUG && _d('Must save peeked line, it is a', $type);
|
||||
$self->pending($temp_pos, $temp_line);
|
||||
}
|
||||
}
|
||||
elsif ( defined $temp_line && defined $type ) {
|
||||
# Looks like the current and next line are about a new event.
|
||||
# Put them into pending.
|
||||
PTDEBUG && _d('Error/statement line are a new event');
|
||||
$self->pending($new_pos, $line);
|
||||
$self->pending($temp_pos, $temp_line);
|
||||
}
|
||||
else {
|
||||
PTDEBUG && _d("Unknown line", $line);
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
PTDEBUG && _d("Unknown line", $line);
|
||||
}
|
||||
}
|
||||
|
||||
# If $done is true, then some of the above code decided that the full
|
||||
# event has been found. If we reached the end of the file, then we might
|
||||
# also have something in @arg_lines, although we didn't find the "line after"
|
||||
# that signals the event was done. In either case we return an event. This
|
||||
# should be the only 'return' statement in this block of code.
|
||||
if ( $done || @arg_lines ) {
|
||||
PTDEBUG && _d('Making event');
|
||||
|
||||
# Finish building the event.
|
||||
push @properties, 'pos_in_log', $pos_in_log;
|
||||
|
||||
# Statement/query lines will be in @arg_lines.
|
||||
if ( @arg_lines ) {
|
||||
PTDEBUG && _d('Assembling @arg_lines: ', scalar @arg_lines);
|
||||
push @properties, 'arg', join('', @arg_lines), 'cmd', 'Query';
|
||||
}
|
||||
|
||||
if ( $first_line ) {
|
||||
# Handle some meta-data: a timestamp, with optional milliseconds.
|
||||
if ( my ($ts) = $first_line =~ m/([0-9-]{10} [0-9:.]{8,12})/ ) {
|
||||
PTDEBUG && _d('Getting timestamp', $ts);
|
||||
push @properties, 'ts', $ts;
|
||||
}
|
||||
|
||||
# Find meta-data embedded in the log line prefix, in name=value format.
|
||||
if ( my ($meta) = $first_line =~ m/(.*?)[A-Z]{3,}: / ) {
|
||||
PTDEBUG && _d('Found a meta-data chunk:', $meta);
|
||||
push @properties, $self->get_meta($meta);
|
||||
}
|
||||
}
|
||||
|
||||
# Dump info about what we've found, but don't dump $event; want to see
|
||||
# full dump of all properties, and after it's been cast into a hash,
|
||||
# duplicated keys will be gone.
|
||||
PTDEBUG && _d('Properties of event:', Dumper(\@properties));
|
||||
my $event = { @properties };
|
||||
$event->{bytes} = length($event->{arg} || '');
|
||||
return $event;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
# Parses key=value meta-data from the $meta string, and returns a list of event
|
||||
# attribute names and values.
|
||||
sub get_meta {
|
||||
my ( $self, $meta ) = @_;
|
||||
my @properties;
|
||||
foreach my $set ( $meta =~ m/(\w+=[^, ]+)/g ) {
|
||||
my ($key, $val) = split(/=/, $set);
|
||||
if ( $key && $val ) {
|
||||
# The first letter of the name, lowercased, determines the
|
||||
# meaning of the item.
|
||||
if ( my $prop = $attrib_name_for{lc substr($key, 0, 1)} ) {
|
||||
push @properties, $prop, $val;
|
||||
}
|
||||
else {
|
||||
PTDEBUG && _d('Bad meta key', $set);
|
||||
}
|
||||
}
|
||||
else {
|
||||
PTDEBUG && _d("Can't figure out meta from", $set);
|
||||
}
|
||||
}
|
||||
return @properties;
|
||||
}
|
||||
|
||||
# This subroutine abstracts the process and source of getting a line of text and
|
||||
# its position in the log file. It might get the line of text from the log; it
|
||||
# might get it from the @pending array. It also does infinite loop checking
|
||||
# TODO.
|
||||
sub get_line {
|
||||
my ( $self ) = @_;
|
||||
my ($pos, $line, $was_pending) = $self->pending;
|
||||
if ( ! defined $line ) {
|
||||
PTDEBUG && _d('Got nothing from pending, trying the $fh');
|
||||
my ( $next_event, $tell) = @{$self}{qw(next_event tell)};
|
||||
eval {
|
||||
$pos = $tell->();
|
||||
$line = $next_event->();
|
||||
};
|
||||
if ( PTDEBUG && $EVAL_ERROR ) {
|
||||
_d($EVAL_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
PTDEBUG && _d('Got pos/line:', $pos, $line);
|
||||
return ($pos, $line);
|
||||
}
|
||||
|
||||
# This subroutine defers and retrieves a line/pos pair. If you give it an
|
||||
# argument it'll set the stored value. If not, it'll get one if there is one
|
||||
# and return it.
|
||||
sub pending {
|
||||
my ( $self, $val, $pos_in_log ) = @_;
|
||||
my $was_pending;
|
||||
PTDEBUG && _d('In sub pending, val:', $val);
|
||||
if ( $val ) {
|
||||
push @{$self->{pending}}, [$val, $pos_in_log];
|
||||
}
|
||||
elsif ( @{$self->{pending}} ) {
|
||||
($val, $pos_in_log) = @{ shift @{$self->{pending}} };
|
||||
$was_pending = 1;
|
||||
}
|
||||
PTDEBUG && _d('Return from pending:', $val, $pos_in_log);
|
||||
return ($val, $pos_in_log, $was_pending);
|
||||
}
|
||||
|
||||
# This subroutine manufactures subroutines to automatically translate incoming
|
||||
# syslog format into standard log format, to keep the main parse_event free from
|
||||
# having to think about that. For documentation on how this works, see
|
||||
# SysLogParser.pm.
|
||||
sub generate_wrappers {
|
||||
my ( $self, %args ) = @_;
|
||||
|
||||
# Reset everything, just in case some cruft was left over from a previous use
|
||||
# of this object. The object has stateful closures. If this isn't done,
|
||||
# then they'll keep reading from old filehandles. The sanity check is based
|
||||
# on the memory address of the closure!
|
||||
if ( ($self->{sanity} || '') ne "$args{next_event}" ){
|
||||
PTDEBUG && _d("Clearing and recreating internal state");
|
||||
eval { require SysLogParser; }; # Required for tests to work.
|
||||
my $sl = new SysLogParser();
|
||||
|
||||
# We need a special item in %args for syslog parsing. (This might not be
|
||||
# a syslog log file...) See the test for t/samples/pg-syslog-002.txt for
|
||||
# an example of when this is needed.
|
||||
$args{misc}->{new_event_test} = sub {
|
||||
my ( $content ) = @_;
|
||||
return unless defined $content;
|
||||
return $content =~ m/$log_line_regex/o;
|
||||
};
|
||||
|
||||
# The TAB at the beginning of the line indicates that there's a newline
|
||||
# at the end of the previous line.
|
||||
$args{misc}->{line_filter} = sub {
|
||||
my ( $content ) = @_;
|
||||
$content =~ s/\A\t/\n/;
|
||||
return $content;
|
||||
};
|
||||
|
||||
@{$self}{qw(next_event tell is_syslog)} = $sl->make_closures(%args);
|
||||
$self->{sanity} = "$args{next_event}";
|
||||
}
|
||||
|
||||
# Return the wrapper functions!
|
||||
return @{$self}{qw(next_event tell is_syslog)};
|
||||
}
|
||||
|
||||
# This subroutine converts various formats to seconds. Examples:
|
||||
# 10.870 ms
|
||||
sub duration_to_secs {
|
||||
my ( $self, $str ) = @_;
|
||||
PTDEBUG && _d('Duration:', $str);
|
||||
my ( $num, $suf ) = split(/\s+/, $str);
|
||||
my $factor = $suf eq 'ms' ? 1000
|
||||
: $suf eq 'sec' ? 1
|
||||
: die("Unknown suffix '$suf'");
|
||||
return $num / $factor;
|
||||
}
|
||||
|
||||
sub _d {
|
||||
my ($package, undef, $line) = caller 0;
|
||||
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
|
||||
map { defined $_ ? $_ : 'undef' }
|
||||
@_;
|
||||
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
|
||||
}
|
||||
|
||||
1;
|
||||
}
|
||||
# ###########################################################################
|
||||
# End PgLogParser package
|
||||
# ###########################################################################
|
@@ -481,9 +481,7 @@ sub query_report {
|
||||
}
|
||||
|
||||
my $log_type = $args{log_type} || '';
|
||||
my $mark = $log_type eq 'memcached'
|
||||
|| $log_type eq 'http'
|
||||
|| $log_type eq 'pglog' ? '' : '\G';
|
||||
my $mark = '\G';
|
||||
|
||||
if ( $item =~ m/^(?:[\(\s]*select|insert|replace)/ ) {
|
||||
if ( $item =~ m/^(?:insert|replace)/ ) { # No EXPLAIN
|
||||
|
@@ -1,259 +0,0 @@
|
||||
# This program is copyright 2010-2011 Baron Schwartz, 2011 Percona Ireland Ltd.
|
||||
# Feedback and improvements are welcome.
|
||||
#
|
||||
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
|
||||
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
||||
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the GNU General Public License as published by the Free Software
|
||||
# Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
|
||||
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
|
||||
# licenses.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along with
|
||||
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
|
||||
# Place, Suite 330, Boston, MA 02111-1307 USA.
|
||||
# ###########################################################################
|
||||
# SysLogParser package
|
||||
# ###########################################################################
|
||||
{
|
||||
# Package: SysLogParser
|
||||
# SysLogParser parses events from syslogs.
|
||||
package SysLogParser;
|
||||
|
||||
use strict;
|
||||
use warnings FATAL => 'all';
|
||||
use English qw(-no_match_vars);
|
||||
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
|
||||
|
||||
# This regex matches the message number, line number, and content of a syslog
|
||||
# message:
|
||||
# 2008 Jan 9 16:16:34 hostname postgres[30059]: [13-2] ...content...
|
||||
my $syslog_regex = qr{\A.*\w+\[\d+\]: \[(\d+)-(\d+)\] (.*)\Z};
|
||||
|
||||
# This class generates currying functions that wrap around a standard
|
||||
# log-parser's next_event() and tell() function pointers. The wrappers behave
|
||||
# the same way, except that they'll return entire syslog events, instead of
|
||||
# lines at a time. To use it, do the following:
|
||||
#
|
||||
# sub parse_event {
|
||||
# my ($self, %args) = @_;
|
||||
# my ($next_event, $tell, $is_syslog) = SysLogParser::make_closures(%args);
|
||||
# # ... write your code to use the $next_event and $tell here...
|
||||
# }
|
||||
#
|
||||
# If the log isn't in syslog format, $is_syslog will be false and you'll get
|
||||
# back simple wrappers around the $next_event and $tell functions. (They still
|
||||
# have to be wrapped, because to find out whether the log is in syslog format,
|
||||
# the first line has to be examined.)
|
||||
sub new {
|
||||
my ( $class ) = @_;
|
||||
my $self = {};
|
||||
return bless $self, $class;
|
||||
}
|
||||
|
||||
# This method is here so that SysLogParser can be used and tested in its own
|
||||
# right. However, its ability to generate wrapper functions probably means that
|
||||
# it should be used as a translation layer, not directly. You can use this code
|
||||
# as an example of how to integrate this into other packages.
|
||||
sub parse_event {
|
||||
my ( $self, %args ) = @_;
|
||||
my ( $next_event, $tell, $is_syslog ) = $self->generate_wrappers(%args);
|
||||
return $next_event->();
|
||||
}
|
||||
|
||||
# This is an example of how a class can seamlessly put a syslog translation
|
||||
# layer underneath itself.
|
||||
sub generate_wrappers {
|
||||
my ( $self, %args ) = @_;
|
||||
|
||||
# Reset everything, just in case some cruft was left over from a previous use
|
||||
# of this object. The object has stateful closures. If this isn't done,
|
||||
# then they'll keep reading from old filehandles. The sanity check is based
|
||||
# on the memory address of the closure!
|
||||
if ( ($self->{sanity} || '') ne "$args{next_event}" ){
|
||||
PTDEBUG && _d("Clearing and recreating internal state");
|
||||
@{$self}{qw(next_event tell is_syslog)} = $self->make_closures(%args);
|
||||
$self->{sanity} = "$args{next_event}";
|
||||
}
|
||||
|
||||
# Return the wrapper functions!
|
||||
return @{$self}{qw(next_event tell is_syslog)};
|
||||
}
|
||||
|
||||
# Make the closures! The $args{misc}->{new_event_test} is an optional
|
||||
# subroutine reference, which tells the wrapper when to consider a line part of
|
||||
# a new event, in syslog format, even when it's technically the same syslog
|
||||
# event. See the test for samples/pg-syslog-002.txt for an example. This
|
||||
# argument should be passed in via the call to parse_event(). Ditto for
|
||||
# 'line_filter', which is some processing code to run on every line of content
|
||||
# in an event.
|
||||
sub make_closures {
|
||||
my ( $self, %args ) = @_;
|
||||
|
||||
# The following variables will be referred to in the manufactured
|
||||
# subroutines, making them proper closures.
|
||||
my $next_event = $args{'next_event'};
|
||||
my $tell = $args{'tell'};
|
||||
my $new_event_test = $args{'misc'}->{'new_event_test'};
|
||||
my $line_filter = $args{'misc'}->{'line_filter'};
|
||||
|
||||
# The first thing to do is get a line from the log and see if it's from
|
||||
# syslog.
|
||||
my $test_line = $next_event->();
|
||||
PTDEBUG && _d('Read first sample/test line:', $test_line);
|
||||
|
||||
# If it's syslog, we have to generate a moderately elaborate wrapper
|
||||
# function.
|
||||
if ( defined $test_line && $test_line =~ m/$syslog_regex/o ) {
|
||||
|
||||
# Within syslog-parsing subroutines, we'll use LLSP (low-level syslog
|
||||
# parser) as a PTDEBUG line prefix.
|
||||
PTDEBUG && _d('This looks like a syslog line, PTDEBUG prefix=LLSP');
|
||||
|
||||
# Grab the interesting bits out of the test line, and save the result.
|
||||
my ($msg_nr, $line_nr, $content) = $test_line =~ m/$syslog_regex/o;
|
||||
my @pending = ($test_line);
|
||||
my $last_msg_nr = $msg_nr;
|
||||
my $pos_in_log = 0;
|
||||
|
||||
# Generate the subroutine for getting a full log message without syslog
|
||||
# breaking it across multiple lines.
|
||||
my $new_next_event = sub {
|
||||
PTDEBUG && _d('LLSP: next_event()');
|
||||
|
||||
# Keeping the pos_in_log variable right is a bit tricky! In general,
|
||||
# we have to tell() the filehandle before trying to read from it,
|
||||
# getting the position before the data we've just read. The simple
|
||||
# rule is that when we push something onto @pending, which we almost
|
||||
# always do, then $pos_in_log should point to the beginning of that
|
||||
# saved content in the file.
|
||||
PTDEBUG && _d('LLSP: Current virtual $fh position:', $pos_in_log);
|
||||
my $new_pos = 0;
|
||||
|
||||
# @arg_lines is where we store up the content we're about to return.
|
||||
# It contains $content; @pending contains a single saved $line.
|
||||
my @arg_lines;
|
||||
|
||||
# Here we actually examine lines until we have found a complete event.
|
||||
my $line;
|
||||
LINE:
|
||||
while (
|
||||
defined($line = shift @pending)
|
||||
|| do {
|
||||
# Save $new_pos, because when we hit EOF we can't $tell->()
|
||||
# anymore.
|
||||
eval { $new_pos = -1; $new_pos = $tell->() };
|
||||
defined($line = $next_event->());
|
||||
}
|
||||
) {
|
||||
PTDEBUG && _d('LLSP: Line:', $line);
|
||||
|
||||
# Parse the line.
|
||||
($msg_nr, $line_nr, $content) = $line =~ m/$syslog_regex/o;
|
||||
if ( !$msg_nr ) {
|
||||
die "Can't parse line: $line";
|
||||
}
|
||||
|
||||
# The message number has changed -- thus, new message.
|
||||
elsif ( $msg_nr != $last_msg_nr ) {
|
||||
PTDEBUG && _d('LLSP: $msg_nr', $last_msg_nr, '=>', $msg_nr);
|
||||
$last_msg_nr = $msg_nr;
|
||||
last LINE;
|
||||
}
|
||||
|
||||
# Or, the caller gave us a custom new_event_test and it is true --
|
||||
# thus, also new message.
|
||||
elsif ( @arg_lines && $new_event_test && $new_event_test->($content) ) {
|
||||
PTDEBUG && _d('LLSP: $new_event_test matches');
|
||||
last LINE;
|
||||
}
|
||||
|
||||
# Otherwise it's part of the current message; put it onto the list
|
||||
# of lines pending. We have to translate characters that syslog has
|
||||
# munged. Some translate TAB into the literal characters '^I' and
|
||||
# some, rsyslog on Debian anyway, seem to translate all whitespace
|
||||
# control characters into an octal string representing the character
|
||||
# code.
|
||||
# Example: #011FROM pg_catalog.pg_class c
|
||||
$content =~ s/#(\d{3})/chr(oct($1))/ge;
|
||||
$content =~ s/\^I/\t/g;
|
||||
if ( $line_filter ) {
|
||||
PTDEBUG && _d('LLSP: applying $line_filter');
|
||||
$content = $line_filter->($content);
|
||||
}
|
||||
|
||||
push @arg_lines, $content;
|
||||
}
|
||||
PTDEBUG && _d('LLSP: Exited while-loop after finding a complete entry');
|
||||
|
||||
# Mash the pending stuff together to return it.
|
||||
my $psql_log_event = @arg_lines ? join('', @arg_lines) : undef;
|
||||
PTDEBUG && _d('LLSP: Final log entry:', $psql_log_event);
|
||||
|
||||
# Save the new content into @pending for the next time. $pos_in_log
|
||||
# must also be updated to whatever $new_pos is.
|
||||
if ( defined $line ) {
|
||||
PTDEBUG && _d('LLSP: Saving $line:', $line);
|
||||
@pending = $line;
|
||||
PTDEBUG && _d('LLSP: $pos_in_log:', $pos_in_log, '=>', $new_pos);
|
||||
$pos_in_log = $new_pos;
|
||||
}
|
||||
else {
|
||||
# We hit the end of the file.
|
||||
PTDEBUG && _d('LLSP: EOF reached');
|
||||
@pending = ();
|
||||
$last_msg_nr = 0;
|
||||
}
|
||||
|
||||
return $psql_log_event;
|
||||
};
|
||||
|
||||
# Create the closure for $tell->();
|
||||
my $new_tell = sub {
|
||||
PTDEBUG && _d('LLSP: tell()', $pos_in_log);
|
||||
return $pos_in_log;
|
||||
};
|
||||
|
||||
return ($new_next_event, $new_tell, 1);
|
||||
}
|
||||
|
||||
# This is either at EOF already, or it's not syslog format.
|
||||
else {
|
||||
|
||||
# Within plain-log-parsing subroutines, we'll use PLAIN as a PTDEBUG
|
||||
# line prefix.
|
||||
PTDEBUG && _d('Plain log, or we are at EOF; PTDEBUG prefix=PLAIN');
|
||||
|
||||
# The @pending array is really only needed to return the one line we
|
||||
# already read as a test. Too bad we can't just push it back onto the
|
||||
# log. TODO: maybe we can test whether the filehandle is seekable and
|
||||
# seek back to the start, then just return the unwrapped functions?
|
||||
my @pending = defined $test_line ? ($test_line) : ();
|
||||
|
||||
my $new_next_event = sub {
|
||||
PTDEBUG && _d('PLAIN: next_event(); @pending:', scalar @pending);
|
||||
return @pending ? shift @pending : $next_event->();
|
||||
};
|
||||
my $new_tell = sub {
|
||||
PTDEBUG && _d('PLAIN: tell(); @pending:', scalar @pending);
|
||||
return @pending ? 0 : $tell->();
|
||||
};
|
||||
return ($new_next_event, $new_tell, 0);
|
||||
}
|
||||
}
|
||||
|
||||
sub _d {
|
||||
my ($package, undef, $line) = caller 0;
|
||||
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
|
||||
map { defined $_ ? $_ : 'undef' }
|
||||
@_;
|
||||
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
|
||||
}
|
||||
|
||||
1;
|
||||
}
|
||||
# ###########################################################################
|
||||
# End SysLogParser package
|
||||
# ###########################################################################
|
@@ -172,10 +172,7 @@ sub _parse_packet {
|
||||
sub port_number {
|
||||
my ( $self, $port ) = @_;
|
||||
return unless $port;
|
||||
return $port eq 'memcached' ? 11211
|
||||
: $port eq 'http' ? 80
|
||||
: $port eq 'mysql' ? 3306
|
||||
: $port;
|
||||
return $port eq 'mysql' ? 3306 : $port;
|
||||
}
|
||||
|
||||
sub _d {
|
||||
|
Reference in New Issue
Block a user