Files
percona-toolkit/lib/SysLogParser.pm

260 lines
10 KiB
Perl

# This program is copyright 2010-2011 Baron Schwartz, 2011 Percona Inc.
# Feedback and improvements are welcome.
#
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
# licenses.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
# Place, Suite 330, Boston, MA 02111-1307 USA.
# ###########################################################################
# SysLogParser package
# ###########################################################################
{
# Package: SysLogParser
# SysLogParser parses events from syslogs.
package SysLogParser;
use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
# This regex matches the message number, line number, and content of a syslog
# message:
# 2008 Jan 9 16:16:34 hostname postgres[30059]: [13-2] ...content...
my $syslog_regex = qr{\A.*\w+\[\d+\]: \[(\d+)-(\d+)\] (.*)\Z};
# This class generates currying functions that wrap around a standard
# log-parser's next_event() and tell() function pointers. The wrappers behave
# the same way, except that they'll return entire syslog events, instead of
# lines at a time. To use it, do the following:
#
# sub parse_event {
# my ($self, %args) = @_;
# my ($next_event, $tell, $is_syslog) = SysLogParser::make_closures(%args);
# # ... write your code to use the $next_event and $tell here...
# }
#
# If the log isn't in syslog format, $is_syslog will be false and you'll get
# back simple wrappers around the $next_event and $tell functions. (They still
# have to be wrapped, because to find out whether the log is in syslog format,
# the first line has to be examined.)
sub new {
my ( $class ) = @_;
my $self = {};
return bless $self, $class;
}
# This method is here so that SysLogParser can be used and tested in its own
# right. However, its ability to generate wrapper functions probably means that
# it should be used as a translation layer, not directly. You can use this code
# as an example of how to integrate this into other packages.
sub parse_event {
my ( $self, %args ) = @_;
my ( $next_event, $tell, $is_syslog ) = $self->generate_wrappers(%args);
return $next_event->();
}
# This is an example of how a class can seamlessly put a syslog translation
# layer underneath itself.
sub generate_wrappers {
my ( $self, %args ) = @_;
# Reset everything, just in case some cruft was left over from a previous use
# of this object. The object has stateful closures. If this isn't done,
# then they'll keep reading from old filehandles. The sanity check is based
# on the memory address of the closure!
if ( ($self->{sanity} || '') ne "$args{next_event}" ){
MKDEBUG && _d("Clearing and recreating internal state");
@{$self}{qw(next_event tell is_syslog)} = $self->make_closures(%args);
$self->{sanity} = "$args{next_event}";
}
# Return the wrapper functions!
return @{$self}{qw(next_event tell is_syslog)};
}
# Make the closures! The $args{misc}->{new_event_test} is an optional
# subroutine reference, which tells the wrapper when to consider a line part of
# a new event, in syslog format, even when it's technically the same syslog
# event. See the test for samples/pg-syslog-002.txt for an example. This
# argument should be passed in via the call to parse_event(). Ditto for
# 'line_filter', which is some processing code to run on every line of content
# in an event.
sub make_closures {
my ( $self, %args ) = @_;
# The following variables will be referred to in the manufactured
# subroutines, making them proper closures.
my $next_event = $args{'next_event'};
my $tell = $args{'tell'};
my $new_event_test = $args{'misc'}->{'new_event_test'};
my $line_filter = $args{'misc'}->{'line_filter'};
# The first thing to do is get a line from the log and see if it's from
# syslog.
my $test_line = $next_event->();
MKDEBUG && _d('Read first sample/test line:', $test_line);
# If it's syslog, we have to generate a moderately elaborate wrapper
# function.
if ( defined $test_line && $test_line =~ m/$syslog_regex/o ) {
# Within syslog-parsing subroutines, we'll use LLSP (low-level syslog
# parser) as a MKDEBUG line prefix.
MKDEBUG && _d('This looks like a syslog line, MKDEBUG prefix=LLSP');
# Grab the interesting bits out of the test line, and save the result.
my ($msg_nr, $line_nr, $content) = $test_line =~ m/$syslog_regex/o;
my @pending = ($test_line);
my $last_msg_nr = $msg_nr;
my $pos_in_log = 0;
# Generate the subroutine for getting a full log message without syslog
# breaking it across multiple lines.
my $new_next_event = sub {
MKDEBUG && _d('LLSP: next_event()');
# Keeping the pos_in_log variable right is a bit tricky! In general,
# we have to tell() the filehandle before trying to read from it,
# getting the position before the data we've just read. The simple
# rule is that when we push something onto @pending, which we almost
# always do, then $pos_in_log should point to the beginning of that
# saved content in the file.
MKDEBUG && _d('LLSP: Current virtual $fh position:', $pos_in_log);
my $new_pos = 0;
# @arg_lines is where we store up the content we're about to return.
# It contains $content; @pending contains a single saved $line.
my @arg_lines;
# Here we actually examine lines until we have found a complete event.
my $line;
LINE:
while (
defined($line = shift @pending)
|| do {
# Save $new_pos, because when we hit EOF we can't $tell->()
# anymore.
eval { $new_pos = -1; $new_pos = $tell->() };
defined($line = $next_event->());
}
) {
MKDEBUG && _d('LLSP: Line:', $line);
# Parse the line.
($msg_nr, $line_nr, $content) = $line =~ m/$syslog_regex/o;
if ( !$msg_nr ) {
die "Can't parse line: $line";
}
# The message number has changed -- thus, new message.
elsif ( $msg_nr != $last_msg_nr ) {
MKDEBUG && _d('LLSP: $msg_nr', $last_msg_nr, '=>', $msg_nr);
$last_msg_nr = $msg_nr;
last LINE;
}
# Or, the caller gave us a custom new_event_test and it is true --
# thus, also new message.
elsif ( @arg_lines && $new_event_test && $new_event_test->($content) ) {
MKDEBUG && _d('LLSP: $new_event_test matches');
last LINE;
}
# Otherwise it's part of the current message; put it onto the list
# of lines pending. We have to translate characters that syslog has
# munged. Some translate TAB into the literal characters '^I' and
# some, rsyslog on Debian anyway, seem to translate all whitespace
# control characters into an octal string representing the character
# code.
# Example: #011FROM pg_catalog.pg_class c
$content =~ s/#(\d{3})/chr(oct($1))/ge;
$content =~ s/\^I/\t/g;
if ( $line_filter ) {
MKDEBUG && _d('LLSP: applying $line_filter');
$content = $line_filter->($content);
}
push @arg_lines, $content;
}
MKDEBUG && _d('LLSP: Exited while-loop after finding a complete entry');
# Mash the pending stuff together to return it.
my $psql_log_event = @arg_lines ? join('', @arg_lines) : undef;
MKDEBUG && _d('LLSP: Final log entry:', $psql_log_event);
# Save the new content into @pending for the next time. $pos_in_log
# must also be updated to whatever $new_pos is.
if ( defined $line ) {
MKDEBUG && _d('LLSP: Saving $line:', $line);
@pending = $line;
MKDEBUG && _d('LLSP: $pos_in_log:', $pos_in_log, '=>', $new_pos);
$pos_in_log = $new_pos;
}
else {
# We hit the end of the file.
MKDEBUG && _d('LLSP: EOF reached');
@pending = ();
$last_msg_nr = 0;
}
return $psql_log_event;
};
# Create the closure for $tell->();
my $new_tell = sub {
MKDEBUG && _d('LLSP: tell()', $pos_in_log);
return $pos_in_log;
};
return ($new_next_event, $new_tell, 1);
}
# This is either at EOF already, or it's not syslog format.
else {
# Within plain-log-parsing subroutines, we'll use PLAIN as a MKDEBUG
# line prefix.
MKDEBUG && _d('Plain log, or we are at EOF; MKDEBUG prefix=PLAIN');
# The @pending array is really only needed to return the one line we
# already read as a test. Too bad we can't just push it back onto the
# log. TODO: maybe we can test whether the filehandle is seekable and
# seek back to the start, then just return the unwrapped functions?
my @pending = defined $test_line ? ($test_line) : ();
my $new_next_event = sub {
MKDEBUG && _d('PLAIN: next_event(); @pending:', scalar @pending);
return @pending ? shift @pending : $next_event->();
};
my $new_tell = sub {
MKDEBUG && _d('PLAIN: tell(); @pending:', scalar @pending);
return @pending ? 0 : $tell->();
};
return ($new_next_event, $new_tell, 0);
}
}
sub _d {
my ($package, undef, $line) = caller 0;
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
map { defined $_ ? $_ : 'undef' }
@_;
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
}
1;
}
# ###########################################################################
# End SysLogParser package
# ###########################################################################