Files
percona-toolkit/lib/BinaryLogParser.pm
Viktor Szépe 2bd40d8c39 Remove trailing spaces (#665)
* Remove trailing spaces

* PR-665 -  Remove trailing spaces

- Updated not stable test t/pt-online-schema-change/preserve_triggers.t
- Updated utilities in bin directory

* PR-665 -  Remove trailing spaces

- Fixed typos

* PR-665 -  Remove trailing spaces

- Fixed typos

---------

Co-authored-by: Sveta Smirnova <sveta.smirnova@percona.com>
2023-09-06 01:15:12 +03:00

282 lines
11 KiB
Perl

# This program is copyright 2007-2011 Baron Schwartz, 2011 Percona Ireland Ltd.
# Feedback and improvements are welcome.
#
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
# licenses.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
# Place, Suite 330, Boston, MA 02111-1307 USA.
# ###########################################################################
# BinaryLogParser package
# ###########################################################################
{
# Package: BinaryLogParser
# BinaryLogParser parses binary log files converted to text by mysqlbinlog.
package BinaryLogParser;
use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
use Data::Dumper;
$Data::Dumper::Indent = 1;
$Data::Dumper::Sortkeys = 1;
$Data::Dumper::Quotekeys = 0;
my $binlog_line_1 = qr/at (\d+)$/m;
my $binlog_line_2 = qr/^#(\d{6}\s+\d{1,2}:\d\d:\d\d)\s+server\s+id\s+(\d+)\s+end_log_pos\s+(\d+)\s+(?:CRC32\s+0x[a-f0-9]{8}\s+)?(\S+)\s*([^\n]*)$/m;
my $binlog_line_2_rest = qr/thread_id=(\d+)\s+exec_time=(\d+)\s+error_code=(\d+)/m;
# Sub: new
#
# Parameters:
# %args - Arguments
#
# Returns:
# BinaryLogParser object
sub new {
my ( $class, %args ) = @_;
my $self = {
delim => undef,
delim_len => 0,
};
return bless $self, $class;
}
# Sub: parse_event
# Parse binary log events returned by input callback. This sub implements
# a standard interface that the *Parser.pm modules share. Each such
# module has a sub named "parse_event" that is expected to return one event
# each time it's called. Events are received from an input callback,
# $args{next_event}. The events are chunks of text that this sub parses
# into a hashref representing the attributes and values of the event.
#
# Parameters:
# %args - Arguments
#
# Required Arguments:
# next_event - Coderef that returns the text of the next event from the input
# tell - Coderef that tells the file position being read in the input
#
# Optional Arguments:
# oktorun - Coderef to tell caller that there are no more events
#
# Returns:
# Hashref representing one event and its attributes and values
sub parse_event {
my ( $self, %args ) = @_;
my @required_args = qw(next_event tell);
foreach my $arg ( @required_args ) {
die "I need a $arg argument" unless $args{$arg};
}
my ($next_event, $tell) = @args{@required_args};
local $INPUT_RECORD_SEPARATOR = ";\n#";
my $pos_in_log = $tell->();
my $stmt;
my ($delim, $delim_len) = ($self->{delim}, $self->{delim_len});
EVENT:
while ( defined($stmt = $next_event->()) ) {
my @properties = ('pos_in_log', $pos_in_log);
my ($ts, $sid, $end, $type, $rest);
$pos_in_log = $tell->();
$stmt =~ s/;\n#?\Z//;
my ( $got_offset, $got_hdr );
my $pos = 0;
my $len = length($stmt);
my $found_arg = 0;
LINE:
while ( $stmt =~ m/^(.*)$/mg ) { # /g requires scalar match.
$pos = pos($stmt); # Be careful not to mess this up!
my $line = $1; # Necessary for /g and pos() to work.
$line =~ s/$delim// if $delim;
PTDEBUG && _d($line);
if ( $line =~ m/^\/\*.+\*\/;/ ) {
PTDEBUG && _d('Comment line');
next LINE;
}
if ( $line =~ m/^DELIMITER/m ) {
my ( $del ) = $line =~ m/^DELIMITER (\S*)$/m;
if ( $del ) {
$self->{delim_len} = $delim_len = length $del;
$self->{delim} = $delim = quotemeta $del;
PTDEBUG && _d('delimiter:', $delim);
}
else {
# Because of the line $stmt =~ s/;\n#?\Z//; above, setting
# the delimiter back to normal like "DELIMITER ;" appear as
# "DELIMITER ".
PTDEBUG && _d('Delimiter reset to ;');
$self->{delim} = $delim = undef;
$self->{delim_len} = $delim_len = 0;
}
next LINE;
}
next LINE if $line =~ m/End of log file/;
# Match the beginning of an event in the binary log.
if ( !$got_offset && (my ( $offset ) = $line =~ m/$binlog_line_1/m) ) {
PTDEBUG && _d('Got the at offset line');
push @properties, 'offset', $offset;
$got_offset++;
}
# Match the 2nd line of binary log header, after "# at OFFSET".
elsif ( !$got_hdr && $line =~ m/^#(\d{6}\s+\d{1,2}:\d\d:\d\d)/ ) {
($ts, $sid, $end, $type, $rest) = $line =~ m/$binlog_line_2/m;
PTDEBUG && _d('Got the header line; type:', $type, 'rest:', $rest);
push @properties, 'cmd', 'Query', 'ts', $ts, 'server_id', $sid,
'end_log_pos', $end;
$got_hdr++;
}
# Handle meta-data lines.
elsif ( $line =~ m/^(?:#|use |SET)/i ) {
# Include the current default database given by 'use <db>;' Again
# as per the code in sql/log.cc this is case-sensitive.
if ( my ( $db ) = $line =~ m/^use ([^;]+)/ ) {
PTDEBUG && _d("Got a default database:", $db);
push @properties, 'db', $db;
}
# Some things you might see in the log output, as printed by
# sql/log.cc (this time the SET is uppercaes, and again it is
# case-sensitive).
# SET timestamp=foo;
# SET timestamp=foo,insert_id=123;
# SET insert_id=123;
elsif ( my ($setting) = $line =~ m/^SET\s+([^;]*)/ ) {
PTDEBUG && _d("Got some setting:", $setting);
push @properties, map { s/\s+//; lc } split(/,|\s*=\s*/, $setting);
}
}
else {
# This isn't a meta-data line. It's the first line of the
# whole query. Grab from here to the end of the string and
# put that into the 'arg' for the event. Then we are done.
# Note that if this line really IS the query but we skip in
# the 'if' above because it looks like meta-data, later
# we'll remedy that.
PTDEBUG && _d("Got the query/arg line at pos", $pos);
$found_arg++;
if ( $got_offset && $got_hdr ) {
if ( $type eq 'Xid' ) {
my ($xid) = $rest =~ m/(\d+)/;
push @properties, 'Xid', $xid;
}
elsif ( $type eq 'Query' ) {
my ($i, $t, $c) = $rest =~ m/$binlog_line_2_rest/m;
push @properties, 'Thread_id', $i, 'Query_time', $t,
'error_code', $c;
}
elsif ( $type eq 'Start:' ) {
# These are lines like "#090722 7:21:41 server id 12345
# end_log_pos 98 Start: binlog v 4, server v 5.0.82-log
# created 090722 7:21:41 at startup". They may or may
# not have a statement after them (ROLLBACK can follow
# this line), so we do not want to skip these types.
PTDEBUG && _d("Binlog start");
}
else {
PTDEBUG && _d('Unknown event type:', $type);
next EVENT;
}
}
else {
PTDEBUG && _d("It's not a query/arg, it's just some SQL fluff");
push @properties, 'cmd', 'Query', 'ts', undef;
}
# Removing delimiters alters the length of $stmt, so we account
# for this in our substr() offset. If $pos is equal to the length
# of $stmt, then this $line is the whole $arg (i.e. one line
# query). In this case, we go back the $delim_len that was
# removed from this $line. Otherwise, there are more lines to
# this arg so a delimiter has not yet been removed (it remains
# somewhere in $arg, at least at the end). Therefore, we do not
# go back any extra.
my $delim_len = ($pos == length($stmt) ? $delim_len : 0);
my $arg = substr($stmt, $pos - length($line) - $delim_len);
$arg =~ s/$delim// if $delim; # Remove the delimiter.
# Sometimes DELIMITER appears at the end of an arg, so we have
# to catch it again. Queries in this arg before this new
# DELIMITER should have the old delim, which is why we still
# remove it in the previous line.
if ( $arg =~ m/^DELIMITER/m ) {
my ( $del ) = $arg =~ m/^DELIMITER (\S*)$/m;
if ( $del ) {
$self->{delim_len} = $delim_len = length $del;
$self->{delim} = $delim = quotemeta $del;
PTDEBUG && _d('delimiter:', $delim);
}
else {
PTDEBUG && _d('Delimiter reset to ;');
$del = ';';
$self->{delim} = $delim = undef;
$self->{delim_len} = $delim_len = 0;
}
$arg =~ s/^DELIMITER.*$//m; # Remove DELIMITER from arg.
}
$arg =~ s/;$//gm; # Ensure ending ; are gone.
$arg =~ s/\s+$//; # Remove trailing spaces and newlines.
push @properties, 'arg', $arg, 'bytes', length($arg);
last LINE;
}
} # LINE
if ( $found_arg ) {
# Don't dump $event; want to see full dump of all properties, and after
# it's been cast into a hash, duplicated keys will be gone.
PTDEBUG && _d('Properties of event:', Dumper(\@properties));
my $event = { @properties };
if ( $args{stats} ) {
$args{stats}->{events_read}++;
$args{stats}->{events_parsed}++;
}
return $event;
}
else {
PTDEBUG && _d('Event had no arg');
}
} # EVENT
$args{oktorun}->(0) if $args{oktorun};
return;
}
sub _d {
my ($package, undef, $line) = caller 0;
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
map { defined $_ ? $_ : 'undef' }
@_;
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
}
1;
}
# ###########################################################################
# End BinaryLogParser package
# ###########################################################################