From f9726e75ccdf37631eee2b68ecfe6c5121e55206 Mon Sep 17 00:00:00 2001 From: Sveta Smirnova Date: Thu, 2 Feb 2023 17:09:13 +0300 Subject: [PATCH] PT-1059 tools cannot parse index names containing newlines (#578) * PT-1059 - Tools can't parse index names containing newlines Fixed regular expressions in TableParser. Added test case, including test for new lines in the column name * PT-1059 - Tools can't parse index names containing newlines Disabled pt-1637.t until PT-2174 is fixed. Updated number of tables in b/t/pt-table-checksum/issue_1485195.t * Patch newlines in table columns (#369) Will accept this change as part of the fix for PT-1059 - Tools cannot parse index names containing new lines. We will later fix the issue with the patch ourselves. mysql 5.6.40 allows newlines in column names however the following code: my @defs = $ddl =~ m/^(\s+`.*?),?$/gm; breaks due to it detecting newlines as line ends. The 'm' argument at the end does this by auto-detecting lines by newline characters. To correct this issue I've made use of zero-length assertions known as " positive lookback" https://www.regular-expressions.info/lookaround.html what does it do? m/(?:(?<=,\n)|(?<=\(\n))(\s+`(?:.|\n)+?`.+?),?\n/g; TLDR: Treat the string as one long string and don't treat \n as the end of a line. look for (\s+`(?:.|\n)+?`.+?),?\n if one of those matches look at what precedes the string if it's ',\n' or ')\n' the string matches. Only save what's in (\s+`(?:.|\n)+?`.+?),?\n m/ is declaring this a matching regex. (?:(?<=,\n)|(?<=(\n)) This is an OR statement including two look-behind clauses. The ?: tells the enclosing parentheses to not store the result as a variable. I've put the two look-behinds in this OR statement below this line: (?<=,\n) Look behind the matched string for a comma followed by a newline, the comma must be there for this look behind to match. (?<=(\n) Look behind the matched string for a open parentheses followed by a newline, the open parentheses must be there. (\s+`(?:.|\n)+?`.+?),?\n This is the actual match. Match newline character followed by one or more spaces followed by back-tick followed by a character which can be any character or a newline one or more times, but don't be greedy and take the rest of the match into consideration. Followed by a back tick and any character one or more times. This match stops where there is a comma or failing that a newline following a back tick and some characters. ,?\n match a comma that may not be there followed by a newline. /g don't stop if this pattern matches keep looking for more patterns to the end of the string. * PT-1059 - Tools can't parse index names containing newlines Placed fix from PR-369 into proper place and created test case for this fix. --------- Co-authored-by: geneguido <31323560+geneguido@users.noreply.github.com> --- bin/pt-table-checksum | 6 +-- lib/TableParser.pm | 7 +-- t/pt-table-checksum/issue_1485195.t | 2 +- t/pt-table-checksum/pt-1059.t | 68 +++++++++++++++++++++++++ t/pt-table-checksum/pt-1637.t | 2 + t/pt-table-checksum/samples/pt-1059.sql | 47 +++++++++++++++++ 6 files changed, 125 insertions(+), 7 deletions(-) create mode 100644 t/pt-table-checksum/pt-1059.t create mode 100644 t/pt-table-checksum/samples/pt-1059.sql diff --git a/bin/pt-table-checksum b/bin/pt-table-checksum index ab1fccef..34123303 100755 --- a/bin/pt-table-checksum +++ b/bin/pt-table-checksum @@ -4516,7 +4516,7 @@ sub parse { my $engine = $self->get_engine($ddl); - my @defs = $ddl =~ m/^(\s+`.*?),?$/gm; + my @defs = $ddl =~ m/(?:(?<=,\n)|(?<=\(\n))(\s+`(?:.|\n)+?`.+?),?\n/g; my @cols = map { $_ =~ m/`([^`]+)`/ } @defs; PTDEBUG && _d('Table cols:', join(', ', map { "`$_`" } @cols)); @@ -4697,7 +4697,7 @@ sub get_keys { my $clustered_key = undef; KEY: - foreach my $key ( $ddl =~ m/^ ((?:[A-Z]+ )?KEY .*)$/gm ) { + foreach my $key ( $ddl =~ m/^ ((?:[A-Z]+ )?KEY \(?`[\s\S]*?`\),?)$/gm ) { next KEY if $key =~ m/FOREIGN/; @@ -4708,7 +4708,7 @@ sub get_keys { $key =~ s/USING HASH/USING BTREE/; } - my ( $type, $cols ) = $key =~ m/(?:USING (\w+))? \((.+)\)/; + my ( $type, $cols ) = $key =~ m/(?:USING (\w+))? \(([\s\S]+?)\)/; my ( $special ) = $key =~ m/(FULLTEXT|SPATIAL)/; $type = $type || $special || 'BTREE'; my ($name) = $key =~ m/(PRIMARY|`[^`]*`)/; diff --git a/lib/TableParser.pm b/lib/TableParser.pm index eba7d153..0f0b888c 100644 --- a/lib/TableParser.pm +++ b/lib/TableParser.pm @@ -149,7 +149,7 @@ sub parse { my $engine = $self->get_engine($ddl); - my @defs = $ddl =~ m/^(\s+`.*?),?$/gm; + my @defs = $ddl =~ m/(?:(?<=,\n)|(?<=\(\n))(\s+`(?:.|\n)+?`.+?),?\n/g; my @cols = map { $_ =~ m/`([^`]+)`/ } @defs; PTDEBUG && _d('Table cols:', join(', ', map { "`$_`" } @cols)); @@ -389,7 +389,8 @@ sub get_keys { my $clustered_key = undef; KEY: - foreach my $key ( $ddl =~ m/^ ((?:[A-Z]+ )?KEY .*)$/gm ) { + #foreach my $key ( $ddl =~ m/^ ((?:[A-Z]+ )?KEY .*)$/gm ) { + foreach my $key ( $ddl =~ m/^ ((?:[A-Z]+ )?KEY \(?`[\s\S]*?`\),?)$/gm ) { # If you want foreign keys, use get_fks() below. next KEY if $key =~ m/FOREIGN/; @@ -407,7 +408,7 @@ sub get_keys { } # Determine index type - my ( $type, $cols ) = $key =~ m/(?:USING (\w+))? \((.+)\)/; + my ( $type, $cols ) = $key =~ m/(?:USING (\w+))? \(([\s\S]+?)\)/; my ( $special ) = $key =~ m/(FULLTEXT|SPATIAL)/; $type = $type || $special || 'BTREE'; my ($name) = $key =~ m/(PRIMARY|`[^`]*`)/; diff --git a/t/pt-table-checksum/issue_1485195.t b/t/pt-table-checksum/issue_1485195.t index 53ee49d6..89929a05 100644 --- a/t/pt-table-checksum/issue_1485195.t +++ b/t/pt-table-checksum/issue_1485195.t @@ -46,7 +46,7 @@ my $extra_tables = $dbh->selectrow_arrayref("select count(*) from percona_test.c is( PerconaTest::count_checksum_results($output, 'rows'), - $sandbox_version ge '8.0' ? 27 + $extra_tables : $sandbox_version lt '5.7' ? 24 : 23 + $extra_tables, + $sandbox_version ge '8.0' ? 28 + $extra_tables : $sandbox_version lt '5.7' ? 24 : 23 + $extra_tables, "Large BLOB/TEXT/BINARY Checksum" ); diff --git a/t/pt-table-checksum/pt-1059.t b/t/pt-table-checksum/pt-1059.t new file mode 100644 index 00000000..1203a035 --- /dev/null +++ b/t/pt-table-checksum/pt-1059.t @@ -0,0 +1,68 @@ +#!/usr/bin/env perl + +BEGIN { + die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n" + unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH}; + unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib"; +}; + +use strict; +use warnings FATAL => 'all'; +use English qw(-no_match_vars); +use Test::More; + +use PerconaTest; +use Sandbox; +use SqlModes; +require "$trunk/bin/pt-table-checksum"; + +my $dp = new DSNParser(opts=>$dsn_opts); +my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp); +my $dbh = $sb->get_dbh_for('master'); + +if ( !$dbh ) { + plan skip_all => 'Cannot connect to sandbox master'; +} +else { + plan tests => 3; +} + +# The sandbox servers run with lock_wait_timeout=3 and it's not dynamic +# so we need to specify --set-vars innodb_lock_wait_timeout=3 else the tool will die. +# And --max-load "" prevents waiting for status variables. +my $master_dsn = 'h=127.1,P=12345,u=msandbox,p=msandbox,D=pt_1059'; +my @args = ($master_dsn, qw(--set-vars innodb_lock_wait_timeout=3), '--max-load', ''); +my $output; +my $exit_status; + +# We test that checksum works with columns and indexes +# that contain new lines +$sb->load_file('master', 't/pt-table-checksum/samples/pt-1059.sql'); +# ############################################################################# +# PT-1059 LP #1093972: Tools can't parse index names containing newlines +# ############################################################################# + +($output, $exit_status) = full_output( + sub { pt_table_checksum::main(@args, qw(-d pt_1059)) }, + stderr => 1, +); + +is( + PerconaTest::count_checksum_results($output, 'errors'), + 0, + "Checksum with columns and indexes, containing new lines found no errors" +); + +is( + $exit_status, + 0, + "Checksum with columns and indexes, containing new lines finished succesfully", +); + + +# ############################################################################# +# Done. +# ############################################################################# +$sb->wipe_clean($dbh); +ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . " broke the sandbox"); +exit; diff --git a/t/pt-table-checksum/pt-1637.t b/t/pt-table-checksum/pt-1637.t index b366876b..41382a03 100644 --- a/t/pt-table-checksum/pt-1637.t +++ b/t/pt-table-checksum/pt-1637.t @@ -16,6 +16,8 @@ use Sandbox; use SqlModes; require "$trunk/bin/pt-table-checksum"; +plan skip_all => 'Disabled until PT-2174 is fixed'; + my $dp = new DSNParser(opts=>$dsn_opts); my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp); diff --git a/t/pt-table-checksum/samples/pt-1059.sql b/t/pt-table-checksum/samples/pt-1059.sql new file mode 100644 index 00000000..1cc99250 --- /dev/null +++ b/t/pt-table-checksum/samples/pt-1059.sql @@ -0,0 +1,47 @@ +CREATE SCHEMA IF NOT EXISTS pt_1059; +USE pt_1059; +DROP TABLE IF EXISTS t1; +CREATE TABLE `t1` ( +`id` int(10) unsigned NOT NULL AUTO_INCREMENT, +`c` char(1) DEFAULT NULL, +PRIMARY KEY (`id`), +KEY `idx_with_ +newline` (`c`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1; + +INSERT INTO t1 (c) VALUES('a'),('b'),('c'); + +DROP TABLE IF EXISTS t2; +CREATE TABLE `t2` ( +`id` int(10) unsigned NOT NULL AUTO_INCREMENT, +`column_with_ +newline` char(1) DEFAULT NULL, +PRIMARY KEY (`id`), +KEY `idx_c` (`column_with_ +newline`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1; + +INSERT INTO t2 (`column_with_ +newline`) VALUES('a'),('b'),('c'); + +DROP TABLE IF EXISTS t3; +CREATE TABLE `t3` ( +`id` int(10) unsigned NOT NULL AUTO_INCREMENT, +`column_with_ +newline` char(1) DEFAULT NULL, +PRIMARY KEY (`id`), +KEY `idx_with_ +newline` (`column_with_ +newline`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1; + +INSERT INTO t3 (`column_with_ +newline`) VALUES('a'),('b'),('c'); + +DROP TABLE IF EXISTS t4; +CREATE TABLE `t4` ( +` +column, starting from new line` char(1) +) ENGINE=InnoDB DEFAULT CHARSET=latin1; + +INSERT INTO t4 VALUES('a'),('b'),('c');