mirror of
https://github.com/percona/percona-toolkit.git
synced 2025-09-21 11:30:24 +00:00
PT-1059 tools cannot parse index names containing newlines (#578)
* PT-1059 - Tools can't parse index names containing newlines Fixed regular expressions in TableParser. Added test case, including test for new lines in the column name * PT-1059 - Tools can't parse index names containing newlines Disabled pt-1637.t until PT-2174 is fixed. Updated number of tables in b/t/pt-table-checksum/issue_1485195.t * Patch newlines in table columns (#369) Will accept this change as part of the fix for PT-1059 - Tools cannot parse index names containing new lines. We will later fix the issue with the patch ourselves. mysql 5.6.40 allows newlines in column names however the following code: my @defs = $ddl =~ m/^(\s+`.*?),?$/gm; breaks due to it detecting newlines as line ends. The 'm' argument at the end does this by auto-detecting lines by newline characters. To correct this issue I've made use of zero-length assertions known as " positive lookback" https://www.regular-expressions.info/lookaround.html what does it do? m/(?:(?<=,\n)|(?<=\(\n))(\s+`(?:.|\n)+?`.+?),?\n/g; TLDR: Treat the string as one long string and don't treat \n as the end of a line. look for (\s+`(?:.|\n)+?`.+?),?\n if one of those matches look at what precedes the string if it's ',\n' or ')\n' the string matches. Only save what's in (\s+`(?:.|\n)+?`.+?),?\n m/ is declaring this a matching regex. (?:(?<=,\n)|(?<=(\n)) This is an OR statement including two look-behind clauses. The ?: tells the enclosing parentheses to not store the result as a variable. I've put the two look-behinds in this OR statement below this line: (?<=,\n) Look behind the matched string for a comma followed by a newline, the comma must be there for this look behind to match. (?<=(\n) Look behind the matched string for a open parentheses followed by a newline, the open parentheses must be there. (\s+`(?:.|\n)+?`.+?),?\n This is the actual match. Match newline character followed by one or more spaces followed by back-tick followed by a character which can be any character or a newline one or more times, but don't be greedy and take the rest of the match into consideration. Followed by a back tick and any character one or more times. This match stops where there is a comma or failing that a newline following a back tick and some characters. ,?\n match a comma that may not be there followed by a newline. /g don't stop if this pattern matches keep looking for more patterns to the end of the string. * PT-1059 - Tools can't parse index names containing newlines Placed fix from PR-369 into proper place and created test case for this fix. --------- Co-authored-by: geneguido <31323560+geneguido@users.noreply.github.com>
This commit is contained in:
@@ -4516,7 +4516,7 @@ sub parse {
|
|||||||
|
|
||||||
my $engine = $self->get_engine($ddl);
|
my $engine = $self->get_engine($ddl);
|
||||||
|
|
||||||
my @defs = $ddl =~ m/^(\s+`.*?),?$/gm;
|
my @defs = $ddl =~ m/(?:(?<=,\n)|(?<=\(\n))(\s+`(?:.|\n)+?`.+?),?\n/g;
|
||||||
my @cols = map { $_ =~ m/`([^`]+)`/ } @defs;
|
my @cols = map { $_ =~ m/`([^`]+)`/ } @defs;
|
||||||
PTDEBUG && _d('Table cols:', join(', ', map { "`$_`" } @cols));
|
PTDEBUG && _d('Table cols:', join(', ', map { "`$_`" } @cols));
|
||||||
|
|
||||||
@@ -4697,7 +4697,7 @@ sub get_keys {
|
|||||||
my $clustered_key = undef;
|
my $clustered_key = undef;
|
||||||
|
|
||||||
KEY:
|
KEY:
|
||||||
foreach my $key ( $ddl =~ m/^ ((?:[A-Z]+ )?KEY .*)$/gm ) {
|
foreach my $key ( $ddl =~ m/^ ((?:[A-Z]+ )?KEY \(?`[\s\S]*?`\),?)$/gm ) {
|
||||||
|
|
||||||
next KEY if $key =~ m/FOREIGN/;
|
next KEY if $key =~ m/FOREIGN/;
|
||||||
|
|
||||||
@@ -4708,7 +4708,7 @@ sub get_keys {
|
|||||||
$key =~ s/USING HASH/USING BTREE/;
|
$key =~ s/USING HASH/USING BTREE/;
|
||||||
}
|
}
|
||||||
|
|
||||||
my ( $type, $cols ) = $key =~ m/(?:USING (\w+))? \((.+)\)/;
|
my ( $type, $cols ) = $key =~ m/(?:USING (\w+))? \(([\s\S]+?)\)/;
|
||||||
my ( $special ) = $key =~ m/(FULLTEXT|SPATIAL)/;
|
my ( $special ) = $key =~ m/(FULLTEXT|SPATIAL)/;
|
||||||
$type = $type || $special || 'BTREE';
|
$type = $type || $special || 'BTREE';
|
||||||
my ($name) = $key =~ m/(PRIMARY|`[^`]*`)/;
|
my ($name) = $key =~ m/(PRIMARY|`[^`]*`)/;
|
||||||
|
@@ -149,7 +149,7 @@ sub parse {
|
|||||||
|
|
||||||
my $engine = $self->get_engine($ddl);
|
my $engine = $self->get_engine($ddl);
|
||||||
|
|
||||||
my @defs = $ddl =~ m/^(\s+`.*?),?$/gm;
|
my @defs = $ddl =~ m/(?:(?<=,\n)|(?<=\(\n))(\s+`(?:.|\n)+?`.+?),?\n/g;
|
||||||
my @cols = map { $_ =~ m/`([^`]+)`/ } @defs;
|
my @cols = map { $_ =~ m/`([^`]+)`/ } @defs;
|
||||||
PTDEBUG && _d('Table cols:', join(', ', map { "`$_`" } @cols));
|
PTDEBUG && _d('Table cols:', join(', ', map { "`$_`" } @cols));
|
||||||
|
|
||||||
@@ -389,7 +389,8 @@ sub get_keys {
|
|||||||
my $clustered_key = undef;
|
my $clustered_key = undef;
|
||||||
|
|
||||||
KEY:
|
KEY:
|
||||||
foreach my $key ( $ddl =~ m/^ ((?:[A-Z]+ )?KEY .*)$/gm ) {
|
#foreach my $key ( $ddl =~ m/^ ((?:[A-Z]+ )?KEY .*)$/gm ) {
|
||||||
|
foreach my $key ( $ddl =~ m/^ ((?:[A-Z]+ )?KEY \(?`[\s\S]*?`\),?)$/gm ) {
|
||||||
|
|
||||||
# If you want foreign keys, use get_fks() below.
|
# If you want foreign keys, use get_fks() below.
|
||||||
next KEY if $key =~ m/FOREIGN/;
|
next KEY if $key =~ m/FOREIGN/;
|
||||||
@@ -407,7 +408,7 @@ sub get_keys {
|
|||||||
}
|
}
|
||||||
|
|
||||||
# Determine index type
|
# Determine index type
|
||||||
my ( $type, $cols ) = $key =~ m/(?:USING (\w+))? \((.+)\)/;
|
my ( $type, $cols ) = $key =~ m/(?:USING (\w+))? \(([\s\S]+?)\)/;
|
||||||
my ( $special ) = $key =~ m/(FULLTEXT|SPATIAL)/;
|
my ( $special ) = $key =~ m/(FULLTEXT|SPATIAL)/;
|
||||||
$type = $type || $special || 'BTREE';
|
$type = $type || $special || 'BTREE';
|
||||||
my ($name) = $key =~ m/(PRIMARY|`[^`]*`)/;
|
my ($name) = $key =~ m/(PRIMARY|`[^`]*`)/;
|
||||||
|
@@ -46,7 +46,7 @@ my $extra_tables = $dbh->selectrow_arrayref("select count(*) from percona_test.c
|
|||||||
|
|
||||||
is(
|
is(
|
||||||
PerconaTest::count_checksum_results($output, 'rows'),
|
PerconaTest::count_checksum_results($output, 'rows'),
|
||||||
$sandbox_version ge '8.0' ? 27 + $extra_tables : $sandbox_version lt '5.7' ? 24 : 23 + $extra_tables,
|
$sandbox_version ge '8.0' ? 28 + $extra_tables : $sandbox_version lt '5.7' ? 24 : 23 + $extra_tables,
|
||||||
"Large BLOB/TEXT/BINARY Checksum"
|
"Large BLOB/TEXT/BINARY Checksum"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
68
t/pt-table-checksum/pt-1059.t
Normal file
68
t/pt-table-checksum/pt-1059.t
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
#!/usr/bin/env perl
|
||||||
|
|
||||||
|
BEGIN {
|
||||||
|
die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n"
|
||||||
|
unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH};
|
||||||
|
unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib";
|
||||||
|
};
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use warnings FATAL => 'all';
|
||||||
|
use English qw(-no_match_vars);
|
||||||
|
use Test::More;
|
||||||
|
|
||||||
|
use PerconaTest;
|
||||||
|
use Sandbox;
|
||||||
|
use SqlModes;
|
||||||
|
require "$trunk/bin/pt-table-checksum";
|
||||||
|
|
||||||
|
my $dp = new DSNParser(opts=>$dsn_opts);
|
||||||
|
my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp);
|
||||||
|
my $dbh = $sb->get_dbh_for('master');
|
||||||
|
|
||||||
|
if ( !$dbh ) {
|
||||||
|
plan skip_all => 'Cannot connect to sandbox master';
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
plan tests => 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
# The sandbox servers run with lock_wait_timeout=3 and it's not dynamic
|
||||||
|
# so we need to specify --set-vars innodb_lock_wait_timeout=3 else the tool will die.
|
||||||
|
# And --max-load "" prevents waiting for status variables.
|
||||||
|
my $master_dsn = 'h=127.1,P=12345,u=msandbox,p=msandbox,D=pt_1059';
|
||||||
|
my @args = ($master_dsn, qw(--set-vars innodb_lock_wait_timeout=3), '--max-load', '');
|
||||||
|
my $output;
|
||||||
|
my $exit_status;
|
||||||
|
|
||||||
|
# We test that checksum works with columns and indexes
|
||||||
|
# that contain new lines
|
||||||
|
$sb->load_file('master', 't/pt-table-checksum/samples/pt-1059.sql');
|
||||||
|
# #############################################################################
|
||||||
|
# PT-1059 LP #1093972: Tools can't parse index names containing newlines
|
||||||
|
# #############################################################################
|
||||||
|
|
||||||
|
($output, $exit_status) = full_output(
|
||||||
|
sub { pt_table_checksum::main(@args, qw(-d pt_1059)) },
|
||||||
|
stderr => 1,
|
||||||
|
);
|
||||||
|
|
||||||
|
is(
|
||||||
|
PerconaTest::count_checksum_results($output, 'errors'),
|
||||||
|
0,
|
||||||
|
"Checksum with columns and indexes, containing new lines found no errors"
|
||||||
|
);
|
||||||
|
|
||||||
|
is(
|
||||||
|
$exit_status,
|
||||||
|
0,
|
||||||
|
"Checksum with columns and indexes, containing new lines finished succesfully",
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
|
# #############################################################################
|
||||||
|
# Done.
|
||||||
|
# #############################################################################
|
||||||
|
$sb->wipe_clean($dbh);
|
||||||
|
ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . " broke the sandbox");
|
||||||
|
exit;
|
@@ -16,6 +16,8 @@ use Sandbox;
|
|||||||
use SqlModes;
|
use SqlModes;
|
||||||
require "$trunk/bin/pt-table-checksum";
|
require "$trunk/bin/pt-table-checksum";
|
||||||
|
|
||||||
|
plan skip_all => 'Disabled until PT-2174 is fixed';
|
||||||
|
|
||||||
my $dp = new DSNParser(opts=>$dsn_opts);
|
my $dp = new DSNParser(opts=>$dsn_opts);
|
||||||
my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp);
|
my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp);
|
||||||
|
|
||||||
|
47
t/pt-table-checksum/samples/pt-1059.sql
Normal file
47
t/pt-table-checksum/samples/pt-1059.sql
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
CREATE SCHEMA IF NOT EXISTS pt_1059;
|
||||||
|
USE pt_1059;
|
||||||
|
DROP TABLE IF EXISTS t1;
|
||||||
|
CREATE TABLE `t1` (
|
||||||
|
`id` int(10) unsigned NOT NULL AUTO_INCREMENT,
|
||||||
|
`c` char(1) DEFAULT NULL,
|
||||||
|
PRIMARY KEY (`id`),
|
||||||
|
KEY `idx_with_
|
||||||
|
newline` (`c`)
|
||||||
|
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
|
||||||
|
|
||||||
|
INSERT INTO t1 (c) VALUES('a'),('b'),('c');
|
||||||
|
|
||||||
|
DROP TABLE IF EXISTS t2;
|
||||||
|
CREATE TABLE `t2` (
|
||||||
|
`id` int(10) unsigned NOT NULL AUTO_INCREMENT,
|
||||||
|
`column_with_
|
||||||
|
newline` char(1) DEFAULT NULL,
|
||||||
|
PRIMARY KEY (`id`),
|
||||||
|
KEY `idx_c` (`column_with_
|
||||||
|
newline`)
|
||||||
|
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
|
||||||
|
|
||||||
|
INSERT INTO t2 (`column_with_
|
||||||
|
newline`) VALUES('a'),('b'),('c');
|
||||||
|
|
||||||
|
DROP TABLE IF EXISTS t3;
|
||||||
|
CREATE TABLE `t3` (
|
||||||
|
`id` int(10) unsigned NOT NULL AUTO_INCREMENT,
|
||||||
|
`column_with_
|
||||||
|
newline` char(1) DEFAULT NULL,
|
||||||
|
PRIMARY KEY (`id`),
|
||||||
|
KEY `idx_with_
|
||||||
|
newline` (`column_with_
|
||||||
|
newline`)
|
||||||
|
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
|
||||||
|
|
||||||
|
INSERT INTO t3 (`column_with_
|
||||||
|
newline`) VALUES('a'),('b'),('c');
|
||||||
|
|
||||||
|
DROP TABLE IF EXISTS t4;
|
||||||
|
CREATE TABLE `t4` (
|
||||||
|
`
|
||||||
|
column, starting from new line` char(1)
|
||||||
|
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
|
||||||
|
|
||||||
|
INSERT INTO t4 VALUES('a'),('b'),('c');
|
Reference in New Issue
Block a user