Add test for bulk-insert with special characters (tabs, newlines, backslashes)

This commit is contained in:
wu.xu
2026-02-03 10:57:32 +08:00
parent ad57c0fc1b
commit 2eb9736cc0
2 changed files with 97 additions and 0 deletions
+75
View File
@@ -0,0 +1,75 @@
#!/usr/bin/env perl

# Verify that pt-archiver --bulk-insert round-trips rows containing tabs,
# newlines, and backslashes without shifting values between fields.
# Related fix: escape() must handle backslash first, then newline, then tab.

BEGIN {
    die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n"
        unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH};
    unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib";
};

use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use Test::More;

use PerconaTest;
use Sandbox;
require "$trunk/bin/pt-archiver";

my $parser  = DSNParser->new(opts => $dsn_opts);
my $sandbox = Sandbox->new(basedir => '/tmp', DSNParser => $parser);
my $dbh     = $sandbox->get_dbh_for('source');

plan skip_all => 'Cannot connect to sandbox source' unless $dbh;

my $cnf = "/tmp/12345/my.sandbox.cnf";

$sandbox->wipe_clean($dbh);
$sandbox->load_file('source', 't/pt-archiver/samples/bulk_insert_special_chars.sql');

# Seed rows whose values contain tabs, newlines, backslashes, and quotes.
# A prepared statement passes the raw characters through (no SQL escaping).
my @rows = (
    [ 1, 'John "Doe"', "Software Engineer\t\n", 123, 'Senior "Developer"' ],
    [ 2, 'Alice', "Data Scientist\nwith newline\n", 456, 'Lead Analyst' ],
    [ 3, 'Bob', "Engineer with tab\tcharacter\n", 789, 'Manager' ],
    [ 4, 'Charlie', "Backslash\\Test\n", 101, 'Director' ],
    [ 5, 'Eve', "Quote\'s Test\\t\n", 202, 'Consultant' ],
);

my $sth = $dbh->prepare(
    'INSERT INTO bulk_escape.source (id, name, job, stu_id, title) VALUES (?, ?, ?, ?, ?)'
);
$sth->execute(@{$_}) for @rows;

# Run pt-archiver: copy source -> dest with --bulk-insert, purging the source.
my $output = output(
    sub {
        pt_archiver::main(
            qw(--where 1=1 --bulk-insert --bulk-delete --limit 100 --statistics),
            '--source', "L=1,D=bulk_escape,t=source,F=$cnf",
            '--dest',   "D=bulk_escape,t=dest",
        );
    },
);

# Row counts: everything moved out of source, everything landed in dest.
my ($rows_left)     = $dbh->selectrow_array('SELECT COUNT(*) FROM bulk_escape.source');
my ($rows_archived) = $dbh->selectrow_array('SELECT COUNT(*) FROM bulk_escape.dest');
is($rows_left, 0, 'Source table is purged');
is($rows_archived, scalar(@rows), 'All rows archived to dest');

# The archived data must match the inserted data byte-for-byte, in id order.
my $dest_rows = $dbh->selectall_arrayref(
    'SELECT id, name, job, stu_id, title FROM bulk_escape.dest ORDER BY id'
);
is_deeply($dest_rows, \@rows, 'Bulk-insert preserves tabs, newlines, backslashes (no field misalignment)');

$sandbox->wipe_clean($dbh);
ok($sandbox->ok(), 'Sandbox servers') or BAIL_OUT(__FILE__ . ' broke the sandbox');

done_testing;
exit;
@@ -0,0 +1,22 @@
-- Schema for testing pt-archiver --bulk-insert with special characters
-- (tabs, newlines, backslashes) to ensure no field misalignment.

DROP DATABASE IF EXISTS `bulk_escape`;
CREATE DATABASE `bulk_escape`;

-- Archive source: VARCHAR columns wide enough for the special-character rows.
CREATE TABLE `bulk_escape`.`source` (
  `id` int unsigned NOT NULL AUTO_INCREMENT,
  `name` varchar(500) DEFAULT NULL,
  `job` varchar(450) DEFAULT NULL,
  `stu_id` int DEFAULT NULL,
  `title` varchar(450) DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

-- Archive destination: must be structurally identical to the source table.
-- Clone it with CREATE TABLE ... LIKE (copies columns, indexes, engine, and
-- charset) instead of duplicating the DDL, so the two can never drift apart.
CREATE TABLE `bulk_escape`.`dest` LIKE `bulk_escape`.`source`;