From 2eb9736cc067974aac736300168fdd559e9262b5 Mon Sep 17 00:00:00 2001
From: "wu.xu"
Date: Tue, 3 Feb 2026 10:57:32 +0800
Subject: [PATCH] Add test for bulk-insert with special characters (tabs, newlines, backslashes)

---
 t/pt-archiver/bulk_insert_special_chars.t     | 79 +++++++++++++++++++
 .../samples/bulk_insert_special_chars.sql     | 22 ++++++
 2 files changed, 101 insertions(+)
 create mode 100644 t/pt-archiver/bulk_insert_special_chars.t
 create mode 100644 t/pt-archiver/samples/bulk_insert_special_chars.sql

diff --git a/t/pt-archiver/bulk_insert_special_chars.t b/t/pt-archiver/bulk_insert_special_chars.t
new file mode 100644
index 00000000..1e0f52a4
--- /dev/null
+++ b/t/pt-archiver/bulk_insert_special_chars.t
@@ -0,0 +1,79 @@
+#!/usr/bin/env perl
+
+# Test that pt-archiver --bulk-insert correctly escapes and preserves data
+# containing tabs, newlines, and backslashes (no field misalignment).
+# See: fix for escape() order (backslash first, then newline, then tab).
+
+BEGIN {
+   die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n"
+      unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH};
+   unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib";
+};
+
+use strict;
+use warnings FATAL => 'all';
+use English qw(-no_match_vars);
+use Test::More;
+
+use PerconaTest;
+use Sandbox;
+require "$trunk/bin/pt-archiver";
+
+my $dp = new DSNParser(opts=>$dsn_opts);
+my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp);
+my $dbh = $sb->get_dbh_for('source');
+
+if ( !$dbh ) {
+   plan skip_all => 'Cannot connect to sandbox source';
+}
+
+my $cnf = "/tmp/12345/my.sandbox.cnf";
+
+$sb->wipe_clean($dbh);
+$sb->load_file('source', 't/pt-archiver/samples/bulk_insert_special_chars.sql');
+
+# Rows with special characters: tab, newline, backslash, quotes.
+# Using prepared statement so we pass actual characters (not SQL-escaped).
+my @test_rows = (
+   [ 1, 'John "Doe"', "Software Engineer\t\n", 123, 'Senior "Developer"' ],
+   [ 2, 'Alice', "Data Scientist\nwith newline\n", 456, 'Lead Analyst' ],
+   [ 3, 'Bob', "Engineer with tab\tcharacter\n", 789, 'Manager' ],
+   [ 4, 'Charlie', "Backslash\\Test\n", 101, 'Director' ],
+   [ 5, 'Eve', "Quote\'s Test\\t\n", 202, 'Consultant' ],
+);
+
+my $ins = $dbh->prepare(
+   'INSERT INTO bulk_escape.source (id, name, job, stu_id, title) VALUES (?, ?, ?, ?, ?)'
+);
+for my $row ( @test_rows ) {
+   $ins->execute(@$row);
+}
+
+# Archive from source to dest using --bulk-insert (purge source).
+# --no-safe-auto-increment is needed because source.id is a single-column
+# AUTO_INCREMENT key: by default pt-archiver does not archive the row with
+# the max AUTO_INCREMENT value, so the last test row would stay in source.
+my $output = output(
+   sub { pt_archiver::main(
+      qw(--where 1=1 --bulk-insert --bulk-delete --limit 100 --statistics),
+      qw(--no-safe-auto-increment),
+      '--source', "L=1,D=bulk_escape,t=source,F=$cnf",
+      '--dest', "D=bulk_escape,t=dest") },
+);
+
+# Verify row counts.
+my $src_count = $dbh->selectrow_array('SELECT COUNT(*) FROM bulk_escape.source');
+my $dst_count = $dbh->selectrow_array('SELECT COUNT(*) FROM bulk_escape.dest');
+is($src_count, 0, 'Source table is purged');
+is($dst_count, scalar(@test_rows), 'All rows archived to dest') or diag($output);
+
+# Compare archived data with expected (order by id).
+my $archived = $dbh->selectall_arrayref(
+   'SELECT id, name, job, stu_id, title FROM bulk_escape.dest ORDER BY id'
+);
+is_deeply($archived, \@test_rows, 'Bulk-insert preserves tabs, newlines, backslashes (no field misalignment)');
+
+$sb->wipe_clean($dbh);
+ok($sb->ok(), 'Sandbox servers') or BAIL_OUT(__FILE__ . ' broke the sandbox');
+done_testing;
+exit;
diff --git a/t/pt-archiver/samples/bulk_insert_special_chars.sql b/t/pt-archiver/samples/bulk_insert_special_chars.sql
new file mode 100644
index 00000000..07e3714d
--- /dev/null
+++ b/t/pt-archiver/samples/bulk_insert_special_chars.sql
@@ -0,0 +1,22 @@
+-- Schema for testing pt-archiver --bulk-insert with special characters
+-- (tabs, newlines, backslashes) to ensure no field misalignment.
+DROP DATABASE IF EXISTS `bulk_escape`;
+CREATE DATABASE `bulk_escape`;
+
+CREATE TABLE `bulk_escape`.`source` (
+  `id` int unsigned NOT NULL AUTO_INCREMENT,
+  `name` varchar(500) DEFAULT NULL,
+  `job` varchar(450) DEFAULT NULL,
+  `stu_id` int DEFAULT NULL,
+  `title` varchar(450) DEFAULT NULL,
+  PRIMARY KEY (`id`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
+
+CREATE TABLE `bulk_escape`.`dest` (
+  `id` int unsigned NOT NULL AUTO_INCREMENT,
+  `name` varchar(500) DEFAULT NULL,
+  `job` varchar(450) DEFAULT NULL,
+  `stu_id` int DEFAULT NULL,
+  `title` varchar(450) DEFAULT NULL,
+  PRIMARY KEY (`id`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;