Merge pull request #861 from hpoettker/PT-2377_table_sync_with_utf8_json

PT-2377 - fixed pt-table-sync for JSON utf8 strings
This commit is contained in:
Sveta Smirnova
2025-01-14 18:04:41 +03:00
committed by GitHub
5 changed files with 178 additions and 0 deletions

View File

@@ -3612,6 +3612,14 @@ sub make_UPDATE {
@cols = $self->sort_cols($row);
}
my $types = $self->{tbl_struct}->{type_for};
foreach my $col ( @cols ) {
my $is_json = ($types->{$col} || '') =~ m/json/i;
if ( $is_json && defined $row->{$col} ) {
utf8::decode($row->{$col});
}
}
return "UPDATE $self->{dst_db_tbl} SET "
. join(', ', map {
my $is_hex = ($types->{$_} || '') =~ m/^0x[0-9a-fA-F]+$/i;
@@ -3660,6 +3668,13 @@ sub make_row {
my $q = $self->{Quoter};
my $type_for = $self->{tbl_struct}->{type_for};
foreach my $col ( @cols ) {
my $is_json = ($type_for->{$col} || '') =~ m/json/i;
if ( $is_json && defined $row->{$col} ) {
utf8::decode($row->{$col});
}
}
return "$verb INTO $self->{dst_db_tbl}("
. join(', ', map { $q->quote($_) } @cols)
. ') VALUES ('

View File

@@ -324,6 +324,16 @@ sub make_UPDATE {
@cols = $self->sort_cols($row);
}
my $types = $self->{tbl_struct}->{type_for};
# MySQL uses utf8mb4 for all strings in JSON, but
# DBD::mysql does not decode it accordingly
foreach my $col ( @cols ) {
my $is_json = ($types->{$col} || '') =~ m/json/i;
if ( $is_json && defined $row->{$col} ) {
utf8::decode($row->{$col});
}
}
return "UPDATE $self->{dst_db_tbl} SET "
. join(', ', map {
my $is_hex = ($types->{$_} || '') =~ m/^0x[0-9a-fA-F]+$/i;
@@ -403,6 +413,15 @@ sub make_row {
my $q = $self->{Quoter};
my $type_for = $self->{tbl_struct}->{type_for};
# MySQL uses utf8mb4 for all strings in JSON, but
# DBD::mysql does not decode it accordingly
foreach my $col ( @cols ) {
my $is_json = ($type_for->{$col} || '') =~ m/json/i;
if ( $is_json && defined $row->{$col} ) {
utf8::decode($row->{$col});
}
}
return "$verb INTO $self->{dst_db_tbl}("
. join(', ', map { $q->quote($_) } @cols)
. ') VALUES ('

View File

@@ -7,6 +7,7 @@ BEGIN {
};
use strict;
use utf8;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use Test::More;
@@ -617,6 +618,52 @@ SKIP: {
);
}
# #############################################################################
# PT-2377: pt-table-sync must handle utf8 in JSON columns correctly
# #############################################################################
SKIP: {
skip 'Cannot connect to sandbox master', 1 unless $master_dbh;
$master_dbh->do('DROP TABLE IF EXISTS `test`.`pt-2377`');
$master_dbh->do('CREATE TABLE `test`.`pt-2377` (id INT, data JSON)');
$master_dbh->do(q/INSERT INTO `test`.`pt-2377` VALUES (1, '{"name": "Müller"}')/);
$master_dbh->do(q/INSERT INTO `test`.`pt-2377` VALUES (2, NULL)/);
@rows = ();
$tbl_struct = {
cols => [qw(id data)],
col_posn => {id=>0, data=>1},
type_for => {id=>'int', data=>'json'},
};
$ch = new ChangeHandler(
Quoter => $q,
left_db => 'test',
left_tbl => 'pt-2377',
right_db => 'test',
right_tbl => 'pt-2377',
actions => [ sub { push @rows, $_[0]; } ],
replace => 0,
queue => 0,
tbl_struct => $tbl_struct,
);
$ch->fetch_back($master_dbh);
$ch->change('UPDATE', {id=>1}, [qw(id)] );
$ch->change('INSERT', {id=>1}, [qw(id)] );
$ch->change('UPDATE', {id=>2}, [qw(id)] );
$ch->change('INSERT', {id=>2}, [qw(id)] );
is_deeply(
\@rows,
[
q/UPDATE `test`.`pt-2377` SET `data`='{"name": "Müller"}' WHERE `id`='1' LIMIT 1/,
q/INSERT INTO `test`.`pt-2377`(`id`, `data`) VALUES ('1', '{"name": "Müller"}')/,
q/UPDATE `test`.`pt-2377` SET `data`=NULL WHERE `id`='2' LIMIT 1/,
q/INSERT INTO `test`.`pt-2377`(`id`, `data`) VALUES ('2', NULL)/
],
"UPDATE and INSERT quote data regardless of how it looks if tbl_struct->quote_val is true"
);
}
# #############################################################################
# Done.
# #############################################################################

83
t/pt-table-sync/pt-2377.t Normal file
View File

@@ -0,0 +1,83 @@
#!/usr/bin/env perl
BEGIN {
die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n"
unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH};
unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib";
};
use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use Test::More;
use PerconaTest;
use Sandbox;
require "$trunk/bin/pt-table-sync";
my $dp = new DSNParser(opts=>$dsn_opts);
my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp);
my $source_dbh = $sb->get_dbh_for('source');
my $replica1_dbh = $sb->get_dbh_for('replica1');
if ( !$source_dbh ) {
plan skip_all => 'Cannot connect to sandbox source';
}
elsif ( !$replica1_dbh ) {
plan skip_all => 'Cannot connect to sandbox replica1';
}
elsif ( $sandbox_version lt '8.0') {
plan skip_all => 'Requires MySQL >= 8.0';
}
else {
plan tests => 3;
}
my $output;
# #############################################################################
# Test generated REPLACE statements.
# #############################################################################
$sb->load_file('source', "t/pt-table-sync/samples/pt-2377.sql");
$sb->wait_for_replicas();
$replica1_dbh->do("delete from `test`.`test_table` where `id`=1");
$output = remove_traces(output(
sub { pt_table_sync::main('--sync-to-source',
'h=127.0.0.1,P=12346,u=msandbox,p=msandbox',
qw(-t test.test_table --print --execute))
},
));
chomp($output);
is(
$output,
q/REPLACE INTO `test`.`test_table`(`id`, `data`) VALUES ('1', '{"name": "Müller"}');/,
"UTF8 characters of JSON values are printed correctly in REPLACE statements"
);
# #############################################################################
# Test generated UPDATE statements.
# #############################################################################
$sb->load_file('source', "t/pt-table-sync/samples/pt-2377.sql");
$sb->wait_for_replicas();
$replica1_dbh->do(q/update `test`.`test_table` set `data`='{"reaction": "哈哈哈"}' where `id`=2/);
$output = remove_traces(output(
sub { pt_table_sync::main(qw(--print --execute),
"h=127.0.0.1,P=12346,u=msandbox,p=msandbox,D=test,t=test_table",
"h=127.0.0.1,P=12345,u=msandbox,p=msandbox,D=test,t=test_table");
}
));
chomp($output);
is(
$output,
q/UPDATE `test`.`test_table` SET `data`='{"reaction": "哈哈哈"}' WHERE `id`='2' LIMIT 1;/,
"UTF8 characters of JSON values are printed correctly in UPDATE statements"
);
# #############################################################################
# Done.
# #############################################################################
$sb->wipe_clean($source_dbh);
ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . " broke the sandbox");
exit;

View File

@@ -0,0 +1,14 @@
DROP DATABASE IF EXISTS test;
CREATE DATABASE test;
USE test;
CREATE TABLE `test_table` (
`id` INT AUTO_INCREMENT PRIMARY KEY,
`data` JSON NOT NULL
) ENGINE=InnoDB;
INSERT INTO
`test_table` (`data`)
VALUES
('{"name": "Müller"}'),
('{"reaction": "哈哈"}');