diff --git a/.bzrignore b/.bzrignore index 94c66994..06be5c3b 100644 --- a/.bzrignore +++ b/.bzrignore @@ -1,6 +1,6 @@ config/NaturalDocs/Data docs/dev/* -docs/user/html +docs/user/* docs/test-coverage/db docs/test-coverage/html release diff --git a/bin/pt-table-checksum b/bin/pt-table-checksum index 196a8d10..b16698ed 100755 --- a/bin/pt-table-checksum +++ b/bin/pt-table-checksum @@ -1604,14 +1604,19 @@ sub join_quote { sub serialize_list { my ( $self, @args ) = @_; - if ( @args && $args[-1] eq '' ) { - push @args, ''; - } + return unless @args; + + return $args[0] if @args == 1 && !defined $args[0]; + + die "Cannot serialize multiple values with undef/NULL" + if grep { !defined $_ } @args; + return join ',', map { quotemeta } @args; } sub deserialize_list { my ( $self, $string ) = @_; + return $string unless defined $string; my @escaped_parts = $string =~ / \G # Start of string, or end of previous match. ( # Each of these is an element in the original list. @@ -1621,10 +1626,10 @@ sub deserialize_list { [^\\,]* # Same as above. )* # Repeat zero of more times. ) - ,? # Comma dividing elements or absolute end of the string. - /sxg; + , # Comma dividing elements + /sxgc; - pop @escaped_parts; + push @escaped_parts, pos($string) ? substr( $string, pos($string) ) : $string; my @unescaped_parts = map { my $part = $_; @@ -6402,13 +6407,17 @@ sub main { # --explain level 2: print chunk,lower boundary values,upper # boundary values. if ( $o->get('explain') > 1 ) { - my $lb_quoted = join(',', @{$boundary->{lower} || []}); - my $ub_quoted = join(',', @{$boundary->{upper} || []}); - my $chunk = $nibble_iter->nibble_number(); - printf "%d %s %s\n", - $chunk, - (defined $lb_quoted ? $lb_quoted : '1=1'), - (defined $ub_quoted ? $ub_quoted : '1=1'); + my $chunk = $nibble_iter->nibble_number(); + if ( $nibble_iter->one_nibble() ) { + printf "%d 1=1\n", $chunk; + } + else { + my $lb_quoted = join( + ',', map { defined $_ ? 
$_ : 'NULL'} @{$boundary->{lower}}); + my $ub_quoted = join( + ',', map { defined $_ ? $_ : 'NULL'} @{$boundary->{upper}}); + printf "%d %s %s\n", $chunk, $lb_quoted, $ub_quoted; + } if ( !$nibble_iter->more_boundaries() ) { print "\n"; # blank line between this table and the next table } @@ -6814,8 +6823,8 @@ sub exec_nibble { $tbl->{tbl}, # tbl $chunk, # chunk (number) $chunk_index, # chunk_index - $lb_quoted || undef, # lower_boundary - $ub_quoted || undef, # upper_boundary + $lb_quoted, # lower_boundary + $ub_quoted, # upper_boundary # this_cnt, this_crc WHERE @{$boundary->{lower}}, # upper boundary values @{$boundary->{upper}}, # lower boundary values diff --git a/bin/pt-table-sync b/bin/pt-table-sync index d9307fa6..c6820f62 100755 --- a/bin/pt-table-sync +++ b/bin/pt-table-sync @@ -1106,14 +1106,19 @@ sub join_quote { sub serialize_list { my ( $self, @args ) = @_; - if ( @args && $args[-1] eq '' ) { - push @args, ''; - } + return unless @args; + + return $args[0] if @args == 1 && !defined $args[0]; + + die "Cannot serialize multiple values with undef/NULL" + if grep { !defined $_ } @args; + return join ',', map { quotemeta } @args; } sub deserialize_list { my ( $self, $string ) = @_; + return $string unless defined $string; my @escaped_parts = $string =~ / \G # Start of string, or end of previous match. ( # Each of these is an element in the original list. @@ -1123,10 +1128,10 @@ sub deserialize_list { [^\\,]* # Same as above. )* # Repeat zero of more times. ) - ,? # Comma dividing elements or absolute end of the string. - /sxg; + , # Comma dividing elements + /sxgc; - pop @escaped_parts; + push @escaped_parts, pos($string) ? 
substr( $string, pos($string) ) : $string; my @unescaped_parts = map { my $part = $_; diff --git a/docs/user/authors.rst b/docs/user/authors.rst deleted file mode 100644 index c2600dc9..00000000 --- a/docs/user/authors.rst +++ /dev/null @@ -1,9 +0,0 @@ - -******* -AUTHORS -******* - -Percona Toolkit is primarily developed by Baron Schwartz and Daniel Nichter, -both of whom are employed by Percona Inc. See each program's documenation -for details. - diff --git a/docs/user/bugs.rst b/docs/user/bugs.rst deleted file mode 100644 index 6fca5644..00000000 --- a/docs/user/bugs.rst +++ /dev/null @@ -1,21 +0,0 @@ - -**** -BUGS -**** - -Please report bugs at `https://bugs.launchpad.net/percona-toolkit `_. -Include the following information in your bug report: - -\* Complete command-line used to run the tool - -\* Tool \ ``--version``\ - -\* MySQL version of all servers involved - -\* Output from the tool including STDERR - -\* Input files (log/dump/config files, etc.) - -If possible, include debugging output by running the tool with \ ``PTDEBUG``\ ; -see "ENVIRONMENT". - diff --git a/docs/user/configuration_files.rst b/docs/user/configuration_files.rst deleted file mode 100644 index 088b675b..00000000 --- a/docs/user/configuration_files.rst +++ /dev/null @@ -1,115 +0,0 @@ - -******************* -CONFIGURATION FILES -******************* - -Percona Toolkit tools can read options from configuration files. The -configuration file syntax is simple and direct, and bears some resemblances -to the MySQL command-line client tools. The configuration files all follow -the same conventions. - -Internally, what actually happens is that the lines are read from the file and -then added as command-line options and arguments to the tool, so just -think of the configuration files as a way to write your command lines. - -SYNTAX -====== - -The syntax of the configuration files is as follows: - -\* - - Whitespace followed by a hash (#) sign signifies that the rest of the line is a - comment. 
This is deleted. - - -\* - - Whitespace is stripped from the beginning and end of all lines. - - -\* - - Empty lines are ignored. - - -\* - - Each line is permitted to be in either of the following formats: - - - .. code-block:: perl - - option - option=value - - - Whitespace around the equals sign is deleted during processing. - - -\* - - Only long options are recognized. - - -\* - - A line containing only two hyphens signals the end of option parsing. Any - further lines are interpreted as additional arguments (not options) to the - program. - - -READ ORDER -========== - -The tools read several configuration files in order: - -1. - - The global Percona Toolkit configuration file, - \ */etc/percona-toolkit/percona-toolkit.conf*\ . All tools read this file, - so you should only add options to it that you want to apply to all tools. - - -2. - - The global tool-specific configuration file, \ */etc/percona-toolkit/TOOL.conf*\ , - where \ ``TOOL``\ is a tool name like \ ``pt-query-digest``\ . This file is named - after the specific tool you're using, so you can add options that apply - only to that tool. - - -3. - - The user's own Percona Toolkit configuration file, - \ *$HOME/.percona-toolkit.conf*\ . All tools read this file, so you should only - add options to it that you want to apply to all tools. - - -4. - - The user's tool-specific configuration file, \ *$HOME/.TOOL.conf*\ , - where \ ``TOOL``\ is a tool name like \ ``pt-query-digest``\ . This file is named - after the specific tool you're using, so you can add options that apply - only to that tool. - - -SPECIFYING -========== - -There is a special \ ``--config``\ option, which lets you specify which -configuration files Percona Toolkit should read. You specify a -comma-separated list of files. However, its behavior is not like other -command-line options. It must be given \ **first**\ on the command line, -before any other options. If you try to specify it anywhere else, it will -cause an error. 
Also, you cannot specify \ ``--config=/path/to/file``\ ; -you must specify the option and the path to the file separated by whitespace -\ *without an equal sign*\ between them, like: - -.. code-block:: perl - - --config /path/to/file - -If you don't want any configuration files at all, specify \ ``--config ''``\ to -provide an empty list of files. - diff --git a/docs/user/copyright_license_and_warranty.rst b/docs/user/copyright_license_and_warranty.rst deleted file mode 100644 index 6d13866a..00000000 --- a/docs/user/copyright_license_and_warranty.rst +++ /dev/null @@ -1,22 +0,0 @@ - -******************************** -COPYRIGHT, LICENSE, AND WARRANTY -******************************** - -Percona Toolkit is copyright 2011 Percona Inc. and others. -See each program's documentation for complete copyright notices. - -THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF -MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation, version 2; OR the Perl Artistic License. On UNIX and similar -systems, you can issue \`man perlgpl' or \`man perlartistic' to read these -licenses. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA. - diff --git a/docs/user/environment.rst b/docs/user/environment.rst deleted file mode 100644 index 9d583d0c..00000000 --- a/docs/user/environment.rst +++ /dev/null @@ -1,15 +0,0 @@ - -*********** -ENVIRONMENT -*********** - -The environment variable \ ``PTDEBUG``\ enables verbose debugging output to STDERR. -To enable debugging and capture all output to a file, run the tool like: - -.. code-block:: perl - - PTDEBUG=1 pt-table-checksum ... 
> FILE 2>&1 - -Be careful: debugging output is voluminous and can generate several megabytes -of output. - diff --git a/docs/user/index.rst b/docs/user/index.rst deleted file mode 100644 index 77788587..00000000 --- a/docs/user/index.rst +++ /dev/null @@ -1,31 +0,0 @@ - -***************************** -Percona Toolkit Documentation -***************************** - -Percona Toolkit is a collection of advanced command-line tools used by -Percona (`http://www.percona.com/ `_) support staff to perform a variety of -MySQL and system tasks that are too difficult or complex to perform manually. - -These tools are ideal alternatives to private or "one-off" scripts because -they are professionally developed, formally tested, and fully documented. -They are also fully self-contained, so installation is quick and easy and -no libraries are installed. - -Percona Toolkit is derived from Maatkit and Aspersa, two of the best-known -toolkits for MySQL server administration. It is developed and supported by -Percona Inc. For more information and other free, open-source software -developed by Percona, visit `http://www.percona.com/software/ `_. - -.. toctree:: - :maxdepth: 2 - - tools - configuration_files - environment - system_requirements - bugs - authors - copyright_license_and_warranty - version - release_notes diff --git a/docs/user/pt-archiver.rst b/docs/user/pt-archiver.rst deleted file mode 100644 index 21e4e03b..00000000 --- a/docs/user/pt-archiver.rst +++ /dev/null @@ -1,1556 +0,0 @@ - -########### -pt-archiver -########### - -.. highlight:: perl - - -**** -NAME -**** - - -pt-archiver - Archive rows from a MySQL table into another table or a file. - - -******** -SYNOPSIS -******** - - -Usage: pt-archiver [OPTION...] --source DSN --where WHERE - -pt-archiver nibbles records from a MySQL table. The --source and --dest -arguments use DSN syntax; if COPY is yes, --dest defaults to the key's value -from --source. 
- -Examples: - -Archive all rows from oltp_server to olap_server and to a file: - - -.. code-block:: perl - - pt-archiver --source h=oltp_server,D=test,t=tbl --dest h=olap_server \ - --file '/var/log/archive/%Y-%m-%d-%D.%t' \ - --where "1=1" --limit 1000 --commit-each - - -Purge (delete) orphan rows from child table: - - -.. code-block:: perl - - pt-archiver --source h=host,D=db,t=child --purge \ - --where 'NOT EXISTS(SELECT * FROM parent WHERE col=child.col)' - - - -***** -RISKS -***** - - -The following section is included to inform users about the potential risks, -whether known or unknown, of using this tool. The two main categories of risks -are those created by the nature of the tool (e.g. read-only tools vs. read-write -tools) and those created by bugs. - -pt-archiver is a read-write tool. It deletes data from the source by default, so -you should test your archiving jobs with the "--dry-run" option if you're not -sure about them. It is designed to have as little impact on production systems -as possible, but tuning with "--limit", "--txn-size" and similar options -might be a good idea too. - -If you write or use "--plugin" modules, you should ensure they are good -quality and well-tested. - -At the time of this release there is an unverified bug with -"--bulk-insert" that may cause data loss. - -The authoritative source for updated information is always the online issue -tracking system. Issues that affect this tool will be marked as such. You can -see a list of such issues at the following URL: -`http://www.percona.com/bugs/pt-archiver `_. - -See also "BUGS" for more information on filing bugs and getting help. - - -*********** -DESCRIPTION -*********** - - -pt-archiver is the tool I use to archive tables as described in -`http://tinyurl.com/mysql-archiving `_. The goal is a low-impact, forward-only -job to nibble old data out of the table without impacting OLTP queries much.
-You can insert the data into another table, which need not be on the same -server. You can also write it to a file in a format suitable for LOAD DATA -INFILE. Or you can do neither, in which case it's just an incremental DELETE. - -pt-archiver is extensible via a plugin mechanism. You can inject your own -code to add advanced archiving logic that could be useful for archiving -dependent data, applying complex business rules, or building a data warehouse -during the archiving process. - -You need to choose values carefully for some options. The most important are -"--limit", "--retries", and "--txn-size". - -The strategy is to find the first row(s), then scan some index forward-only to -find more rows efficiently. Each subsequent query should not scan the entire -table; it should seek into the index, then scan until it finds more archivable -rows. Specifying the index with the 'i' part of the "--source" argument can -be crucial for this; use "--dry-run" to examine the generated queries and be -sure to EXPLAIN them to see if they are efficient (most of the time you probably -want to scan the PRIMARY key, which is the default). Even better, profile -pt-archiver with pt-query-profiler and make sure it is not scanning the whole -table every query. - -You can disable the seek-then-scan optimizations partially or wholly with -"--no-ascend" and "--ascend-first". Sometimes this may be more efficient -for multi-column keys. Be aware that pt-archiver is built to start at the -beginning of the index it chooses and scan it forward-only. This might result -in long table scans if you're trying to nibble from the end of the table by an -index other than the one it prefers. See "--source" and read the -documentation on the \ ``i``\ part if this applies to you. - - -****** -OUTPUT -****** - - -If you specify "--progress", the output is a header row, plus status output -at intervals. 
Each row in the status output lists the current date and time, -how many seconds pt-archiver has been running, and how many rows it has -archived. - -If you specify "--statistics", \ ``pt-archiver``\ outputs timing and other -information to help you identify which part of your archiving process takes the -most time. - - -************** -ERROR-HANDLING -************** - - -pt-archiver tries to catch signals and exit gracefully; for example, if you -send it SIGTERM (Ctrl-C on UNIX-ish systems), it will catch the signal, print a -message about the signal, and exit fairly normally. It will not execute -"--analyze" or "--optimize", because these may take a long time to finish. -It will run all other code normally, including calling after_finish() on any -plugins (see "EXTENDING"). - -In other words, a signal, if caught, will break out of the main archiving -loop and skip optimize/analyze. - - -******* -OPTIONS -******* - - -Specify at least one of "--dest", "--file", or "--purge". - -"--ignore" and "--replace" are mutually exclusive. - -"--txn-size" and "--commit-each" are mutually exclusive. - -"--low-priority-insert" and "--delayed-insert" are mutually exclusive. - -"--share-lock" and "--for-update" are mutually exclusive. - -"--analyze" and "--optimize" are mutually exclusive. - -"--no-ascend" and "--no-delete" are mutually exclusive. - -DSN values in "--dest" default to values from "--source" if COPY is yes. - - ---analyze - - type: string - - Run ANALYZE TABLE afterwards on "--source" and/or "--dest". - - Runs ANALYZE TABLE after finishing. The argument is an arbitrary string. If it - contains the letter 's', the source will be analyzed. If it contains 'd', the - destination will be analyzed. You can specify either or both. For example, the - following will analyze both: - - - .. code-block:: perl - - --analyze=ds - - - See `http://dev.mysql.com/doc/en/analyze-table.html `_ for details on ANALYZE - TABLE. - - - ---ascend-first - - Ascend only first column of index. 
- - If you do want to use the ascending index optimization (see "--no-ascend"), - but do not want to incur the overhead of ascending a large multi-column index, - you can use this option to tell pt-archiver to ascend only the leftmost column - of the index. This can provide a significant performance boost over not - ascending the index at all, while avoiding the cost of ascending the whole - index. - - See "EXTENDING" for a discussion of how this interacts with plugins. - - - ---ask-pass - - Prompt for a password when connecting to MySQL. - - - ---buffer - - Buffer output to "--file" and flush at commit. - - Disables autoflushing to "--file" and flushes "--file" to disk only when a - transaction commits. This typically means the file is block-flushed by the - operating system, so there may be some implicit flushes to disk between - commits as well. The default is to flush "--file" to disk after every row. - - The danger is that a crash might cause lost data. - - The performance increase I have seen from using "--buffer" is around 5 to 15 - percent. Your mileage may vary. - - - ---bulk-delete - - Delete each chunk with a single statement (implies "--commit-each"). - - Delete each chunk of rows in bulk with a single \ ``DELETE``\ statement. The - statement deletes every row between the first and last row of the chunk, - inclusive. It implies "--commit-each", since it would be a bad idea to - \ ``INSERT``\ rows one at a time and commit them before the bulk \ ``DELETE``\ . - - The normal method is to delete every row by its primary key. Bulk deletes might - be a lot faster. \ **They also might not be faster**\ if you have a complex - \ ``WHERE``\ clause. - - This option completely defers all \ ``DELETE``\ processing until the chunk of rows - is finished. If you have a plugin on the source, its \ ``before_delete``\ method - will not be called. Instead, its \ ``before_bulk_delete``\ method is called later. 
- - \ **WARNING**\ : if you have a plugin on the source that sometimes doesn't return - true from \ ``is_archivable()``\ , you should use this option only if you understand - what it does. If the plugin instructs \ ``pt-archiver``\ not to archive a row, - it will still be deleted by the bulk delete! - - - ---[no]bulk-delete-limit - - default: yes - - Add "--limit" to "--bulk-delete" statement. - - This is an advanced option and you should not disable it unless you know what - you are doing and why! By default, "--bulk-delete" appends a "--limit" - clause to the bulk delete SQL statement. In certain cases, this clause can be - omitted by specifying \ ``--no-bulk-delete-limit``\ . "--limit" must still be - specified. - - - ---bulk-insert - - Insert each chunk with LOAD DATA INFILE (implies "--bulk-delete" "--commit-each"). - - Insert each chunk of rows with \ ``LOAD DATA LOCAL INFILE``\ . This may be much - faster than inserting a row at a time with \ ``INSERT``\ statements. It is - implemented by creating a temporary file for each chunk of rows, and writing the - rows to this file instead of inserting them. When the chunk is finished, it - uploads the rows. - - To protect the safety of your data, this option forces bulk deletes to be used. - It would be unsafe to delete each row as it is found, before inserting the rows - into the destination first. Forcing bulk deletes guarantees that the deletion - waits until the insertion is successful. - - The "--low-priority-insert", "--replace", and "--ignore" options work - with this option, but "--delayed-insert" does not. - - - ---charset - - short form: -A; type: string - - Default character set. If the value is utf8, sets Perl's binmode on - STDOUT to utf8, passes the mysql_enable_utf8 option to DBD::mysql, and runs SET - NAMES UTF8 after connecting to MySQL. Any other value sets binmode on STDOUT - without the utf8 layer, and runs SET NAMES after connecting to MySQL. - - See also "--[no]check-charset". 
- - - ---[no]check-charset - - default: yes - - Ensure connection and table character sets are the same. Disabling this check - may cause text to be erroneously converted from one character set to another - (usually from utf8 to latin1) which may cause data loss or mojibake. Disabling - this check may be useful or necessary when character set conversions are - intended. - - - ---[no]check-columns - - default: yes - - Ensure "--source" and "--dest" have same columns. - - Enabled by default; causes pt-archiver to check that the source and destination - tables have the same columns. It does not check column order, data type, etc. - It just checks that all columns in the source exist in the destination and - vice versa. If there are any differences, pt-archiver will exit with an - error. - - To disable this check, specify --no-check-columns. - - - ---check-interval - - type: time; default: 1s - - How often to check for slave lag if "--check-slave-lag" is given. - - - ---check-slave-lag - - type: string - - Pause archiving until the specified DSN's slave lag is less than "--max-lag". - - - ---columns - - short form: -c; type: array - - Comma-separated list of columns to archive. - - Specify a comma-separated list of columns to fetch, write to the file, and - insert into the destination table. If specified, pt-archiver ignores other - columns unless it needs to add them to the \ ``SELECT``\ statement for ascending an - index or deleting rows. It fetches and uses these extra columns internally, but - does not write them to the file or to the destination table. It \ *does*\ pass - them to plugins. - - See also "--primary-key-only". - - - ---commit-each - - Commit each set of fetched and archived rows (disables "--txn-size"). - - Commits transactions and flushes "--file" after each set of rows has been - archived, before fetching the next set of rows, and before sleeping if - "--sleep" is specified. 
Disables "--txn-size"; use "--limit" to - control the transaction size with "--commit-each". - - This option is useful as a shortcut to make "--limit" and "--txn-size" the - same value, but more importantly it avoids transactions being held open while - searching for more rows. For example, imagine you are archiving old rows from - the beginning of a very large table, with "--limit" 1000 and "--txn-size" - 1000. After some period of finding and archiving 1000 rows at a time, - pt-archiver finds the last 999 rows and archives them, then executes the next - SELECT to find more rows. This scans the rest of the table, but never finds any - more rows. It has held open a transaction for a very long time, only to - determine it is finished anyway. You can use "--commit-each" to avoid this. - - - ---config - - type: Array - - Read this comma-separated list of config files; if specified, this must be the - first option on the command line. - - - ---delayed-insert - - Add the DELAYED modifier to INSERT statements. - - Adds the DELAYED modifier to INSERT or REPLACE statements. See - `http://dev.mysql.com/doc/en/insert.html `_ for details. - - - ---dest - - type: DSN - - DSN specifying the table to archive to. - - This item specifies a table into which pt-archiver will insert rows - archived from "--source". It uses the same key=val argument format as - "--source". Most missing values default to the same values as - "--source", so you don't have to repeat options that are the same in - "--source" and "--dest". Use the "--help" option to see which values - are copied from "--source". - - \ **WARNING**\ : Using a default options file (F) DSN option that defines a - socket for "--source" causes pt-archiver to connect to "--dest" using - that socket unless another socket for "--dest" is specified. This - means that pt-archiver may incorrectly connect to "--source" when it - connects to "--dest". For example: - - - .. 
code-block:: perl - - --source F=host1.cnf,D=db,t=tbl --dest h=host2 - - - When pt-archiver connects to "--dest", host2, it will connect via the - "--source", host1, socket defined in host1.cnf. - - - ---dry-run - - Print queries and exit without doing anything. - - Causes pt-archiver to exit after printing the filename and SQL statements - it will use. - - - ---file - - type: string - - File to archive to, with DATE_FORMAT()-like formatting. - - Filename to write archived rows to. A subset of MySQL's DATE_FORMAT() - formatting codes are allowed in the filename, as follows: - - - .. code-block:: perl - - %d Day of the month, numeric (01..31) - %H Hour (00..23) - %i Minutes, numeric (00..59) - %m Month, numeric (01..12) - %s Seconds (00..59) - %Y Year, numeric, four digits - - - You can use the following extra format codes too: - - - .. code-block:: perl - - %D Database name - %t Table name - - - Example: - - - .. code-block:: perl - - --file '/var/log/archive/%Y-%m-%d-%D.%t' - - - The file's contents are in the same format used by SELECT INTO OUTFILE, as - documented in the MySQL manual: rows terminated by newlines, columns - terminated by tabs, NULL characters are represented by \N, and special - characters are escaped by \. This lets you reload a file with LOAD DATA - INFILE's default settings. - - If you want a column header at the top of the file, see "--header". The file - is auto-flushed by default; see "--buffer". - - - ---for-update - - Adds the FOR UPDATE modifier to SELECT statements. - - For details, see `http://dev.mysql.com/doc/en/innodb-locking-reads.html `_. - - - ---header - - Print column header at top of "--file". - - Writes column names as the first line in the file given by "--file". If the - file exists, does not write headers; this keeps the file loadable with LOAD - DATA INFILE in case you append more output to it. - - - ---help - - Show help and exit. - - - ---high-priority-select - - Adds the HIGH_PRIORITY modifier to SELECT statements. 
- - See `http://dev.mysql.com/doc/en/select.html `_ for details. - - - ---host - - short form: -h; type: string - - Connect to host. - - - ---ignore - - Use IGNORE for INSERT statements. - - Causes INSERTs into "--dest" to be INSERT IGNORE. - - - ---limit - - type: int; default: 1 - - Number of rows to fetch and archive per statement. - - Limits the number of rows returned by the SELECT statements that retrieve rows - to archive. Default is one row. It may be more efficient to increase the - limit, but be careful if you are archiving sparsely, skipping over many rows; - this can potentially cause more contention with other queries, depending on the - storage engine, transaction isolation level, and options such as - "--for-update". - - - ---local - - Do not write OPTIMIZE or ANALYZE queries to binlog. - - Adds the NO_WRITE_TO_BINLOG modifier to ANALYZE and OPTIMIZE queries. See - "--analyze" for details. - - - ---low-priority-delete - - Adds the LOW_PRIORITY modifier to DELETE statements. - - See `http://dev.mysql.com/doc/en/delete.html `_ for details. - - - ---low-priority-insert - - Adds the LOW_PRIORITY modifier to INSERT or REPLACE statements. - - See `http://dev.mysql.com/doc/en/insert.html `_ for details. - - - ---max-lag - - type: time; default: 1s - - Pause archiving if the slave given by "--check-slave-lag" lags. - - This option causes pt-archiver to look at the slave every time it's about - to fetch another row. If the slave's lag is greater than the option's value, - or if the slave isn't running (so its lag is NULL), pt-archiver sleeps - for "--check-interval" seconds and then looks at the lag again. It repeats - until the slave is caught up, then proceeds to fetch and archive the row. - - This option may eliminate the need for "--sleep" or "--sleep-coef". - - - ---no-ascend - - Do not use ascending index optimization.
- - The default ascending-index optimization causes \ ``pt-archiver``\ to optimize - repeated \ ``SELECT``\ queries so they seek into the index where the previous query - ended, then scan along it, rather than scanning from the beginning of the table - every time. This is enabled by default because it is generally a good strategy - for repeated accesses. - - Large, multiple-column indexes may cause the WHERE clause to be complex enough - that this could actually be less efficient. Consider for example a four-column - PRIMARY KEY on (a, b, c, d). The WHERE clause to start where the last query - ended is as follows: - - - .. code-block:: perl - - WHERE (a > ?) - OR (a = ? AND b > ?) - OR (a = ? AND b = ? AND c > ?) - OR (a = ? AND b = ? AND c = ? AND d >= ?) - - - Populating the placeholders with values uses memory and CPU, adds network - traffic and parsing overhead, and may make the query harder for MySQL to - optimize. A four-column key isn't a big deal, but a ten-column key in which - every column allows \ ``NULL``\ might be. - - Ascending the index might not be necessary if you know you are simply removing - rows from the beginning of the table in chunks, but not leaving any holes, so - starting at the beginning of the table is actually the most efficient thing to - do. - - See also "--ascend-first". See "EXTENDING" for a discussion of how this - interacts with plugins. - - - ---no-delete - - Do not delete archived rows. - - Causes \ ``pt-archiver``\ not to delete rows after processing them. This disallows - "--no-ascend", because enabling them both would cause an infinite loop. - - If there is a plugin on the source DSN, its \ ``before_delete``\ method is called - anyway, even though \ ``pt-archiver``\ will not execute the delete. See - "EXTENDING" for more on plugins. - - - ---optimize - - type: string - - Run OPTIMIZE TABLE afterwards on "--source" and/or "--dest". - - Runs OPTIMIZE TABLE after finishing. 
See "--analyze" for the option syntax - and `http://dev.mysql.com/doc/en/optimize-table.html `_ for details on OPTIMIZE - TABLE. - - - ---password - - short form: -p; type: string - - Password to use when connecting. - - - ---pid - - type: string - - Create the given PID file when daemonized. The file contains the process ID of - the daemonized instance. The PID file is removed when the daemonized instance - exits. The program checks for the existence of the PID file when starting; if - it exists and the process with the matching PID exists, the program exits. - - - ---plugin - - type: string - - Perl module name to use as a generic plugin. - - Specify the Perl module name of a general-purpose plugin. It is currently used - only for statistics (see "--statistics") and must have \ ``new()``\ and a - \ ``statistics()``\ method. - - The \ ``new( src => $src, dst => $dst, opts => $o )``\ method gets the source - and destination DSNs, and their database connections, just like the - connection-specific plugins do. It also gets an OptionParser object (\ ``$o``\ ) for - accessing command-line options (example: \ ``$o->get('purge');``\ ). - - The \ ``statistics(\%stats, $time)``\ method gets a hashref of the statistics - collected by the archiving job, and the time the whole job started. - - - ---port - - short form: -P; type: int - - Port number to use for connection. - - - ---primary-key-only - - Primary key columns only. - - A shortcut for specifying "--columns" with the primary key columns. This is - an efficiency if you just want to purge rows; it avoids fetching the entire row, - when only the primary key columns are needed for \ ``DELETE``\ statements. See also - "--purge". - - - ---progress - - type: int - - Print progress information every X rows. - - Prints current time, elapsed time, and rows archived every X rows. - - - ---purge - - Purge instead of archiving; allows omitting "--file" and "--dest".
- - Allows archiving without a "--file" or "--dest" argument, which is - effectively a purge since the rows are just deleted. - - If you just want to purge rows, consider specifying the table's primary key - columns with "--primary-key-only". This will prevent fetching all columns - from the server for no reason. - - - ---quick-delete - - Adds the QUICK modifier to DELETE statements. - - See `http://dev.mysql.com/doc/en/delete.html `_ for details. As stated in the - documentation, in some cases it may be faster to use DELETE QUICK followed by - OPTIMIZE TABLE. You can use "--optimize" for this. - - - ---quiet - - short form: -q - - Do not print any output, such as for "--statistics". - - Suppresses normal output, including the output of "--statistics", but doesn't - suppress the output from "--why-quit". - - - ---replace - - Causes INSERTs into "--dest" to be written as REPLACE. - - - ---retries - - type: int; default: 1 - - Number of retries per timeout or deadlock. - - Specifies the number of times pt-archiver should retry when there is an - InnoDB lock wait timeout or deadlock. When retries are exhausted, - pt-archiver will exit with an error. - - Consider carefully what you want to happen when you are archiving between a - mixture of transactional and non-transactional storage engines. The INSERT to - "--dest" and DELETE from "--source" are on separate connections, so they - do not actually participate in the same transaction even if they're on the same - server. However, pt-archiver implements simple distributed transactions in - code, so commits and rollbacks should happen as desired across the two - connections. - - At this time I have not written any code to handle errors with transactional - storage engines other than InnoDB. Request that feature if you need it. - - - ---run-time - - type: time - - Time to run before exiting. - - Optional suffix s=seconds, m=minutes, h=hours, d=days; if no suffix, s is used. 
- - - ---[no]safe-auto-increment - - default: yes - - Do not archive row with max AUTO_INCREMENT. - - Adds an extra WHERE clause to prevent pt-archiver from removing the newest - row when ascending a single-column AUTO_INCREMENT key. This guards against - re-using AUTO_INCREMENT values if the server restarts, and is enabled by - default. - - The extra WHERE clause contains the maximum value of the auto-increment column - as of the beginning of the archive or purge job. If new rows are inserted while - pt-archiver is running, it will not see them. - - - ---sentinel - - type: string; default: /tmp/pt-archiver-sentinel - - Exit if this file exists. - - The presence of the file specified by "--sentinel" will cause pt-archiver to - stop archiving and exit. The default is /tmp/pt-archiver-sentinel. You - might find this handy to stop cron jobs gracefully if necessary. See also - "--stop". - - - ---set-vars - - type: string; default: wait_timeout=10000 - - Set these MySQL variables. - - Specify any variables you want to be set immediately after connecting to MySQL. - These will be included in a \ ``SET``\ command. - - - ---share-lock - - Adds the LOCK IN SHARE MODE modifier to SELECT statements. - - See `http://dev.mysql.com/doc/en/innodb-locking-reads.html `_. - - - ---skip-foreign-key-checks - - Disables foreign key checks with SET FOREIGN_KEY_CHECKS=0. - - - ---sleep - - type: int - - Sleep time between fetches. - - Specifies how long to sleep between SELECT statements. Default is not to - sleep at all. Transactions are NOT committed, and the "--file" file is NOT - flushed, before sleeping. See "--txn-size" to control that. - - If "--commit-each" is specified, committing and flushing happens before - sleeping. - - - ---sleep-coef - - type: float - - Calculate "--sleep" as a multiple of the last SELECT time. - - If this option is specified, pt-archiver will sleep for the query time of the - last SELECT multiplied by the specified coefficient. 
- - This is a slightly more sophisticated way to throttle the SELECTs: sleep a - varying amount of time between each SELECT, depending on how long the SELECTs - are taking. - - - ---socket - - short form: -S; type: string - - Socket file to use for connection. - - - ---source - - type: DSN - - DSN specifying the table to archive from (required). This argument is a DSN. - See DSN OPTIONS for the syntax. Most options control how pt-archiver - connects to MySQL, but there are some extended DSN options in this tool's - syntax. The D, t, and i options select a table to archive: - - - .. code-block:: perl - - --source h=my_server,D=my_database,t=my_tbl - - - The a option specifies the database to set as the connection's default with USE. - If the b option is true, it disables binary logging with SQL_LOG_BIN. The m - option specifies pluggable actions, which an external Perl module can provide. - The only required part is the table; other parts may be read from various - places in the environment (such as options files). - - The 'i' part deserves special mention. This tells pt-archiver which index - it should scan to archive. This appears in a FORCE INDEX or USE INDEX hint in - the SELECT statements used to fetch archivable rows. If you don't specify - anything, pt-archiver will auto-discover a good index, preferring a \ ``PRIMARY - KEY``\ if one exists. In my experience this usually works well, so most of the - time you can probably just omit the 'i' part. - - The index is used to optimize repeated accesses to the table; pt-archiver - remembers the last row it retrieves from each SELECT statement, and uses it to - construct a WHERE clause, using the columns in the specified index, that should - allow MySQL to start the next SELECT where the last one ended, rather than - potentially scanning from the beginning of the table with each successive - SELECT. 
If you are using external plugins, please see "EXTENDING" for a - discussion of how they interact with ascending indexes. - - The 'a' and 'b' options allow you to control how statements flow through the - binary log. If you specify the 'b' option, binary logging will be disabled on - the specified connection. If you specify the 'a' option, the connection will - \ ``USE``\ the specified database, which you can use to prevent slaves from - executing the binary log events with \ ``--replicate-ignore-db``\ options. These - two options can be used as different methods to achieve the same goal: archive - data off the master, but leave it on the slave. For example, you can run a - purge job on the master and prevent it from happening on the slave using your - method of choice. - - \ **WARNING**\ : Using a default options file (F) DSN option that defines a - socket for "--source" causes pt-archiver to connect to "--dest" using - that socket unless another socket for "--dest" is specified. This - means that pt-archiver may incorrectly connect to "--source" when it - is meant to connect to "--dest". For example: - - - .. code-block:: perl - - --source F=host1.cnf,D=db,t=tbl --dest h=host2 - - - When pt-archiver connects to "--dest", host2, it will connect via the - "--source", host1, socket defined in host1.cnf. - - - ---statistics - - Collect and print timing statistics. - - Causes pt-archiver to collect timing statistics about what it does. These - statistics are available to the plugin specified by "--plugin". - - Unless you specify "--quiet", \ ``pt-archiver``\ prints the statistics when it - exits. The statistics look like this: - - - .. 
code-block:: perl - - Started at 2008-07-18T07:18:53, ended at 2008-07-18T07:18:53 - Source: D=db,t=table - SELECT 4 - INSERT 4 - DELETE 4 - Action Count Time Pct - commit 10 0.1079 88.27 - select 5 0.0047 3.87 - deleting 4 0.0028 2.29 - inserting 4 0.0028 2.28 - other 0 0.0040 3.29 - - - The first two (or three) lines show times and the source and destination tables. - The next three lines show how many rows were fetched, inserted, and deleted. - - The remaining lines show counts and timing. The columns are the action, the - total number of times that action was timed, the total time it took, and the - percent of the program's total runtime. The rows are sorted in order of - descending total time. The last row is the rest of the time not explicitly - attributed to anything. Actions will vary depending on command-line options. - - If "--why-quit" is given, its behavior is changed slightly. This option - causes it to print the reason for exiting even when it's just because there are - no more rows. - - This option requires the standard Time::HiRes module, which is part of core Perl - on reasonably new Perl releases. - - - ---stop - - Stop running instances by creating the sentinel file. - - Causes pt-archiver to create the sentinel file specified by "--sentinel" and - exit. This should have the effect of stopping all running instances which are - watching the same sentinel file. - - - ---txn-size - - type: int; default: 1 - - Number of rows per transaction. - - Specifies the size, in number of rows, of each transaction. Zero disables - transactions altogether. After pt-archiver processes this many rows, it - commits both the "--source" and the "--dest" if given, and flushes the - file given by "--file". - - This parameter is critical to performance. If you are archiving from a live - server, which for example is doing heavy OLTP work, you need to choose a good - balance between transaction size and commit overhead. 
Larger transactions - create the possibility of more lock contention and deadlocks, but smaller - transactions cause more frequent commit overhead, which can be significant. To - give an idea, on a small test set I worked with while writing pt-archiver, a - value of 500 caused archiving to take about 2 seconds per 1000 rows on an - otherwise quiet MySQL instance on my desktop machine, archiving to disk and to - another table. Disabling transactions with a value of zero, which turns on - autocommit, dropped performance to 38 seconds per thousand rows. - - If you are not archiving from or to a transactional storage engine, you may - want to disable transactions so pt-archiver doesn't try to commit. - - - ---user - - short form: -u; type: string - - User for login if not current user. - - - ---version - - Show version and exit. - - - ---where - - type: string - - WHERE clause to limit which rows to archive (required). - - Specifies a WHERE clause to limit which rows are archived. Do not include the - word WHERE. You may need to quote the argument to prevent your shell from - interpreting it. For example: - - - .. code-block:: perl - - --where 'ts < current_date - interval 90 day' - - - For safety, "--where" is required. If you do not require a WHERE clause, use - "--where" 1=1. - - - ---why-quit - - Print reason for exiting unless rows exhausted. - - Causes pt-archiver to print a message if it exits for any reason other than - running out of rows to archive. This can be useful if you have a cron job with - "--run-time" specified, for example, and you want to be sure pt-archiver is - finishing before running out of time. - - If "--statistics" is given, the behavior is changed slightly. It will print - the reason for exiting even when it's just because there are no more rows. - - This output prints even if "--quiet" is given. That's so you can put - \ ``pt-archiver``\ in a \ ``cron``\ job and get an email if there's an abnormal exit. 
- - - - -*********** -DSN OPTIONS -*********** - - -These DSN options are used to create a DSN. Each option is given like -\ ``option=value``\ . The options are case-sensitive, so P and p are not the -same option. There cannot be whitespace before or after the \ ``=``\ and -if the value contains whitespace it must be quoted. DSN options are -comma-separated. See the percona-toolkit manpage for full details. - - -\* a - - copy: no - - Database to USE when executing queries. - - - -\* A - - dsn: charset; copy: yes - - Default character set. - - - -\* b - - copy: no - - If true, disable binlog with SQL_LOG_BIN. - - - -\* D - - dsn: database; copy: yes - - Database that contains the table. - - - -\* F - - dsn: mysql_read_default_file; copy: yes - - Only read default options from the given file - - - -\* h - - dsn: host; copy: yes - - Connect to host. - - - -\* i - - copy: yes - - Index to use. - - - -\* m - - copy: no - - Plugin module name. - - - -\* p - - dsn: password; copy: yes - - Password to use when connecting. - - - -\* P - - dsn: port; copy: yes - - Port number to use for connection. - - - -\* S - - dsn: mysql_socket; copy: yes - - Socket file to use for connection. - - - -\* t - - copy: yes - - Table to archive from/to. - - - -\* u - - dsn: user; copy: yes - - User for login if not current user. - - - - -********* -EXTENDING -********* - - -pt-archiver is extensible by plugging in external Perl modules to handle some -logic and/or actions. You can specify a module for both the "--source" and -the "--dest", with the 'm' part of the specification. For example: - - -.. code-block:: perl - - --source D=test,t=test1,m=My::Module1 --dest m=My::Module2,t=test2 - - -This will cause pt-archiver to load the My::Module1 and My::Module2 packages, -create instances of them, and then make calls to them during the archiving -process. - -You can also specify a plugin with "--plugin". 
- -The module must provide this interface: - - -new(dbh => $dbh, db => $db_name, tbl => $tbl_name) - - The plugin's constructor is passed a reference to the database handle, the - database name, and table name. The plugin is created just after pt-archiver - opens the connection, and before it examines the table given in the arguments. - This gives the plugin a chance to create and populate temporary tables, or do - other setup work. - - - -before_begin(cols => \@cols, allcols => \@allcols) - - This method is called just before pt-archiver begins iterating through rows - and archiving them, but after it does all other setup work (examining table - structures, designing SQL queries, and so on). This is the only time - pt-archiver tells the plugin column names for the rows it will pass the - plugin while archiving. - - The \ ``cols``\ argument is the column names the user requested to be archived, - either by default or by the "--columns" option. The \ ``allcols``\ argument is - the list of column names for every row pt-archiver will fetch from the source - table. It may fetch more columns than the user requested, because it needs some - columns for its own use. When subsequent plugin functions receive a row, it is - the full row containing all the extra columns, if any, added to the end. - - - -is_archivable(row => \@row) - - This method is called for each row to determine whether it is archivable. This - applies only to "--source". The argument is the row itself, as an arrayref. - If the method returns true, the row will be archived; otherwise it will be - skipped. - - Skipping a row adds complications for non-unique indexes. Normally - pt-archiver uses a WHERE clause designed to target the last processed row as - the place to start the scan for the next SELECT statement. If you have skipped - the row by returning false from is_archivable(), pt-archiver could get into - an infinite loop because the row still exists. 
Therefore, when you specify a - plugin for the "--source" argument, pt-archiver will change its WHERE clause - slightly. Instead of starting at "greater than or equal to" the last processed - row, it will start "strictly greater than." This will work fine on unique - indexes such as primary keys, but it may skip rows (leave holes) on non-unique - indexes or when ascending only the first column of an index. - - \ ``pt-archiver``\ will change the clause in the same way if you specify - "--no-delete", because again an infinite loop is possible. - - If you specify the "--bulk-delete" option and return false from this method, - \ ``pt-archiver``\ may not do what you want. The row won't be archived, but it will - be deleted, since bulk deletes operate on ranges of rows and don't know which - rows the plugin selected to keep. - - If you specify the "--bulk-insert" option, this method's return value will - influence whether the row is written to the temporary file for the bulk insert, - so bulk inserts will work as expected. However, bulk inserts require bulk - deletes. - - - -before_delete(row => \@row) - - This method is called for each row just before it is deleted. This applies only - to "--source". This is a good place for you to handle dependencies, such as - deleting things that are foreign-keyed to the row you are about to delete. You - could also use this to recursively archive all dependent tables. - - This plugin method is called even if "--no-delete" is given, but not if - "--bulk-delete" is given. - - - -before_bulk_delete(first_row => \@row, last_row => \@row) - - This method is called just before a bulk delete is executed. It is similar to - the \ ``before_delete``\ method, except its arguments are the first and last row of - the range to be deleted. It is called even if "--no-delete" is given. - - - -before_insert(row => \@row) - - This method is called for each row just before it is inserted. This applies - only to "--dest". 
You could use this to insert the row into multiple tables, - perhaps with an ON DUPLICATE KEY UPDATE clause to build summary tables in a data - warehouse. - - This method is not called if "--bulk-insert" is given. - - - -before_bulk_insert(first_row => \@row, last_row => \@row, filename => $bulk_insert_filename) - - This method is called just before a bulk insert is executed. It is similar to - the \ ``before_insert``\ method, except its arguments are the first and last row of - the range to be inserted. - - - -custom_sth(row => \@row, sql => $sql) - - This method is called just before inserting the row, but after - "before_insert()". It allows the plugin to specify a different \ ``INSERT``\ - statement if desired. The return value (if any) should be a DBI statement - handle. The \ ``sql``\ parameter is the SQL text used to prepare the default - \ ``INSERT``\ statement. This method is not called if you specify - "--bulk-insert". - - If no value is returned, the default \ ``INSERT``\ statement handle is used. - - This method applies only to the plugin specified for "--dest", so if your - plugin isn't doing what you expect, check that you've specified it for the - destination and not the source. - - - -custom_sth_bulk(first_row => \@row, last_row => \@row, sql => $sql, filename => $bulk_insert_filename) - - If you've specified "--bulk-insert", this method is called just before the - bulk insert, but after "before_bulk_insert()", and the arguments are - different. - - This method's return value, etc., is similar to that of the "custom_sth()" method. - - - -after_finish() - - This method is called after pt-archiver exits the archiving loop, commits all - database handles, closes "--file", and prints the final statistics, but - before pt-archiver runs ANALYZE or OPTIMIZE (see "--analyze" and - "--optimize"). 
- - - -If you specify a plugin for both "--source" and "--dest", pt-archiver -constructs, calls before_begin(), and calls after_finish() on the two plugins in -the order "--source", "--dest". - -pt-archiver assumes it controls transactions, and that the plugin will NOT -commit or roll back the database handle. The database handle passed to the -plugin's constructor is the same handle pt-archiver uses itself. Remember -that "--source" and "--dest" are separate handles. - -A sample module might look like this: - - -.. code-block:: perl - - package My::Module; - - sub new { - my ( $class, %args ) = @_; - return bless(\%args, $class); - } - - sub before_begin { - my ( $self, %args ) = @_; - # Save column names for later - $self->{cols} = $args{cols}; - } - - sub is_archivable { - my ( $self, %args ) = @_; - # Do some advanced logic with $args{row} - return 1; - } - - sub before_delete {} # Take no action - sub before_insert {} # Take no action - sub custom_sth {} # Take no action - sub after_finish {} # Take no action - - 1; - - - -*********** -ENVIRONMENT -*********** - - -The environment variable \ ``PTDEBUG``\ enables verbose debugging output to STDERR. -To enable debugging and capture all output to a file, run the tool like: - - -.. code-block:: perl - - PTDEBUG=1 pt-archiver ... > FILE 2>&1 - - -Be careful: debugging output is voluminous and can generate several megabytes -of output. - - -******************* -SYSTEM REQUIREMENTS -******************* - - -You need Perl, DBI, DBD::mysql, and some core packages that ought to be -installed in any reasonably new version of Perl. - - -**** -BUGS -**** - - -For a list of known bugs, see `http://www.percona.com/bugs/pt-archiver `_. - -Please report bugs at `https://bugs.launchpad.net/percona-toolkit `_. 
-Include the following information in your bug report: - - -\* Complete command-line used to run the tool - - - -\* Tool "--version" - - - -\* MySQL version of all servers involved - - - -\* Output from the tool including STDERR - - - -\* Input files (log/dump/config files, etc.) - - - -If possible, include debugging output by running the tool with \ ``PTDEBUG``\ ; -see "ENVIRONMENT". - - -*********** -DOWNLOADING -*********** - - -Visit `http://www.percona.com/software/percona-toolkit/ `_ to download the -latest release of Percona Toolkit. Or, get the latest release from the -command line: - - -.. code-block:: perl - - wget percona.com/get/percona-toolkit.tar.gz - - wget percona.com/get/percona-toolkit.rpm - - wget percona.com/get/percona-toolkit.deb - - -You can also get individual tools from the latest release: - - -.. code-block:: perl - - wget percona.com/get/TOOL - - -Replace \ ``TOOL``\ with the name of any tool. - - -******* -AUTHORS -******* - - -Baron Schwartz - - -*************** -ACKNOWLEDGMENTS -*************** - - -Andrew O'Brien - - -********************* -ABOUT PERCONA TOOLKIT -********************* - - -This tool is part of Percona Toolkit, a collection of advanced command-line -tools developed by Percona for MySQL support and consulting. Percona Toolkit -was forked from two projects in June, 2011: Maatkit and Aspersa. Those -projects were created by Baron Schwartz and developed primarily by him and -Daniel Nichter, both of whom are employed by Percona. Visit -`http://www.percona.com/software/ `_ for more software developed by Percona. - - -******************************** -COPYRIGHT, LICENSE, AND WARRANTY -******************************** - - -This program is copyright 2007-2011 Baron Schwartz, 2011 Percona Inc. -Feedback and improvements are welcome. - -THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation, version 2; OR the Perl Artistic License. On UNIX and similar -systems, you can issue \`man perlgpl' or \`man perlartistic' to read these -licenses. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA. - - -******* -VERSION -******* - - -pt-archiver 1.0.1 - diff --git a/docs/user/pt-collect.rst b/docs/user/pt-collect.rst deleted file mode 100644 index 342495e7..00000000 --- a/docs/user/pt-collect.rst +++ /dev/null @@ -1,264 +0,0 @@ - -########## -pt-collect -########## - -.. highlight:: perl - - -**** -NAME -**** - - -pt-collect - Collect information from a server for some period of time. - - -******** -SYNOPSIS -******** - - -Usage: pt-collect -d -g -i -o -s [OPTIONS] [-- MYSQL-OPTIONS] - -pt-collect tool gathers a variety of information about a system for a period -of time. It is typically executed when the stalk tool detects a condition -and wants to collect information to assist in diagnosis. Four options -must be specified on the command line: -dgios. - - -***** -RISKS -***** - - -The following section is included to inform users about the potential risks, -whether known or unknown, of using this tool. The two main categories of risks -are those created by the nature of the tool (e.g. read-only tools vs. read-write -tools) and those created by bugs. - -pt-collect is a read-only tool. It should be very low-risk. - -At the time of this release, we know of no bugs that could cause serious harm -to users. - -The authoritative source for updated information is always the online issue -tracking system. Issues that affect this tool will be marked as such. 
You can -see a list of such issues at the following URL: -`http://www.percona.com/bugs/pt-collect `_. - -See also "BUGS" for more information on filing bugs and getting help. - - -*********** -DESCRIPTION -*********** - - -pt-collect creates a lock to ensure that only one instance runs at a time, -and then saves a variety of performance and status data into files in the -configured directory. Files are named with a timestamp so they can be -grouped together. The tool is MySQL-centric by default, and gathers quite -a bit of diagnostic data that's useful for understanding the behavior of -a MySQL database server. - -Options after \ ``--``\ are passed to \ ``mysql``\ and \ ``mysqladmin``\ . - - -******* -OPTIONS -******* - - - --d (required) - - DESTINATION Where to store the resulting data; must already exist. - - - --g (required) - - Collect GDB stack traces. - - - --i INTERVAL (required) - - How many seconds to collect data. - - - --o (required) - - Collect oprofile data; disables -s. - - - --s (required) - - Collect strace data. - - - --f PERCENT - - Exit if the disk is more than this percent full (default 100). - - - --m MEGABYTES - - Exit if there are less than this many megabytes free disk space (default 0). - - - --p PREFIX - - Store the data into files with this prefix (optional). - - - --t - - Collect tcpdump data. - - - - -*********** -ENVIRONMENT -*********** - - -This tool does not use any environment variables. - - -******************* -SYSTEM REQUIREMENTS -******************* - - -This tool requires Bash v3 or newer and assumes that these programs -are installed, in the PATH, and executable: sysctl, top, vmstat, iostat, -mpstat, lsof, mysql, mysqladmin, df, netstat, pidof, flock, and others -depending on what command-line options are specified. If some of those -programs are not available, the tool will still run but may print warnings. - - -**** -BUGS -**** - - -For a list of known bugs, see `http://www.percona.com/bugs/pt-collect `_. 
- -Please report bugs at `https://bugs.launchpad.net/percona-toolkit `_. -Include the following information in your bug report: - - -\* Complete command-line used to run the tool - - - -\* Tool "--version" - - - -\* MySQL version of all servers involved - - - -\* Output from the tool including STDERR - - - -\* Input files (log/dump/config files, etc.) - - - -If possible, include debugging output by running the tool with \ ``PTDEBUG``\ ; -see "ENVIRONMENT". - - -*********** -DOWNLOADING -*********** - - -Visit `http://www.percona.com/software/percona-toolkit/ `_ to download the -latest release of Percona Toolkit. Or, get the latest release from the -command line: - - -.. code-block:: perl - - wget percona.com/get/percona-toolkit.tar.gz - - wget percona.com/get/percona-toolkit.rpm - - wget percona.com/get/percona-toolkit.deb - - -You can also get individual tools from the latest release: - - -.. code-block:: perl - - wget percona.com/get/TOOL - - -Replace \ ``TOOL``\ with the name of any tool. - - -******* -AUTHORS -******* - - -Baron Schwartz - - -********************* -ABOUT PERCONA TOOLKIT -********************* - - -This tool is part of Percona Toolkit, a collection of advanced command-line -tools developed by Percona for MySQL support and consulting. Percona Toolkit -was forked from two projects in June, 2011: Maatkit and Aspersa. Those -projects were created by Baron Schwartz and developed primarily by him and -Daniel Nichter, both of whom are employed by Percona. Visit -`http://www.percona.com/software/ `_ for more software developed by Percona. - - -******************************** -COPYRIGHT, LICENSE, AND WARRANTY -******************************** - - -This program is copyright 2010-2011 Baron Schwartz, 2011 Percona Inc. -Feedback and improvements are welcome. - -THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation, version 2; OR the Perl Artistic License. On UNIX and similar -systems, you can issue \`man perlgpl' or \`man perlartistic' to read these -licenses. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA. - - -******* -VERSION -******* - - -pt-collect 1.0.1 - diff --git a/docs/user/pt-config-diff.rst b/docs/user/pt-config-diff.rst deleted file mode 100644 index c673c1cc..00000000 --- a/docs/user/pt-config-diff.rst +++ /dev/null @@ -1,518 +0,0 @@ - -############## -pt-config-diff -############## - -.. highlight:: perl - - -**** -NAME -**** - - -pt-config-diff - Diff MySQL configuration files and server variables. - - -******** -SYNOPSIS -******** - - -Usage: pt-config-diff [OPTION...] CONFIG CONFIG [CONFIG...] - -pt-config-diff diffs MySQL configuration files and server variables. -CONFIG can be a filename or a DSN. At least two CONFIG sources must be given. -Like standard Unix diff, there is no output if there are no differences. - -Diff host1 config from SHOW VARIABLES against host2: - - -.. code-block:: perl - - pt-config-diff h=host1 h=host2 - - -Diff config from [mysqld] section in my.cnf against host1 config: - - -.. code-block:: perl - - pt-config-diff /etc/my.cnf h=host1 - - -Diff the [mysqld] section of two option files: - - -.. code-block:: perl - - pt-config-diff /etc/my-small.cnf /etc/my-large.cnf - - - -***** -RISKS -***** - - -The following section is included to inform users about the potential risks, -whether known or unknown, of using this tool. The two main categories of risks -are those created by the nature of the tool (e.g. read-only tools vs. read-write -tools) and those created by bugs. 
- -pt-config-diff reads MySQL's configuration and examines it and is thus very -low risk. - -At the time of this release there are no known bugs that pose a serious risk. - -The authoritative source for updated information is always the online issue -tracking system. Issues that affect this tool will be marked as such. You can -see a list of such issues at the following URL: -`http://www.percona.com/bugs/pt-config-diff `_. - -See also "BUGS" for more information on filing bugs and getting help. - - -*********** -DESCRIPTION -*********** - - -pt-config-diff diffs MySQL configurations by examining the values of server -system variables from two or more CONFIG sources specified on the command -line. A CONFIG source can be a DSN or a filename containing the output of -\ ``mysqld --help --verbose``\ , \ ``my_print_defaults``\ , \ ``SHOW VARIABLES``\ , or -an option file (e.g. my.cnf). - -For each DSN CONFIG, pt-config-diff connects to MySQL and gets variables -and values by executing \ ``SHOW /\*!40103 GLOBAL\*/ VARIABLES``\ . This is -an "active config" because it shows what server values MySQL is -actively (currently) running with. - -Only variables that all CONFIG sources have are compared because if a -variable is not present then we cannot know or safely guess its value. -For example, if you compare an option file (e.g. my.cnf) to an active config -(i.e. SHOW VARIABLES from a DSN CONFIG), the option file will probably -only have a few variables, whereas the active config has every variable. -Only values of the variables present in both configs are compared. - -Option file and DSN configs provide the best results. - - -****** -OUTPUT -****** - - -There is no output when there are no differences. When there are differences, -pt-config-diff prints a report to STDOUT that looks similar to the following: - - -.. 
code-block:: perl - - 2 config differences - Variable my.master.cnf my.slave.cnf - ========================= =============== =============== - datadir /tmp/12345/data /tmp/12346/data - port 12345 12346 - - -Comparing MySQL variables is difficult because there are many variations and -subtleties across the many versions and distributions of MySQL. When a -comparison fails, the tool prints a warning to STDERR, such as the following: - - -.. code-block:: perl - - Comparing log_error values (mysqld.log, /tmp/12345/data/mysqld.log) - caused an error: Argument "/tmp/12345/data/mysqld.log" isn't numeric - in numeric eq (==) at ./pt-config-diff line 2311. - - -Please report these warnings so the comparison functions can be improved. - - -*********** -EXIT STATUS -*********** - - -pt-config-diff exits with a zero exit status when there are no differences, and -1 if there are. - - -******* -OPTIONS -******* - - -This tool accepts additional command-line arguments. Refer to the -"SYNOPSIS" and usage information for details. - - ---ask-pass - - Prompt for a password when connecting to MySQL. - - - ---charset - - short form: -A; type: string - - Default character set. If the value is utf8, sets Perl's binmode on - STDOUT to utf8, passes the mysql_enable_utf8 option to DBD::mysql, and - runs SET NAMES UTF8 after connecting to MySQL. Any other value sets - binmode on STDOUT without the utf8 layer, and runs SET NAMES after - connecting to MySQL. - - - ---config - - type: Array - - Read this comma-separated list of config files; if specified, this must be the - first option on the command line. (This option does not specify a CONFIG; - it's equivalent to \ ``--defaults-file``\ .) - - - ---daemonize - - Fork to the background and detach from the shell. POSIX - operating systems only. - - - ---defaults-file - - short form: -F; type: string - - Only read mysql options from the given file. You must give an absolute - pathname. - - - ---help - - Show help and exit. 
- - - ---host - - short form: -h; type: string - - Connect to host. - - - ---ignore-variables - - type: array - - Ignore, do not compare, these variables. - - - ---password - - short form: -p; type: string - - Password to use for connection. - - - ---pid - - type: string - - Create the given PID file when daemonized. The file contains the process - ID of the daemonized instance. The PID file is removed when the - daemonized instance exits. The program checks for the existence of the - PID file when starting; if it exists and the process with the matching PID - exists, the program exits. - - - ---port - - short form: -P; type: int - - Port number to use for connection. - - - ---[no]report - - default: yes - - Print the MySQL config diff report to STDOUT. If you just want to check - if the given configs are different or not by examining the tool's exit - status, then specify \ ``--no-report``\ to suppress the report. - - - ---report-width - - type: int; default: 78 - - Truncate report lines to this many characters. Since some variable values can - be long, or when comparing multiple configs, it may help to increase the - report width so values are not truncated beyond readability. - - - ---set-vars - - type: string; default: wait_timeout=10000 - - Set these MySQL variables. Immediately after connecting to MySQL, this string - will be appended to SET and executed. - - - ---socket - - short form: -S; type: string - - Socket file to use for connection. - - - ---user - - short form: -u; type: string - - MySQL user if not current user. - - - ---version - - Show version and exit. - - - - -*********** -DSN OPTIONS -*********** - - -These DSN options are used to create a DSN. Each option is given like -\ ``option=value``\ . The options are case-sensitive, so P and p are not the -same option. There cannot be whitespace before or after the \ ``=``\ and -if the value contains whitespace it must be quoted. DSN options are -comma-separated. 
See the percona-toolkit manpage for full details. - - -\* A - - dsn: charset; copy: yes - - Default character set. - - - -\* D - - dsn: database; copy: yes - - Default database. - - - -\* F - - dsn: mysql_read_default_file; copy: yes - - Only read default options from the given file - - - -\* h - - dsn: host; copy: yes - - Connect to host. - - - -\* p - - dsn: password; copy: yes - - Password to use when connecting. - - - -\* P - - dsn: port; copy: yes - - Port number to use for connection. - - - -\* S - - dsn: mysql_socket; copy: yes - - Socket file to use for connection. - - - -\* u - - dsn: user; copy: yes - - User for login if not current user. - - - - -*********** -ENVIRONMENT -*********** - - -The environment variable \ ``PTDEBUG``\ enables verbose debugging output to STDERR. -To enable debugging and capture all output to a file, run the tool like: - - -.. code-block:: perl - - PTDEBUG=1 pt-config-diff ... > FILE 2>&1 - - -Be careful: debugging output is voluminous and can generate several megabytes -of output. - - -******************* -SYSTEM REQUIREMENTS -******************* - - -You need Perl, DBI, DBD::mysql, and some core packages that ought to be -installed in any reasonably new version of Perl. - - -**** -BUGS -**** - - -For a list of known bugs, see `http://www.percona.com/bugs/pt-config-diff `_. - -Please report bugs at `https://bugs.launchpad.net/percona-toolkit `_. -Include the following information in your bug report: - - -\* Complete command-line used to run the tool - - - -\* Tool "--version" - - - -\* MySQL version of all servers involved - - - -\* Output from the tool including STDERR - - - -\* Input files (log/dump/config files, etc.) - - - -If possible, include debugging output by running the tool with \ ``PTDEBUG``\ ; -see "ENVIRONMENT". - - -*********** -DOWNLOADING -*********** - - -Visit `http://www.percona.com/software/percona-toolkit/ `_ to download the -latest release of Percona Toolkit. 
Or, get the latest release from the -command line: - - -.. code-block:: perl - - wget percona.com/get/percona-toolkit.tar.gz - - wget percona.com/get/percona-toolkit.rpm - - wget percona.com/get/percona-toolkit.deb - - -You can also get individual tools from the latest release: - - -.. code-block:: perl - - wget percona.com/get/TOOL - - -Replace \ ``TOOL``\ with the name of any tool. - - -******* -AUTHORS -******* - - -Baron Schwartz and Daniel Nichter - - -********************* -ABOUT PERCONA TOOLKIT -********************* - - -This tool is part of Percona Toolkit, a collection of advanced command-line -tools developed by Percona for MySQL support and consulting. Percona Toolkit -was forked from two projects in June, 2011: Maatkit and Aspersa. Those -projects were created by Baron Schwartz and developed primarily by him and -Daniel Nichter, both of whom are employed by Percona. Visit -`http://www.percona.com/software/ `_ for more software developed by Percona. - - -******************************** -COPYRIGHT, LICENSE, AND WARRANTY -******************************** - - -This program is copyright 2011 Percona Inc. -Feedback and improvements are welcome. - -THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation, version 2; OR the Perl Artistic License. On UNIX and similar -systems, you can issue \`man perlgpl' or \`man perlartistic' to read these -licenses. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA. 
- - -******* -VERSION -******* - - -pt-config-diff 1.0.1 - diff --git a/docs/user/pt-deadlock-logger.rst b/docs/user/pt-deadlock-logger.rst deleted file mode 100644 index 60cd6173..00000000 --- a/docs/user/pt-deadlock-logger.rst +++ /dev/null @@ -1,760 +0,0 @@ - -################## -pt-deadlock-logger -################## - -.. highlight:: perl - - -**** -NAME -**** - - -pt-deadlock-logger - Extract and log MySQL deadlock information. - - -******** -SYNOPSIS -******** - - -Usage: pt-deadlock-logger [OPTION...] SOURCE_DSN - -pt-deadlock-logger extracts and saves information about the most recent deadlock -in a MySQL server. - -Print deadlocks on SOURCE_DSN: - - -.. code-block:: perl - - pt-deadlock-logger SOURCE_DSN - - -Store deadlock information from SOURCE_DSN in test.deadlocks table on SOURCE_DSN -(source and destination are the same host): - - -.. code-block:: perl - - pt-deadlock-logger SOURCE_DSN --dest D=test,t=deadlocks - - -Store deadlock information from SOURCE_DSN in test.deadlocks table on DEST_DSN -(source and destination are different hosts): - - -.. code-block:: perl - - pt-deadlock-logger SOURCE_DSN --dest DEST_DSN,D=test,t=deadlocks - - -Daemonize and check for deadlocks on SOURCE_DSN every 30 seconds for 4 hours: - - -.. code-block:: perl - - pt-deadlock-logger SOURCE_DSN --dest D=test,t=deadlocks --daemonize --run-time 4h --interval 30s - - - -***** -RISKS -***** - - -The following section is included to inform users about the potential risks, -whether known or unknown, of using this tool. The two main categories of risks -are those created by the nature of the tool (e.g. read-only tools vs. read-write -tools) and those created by bugs. - -pt-deadlock-logger is a read-only tool unless you specify a "--dest" table. -In some cases polling SHOW INNODB STATUS too rapidly can cause extra load on the -server. If you're using it on a production server under very heavy load, you -might want to set "--interval" to 30 seconds or more. 
- -At the time of this release, we know of no bugs that could cause serious harm to -users. - -The authoritative source for updated information is always the online issue -tracking system. Issues that affect this tool will be marked as such. You can -see a list of such issues at the following URL: -`http://www.percona.com/bugs/pt-deadlock-logger `_. - -See also "BUGS" for more information on filing bugs and getting help. - - -*********** -DESCRIPTION -*********** - - -pt-deadlock-logger extracts deadlock data from a MySQL server. Currently only -InnoDB deadlock information is available. You can print the information to -standard output, store it in a database table, or both. If neither -"--print" nor "--dest" are given, then the deadlock information is -printed by default. If only "--dest" is given, then the deadlock -information is only stored. If both options are given, then the deadlock -information is printed and stored. - -The source host can be specified using one of two methods. The first method is -to use at least one of the standard connection-related command line options: -"--defaults-file", "--password", "--host", "--port", "--socket" -or "--user". These options only apply to the source host; they cannot be -used to specify the destination host. - -The second method to specify the source host, or the optional destination host -using "--dest", is a DSN. A DSN is a special syntax that can be either just -a hostname (like \ ``server.domain.com``\ or \ ``1.2.3.4``\ ), or a -\ ``key=value,key=value``\ string. Keys are a single letter: - - -.. 
code-block:: perl - - KEY MEANING - === ======= - h Connect to host - P Port number to use for connection - S Socket file to use for connection - u User for login if not current user - p Password to use when connecting - F Only read default options from the given file - - -If you omit any values from the destination host DSN, they are filled in with -values from the source host, so you don't need to specify them in both places. -\ ``pt-deadlock-logger``\ reads all normal MySQL option files, such as ~/.my.cnf, so -you may not need to specify username, password and other common options at all. - - -****** -OUTPUT -****** - - -You can choose which columns are output and/or saved to "--dest" with the -"--columns" argument. The default columns are as follows: - - -server - - The (source) server on which the deadlock occurred. This might be useful if - you're tracking deadlocks on many servers. - - - -ts - - The date and time of the last detected deadlock. - - - -thread - - The MySQL thread number, which is the same as the connection ID in SHOW FULL - PROCESSLIST. - - - -txn_id - - The InnoDB transaction ID, which InnoDB expresses as two unsigned integers. I - have multiplied them out to be one number. - - - -txn_time - - How long the transaction was active when the deadlock happened. - - - -user - - The connection's database username. - - - -hostname - - The connection's host. - - - -ip - - The connection's IP address. If you specify "--numeric-ip", this is - converted to an unsigned integer. - - - -db - - The database in which the deadlock occurred. - - - -tbl - - The table on which the deadlock occurred. - - - -idx - - The index on which the deadlock occurred. - - - -lock_type - - The lock type the transaction held on the lock that caused the deadlock. - - - -lock_mode - - The lock mode of the lock that caused the deadlock. - - - -wait_hold - - Whether the transaction was waiting for the lock or holding the lock. Usually - you will see the two waited-for locks. 
- - - -victim - - Whether the transaction was selected as the deadlock victim and rolled back. - - - -query - - The query that caused the deadlock. - - - - -************************** -INNODB CAVEATS AND DETAILS -************************** - - -InnoDB's output is hard to parse and sometimes there's no way to do it right. - -Sometimes not all information (for example, username or IP address) is included -in the deadlock information. In this case there's nothing for the script to put -in those columns. It may also be the case that the deadlock output is so long -(because there were a lot of locks) that the whole thing is truncated. - -Though there are usually two transactions involved in a deadlock, there are more -locks than that; at a minimum, one more lock than transactions is necessary to -create a cycle in the waits-for graph. pt-deadlock-logger prints the -transactions (always two in the InnoDB output, even when there are more -transactions in the waits-for graph than that) and fills in locks. It prefers -waited-for over held when choosing lock information to output, but you can -figure out the rest with a moment's thought. If you see one wait-for and one -held lock, you're looking at the same lock, so of course you'd prefer to see -both wait-for locks and get more information. If the two waited-for locks are -not on the same table, more than two transactions were involved in the deadlock. - - -******* -OPTIONS -******* - - -This tool accepts additional command-line arguments. Refer to the -"SYNOPSIS" and usage information for details. - - ---ask-pass - - Prompt for a password when connecting to MySQL. - - - ---charset - - short form: -A; type: string - - Default character set. If the value is utf8, sets Perl's binmode on - STDOUT to utf8, passes the mysql_enable_utf8 option to DBD::mysql, and runs SET - NAMES UTF8 after connecting to MySQL. Any other value sets binmode on STDOUT - without the utf8 layer, and runs SET NAMES after connecting to MySQL. 
- - - ---clear-deadlocks - - type: string - - Use this table to create a small deadlock. This usually has the effect of - clearing out a huge deadlock, which otherwise consumes the entire output of - \ ``SHOW INNODB STATUS``\ . The table must not exist. pt-deadlock-logger will - create it with the following MAGIC_clear_deadlocks structure: - - - .. code-block:: perl - - CREATE TABLE test.deadlock_maker(a INT PRIMARY KEY) ENGINE=InnoDB; - - - After creating the table and causing a small deadlock, the tool will drop the - table again. - - - ---[no]collapse - - Collapse whitespace in queries to a single space. This might make it easier to - inspect on the command line or in a query. By default, whitespace is collapsed - when printing with "--print", but not modified when storing to "--dest". - (That is, the default is different for each action). - - - ---columns - - type: hash - - Output only this comma-separated list of columns. See "OUTPUT" for more - details on columns. - - - ---config - - type: Array - - Read this comma-separated list of config files; if specified, this must be the - first option on the command line. - - - ---create-dest-table - - Create the table specified by "--dest". - - Normally the "--dest" table is expected to exist already. This option - causes pt-deadlock-logger to create the table automatically using the suggested - table structure. - - - ---daemonize - - Fork to the background and detach from the shell. POSIX operating systems only. - - - ---defaults-file - - short form: -F; type: string - - Only read mysql options from the given file. You must give an absolute - pathname. - - - ---dest - - type: DSN - - DSN for where to store deadlocks; specify at least a database (D) and table (t). - - Missing values are filled in with the same values from the source host, so you - can usually omit most parts of this argument if you're storing deadlocks on the - same server on which they happen. 
- - By default, whitespace in the query column is left intact; - use "--[no]collapse" if you want whitespace collapsed. - - The following MAGIC_dest_table is suggested if you want to store all the - information pt-deadlock-logger can extract about deadlocks: - - - .. code-block:: perl - - CREATE TABLE deadlocks ( - server char(20) NOT NULL, - ts datetime NOT NULL, - thread int unsigned NOT NULL, - txn_id bigint unsigned NOT NULL, - txn_time smallint unsigned NOT NULL, - user char(16) NOT NULL, - hostname char(20) NOT NULL, - ip char(15) NOT NULL, -- alternatively, ip int unsigned NOT NULL - db char(64) NOT NULL, - tbl char(64) NOT NULL, - idx char(64) NOT NULL, - lock_type char(16) NOT NULL, - lock_mode char(1) NOT NULL, - wait_hold char(1) NOT NULL, - victim tinyint unsigned NOT NULL, - query text NOT NULL, - PRIMARY KEY (server,ts,thread) - ) ENGINE=InnoDB - - - If you use "--columns", you can omit whichever columns you don't want to - store. - - - ---help - - Show help and exit. - - - ---host - - short form: -h; type: string - - Connect to host. - - - ---interval - - type: time - - How often to check for deadlocks. If no "--run-time" is specified, - pt-deadlock-logger runs forever, checking for deadlocks at every interval. - See also "--run-time". - - - ---log - - type: string - - Print all output to this file when daemonized. - - - ---numeric-ip - - Express IP addresses as integers. - - - ---password - - short form: -p; type: string - - Password to use when connecting. - - - ---pid - - type: string - - Create the given PID file when daemonized. The file contains the process ID of - the daemonized instance. The PID file is removed when the daemonized instance - exits. The program checks for the existence of the PID file when starting; if - it exists and the process with the matching PID exists, the program exits. - - - ---port - - short form: -P; type: int - - Port number to use for connection. - - - ---print - - Print results on standard output. 
See "OUTPUT" for more. By default, - enables "--[no]collapse" unless you explicitly disable it. - - If "--interval" or "--run-time" is specified, only new deadlocks are - printed at each interval. A fingerprint for each deadlock is created using - "--columns" server, ts and thread (even if those columns were not specified - by "--columns") and if the current deadlock's fingerprint is different from - the last deadlock's fingerprint, then it is printed. - - - ---run-time - - type: time - - How long to run before exiting. By default pt-deadlock-logger runs once, - checks for deadlocks, and exits. If "--run-time" is specified but - no "--interval" is specified, a default 1 second interval will be used. - - - ---set-vars - - type: string; default: wait_timeout=10000 - - Set these MySQL variables. Immediately after connecting to MySQL, this string - will be appended to SET and executed. - - - ---socket - - short form: -S; type: string - - Socket file to use for connection. - - - ---tab - - Print tab-separated columns, instead of aligned. - - - ---user - - short form: -u; type: string - - User for login if not current user. - - - ---version - - Show version and exit. - - - - -*********** -DSN OPTIONS -*********** - - -These DSN options are used to create a DSN. Each option is given like -\ ``option=value``\ . The options are case-sensitive, so P and p are not the -same option. There cannot be whitespace before or after the \ ``=``\ and -if the value contains whitespace it must be quoted. DSN options are -comma-separated. See the percona-toolkit manpage for full details. - - -\* A - - dsn: charset; copy: yes - - Default character set. - - - -\* D - - dsn: database; copy: yes - - Default database. - - - -\* F - - dsn: mysql_read_default_file; copy: yes - - Only read default options from the given file - - - -\* h - - dsn: host; copy: yes - - Connect to host. - - - -\* p - - dsn: password; copy: yes - - Password to use when connecting. 
- - - -\* P - - dsn: port; copy: yes - - Port number to use for connection. - - - -\* S - - dsn: mysql_socket; copy: yes - - Socket file to use for connection. - - - -\* t - - Table in which to store deadlock information. - - - -\* u - - dsn: user; copy: yes - - User for login if not current user. - - - - -*********** -ENVIRONMENT -*********** - - -The environment variable \ ``PTDEBUG``\ enables verbose debugging output to STDERR. -To enable debugging and capture all output to a file, run the tool like: - - -.. code-block:: perl - - PTDEBUG=1 pt-deadlock-logger ... > FILE 2>&1 - - -Be careful: debugging output is voluminous and can generate several megabytes -of output. - - -******************* -SYSTEM REQUIREMENTS -******************* - - -You need Perl, DBI, DBD::mysql, and some core packages that ought to be -installed in any reasonably new version of Perl. - - -**** -BUGS -**** - - -For a list of known bugs, see `http://www.percona.com/bugs/pt-deadlock-logger `_. - -Please report bugs at `https://bugs.launchpad.net/percona-toolkit `_. -Include the following information in your bug report: - - -\* Complete command-line used to run the tool - - - -\* Tool "--version" - - - -\* MySQL version of all servers involved - - - -\* Output from the tool including STDERR - - - -\* Input files (log/dump/config files, etc.) - - - -If possible, include debugging output by running the tool with \ ``PTDEBUG``\ ; -see "ENVIRONMENT". - - -*********** -DOWNLOADING -*********** - - -Visit `http://www.percona.com/software/percona-toolkit/ `_ to download the -latest release of Percona Toolkit. Or, get the latest release from the -command line: - - -.. code-block:: perl - - wget percona.com/get/percona-toolkit.tar.gz - - wget percona.com/get/percona-toolkit.rpm - - wget percona.com/get/percona-toolkit.deb - - -You can also get individual tools from the latest release: - - -.. code-block:: perl - - wget percona.com/get/TOOL - - -Replace \ ``TOOL``\ with the name of any tool. 
- - -******* -AUTHORS -******* - - -Baron Schwartz - - -********************* -ABOUT PERCONA TOOLKIT -********************* - - -This tool is part of Percona Toolkit, a collection of advanced command-line -tools developed by Percona for MySQL support and consulting. Percona Toolkit -was forked from two projects in June, 2011: Maatkit and Aspersa. Those -projects were created by Baron Schwartz and developed primarily by him and -Daniel Nichter, both of whom are employed by Percona. Visit -`http://www.percona.com/software/ `_ for more software developed by Percona. - - -******************************** -COPYRIGHT, LICENSE, AND WARRANTY -******************************** - - -This program is copyright 2007-2011 Baron Schwartz, 2011 Percona Inc. -Feedback and improvements are welcome. - -THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation, version 2; OR the Perl Artistic License. On UNIX and similar -systems, you can issue \`man perlgpl' or \`man perlartistic' to read these -licenses. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA. - - -******* -VERSION -******* - - -pt-deadlock-logger 1.0.1 - diff --git a/docs/user/pt-diskstats.rst b/docs/user/pt-diskstats.rst deleted file mode 100644 index 0dc6f4a6..00000000 --- a/docs/user/pt-diskstats.rst +++ /dev/null @@ -1,390 +0,0 @@ - -############ -pt-diskstats -############ - -.. highlight:: perl - - -**** -NAME -**** - - -pt-diskstats - Aggregate and summarize \ */proc/diskstats*\ . 
- - -******** -SYNOPSIS -******** - - -Usage: pt-diskstats [OPTIONS] [FILES] - -pt-diskstats reads \ */proc/diskstats*\ periodically, or files with the -contents of \ */proc/diskstats*\ , aggregates the data, and prints it nicely. - - -***** -RISKS -***** - - -The following section is included to inform users about the potential risks, -whether known or unknown, of using this tool. The two main categories of risks -are those created by the nature of the tool (e.g. read-only tools vs. read-write -tools) and those created by bugs. - -pt-diskstats is a read-only tool. It should be very low-risk. - -At the time of this release, we know of no bugs that could cause serious harm -to users. - -The authoritative source for updated information is always the online issue -tracking system. Issues that affect this tool will be marked as such. You can -see a list of such issues at the following URL: -`http://www.percona.com/bugs/pt-diskstats `_. - -See also "BUGS" for more information on filing bugs and getting help. - - -*********** -DESCRIPTION -*********** - - -pt-diskstats tool is similar to iostat, but has some advantages. It separates -reads and writes, for example, and computes some things that iostat does in -either incorrect or confusing ways. It is also menu-driven and interactive -with several different ways to aggregate the data, and integrates well with -the pt-collect tool. These properties make it very convenient for quickly -drilling down into I/O performance at the desired level of granularity. - -This program works in two main modes. One way is to process a file with saved -disk statistics, which you specify on the command line. The other way is to -start a background process gathering samples at intervals and saving them into -a file, and process this file in the foreground. In both cases, the tool is -interactively controlled by keystrokes, so you can redisplay and slice the -data flexibly and easily. 
If the tool is not attached to a terminal, it -doesn't run interactively; it just processes and prints its output, then exits. -Otherwise it loops until you exit with the 'q' key. - -If you press the '?' key, you will bring up the interactive help menu that -shows which keys control the program. - -Files should have this format: - - -.. code-block:: perl - - - TS - - ... et cetera - TS <-- must end with a TS line. - - -See `http://aspersa.googlecode.com/svn/html/diskstats.html `_ for a detailed -example of using the tool. - - -****** -OUTPUT -****** - - -The columns are as follows: - - -#ts - - The number of seconds of samples in the line. If there is only one, then - the timestamp itself is shown, without the {curly braces}. - - - -device - - The device name. If there is more than one device, then instead the number - of devices aggregated into the line is shown, in {curly braces}. - - - -rd_mb_s - - The number of megabytes read per second, average, during the sampled interval. - - - -rd_cnc - - The average concurrency of the read operations, as computed by Little's Law - (a.k.a. queueing theory). - - - -rd_rt - - The average response time of the read operations, in milliseconds. - - - -wr_mb_s - - Megabytes written per second, average. - - - -wr_cnc - - Write concurrency, similar to read concurrency. - - - -wr_rt - - Write response time, similar to read response time. - - - -busy - - The fraction of time that the device had at least one request in progress; - this is what iostat calls %util (which is a misleading name). - - - -in_prg - - The number of requests that were in progress. Unlike the read and write - concurrencies, which are averages that are generated from reliable numbers, - this number is an instantaneous sample, and you can see that it might - represent a spike of requests, rather than the true long-term average. - - - -In addition to the above columns, there are a few columns that are hidden by -default. 
If you press the 'c' key, and then press Enter, you will blank out -the regular expression pattern that selects columns to display, and you will -then see the extra columns: - - -rd_s - - The number of reads per second. - - - -rd_avkb - - The average size of the reads, in kilobytes. - - - -rd_mrg - - The percentage of read requests that were merged together in the disk - scheduler before reaching the device. - - - -wr_s, wr_avgkb, and wr_mrg - - These are analogous to their \ ``rd_\*``\ cousins. - - - - -******* -OPTIONS -******* - - -Options must precede files on the command line. - - --c COLS - - Awk regex of which columns to include (default cnc|rt|mb|busy|prg). - - - --d DEVICES - - Awk regex of which devices to include. - - - --g GROUPBY - - Group-by mode (default disk); specify one of the following: - - - .. code-block:: perl - - disk - Each line of output shows one disk device. - sample - Each line of output shows one sample of statistics. - all - Each line of output shows one sample and one disk device. - - - - --i INTERVAL - - In -g sample mode, include INTERVAL seconds per sample. - - - --k KEEPFILE - - File to save diskstats samples in (default /tmp/diskstats-samples). - If a non-default filename is used, it will be saved for later analysis. - - - --n SAMPLES - - When in interactive mode, stop after N samples. - - - --s INTERVAL - - Sample /proc/diskstats every N seconds (default 1). - - - - -*********** -ENVIRONMENT -*********** - - -This tool does not use any environment variables. - - -******************* -SYSTEM REQUIREMENTS -******************* - - -This tool requires Bash v3 or newer and the \ */proc*\ filesystem unless -reading from files. - - -**** -BUGS -**** - - -For a list of known bugs, see `http://www.percona.com/bugs/pt-diskstats `_. - -Please report bugs at `https://bugs.launchpad.net/percona-toolkit `_. 
-Include the following information in your bug report: - - -\* Complete command-line used to run the tool - - - -\* Tool "--version" - - - -\* MySQL version of all servers involved - - - -\* Output from the tool including STDERR - - - -\* Input files (log/dump/config files, etc.) - - - -If possible, include debugging output by running the tool with \ ``PTDEBUG``\ ; -see "ENVIRONMENT". - - -*********** -DOWNLOADING -*********** - - -Visit `http://www.percona.com/software/percona-toolkit/ `_ to download the -latest release of Percona Toolkit. Or, get the latest release from the -command line: - - -.. code-block:: perl - - wget percona.com/get/percona-toolkit.tar.gz - - wget percona.com/get/percona-toolkit.rpm - - wget percona.com/get/percona-toolkit.deb - - -You can also get individual tools from the latest release: - - -.. code-block:: perl - - wget percona.com/get/TOOL - - -Replace \ ``TOOL``\ with the name of any tool. - - -******* -AUTHORS -******* - - -Baron Schwartz - - -********************* -ABOUT PERCONA TOOLKIT -********************* - - -This tool is part of Percona Toolkit, a collection of advanced command-line -tools developed by Percona for MySQL support and consulting. Percona Toolkit -was forked from two projects in June, 2011: Maatkit and Aspersa. Those -projects were created by Baron Schwartz and developed primarily by him and -Daniel Nichter, both of whom are employed by Percona. Visit -`http://www.percona.com/software/ `_ for more software developed by Percona. - - -******************************** -COPYRIGHT, LICENSE, AND WARRANTY -******************************** - - -This program is copyright 2010-2011 Baron Schwartz, 2011 Percona Inc. -Feedback and improvements are welcome. - -THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation, version 2; OR the Perl Artistic License. On UNIX and similar -systems, you can issue \`man perlgpl' or \`man perlartistic' to read these -licenses. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA. - - -******* -VERSION -******* - - -pt-diskstats 1.0.1 - diff --git a/docs/user/pt-duplicate-key-checker.rst b/docs/user/pt-duplicate-key-checker.rst deleted file mode 100644 index f10fb45e..00000000 --- a/docs/user/pt-duplicate-key-checker.rst +++ /dev/null @@ -1,563 +0,0 @@ - -######################## -pt-duplicate-key-checker -######################## - -.. highlight:: perl - - -**** -NAME -**** - - -pt-duplicate-key-checker - Find duplicate indexes and foreign keys on MySQL tables. - - -******** -SYNOPSIS -******** - - -Usage: pt-duplicate-key-checker [OPTION...] [DSN] - -pt-duplicate-key-checker examines MySQL tables for duplicate or redundant -indexes and foreign keys. Connection options are read from MySQL option files. - - -.. code-block:: perl - - pt-duplicate-key-checker --host host1 - - - -***** -RISKS -***** - - -The following section is included to inform users about the potential risks, -whether known or unknown, of using this tool. The two main categories of risks -are those created by the nature of the tool (e.g. read-only tools vs. read-write -tools) and those created by bugs. - -pt-duplicate-key-checker is a read-only tool that executes SHOW CREATE TABLE and -related queries to inspect table structures, and thus is very low-risk. - -At the time of this release, there is an unconfirmed bug that causes the tool -to crash. - -The authoritative source for updated information is always the online issue -tracking system. 
Issues that affect this tool will be marked as such. You can -see a list of such issues at the following URL: -`http://www.percona.com/bugs/pt-duplicate-key-checker `_. - -See also "BUGS" for more information on filing bugs and getting help. - - -*********** -DESCRIPTION -*********** - - -This program examines the output of SHOW CREATE TABLE on MySQL tables, and if -it finds indexes that cover the same columns as another index in the same -order, or cover an exact leftmost prefix of another index, it prints out -the suspicious indexes. By default, indexes must be of the same type, so a -BTREE index is not a duplicate of a FULLTEXT index, even if they have the same -columns. You can override this. - -It also looks for duplicate foreign keys. A duplicate foreign key covers the -same columns as another in the same table, and references the same parent -table. - - -******* -OPTIONS -******* - - -This tool accepts additional command-line arguments. Refer to the -"SYNOPSIS" and usage information for details. - - ---all-structs - - Compare indexes with different structs (BTREE, HASH, etc). - - By default this is disabled, because a BTREE index that covers the same columns - as a FULLTEXT index is not really a duplicate, for example. - - - ---ask-pass - - Prompt for a password when connecting to MySQL. - - - ---charset - - short form: -A; type: string - - Default character set. If the value is utf8, sets Perl's binmode on - STDOUT to utf8, passes the mysql_enable_utf8 option to DBD::mysql, and runs SET - NAMES UTF8 after connecting to MySQL. Any other value sets binmode on STDOUT - without the utf8 layer, and runs SET NAMES after connecting to MySQL. - - - ---[no]clustered - - default: yes - - PK columns appended to secondary key is duplicate. - - Detects when a suffix of a secondary key is a leftmost prefix of the primary - key, and treats it as a duplicate key. 
Only detects this condition on storage - engines whose primary keys are clustered (currently InnoDB and solidDB). - - Clustered storage engines append the primary key columns to the leaf nodes of - all secondary keys anyway, so you might consider it redundant to have them - appear in the internal nodes as well. Of course, you may also want them in the - internal nodes, because just having them at the leaf nodes won't help for some - queries. It does help for covering index queries, however. - - Here's an example of a key that is considered redundant with this option: - - - .. code-block:: perl - - PRIMARY KEY (`a`) - KEY `b` (`b`,`a`) - - - The use of such indexes is rather subtle. For example, suppose you have the - following query: - - - .. code-block:: perl - - SELECT ... WHERE b=1 ORDER BY a; - - - This query will do a filesort if we remove the index on \ ``b,a``\ . But if we - shorten the index on \ ``b,a``\ to just \ ``b``\ and also remove the ORDER BY, the query - should return the same results. - - The tool suggests shortening duplicate clustered keys by dropping the key - and re-adding it without the primary key prefix. The shortened clustered - key may still duplicate another key, but the tool cannot currently detect - when this happens without being run a second time to re-check the newly - shortened clustered keys. Therefore, if you shorten any duplicate clustered - keys, you should run the tool again. - - - ---config - - type: Array - - Read this comma-separated list of config files; if specified, this must be the - first option on the command line. - - - ---databases - - short form: -d; type: hash - - Check only this comma-separated list of databases. - - - ---defaults-file - - short form: -F; type: string - - Only read mysql options from the given file. You must give an absolute pathname. - - - ---engines - - short form: -e; type: hash - - Check only tables whose storage engine is in this comma-separated list. - - - ---help - - Show help and exit. 
- - - ---host - - short form: -h; type: string - - Connect to host. - - - ---ignore-databases - - type: Hash - - Ignore this comma-separated list of databases. - - - ---ignore-engines - - type: Hash - - Ignore this comma-separated list of storage engines. - - - ---ignore-order - - Ignore index order so KEY(a,b) duplicates KEY(b,a). - - - ---ignore-tables - - type: Hash - - Ignore this comma-separated list of tables. Table names may be qualified with - the database name. - - - ---key-types - - type: string; default: fk - - Check for duplicate f=foreign keys, k=keys or fk=both. - - - ---password - - short form: -p; type: string - - Password to use when connecting. - - - ---pid - - type: string - - Create the given PID file. The file contains the process ID of the script. - The PID file is removed when the script exits. Before starting, the script - checks if the PID file already exists. If it does not, then the script creates - and writes its own PID to it. If it does, then the script checks the following: - if the file contains a PID and a process is running with that PID, then - the script dies; or, if there is no process running with that PID, then the - script overwrites the file with its own PID and starts; else, if the file - contains no PID, then the script dies. - - - ---port - - short form: -P; type: int - - Port number to use for connection. - - - ---set-vars - - type: string; default: wait_timeout=10000 - - Set these MySQL variables. Immediately after connecting to MySQL, this string - will be appended to SET and executed. - - - ---socket - - short form: -S; type: string - - Socket file to use for connection. - - - ---[no]sql - - default: yes - - Print DROP KEY statement for each duplicate key. By default an ALTER TABLE - DROP KEY statement is printed below each duplicate key so that, if you want to - remove the duplicate key, you can copy-paste the statement into MySQL. - - To disable printing these statements, specify --nosql. 
- - - ---[no]summary - - default: yes - - Print summary of indexes at end of output. - - - ---tables - - short form: -t; type: hash - - Check only this comma-separated list of tables. - - Table names may be qualified with the database name. - - - ---user - - short form: -u; type: string - - User for login if not current user. - - - ---verbose - - short form: -v - - Output all keys and/or foreign keys found, not just redundant ones. - - - ---version - - Show version and exit. - - - - -*********** -DSN OPTIONS -*********** - - -These DSN options are used to create a DSN. Each option is given like -\ ``option=value``\ . The options are case-sensitive, so P and p are not the -same option. There cannot be whitespace before or after the \ ``=``\ and -if the value contains whitespace it must be quoted. DSN options are -comma-separated. See the percona-toolkit manpage for full details. - - -\* A - - dsn: charset; copy: yes - - Default character set. - - - -\* D - - dsn: database; copy: yes - - Default database. - - - -\* F - - dsn: mysql_read_default_file; copy: yes - - Only read default options from the given file - - - -\* h - - dsn: host; copy: yes - - Connect to host. - - - -\* p - - dsn: password; copy: yes - - Password to use when connecting. - - - -\* P - - dsn: port; copy: yes - - Port number to use for connection. - - - -\* S - - dsn: mysql_socket; copy: yes - - Socket file to use for connection. - - - -\* u - - dsn: user; copy: yes - - User for login if not current user. - - - - -*********** -ENVIRONMENT -*********** - - -The environment variable \ ``PTDEBUG``\ enables verbose debugging output to STDERR. -To enable debugging and capture all output to a file, run the tool like: - - -.. code-block:: perl - - PTDEBUG=1 pt-duplicate-key-checker ... > FILE 2>&1 - - -Be careful: debugging output is voluminous and can generate several megabytes -of output. 
- - -******************* -SYSTEM REQUIREMENTS -******************* - - -You need Perl, DBI, DBD::mysql, and some core packages that ought to be -installed in any reasonably new version of Perl. - - -**** -BUGS -**** - - -For a list of known bugs, see `http://www.percona.com/bugs/pt-duplicate-key-checker `_. - -Please report bugs at `https://bugs.launchpad.net/percona-toolkit `_. -Include the following information in your bug report: - - -\* Complete command-line used to run the tool - - - -\* Tool "--version" - - - -\* MySQL version of all servers involved - - - -\* Output from the tool including STDERR - - - -\* Input files (log/dump/config files, etc.) - - - -If possible, include debugging output by running the tool with \ ``PTDEBUG``\ ; -see "ENVIRONMENT". - - -*********** -DOWNLOADING -*********** - - -Visit `http://www.percona.com/software/percona-toolkit/ `_ to download the -latest release of Percona Toolkit. Or, get the latest release from the -command line: - - -.. code-block:: perl - - wget percona.com/get/percona-toolkit.tar.gz - - wget percona.com/get/percona-toolkit.rpm - - wget percona.com/get/percona-toolkit.deb - - -You can also get individual tools from the latest release: - - -.. code-block:: perl - - wget percona.com/get/TOOL - - -Replace \ ``TOOL``\ with the name of any tool. - - -******* -AUTHORS -******* - - -Baron Schwartz and Daniel Nichter - - -********************* -ABOUT PERCONA TOOLKIT -********************* - - -This tool is part of Percona Toolkit, a collection of advanced command-line -tools developed by Percona for MySQL support and consulting. Percona Toolkit -was forked from two projects in June, 2011: Maatkit and Aspersa. Those -projects were created by Baron Schwartz and developed primarily by him and -Daniel Nichter, both of whom are employed by Percona. Visit -`http://www.percona.com/software/ `_ for more software developed by Percona. 
- - -******************************** -COPYRIGHT, LICENSE, AND WARRANTY -******************************** - - -This program is copyright 2007-2011 Baron Schwartz, 2011 Percona Inc. -Feedback and improvements are welcome. - -THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation, version 2; OR the Perl Artistic License. On UNIX and similar -systems, you can issue \`man perlgpl' or \`man perlartistic' to read these -licenses. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA. - - -******* -VERSION -******* - - -pt-duplicate-key-checker 1.0.1 - diff --git a/docs/user/pt-fifo-split.rst b/docs/user/pt-fifo-split.rst deleted file mode 100644 index 0dba9cbc..00000000 --- a/docs/user/pt-fifo-split.rst +++ /dev/null @@ -1,305 +0,0 @@ - -############# -pt-fifo-split -############# - -.. highlight:: perl - - -**** -NAME -**** - - -pt-fifo-split - Split files and pipe lines to a fifo without really splitting. - - -******** -SYNOPSIS -******** - - -Usage: pt-fifo-split [options] [FILE ...] - -pt-fifo-split splits FILE and pipes lines to a fifo. With no FILE, or when FILE -is -, read standard input. - -Read hugefile.txt in chunks of a million lines without physically splitting it: - - -.. code-block:: perl - - pt-fifo-split --lines 1000000 hugefile.txt - while [ -e /tmp/pt-fifo-split ]; do cat /tmp/pt-fifo-split; done - - - -***** -RISKS -***** - - -The following section is included to inform users about the potential risks, -whether known or unknown, of using this tool. 
The two main categories of risks -are those created by the nature of the tool (e.g. read-only tools vs. read-write -tools) and those created by bugs. - -pt-fifo-split creates and/or deletes the "--fifo" file. Otherwise, no other -files are modified, and it merely reads lines from the file given on the -command-line. It should be very low-risk. - -At the time of this release, we know of no bugs that could cause serious harm to -users. - -The authoritative source for updated information is always the online issue -tracking system. Issues that affect this tool will be marked as such. You can -see a list of such issues at the following URL: -`http://www.percona.com/bugs/pt-fifo-split `_. - -See also "BUGS" for more information on filing bugs and getting help. - - -*********** -DESCRIPTION -*********** - - -pt-fifo-split lets you read from a file as though it contains only some of the -lines in the file. When you read from it again, it contains the next set of -lines; when you have gone all the way through it, the file disappears. This -works only on Unix-like operating systems. - -You can specify multiple files on the command line. If you don't specify any, -or if you use the special filename \ ``-``\ , lines are read from standard input. - - -******* -OPTIONS -******* - - -This tool accepts additional command-line arguments. Refer to the -"SYNOPSIS" and usage information for details. - - ---config - - type: Array - - Read this comma-separated list of config files; if specified, this must be the - first option on the command line. - - - ---fifo - - type: string; default: /tmp/pt-fifo-split - - The name of the fifo from which the lines can be read. - - - ---force - - Remove the fifo if it exists already, then create it again. - - - ---help - - Show help and exit. - - - ---lines - - type: int; default: 1000 - - The number of lines to read in each chunk. - - - ---offset - - type: int; default: 0 - - Begin at the Nth line. 
If the argument is 0, all lines are printed to the fifo. - If 1, then beginning at the first line, lines are printed (exactly the same as - 0). If 2, the first line is skipped, and the 2nd and subsequent lines are - printed to the fifo. - - - ---pid - - type: string - - Create the given PID file. The file contains the process ID of the script. - The PID file is removed when the script exits. Before starting, the script - checks if the PID file already exists. If it does not, then the script creates - and writes its own PID to it. If it does, then the script checks the following: - if the file contains a PID and a process is running with that PID, then - the script dies; or, if there is no process running with that PID, then the - script overwrites the file with its own PID and starts; else, if the file - contains no PID, then the script dies. - - - ---statistics - - Print out statistics between chunks. The statistics are the number of chunks, - the number of lines, elapsed time, and lines per second overall and during the - last chunk. - - - ---version - - Show version and exit. - - - - -*********** -ENVIRONMENT -*********** - - -The environment variable \ ``PTDEBUG``\ enables verbose debugging output to STDERR. -To enable debugging and capture all output to a file, run the tool like: - - -.. code-block:: perl - - PTDEBUG=1 pt-fifo-split ... > FILE 2>&1 - - -Be careful: debugging output is voluminous and can generate several megabytes -of output. - - -******************* -SYSTEM REQUIREMENTS -******************* - - -You need Perl, DBI, DBD::mysql, and some core packages that ought to be -installed in any reasonably new version of Perl. - - -**** -BUGS -**** - - -For a list of known bugs, see `http://www.percona.com/bugs/pt-fifo-split `_. - -Please report bugs at `https://bugs.launchpad.net/percona-toolkit `_. 
-Include the following information in your bug report: - - -\* Complete command-line used to run the tool - - - -\* Tool "--version" - - - -\* MySQL version of all servers involved - - - -\* Output from the tool including STDERR - - - -\* Input files (log/dump/config files, etc.) - - - -If possible, include debugging output by running the tool with \ ``PTDEBUG``\ ; -see "ENVIRONMENT". - - -*********** -DOWNLOADING -*********** - - -Visit `http://www.percona.com/software/percona-toolkit/ `_ to download the -latest release of Percona Toolkit. Or, get the latest release from the -command line: - - -.. code-block:: perl - - wget percona.com/get/percona-toolkit.tar.gz - - wget percona.com/get/percona-toolkit.rpm - - wget percona.com/get/percona-toolkit.deb - - -You can also get individual tools from the latest release: - - -.. code-block:: perl - - wget percona.com/get/TOOL - - -Replace \ ``TOOL``\ with the name of any tool. - - -******* -AUTHORS -******* - - -Baron Schwartz - - -********************* -ABOUT PERCONA TOOLKIT -********************* - - -This tool is part of Percona Toolkit, a collection of advanced command-line -tools developed by Percona for MySQL support and consulting. Percona Toolkit -was forked from two projects in June, 2011: Maatkit and Aspersa. Those -projects were created by Baron Schwartz and developed primarily by him and -Daniel Nichter, both of whom are employed by Percona. Visit -`http://www.percona.com/software/ `_ for more software developed by Percona. - - -******************************** -COPYRIGHT, LICENSE, AND WARRANTY -******************************** - - -This program is copyright 2007-2011 Baron Schwartz, 2011 Percona Inc. -Feedback and improvements are welcome. - -THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation, version 2; OR the Perl Artistic License. On UNIX and similar -systems, you can issue \`man perlgpl' or \`man perlartistic' to read these -licenses. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA. - - -******* -VERSION -******* - - -pt-fifo-split 1.0.1 - diff --git a/docs/user/pt-find.rst b/docs/user/pt-find.rst deleted file mode 100644 index 860cf5c7..00000000 --- a/docs/user/pt-find.rst +++ /dev/null @@ -1,977 +0,0 @@ - -####### -pt-find -####### - -.. highlight:: perl - - -**** -NAME -**** - - -pt-find - Find MySQL tables and execute actions, like GNU find. - - -******** -SYNOPSIS -******** - - -Usage: pt-find [OPTION...] [DATABASE...] - -pt-find searches for MySQL tables and executes actions, like GNU find. The -default action is to print the database and table name. - -Find all tables created more than a day ago, which use the MyISAM engine, and -print their names: - - -.. code-block:: perl - - pt-find --ctime +1 --engine MyISAM - - -Find InnoDB tables that haven't been updated in a month, and convert them to -MyISAM storage engine (data warehousing, anyone?): - - -.. code-block:: perl - - pt-find --mtime +30 --engine InnoDB --exec "ALTER TABLE %D.%N ENGINE=MyISAM" - - -Find tables created by a process that no longer exists, following the -name_sid_pid naming convention, and remove them. - - -.. code-block:: perl - - pt-find --connection-id '\D_\d+_(\d+)$' --server-id '\D_(\d+)_\d+$' --exec-plus "DROP TABLE %s" - - -Find empty tables in the test and junk databases, and delete them: - - -.. code-block:: perl - - pt-find --empty junk test --exec-plus "DROP TABLE %s" - - -Find tables more than five gigabytes in total size: - - -.. 
code-block:: perl - - pt-find --tablesize +5G - - -Find all tables and print their total data and index size, and sort largest -tables first (sort is a different program, by the way). - - -.. code-block:: perl - - pt-find --printf "%T\t%D.%N\n" | sort -rn - - -As above, but this time, insert the data back into the database for posterity: - - -.. code-block:: perl - - pt-find --noquote --exec "INSERT INTO sysdata.tblsize(db, tbl, size) VALUES('%D', '%N', %T)" - - - -***** -RISKS -***** - - -The following section is included to inform users about the potential risks, -whether known or unknown, of using this tool. The two main categories of risks -are those created by the nature of the tool (e.g. read-only tools vs. read-write -tools) and those created by bugs. - -pt-find only reads and prints information by default, but "--exec" and -"--exec-plus" can execute user-defined SQL. You should be as careful with it -as you are with any command-line tool that can execute queries against your -database. - -At the time of this release, we know of no bugs that could cause serious harm to -users. - -The authoritative source for updated information is always the online issue -tracking system. Issues that affect this tool will be marked as such. You can -see a list of such issues at the following URL: -`http://www.percona.com/bugs/pt-find `_. - -See also "BUGS" for more information on filing bugs and getting help. - - -*********** -DESCRIPTION -*********** - - -pt-find looks for MySQL tables that pass the tests you specify, and executes -the actions you specify. The default action is to print the database and table -name to STDOUT. - -pt-find is simpler than GNU find. It doesn't allow you to specify -complicated expressions on the command line. - -pt-find uses SHOW TABLES when possible, and SHOW TABLE STATUS when needed. 
- - -************ -OPTION TYPES -************ - - -There are three types of options: normal options, which determine some behavior -or setting; tests, which determine whether a table should be included in the -list of tables found; and actions, which do something to the tables pt-find -finds. - -pt-find uses standard Getopt::Long option parsing, so you should use double -dashes in front of long option names, unlike GNU find. - - -******* -OPTIONS -******* - - -This tool accepts additional command-line arguments. Refer to the -"SYNOPSIS" and usage information for details. - - ---ask-pass - - Prompt for a password when connecting to MySQL. - - - ---case-insensitive - - Specifies that all regular expression searches are case-insensitive. - - - ---charset - - short form: -A; type: string - - Default character set. If the value is utf8, sets Perl's binmode on - STDOUT to utf8, passes the mysql_enable_utf8 option to DBD::mysql, and runs SET - NAMES UTF8 after connecting to MySQL. Any other value sets binmode on STDOUT - without the utf8 layer, and runs SET NAMES after connecting to MySQL. - - - ---config - - type: Array - - Read this comma-separated list of config files; if specified, this must be the - first option on the command line. - - - ---day-start - - Measure times (for "--mmin", etc) from the beginning of today rather than - from the current time. - - - ---defaults-file - - short form: -F; type: string - - Only read mysql options from the given file. You must give an absolute - pathname. - - - ---help - - Show help and exit. - - - ---host - - short form: -h; type: string - - Connect to host. - - - ---or - - Combine tests with OR, not AND. - - By default, tests are evaluated as though there were an AND between them. This - option switches it to OR. - - Option parsing is not implemented by pt-find itself, so you cannot specify - complicated expressions with parentheses and mixtures of OR and AND. 
- - - ---password - - short form: -p; type: string - - Password to use when connecting. - - - ---pid - - type: string - - Create the given PID file. The file contains the process ID of the script. - The PID file is removed when the script exits. Before starting, the script - checks if the PID file already exists. If it does not, then the script creates - and writes its own PID to it. If it does, then the script checks the following: - if the file contains a PID and a process is running with that PID, then - the script dies; or, if there is no process running with that PID, then the - script overwrites the file with its own PID and starts; else, if the file - contains no PID, then the script dies. - - - ---port - - short form: -P; type: int - - Port number to use for connection. - - - ---[no]quote - - default: yes - - Quotes MySQL identifier names with MySQL's standard backtick character. - - Quoting happens after tests are run, and before actions are run. - - - ---set-vars - - type: string; default: wait_timeout=10000 - - Set these MySQL variables. Immediately after connecting to MySQL, this string - will be appended to SET and executed. - - - ---socket - - short form: -S; type: string - - Socket file to use for connection. - - - ---user - - short form: -u; type: string - - User for login if not current user. - - - ---version - - Show version and exit. - - - -TESTS -===== - - -Most tests check some criterion against a column of SHOW TABLE STATUS output. -Numeric arguments can be specified as +n for greater than n, -n for less than n, -and n for exactly n. All numeric options can take an optional suffix multiplier -of k, M or G (1_024, 1_048_576, and 1_073_741_824 respectively). All patterns -are Perl regular expressions (see 'man perlre') unless specified as SQL LIKE -patterns. - -Dates and times are all measured relative to the same instant, when pt-find -first asks the database server what time it is. 
All date and time manipulation -is done in SQL, so if you say to find tables modified 5 days ago, that -translates to SELECT DATE_SUB(CURRENT_TIMESTAMP, INTERVAL 5 DAY). If you -specify "--day-start", of course it's relative to CURRENT_DATE instead. - -However, table sizes and other metrics are not consistent at an instant in -time. It can take some time for MySQL to process all the SHOW queries, and -pt-find can't do anything about that. These measurements are as of the -time they're taken. - -If you need some test that's not in this list, file a bug report and I'll -enhance pt-find for you. It's really easy. - - ---autoinc - - type: string; group: Tests - - Table's next AUTO_INCREMENT is n. This tests the Auto_increment column. - - - ---avgrowlen - - type: size; group: Tests - - Table avg row len is n bytes. This tests the Avg_row_length column. - The specified size can be "NULL" to test where Avg_row_length IS NULL. - - - ---checksum - - type: string; group: Tests - - Table checksum is n. This tests the Checksum column. - - - ---cmin - - type: size; group: Tests - - Table was created n minutes ago. This tests the Create_time column. - - - ---collation - - type: string; group: Tests - - Table collation matches pattern. This tests the Collation column. - - - ---column-name - - type: string; group: Tests - - A column name in the table matches pattern. - - - ---column-type - - type: string; group: Tests - - A column in the table matches this type (case-insensitive). - - Examples of types are: varchar, char, int, smallint, bigint, decimal, year, - timestamp, text, enum. - - - ---comment - - type: string; group: Tests - - Table comment matches pattern. This tests the Comment column. - - - ---connection-id - - type: string; group: Tests - - Table name has nonexistent MySQL connection ID. This tests the table name for - a pattern. The argument to this test must be a Perl regular expression that - captures digits like this: (\d+). 
If the table name matches the pattern, - these captured digits are taken to be the MySQL connection ID of some process. - If the connection doesn't exist according to SHOW FULL PROCESSLIST, the test - returns true. If the connection ID is greater than pt-find's own - connection ID, the test returns false for safety. - - Why would you want to do this? If you use MySQL statement-based replication, - you probably know the trouble temporary tables can cause. You might choose to - work around this by creating real tables with unique names, instead of - temporary tables. One way to do this is to append your connection ID to the - end of the table, thusly: scratch_table_12345. This assures the table name is - unique and lets you have a way to find which connection it was associated - with. And perhaps most importantly, if the connection no longer exists, you - can assume the connection died without cleaning up its tables, and this table - is a candidate for removal. - - This is how I manage scratch tables, and that's why I included this test in - pt-find. - - The argument I use to "--connection-id" is "\D_(\d+)$". That finds tables - with a series of numbers at the end, preceded by an underscore and some - non-number character (the latter criterion prevents me from examining tables - with a date at the end, which people tend to do: baron_scratch_2007_05_07 for - example). It's better to keep the scratch tables separate of course. - - If you do this, make sure the user pt-find runs as has the PROCESS privilege! - Otherwise it will only see connections from the same user, and might think some - tables are ready to remove when they're still in use. For safety, pt-find - checks this for you. - - See also "--server-id". - - - ---createopts - - type: string; group: Tests - - Table create option matches pattern. This tests the Create_options column. - - - ---ctime - - type: size; group: Tests - - Table was created n days ago. This tests the Create_time column. 
- - - ---datafree - - type: size; group: Tests - - Table has n bytes of free space. This tests the Data_free column. - The specified size can be "NULL" to test where Data_free IS NULL. - - - ---datasize - - type: size; group: Tests - - Table data uses n bytes of space. This tests the Data_length column. - The specified size can be "NULL" to test where Data_length IS NULL. - - - ---dblike - - type: string; group: Tests - - Database name matches SQL LIKE pattern. - - - ---dbregex - - type: string; group: Tests - - Database name matches this pattern. - - - ---empty - - group: Tests - - Table has no rows. This tests the Rows column. - - - ---engine - - type: string; group: Tests - - Table storage engine matches this pattern. This tests the Engine column, or in - earlier versions of MySQL, the Type column. - - - ---function - - type: string; group: Tests - - Function definition matches pattern. - - - ---indexsize - - type: size; group: Tests - - Table indexes use n bytes of space. This tests the Index_length column. - The specified size can be "NULL" to test where Index_length IS NULL. - - - ---kmin - - type: size; group: Tests - - Table was checked n minutes ago. This tests the Check_time column. - - - ---ktime - - type: size; group: Tests - - Table was checked n days ago. This tests the Check_time column. - - - ---mmin - - type: size; group: Tests - - Table was last modified n minutes ago. This tests the Update_time column. - - - ---mtime - - type: size; group: Tests - - Table was last modified n days ago. This tests the Update_time column. - - - ---procedure - - type: string; group: Tests - - Procedure definition matches pattern. - - - ---rowformat - - type: string; group: Tests - - Table row format matches pattern. This tests the Row_format column. - - - ---rows - - type: size; group: Tests - - Table has n rows. This tests the Rows column. - The specified size can be "NULL" to test where Rows IS NULL. 
- - - ---server-id - - type: string; group: Tests - - Table name contains the server ID. If you create temporary tables with the - naming convention explained in "--connection-id", but also add the server ID of the - server on which the tables are created, then you can use this pattern match to - ensure tables are dropped only on the server they're created on. This prevents - a table from being accidentally dropped on a slave while it's in use (provided - that your server IDs are all unique, which they should be for replication to - work). - - For example, on the master (server ID 22) you create a table called - scratch_table_22_12345. If you see this table on the slave (server ID 23), you - might think it can be dropped safely if there's no such connection 12345. But - if you also force the name to match the server ID with \ ``--server-id '\D_(\d+)_\d+$'``\ , - the table won't be dropped on the slave. - - - ---tablesize - - type: size; group: Tests - - Table uses n bytes of space. This tests the sum of the Data_length and - Index_length columns. - - - ---tbllike - - type: string; group: Tests - - Table name matches SQL LIKE pattern. - - - ---tblregex - - type: string; group: Tests - - Table name matches this pattern. - - - ---tblversion - - type: size; group: Tests - - Table version is n. This tests the Version column. - - - ---trigger - - type: string; group: Tests - - Trigger action statement matches pattern. - - - ---trigger-table - - type: string; group: Tests - - "--trigger" is defined on table matching pattern. - - - ---view - - type: string; group: Tests - - CREATE VIEW matches this pattern. - - - - -ACTIONS -======= - - -The "--exec-plus" action happens after everything else, but otherwise actions -happen in an indeterminate order. If you need determinism, file a bug report -and I'll add this feature. - - ---exec - - type: string; group: Actions - - Execute this SQL with each item found. 
The SQL can contain escapes and - formatting directives (see "--printf"). - - - ---exec-dsn - - type: string; group: Actions - - Specify a DSN in key-value format to use when executing SQL with "--exec" and - "--exec-plus". Any values not specified are inherited from command-line - arguments. - - - ---exec-plus - - type: string; group: Actions - - Execute this SQL with all items at once. This option is unlike "--exec". There - are no escaping or formatting directives; there is only one special placeholder - for the list of database and table names, %s. The list of tables found will be - joined together with commas and substituted wherever you place %s. - - You might use this, for example, to drop all the tables you found: - - - .. code-block:: perl - - DROP TABLE %s - - - This is sort of like GNU find's "-exec command {} +" syntax. Only it's not - totally cryptic. And it doesn't require me to write a command-line parser. - - - ---print - - group: Actions - - Print the database and table name, followed by a newline. This is the default - action if no other action is specified. - - - ---printf - - type: string; group: Actions - - Print format on the standard output, interpreting '\' escapes and '%' - directives. Escapes are backslashed characters, like \n and \t. Perl - interprets these, so you can use any escapes Perl knows about. Directives are - replaced by %s, and as of this writing, you can't add any special formatting - instructions, like field widths or alignment (though I'm musing over ways to do - that). - - Here is a list of the directives. Note that most of them simply come from - columns of SHOW TABLE STATUS. If the column is NULL or doesn't exist, you get - an empty string in the output. A % character followed by any character not in - the following list is discarded (but the other character is printed). - - - .. 
code-block:: perl - - CHAR DATA SOURCE NOTES - ---- ------------------ ------------------------------------------ - a Auto_increment - A Avg_row_length - c Checksum - C Create_time - D Database The database name in which the table lives - d Data_length - E Engine In older versions of MySQL, this is Type - F Data_free - f Innodb_free Parsed from the Comment field - I Index_length - K Check_time - L Collation - M Max_data_length - N Name - O Comment - P Create_options - R Row_format - S Rows - T Table_length Data_length+Index_length - U Update_time - V Version - - - - - - -*********** -DSN OPTIONS -*********** - - -These DSN options are used to create a DSN. Each option is given like -\ ``option=value``\ . The options are case-sensitive, so P and p are not the -same option. There cannot be whitespace before or after the \ ``=``\ and -if the value contains whitespace it must be quoted. DSN options are -comma-separated. See the percona-toolkit manpage for full details. - - -\* A - - dsn: charset; copy: yes - - Default character set. - - - -\* D - - dsn: database; copy: yes - - Default database. - - - -\* F - - dsn: mysql_read_default_file; copy: yes - - Only read default options from the given file - - - -\* h - - dsn: host; copy: yes - - Connect to host. - - - -\* p - - dsn: password; copy: yes - - Password to use when connecting. - - - -\* P - - dsn: port; copy: yes - - Port number to use for connection. - - - -\* S - - dsn: mysql_socket; copy: yes - - Socket file to use for connection. - - - -\* u - - dsn: user; copy: yes - - User for login if not current user. - - - - -*********** -ENVIRONMENT -*********** - - -The environment variable \ ``PTDEBUG``\ enables verbose debugging output to STDERR. -To enable debugging and capture all output to a file, run the tool like: - - -.. code-block:: perl - - PTDEBUG=1 pt-find ... > FILE 2>&1 - - -Be careful: debugging output is voluminous and can generate several megabytes -of output. 
- - -******************* -SYSTEM REQUIREMENTS -******************* - - -You need Perl, DBI, DBD::mysql, and some core packages that ought to be -installed in any reasonably new version of Perl. - - -**** -BUGS -**** - - -For a list of known bugs, see `http://www.percona.com/bugs/pt-find `_. - -Please report bugs at `https://bugs.launchpad.net/percona-toolkit `_. -Include the following information in your bug report: - - -\* Complete command-line used to run the tool - - - -\* Tool "--version" - - - -\* MySQL version of all servers involved - - - -\* Output from the tool including STDERR - - - -\* Input files (log/dump/config files, etc.) - - - -If possible, include debugging output by running the tool with \ ``PTDEBUG``\ ; -see "ENVIRONMENT". - - -*********** -DOWNLOADING -*********** - - -Visit `http://www.percona.com/software/percona-toolkit/ `_ to download the -latest release of Percona Toolkit. Or, get the latest release from the -command line: - - -.. code-block:: perl - - wget percona.com/get/percona-toolkit.tar.gz - - wget percona.com/get/percona-toolkit.rpm - - wget percona.com/get/percona-toolkit.deb - - -You can also get individual tools from the latest release: - - -.. code-block:: perl - - wget percona.com/get/TOOL - - -Replace \ ``TOOL``\ with the name of any tool. - - -******* -AUTHORS -******* - - -Baron Schwartz - - -********************* -ABOUT PERCONA TOOLKIT -********************* - - -This tool is part of Percona Toolkit, a collection of advanced command-line -tools developed by Percona for MySQL support and consulting. Percona Toolkit -was forked from two projects in June, 2011: Maatkit and Aspersa. Those -projects were created by Baron Schwartz and developed primarily by him and -Daniel Nichter, both of whom are employed by Percona. Visit -`http://www.percona.com/software/ `_ for more software developed by Percona. 
- - -******************************** -COPYRIGHT, LICENSE, AND WARRANTY -******************************** - - -This program is copyright 2007-2011 Baron Schwartz, 2011 Percona Inc. -Feedback and improvements are welcome. - -THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation, version 2; OR the Perl Artistic License. On UNIX and similar -systems, you can issue \`man perlgpl' or \`man perlartistic' to read these -licenses. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA. - - -******* -VERSION -******* - - -pt-find 1.0.1 - diff --git a/docs/user/pt-fk-error-logger.rst b/docs/user/pt-fk-error-logger.rst deleted file mode 100644 index 7e8af47b..00000000 --- a/docs/user/pt-fk-error-logger.rst +++ /dev/null @@ -1,493 +0,0 @@ - -################## -pt-fk-error-logger -################## - -.. highlight:: perl - - -**** -NAME -**** - - -pt-fk-error-logger - Extract and log MySQL foreign key errors. - - -******** -SYNOPSIS -******** - - -Usage: pt-fk-error-logger [OPTION...] SOURCE_DSN - -pt-fk-error-logger extracts and saves information about the most recent foreign -key errors in a MySQL server. - -Print foreign key errors on host1: - - -.. code-block:: perl - - pt-fk-error-logger h=host1 - - -Save foreign key errors on host1 to db.foreign_key_errors table on host2: - - -.. 
code-block:: perl - - pt-fk-error-logger h=host1 --dest h=host2,D=db,t=foreign_key_errors - - - -***** -RISKS -***** - - -The following section is included to inform users about the potential risks, -whether known or unknown, of using this tool. The two main categories of risks -are those created by the nature of the tool (e.g. read-only tools vs. read-write -tools) and those created by bugs. - -pt-fk-error-logger is read-only unless you specify "--dest". It should be -very low-risk. - -At the time of this release, we know of no bugs that could cause serious harm to -users. - -The authoritative source for updated information is always the online issue -tracking system. Issues that affect this tool will be marked as such. You can -see a list of such issues at the following URL: -`http://www.percona.com/bugs/pt-fk-error-logger `_. - -See also "BUGS" for more information on filing bugs and getting help. - - -*********** -DESCRIPTION -*********** - - -pt-fk-error-logger prints or saves the foreign key errors text from -\ ``SHOW INNODB STATUS``\ . The errors are not parsed or interpreted in any -way. Foreign key errors are uniquely identified by their timestamp. -Only new (more recent) errors are printed or saved. - - -****** -OUTPUT -****** - - -If "--print" is given or no "--dest" is given, then pt-fk-error-logger -prints the foreign key error text to STDOUT exactly as it appeared in -\ ``SHOW INNODB STATUS``\ . - - -******* -OPTIONS -******* - - -This tool accepts additional command-line arguments. Refer to the -"SYNOPSIS" and usage information for details. - - ---ask-pass - - Prompt for a password when connecting to MySQL. - - - ---charset - - short form: -A; type: string - - Default character set. If the value is utf8, sets Perl's binmode on - STDOUT to utf8, passes the mysql_enable_utf8 option to DBD::mysql, and runs SET - NAMES UTF8 after connecting to MySQL.
Any other value sets binmode on STDOUT - without the utf8 layer, and runs SET NAMES after connecting to MySQL. - - - ---config - - type: Array - - Read this comma-separated list of config files; if specified, this must be the - first option on the command line. - - - ---daemonize - - Fork to the background and detach from the shell. POSIX operating systems only. - - - ---defaults-file - - short form: -F; type: string - - Only read mysql options from the given file. You must give an absolute - pathname. - - - ---dest - - type: DSN - - DSN for where to store foreign key errors; specify at least a database (D) and table (t). - - Missing values are filled in with the same values from the source host, so you - can usually omit most parts of this argument if you're storing foreign key - errors on the same server on which they happen. - - The following table is suggested: - - - .. code-block:: perl - - CREATE TABLE foreign_key_errors ( - ts datetime NOT NULL, - error text NOT NULL, - PRIMARY KEY (ts) - ) - - - The only information saved is the timestamp and the foreign key error text. - - - ---help - - Show help and exit. - - - ---host - - short form: -h; type: string - - Connect to host. - - - ---interval - - type: time; default: 0 - - How often to check for foreign key errors. - - - ---log - - type: string - - Print all output to this file when daemonized. - - - ---password - - short form: -p; type: string - - Password to use when connecting. - - - ---pid - - type: string - - Create the given PID file when daemonized. The file contains the process ID of - the daemonized instance. The PID file is removed when the daemonized instance - exits. The program checks for the existence of the PID file when starting; if - it exists and the process with the matching PID exists, the program exits. - - - ---port - - short form: -P; type: int - - Port number to use for connection. - - - ---print - - Print results on standard output. See "OUTPUT" for more.
- - - ---run-time - - type: time - - How long to run before exiting. - - - ---set-vars - - type: string; default: wait_timeout=10000 - - Set these MySQL variables. Immediately after connecting to MySQL, this string - will be appended to SET and executed. - - - ---socket - - short form: -S; type: string - - Socket file to use for connection. - - - ---user - - short form: -u; type: string - - User for login if not current user. - - - ---version - - Show version and exit. - - - - -*********** -DSN OPTIONS -*********** - - -These DSN options are used to create a DSN. Each option is given like -\ ``option=value``\ . The options are case-sensitive, so P and p are not the -same option. There cannot be whitespace before or after the \ ``=``\ and -if the value contains whitespace it must be quoted. DSN options are -comma-separated. See the percona-toolkit manpage for full details. - - -\* A - - dsn: charset; copy: yes - - Default character set. - - - -\* D - - dsn: database; copy: yes - - Default database. - - - -\* F - - dsn: mysql_read_default_file; copy: yes - - Only read default options from the given file - - - -\* h - - dsn: host; copy: yes - - Connect to host. - - - -\* p - - dsn: password; copy: yes - - Password to use when connecting. - - - -\* P - - dsn: port; copy: yes - - Port number to use for connection. - - - -\* S - - dsn: mysql_socket; copy: yes - - Socket file to use for connection. - - - -\* t - - Table in which to store foreign key errors. - - - -\* u - - dsn: user; copy: yes - - User for login if not current user. - - - - -*********** -ENVIRONMENT -*********** - - -The environment variable \ ``PTDEBUG``\ enables verbose debugging output to STDERR. -To enable debugging and capture all output to a file, run the tool like: - - -.. code-block:: perl - - PTDEBUG=1 pt-fk-error-logger ... > FILE 2>&1 - - -Be careful: debugging output is voluminous and can generate several megabytes -of output. 
- - -******************* -SYSTEM REQUIREMENTS -******************* - - -You need Perl, DBI, DBD::mysql, and some core packages that ought to be -installed in any reasonably new version of Perl. - - -**** -BUGS -**** - - -For a list of known bugs, see `http://www.percona.com/bugs/pt-fk-error-logger `_. - -Please report bugs at `https://bugs.launchpad.net/percona-toolkit `_. -Include the following information in your bug report: - - -\* Complete command-line used to run the tool - - - -\* Tool "--version" - - - -\* MySQL version of all servers involved - - - -\* Output from the tool including STDERR - - - -\* Input files (log/dump/config files, etc.) - - - -If possible, include debugging output by running the tool with \ ``PTDEBUG``\ ; -see "ENVIRONMENT". - - -*********** -DOWNLOADING -*********** - - -Visit `http://www.percona.com/software/percona-toolkit/ `_ to download the -latest release of Percona Toolkit. Or, get the latest release from the -command line: - - -.. code-block:: perl - - wget percona.com/get/percona-toolkit.tar.gz - - wget percona.com/get/percona-toolkit.rpm - - wget percona.com/get/percona-toolkit.deb - - -You can also get individual tools from the latest release: - - -.. code-block:: perl - - wget percona.com/get/TOOL - - -Replace \ ``TOOL``\ with the name of any tool. - - -******* -AUTHORS -******* - - -Daniel Nichter - - -********************* -ABOUT PERCONA TOOLKIT -********************* - - -This tool is part of Percona Toolkit, a collection of advanced command-line -tools developed by Percona for MySQL support and consulting. Percona Toolkit -was forked from two projects in June, 2011: Maatkit and Aspersa. Those -projects were created by Baron Schwartz and developed primarily by him and -Daniel Nichter, both of whom are employed by Percona. Visit -`http://www.percona.com/software/ `_ for more software developed by Percona. 
- - -******************************** -COPYRIGHT, LICENSE, AND WARRANTY -******************************** - - -This program is copyright 2011 Percona Inc. -Feedback and improvements are welcome. - -THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation, version 2; OR the Perl Artistic License. On UNIX and similar -systems, you can issue \`man perlgpl' or \`man perlartistic' to read these -licenses. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA. - - -******* -VERSION -******* - - -pt-fk-error-logger 1.0.1 - diff --git a/docs/user/pt-heartbeat.rst b/docs/user/pt-heartbeat.rst deleted file mode 100644 index 141813db..00000000 --- a/docs/user/pt-heartbeat.rst +++ /dev/null @@ -1,874 +0,0 @@ - -############ -pt-heartbeat -############ - -.. highlight:: perl - - -**** -NAME -**** - - -pt-heartbeat - Monitor MySQL replication delay. - - -******** -SYNOPSIS -******** - - -Usage: pt-heartbeat [OPTION...] [DSN] --update|--monitor|--check|--stop - -pt-heartbeat measures replication lag on a MySQL or PostgreSQL server. You can -use it to update a master or monitor a replica. If possible, MySQL connection -options are read from your .my.cnf file. - -Start daemonized process to update test.heartbeat table on master: - - -.. code-block:: perl - - pt-heartbeat -D test --update -h master-server --daemonize - - -Monitor replication lag on slave: - - -.. 
code-block:: perl - - pt-heartbeat -D test --monitor -h slave-server - - pt-heartbeat -D test --monitor -h slave-server --dbi-driver Pg - - -Check slave lag once and exit (using optional DSN to specify slave host): - - -.. code-block:: perl - - pt-heartbeat -D test --check h=slave-server - - - -***** -RISKS -***** - - -The following section is included to inform users about the potential risks, -whether known or unknown, of using this tool. The two main categories of risks -are those created by the nature of the tool (e.g. read-only tools vs. read-write -tools) and those created by bugs. - -pt-heartbeat merely reads and writes a single record in a table. It should be -very low-risk. - -At the time of this release, we know of no bugs that could cause serious harm to -users. - -The authoritative source for updated information is always the online issue -tracking system. Issues that affect this tool will be marked as such. You can -see a list of such issues at the following URL: -`http://www.percona.com/bugs/pt-heartbeat `_. - -See also "BUGS" for more information on filing bugs and getting help. - - -*********** -DESCRIPTION -*********** - - -pt-heartbeat is a two-part MySQL and PostgreSQL replication delay monitoring -system that measures delay by looking at actual replicated data. This -avoids reliance on the replication mechanism itself, which is unreliable. (For -example, \ ``SHOW SLAVE STATUS``\ on MySQL). - -The first part is an "--update" instance of pt-heartbeat that connects to -a master and updates a timestamp ("heartbeat record") every "--interval" -seconds. Since the heartbeat table may contain records from multiple -masters (see "MULTI-SLAVE HIERARCHY"), the server's ID (@@server_id) is -used to identify records. 
- -The second part is a "--monitor" or "--check" instance of pt-heartbeat -that connects to a slave, examines the replicated heartbeat record from its -immediate master or the specified "--master-server-id", and computes the -difference from the current system time. If replication between the slave and -the master is delayed or broken, the computed difference will be greater than -zero and potentially increase if "--monitor" is specified. - -You must either manually create the heartbeat table on the master or use -"--create-table". See "--create-table" for the proper heartbeat -table structure. The \ ``MEMORY``\ storage engine is suggested, but not -required of course, for MySQL. - -The heartbeat table must contain a heartbeat row. By default, a heartbeat -row is inserted if it doesn't exist. This feature can be disabled with the -"--[no]insert-heartbeat-row" option in case the database user does not -have INSERT privileges. - -pt-heartbeat depends only on the heartbeat record being replicated to the slave, -so it works regardless of the replication mechanism (built-in replication, a -system such as Continuent Tungsten, etc). It works at any depth in the -replication hierarchy; for example, it will reliably report how far a slave lags -its master's master's master. And if replication is stopped, it will continue -to work and report (accurately!) that the slave is falling further and further -behind the master. - -pt-heartbeat has a maximum resolution of 0.01 second. The clocks on the -master and slave servers must be closely synchronized via NTP. By default, -"--update" checks happen on the edge of the second (e.g. 00:01) and -"--monitor" checks happen halfway between seconds (e.g. 00:01.5). -As long as the servers' clocks are closely synchronized and replication -events are propagating in less than half a second, pt-heartbeat will report -zero seconds of delay. 
- -pt-heartbeat will try to reconnect if the connection has an error, but will -not retry if it can't get a connection when it first starts. - -The "--dbi-driver" option lets you use pt-heartbeat to monitor PostgreSQL -as well. It is reported to work well with Slony-1 replication. - - -********************* -MULTI-SLAVE HIERARCHY -********************* - - -If the replication hierarchy has multiple slaves which are masters of -other slaves, like "master -> slave1 -> slave2", "--update" instances -can be run on the slaves as well as the master. The default heartbeat -table (see "--create-table") is keyed on the \ ``server_id``\ column, so -each server will update the row where \ ``server_id=@@server_id``\ . - -For "--monitor" and "--check", if "--master-server-id" is not -specified, the tool tries to discover and use the slave's immediate master. -If this fails, or if you want to monitor lag from another master, then you can -specify the "--master-server-id" to use. - -For example, if the replication hierarchy is "master -> slave1 -> slave2" -with corresponding server IDs 1, 2 and 3, you can: - - -.. code-block:: perl - - pt-heartbeat --daemonize -D test --update -h master - pt-heartbeat --daemonize -D test --update -h slave1 - - -Then check (or monitor) the replication delay from master to slave2: - - -.. code-block:: perl - - pt-heartbeat -D test --master-server-id 1 --check slave2 - - -Or check the replication delay from slave1 to slave2: - - -.. code-block:: perl - - pt-heartbeat -D test --master-server-id 2 --check slave2 - - -Stopping the "--update" instance on slave1 will not affect the instance -on master. - - -*********************** -MASTER AND SLAVE STATUS -*********************** - - -The default heartbeat table (see "--create-table") has columns for saving -information from \ ``SHOW MASTER STATUS``\ and \ ``SHOW SLAVE STATUS``\ . These -columns are optional. If any are present, their corresponding information -will be saved.
- - -******* -OPTIONS -******* - - -Specify at least one of "--stop", "--update", "--monitor", or "--check". - -"--update", "--monitor", and "--check" are mutually exclusive. - -"--daemonize" and "--check" are mutually exclusive. - -This tool accepts additional command-line arguments. Refer to the -"SYNOPSIS" and usage information for details. - - ---ask-pass - - Prompt for a password when connecting to MySQL. - - - ---charset - - short form: -A; type: string - - Default character set. If the value is utf8, sets Perl's binmode on STDOUT to - utf8, passes the mysql_enable_utf8 option to DBD::mysql, and runs SET NAMES UTF8 - after connecting to MySQL. Any other value sets binmode on STDOUT without the - utf8 layer, and runs SET NAMES after connecting to MySQL. - - - ---check - - Check slave delay once and exit. If you also specify "--recurse", the - tool will try to discover slaves of the given slave and check and print - their lag, too. The hostname or IP and port for each slave is printed - before its delay. "--recurse" only works with MySQL. - - - ---config - - type: Array - - Read this comma-separated list of config files; if specified, this must be the - first option on the command line. - - - ---create-table - - Create the heartbeat "--table" if it does not exist. - - This option causes the table specified by "--database" and "--table" to - be created with the following MAGIC_create_heartbeat table definition: - - - .. code-block:: perl - - CREATE TABLE heartbeat ( - ts varchar(26) NOT NULL, - server_id int unsigned NOT NULL PRIMARY KEY, - file varchar(255) DEFAULT NULL, -- SHOW MASTER STATUS - position bigint unsigned DEFAULT NULL, -- SHOW MASTER STATUS - relay_master_log_file varchar(255) DEFAULT NULL, -- SHOW SLAVE STATUS - exec_master_log_pos bigint unsigned DEFAULT NULL -- SHOW SLAVE STATUS - ); - - - The heartbeat table requires at least one row. If you manually create the - heartbeat table, then you must insert a row by doing: - - - ..
code-block:: perl - - INSERT INTO heartbeat (ts, server_id) VALUES (NOW(), N); - - - where \ ``N``\ is the server's ID; do not use @@server_id because it will replicate - and slaves will insert their own server ID instead of the master's server ID. - - This is done automatically by "--create-table". - - A legacy version of the heartbeat table is still supported: - - - .. code-block:: perl - - CREATE TABLE heartbeat ( - id int NOT NULL PRIMARY KEY, - ts datetime NOT NULL - ); - - - Legacy tables do not support "--update" instances on each slave - of a multi-slave hierarchy like "master -> slave1 -> slave2". - To manually insert the one required row into a legacy table: - - - .. code-block:: perl - - INSERT INTO heartbeat (id, ts) VALUES (1, NOW()); - - - The tool automatically detects if the heartbeat table is legacy. - - See also "MULTI-SLAVE HIERARCHY". - - - ---daemonize - - Fork to the background and detach from the shell. POSIX operating systems only. - - - ---database - - short form: -D; type: string - - The database to use for the connection. - - - ---dbi-driver - - default: mysql; type: string - - Specify a driver for the connection; \ ``mysql``\ and \ ``Pg``\ are supported. - - - ---defaults-file - - short form: -F; type: string - - Only read mysql options from the given file. You must give an absolute - pathname. - - - ---file - - type: string - - Print latest "--monitor" output to this file. - - When "--monitor" is given, prints output to the specified file instead of to - STDOUT. The file is opened, truncated, and closed every interval, so it will - only contain the most recent statistics. Useful when "--daemonize" is given. - - - ---frames - - type: string; default: 1m,5m,15m - - Timeframes for averages. - - Specifies the timeframes over which to calculate moving averages when - "--monitor" is given. Specify as a comma-separated list of numbers with - suffixes. The suffix can be s for seconds, m for minutes, h for hours, or d for - days. 
The size of the largest frame determines the maximum memory usage, as up - to the specified number of per-second samples are kept in memory to calculate - the averages. You can specify as many timeframes as you like. - - - ---help - - Show help and exit. - - - ---host - - short form: -h; type: string - - Connect to host. - - - ---[no]insert-heartbeat-row - - default: yes - - Insert a heartbeat row in the "--table" if one doesn't exist. - - The heartbeat "--table" requires a heartbeat row, else there's nothing - to "--update", "--monitor", or "--check"! By default, the tool will - insert a heartbeat row if one is not already present. You can disable this - feature by specifying \ ``--no-insert-heartbeat-row``\ in case the database user - does not have INSERT privileges. - - - ---interval - - type: float; default: 1.0 - - How often to update or check the heartbeat "--table". Updates and checks - begin on the first whole second then repeat every "--interval" seconds - for "--update" and every "--interval" plus "--skew" seconds for - "--monitor". - - For example, if at 00:00.4 an "--update" instance is started at 0.5 second - intervals, the first update happens at 00:01.0, the next at 00:01.5, etc. - If at 00:10.7 a "--monitor" instance is started at 0.05 second intervals - with the default 0.5 second "--skew", then the first check happens at - 00:11.5 (00:11.0 + 0.5) which will be "--skew" seconds after the last update - which, because the instances are checking at synchronized intervals, happened - at 00:11.0. - - The tool waits for and begins on the first whole second just to make the - interval calculations simpler. Therefore, the tool could wait up to 1 second - before updating or checking. - - The minimum (fastest) interval is 0.01, and the maximum precision is two - decimal places, so 0.015 will be rounded to 0.02. 
- - If a legacy heartbeat table (see "--create-table") is used, then the - maximum precision is 1s because the \ ``ts``\ column is type \ ``datetime``\ . - - - ---log - - type: string - - Print all output to this file when daemonized. - - - ---master-server-id - - type: string - - Calculate delay from this master server ID for "--monitor" or "--check". - If not given, pt-heartbeat attempts to connect to the server's master and - determine its server id. - - - ---monitor - - Monitor slave delay continuously. - - Specifies that pt-heartbeat should check the slave's delay every second and - report to STDOUT (or if "--file" is given, to the file instead). The output - is the current delay followed by moving averages over the timeframe given in - "--frames". For example, - - - .. code-block:: perl - - 5s [ 0.25s, 0.05s, 0.02s ] - - - - ---password - - short form: -p; type: string - - Password to use when connecting. - - - ---pid - - type: string - - Create the given PID file when daemonized. The file contains the process ID of - the daemonized instance. The PID file is removed when the daemonized instance - exits. The program checks for the existence of the PID file when starting; if - it exists and the process with the matching PID exists, the program exits. - - - ---port - - short form: -P; type: int - - Port number to use for connection. - - - ---print-master-server-id - - Print the auto-detected or given "--master-server-id". If "--check" - or "--monitor" is specified, specifying this option will print the - auto-detected or given "--master-server-id" at the end of each line. - - - ---recurse - - type: int - - Check slaves recursively to this depth in "--check" mode. - - Try to discover slave servers recursively, to the specified depth. After - discovering servers, run the check on each one of them and print the hostname - (if possible), followed by the slave delay. - - This currently works only with MySQL. See "--recursion-method". 
- - - ---recursion-method - - type: string - - Preferred recursion method used to find slaves. - - Possible methods are: - - - .. code-block:: perl - - METHOD USES - =========== ================ - processlist SHOW PROCESSLIST - hosts SHOW SLAVE HOSTS - - - The processlist method is preferred because SHOW SLAVE HOSTS is not reliable. - However, the hosts method is required if the server uses a non-standard - port (not 3306). Usually pt-heartbeat does the right thing and finds - the slaves, but you may give a preferred method and it will be used first. - If it doesn't find any slaves, the other methods will be tried. - - - ---replace - - Use \ ``REPLACE``\ instead of \ ``UPDATE``\ for --update. - - When running in "--update" mode, use \ ``REPLACE``\ instead of \ ``UPDATE``\ to set - the heartbeat table's timestamp. The \ ``REPLACE``\ statement is a MySQL extension - to SQL. This option is useful when you don't know whether the table contains - any rows or not. It must be used in conjunction with --update. - - - ---run-time - - type: time - - Time to run before exiting. - - - ---sentinel - - type: string; default: /tmp/pt-heartbeat-sentinel - - Exit if this file exists. - - - ---set-vars - - type: string; default: wait_timeout=10000 - - Set these MySQL variables. Immediately after connecting to MySQL, this string - will be appended to SET and executed. - - - ---skew - - type: float; default: 0.5 - - How long to delay checks. - - The default is to delay checks one half second. Since the update happens as - soon as possible after the beginning of the second on the master, this allows - one half second of replication delay before reporting that the slave lags the - master by one second. If your clocks are not completely accurate or there is - some other reason you'd like to delay the slave more or less, you can tweak this - value. Try setting the \ ``PTDEBUG``\ environment variable to see the effect this - has.
- - - ---socket - - short form: -S; type: string - - Socket file to use for connection. - - - ---stop - - Stop running instances by creating the sentinel file. - - This should have the effect of stopping all running - instances which are watching the same sentinel file. If none of - "--update", "--monitor" or "--check" is specified, \ ``pt-heartbeat``\ - will exit after creating the file. If one of these is specified, - \ ``pt-heartbeat``\ will wait the interval given by "--interval", then remove - the file and continue working. - - You might find this handy to stop cron jobs gracefully if necessary, or to - replace one running instance with another. For example, if you want to stop - and restart \ ``pt-heartbeat``\ every hour (just to make sure that it is restarted - every hour, in case of a server crash or some other problem), you could use a - \ ``crontab``\ line like this: - - - .. code-block:: perl - - 0 * * * * pt-heartbeat --update -D test --stop \ - --sentinel /tmp/pt-heartbeat-hourly - - - The non-default "--sentinel" will make sure the hourly \ ``cron``\ job stops - only instances previously started with the same options (that is, from the - same \ ``cron``\ job). - - See also "--sentinel". - - - ---table - - type: string; default: heartbeat - - The table to use for the heartbeat. - - Don't specify database.table; use "--database" to specify the database. - - See "--create-table". - - - ---update - - Update a master's heartbeat. - - - ---user - - short form: -u; type: string - - User for login if not current user. - - - ---version - - Show version and exit. - - - - -*********** -DSN OPTIONS -*********** - - -These DSN options are used to create a DSN. Each option is given like -\ ``option=value``\ . The options are case-sensitive, so P and p are not the -same option. There cannot be whitespace before or after the \ ``=``\ and -if the value contains whitespace it must be quoted. DSN options are -comma-separated. 
See the percona-toolkit manpage for full details. - - -\* A - - dsn: charset; copy: yes - - Default character set. - - - -\* D - - dsn: database; copy: yes - - Default database. - - - -\* F - - dsn: mysql_read_default_file; copy: yes - - Only read default options from the given file - - - -\* h - - dsn: host; copy: yes - - Connect to host. - - - -\* p - - dsn: password; copy: yes - - Password to use when connecting. - - - -\* P - - dsn: port; copy: yes - - Port number to use for connection. - - - -\* S - - dsn: mysql_socket; copy: yes - - Socket file to use for connection. - - - -\* u - - dsn: user; copy: yes - - User for login if not current user. - - - - -*********** -ENVIRONMENT -*********** - - -The environment variable \ ``PTDEBUG``\ enables verbose debugging output to STDERR. -To enable debugging and capture all output to a file, run the tool like: - - -.. code-block:: perl - - PTDEBUG=1 pt-heartbeat ... > FILE 2>&1 - - -Be careful: debugging output is voluminous and can generate several megabytes -of output. - - -******************* -SYSTEM REQUIREMENTS -******************* - - -You need Perl, DBI, DBD::mysql, and some core packages that ought to be -installed in any reasonably new version of Perl. - - -**** -BUGS -**** - - -For a list of known bugs, see `http://www.percona.com/bugs/pt-heartbeat `_. - -Please report bugs at `https://bugs.launchpad.net/percona-toolkit `_. -Include the following information in your bug report: - - -\* Complete command-line used to run the tool - - - -\* Tool "--version" - - - -\* MySQL version of all servers involved - - - -\* Output from the tool including STDERR - - - -\* Input files (log/dump/config files, etc.) - - - -If possible, include debugging output by running the tool with \ ``PTDEBUG``\ ; -see "ENVIRONMENT". - - -*********** -DOWNLOADING -*********** - - -Visit `http://www.percona.com/software/percona-toolkit/ `_ to download the -latest release of Percona Toolkit. 
Or, get the latest release from the -command line: - - -.. code-block:: perl - - wget percona.com/get/percona-toolkit.tar.gz - - wget percona.com/get/percona-toolkit.rpm - - wget percona.com/get/percona-toolkit.deb - - -You can also get individual tools from the latest release: - - -.. code-block:: perl - - wget percona.com/get/TOOL - - -Replace \ ``TOOL``\ with the name of any tool. - - -******* -AUTHORS -******* - - -Proven Scaling LLC, SixApart Ltd, Baron Schwartz, and Daniel Nichter - - -********************* -ABOUT PERCONA TOOLKIT -********************* - - -This tool is part of Percona Toolkit, a collection of advanced command-line -tools developed by Percona for MySQL support and consulting. Percona Toolkit -was forked from two projects in June, 2011: Maatkit and Aspersa. Those -projects were created by Baron Schwartz and developed primarily by him and -Daniel Nichter, both of whom are employed by Percona. Visit -`http://www.percona.com/software/ `_ for more software developed by Percona. - - -******************************** -COPYRIGHT, LICENSE, AND WARRANTY -******************************** - - -This program is copyright 2006 Proven Scaling LLC and Six Apart Ltd, -2007-2011 Percona Inc. -Feedback and improvements are welcome. - -Feedback and improvements are welcome. - -THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation, version 2; OR the Perl Artistic License. On UNIX and similar -systems, you can issue \`man perlgpl' or \`man perlartistic' to read these -licenses. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA. - - -******* -VERSION -******* - - -pt-heartbeat 1.0.1 - diff --git a/docs/user/pt-index-usage.rst b/docs/user/pt-index-usage.rst deleted file mode 100644 index fc438df6..00000000 --- a/docs/user/pt-index-usage.rst +++ /dev/null @@ -1,840 +0,0 @@ - -############## -pt-index-usage -############## - -.. highlight:: perl - - -**** -NAME -**** - - -pt-index-usage - Read queries from a log and analyze how they use indexes. - - -******** -SYNOPSIS -******** - - -Usage: pt-index-usage [OPTION...] [FILE...] - -pt-index-usage reads queries from logs and analyzes how they use indexes. - -Analyze queries in slow.log and print reports: - - -.. code-block:: perl - - pt-index-usage /path/to/slow.log --host localhost - - -Disable reports and save results to mk database for later analysis: - - -.. code-block:: perl - - pt-index-usage slow.log --no-report --save-results-database mk - - - -***** -RISKS -***** - - -The following section is included to inform users about the potential risks, -whether known or unknown, of using this tool. The two main categories of risks -are those created by the nature of the tool (e.g. read-only tools vs. read-write -tools) and those created by bugs. - -This tool is read-only unless you use "--save-results-database". It reads a -log of queries and EXPLAINs them. It also gathers information about all tables -in all databases. It should be very low-risk. - -At the time of this release, we know of no bugs that could cause serious harm to -users. - -The authoritative source for updated information is always the online issue -tracking system. Issues that affect this tool will be marked as such. You can -see a list of such issues at the following URL: -`http://www.percona.com/bugs/pt-index-usage `_.
- -See also "BUGS" for more information on filing bugs and getting help. - - -*********** -DESCRIPTION -*********** - - -This tool connects to a MySQL database server, reads through a query log, and -uses EXPLAIN to ask MySQL how it will use each query. When it is finished, it -prints out a report on indexes that the queries didn't use. - -The query log needs to be in MySQL's slow query log format. If you need to -input a different format, you can use pt-query-digest to translate the -formats. If you don't specify a filename, the tool reads from STDIN. - -The tool runs two stages. In the first stage, the tool takes inventory of all -the tables and indexes in your database, so it can compare the existing indexes -to those that were actually used by the queries in the log. In the second -stage, it runs EXPLAIN on each query in the query log. It uses separate -database connections to inventory the tables and run EXPLAIN, so it opens two -connections to the database. - -If a query is not a SELECT, it tries to transform it to a roughly equivalent -SELECT query so it can be EXPLAINed. This is not a perfect process, but it is -good enough to be useful. - -The tool skips the EXPLAIN step for queries that are exact duplicates of those -seen before. It assumes that the same query will generate the same EXPLAIN plan -as it did previously (usually a safe assumption, and generally good for -performance), and simply increments the count of times that the indexes were -used. However, queries that have the same fingerprint but different checksums -will be re-EXPLAINed. Queries that have different literal constants can have -different execution plans, and this is important to measure. - -After EXPLAIN-ing the query, it is necessary to try to map aliases in the query -back to the original table names. For example, consider the EXPLAIN plan for -the following query: - - -.. 
code-block:: perl - - SELECT * FROM tbl1 AS foo; - - -The EXPLAIN output will show access to table \ ``foo``\ , and that must be translated -back to \ ``tbl1``\ . This process involves complex parsing. It is generally very -accurate, but there is some chance that it might not work right. If you find -cases where it fails, submit a bug report and a reproducible test case. - -Queries that cannot be EXPLAINed will cause all subsequent queries with the -same fingerprint to be blacklisted. This is to reduce the work they cause, and -prevent them from continuing to print error messages. However, at least in -this stage of the tool's development, it is my opinion that it's not a good -idea to preemptively silence these, or prevent them from being EXPLAINed at -all. I am looking for lots of feedback on how to improve things like the -query parsing. So please submit your test cases based on the errors the tool -prints! - - -****** -OUTPUT -****** - - -After it reads all the events in the log, the tool prints out DROP statements -for every index that was not used. It skips indexes for tables that were never -accessed by any queries in the log, to avoid false-positive results. - -If you don't specify "--quiet", the tool also outputs warnings about -statements that cannot be EXPLAINed and similar. These go to standard error. - -Progress reports are enabled by default (see "--progress"). These also go to -standard error. - - -******* -OPTIONS -******* - - -This tool accepts additional command-line arguments. Refer to the -"SYNOPSIS" and usage information for details. - - ---ask-pass - - Prompt for a password when connecting to MySQL. - - - ---charset - - short form: -A; type: string - - Default character set. If the value is utf8, sets Perl's binmode on - STDOUT to utf8, passes the mysql_enable_utf8 option to DBD::mysql, and - runs SET NAMES UTF8 after connecting to MySQL. 
Any other value sets - binmode on STDOUT without the utf8 layer, and runs SET NAMES after - connecting to MySQL. - - - ---config - - type: Array - - Read this comma-separated list of config files; if specified, this must be the - first option on the command line. - - - ---create-save-results-database - - Create the "--save-results-database" if it does not exist. - - If the "--save-results-database" already exists and this option is - specified, the database is used and the necessary tables are created if - they do not already exist. - - - ---[no]create-views - - Create views for "--save-results-database" example queries. - - Several example queries are given for querying the tables in the - "--save-results-database". These example queries are, by default, created - as views. Specifying \ ``--no-create-views``\ prevents these views from being - created. - - - ---database - - short form: -D; type: string - - The database to use for the connection. - - - ---databases - - short form: -d; type: hash - - Only get tables and indexes from this comma-separated list of databases. - - - ---databases-regex - - type: string - - Only get tables and indexes from database whose names match this Perl regex. - - - ---defaults-file - - short form: -F; type: string - - Only read mysql options from the given file. You must give an absolute pathname. - - - ---drop - - type: Hash; default: non-unique - - Suggest dropping only these types of unused indexes. - - By default pt-index-usage will only suggest to drop unused secondary indexes, - not primary or unique indexes. You can specify which types of unused indexes - the tool suggests to drop: primary, unique, non-unique, all. - - A separate \ ``ALTER TABLE``\ statement for each type is printed. So if you - specify \ ``--drop all``\ and there is a primary key and a non-unique index, - the \ ``ALTER TABLE ... DROP``\ for each will be printed on separate lines. 
- - - ---empty-save-results-tables - - Drop and re-create all pre-existing tables in the "--save-results-database". - This allows information from previous runs to be removed before the current run. - - - ---help - - Show help and exit. - - - ---host - - short form: -h; type: string - - Connect to host. - - - ---ignore-databases - - type: Hash - - Ignore this comma-separated list of databases. - - - ---ignore-databases-regex - - type: string - - Ignore databases whose names match this Perl regex. - - - ---ignore-tables - - type: Hash - - Ignore this comma-separated list of table names. - - Table names may be qualified with the database name. - - - ---ignore-tables-regex - - type: string - - Ignore tables whose names match the Perl regex. - - - ---password - - short form: -p; type: string - - Password to use when connecting. - - - ---port - - short form: -P; type: int - - Port number to use for connection. - - - ---progress - - type: array; default: time,30 - - Print progress reports to STDERR. The value is a comma-separated list with two - parts. The first part can be percentage, time, or iterations; the second part - specifies how often an update should be printed, in percentage, seconds, or - number of iterations. - - - ---quiet - - short form: -q - - Do not print any warnings. Also disables "--progress". - - - ---[no]report - - default: yes - - Print the reports for "--report-format". - - You may want to disable the reports by specifying \ ``--no-report``\ if, for - example, you also specify "--save-results-database" and you only want - to query the results tables later. - - - ---report-format - - type: Array; default: drop_unused_indexes - - Right now there is only one report: drop_unused_indexes. This report prints - SQL statements for dropping any unused indexes. See also "--drop". - - See also "--[no]report". - - - ---save-results-database - - type: DSN - - Save results to tables in this database. 
Information about indexes, queries, - tables and their usage is stored in several tables in the specified database. - The tables are auto-created if they do not exist. If the database doesn't - exist, it can be auto-created with "--create-save-results-database". In this - case the connection is initially created with no default database, then after - the database is created, it is USE'ed. - - pt-index-usage executes INSERT statements to save the results. Therefore, you - should be careful if you use this feature on a production server. It might - increase load, or cause trouble if you don't want the server to be written to, - or so on. - - This is a new feature. It may change in future releases. - - After a run, you can query the usage tables to answer various questions about - index usage. The tables have the following CREATE TABLE definitions: - - MAGIC_create_indexes: - - - .. code-block:: perl - - CREATE TABLE IF NOT EXISTS indexes ( - db VARCHAR(64) NOT NULL, - tbl VARCHAR(64) NOT NULL, - idx VARCHAR(64) NOT NULL, - cnt BIGINT UNSIGNED NOT NULL DEFAULT 0, - PRIMARY KEY (db, tbl, idx) - ) - - - MAGIC_create_queries: - - - .. code-block:: perl - - CREATE TABLE IF NOT EXISTS queries ( - query_id BIGINT UNSIGNED NOT NULL, - fingerprint TEXT NOT NULL, - sample TEXT NOT NULL, - PRIMARY KEY (query_id) - ) - - - MAGIC_create_tables: - - - .. code-block:: perl - - CREATE TABLE IF NOT EXISTS tables ( - db VARCHAR(64) NOT NULL, - tbl VARCHAR(64) NOT NULL, - cnt BIGINT UNSIGNED NOT NULL DEFAULT 0, - PRIMARY KEY (db, tbl) - ) - - - MAGIC_create_index_usage: - - - .. code-block:: perl - - CREATE TABLE IF NOT EXISTS index_usage ( - query_id BIGINT UNSIGNED NOT NULL, - db VARCHAR(64) NOT NULL, - tbl VARCHAR(64) NOT NULL, - idx VARCHAR(64) NOT NULL, - cnt BIGINT UNSIGNED NOT NULL DEFAULT 1, - UNIQUE INDEX (query_id, db, tbl, idx) - ) - - - MAGIC_create_index_alternatives: - - - .. 
code-block:: perl - - CREATE TABLE IF NOT EXISTS index_alternatives ( - query_id BIGINT UNSIGNED NOT NULL, -- This query used - db VARCHAR(64) NOT NULL, -- this index, but... - tbl VARCHAR(64) NOT NULL, -- - idx VARCHAR(64) NOT NULL, -- - alt_idx VARCHAR(64) NOT NULL, -- was an alternative - cnt BIGINT UNSIGNED NOT NULL DEFAULT 1, - UNIQUE INDEX (query_id, db, tbl, idx, alt_idx), - INDEX (db, tbl, idx), - INDEX (db, tbl, alt_idx) - ) - - - The following are some queries you can run against these tables to answer common - questions you might have. Each query is also created as a view (with MySQL - v5.0 and newer) if \ ``"--[no]create-views"``\ is true (it is by default). - The view names are the strings after the \ ``MAGIC_view_``\ prefix. - - Question: which queries sometimes use different indexes, and what fraction of - the time is each index chosen? MAGIC_view_query_uses_several_indexes: - - - .. code-block:: perl - - SELECT iu.query_id, CONCAT_WS('.', iu.db, iu.tbl, iu.idx) AS idx, - variations, iu.cnt, iu.cnt / total_cnt * 100 AS pct - FROM index_usage AS iu - INNER JOIN ( - SELECT query_id, db, tbl, SUM(cnt) AS total_cnt, - COUNT(*) AS variations - FROM index_usage - GROUP BY query_id, db, tbl - HAVING COUNT(*) > 1 - ) AS qv USING(query_id, db, tbl); - - - Question: which indexes have lots of alternatives, i.e. are chosen instead of - other indexes, and for what queries? MAGIC_view_index_has_alternates: - - - .. code-block:: perl - - SELECT CONCAT_WS('.', db, tbl, idx) AS idx_chosen, - GROUP_CONCAT(DISTINCT alt_idx) AS alternatives, - GROUP_CONCAT(DISTINCT query_id) AS queries, SUM(cnt) AS cnt - FROM index_alternatives - GROUP BY db, tbl, idx - HAVING COUNT(*) > 1; - - - Question: which indexes are considered as alternates for other indexes, and for - what queries? MAGIC_view_index_alternates: - - - .. 
code-block:: perl - - SELECT CONCAT_WS('.', db, tbl, alt_idx) AS idx_considered, - GROUP_CONCAT(DISTINCT idx) AS alternative_to, - GROUP_CONCAT(DISTINCT query_id) AS queries, SUM(cnt) AS cnt - FROM index_alternatives - GROUP BY db, tbl, alt_idx - HAVING COUNT(*) > 1; - - - Question: which of those are never chosen by any queries, and are therefore - superfluous? MAGIC_view_unused_index_alternates: - - - .. code-block:: perl - - SELECT CONCAT_WS('.', i.db, i.tbl, i.idx) AS idx, - alt.alternative_to, alt.queries, alt.cnt - FROM indexes AS i - INNER JOIN ( - SELECT db, tbl, alt_idx, GROUP_CONCAT(DISTINCT idx) AS alternative_to, - GROUP_CONCAT(DISTINCT query_id) AS queries, SUM(cnt) AS cnt - FROM index_alternatives - GROUP BY db, tbl, alt_idx - HAVING COUNT(*) > 1 - ) AS alt ON i.db = alt.db AND i.tbl = alt.tbl - AND i.idx = alt.alt_idx - WHERE i.cnt = 0; - - - Question: given a table, which indexes were used, by how many queries, with how - many distinct fingerprints? Were there alternatives? Which indexes were not - used? You can edit the following query's SELECT list to also see the query IDs - in question. MAGIC_view_index_usage: - - - .. code-block:: perl - - SELECT i.idx, iu.usage_cnt, iu.usage_total, - ia.alt_cnt, ia.alt_total - FROM indexes AS i - LEFT OUTER JOIN ( - SELECT db, tbl, idx, COUNT(*) AS usage_cnt, - SUM(cnt) AS usage_total, GROUP_CONCAT(query_id) AS used_by - FROM index_usage - GROUP BY db, tbl, idx - ) AS iu ON i.db=iu.db AND i.tbl=iu.tbl AND i.idx = iu.idx - LEFT OUTER JOIN ( - SELECT db, tbl, idx, COUNT(*) AS alt_cnt, - SUM(cnt) AS alt_total, - GROUP_CONCAT(query_id) AS alt_queries - FROM index_alternatives - GROUP BY db, tbl, idx - ) AS ia ON i.db=ia.db AND i.tbl=ia.tbl AND i.idx = ia.idx; - - - Question: which indexes on a given table are vital for at least one query (there - is no alternative)? MAGIC_view_required_indexes: - - - .. 
code-block:: perl - - SELECT i.db, i.tbl, i.idx, no_alt.queries - FROM indexes AS i - INNER JOIN ( - SELECT iu.db, iu.tbl, iu.idx, - GROUP_CONCAT(iu.query_id) AS queries - FROM index_usage AS iu - LEFT OUTER JOIN index_alternatives AS ia - USING(db, tbl, idx) - WHERE ia.db IS NULL - GROUP BY iu.db, iu.tbl, iu.idx - ) AS no_alt ON no_alt.db = i.db AND no_alt.tbl = i.tbl - AND no_alt.idx = i.idx - ORDER BY i.db, i.tbl, i.idx, no_alt.queries; - - - - ---set-vars - - type: string; default: wait_timeout=10000 - - Set these MySQL variables. Immediately after connecting to MySQL, this - string will be appended to SET and executed. - - - ---socket - - short form: -S; type: string - - Socket file to use for connection. - - - ---tables - - short form: -t; type: hash - - Only get indexes from this comma-separated list of tables. - - - ---tables-regex - - type: string - - Only get indexes from tables whose names match this Perl regex. - - - ---user - - short form: -u; type: string - - User for login if not current user. - - - ---version - - Show version and exit. - - - - -*********** -DSN OPTIONS -*********** - - -These DSN options are used to create a DSN. Each option is given like -\ ``option=value``\ . The options are case-sensitive, so P and p are not the -same option. There cannot be whitespace before or after the \ ``=``\ and -if the value contains whitespace it must be quoted. DSN options are -comma-separated. See the percona-toolkit manpage for full details. - - -\* A - - dsn: charset; copy: yes - - Default character set. - - - -\* D - - dsn: database; copy: yes - - Database to connect to. - - - -\* F - - dsn: mysql_read_default_file; copy: yes - - Only read default options from the given file - - - -\* h - - dsn: host; copy: yes - - Connect to host. - - - -\* p - - dsn: password; copy: yes - - Password to use when connecting. - - - -\* P - - dsn: port; copy: yes - - Port number to use for connection. 
- - - -\* S - - dsn: mysql_socket; copy: yes - - Socket file to use for connection. - - - -\* u - - dsn: user; copy: yes - - User for login if not current user. - - - - -*********** -ENVIRONMENT -*********** - - -The environment variable \ ``PTDEBUG``\ enables verbose debugging output to STDERR. -To enable debugging and capture all output to a file, run the tool like: - - -.. code-block:: perl - - PTDEBUG=1 pt-index-usage ... > FILE 2>&1 - - -Be careful: debugging output is voluminous and can generate several megabytes -of output. - - -******************* -SYSTEM REQUIREMENTS -******************* - - -You need Perl, DBI, DBD::mysql, and some core packages that ought to be -installed in any reasonably new version of Perl. - - -**** -BUGS -**** - - -For a list of known bugs, see `http://www.percona.com/bugs/pt-index-usage `_. - -Please report bugs at `https://bugs.launchpad.net/percona-toolkit `_. -Include the following information in your bug report: - - -\* Complete command-line used to run the tool - - - -\* Tool "--version" - - - -\* MySQL version of all servers involved - - - -\* Output from the tool including STDERR - - - -\* Input files (log/dump/config files, etc.) - - - -If possible, include debugging output by running the tool with \ ``PTDEBUG``\ ; -see "ENVIRONMENT". - - -*********** -DOWNLOADING -*********** - - -Visit `http://www.percona.com/software/percona-toolkit/ `_ to download the -latest release of Percona Toolkit. Or, get the latest release from the -command line: - - -.. code-block:: perl - - wget percona.com/get/percona-toolkit.tar.gz - - wget percona.com/get/percona-toolkit.rpm - - wget percona.com/get/percona-toolkit.deb - - -You can also get individual tools from the latest release: - - -.. code-block:: perl - - wget percona.com/get/TOOL - - -Replace \ ``TOOL``\ with the name of any tool. 
- - -******* -AUTHORS -******* - - -Baron Schwartz and Daniel Nichter - - -********************* -ABOUT PERCONA TOOLKIT -********************* - - -This tool is part of Percona Toolkit, a collection of advanced command-line -tools developed by Percona for MySQL support and consulting. Percona Toolkit -was forked from two projects in June, 2011: Maatkit and Aspersa. Those -projects were created by Baron Schwartz and developed primarily by him and -Daniel Nichter, both of whom are employed by Percona. Visit -`http://www.percona.com/software/ `_ for more software developed by Percona. - - -******************************** -COPYRIGHT, LICENSE, AND WARRANTY -******************************** - - -This program is copyright 2010-2011 Baron Schwartz, 2011 Percona Inc. -Feedback and improvements are welcome. - -THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation, version 2; OR the Perl Artistic License. On UNIX and similar -systems, you can issue \`man perlgpl' or \`man perlartistic' to read these -licenses. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA. - - -******* -VERSION -******* - - -pt-index-usage 1.0.1 - diff --git a/docs/user/pt-kill.rst b/docs/user/pt-kill.rst deleted file mode 100644 index 463f9837..00000000 --- a/docs/user/pt-kill.rst +++ /dev/null @@ -1,1053 +0,0 @@ - -####### -pt-kill -####### - -.. highlight:: perl - - -**** -NAME -**** - - -pt-kill - Kill MySQL queries that match certain criteria. - - -******** -SYNOPSIS -******** - - -Usage: pt-kill [OPTION]... [FILE...] 
- -pt-kill kills MySQL connections. pt-kill connects to MySQL and gets queries -from SHOW PROCESSLIST if no FILE is given. Else, it reads queries from one -or more FILE which contains the output of SHOW PROCESSLIST. If FILE is -, -pt-kill reads from STDIN. - -Kill queries running longer than 60s: - - -.. code-block:: perl - - pt-kill --busy-time 60 --kill - - -Print, do not kill, queries running longer than 60s: - - -.. code-block:: perl - - pt-kill --busy-time 60 --print - - -Check for sleeping processes and kill them all every 10s: - - -.. code-block:: perl - - pt-kill --match-command Sleep --kill --victims all --interval 10 - - -Print all login processes: - - -.. code-block:: perl - - pt-kill --match-state login --print --victims all - - -See which queries in the processlist right now would match: - - -.. code-block:: perl - - mysql -e "SHOW PROCESSLIST" | pt-kill --busy-time 60 --print - - - -***** -RISKS -***** - - -The following section is included to inform users about the potential risks, -whether known or unknown, of using this tool. The two main categories of risks -are those created by the nature of the tool (e.g. read-only tools vs. read-write -tools) and those created by bugs. - -pt-kill is designed to kill queries if the "--kill" option is given, -and that might disrupt your database's users, of course. You should test with -the "--print" option, which is safe, if you're unsure what the tool will do. - -At the time of this release, we know of no bugs that could cause serious harm to -users. - -The authoritative source for updated information is always the online issue -tracking system. Issues that affect this tool will be marked as such. You can -see a list of such issues at the following URL: -`http://www.percona.com/bugs/pt-kill `_. - -See also "BUGS" for more information on filing bugs and getting help.
- - -*********** -DESCRIPTION -*********** - - -pt-kill captures queries from SHOW PROCESSLIST, filters them, and then either -kills or prints them. This is also known as a "slow query sniper" in some -circles. The idea is to watch for queries that might be consuming too many -resources, and kill them. - -For brevity, we talk about killing queries, but they may just be printed -(or some other future action) depending on what options are given. - -Normally pt-kill connects to MySQL to get queries from SHOW PROCESSLIST. -Alternatively, it can read SHOW PROCESSLIST output from files. In this case, -pt-kill does not connect to MySQL and "--kill" has no effect. You should -use "--print" instead when reading files. The ability to read a file (or -- for STDIN) allows you to capture SHOW PROCESSLIST and test it later with -pt-kill to make sure that your matches kill the proper queries. There are a -lot of special rules to follow, such as "don't kill replication threads," -so be careful to not kill something important! - -Two important options to know are "--busy-time" and "--victims". -First, whereas most match/filter options match their corresponding value from -SHOW PROCESSLIST (e.g. "--match-command" matches a query's Command value), -the Time value is matched by "--busy-time". See also "--interval". - -Second, "--victims" controls which matching queries from each class are -killed. By default, the matching query with the highest Time value is killed -(the oldest query). See the next section, "GROUP, MATCH AND KILL", -for more details. - -Usually you need to specify at least one \ ``--match``\ option, else no -queries will match. Or, you can specify "--match-all" to match all queries -that aren't ignored by an \ ``--ignore``\ option. - -pt-kill is a work in progress, and there is much more it could do. 
- - -********************* -GROUP, MATCH AND KILL -********************* - - -Queries pass through several steps to determine which exactly will be killed -(or printed--whatever action is specified). Understanding these steps will -help you match precisely the queries you want. - -The first step is grouping queries into classes. The "--group-by" option -controls grouping. By default, this option has no value so all queries are -grouped into one, big default class. All types of matching and filtering -(the next step) are applied per-class. Therefore, you may need to group -queries in order to match/filter some classes but not others. - -The second step is matching. Matching implies filtering since if a query -doesn't match some criteria, it is removed from its class. -Matching happens for each class. First, queries are filtered from their -class by the various \ ``Query Matches``\ options like "--match-user". -Then, entire classes are filtered by the various \ ``Class Matches``\ options -like "--query-count". - -The third step is victim selection, that is, which matching queries in each -class to kill. This is controlled by the "--victims" option. Although -many queries in a class may match, you may only want to kill the oldest -query, or all queries, etc. - -The fourth and final step is to take some action on all matching queries -from all classes. The \ ``Actions``\ options specify which actions will be -taken. At this step, there are no more classes, just a single list of -queries to kill, print, etc. - - -****** -OUTPUT -****** - - -If only "--kill" then there is no output. If only "--print" then a -timestamped KILL statement is printed for every query that would have -been killed, like: - - -.. code-block:: perl - - # 2009-07-15T15:04:01 KILL 8 (Query 42 sec) SELECT * FROM huge_table - - -The line shows a timestamp, the query's Id (8), its Time (42 sec) and its -Info (usually the query SQL).
- -If both "--kill" and "--print" are given, then matching queries are -killed and a line for each like the one above is printed. - -Any command executed by "--execute-command" is responsible for its own -output and logging. After being executed, pt-kill has no control or interaction -with the command. - - -******* -OPTIONS -******* - - -Specify at least one of "--kill", "--kill-query", "--print", "--execute-command" or "--stop". - -"--any-busy-time" and "--each-busy-time" are mutually exclusive. - -"--kill" and "--kill-query" are mutually exclusive. - -This tool accepts additional command-line arguments. Refer to the -"SYNOPSIS" and usage information for details. - - ---ask-pass - - Prompt for a password when connecting to MySQL. - - - ---charset - - short form: -A; type: string - - Default character set. If the value is utf8, sets Perl's binmode on - STDOUT to utf8, passes the mysql_enable_utf8 option to DBD::mysql, and runs SET - NAMES UTF8 after connecting to MySQL. Any other value sets binmode on STDOUT - without the utf8 layer, and runs SET NAMES after connecting to MySQL. - - - ---config - - type: Array - - Read this comma-separated list of config files; if specified, this must be the - first option on the command line. - - - ---daemonize - - Fork to the background and detach from the shell. POSIX operating systems - only. - - - ---defaults-file - - short form: -F; type: string - - Only read mysql options from the given file. You must give an absolute - pathname. - - - ---group-by - - type: string - - Apply matches to each class of queries grouped by this SHOW PROCESSLIST column. - In addition to the basic columns of SHOW PROCESSLIST (user, host, command, - state, etc.), queries can be matched by \ ``fingerprint``\ which abstracts the - SQL query in the \ ``Info``\ column. - - By default, queries are not grouped, so matches and actions apply to all - queries. 
Grouping allows matches and actions to apply to classes of - similar queries, if any queries in the class match. - - For example, detecting cache stampedes (see \ ``all-but-oldest``\ under - "--victims" for an explanation of that term) requires that queries are - grouped by the \ ``arg``\ attribute. This creates classes of identical queries - (stripped of comments). So queries \ ``"SELECT c FROM t WHERE id=1"``\ and - \ ``"SELECT c FROM t WHERE id=1"``\ are grouped into the same class, but - query \ ``"SELECT c FROM t WHERE id=3"``\ is not identical to the first two - queries so it is grouped into another class. Then when "--victims" - \ ``all-but-oldest``\ is specified, all but the oldest query in each class is - killed for each class of queries that matches the match criteria. - - - ---help - - Show help and exit. - - - ---host - - short form: -h; type: string; default: localhost - - Connect to host. - - - ---interval - - type: time - - How often to check for queries to kill. If "--busy-time" is not given, - then the default interval is 30 seconds. Else the default is half as often - as "--busy-time". If both "--interval" and "--busy-time" are given, - then the explicit "--interval" value is used. - - See also "--run-time". - - - ---log - - type: string - - Print all output to this file when daemonized. - - - ---password - - short form: -p; type: string - - Password to use when connecting. - - - ---pid - - type: string - - Create the given PID file when daemonized. The file contains the process ID of - the daemonized instance. The PID file is removed when the daemonized instance - exits. The program checks for the existence of the PID file when starting; if - it exists and the process with the matching PID exists, the program exits. - - - ---port - - short form: -P; type: int - - Port number to use for connection. - - - ---run-time - - type: time - - How long to run before exiting.
By default pt-kill runs forever, or until - its process is killed or stopped by the creation of a "--sentinel" file. - If this option is specified, pt-kill runs for the specified amount of time - and sleeps "--interval" seconds between each check of the PROCESSLIST. - - - ---sentinel - - type: string; default: /tmp/pt-kill-sentinel - - Exit if this file exists. - - The presence of the file specified by "--sentinel" will cause all - running instances of pt-kill to exit. You might find this handy to stop cron - jobs gracefully if necessary. See also "--stop". - - - ---set-vars - - type: string; default: wait_timeout=10000 - - Set these MySQL variables. Immediately after connecting to MySQL, this string - will be appended to SET and executed. - - - ---socket - - short form: -S; type: string - - Socket file to use for connection. - - - ---stop - - Stop running instances by creating the "--sentinel" file. - - Causes pt-kill to create the sentinel file specified by "--sentinel" and - exit. This should have the effect of stopping all running instances which are - watching the same sentinel file. - - - ---[no]strip-comments - - default: yes - - Remove SQL comments from queries in the Info column of the PROCESSLIST. - - - ---user - - short form: -u; type: string - - User for login if not current user. - - - ---version - - Show version and exit. - - - ---victims - - type: string; default: oldest - - Which of the matching queries in each class will be killed. After classes - have been matched/filtered, this option specifies which of the matching - queries in each class will be killed (or printed, etc.). The following - values are possible: - - - oldest - - Only kill the single oldest query. This is to prevent killing queries that - aren't really long-running, they're just long-waiting. This sorts matching - queries by Time and kills the one with the highest Time value. - - - - all - - Kill all queries in the class. - - - - all-but-oldest - - Kill all but the oldest query. 
This is the inverse of the \ ``oldest``\ value. - - This value can be used to prevent "cache stampedes", the condition where - several identical queries are executed and create a backlog while the first - query attempts to finish. Since all queries are identical, all but the first - query are killed so that it can complete and populate the cache. - - - - - ---wait-after-kill - - type: time - - Wait after killing a query, before looking for more to kill. The purpose of - this is to give blocked queries a chance to execute, so we don't kill a query - that's blocking a bunch of others, and then kill the others immediately - afterwards. - - - ---wait-before-kill - - type: time - - Wait before killing a query. The purpose of this is to give - "--execute-command" a chance to see the matching query and gather other - MySQL or system information before it's killed. - - - -QUERY MATCHES -============= - - -These options filter queries from their classes. If a query does not -match, it is removed from its class. The \ ``--ignore``\ options take precedence. -The matches for command, db, host, etc. correspond to the columns returned -by SHOW PROCESSLIST: Command, db, Host, etc. All pattern matches are -case-sensitive by default, but they can be made case-insensitive by specifying -a regex pattern like \ ``(?i-xsm:select)``\ . - -See also "GROUP, MATCH AND KILL". - - ---busy-time - - type: time; group: Query Matches - - Match queries that have been running for longer than this time. The queries - must be in Command=Query status. This matches a query's Time value as - reported by SHOW PROCESSLIST. - - - ---idle-time - - type: time; group: Query Matches - - Match queries that have been idle/sleeping for longer than this time. - The queries must be in Command=Sleep status. This matches a query's Time - value as reported by SHOW PROCESSLIST. - - - ---ignore-command - - type: string; group: Query Matches - - Ignore queries whose Command matches this Perl regex. 
- - See "--match-command". - - - ---ignore-db - - type: string; group: Query Matches - - Ignore queries whose db (database) matches this Perl regex. - - See "--match-db". - - - ---ignore-host - - type: string; group: Query Matches - - Ignore queries whose Host matches this Perl regex. - - See "--match-host". - - - ---ignore-info - - type: string; group: Query Matches - - Ignore queries whose Info (query) matches this Perl regex. - - See "--match-info". - - - ---[no]ignore-self - - default: yes; group: Query Matches - - Don't kill pt-kill's own connection. - - - ---ignore-state - - type: string; group: Query Matches; default: Locked - - Ignore queries whose State matches this Perl regex. The default is to keep - threads from being killed if they are locked waiting for another thread. - - See "--match-state". - - - ---ignore-user - - type: string; group: Query Matches - - Ignore queries whose user matches this Perl regex. - - See "--match-user". - - - ---match-all - - group: Query Matches - - Match all queries that are not ignored. If no ignore options are specified, - then every query matches (except replication threads, unless - "--replication-threads" is also specified). This option allows you to - specify negative matches, i.e. "match every query \ *except*\ ..." where the - exceptions are defined by specifying various \ ``--ignore``\ options. - - This option is \ *not*\ the same as "--victims" \ ``all``\ . This option matches - all queries within a class, whereas "--victims" \ ``all``\ specifies that all - matching queries in a class (however they matched) will be killed. Normally, - however, the two are used together because if, for example, you specify - "--victims" \ ``oldest``\ , then although all queries may match, only the oldest - will be killed. - - - ---match-command - - type: string; group: Query Matches - - Match only queries whose Command matches this Perl regex. - - Common Command values are: - - - .. 
code-block:: perl - - Query - Sleep - Binlog Dump - Connect - Delayed insert - Execute - Fetch - Init DB - Kill - Prepare - Processlist - Quit - Reset stmt - Table Dump - - - See `http://dev.mysql.com/doc/refman/5.1/en/thread-commands.html `_ for a full - list and description of Command values. - - - ---match-db - - type: string; group: Query Matches - - Match only queries whose db (database) matches this Perl regex. - - - ---match-host - - type: string; group: Query Matches - - Match only queries whose Host matches this Perl regex. - - The Host value often includes the port like "host:port". - - - ---match-info - - type: string; group: Query Matches - - Match only queries whose Info (query) matches this Perl regex. - - The Info column of the processlist shows the query that is being executed - or NULL if no query is being executed. - - - ---match-state - - type: string; group: Query Matches - - Match only queries whose State matches this Perl regex. - - Common State values are: - - - .. code-block:: perl - - Locked - login - copy to tmp table - Copying to tmp table - Copying to tmp table on disk - Creating tmp table - executing - Reading from net - Sending data - Sorting for order - Sorting result - Table lock - Updating - - - See `http://dev.mysql.com/doc/refman/5.1/en/general-thread-states.html `_ for - a full list and description of State values. - - - ---match-user - - type: string; group: Query Matches - - Match only queries whose User matches this Perl regex. - - - ---replication-threads - - group: Query Matches - - Allow matching and killing replication threads. - - By default, matches do not apply to replication threads; i.e. replication - threads are completely ignored. Specifying this option allows matches to - match (and potentially kill) replication threads on masters and slaves. - - - - -CLASS MATCHES -============= - - -These matches apply to entire query classes.
Classes are created by specifying -the "--group-by" option, else all queries are members of a single, default -class. - -See also "GROUP, MATCH AND KILL". - - ---any-busy-time - - type: time; group: Class Matches - - Match query class if any query has been running for longer than this time. - "Longer than" means that if you specify \ ``10``\ , for example, the class will - only match if there's at least one query that has been running for greater - than 10 seconds. - - See "--each-busy-time" for more details. - - - ---each-busy-time - - type: time; group: Class Matches - - Match query class if each query has been running for longer than this time. - "Longer than" means that if you specify \ ``10``\ , for example, the class will - only match if each and every query has been running for greater than 10 - seconds. - - See also "--any-busy-time" (to match a class if ANY query has been running - longer than the specified time) and "--busy-time". - - - ---query-count - - type: int; group: Class Matches - - Match query class if it has at least this many queries. When queries are - grouped into classes by specifying "--group-by", this option causes matches - to apply only to classes with at least this many queries. If "--group-by" - is not specified then this option causes matches to apply only if there - are at least this many queries in the entire SHOW PROCESSLIST. - - - ---verbose - - short form: -v - - Print information to STDOUT about what is being done. - - - - -ACTIONS -======= - - -These actions are taken for every matching query from all classes. -The actions are taken in this order: "--print", "--execute-command", -"--kill"/"--kill-query". This order allows "--execute-command" -to see the output of "--print" and the query before -"--kill"/"--kill-query". This may be helpful because pt-kill does -not pass any information to "--execute-command". - -See also "GROUP, MATCH AND KILL". 
- - ---execute-command - - type: string; group: Actions - - Execute this command when a query matches. - - After the command is executed, pt-kill has no control over it, so the command - is responsible for its own info gathering, logging, interval, etc. The - command is executed each time a query matches, so be careful that the command - behaves well when multiple instances are run. No information from pt-kill is - passed to the command. - - See also "--wait-before-kill". - - - ---kill - - group: Actions - - Kill the connection for matching queries. - - This option makes pt-kill kill the connections (a.k.a. processes, threads) that - have matching queries. Use "--kill-query" if you only want to kill - individual queries and not their connections. - - Unless "--print" is also given, no other information is printed that shows - that pt-kill matched and killed a query. - - See also "--wait-before-kill" and "--wait-after-kill". - - - ---kill-query - - group: Actions - - Kill matching queries. - - This option makes pt-kill kill matching queries. This requires MySQL 5.0 or - newer. Unlike "--kill" which kills the connection for matching queries, - this option only kills the query, not its connection. - - - ---print - - group: Actions - - Print a KILL statement for matching queries; does not actually kill queries. - - If you just want to see which queries match and would be killed without - actually killing them, specify "--print". To both kill and print - matching queries, specify both "--kill" and "--print". - - - - - -*********** -DSN OPTIONS -*********** - - -These DSN options are used to create a DSN. Each option is given like -\ ``option=value``\ . The options are case-sensitive, so P and p are not the -same option. There cannot be whitespace before or after the \ ``=``\ and -if the value contains whitespace it must be quoted. DSN options are -comma-separated. See the percona-toolkit manpage for full details.
- - -\* A - - dsn: charset; copy: yes - - Default character set. - - - -\* D - - dsn: database; copy: yes - - Default database. - - - -\* F - - dsn: mysql_read_default_file; copy: yes - - Only read default options from the given file - - - -\* h - - dsn: host; copy: yes - - Connect to host. - - - -\* p - - dsn: password; copy: yes - - Password to use when connecting. - - - -\* P - - dsn: port; copy: yes - - Port number to use for connection. - - - -\* S - - dsn: mysql_socket; copy: yes - - Socket file to use for connection. - - - -\* u - - dsn: user; copy: yes - - User for login if not current user. - - - - -*********** -ENVIRONMENT -*********** - - -The environment variable \ ``PTDEBUG``\ enables verbose debugging output to STDERR. -To enable debugging and capture all output to a file, run the tool like: - - -.. code-block:: perl - - PTDEBUG=1 pt-kill ... > FILE 2>&1 - - -Be careful: debugging output is voluminous and can generate several megabytes -of output. - - -******************* -SYSTEM REQUIREMENTS -******************* - - -You need Perl, DBI, DBD::mysql, and some core packages that ought to be -installed in any reasonably new version of Perl. - - -**** -BUGS -**** - - -For a list of known bugs, see `http://www.percona.com/bugs/pt-kill `_. - -Please report bugs at `https://bugs.launchpad.net/percona-toolkit `_. -Include the following information in your bug report: - - -\* Complete command-line used to run the tool - - - -\* Tool "--version" - - - -\* MySQL version of all servers involved - - - -\* Output from the tool including STDERR - - - -\* Input files (log/dump/config files, etc.) - - - -If possible, include debugging output by running the tool with \ ``PTDEBUG``\ ; -see "ENVIRONMENT". - - -*********** -DOWNLOADING -*********** - - -Visit `http://www.percona.com/software/percona-toolkit/ `_ to download the -latest release of Percona Toolkit. Or, get the latest release from the -command line: - - -.. 
code-block:: perl - - wget percona.com/get/percona-toolkit.tar.gz - - wget percona.com/get/percona-toolkit.rpm - - wget percona.com/get/percona-toolkit.deb - - -You can also get individual tools from the latest release: - - -.. code-block:: perl - - wget percona.com/get/TOOL - - -Replace \ ``TOOL``\ with the name of any tool. - - -******* -AUTHORS -******* - - -Baron Schwartz and Daniel Nichter - - -********************* -ABOUT PERCONA TOOLKIT -********************* - - -This tool is part of Percona Toolkit, a collection of advanced command-line -tools developed by Percona for MySQL support and consulting. Percona Toolkit -was forked from two projects in June, 2011: Maatkit and Aspersa. Those -projects were created by Baron Schwartz and developed primarily by him and -Daniel Nichter, both of whom are employed by Percona. Visit -`http://www.percona.com/software/ `_ for more software developed by Percona. - - -******************************** -COPYRIGHT, LICENSE, AND WARRANTY -******************************** - - -This program is copyright 2009-2011 Baron Schwartz, 2011 Percona Inc. -Feedback and improvements are welcome. - -THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation, version 2; OR the Perl Artistic License. On UNIX and similar -systems, you can issue \`man perlgpl' or \`man perlartistic' to read these -licenses. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA. 
- - -******* -VERSION -******* - - -pt-kill 1.0.1 - diff --git a/docs/user/pt-log-player.rst b/docs/user/pt-log-player.rst deleted file mode 100644 index 60b4a666..00000000 --- a/docs/user/pt-log-player.rst +++ /dev/null @@ -1,795 +0,0 @@ - -############# -pt-log-player -############# - -.. highlight:: perl - - -**** -NAME -**** - - -pt-log-player - Replay MySQL query logs. - - -******** -SYNOPSIS -******** - - -Usage: pt-log-player [OPTION...] [DSN] - -pt-log-player splits and plays slow log files. - -Split slow.log on Thread_id into 16 session files, save in ./sessions: - - -.. code-block:: perl - - pt-log-player --split Thread_id --session-files 16 --base-dir ./sessions slow.log - - -Play all those sessions on host1, save results in ./results: - - -.. code-block:: perl - - pt-log-player --play ./sessions --base-dir ./results h=host1 - - -Use pt-query-digest to summarize the results: - - -.. code-block:: perl - - pt-query-digest ./results/* - - - -***** -RISKS -***** - - -The following section is included to inform users about the potential risks, -whether known or unknown, of using this tool. The two main categories of risks -are those created by the nature of the tool (e.g. read-only tools vs. read-write -tools) and those created by bugs. - -This tool is meant to load a server as much as possible, for stress-testing -purposes. It is not designed to be used on production servers. - -At the time of this release there is a bug which causes pt-log-player to -exceed max open files during "--split". - -The authoritative source for updated information is always the online issue -tracking system. Issues that affect this tool will be marked as such. You can -see a list of such issues at the following URL: -`http://www.percona.com/bugs/pt-log-player `_. - -See also "BUGS" for more information on filing bugs and getting help. 
- - -*********** -DESCRIPTION -*********** - - -pt-log-player does two things: it splits MySQL query logs into session files -and it plays (executes) queries in session files on a MySQL server. Only -session files can be played; slow logs cannot be played directly without -being split. - -A session is a group of queries from the slow log that all share a common -attribute, usually Thread_id. The common attribute is specified with -"--split". Multiple sessions are saved into a single session file. -See "--session-files", "--max-sessions", "--base-file-name" and -"--base-dir". These session files are played with "--play". - -pt-log-player will "--play" session files in parallel using N number of -"--threads". (They're not technically threads, but we call them that -anyway.) Each thread will play all the sessions in its given session files. -The sessions are played as fast as possible--there are no delays--because the -goal is to stress-test and load-test the server. So be careful using this -script on a production server! - -Each "--play" thread writes its results to a separate file. These result -files are in slow log format so they can be aggregated and summarized with -pt-query-digest. See "OUTPUT". - - -****** -OUTPUT -****** - - -Both "--split" and "--play" have two outputs: status messages printed to -STDOUT to let you know what the script is doing, and session or result files -written to separate files saved in "--base-dir". You can suppress all -output to STDOUT for each with "--quiet", or increase output with -"--verbose". - -The session files written by "--split" are simple text files containing -queries grouped into sessions. For example: - - -.. code-block:: perl - - -- START SESSION 10 - - use foo - - SELECT col FROM foo_tbl - - -The format of these session files is important: each query must be a single -line separated by a single blank line. 
And the "-- START SESSION" comment -tells pt-log-player where individual sessions begin and end so that "--play" -can correctly fake Thread_id in its result files. - -The result files written by "--play" are in slow log format with a minimal -header: the only attributes printed are Thread_id, Query_time and Schema. - - -******* -OPTIONS -******* - - -Specify at least one of "--play", "--split" or "--split-random". - -"--play" and "--split" are mutually exclusive. - -This tool accepts additional command-line arguments. Refer to the -"SYNOPSIS" and usage information for details. - - ---ask-pass - - group: Play - - Prompt for a password when connecting to MySQL. - - - ---base-dir - - type: string; default: ./ - - Base directory for "--split" session files and "--play" result file. - - - ---base-file-name - - type: string; default: session - - Base file name for "--split" session files and "--play" result file. - - Each "--split" session file will be saved as BASE_FILE_NAME-N.txt, where - N is a four digit, zero-padded session ID. For example: session-0003.txt. - - Each "--play" result file will be saved as BASE_FILE_NAME-results-PID.txt, - where PID is the process ID of the executing thread. - - All files are saved in "--base-dir". - - - ---charset - - short form: -A; type: string; group: Play - - Default character set. If the value is utf8, sets Perl's binmode on STDOUT to - utf8, passes the mysql_enable_utf8 option to DBD::mysql, and runs SET NAMES UTF8 - after connecting to MySQL. Any other value sets binmode on STDOUT without the - utf8 layer, and runs SET NAMES after connecting to MySQL. - - - ---config - - type: Array - - Read this comma-separated list of config files; if specified, this must be the - first option on the command line. - - - ---defaults-file - - short form: -F; type: string - - Only read mysql options from the given file. - - - ---dry-run - - Print which processes play which session files then exit.
- - - ---filter - - type: string; group: Split - - Discard "--split" events for which this Perl code doesn't return true. - - This option only works with "--split". - - This option allows you to inject Perl code into the tool to affect how the - tool runs. Usually your code should examine \ ``$event``\ to decide whether - or not to allow the event. \ ``$event``\ is a hashref of attributes and values of - the event being filtered. Or, your code could add new attribute-value pairs - to \ ``$event``\ for use by other options that accept event attributes as their - value. You can find an explanation of the structure of \ ``$event``\ at - `http://code.google.com/p/maatkit/wiki/EventAttributes `_. - - There are two ways to supply your code: on the command line or in a file. - If you supply your code on the command line, it is injected into the following - subroutine where \ ``$filter``\ is your code: - - - .. code-block:: perl - - sub { - MKDEBUG && _d('callback: filter'); - my( $event ) = shift; - ( $filter ) && return $event; - } - - - Therefore you must ensure two things: first, that you correctly escape any - special characters that need to be escaped on the command line for your - shell, and two, that your code is syntactically valid when injected into - the subroutine above. - - Here's an example filter supplied on the command line that discards - events that are not SELECT statements: - - - .. code-block:: perl - - --filter '$event->{arg} =~ m/^select/i' - - - The second way to supply your code is in a file. If your code is too complex - to be expressed on the command line that results in valid syntax in the - subroutine above, then you need to put the code in a file and give the file - name as the value to "--filter". The file should not contain a shebang - (\ ``#!/usr/bin/perl``\ ) line. The entire contents of the file is injected into - the following subroutine: - - - ..
code-block:: perl - - sub { - MKDEBUG && _d('callback: filter'); - my( $event ) = shift; - $filter && return $event; - } - - - That subroutine is almost identical to the one above except your code is - not wrapped in parentheses. This allows you to write multi-line code like: - - - .. code-block:: perl - - my $event_ok; - if (...) { - $event_ok = 1; - } - else { - $event_ok = 0; - } - $event_ok - - - Notice that the last line is not syntactically valid by itself, but it - becomes syntactically valid when injected into the subroutine because it - becomes: - - - .. code-block:: perl - - $event_ok && return $event; - - - If your code doesn't compile, the tool will die with an error. Even if your - code compiles, it may crash the tool during runtime if, for example, it tries - to pattern match an undefined value. No safeguards of any kind are provided so - code carefully! - - - ---help - - Show help and exit. - - - ---host - - short form: -h; type: string; group: Play - - Connect to host. - - - ---iterations - - type: int; default: 1; group: Play - - How many times each thread should play all its session files. - - - ---max-sessions - - type: int; default: 5000000; group: Split - - Maximum number of sessions to "--split". - - By default, \ ``pt-log-player``\ tries to split every session from the log file. - For huge logs, however, this can result in millions of sessions. This - option causes only the first N number of sessions to be saved. All sessions - after this number are ignored, but sessions split before this number will - continue to have their queries split even if those queries appear near the end - of the log and after this number has been reached. - - - ---only-select - - group: Play - - Play only SELECT and USE queries; ignore all others. - - - ---password - - short form: -p; type: string; group: Play - - Password to use when connecting. - - - ---pid - - type: string - - Create the given PID file. The file contains the process ID of the script.
- The PID file is removed when the script exits. Before starting, the script - checks if the PID file already exists. If it does not, then the script creates - and writes its own PID to it. If it does, then the script checks the following: - if the file contains a PID and a process is running with that PID, then - the script dies; or, if there is no process running with that PID, then the - script overwrites the file with its own PID and starts; else, if the file - contains no PID, then the script dies. - - - ---play - - type: string; group: Play - - Play (execute) session files created by "--split". - - The argument to play must be a comma-separated list of session files - created by "--split" or a directory. If the argument is a directory, - ALL files in that directory will be played. - - - ---port - - short form: -P; type: int; group: Play - - Port number to use for connection. - - - ---print - - group: Play - - Print queries instead of playing them; requires "--play". - - You must also specify "--play" with "--print". Although the queries - will not be executed, "--play" is required to specify which session files to - read. - - - ---quiet - - short form: -q - - Do not print anything; disables "--verbose". - - - ---[no]results - - default: yes - - Print "--play" results to files in "--base-dir". - - - ---session-files - - type: int; default: 8; group: Split - - Number of session files to create with "--split". - - The number of session files should either be equal to the number of - "--threads" you intend to "--play" or be an even multiple of - "--threads". This number is important for maximum performance because it: - - - .. code-block:: perl - - * allows each thread to have roughly the same amount of sessions to play - * avoids having to open/close many session files - * avoids disk IO overhead by doing large sequential reads - - - You may want to increase this number beyond "--threads" if each session - file becomes too large. 
For example, splitting a 20G log into 8 session - files may yield roughly eight 2G session files. - - See also "--max-sessions". - - - ---set-vars - - type: string; group: Play; default: wait_timeout=10000 - - Set these MySQL variables. Immediately after connecting to MySQL, this string - will be appended to SET and executed. - - - ---socket - - short form: -S; type: string; group: Play - - Socket file to use for connection. - - - ---split - - type: string; group: Split - - Split log by given attribute to create session files. - - Valid attributes are any which appear in the log: Thread_id, Schema, - etc. - - - ---split-random - - group: Split - - Split log without an attribute, write queries round-robin to session files. - - This option, if specified, overrides "--split" and causes the log to be - split query-by-query, writing each query to the next session file in round-robin - style. If you don't care about "sessions" and just want to split a log into - N session files and the relation or order of the queries does not matter, - then use this option. - - - ---threads - - type: int; default: 2; group: Play - - Number of threads used to play sessions concurrently. - - Specifies the number of parallel processes to run. The default is 2. On - GNU/Linux machines, the default is the number of times 'processor' appears in - \ */proc/cpuinfo*\ . On Windows, the default is read from the environment. - In any case, the default is at least 2, even when there's only a single - processor. - - See also "--session-files". - - - ---type - - type: string; group: Split - - The type of log to "--split" (default slowlog). The permitted types are - - - binlog - - Split the output of running \ ``mysqlbinlog``\ against a binary log file. - Currently, splitting binary logs does not always work well depending - on what the binary logs contain. Be sure to check the session files - after splitting to ensure proper "OUTPUT".
- - If the binary log contains row-based replication data, you need to run - \ ``mysqlbinlog``\ with options \ ``--base64-output=decode-rows --verbose``\ , - else invalid statements will be written to the session files. - - - - genlog - - Split a general log file. - - - - slowlog - - Split a log file in any variation of MySQL slow-log format. - - - - - ---user - - short form: -u; type: string; group: Play - - User for login if not current user. - - - ---verbose - - short form: -v; cumulative: yes; default: 0 - - Increase verbosity; can be specified multiple times. - - This option is disabled by "--quiet". - - - ---version - - Show version and exit. - - - ---[no]warnings - - default: no; group: Play - - Print warnings about SQL errors such as invalid queries to STDERR. - - - - -*********** -DSN OPTIONS -*********** - - -These DSN options are used to create a DSN. Each option is given like -\ ``option=value``\ . The options are case-sensitive, so P and p are not the -same option. There cannot be whitespace before or after the \ ``=``\ and -if the value contains whitespace it must be quoted. DSN options are -comma-separated. See the percona-toolkit manpage for full details. - - -\* A - - dsn: charset; copy: yes - - Default character set. - - - -\* D - - dsn: database; copy: yes - - Default database. - - - -\* F - - dsn: mysql_read_default_file; copy: yes - - Only read default options from the given file - - - -\* h - - dsn: host; copy: yes - - Connect to host. - - - -\* p - - dsn: password; copy: yes - - Password to use when connecting. - - - -\* P - - dsn: port; copy: yes - - Port number to use for connection. - - - -\* S - - dsn: mysql_socket; copy: yes - - Socket file to use for connection. - - - -\* u - - dsn: user; copy: yes - - User for login if not current user. - - - - -*********** -ENVIRONMENT -*********** - - -The environment variable \ ``PTDEBUG``\ enables verbose debugging output to STDERR. 
-To enable debugging and capture all output to a file, run the tool like: - - -.. code-block:: perl - - PTDEBUG=1 pt-log-player ... > FILE 2>&1 - - -Be careful: debugging output is voluminous and can generate several megabytes -of output. - - -******************* -SYSTEM REQUIREMENTS -******************* - - -You need Perl, DBI, DBD::mysql, and some core packages that ought to be -installed in any reasonably new version of Perl. - - -**** -BUGS -**** - - -For a list of known bugs, see `http://www.percona.com/bugs/pt-log-player `_. - -Please report bugs at `https://bugs.launchpad.net/percona-toolkit `_. -Include the following information in your bug report: - - -\* Complete command-line used to run the tool - - - -\* Tool "--version" - - - -\* MySQL version of all servers involved - - - -\* Output from the tool including STDERR - - - -\* Input files (log/dump/config files, etc.) - - - -If possible, include debugging output by running the tool with \ ``PTDEBUG``\ ; -see "ENVIRONMENT". - - -*********** -DOWNLOADING -*********** - - -Visit `http://www.percona.com/software/percona-toolkit/ `_ to download the -latest release of Percona Toolkit. Or, get the latest release from the -command line: - - -.. code-block:: perl - - wget percona.com/get/percona-toolkit.tar.gz - - wget percona.com/get/percona-toolkit.rpm - - wget percona.com/get/percona-toolkit.deb - - -You can also get individual tools from the latest release: - - -.. code-block:: perl - - wget percona.com/get/TOOL - - -Replace \ ``TOOL``\ with the name of any tool. - - -******* -AUTHORS -******* - - -Daniel Nichter - - -********************* -ABOUT PERCONA TOOLKIT -********************* - - -This tool is part of Percona Toolkit, a collection of advanced command-line -tools developed by Percona for MySQL support and consulting. Percona Toolkit -was forked from two projects in June, 2011: Maatkit and Aspersa. 
Those -projects were created by Baron Schwartz and developed primarily by him and -Daniel Nichter, both of whom are employed by Percona. Visit -`http://www.percona.com/software/ `_ for more software developed by Percona. - - -******************************** -COPYRIGHT, LICENSE, AND WARRANTY -******************************** - - -This program is copyright 2008-2011 Percona Inc. -Feedback and improvements are welcome. - -THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation, version 2; OR the Perl Artistic License. On UNIX and similar -systems, you can issue \`man perlgpl' or \`man perlartistic' to read these -licenses. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA. - - -******* -VERSION -******* - - -pt-log-player 1.0.1 - diff --git a/docs/user/pt-mext.rst b/docs/user/pt-mext.rst deleted file mode 100644 index bfc69bf9..00000000 --- a/docs/user/pt-mext.rst +++ /dev/null @@ -1,224 +0,0 @@ - -####### -pt-mext -####### - -.. highlight:: perl - - -**** -NAME -**** - - -pt-mext - Look at many samples of MySQL \ ``SHOW GLOBAL STATUS``\ side-by-side. - - -******** -SYNOPSIS -******** - - -Usage: pt-mext [OPTIONS] -- COMMAND - -pt-mext columnizes repeated output from a program like mysqladmin extended. - -Get output from \ ``mysqladmin``\ : - - -.. code-block:: perl - - pt-mext -r -- mysqladmin ext -i10 -c3" - - -Get output from a file: - - -.. 
code-block:: perl - - pt-mext -r -- cat mysqladmin-output.txt - - - -***** -RISKS -***** - - -The following section is included to inform users about the potential risks, -whether known or unknown, of using this tool. The two main categories of risks -are those created by the nature of the tool (e.g. read-only tools vs. read-write -tools) and those created by bugs. - -pt-mext is a read-only tool. It should be very low-risk. - -At the time of this release, we know of no bugs that could cause serious harm -to users. - -The authoritative source for updated information is always the online issue -tracking system. Issues that affect this tool will be marked as such. You can -see a list of such issues at the following URL: -`http://www.percona.com/bugs/pt-mext `_. - -See also "BUGS" for more information on filing bugs and getting help. - - -*********** -DESCRIPTION -*********** - - -pt-mext executes the \ ``COMMAND``\ you specify, and reads through the result one -line at a time. It places each line into a temporary file. When it finds a -blank line, it assumes that a new sample of SHOW GLOBAL STATUS is starting, -and it creates a new temporary file. At the end of this process, it has a -number of temporary files. It joins the temporary files together side-by-side -and prints the result. If the "-r" option is given, it first subtracts -each sample from the one after it before printing results. - - -******* -OPTIONS -******* - - - --r - - Relative: subtract each column from the previous column. - - - - -*********** -ENVIRONMENT -*********** - - -This tool does not use any environment variables. - - -******************* -SYSTEM REQUIREMENTS -******************* - - -This tool requires the Bourne shell (\ */bin/sh*\ ) and the seq program. - - -**** -BUGS -**** - - -For a list of known bugs, see `http://www.percona.com/bugs/pt-mext `_. - -Please report bugs at `https://bugs.launchpad.net/percona-toolkit `_. 
-Include the following information in your bug report: - - -\* Complete command-line used to run the tool - - - -\* Tool "--version" - - - -\* MySQL version of all servers involved - - - -\* Output from the tool including STDERR - - - -\* Input files (log/dump/config files, etc.) - - - -If possible, include debugging output by running the tool with \ ``PTDEBUG``\ ; -see "ENVIRONMENT". - - -*********** -DOWNLOADING -*********** - - -Visit `http://www.percona.com/software/percona-toolkit/ `_ to download the -latest release of Percona Toolkit. Or, get the latest release from the -command line: - - -.. code-block:: perl - - wget percona.com/get/percona-toolkit.tar.gz - - wget percona.com/get/percona-toolkit.rpm - - wget percona.com/get/percona-toolkit.deb - - -You can also get individual tools from the latest release: - - -.. code-block:: perl - - wget percona.com/get/TOOL - - -Replace \ ``TOOL``\ with the name of any tool. - - -******* -AUTHORS -******* - - -Baron Schwartz - - -********************* -ABOUT PERCONA TOOLKIT -********************* - - -This tool is part of Percona Toolkit, a collection of advanced command-line -tools developed by Percona for MySQL support and consulting. Percona Toolkit -was forked from two projects in June, 2011: Maatkit and Aspersa. Those -projects were created by Baron Schwartz and developed primarily by him and -Daniel Nichter, both of whom are employed by Percona. Visit -`http://www.percona.com/software/ `_ for more software developed by Percona. - - -******************************** -COPYRIGHT, LICENSE, AND WARRANTY -******************************** - - -This program is copyright 2010 Baron Schwartz, 2011 Percona Inc. -Feedback and improvements are welcome. - -THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation, version 2; OR the Perl Artistic License. On UNIX and similar -systems, you can issue \`man perlgpl' or \`man perlartistic' to read these -licenses. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA. - - -******* -VERSION -******* - - -pt-mext 1.0.1 - diff --git a/docs/user/pt-mysql-summary.rst b/docs/user/pt-mysql-summary.rst deleted file mode 100644 index 4c0e8696..00000000 --- a/docs/user/pt-mysql-summary.rst +++ /dev/null @@ -1,233 +0,0 @@ - -################ -pt-mysql-summary -################ - -.. highlight:: perl - - -**** -NAME -**** - - -pt-mysql-summary - Summarize MySQL information in a nice way. - - -******** -SYNOPSIS -******** - - -Usage: pt-mysql-summary [MYSQL-OPTIONS] - -pt-mysql-summary conveniently summarizes the status and configuration of a -MySQL database server so that you can learn about it at a glance. It is not -a tuning tool or diagnosis tool. It produces a report that is easy to diff -and can be pasted into emails without losing the formatting. It should work -well on any modern UNIX systems. - - -***** -RISKS -***** - - -The following section is included to inform users about the potential risks, -whether known or unknown, of using this tool. The two main categories of risks -are those created by the nature of the tool (e.g. read-only tools vs. read-write -tools) and those created by bugs. - -pt-mysql-summary is a read-only tool. It should be very low-risk. - -At the time of this release, we know of no bugs that could cause serious harm -to users. - -The authoritative source for updated information is always the online issue -tracking system. Issues that affect this tool will be marked as such. 
You can -see a list of such issues at the following URL: -`http://www.percona.com/bugs/pt-mysql-summary `_. - -See also "BUGS" for more information on filing bugs and getting help. - - -*********** -DESCRIPTION -*********** - - -pt-mysql-summary works by connecting to a MySQL database server and querying -it for status and configuration information. It saves these bits of data -into files in /tmp, and then formats them neatly with awk and other scripting -languages. - -To use, simply execute it. Optionally add the same command-line options -you would use to connect to MySQL, like \ ``pt-mysql-summary --user=foo``\ . - -The tool interacts minimally with the server upon which it runs. It assumes -that you'll run it on the same server you're inspecting, and therefore it -assumes that it will be able to find the my.cnf configuration file, for -example. However, it should degrade gracefully if this is not the case. -Note, however, that its output does not indicate which information comes from -the MySQL database and which comes from the host operating system, so it is -possible for confusing output to be generated if you run the tool on one -server and direct it to connect to a MySQL database server running on another -server. - - -************** -Fuzzy-Rounding -************** - - -Many of the outputs from this tool are deliberately rounded to show their -magnitude but not the exact detail. This is called fuzzy-rounding. The idea -is that it doesn't matter whether a server is running 918 queries per second -or 921 queries per second; such a small variation is insignificant, and only -makes the output hard to compare to other servers. Fuzzy-rounding rounds in -larger increments as the input grows. It begins by rounding to the nearest 5, -then the nearest 10, nearest 25, and then repeats by a factor of 10 larger -(50, 100, 250), and so on, as the input grows. - - -******* -OPTIONS -******* - - -This tool does not have any command-line options of its own. 
All options -are passed to \ ``mysql``\ . - - -*********** -ENVIRONMENT -*********** - - -This tool does not use any environment variables. - - -******************* -SYSTEM REQUIREMENTS -******************* - - -This tool requires Bash v3 or newer. - - -**** -BUGS -**** - - -For a list of known bugs, see `http://www.percona.com/bugs/pt-mysql-summary `_. - -Please report bugs at `https://bugs.launchpad.net/percona-toolkit `_. -Include the following information in your bug report: - - -\* Complete command-line used to run the tool - - - -\* Tool "--version" - - - -\* MySQL version of all servers involved - - - -\* Output from the tool including STDERR - - - -\* Input files (log/dump/config files, etc.) - - - -If possible, include debugging output by running the tool with \ ``PTDEBUG``\ ; -see "ENVIRONMENT". - - -*********** -DOWNLOADING -*********** - - -Visit `http://www.percona.com/software/percona-toolkit/ `_ to download the -latest release of Percona Toolkit. Or, get the latest release from the -command line: - - -.. code-block:: perl - - wget percona.com/get/percona-toolkit.tar.gz - - wget percona.com/get/percona-toolkit.rpm - - wget percona.com/get/percona-toolkit.deb - - -You can also get individual tools from the latest release: - - -.. code-block:: perl - - wget percona.com/get/TOOL - - -Replace \ ``TOOL``\ with the name of any tool. - - -******* -AUTHORS -******* - - -Baron Schwartz - - -********************* -ABOUT PERCONA TOOLKIT -********************* - - -This tool is part of Percona Toolkit, a collection of advanced command-line -tools developed by Percona for MySQL support and consulting. Percona Toolkit -was forked from two projects in June, 2011: Maatkit and Aspersa. Those -projects were created by Baron Schwartz and developed primarily by him and -Daniel Nichter, both of whom are employed by Percona. Visit -`http://www.percona.com/software/ `_ for more software developed by Percona. 
- - -******************************** -COPYRIGHT, LICENSE, AND WARRANTY -******************************** - - -This program is copyright 2010-2011 Baron Schwartz, 2011 Percona Inc. -Feedback and improvements are welcome. - -THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation, version 2; OR the Perl Artistic License. On UNIX and similar -systems, you can issue \`man perlgpl' or \`man perlartistic' to read these -licenses. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA. - - -******* -VERSION -******* - - -pt-mysql-summary 1.0.1 - diff --git a/docs/user/pt-online-schema-change.rst b/docs/user/pt-online-schema-change.rst deleted file mode 100644 index 8d5bc653..00000000 --- a/docs/user/pt-online-schema-change.rst +++ /dev/null @@ -1,807 +0,0 @@ - -####################### -pt-online-schema-change -####################### - -.. highlight:: perl - - -**** -NAME -**** - - -pt-online-schema-change - Perform online, non-blocking table schema changes. - - -******** -SYNOPSIS -******** - - -Usage: pt-online-schema-change [OPTION...] DSN - -pt-online-schema-change performs online, non-blocking schema changes to a table. -The table to change must be specified in the DSN \ ``t``\ part, like \ ``t=my_table``\ . -The table can be database-qualified, or the database can be specified with the -"--database" option. - -Change the table's engine to InnoDB: - - -.. 
code-block:: perl - - pt-online-schema-change \ - h=127.1,t=db.tbl \ - --alter "ENGINE=InnoDB" \ - --drop-tmp-table - - -Rebuild but do not alter the table, and keep the temporary table: - - -.. code-block:: perl - - pt-online-schema-change h=127.1,t=tbl --database db - - -Add column to parent table, update child table foreign key constraints: - - -.. code-block:: perl - - pt-online-schema-change \ - h=127.1,D=db,t=parent \ - --alter "ADD COLUMN (foo INT)" \ - --child-tables child1,child2 \ - --update-foreign-keys-method drop_tmp_table - - - -***** -RISKS -***** - - -The following section is included to inform users about the potential risks, -whether known or unknown, of using this tool. The two main categories of risks -are those created by the nature of the tool (e.g. read-only tools vs. read-write -tools) and those created by bugs. - -pt-online-schema-change reads, writes, alters and drops tables. Although -it is tested, do not use it in production until you have thoroughly tested it -in your environment! - -This tool has not been tested with replication; it may break replication. -See "REPLICATION". - -At the time of this release there are no known bugs that pose a serious risk. - -The authoritative source for updated information is always the online issue -tracking system. Issues that affect this tool will be marked as such. You can -see a list of such issues at the following URL: -`http://www.percona.com/bugs/pt-online-schema-change `_. - -See also "BUGS" for more information on filing bugs and getting help. - - -*********** -DESCRIPTION -*********** - - -pt-online-schema-change performs online, non-blocking schema changes to tables. -Only one table can be altered at a time because triggers are used to capture -and synchronize changes between the table and the temporary table that -will take its place once it has been altered. Since triggers are used, this -tool only works with MySQL 5.0.2 and newer. 
- -The table to alter is specified by the DSN \ ``t``\ part on the command line, -as shown in the "SYNOPSIS" examples. A database must also be specified -either by the DSN \ ``D``\ part or by the "--database" option. - -If you're using replication, read "REPLICATION" or else you may break -replication. Performing an online schema change in a replication environment -requires extra planning and care. - -In brief, this tool works by creating a temporary table which is a copy of -the original table (the one being altered). (The temporary table is not -created like \ ``CREATE TEMPORARY TABLE``\ ; we call it temporary because it -ultimately replaces the original table.) The temporary table is altered, -then triggers are defined on the original table to capture changes made on -it and apply them to the temporary table. This keeps the two tables in -sync. Then all rows are copied from the original table to the temporary -table; this part can take a while. When done copying rows, the two tables -are swapped by using \ ``RENAME TABLE``\ . At this point there are two copies -of the table: the old table which used to be the original table, and the -new table which used to be the temporary table but now has the same name -as the original table. If "--drop-old-table" is specified, then the -old table is dropped. - -For example, if you alter table \ ``foo``\ , the tool will create table -\ ``__tmp_foo``\ , alter it, define triggers on \ ``foo``\ , and then copy rows -from \ ``foo``\ to \ ``__tmp_foo``\ . Once all rows are copied, \ ``foo``\ is renamed -to \ ``__old_foo``\ and \ ``__tmp_foo``\ is renamed to \ ``foo``\ . -If "--drop-old-table" is specified, then \ ``__old_foo``\ is dropped. - -The tool performs the following steps: - - -.. code-block:: perl - - 1. Sanity checks - 2. Chunking - 3. Online schema change - - -The first two steps cannot be skipped.
The sanity checks help ensure that -running the tool will work and not encounter problems halfway through the -whole process. Chunking is required during the third step when rows from the -old table are copied to the new table. Currently, only tables with a -single-column unique index can be chunked. If there is any problem in these -two steps, the tool will die. - -Most of the tool's work is done in the third step which has 6 phases: - - -.. code-block:: perl - - 1. Create and alter temporary table - 2. Capture changes from the table to the temporary table - 3. Copy rows from the table to the temporary table - 4. Synchronize the table and the temporary table - 5. Swap/rename the table and the temporary table - 6. Cleanup - - -There are several ways to accomplish an online schema change which differ -in how changes are captured and synced (phases 2 and 4), how rows are copied -(phase 3), and how the tables are swapped (phase 5). Currently, this tool -employs synchronous triggers (Shlomi's method), \ ``INSERT-SELECT``\ , and -\ ``RENAME TABLE``\ respectively for these phases. - -Here are options related to each phase: - - -.. code-block:: perl - - 1. --[no]create-tmp-table, --alter, --tmp-table - 2. (none) - 3. --chunk-size, --sleep - 4. (none) - 5. --[no]rename-tables - 6. --drop-old-table - - -Options "--check-tables-and-exit" and "--print" are helpful to see what -the tool might do before actually doing it. - - -*********** -REPLICATION -*********** - - -In brief: update slaves first if columns are added or removed. Certain -ALTER changes like ENGINE may not affect replication. - - -****** -OUTPUT -****** - - -Output to STDOUT is very verbose and should tell you everything that the -tool is doing. Warnings, errors, and "--progress" are printed to STDERR. - - -******* -OPTIONS -******* - - -This tool accepts additional command-line arguments. Refer to the -"SYNOPSIS" and usage information for details.
- - ---alter - - type: string - - Semicolon-separated list of \ ``ALTER TABLE``\ statements to apply to the new table. - The statements should not contain \ ``ALTER TABLE``\ , just what would follow that - clause. For example, if you want to \ ``ALTER TABLE ENGINE=InnoDB``\ , the value - would be \ ``ENGINE=InnoDB``\ . - - The value can also be a filename which contains statements, one per line - with no blank lines and no trailing semicolons. Each statement will be - applied in the order it appears in the file. - - - ---ask-pass - - Prompt for a password when connecting to MySQL. - - - ---bin-log - - Allow binary logging (\ ``SET SQL_LOG_BIN=1``\ ). By default binary logging is - turned off because in most cases the "--tmp-table" does not need to - be replicated. Also, performing an online schema change in a replication - environment requires careful planning else replication may be broken; - see "REPLICATION". - - - ---charset - - short form: -A; type: string - - Default character set. If the value is utf8, sets Perl's binmode on - STDOUT to utf8, passes the mysql_enable_utf8 option to DBD::mysql, and runs SET - NAMES UTF8 after connecting to MySQL. Any other value sets binmode on STDOUT - without the utf8 layer, and runs SET NAMES after connecting to MySQL. - - - ---check-tables-and-exit - - Check that the table can be altered then exit; do not alter the table. - If you just want to see that the tool can/will work for the given table, - specify this option. Even if all checks pass, the tool may still encounter - problems if, for example, one of the "--alter" statements uses - incorrect syntax. - - - ---child-tables - - type: string - - Foreign key constraints in these (child) tables reference the table. - If the table being altered is a parent to tables which reference it with - foreign key constraints, you must specify those child tables with this option - so that the tool will update the foreign key constraints after renaming - tables. 
The list of child tables is comma-separated, not quoted, and not - database-qualified (the database is assumed to be the same as the table). - If you specify a table that doesn't exist, it is ignored. - - Or you can specify just \ ``auto_detect``\ and the tool will query the - \ ``INFORMATION_SCHEMA``\ to auto-detect any foreign key constraints on the table. - - When specifying this option, you must also specify - "--update-foreign-keys-method". - - - ---chunk-size - - type: string; default: 1000 - - Number of rows or data size per chunk. Data sizes are specified with a - suffix of k=kibibytes, M=mebibytes, G=gibibytes. Data sizes are converted - to a number of rows by dividing by the average row length. - - - ---cleanup-and-exit - - Cleanup and exit; do not alter the table. If a previous run fails, you - may need to use this option to remove any temporary tables, triggers, - outfiles, etc. that were left behind before another run will succeed. - - - ---config - - type: Array - - Read this comma-separated list of config files; if specified, this must be the - first option on the command line. - - - ---[no]create-tmp-table - - default: yes - - Create the "--tmp-table" with \ ``CREATE TABLE LIKE``\ . The temporary table - to which the "--alter" statements are applied is automatically created - by default with the name \ ``__tmp_TABLE``\ where \ ``TABLE``\ is the original table - specified by the DSN on the command line. If you want to create the temporary - table manually before running this tool, then you must specify - \ ``--no-create-tmp-table``\ \ **and**\ "--tmp-table" so the tool will use your - temporary table. - - - ---database - - short form: -D; type: string - - Database of the table. You can also specify the database with the \ ``D``\ part - of the DSN given on the command line. - - - ---defaults-file - - short form: -F; type: string - - Only read mysql options from the given file. You must give an absolute - pathname.
- - - ---drop-old-table - - Drop the original table after it's swapped with the "--tmp-table". - After the original table is renamed/swapped with the "--tmp-table" - it becomes the "old table". By default, the old table is not dropped - because if there are problems with the "new table" (the temporary table - swapped for the original table), then the old table can be restored. - - If altering a table with foreign key constraints, you may need to specify - this option depending on which "--update-foreign-keys-method" you choose. - - - ---[no]foreign-key-checks - - default: yes - - Enforce foreign key checks (FOREIGN_KEY_CHECKS=1). - - - ---help - - Show help and exit. - - - ---host - - short form: -h; type: string - - Connect to host. - - - ---password - - short form: -p; type: string - - Password to use when connecting. - - - ---pid - - type: string - - Create the given PID file. The file contains the process ID of the tool's - instance. The PID file is removed when the tool exits. The tool checks for - the existence of the PID file when starting; if it exists and the process with - the matching PID exists, the tool exits. - - - ---port - - short form: -P; type: int - - Port number to use for connection. - - - ---print - - Print SQL statements to STDOUT instead of executing them. Specifying this - option allows you to see most of the statements that the tool would execute. - - - ---progress - - type: array; default: time,30 - - Print progress reports to STDERR while copying rows. - - The value is a comma-separated list with two parts. The first part can be - percentage, time, or iterations; the second part specifies how often an update - should be printed, in percentage, seconds, or number of iterations. - - - ---quiet - - short form: -q - - Do not print messages to STDOUT. Errors and warnings are still printed to - STDERR. - - - ---[no]rename-tables - - default: yes - - Rename/swap the original table and the "--tmp-table". 
This option - essentially completes the online schema change process by making the - temporary table with the new schema take the place of the original table. - The original table becomes the "old table" and is dropped if - "--drop-old-table" is specified. - - - ---set-vars - - type: string; default: wait_timeout=10000 - - Set these MySQL variables. Immediately after connecting to MySQL, this string - will be appended to SET and executed. - - - ---sleep - - type: float; default: 0 - - How long to sleep between chunks while copying rows. The time has micro-second - precision, so you can specify fractions of seconds like \ ``0.1``\ . - - - ---socket - - short form: -S; type: string - - Socket file to use for connection. - - - ---tmp-table - - type: string - - Temporary table if \ ``--no-create-tmp-table``\ is specified. If you specify - \ ``--no-create-tmp-table``\ , then you must also specify this option to tell - the tool which table to use as the temporary table. The temporary table - and the original table are renamed/swapped unless \ ``--no-rename-tables``\ is - specified. - - The default behavior, when this option is not specified and - \ ``--[no]create-tmp-table``\ is true, is to create a temporary table named - \ ``__tmp_TABLE``\ where \ ``TABLE``\ is the original table specified by the DSN - on the command line. - - - ---update-foreign-keys-method - - type: string - - Method for updating foreign key constraints in "--child-tables". If - "--child-tables" is specified, the tool will need to ensure that foreign - key constraints in those tables continue to reference the original table - after it is renamed and/or dropped. This is necessary because when a parent - table is renamed, MySQL automatically updates all child table - foreign key constraints that reference the renamed table so that the rename - does not break foreign key constraints. This poses a problem for this tool.
- - For example: if the table being altered is \ ``foo``\ , then \ ``foo``\ is renamed - to \ ``__old_foo``\ when it is swapped with the "--tmp-table". - Any foreign key references to \ ``foo``\ before it is swapped/renamed are renamed - automatically by MySQL to \ ``__old_foo``\ . We do not want this; we want those - foreign key references to continue to reference \ ``foo``\ . - - There are currently two methods to solve this problem: - - - rebuild_constraints - - Drop and re-add child table foreign key constraints to reference the new table. - (The new table is the temporary table after being renamed/swapped. To MySQL - it's a new table because it does not know that it's a copy of the original - table). This method parses foreign key constraints referencing the original - table from all child tables, drops them, then re-adds them referencing the - new table. - - This method uses \ ``ALTER TABLE``\ which can be slow and blocking, but it is - safer because the old table does not need to be dropped. So if there's a - problem with the new table and "--drop-old-table" was not specified, - then the original table can be restored. - - - - drop_old_table - - Disable foreign key checks (FOREIGN_KEY_CHECKS=0) then drop the original table. - This method bypasses MySQL's auto-renaming feature by disabling foreign key - checks, dropping the original table, then renaming the temporary table with - the same name. Foreign key checks must be disabled to drop the table because it is - referenced by foreign key constraints. Since the original table is not renamed, - MySQL does not auto-rename references to it. Then the temporary table is - renamed to the same name so child table references are maintained. - So this method requires "--drop-old-table". - - This method is faster and does not block, but it is less safe for two reasons. - One, for a very short time (between dropping the original table and renaming the - temporary table) the child tables reference a non-existent table.
Two, more - importantly, if for some reason the temporary table was not copied correctly, - didn't capture all changes, etc., the original table cannot be recovered - because it was dropped. - - - - - ---user - - short form: -u; type: string - - User for login if not current user. - - - ---version - - Show version and exit. - - - - -*********** -DSN OPTIONS -*********** - - -These DSN options are used to create a DSN. Each option is given like -\ ``option=value``\ . The options are case-sensitive, so P and p are not the -same option. There cannot be whitespace before or after the \ ``=``\ and -if the value contains whitespace it must be quoted. DSN options are -comma-separated. See the percona-toolkit manpage for full details. - - -\* A - - dsn: charset; copy: yes - - Default character set. - - - -\* D - - dsn: database; copy: yes - - Database for the old and new table. - - - -\* F - - dsn: mysql_read_default_file; copy: yes - - Only read default options from the given file - - - -\* h - - dsn: host; copy: yes - - Connect to host. - - - -\* p - - dsn: password; copy: yes - - Password to use when connecting. - - - -\* P - - dsn: port; copy: yes - - Port number to use for connection. - - - -\* S - - dsn: mysql_socket; copy: yes - - Socket file to use for connection. - - - -\* t - - dsn: table; copy: no - - Table to alter. - - - -\* u - - dsn: user; copy: yes - - User for login if not current user. - - - - -*********** -ENVIRONMENT -*********** - - -The environment variable \ ``PTDEBUG``\ enables verbose debugging output to STDERR. -To enable debugging and capture all output to a file, run the tool like: - - -.. code-block:: perl - - PTDEBUG=1 pt-online-schema-change ... > FILE 2>&1 - - -Be careful: debugging output is voluminous and can generate several megabytes -of output. - - -******************* -SYSTEM REQUIREMENTS -******************* - - -You need Perl, DBI, DBD::mysql, and some core packages that ought to be -installed in any reasonably new version of Perl. 
- - -**** -BUGS -**** - - -For a list of known bugs, see `http://www.percona.com/bugs/pt-online-schema-change `_. - -Please report bugs at `https://bugs.launchpad.net/percona-toolkit `_. -Include the following information in your bug report: - - -\* Complete command-line used to run the tool - - - -\* Tool "--version" - - - -\* MySQL version of all servers involved - - - -\* Output from the tool including STDERR - - - -\* Input files (log/dump/config files, etc.) - - - -If possible, include debugging output by running the tool with \ ``PTDEBUG``\ ; -see "ENVIRONMENT". - - -*********** -DOWNLOADING -*********** - - -Visit `http://www.percona.com/software/percona-toolkit/ `_ to download the -latest release of Percona Toolkit. Or, get the latest release from the -command line: - - -.. code-block:: perl - - wget percona.com/get/percona-toolkit.tar.gz - - wget percona.com/get/percona-toolkit.rpm - - wget percona.com/get/percona-toolkit.deb - - -You can also get individual tools from the latest release: - - -.. code-block:: perl - - wget percona.com/get/TOOL - - -Replace \ ``TOOL``\ with the name of any tool. - - -******* -AUTHORS -******* - - -Baron Schwartz - - -*************** -ACKNOWLEDGMENTS -*************** - - -The "online schema change" concept was first implemented by Shlomi Noach -in his tool \ ``oak-online-alter-table``\ , part of -`http://code.google.com/p/openarkkit/ `_. Then engineers at Facebook built -their version called \ ``OnlineSchemaChange.php``\ as explained by their blog -post: `http://tinyurl.com/32zeb86 `_. Searching for "online schema change" -will return other relevant pages about this concept. - -This implementation, \ ``pt-online-schema-change``\ , is a hybrid of Shlomi's -and Facebook's approach. Shlomi's code is a full-featured tool with command -line options, documentation, etc., but its continued development/support is -not assured. 
Facebook's tool has certain technical advantages, but it's not -a full-featured tool; it's more a custom job by Facebook for Facebook. And -neither of those tools is tested. \ ``pt-online-schema-change``\ is a -full-featured, tested tool with stable development and support. - -This tool was made possible by a generous client of Percona Inc. - - -********************* -ABOUT PERCONA TOOLKIT -********************* - - -This tool is part of Percona Toolkit, a collection of advanced command-line -tools developed by Percona for MySQL support and consulting. Percona Toolkit -was forked from two projects in June, 2011: Maatkit and Aspersa. Those -projects were created by Baron Schwartz and developed primarily by him and -Daniel Nichter, both of whom are employed by Percona. Visit -`http://www.percona.com/software/ `_ for more software developed by Percona. - - -******************************** -COPYRIGHT, LICENSE, AND WARRANTY -******************************** - - -This program is copyright 2011 Percona Inc. -Feedback and improvements are welcome. - -THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation, version 2; OR the Perl Artistic License. On UNIX and similar -systems, you can issue \`man perlgpl' or \`man perlartistic' to read these -licenses. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA. 
- - -******* -VERSION -******* - - -pt-online-schema-change 1.0.1 - diff --git a/docs/user/pt-pmp.rst b/docs/user/pt-pmp.rst deleted file mode 100644 index 0f427d54..00000000 --- a/docs/user/pt-pmp.rst +++ /dev/null @@ -1,244 +0,0 @@ - -###### -pt-pmp -###### - -.. highlight:: perl - - -**** -NAME -**** - - -pt-pmp - Aggregate GDB stack traces for a selected program. - - -******** -SYNOPSIS -******** - - -Usage: pt-pmp [OPTIONS] [FILES] - -pt-pmp is a poor man's profiler, inspired by `http://poormansprofiler.org `_. -It can create and summarize full stack traces of processes on Linux. -Summaries of stack traces can be an invaluable tool for diagnosing what -a process is waiting for. - - -***** -RISKS -***** - - -The following section is included to inform users about the potential risks, -whether known or unknown, of using this tool. The two main categories of risks -are those created by the nature of the tool (e.g. read-only tools vs. read-write -tools) and those created by bugs. - -pt-pmp is a read-only tool. It should be very low-risk. - -At the time of this release, we know of no bugs that could cause serious harm -to users. - -The authoritative source for updated information is always the online issue -tracking system. Issues that affect this tool will be marked as such. You can -see a list of such issues at the following URL: -`http://www.percona.com/bugs/pt-pmp `_. - -See also "BUGS" for more information on filing bugs and getting help. - - -*********** -DESCRIPTION -*********** - - -pt-pmp performs two tasks: it gets a stack trace, and it summarizes the stack -trace. If a file is given on the command line, the tool skips the first step -and just aggregates the file. - -To summarize the stack trace, the tool extracts the function name (symbol) -from each level of the stack, and combines them with commas. It does this -for each thread in the output. 
Afterwards, it sorts similar threads together -and counts how many of each one there are, then sorts them most-frequent first. - - -******* -OPTIONS -******* - - -Options must precede files on the command line. - - --b BINARY - - Which binary to trace (default mysqld) - - - --i ITERATIONS - - How many traces to gather and aggregate (default 1) - - - --k KEEPFILE - - Keep the raw traces in this file after aggregation - - - --l NUMBER - - Aggregate only first NUMBER functions; 0=infinity (default 0) - - - --p PID - - Process ID of the process to trace; overrides -b - - - --s SLEEPTIME - - Number of seconds to sleep between iterations (default 0) - - - - -*********** -ENVIRONMENT -*********** - - -This tool does not use any environment variables. - - -******************* -SYSTEM REQUIREMENTS -******************* - - -This tool requires Bash v3 or newer. - - -**** -BUGS -**** - - -For a list of known bugs, see `http://www.percona.com/bugs/pt-pmp `_. - -Please report bugs at `https://bugs.launchpad.net/percona-toolkit `_. -Include the following information in your bug report: - - -\* Complete command-line used to run the tool - - - -\* Tool "--version" - - - -\* MySQL version of all servers involved - - - -\* Output from the tool including STDERR - - - -\* Input files (log/dump/config files, etc.) - - - -If possible, include debugging output by running the tool with \ ``PTDEBUG``\ ; -see "ENVIRONMENT". - - -*********** -DOWNLOADING -*********** - - -Visit `http://www.percona.com/software/percona-toolkit/ `_ to download the -latest release of Percona Toolkit. Or, get the latest release from the -command line: - - -.. code-block:: perl - - wget percona.com/get/percona-toolkit.tar.gz - - wget percona.com/get/percona-toolkit.rpm - - wget percona.com/get/percona-toolkit.deb - - -You can also get individual tools from the latest release: - - -.. code-block:: perl - - wget percona.com/get/TOOL - - -Replace \ ``TOOL``\ with the name of any tool. 
- - -******* -AUTHORS -******* - - -Baron Schwartz, based on a script by Domas Mituzas (`http://poormansprofiler.org/ `_) - - -********************* -ABOUT PERCONA TOOLKIT -********************* - - -This tool is part of Percona Toolkit, a collection of advanced command-line -tools developed by Percona for MySQL support and consulting. Percona Toolkit -was forked from two projects in June, 2011: Maatkit and Aspersa. Those -projects were created by Baron Schwartz and developed primarily by him and -Daniel Nichter, both of whom are employed by Percona. Visit -`http://www.percona.com/software/ `_ for more software developed by Percona. - - -******************************** -COPYRIGHT, LICENSE, AND WARRANTY -******************************** - - -This program is copyright 2010-2011 Baron Schwartz, 2011 Percona Inc. -Feedback and improvements are welcome. - -THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation, version 2; OR the Perl Artistic License. On UNIX and similar -systems, you can issue \`man perlgpl' or \`man perlartistic' to read these -licenses. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA. - - -******* -VERSION -******* - - -pt-pmp 1.0.1 - diff --git a/docs/user/pt-query-advisor.rst b/docs/user/pt-query-advisor.rst deleted file mode 100644 index 30978494..00000000 --- a/docs/user/pt-query-advisor.rst +++ /dev/null @@ -1,848 +0,0 @@ - -################ -pt-query-advisor -################ - -.. 
highlight:: perl - - -**** -NAME -**** - - -pt-query-advisor - Analyze queries and advise on possible problems. - - -******** -SYNOPSIS -******** - - -Usage: pt-query-advisor [OPTION...] [FILE] - -pt-query-advisor analyzes queries and advises on possible problems. -Queries are given either by specifying slowlog files, --query, or --review. - -Analyze all queries in a slow log: - - -.. code-block:: perl - - pt-query-advisor /path/to/slow-query.log - - -Analyze all queries in a general log: - - -.. code-block:: perl - - pt-query-advisor --type genlog mysql.log - - -Get queries from tcpdump using pt-query-digest: - - -.. code-block:: perl - - pt-query-digest --type tcpdump.txt --print --no-report | pt-query-advisor - - - -***** -RISKS -***** - - -The following section is included to inform users about the potential risks, -whether known or unknown, of using this tool. The two main categories of risks -are those created by the nature of the tool (e.g. read-only tools vs. read-write -tools) and those created by bugs. - -pt-query-advisor simply reads queries and examines them, and is thus -very low risk. - -At the time of this release there is a bug that may cause an infinite (or -very long) loop when parsing very large queries. - -The authoritative source for updated information is always the online issue -tracking system. Issues that affect this tool will be marked as such. You can -see a list of such issues at the following URL: -`http://www.percona.com/bugs/pt-query-advisor `_. - -See also "BUGS" for more information on filing bugs and getting help. - - -*********** -DESCRIPTION -*********** - - -pt-query-advisor examines queries and applies rules to them, trying to -find queries that look bad according to the rules. It reports on -queries that match the rules, so you can find bad practices or hidden -problems in your SQL. By default, it accepts a MySQL slow query log -as input.
- - -***** -RULES -***** - - -These are the rules that pt-query-advisor will apply to the queries it -examines. Each rule has three bits of information: an ID, a severity -and a description. - -The rule's ID is its identifier. We use a seven-character ID, and the -naming convention is three characters, a period, and a three-digit -number. The first three characters are sort of an abbreviation of the -general class of the rule. For example, ALI.001 is some rule related -to how the query uses aliases. - -The rule's severity is an indication of how important it is that this -rule matched a query. We use NOTE, WARN, and CRIT to denote these -levels. - -The rule's description is a textual, human-readable explanation of -what it means when a query matches this rule. Depending on the -verbosity of the report you generate, you will see more of the text in -the description. By default, you'll see only the first sentence, -which is sort of a terse synopsis of the rule's meaning. At a higher -verbosity, you'll see subsequent sentences. - - -ALI.001 - - severity: note - - Aliasing without the AS keyword. Explicitly using the AS keyword in - column or table aliases, such as "tbl AS alias," is more readable - than implicit aliases such as "tbl alias". - - - -ALI.002 - - severity: warn - - Aliasing the '\*' wildcard. Aliasing a column wildcard, such as - "SELECT tbl.\* col1, col2" probably indicates a bug in your SQL. - You probably meant for the query to retrieve col1, but instead it - renames the last column in the \*-wildcarded list. - - - -ALI.003 - - severity: note - - Aliasing without renaming. The table or column's alias is the same as - its real name, and the alias just makes the query harder to read. - - - -ARG.001 - - severity: warn - - Argument with leading wildcard. An argument has a leading - wildcard character, such as "%foo". The predicate with this argument - is not sargable and cannot use an index if one exists. 
- - - -ARG.002 - - severity: note - - LIKE without a wildcard. A LIKE pattern that does not include a - wildcard is potentially a bug in the SQL. - - - -CLA.001 - - severity: warn - - SELECT without WHERE. The SELECT statement has no WHERE clause. - - - -CLA.002 - - severity: note - - ORDER BY RAND(). ORDER BY RAND() is a very inefficient way to - retrieve a random row from the results. - - - -CLA.003 - - severity: note - - LIMIT with OFFSET. Paginating a result set with LIMIT and OFFSET is - O(n^2) complexity, and will cause performance problems as the data - grows larger. - - - -CLA.004 - - severity: note - - Ordinal in the GROUP BY clause. Using a number in the GROUP BY clause, - instead of an expression or column name, can cause problems if the - query is changed. - - - -CLA.005 - - severity: warn - - ORDER BY constant column. - - - -CLA.006 - - severity: warn - - GROUP BY or ORDER BY different tables will force a temp table and filesort. - - - -CLA.007 - - severity: warn - - ORDER BY different directions prevents index from being used. All tables - in the ORDER BY clause must be either ASC or DESC, else MySQL cannot use - an index. - - - -COL.001 - - severity: note - - SELECT \*. Selecting all columns with the \* wildcard will cause the - query's meaning and behavior to change if the table's schema - changes, and might cause the query to retrieve too much data. - - - -COL.002 - - severity: note - - Blind INSERT. The INSERT or REPLACE query doesn't specify the - columns explicitly, so the query's behavior will change if the - table's schema changes; use "INSERT INTO tbl(col1, col2) VALUES..." - instead. - - - -LIT.001 - - severity: warn - - Storing an IP address as characters. The string literal looks like - an IP address, but is not an argument to INET_ATON(), indicating that - the data is stored as characters instead of as integers. It is - more efficient to store IP addresses as integers. - - - -LIT.002 - - severity: warn - - Unquoted date/time literal. 
A query such as "WHERE col<2010-02-12" - is valid SQL but is probably a bug; the literal should be quoted. - - - -KWR.001 - - severity: note - - SQL_CALC_FOUND_ROWS is inefficient. SQL_CALC_FOUND_ROWS can cause - performance problems because it does not scale well; use - alternative strategies to build functionality such as paginated - result screens. - - - -JOI.001 - - severity: crit - - Mixing comma and ANSI joins. Mixing comma joins and ANSI joins - is confusing to humans, and the behavior differs between some - MySQL versions. - - - -JOI.002 - - severity: crit - - A table is joined twice. The same table appears at least twice in the - FROM clause. - - - -JOI.003 - - severity: warn - - Reference to outer table column in WHERE clause prevents OUTER JOIN, - implicitly converts to INNER JOIN. - - - -JOI.004 - - severity: warn - - Exclusion join uses wrong column in WHERE. The exclusion join (LEFT - OUTER JOIN with a WHERE clause that is satisfied only if there is no row in - the right-hand table) seems to use the wrong column in the WHERE clause. A - query such as "... FROM l LEFT OUTER JOIN r ON l.l=r.r WHERE r.z IS NULL" - probably ought to list r.r in the WHERE IS NULL clause. - - - -RES.001 - - severity: warn - - Non-deterministic GROUP BY. The SQL retrieves columns that are - neither in an aggregate function nor the GROUP BY expression, so - these values will be non-deterministic in the result. - - - -RES.002 - - severity: warn - - LIMIT without ORDER BY. LIMIT without ORDER BY causes - non-deterministic results, depending on the query execution plan. - - - -STA.001 - - severity: note - - != is non-standard. Use the <> operator to test for inequality. - - - -SUB.001 - - severity: crit - - IN() and NOT IN() subqueries are poorly optimized. MySQL executes the subquery - as a dependent subquery for each row in the outer query. This is a frequent - cause of serious performance problems. 
This might change in version 6.0 of MySQL, - but for versions 5.1 and older, the query should be rewritten as a JOIN or a - LEFT OUTER JOIN, respectively. - - - - -******* -OPTIONS -******* - - -"--query" and "--review" are mutually exclusive. - -This tool accepts additional command-line arguments. Refer to the -"SYNOPSIS" and usage information for details. - - ---ask-pass - - Prompt for a password when connecting to MySQL. - - - ---charset - - short form: -A; type: string - - Default character set. If the value is utf8, sets Perl's binmode on - STDOUT to utf8, passes the mysql_enable_utf8 option to DBD::mysql, and - runs SET NAMES UTF8 after connecting to MySQL. Any other value sets - binmode on STDOUT without the utf8 layer, and runs SET NAMES after - connecting to MySQL. - - - ---config - - type: Array - - Read this comma-separated list of config files; if specified, this must be the - first option on the command line. - - - ---[no]continue-on-error - - default: yes - - Continue working even if there is an error. - - - ---daemonize - - Fork to the background and detach from the shell. POSIX - operating systems only. - - - ---database - - short form: -D; type: string - - Connect to this database. This is also used as the default database - for "--[no]show-create-table" if a query does not use database-qualified - tables. - - - ---defaults-file - - short form: -F; type: string - - Only read mysql options from the given file. You must give an absolute - pathname. - - - ---group-by - - type: string; default: rule_id - - Group items in the report by this attribute. Possible attributes are: - - - .. code-block:: perl - - ATTRIBUTE GROUPS - ========= ========================================================== - rule_id Items matching the same rule ID - query_id Queries with the same ID (the same fingerprint) - none No grouping, report each query and its advice individually - - - - ---help - - Show help and exit.
- - - ---host - - short form: -h; type: string - - Connect to host. - - - ---ignore-rules - - type: hash - - Ignore these rule IDs. - - Specify a comma-separated list of rule IDs (e.g. LIT.001,RES.002,etc.) - to ignore. Currently, the rule IDs are case-sensitive and must be uppercase. - - - ---password - - short form: -p; type: string - - Password to use when connecting. - - - ---pid - - type: string - - Create the given PID file when daemonized. The file contains the process - ID of the daemonized instance. The PID file is removed when the - daemonized instance exits. The program checks for the existence of the - PID file when starting; if it exists and the process with the matching PID - exists, the program exits. - - - ---port - - short form: -P; type: int - - Port number to use for connection. - - - ---print-all - - Print all queries, even those that do not match any rules. With - "--group-by" \ ``none``\ , non-matching queries are printed in the main report - and profile. For other "--group-by" values, non-matching queries are only - printed in the profile. Non-matching queries have zeros for \ ``NOTE``\ , \ ``WARN``\ - and \ ``CRIT``\ in the profile. - - - ---query - - type: string - - Analyze this single query and ignore files and STDIN. This option - allows you to supply a single query on the command line. Any files - also specified on the command line are ignored. - - - ---report-format - - type: string; default: compact - - Type of report format: full or compact. In full mode, every query's - report contains the description of the rules it matched, even if this - information was previously displayed. In compact mode, the repeated - information is suppressed, and only the rule ID is displayed. - - - ---review - - type: DSN - - Analyze queries from this pt-query-digest query review table. - - - ---sample - - type: int; default: 1 - - How many samples of the query to show. 
- - - ---set-vars - - type: string; default: wait_timeout=10000 - - Set these MySQL variables. Immediately after connecting to MySQL, this string - will be appended to SET and executed. - - - ---[no]show-create-table - - default: yes - - Get \ ``SHOW CREATE TABLE``\ for each query's table. - - If host connection options are given (like "--host", "--port", etc.) - then the tool will also get \ ``SHOW CREATE TABLE``\ for each query. This - information is needed for some rules like JOI.004. If this option is - disabled by specifying \ ``--no-show-create-table``\ then some rules may not - be checked. - - - ---socket - - short form: -S; type: string - - Socket file to use for connection. - - - ---type - - type: Array - - The type of input to parse (default slowlog). The permitted types are - slowlog and genlog. - - - ---user - - short form: -u; type: string - - User for login if not current user. - - - ---verbose - - short form: -v; cumulative: yes; default: 1 - - Increase verbosity of output. At the default level of verbosity, the - program prints only the first sentence of each rule's description. At - higher levels, the program prints more of the description. See also - "--report-format". - - - ---version - - Show version and exit. - - - ---where - - type: string - - Apply this WHERE clause to the SELECT query on the "--review" table. - - - - -*********** -DSN OPTIONS -*********** - - -These DSN options are used to create a DSN. Each option is given like -\ ``option=value``\ . The options are case-sensitive, so P and p are not the -same option. There cannot be whitespace before or after the \ ``=``\ and -if the value contains whitespace it must be quoted. DSN options are -comma-separated. See the percona-toolkit manpage for full details. - - -\* A - - dsn: charset; copy: yes - - Default character set. - - - -\* D - - dsn: database; copy: yes - - Database that contains the query review table. 
- - - -\* F - - dsn: mysql_read_default_file; copy: yes - - Only read default options from the given file - - - -\* h - - dsn: host; copy: yes - - Connect to host. - - - -\* p - - dsn: password; copy: yes - - Password to use when connecting. - - - -\* P - - dsn: port; copy: yes - - Port number to use for connection. - - - -\* S - - dsn: mysql_socket; copy: yes - - Socket file to use for connection. - - - -\* t - - Table to use as the query review table. - - - -\* u - - dsn: user; copy: yes - - User for login if not current user. - - - - -*********** -ENVIRONMENT -*********** - - -The environment variable \ ``PTDEBUG``\ enables verbose debugging output to STDERR. -To enable debugging and capture all output to a file, run the tool like: - - -.. code-block:: perl - - PTDEBUG=1 pt-query-advisor ... > FILE 2>&1 - - -Be careful: debugging output is voluminous and can generate several megabytes -of output. - - -******************* -SYSTEM REQUIREMENTS -******************* - - -You need Perl, DBI, DBD::mysql, and some core packages that ought to be -installed in any reasonably new version of Perl. - - -**** -BUGS -**** - - -For a list of known bugs, see `http://www.percona.com/bugs/pt-query-advisor `_. - -Please report bugs at `https://bugs.launchpad.net/percona-toolkit `_. -Include the following information in your bug report: - - -\* Complete command-line used to run the tool - - - -\* Tool "--version" - - - -\* MySQL version of all servers involved - - - -\* Output from the tool including STDERR - - - -\* Input files (log/dump/config files, etc.) - - - -If possible, include debugging output by running the tool with \ ``PTDEBUG``\ ; -see "ENVIRONMENT". - - -*********** -DOWNLOADING -*********** - - -Visit `http://www.percona.com/software/percona-toolkit/ `_ to download the -latest release of Percona Toolkit. Or, get the latest release from the -command line: - - -.. 
code-block:: perl - - wget percona.com/get/percona-toolkit.tar.gz - - wget percona.com/get/percona-toolkit.rpm - - wget percona.com/get/percona-toolkit.deb - - -You can also get individual tools from the latest release: - - -.. code-block:: perl - - wget percona.com/get/TOOL - - -Replace \ ``TOOL``\ with the name of any tool. - - -******* -AUTHORS -******* - - -Baron Schwartz and Daniel Nichter - - -********************* -ABOUT PERCONA TOOLKIT -********************* - - -This tool is part of Percona Toolkit, a collection of advanced command-line -tools developed by Percona for MySQL support and consulting. Percona Toolkit -was forked from two projects in June, 2011: Maatkit and Aspersa. Those -projects were created by Baron Schwartz and developed primarily by him and -Daniel Nichter, both of whom are employed by Percona. Visit -`http://www.percona.com/software/ `_ for more software developed by Percona. - - -******************************** -COPYRIGHT, LICENSE, AND WARRANTY -******************************** - - -This program is copyright 2010-2011 Percona Inc. -Feedback and improvements are welcome. - -THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation, version 2; OR the Perl Artistic License. On UNIX and similar -systems, you can issue \`man perlgpl' or \`man perlartistic' to read these -licenses. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA. 
- - -******* -VERSION -******* - - -pt-query-advisor 1.0.1 - diff --git a/docs/user/pt-query-digest.rst b/docs/user/pt-query-digest.rst deleted file mode 100644 index 24a4566f..00000000 --- a/docs/user/pt-query-digest.rst +++ /dev/null @@ -1,2561 +0,0 @@ - -############### -pt-query-digest -############### - -.. highlight:: perl - - -**** -NAME -**** - - -pt-query-digest - Analyze query execution logs and generate a query report, filter, replay, or transform queries for MySQL, PostgreSQL, memcached, and more. - - -******** -SYNOPSIS -******** - - -Usage: pt-query-digest [OPTION...] [FILE] - -pt-query-digest parses and analyzes MySQL log files. With no FILE, or when -FILE is -, it reads standard input. - -Analyze, aggregate, and report on a slow query log: - - -.. code-block:: perl - - pt-query-digest /path/to/slow.log - - -Review a slow log, saving results to the test.query_review table in a MySQL -server running on host1. See "--review" for more on reviewing queries: - - -.. code-block:: perl - - pt-query-digest --review h=host1,D=test,t=query_review /path/to/slow.log - - -Filter out everything but SELECT queries, replay the queries against another -server, then use the timings from replaying them to analyze their performance: - - -.. code-block:: perl - - pt-query-digest /path/to/slow.log --execute h=another_server \ - --filter '$event->{fingerprint} =~ m/^select/' - - -Print the structure of events so you can construct a complex "--filter": - - -.. code-block:: perl - - pt-query-digest /path/to/slow.log --no-report \ - --filter 'print Dumper($event)' - - -Watch SHOW FULL PROCESSLIST and output a log in slow query log format: - - -.. code-block:: perl - - pt-query-digest --processlist h=host1 --print --no-report - - -The default aggregation and analysis is CPU and memory intensive. Disable it if -you don't need the default report: - - -..
code-block:: perl - - pt-query-digest --no-report - - - -***** -RISKS -***** - - -The following section is included to inform users about the potential risks, -whether known or unknown, of using this tool. The two main categories of risks -are those created by the nature of the tool (e.g. read-only tools vs. read-write -tools) and those created by bugs. - -By default pt-query-digest merely collects and aggregates data from the files -specified. It is designed to be as efficient as possible, but depending on the -input you give it, it can use a lot of CPU and memory. Practically speaking, it -is safe to run even on production systems, but you might want to monitor it -until you are satisfied that the input you give it does not cause undue load. - -Various options will cause pt-query-digest to insert data into tables, execute -SQL queries, and so on. These include the "--execute" option and -"--review". - -At the time of this release, we know of no bugs that could cause serious harm -to users. - -The authoritative source for updated information is always the online issue -tracking system. Issues that affect this tool will be marked as such. You can -see a list of such issues at the following URL: -`http://www.percona.com/bugs/pt-query-digest `_. - -See also "BUGS" for more information on filing bugs and getting help. - - -*********** -DESCRIPTION -*********** - - -\ ``pt-query-digest``\ is a framework for doing things with events from a query -source such as the slow query log or PROCESSLIST. By default it acts as a very -sophisticated log analysis tool. You can group and sort queries in many -different ways simultaneously and find the most expensive queries, or create a -timeline of queries in the log, for example. It can also do a "query review," -which means to save a sample of each type of query into a MySQL table so you can -easily see whether you've reviewed and analyzed a query before. 
The benefit of -this is that you can keep track of changes to your server's queries and avoid -repeated work. You can also save other information with the queries, such as -comments, issue numbers in your ticketing system, and so on. - -Note that this is a work in \*very\* active progress and you should expect -incompatible changes in the future. - - -********** -ATTRIBUTES -********** - - -pt-query-digest works on events, which are a collection of key/value pairs -called attributes. You'll recognize most of the attributes right away: -Query_time, Lock_time, and so on. You can just look at a slow log and see them. -However, there are some that don't exist in the slow log, and slow logs -may actually include different kinds of attributes (for example, you may have a -server with the Percona patches). - -For a full list of attributes, see -`http://code.google.com/p/maatkit/wiki/EventAttributes <http://code.google.com/p/maatkit/wiki/EventAttributes>`_. - -With creative use of "--filter", you can create new attributes derived -from existing attributes. For example, to create an attribute called -\ ``Row_ratio``\ for examining the ratio of \ ``Rows_sent``\ to \ ``Rows_examined``\ , -specify a filter like: - - -.. code-block:: perl - - --filter '($event->{Row_ratio} = $event->{Rows_sent} / ($event->{Rows_examined})) && 1' - - -The \ ``&& 1``\ trick is needed to create a valid one-line syntax that is always -true, even if the assignment happens to evaluate false. The new attribute will -automatically appear in the output: - - -.. code-block:: perl - - # Row ratio 1.00 0.00 1 0.50 1 0.71 0.50 - - -Attributes created this way can be specified for "--order-by" or any -option that requires an attribute. - -memcached -========= - - -memcached events have additional attributes related to the memcached protocol: -cmd, key, res (result) and val. Also, boolean attributes are created for -the various commands, misses and errors: Memc_CMD where CMD is a memcached -command (get, set, delete, etc.), Memc_error and Memc_miss. 
- -These attributes are no different from slow log attributes, so you can use them -with "--[no]report", "--group-by", in a "--filter", etc. - -These attributes and more are documented at -`http://code.google.com/p/maatkit/wiki/EventAttributes <http://code.google.com/p/maatkit/wiki/EventAttributes>`_. - - - -****** -OUTPUT -****** - - -The default output is a query analysis report. The "--[no]report" option -controls whether or not this report is printed. Sometimes you may wish to -parse all the queries but suppress the report, for example when using -"--print" or "--review". - -There is one paragraph for each class of query analyzed. A "class" of queries -all have the same value for the "--group-by" attribute which is -"fingerprint" by default. (See "ATTRIBUTES".) A fingerprint is an -abstracted version of the query text with literals removed, whitespace -collapsed, and so forth. The report is formatted so it's easy to paste into -emails without wrapping, and all non-query lines begin with a comment, so you -can save it to a .sql file and open it in your favorite syntax-highlighting -text editor. There is a response-time profile at the beginning. - -The output described here is controlled by "--report-format". -That option allows you to specify what to print and in what order. -The default output in the default order is described here. - -The report, by default, begins with a paragraph about the entire analysis run. -The information is very similar to what you'll see for each class of queries in -the log, but it doesn't have some information that would be too expensive to -keep globally for the analysis. It also has some statistics about the code's -execution itself, such as the CPU and memory usage, the local date and time -of the run, and a list of input files read/parsed. - -Following this is the response-time profile over the events. This is a -highly summarized view of the unique events in the detailed query report -that follows. It contains the following columns: - - -.. 
code-block:: perl - - Column Meaning - ============ ========================================================== - Rank The query's rank within the entire set of queries analyzed - Query ID The query's fingerprint - Response time The total response time, and percentage of overall total - Calls The number of times this query was executed - R/Call The mean response time per execution - Apdx The Apdex score; see --apdex-threshold for details - V/M The Variance-to-mean ratio of response time - EXPLAIN If --explain was specified, a sparkline; see --explain - Item The distilled query - - -A final line whose rank is shown as MISC contains aggregate statistics on the -queries that were not included in the report, due to options such as -"--limit" and "--outliers". For details on the variance-to-mean ratio, -please see http://en.wikipedia.org/wiki/Index_of_dispersion. - -Next, the detailed query report is printed. Each query appears in a paragraph. -Here is a sample, slightly reformatted so 'perldoc' will not wrap lines in a -terminal. The following will all be one paragraph, but we'll break it up for -commentary. - - -.. code-block:: perl - - # Query 2: 0.01 QPS, 0.02x conc, ID 0xFDEA8D2993C9CAF3 at byte 160665 - - -This line identifies the sequential number of the query in the sort order -specified by "--order-by". Then there's the queries per second, and the -approximate concurrency for this query (calculated as a function of the timespan -and total Query_time). Next there's a query ID. This ID is a hex version of -the query's checksum in the database, if you're using "--review". You can -select the reviewed query's details from the database with a query like \ ``SELECT -.... WHERE checksum=0xFDEA8D2993C9CAF3``\ . 
- -If you are investigating the report and want to print out every sample of a -particular query, then the following "--filter" may be helpful: -\ ``pt-query-digest slow-log.log --no-report --print --filter '$event->{fingerprint} -&& make_checksum($event->{fingerprint}) eq "FDEA8D2993C9CAF3"'``\ . - -Notice that you must remove the 0x prefix from the checksum in order for this to work. - -Finally, in case you want to find a sample of the query in the log file, there's -the byte offset where you can look. (This is not always accurate, due to some -silly anomalies in the slow-log format, but it's usually right.) The position -refers to the worst sample, which we'll see more about below. - -Next is the table of metrics about this class of queries. - - -.. code-block:: perl - - # pct total min max avg 95% stddev median - # Count 0 2 - # Exec time 13 1105s 552s 554s 553s 554s 2s 553s - # Lock time 0 216us 99us 117us 108us 117us 12us 108us - # Rows sent 20 6.26M 3.13M 3.13M 3.13M 3.13M 12.73 3.13M - # Rows exam 0 6.26M 3.13M 3.13M 3.13M 3.13M 12.73 3.13M - - -The first line is column headers for the table. The percentage is the percent -of the total for the whole analysis run, and the total is the actual value of -the specified metric. For example, in this case we can see that the query -executed 2 times, which is 13% of the total number of queries in the file. The -min, max and avg columns are self-explanatory. The 95% column shows the 95th -percentile; 95% of the values are less than or equal to this value. The -standard deviation shows you how tightly grouped the values are. The standard -deviation and median are both calculated from the 95th percentile, discarding -the extremely large values. - -The stddev, median and 95th percentile statistics are approximate. Exact -statistics require keeping every value seen, sorting, and doing some -calculations on them. This uses a lot of memory. 
To avoid this, we keep 1000 -buckets, each of them 5% bigger than the one before, ranging from .000001 up to -a very big number. When we see a value we increment the bucket into which it -falls. Thus we have fixed memory per class of queries. The drawback is the -imprecision, which typically falls in the 5 percent range. - -Next we have statistics on the users, databases and time range for the query. - - -.. code-block:: perl - - # Users 1 user1 - # Databases 2 db1(1), db2(1) - # Time range 2008-11-26 04:55:18 to 2008-11-27 00:15:15 - - -The users and databases are shown as a count of distinct values, followed by the -values. If there's only one, it's shown alone; if there are many, we show each -of the most frequent ones, followed by the number of times it appears. - - -.. code-block:: perl - - # Query_time distribution - # 1us - # 10us - # 100us - # 1ms - # 10ms - # 100ms - # 1s - # 10s+ ############################################################# - - -The execution times show a logarithmic chart of time clustering. Each query -goes into one of the "buckets" and is counted up. The buckets are powers of -ten. The first bucket is all values in the "single microsecond range" -- that -is, less than 10us. The second is "tens of microseconds," which is from 10us -up to (but not including) 100us; and so on. The charted attribute can be -changed by specifying "--report-histogram" but is limited to time-based -attributes. - - -.. code-block:: perl - - # Tables - # SHOW TABLE STATUS LIKE 'table1'\G - # SHOW CREATE TABLE `table1`\G - # EXPLAIN - SELECT * FROM table1\G - - -This section is a convenience: if you're trying to optimize the queries you see -in the slow log, you probably want to examine the table structure and size. -These are copy-and-paste-ready commands to do that. - -Finally, we see a sample of the queries in this class of query. This is not a -random sample. It is the query that performed the worst, according to the sort -order given by "--order-by". 
You will normally see a commented \ ``# EXPLAIN``\ -line just before it, so you can copy-paste the query to examine its EXPLAIN -plan. But for non-SELECT queries that isn't possible to do, so the tool tries to -transform the query into a roughly equivalent SELECT query, and adds that below. - -If you want to find this sample event in the log, use the offset mentioned -above, and something like the following: - - -.. code-block:: perl - - tail -c +<offset> /path/to/file | head - - -See also "--report-format". - -SPARKLINES -========== - - -The output also contains sparklines. Sparklines are "data-intense, -design-simple, word-sized graphics" (`http://en.wikipedia.org/wiki/Sparkline <http://en.wikipedia.org/wiki/Sparkline>`_). There is a sparkline for "--report-histogram" and for "--explain". -See each of those options for details about interpreting their sparklines. - - - -************* -QUERY REVIEWS -************* - - -A "query review" is the process of storing all the query fingerprints analyzed. -This has several benefits: - - -\* - - You can add meta-data to classes of queries, such as marking them for follow-up, - adding notes to queries, or marking them with an issue ID for your issue - tracking system. - - - -\* - - You can refer to the stored values on subsequent runs so you'll know whether - you've seen a query before. This can help you cut down on duplicated work. - - - -\* - - You can store historical data such as the row count, query times, and generally - anything you can see in the report. - - - -To use this feature, you run pt-query-digest with the "--review" option. It -will store the fingerprints and other information into the table you specify. -Next time you run it with the same option, it will do the following: - - -\* - - It won't show you queries you've already reviewed. A query is considered to be - already reviewed if you've set a value for the \ ``reviewed_by``\ column. (If you - want to see queries you've already reviewed, use the "--report-all" option.) 
- - - -\* - - Queries that you've reviewed, and don't appear in the output, will cause gaps in - the query number sequence in the first line of each paragraph. And the value - you've specified for "--limit" will still be honored. So if you've reviewed all - queries in the top 10 and you ask for the top 10, you won't see anything in the - output. - - - -\* - - If you want to see the queries you've already reviewed, you can specify - "--report-all". Then you'll see the normal analysis output, but you'll also see - the information from the review table, just below the execution time graph. For - example, - - - .. code-block:: perl - - # Review information - # comments: really bad IN() subquery, fix soon! - # first_seen: 2008-12-01 11:48:57 - # jira_ticket: 1933 - # last_seen: 2008-12-18 11:49:07 - # priority: high - # reviewed_by: xaprb - # reviewed_on: 2008-12-18 15:03:11 - - - You can see how useful this meta-data is -- as you analyze your queries, you get - your comments integrated right into the report. - - If you add the "--review-history" option, it will also store information into - a separate database table, so you can keep historical trending information on - classes of queries. - - - - -************ -FINGERPRINTS -************ - - -A query fingerprint is the abstracted form of a query, which makes it possible -to group similar queries together. Abstracting a query removes literal values, -normalizes whitespace, and so on. For example, consider these two queries: - - -.. code-block:: perl - - SELECT name, password FROM user WHERE id='12823'; - select name, password from user - where id=5; - - -Both of those queries will fingerprint to - - -.. code-block:: perl - - select name, password from user where id=? - - -Once the query's fingerprint is known, we can then talk about a query as though -it represents all similar queries. - -What \ ``pt-query-digest``\ does is analogous to a GROUP BY statement in SQL. 
(But -note that "multiple columns" doesn't define a multi-column grouping; it defines -multiple reports!) If your command-line looks like this, - - -.. code-block:: perl - - pt-query-digest /path/to/slow.log --select Rows_read,Rows_sent \ - --group-by fingerprint --order-by Query_time:sum --limit 10 - - -The corresponding pseudo-SQL looks like this: - - -.. code-block:: perl - - SELECT WORST(query BY Query_time), SUM(Query_time), ... - FROM /path/to/slow.log - GROUP BY FINGERPRINT(query) - ORDER BY SUM(Query_time) DESC - LIMIT 10 - - -You can also use the value \ ``distill``\ , which is a kind of super-fingerprint. -See "--group-by" for more. - -When parsing memcached input ("--type" memcached), the fingerprint is an -abstracted version of the command and key, with placeholders removed. For -example, \ ``get user_123_preferences``\ fingerprints to \ ``get user_?_preferences``\ . -There is also a \ ``key_print``\ , which is a fingerprinted version of the key. This -example's key_print is \ ``user_?_preferences``\ . - -Query fingerprinting accommodates a great many special cases, which have proven -necessary in the real world. For example, an IN list with 5 literals is really -equivalent to one with 4 literals, so lists of literals are collapsed to a -single one. If you want to understand more about how and why all of these cases -are handled, please review the test cases in the Subversion repository. If you -find something that is not fingerprinted properly, please submit a bug report -with a reproducible test case. Here is a list of transformations during -fingerprinting, which might not be exhaustive: - - -\* - - Group all SELECT queries from mysqldump together, even if they are against - different tables. Ditto for all of pt-table-checksum's checksum queries. - - - -\* - - Shorten multi-value INSERT statements to a single VALUES() list. - - - -\* - - Strip comments. - - - -\* - - Abstract the databases in USE statements, so all USE statements are grouped - together. 
- - - -\* - - Replace all literals, such as quoted strings. For efficiency, the code that - replaces literal numbers is somewhat non-selective, and might replace some - things as numbers when they really are not. Hexadecimal literals are also - replaced. NULL is treated as a literal. Numbers embedded in identifiers are - also replaced, so tables named similarly will be fingerprinted to the same - values (e.g. users_2009 and users_2010 will fingerprint identically). - - - -\* - - Collapse all whitespace into a single space. - - - -\* - - Lowercase the entire query. - - - -\* - - Replace all literals inside of IN() and VALUES() lists with a single - placeholder, regardless of cardinality. - - - -\* - - Collapse multiple identical UNION queries into a single one. - - - - -******* -OPTIONS -******* - - -DSN values in "--review-history" default to values in "--review" if COPY -is yes. - -This tool accepts additional command-line arguments. Refer to the -"SYNOPSIS" and usage information for details. - - ---apdex-threshold - - type: float; default: 1.0 - - Set Apdex target threshold (T) for query response time. The Application - Performance Index (Apdex) Technical Specification V1.1 defines T as "a - positive decimal value in seconds, having no more than two significant digits - of granularity." This value only applies to query response time (Query_time). - - Options can be abbreviated so specifying \ ``--apdex-t``\ also works. - - See `http://www.apdex.org/ `_. - - - ---ask-pass - - Prompt for a password when connecting to MySQL. - - - ---attribute-aliases - - type: array; default: db|Schema - - List of attribute|alias,etc. - - Certain attributes have multiple names, like db and Schema. If an event does - not have the primary attribute, pt-query-digest looks for an alias attribute. - If it finds an alias, it creates the primary attribute with the alias - attribute's value and removes the alias attribute. 
- - If the event has the primary attribute, all alias attributes are deleted. - - This helps simplify event attributes so that, for example, there will not - be report lines for both db and Schema. - - - ---attribute-value-limit - - type: int; default: 4294967296 - - A sanity limit for attribute values. - - This option deals with bugs in slow-logging functionality that causes large - values for attributes. If the attribute's value is bigger than this, the - last-seen value for that class of query is used instead. - - - ---aux-dsn - - type: DSN - - Auxiliary DSN used for special options. - - The following options may require a DSN even when only parsing a slow log file: - - - .. code-block:: perl - - * --since - * --until - - - See each option for why it might require a DSN. - - - ---charset - - short form: -A; type: string - - Default character set. If the value is utf8, sets Perl's binmode on - STDOUT to utf8, passes the mysql_enable_utf8 option to DBD::mysql, and - runs SET NAMES UTF8 after connecting to MySQL. Any other value sets - binmode on STDOUT without the utf8 layer, and runs SET NAMES after - connecting to MySQL. - - - ---check-attributes-limit - - type: int; default: 1000 - - Stop checking for new attributes after this many events. - - For better speed, pt-query-digest stops checking events for new attributes - after a certain number of events. Any new attributes after this number - will be ignored and will not be reported. - - One special case is new attributes for pre-existing query classes - (see "--group-by" about query classes). New attributes will not be added - to pre-existing query classes even if the attributes are detected before the - "--check-attributes-limit" limit. - - - ---config - - type: Array - - Read this comma-separated list of config files; if specified, this must be the - first option on the command line. - - - ---[no]continue-on-error - - default: yes - - Continue parsing even if there is an error. 
- - - ---create-review-history-table - - Create the "--review-history" table if it does not exist. - - This option causes the table specified by "--review-history" to be created - with the default structure shown in the documentation for that option. - - - ---create-review-table - - Create the "--review" table if it does not exist. - - This option causes the table specified by "--review" to be created with the - default structure shown in the documentation for that option. - - - ---daemonize - - Fork to the background and detach from the shell. POSIX - operating systems only. - - - ---defaults-file - - short form: -F; type: string - - Only read mysql options from the given file. You must give an absolute pathname. - - - ---embedded-attributes - - type: array - - Two Perl regex patterns to capture pseudo-attributes embedded in queries. - - Embedded attributes might be special attribute-value pairs that you've hidden - in comments. The first regex should match the entire set of attributes (in - case there are multiple). The second regex should match and capture - attribute-value pairs from the first regex. - - For example, suppose your query looks like the following: - - - .. code-block:: perl - - SELECT * from users -- file: /login.php, line: 493; - - - You might run pt-query-digest with the following option: - - - .. code-block:: perl - - pt-query-digest --embedded-attributes ' -- .*','(\w+): ([^\,]+)' - - - The first regular expression captures the whole comment: - - - .. code-block:: perl - - " -- file: /login.php, line: 493;" - - - The second one splits it into attribute-value pairs and adds them to the event: - - - .. code-block:: perl - - ATTRIBUTE VALUE - ========= ========== - file /login.php - line 493 - - - \ **NOTE**\ : All commas in the regex patterns must be escaped with \ otherwise - the pattern will break. - - - ---execute - - type: DSN - - Execute queries on this DSN. - - Adds a callback into the chain, after filters but before the reports. 
Events - are executed on this DSN. If they are successful, the time they take to execute - overwrites the event's Query_time attribute and the original Query_time value - (from the log) is saved as the Exec_orig_time attribute. If unsuccessful, - the callback returns false and terminates the chain. - - If the connection fails, pt-query-digest tries to reconnect once per second. - - See also "--mirror" and "--execute-throttle". - - - ---execute-throttle - - type: array - - Throttle values for "--execute". - - By default "--execute" runs without any limitations or concerns for the - amount of time that it takes to execute the events. The "--execute-throttle" - option allows you to limit the amount of time spent doing "--execute" relative - to the other processes that handle events. This works by marking some events - with a \ ``Skip_exec``\ attribute when "--execute" begins to take too much time. - "--execute" will not execute an event if this attribute is true. This - indirectly decreases the time spent doing "--execute". - - The "--execute-throttle" option takes at least two comma-separated values: - max allowed "--execute" time as a percentage and a check interval time. An - optional third value is a percentage step for increasing and decreasing the - probability that an event will be marked \ ``Skip_exec``\ true. 5 (percent) is - the default step. - - For example: "--execute-throttle" \ ``70,60,10``\ . This will limit - "--execute" to 70% of total event processing time, checked every minute - (60 seconds) and probability stepped up and down by 10%. When "--execute" - exceeds 70%, the probability that events will be marked \ ``Skip_exec``\ true - increases by 10%. "--execute" time is checked again after another minute. - If it's still above 70%, then the probability will increase another 10%. - Or, if it's dropped below 70%, then the probability will decrease by 10%. 
- - - ---expected-range - - type: array; default: 5,10 - - Explain items when there are more or fewer than expected. - - Defines the number of items expected to be seen in the report given by - "--[no]report", as controlled by "--limit" and "--outliers". If - there are more or fewer items in the report, each one will explain why it was - included. - - - ---explain - - type: DSN - - Run EXPLAIN for the sample query with this DSN and print results. - - This works only when "--group-by" includes fingerprint. It causes - pt-query-digest to run EXPLAIN and include the output into the report. For - safety, queries that appear to have a subquery that EXPLAIN will execute won't - be EXPLAINed. Those are typically "derived table" queries of the form - - - .. code-block:: perl - - select ... from ( select .... ) der; - - - The EXPLAIN results are printed in three places: a sparkline in the event - header, a full vertical format in the event report, and a sparkline in the - profile. - - The full format appears at the end of each event report in vertical style - (\ ``\G``\ ) just like MySQL prints it. - - The sparklines (see "SPARKLINES") are compact representations of the - access type for each table and whether or not "Using temporary" or "Using - filesort" appear in EXPLAIN. The sparklines look like: - - - .. code-block:: perl - - nr>TF - - - That sparkline means that there are two tables, the first uses a range (n) - access, the second uses a ref access, and both "Using temporary" (T) and - "Using filesort" (F) appear. The greater-than character just separates table - access codes from T and/or F. - - The abbreviated table access codes are: - - - .. code-block:: perl - - a ALL - c const - e eq_ref - f fulltext - i index - m index_merge - n range - o ref_or_null - r ref - s system - u unique_subquery - - - A capitalized access code means that "Using index" appears in EXPLAIN for - that table. 
- - - ---filter - - type: string - - Discard events for which this Perl code doesn't return true. - - This option is a string of Perl code or a file containing Perl code that gets - compiled into a subroutine with one argument: $event. This is a hashref. - If the given value is a readable file, then pt-query-digest reads the entire - file and uses its contents as the code. The file should not contain - a shebang (#!/usr/bin/perl) line. - - If the code returns true, the chain of callbacks continues; otherwise it ends. - The code is the last statement in the subroutine other than \ ``return $event``\ . - The subroutine template is: - - - .. code-block:: perl - - sub { $event = shift; filter && return $event; } - - - Filters given on the command line are wrapped inside parentheses like - \ ``( filter )``\ . For complex, multi-line filters, you must put the code inside - a file so it will not be wrapped inside parentheses. Either way, the filter - must produce syntactically valid code given the template. For example, an - if-else branch given on the command line would not be valid: - - - .. code-block:: perl - - --filter 'if () { } else { }' # WRONG - - - Since it's given on the command line, the if-else branch would be wrapped inside - parentheses which is not syntactically valid. So to accomplish something more - complex like this would require putting the code in a file, for example - filter.txt: - - - .. code-block:: perl - - my $event_ok; if (...) { $event_ok=1; } else { $event_ok=0; } $event_ok - - - Then specify \ ``--filter filter.txt``\ to read the code from filter.txt. - - If the filter code won't compile, pt-query-digest will die with an error. - If the filter code does compile, an error may still occur at runtime if the - code tries to do something wrong (like pattern match an undefined value). - pt-query-digest does not provide any safeguards so code carefully! - - An example filter that discards everything but SELECT statements: - - - .. 
code-block:: perl - - --filter '$event->{arg} =~ m/^select/i' - - - This is compiled into a subroutine like the following: - - - .. code-block:: perl - - sub { $event = shift; ( $event->{arg} =~ m/^select/i ) && return $event; } - - - It is permissible for the code to have side effects (to alter \ ``$event``\ ). - - You can find an explanation of the structure of $event at - `http://code.google.com/p/maatkit/wiki/EventAttributes `_. - - Here are more examples of filter code: - - - Host/IP matches domain.com - - --filter '($event->{host} || $event->{ip} || "") =~ m/domain.com/' - - Sometimes MySQL logs the host where the IP is expected. Therefore, we - check both. - - - - User matches john - - --filter '($event->{user} || "") =~ m/john/' - - - - More than 1 warning - - --filter '($event->{Warning_count} || 0) > 1' - - - - Query does full table scan or full join - - --filter '(($event->{Full_scan} || "") eq "Yes") || (($event->{Full_join} || "") eq "Yes")' - - - - Query was not served from query cache - - --filter '($event->{QC_Hit} || "") eq "No"' - - - - Query is 1 MB or larger - - --filter '$event->{bytes} >= 1_048_576' - - - - Since "--filter" allows you to alter \ ``$event``\ , you can use it to do other - things, like create new attributes. See "ATTRIBUTES" for an example. - - - ---fingerprints - - Add query fingerprints to the standard query analysis report. This is mostly - useful for debugging purposes. - - - ---[no]for-explain - - default: yes - - Print extra information to make analysis easy. - - This option adds code snippets to make it easy to run SHOW CREATE TABLE and SHOW - TABLE STATUS for the query's tables. It also rewrites non-SELECT queries into a - SELECT that might be helpful for determining the non-SELECT statement's index - usage. - - - ---group-by - - type: Array; default: fingerprint - - Which attribute of the events to group by. 
- - In general, you can group queries into classes based on any attribute of the - query, such as \ ``user``\ or \ ``db``\ , which will by default show you which users - and which databases get the most \ ``Query_time``\ . The default attribute, - \ ``fingerprint``\ , groups similar, abstracted queries into classes; see below - and see also "FINGERPRINTS". - - A report is printed for each "--group-by" value (unless \ ``--no-report``\ is - given). Therefore, \ ``--group-by user,db``\ means "report on queries with the - same user and report on queries with the same db"--it does not mean "report - on queries with the same user and db." See also "OUTPUT". - - Every value must have a corresponding value in the same position in - "--order-by". However, adding values to "--group-by" will automatically - add values to "--order-by", for your convenience. - - There are several magical values that cause some extra data mining to happen - before the grouping takes place: - - - fingerprint - - This causes events to be fingerprinted to abstract queries into - a canonical form, which is then used to group events together into a class. - See "FINGERPRINTS" for more about fingerprinting. - - - - tables - - This causes events to be inspected for what appear to be tables, and - then aggregated by that. Note that a query that contains two or more tables - will be counted as many times as there are tables; so a join against two tables - will count the Query_time against both tables. - - - - distill - - This is a sort of super-fingerprint that collapses queries down - into a suggestion of what they do, such as \ ``INSERT SELECT table1 table2``\ . - - - - If parsing memcached input ("--type" memcached), there are other - attributes which you can group by: key_print (see memcached section in - "FINGERPRINTS"), cmd, key, res and val (see memcached section in - "ATTRIBUTES"). - - - ---help - - Show help and exit. - - - ---host - - short form: -h; type: string - - Connect to host. 
- - - ---ignore-attributes - - type: array; default: arg, cmd, insert_id, ip, port, Thread_id, timestamp, exptime, flags, key, res, val, server_id, offset, end_log_pos, Xid - - Do not aggregate these attributes when auto-detecting "--select". - - If you do not specify "--select" then pt-query-digest auto-detects and - aggregates every attribute that it finds in the slow log. Some attributes, - however, should not be aggregated. This option allows you to specify a list - of attributes to ignore. This only works when no explicit "--select" is - given. - - - ---inherit-attributes - - type: array; default: db,ts - - If missing, inherit these attributes from the last event that had them. - - This option sets which attributes are inherited or carried forward to events - which do not have them. For example, if one event has the db attribute equal - to "foo", but the next event doesn't have the db attribute, then it inherits - "foo" for its db attribute. - - Inheritance is usually desirable, but in some cases it might confuse things. - If a query inherits a database that it doesn't actually use, then this could - confuse "--execute". - - - ---interval - - type: float; default: .1 - - How frequently to poll the processlist, in seconds. - - - ---iterations - - type: int; default: 1 - - How many times to iterate through the collect-and-report cycle. If 0, iterate - to infinity. Each iteration runs for "--run-time" amount of time. An - iteration is usually determined by an amount of time and a report is printed - when that amount of time elapses. With "--run-time-mode" \ ``interval``\ , - an interval is instead determined by the interval time you specify with - "--run-time". See "--run-time" and "--run-time-mode" for more - information. - - - ---limit - - type: Array; default: 95%:20 - - Limit output to the given percentage or count. - - If the argument is an integer, report only the top N worst queries. 
If the - argument is an integer followed by the \ ``%``\ sign, report that percentage of the - worst queries. If the percentage is followed by a colon and another integer, - report the top percentage or the number specified by that integer, whichever - comes first. - - The value is actually a comma-separated array of values, one for each item in - "--group-by". If you don't specify a value for any of those items, the - default is the top 95%. - - See also "--outliers". - - - ---log - - type: string - - Print all output to this file when daemonized. - - - ---mirror - - type: float - - How often to check whether connections should be moved, depending on - \ ``read_only``\ . Requires "--processlist" and "--execute". - - This option causes pt-query-digest to check every N seconds whether it is reading - from a read-write server and executing against a read-only server, which is a - sensible way to set up two servers if you're doing something like master-master - replication. The `http://code.google.com/p/mysql-master-master/ `_ master-master - toolkit does this. The aim is to keep the passive server ready for failover, - which is impossible without putting it under a realistic workload. - - - ---order-by - - type: Array; default: Query_time:sum - - Sort events by this attribute and aggregate function. - - This is a comma-separated list of order-by expressions, one for each - "--group-by" attribute. The default \ ``Query_time:sum``\ is used for - "--group-by" attributes without explicitly given "--order-by" attributes - (that is, if you specify more "--group-by" attributes than corresponding - "--order-by" attributes). The syntax is \ ``attribute:aggregate``\ . See - "ATTRIBUTES" for valid attributes. Valid aggregates are: - - - .. 
code-block:: perl - - Aggregate Meaning - ========= ============================ - sum Sum/total attribute value - min Minimum attribute value - max Maximum attribute value - cnt Frequency/count of the query - - - For example, the default \ ``Query_time:sum``\ means that queries in the - query analysis report will be ordered (sorted) by their total query execution - time ("Exec time"). \ ``Query_time:max``\ orders the queries by their - maximum query execution time, so the query with the single largest - \ ``Query_time``\ will be listed first. \ ``cnt``\ refers more to the frequency - of the query as a whole, how often it appears; "Count" is its corresponding - line in the query analysis report. So any attribute and \ ``cnt``\ should yield - the same report wherein queries are sorted by the number of times they - appear. - - When parsing general logs ("--type" \ ``genlog``\ ), the default "--order-by" - becomes \ ``Query_time:cnt``\ . General logs do not report query times so only - the \ ``cnt``\ aggregate makes sense because all query times are zero. - - If you specify an attribute that doesn't exist in the events, then - pt-query-digest falls back to the default \ ``Query_time:sum``\ and prints a notice - at the beginning of the report for each query class. You can create attributes - with "--filter" and order by them; see "ATTRIBUTES" for an example. - - - ---outliers - - type: array; default: Query_time:1:10 - - Report outliers by attribute:percentile:count. - - The syntax of this option is a comma-separated list of colon-delimited strings. - The first field is the attribute by which an outlier is defined. The second is - a number that is compared to the attribute's 95th percentile. The third is - optional, and is compared to the attribute's cnt aggregate. Queries that pass - this specification are added to the report, regardless of any limits you - specified in "--limit".
- - For example, to report queries whose 95th percentile Query_time is at least 60 - seconds and which are seen at least 5 times, use the following argument: - - - .. code-block:: perl - - --outliers Query_time:60:5 - - - You can specify an --outliers option for each value in "--group-by". - - - ---password - - short form: -p; type: string - - Password to use when connecting. - - - ---pid - - type: string - - Create the given PID file when daemonized. The file contains the process - ID of the daemonized instance. The PID file is removed when the - daemonized instance exits. The program checks for the existence of the - PID file when starting; if it exists and the process with the matching PID - exists, the program exits. - - - ---pipeline-profile - - Print a profile of the pipeline processes. - - - ---port - - short form: -P; type: int - - Port number to use for connection. - - - ---print - - Print log events to STDOUT in standard slow-query-log format. - - - ---print-iterations - - Print the start time for each "--iterations". - - This option causes a line like the following to be printed at the start - of each "--iterations" report: - - - .. code-block:: perl - - # Iteration 2 started at 2009-11-24T14:39:48.345780 - - - This line will print even if \ ``--no-report``\ is specified. If \ ``--iterations 0``\ - is specified, each iteration number will be \ ``0``\ . - - - ---processlist - - type: DSN - - Poll this DSN's processlist for queries, with "--interval" sleep between. - - If the connection fails, pt-query-digest tries to reopen it once per second. See - also "--mirror". - - - ---progress - - type: array; default: time,30 - - Print progress reports to STDERR. The value is a comma-separated list with two - parts. The first part can be percentage, time, or iterations; the second part - specifies how often an update should be printed, in percentage, seconds, or - number of iterations. 
- - - ---read-timeout - - type: time; default: 0 - - Wait this long for an event from the input; 0 to wait forever. - - This option sets the maximum time to wait for an event from the input. It - applies to all types of input except "--processlist". If an - event is not received after the specified time, the script stops reading the - input and prints its reports. If "--iterations" is 0 or greater than - 1, the next iteration will begin, else the script will exit. - - This option requires the Perl POSIX module. - - - ---[no]report - - default: yes - - Print out reports on the aggregate results from "--group-by". - - This is the standard slow-log analysis functionality. See "OUTPUT" for the - description of what this does and what the results look like. - - - ---report-all - - Include all queries, even if they have already been reviewed. - - - ---report-format - - type: Array; default: rusage,date,hostname,files,header,profile,query_report,prepared - - Print these sections of the query analysis report. - - - .. code-block:: perl - - SECTION PRINTS - ============ ====================================================== - rusage CPU times and memory usage reported by ps - date Current local date and time - hostname Hostname of machine on which pt-query-digest was run - files Input files read/parse - header Summary of the entire analysis run - profile Compact table of queries for an overview of the report - query_report Detailed information about each unique query - prepared Prepared statements - - - The sections are printed in the order specified. The rusage, date, files and - header sections are grouped together if specified together; other sections are - separated by blank lines. - - See "OUTPUT" for more information on the various parts of the query report. - - - ---report-histogram - - type: string; default: Query_time - - Chart the distribution of this attribute's values. 
- - The distribution chart is limited to time-based attributes, so charting - \ ``Rows_examined``\ , for example, will produce a useless chart. Charts look - like: - - - .. code-block:: perl - - # Query_time distribution - # 1us - # 10us - # 100us - # 1ms - # 10ms ################################ - # 100ms ################################################################ - # 1s ######## - # 10s+ - - - A sparkline (see "SPARKLINES") of the full chart is also printed in the - header for each query event. The sparkline of that full chart is: - - - .. code-block:: perl - - # Query_time sparkline: | .^_ | - - - The sparkline itself is the 8 characters between the pipes (\ ``|``\ ), one character - for each of the 8 buckets (1us, 10us, etc.) Four character codes are used - to represent the approximate relation between each bucket's value: - - - .. code-block:: perl - - _ . - ^ - - - The caret \ ``^``\ represents peaks (buckets with the most values), and - the underscore \ ``_``\ represents lows (buckets with the least or at least - one value). The period \ ``.``\ and the hyphen \ ``-``\ represent buckets with values - between these two extremes. If a bucket has no values, a space is printed. - So in the example above, the period represents the 10ms bucket, the caret - the 100ms bucket, and the underscore the 1s bucket. - - See "OUTPUT" for more information. - - - ---review - - type: DSN - - Store a sample of each class of query in this DSN. - - The argument specifies a table to store all unique query fingerprints in. The - table must have at least the following columns. You can add more columns for - your own special purposes, but they won't be used by pt-query-digest. The - following CREATE TABLE definition is also used for "--create-review-table". - MAGIC_create_review: - - - .. 
code-block:: perl - - CREATE TABLE query_review ( - checksum BIGINT UNSIGNED NOT NULL PRIMARY KEY, - fingerprint TEXT NOT NULL, - sample TEXT NOT NULL, - first_seen DATETIME, - last_seen DATETIME, - reviewed_by VARCHAR(20), - reviewed_on DATETIME, - comments TEXT - ) - - - The columns are as follows: - - - .. code-block:: perl - - COLUMN MEANING - =========== =============== - checksum A 64-bit checksum of the query fingerprint - fingerprint The abstracted version of the query; its primary key - sample The query text of a sample of the class of queries - first_seen The smallest timestamp of this class of queries - last_seen The largest timestamp of this class of queries - reviewed_by Initially NULL; if set, query is skipped thereafter - reviewed_on Initially NULL; not assigned any special meaning - comments Initially NULL; not assigned any special meaning - - - Note that the \ ``fingerprint``\ column is the true primary key for a class of - queries. The \ ``checksum``\ is just a cryptographic hash of this value, which - provides a shorter value that is very likely to also be unique. - - After parsing and aggregating events, your table should contain a row for each - fingerprint. This option depends on \ ``--group-by fingerprint``\ (which is the - default). It will not work otherwise. - - - ---review-history - - type: DSN - - The table in which to store historical values for review trend analysis. - - Each time you review queries with "--review", pt-query-digest will save - information into this table so you can see how classes of queries have changed - over time. - - This DSN inherits unspecified values from "--review". It should mention a - table in which to store statistics about each class of queries. pt-query-digest - verifies the existence of the table, and your privileges to insert, delete and - update on that table. - - pt-query-digest then inspects the columns in the table. The table must have at - least the following columns: - - - .. 
code-block:: perl - - CREATE TABLE query_review_history ( - checksum BIGINT UNSIGNED NOT NULL, - sample TEXT NOT NULL - ); - - - Any columns not mentioned above are inspected to see if they follow a certain - naming convention. The column is special if the name ends with an underscore - followed by any of these MAGIC_history_cols values: - - - .. code-block:: perl - - pct|avt|cnt|sum|min|max|pct_95|stddev|median|rank - - - If the column ends with one of those values, then the prefix is interpreted as - the event attribute to store in that column, and the suffix is interpreted as - the metric to be stored. For example, a column named Query_time_min will be - used to store the minimum Query_time for the class of events. The presence of - this column will also add Query_time to the "--select" list. - - The table should also have a primary key, but that is up to you, depending on - how you want to store the historical data. We suggest adding ts_min and ts_max - columns and making them part of the primary key along with the checksum. But - you could also just add a ts_min column and make it a DATE type, so you'd get - one row per class of queries per day. - - The default table structure follows. The following MAGIC_create_review_history - table definition is used for "--create-review-history-table": - - - .. 
code-block:: perl - - CREATE TABLE query_review_history ( - checksum BIGINT UNSIGNED NOT NULL, - sample TEXT NOT NULL, - ts_min DATETIME, - ts_max DATETIME, - ts_cnt FLOAT, - Query_time_sum FLOAT, - Query_time_min FLOAT, - Query_time_max FLOAT, - Query_time_pct_95 FLOAT, - Query_time_stddev FLOAT, - Query_time_median FLOAT, - Lock_time_sum FLOAT, - Lock_time_min FLOAT, - Lock_time_max FLOAT, - Lock_time_pct_95 FLOAT, - Lock_time_stddev FLOAT, - Lock_time_median FLOAT, - Rows_sent_sum FLOAT, - Rows_sent_min FLOAT, - Rows_sent_max FLOAT, - Rows_sent_pct_95 FLOAT, - Rows_sent_stddev FLOAT, - Rows_sent_median FLOAT, - Rows_examined_sum FLOAT, - Rows_examined_min FLOAT, - Rows_examined_max FLOAT, - Rows_examined_pct_95 FLOAT, - Rows_examined_stddev FLOAT, - Rows_examined_median FLOAT, - -- Percona extended slowlog attributes - -- http://www.percona.com/docs/wiki/patches:slow_extended - Rows_affected_sum FLOAT, - Rows_affected_min FLOAT, - Rows_affected_max FLOAT, - Rows_affected_pct_95 FLOAT, - Rows_affected_stddev FLOAT, - Rows_affected_median FLOAT, - Rows_read_sum FLOAT, - Rows_read_min FLOAT, - Rows_read_max FLOAT, - Rows_read_pct_95 FLOAT, - Rows_read_stddev FLOAT, - Rows_read_median FLOAT, - Merge_passes_sum FLOAT, - Merge_passes_min FLOAT, - Merge_passes_max FLOAT, - Merge_passes_pct_95 FLOAT, - Merge_passes_stddev FLOAT, - Merge_passes_median FLOAT, - InnoDB_IO_r_ops_min FLOAT, - InnoDB_IO_r_ops_max FLOAT, - InnoDB_IO_r_ops_pct_95 FLOAT, - InnoDB_IO_r_ops_stddev FLOAT, - InnoDB_IO_r_ops_median FLOAT, - InnoDB_IO_r_bytes_min FLOAT, - InnoDB_IO_r_bytes_max FLOAT, - InnoDB_IO_r_bytes_pct_95 FLOAT, - InnoDB_IO_r_bytes_stddev FLOAT, - InnoDB_IO_r_bytes_median FLOAT, - InnoDB_IO_r_wait_min FLOAT, - InnoDB_IO_r_wait_max FLOAT, - InnoDB_IO_r_wait_pct_95 FLOAT, - InnoDB_IO_r_wait_stddev FLOAT, - InnoDB_IO_r_wait_median FLOAT, - InnoDB_rec_lock_wait_min FLOAT, - InnoDB_rec_lock_wait_max FLOAT, - InnoDB_rec_lock_wait_pct_95 FLOAT, - InnoDB_rec_lock_wait_stddev FLOAT, - 
InnoDB_rec_lock_wait_median FLOAT, - InnoDB_queue_wait_min FLOAT, - InnoDB_queue_wait_max FLOAT, - InnoDB_queue_wait_pct_95 FLOAT, - InnoDB_queue_wait_stddev FLOAT, - InnoDB_queue_wait_median FLOAT, - InnoDB_pages_distinct_min FLOAT, - InnoDB_pages_distinct_max FLOAT, - InnoDB_pages_distinct_pct_95 FLOAT, - InnoDB_pages_distinct_stddev FLOAT, - InnoDB_pages_distinct_median FLOAT, - -- Boolean (Yes/No) attributes. Only the cnt and sum are needed for these. - -- cnt is how many times is attribute was recorded and sum is how many of - -- those times the value was Yes. Therefore sum/cnt * 100 = % of recorded - -- times that the value was Yes. - QC_Hit_cnt FLOAT, - QC_Hit_sum FLOAT, - Full_scan_cnt FLOAT, - Full_scan_sum FLOAT, - Full_join_cnt FLOAT, - Full_join_sum FLOAT, - Tmp_table_cnt FLOAT, - Tmp_table_sum FLOAT, - Disk_tmp_table_cnt FLOAT, - Disk_tmp_table_sum FLOAT, - Filesort_cnt FLOAT, - Filesort_sum FLOAT, - Disk_filesort_cnt FLOAT, - Disk_filesort_sum FLOAT, - PRIMARY KEY(checksum, ts_min, ts_max) - ); - - - Note that we store the count (cnt) for the ts attribute only; it will be - redundant to store this for other attributes. - - - ---run-time - - type: time - - How long to run for each "--iterations". The default is to run forever - (you can interrupt with CTRL-C). Because "--iterations" defaults to 1, - if you only specify "--run-time", pt-query-digest runs for that amount of - time and then exits. The two options are specified together to do - collect-and-report cycles. For example, specifying "--iterations" \ ``4``\ - "--run-time" \ ``15m``\ with a continuous input (like STDIN or - "--processlist") will cause pt-query-digest to run for 1 hour - (15 minutes x 4), reporting four times, once at each 15 minute interval. - - - ---run-time-mode - - type: string; default: clock - - Set what the value of "--run-time" operates on. 
Following are the possible - values for this option: - - - clock - - "--run-time" specifies an amount of real clock time during which the tool - should run for each "--iterations". - - - - event - - "--run-time" specifies an amount of log time. Log time is determined by - timestamps in the log. The first timestamp seen is remembered, and each - timestamp after that is compared to the first to determine how much log time - has passed. For example, if the first timestamp seen is \ ``12:00:00``\ and the - next is \ ``12:01:30``\ , that is 1 minute and 30 seconds of log time. The tool - will read events until the log time is greater than or equal to the specified - "--run-time" value. - - Since timestamps in logs are not always printed, or not always printed - frequently, this mode varies in accuracy. - - - - interval - - "--run-time" specifies interval boundaries of log time into which events - are divided and reports are generated. This mode is different from the - others because it doesn't specify how long to run. The value of - "--run-time" must be an interval that divides evenly into minutes, hours - or days. For example, \ ``5m``\ divides evenly into hours (60/5=12, so 12 - 5 minutes intervals per hour) but \ ``7m``\ does not (60/7=8.6). - - Specifying \ ``--run-time-mode interval --run-time 30m --iterations 0``\ is - similar to specifying \ ``--run-time-mode clock --run-time 30m --iterations 0``\ . - In the latter case, pt-query-digest will run forever, producing reports every - 30 minutes, but this only works effectively with continuous inputs like - STDIN and the processlist. For fixed inputs, like log files, the former - example produces multiple reports by dividing the log into 30 minutes - intervals based on timestamps. - - Intervals are calculated from the zeroth second/minute/hour in which a - timestamp occurs, not from whatever time it specifies. 
For example, - with 30 minute intervals and a timestamp of \ ``12:10:30``\ , the interval - is \ *not*\ \ ``12:10:30``\ to \ ``12:40:30``\ , it is \ ``12:00:00``\ to \ ``12:29:59``\ . - Or, with 1 hour intervals, it is \ ``12:00:00``\ to \ ``12:59:59``\ . - When a new timestamp exceeds the interval, a report is printed, and the - next interval is recalculated based on the new timestamp. - - Since "--iterations" is 1 by default, you probably want to specify - a new value else pt-query-digest will only get and report on the first - interval from the log since 1 interval = 1 iteration. If you want to - get and report every interval in a log, specify "--iterations" \ ``0``\ . - - - - - ---sample - - type: int - - Filter out all but the first N occurrences of each query. The queries are - filtered on the first value in "--group-by", so by default, this will filter - by query fingerprint. For example, \ ``--sample 2``\ will permit two sample queries - for each fingerprint. Useful in conjunction with "--print" to print out the - queries. You probably want to set \ ``--no-report``\ to avoid the overhead of - aggregating and reporting if you're just using this to print out samples of - queries. A complete example: - - - .. code-block:: perl - - pt-query-digest --sample 2 --no-report --print slow.log - - - - ---select - - type: Array - - Compute aggregate statistics for these attributes. - - By default pt-query-digest auto-detects, aggregates and prints metrics for - every query attribute that it finds in the slow query log. This option - specifies a list of only the attributes that you want. You can specify an - alternative attribute with a colon. For example, \ ``db:Schema``\ uses db if it's - available, and Schema if it's not. - - Previously, pt-query-digest only aggregated these attributes: - - - .. 
code-block:: perl - - Query_time,Lock_time,Rows_sent,Rows_examined,user,db:Schema,ts - - - Attributes specified in the "--review-history" table will always be selected - even if you do not specify "--select". - - See also "--ignore-attributes" and "ATTRIBUTES". - - - ---set-vars - - type: string; default: wait_timeout=10000 - - Set these MySQL variables. Immediately after connecting to MySQL, this - string will be appended to SET and executed. - - - ---shorten - - type: int; default: 1024 - - Shorten long statements in reports. - - Shortens long statements, replacing the omitted portion with a \ ``/\*... omitted - ...\*/``\ comment. This applies only to the output in reports, not to information - stored for "--review" or other places. It prevents a large statement from - causing difficulty in a report. The argument is the preferred length of the - shortened statement. Not all statements can be shortened, but very large INSERT - and similar statements often can; and so can IN() lists, although only the first - such list in the statement will be shortened. - - If it shortens something beyond recognition, you can find the original statement - in the log, at the offset shown in the report header (see "OUTPUT"). - - - ---show-all - - type: Hash - - Show all values for these attributes. - - By default pt-query-digest only shows as many of an attribute's value that - fit on a single line. This option allows you to specify attributes for which - all values will be shown (line width is ignored). This only works for - attributes with string values like user, host, db, etc. Multiple attributes - can be specified, comma-separated. - - - ---since - - type: string - - Parse only queries newer than this value (parse queries since this date). - - This option allows you to ignore queries older than a certain value and parse - only those queries which are more recent than the value. The value can be - several types: - - - .. 
code-block:: perl - - * Simple time value N with optional suffix: N[shmd], where - s=seconds, h=hours, m=minutes, d=days (default s if no suffix - given); this is like saying "since N[shmd] ago" - * Full date with optional hours:minutes:seconds: - YYYY-MM-DD [HH:MM:SS] - * Short, MySQL-style date: - YYMMDD [HH:MM:SS] - * Any time expression evaluated by MySQL: - CURRENT_DATE - INTERVAL 7 DAY - - - If you give a MySQL time expression, then you must also specify a DSN - so that pt-query-digest can connect to MySQL to evaluate the expression. If you - specify "--execute", "--explain", "--processlist", "--review" - or "--review-history", then one of these DSNs will be used automatically. - Otherwise, you must specify an "--aux-dsn" or pt-query-digest will die - saying that the value is invalid. - - The MySQL time expression is wrapped inside a query like - "SELECT UNIX_TIMESTAMP(expression)", so be sure that the expression is - valid inside this query. For example, do not use UNIX_TIMESTAMP() because - UNIX_TIMESTAMP(UNIX_TIMESTAMP()) returns 0. - - Events are assumed to be in chronological order--older events at the beginning of - the log and newer events at the end of the log. "--since" is strict: it - ignores all queries until one is found that is new enough. Therefore, if - the query events are not consistently timestamped, some may be ignored which - are actually new enough. - - See also "--until". - - - ---socket - - short form: -S; type: string - - Socket file to use for connection. - - - ---statistics - - Print statistics about internal counters. This option is mostly for - development and debugging. The statistics report is printed for each - iteration after all other reports, even if no events are processed or - \ ``--no-report``\ is specified. The statistics report looks like: - - - .. code-block:: perl - - # No events processed.
- - # Statistic Count %/Events - # ================================================ ====== ======== - # events_read 142030 100.00 - # events_parsed 50430 35.51 - # events_aggregated 0 0.00 - # ignored_midstream_server_response 18111 12.75 - # no_tcp_data 91600 64.49 - # pipeline_restarted_after_MemcachedProtocolParser 142030 100.00 - # pipeline_restarted_after_TcpdumpParser 1 0.00 - # unknown_client_command 1 0.00 - # unknown_client_data 32318 22.75 - - - The first column is the internal counter name; the second column is the counter's - count; and the third column is the count as a percentage of \ ``events_read``\ . - - In this case, it shows why no events were processed/aggregated: 100% of events - were rejected by the \ ``MemcachedProtocolParser``\ . Of those, 35.51% were data - packets, but of these, 12.75% were ignored mid-stream server responses, one was - an unknown client command, and 22.75% were unknown client data. The other - 64.49% were TCP control packets (probably mostly ACKs). - - Since pt-query-digest is complex, you will probably need someone familiar - with its code to decipher the statistics report. - - - ---table-access - - Print a table access report. - - The table access report shows which tables are accessed by all the queries - and if the access is a read or write. The report looks like: - - - .. code-block:: perl - - write `baz`.`tbl` - read `baz`.`new_tbl` - write `baz`.`tbl3` - write `db6`.`tbl6` - - - If you pipe the output to sort, the read and write tables will be grouped - together and sorted alphabetically: - - - .. code-block:: perl - - read `baz`.`new_tbl` - write `baz`.`tbl` - write `baz`.`tbl3` - write `db6`.`tbl6` - - - - ---tcpdump-errors - - type: string - - Write the tcpdump data to this file on error. If pt-query-digest doesn't - parse the stream correctly for some reason, the session's packets since the - last query event will be written out to create a usable test case.
If this - happens, pt-query-digest will not raise an error; it will just discard the - session's saved state and permit the tool to continue working. See "tcpdump" - for more information about parsing tcpdump output. - - - ---timeline - - Show a timeline of events. - - This option makes pt-query-digest print another kind of report: a timeline of - the events. Each query is still grouped and aggregated into classes according to - "--group-by", but then they are printed in chronological order. The timeline - report prints out the timestamp, interval, count and value of each class. - - If all you want is the timeline report, then specify \ ``--no-report``\ to - suppress the default query analysis report. Otherwise, the timeline report - will be printed at the end before the response-time profile - (see "--report-format" and "OUTPUT"). - - For example, this: - - - .. code-block:: perl - - pt-query-digest /path/to/log --group-by distill --timeline - - - will print something like: - - - .. code-block:: perl - - # ######################################################## - # distill report - # ######################################################## - # 2009-07-25 11:19:27 1+00:00:01 2 SELECT foo - # 2009-07-27 11:19:30 00:01 2 SELECT bar - # 2009-07-27 11:30:00 1+06:30:00 2 SELECT foo - - - - ---type - - type: Array - - The type of input to parse (default slowlog). The permitted types are - - - binlog - - Parse a binary log file. - - - - genlog - - Parse a MySQL general log file. General logs lack a lot of "ATTRIBUTES", - notably \ ``Query_time``\ . The default "--order-by" for general logs - changes to \ ``Query_time:cnt``\ . - - - - http - - Parse HTTP traffic from tcpdump. - - - - pglog - - Parse a log file in PostgreSQL format. The parser will automatically recognize - logs sent to syslog and transparently parse the syslog format, too. The - recommended configuration for logging in your postgresql.conf is as follows.
- - The log_destination setting can be set to either syslog or stderr. Syslog has - the added benefit of not interleaving log messages from several sessions - concurrently, which the parser cannot handle, so this might be better than - stderr. CSV-formatted logs are not supported at this time. - - The log_min_duration_statement setting should be set to 0 to capture all - statements with their durations. Alternatively, the parser will also recognize - and handle various combinations of log_duration and log_statement. - - You may enable log_connections and log_disconnections, but this is optional. - - It is highly recommended to set your log_line_prefix to the following: - - - .. code-block:: perl - - log_line_prefix = '%m c=%c,u=%u,D=%d ' - - - This lets the parser find timestamps with milliseconds, session IDs, users, and - databases from the log. If these items are missing, you'll simply get less - information to analyze. For compatibility with other log analysis tools such as - PQA and pgfouine, various log line prefix formats are supported. The general - format is as follows: a timestamp can be detected and extracted (the syslog - timestamp is NOT parsed), and a name=value list of properties can also. - Although the suggested format is as shown above, any name=value list will be - captured and interpreted by using the first letter of the 'name' part, - lowercased, to determine the meaning of the item. The lowercased first letter - is interpreted to mean the same thing as PostgreSQL's built-in %-codes for the - log_line_prefix format string. For example, u means user, so unicorn=fred - will be interpreted as user=fred; d means database, so D=john will be - interpreted as database=john. The pgfouine-suggested formatting is user=%u and - db=%d, so it should Just Work regardless of which format you choose. The main - thing is to add as much information as possible into the log_line_prefix to - permit richer analysis. 
- - Currently, only English locale messages are supported, so if your server's - locale is set to something else, the log won't be parsed properly. (Log - messages with "duration:" and "statement:" won't be recognized.) - - - - slowlog - - Parse a log file in any variation of MySQL slow-log format. - - - - tcpdump - - Inspect network packets and decode the MySQL client protocol, extracting queries - and responses from it. - - pt-query-digest does not actually watch the network (i.e. it does NOT "sniff - packets"). Instead, it's just parsing the output of tcpdump. You are - responsible for generating this output; pt-query-digest does not do it for you. - Then you send this to pt-query-digest as you would any log file: as files on the - command line or to STDIN. - - The parser expects the input to be formatted with the following options: \ ``-x -n - -q -tttt``\ . For example, if you want to capture output from your local machine, - you can do something like the following (the port must come last on FreeBSD): - - - .. code-block:: perl - - tcpdump -s 65535 -x -nn -q -tttt -i any -c 1000 port 3306 \ - > mysql.tcp.txt - pt-query-digest --type tcpdump mysql.tcp.txt - - - The other tcpdump parameters, such as -s, -c, and -i, are up to you. Just make - sure the output looks like this (there is a line break in the first line to - avoid man-page problems): - - - .. code-block:: perl - - 2009-04-12 09:50:16.804849 IP 127.0.0.1.42167 - > 127.0.0.1.3306: tcp 37 - 0x0000: 4508 0059 6eb2 4000 4006 cde2 7f00 0001 - 0x0010: .... - - - Remember tcpdump has a handy -c option to stop after it captures some number of - packets! That's very useful for testing your tcpdump command. Note that - tcpdump can't capture traffic on a Unix socket. Read - `http://bugs.mysql.com/bug.php?id=31577 `_ if you're confused about this. - - Devananda Van Der Veen explained on the MySQL Performance Blog how to capture - traffic without dropping packets on busy servers. 
Dropped packets cause - pt-query-digest to miss the response to a request, then see the response to a - later request and assign the wrong execution time to the query. You can change - the filter to something like the following to help capture a subset of the - queries. (See `http://www.mysqlperformanceblog.com/?p=6092 `_ for details.) - - - .. code-block:: perl - - tcpdump -i any -s 65535 -x -n -q -tttt \ - 'port 3306 and tcp[1] & 7 == 2 and tcp[3] & 7 == 2' - - - All MySQL servers running on port 3306 are automatically detected in the - tcpdump output. Therefore, if the tcpdump output contains packets from - multiple servers on port 3306 (for example, 10.0.0.1:3306, 10.0.0.2:3306, - etc.), all packets/queries from all these servers will be analyzed - together as if they were one server. - - If you're analyzing traffic for a MySQL server that is not running on port - 3306, see "--watch-server". - - Also note that pt-query-digest may fail to report the database for queries - when parsing tcpdump output. The database is discovered only in the initial - connect events for a new client or when \ ``USE db``\ is executed. If the tcpdump - output contains neither of these, then pt-query-digest cannot discover the - database. - - Server-side prepared statements are supported. SSL-encrypted traffic cannot be - inspected and decoded. - - - - memcached - - Similar to tcpdump, but the expected input is memcached packets - instead of MySQL packets. For example: - - - .. code-block:: perl - - tcpdump -i any port 11211 -s 65535 -x -nn -q -tttt \ - > memcached.tcp.txt - pt-query-digest --type memcached memcached.tcp.txt - - - memcached uses port 11211 by default. - - - - - ---until - - type: string - - Parse only queries older than this value (parse queries until this date). - - This option allows you to ignore queries newer than a certain value and parse - only those queries which are older than the value. The value can be one of - the same types listed for "--since".
- - Unlike "--since", "--until" is not strict: all queries are parsed until - one has a timestamp that is equal to or greater than "--until". Then - all subsequent queries are ignored. - - - ---user - - short form: -u; type: string - - User for login if not current user. - - - ---variations - - type: Array - - Report the number of variations in these attributes' values. - - Variations show how many distinct values an attribute had within a class. - The usual value for this option is \ ``arg``\ which shows how many distinct queries - were in the class. This can be useful to determine a query's cacheability. - - Distinct values are determined by CRC32 checksums of the attributes' values. - These checksums are reported in the query report for attributes specified by - this option, like: - - - .. code-block:: perl - - # arg crc 109 (1/25%), 144 (1/25%)... 2 more - - - In that class there were 4 distinct queries. The checksums of the first two - variations are shown, and each one occurred once (or, 25% of the time). - - The count of distinct variations is approximate because only 1,000 variations - are saved. The mod (%) 1000 of the full CRC32 checksum is saved, so some - distinct checksums are treated as equal. - - - ---version - - Show version and exit. - - - ---watch-server - - type: string - - This option tells pt-query-digest which server IP address and port (like - "10.0.0.1:3306") to watch when parsing tcpdump (for "--type" tcpdump and - memcached); all other servers are ignored. If you don't specify it, - pt-query-digest watches all servers by looking for any IP address using port - 3306 or "mysql". If you're watching a server with a non-standard port, this - won't work, so you must specify the IP address and port to watch. - - If you want to watch a mix of servers, some running on standard port 3306 - and some running on non-standard ports, you need to create separate - tcpdump outputs for the non-standard port servers and then specify this - option for each.
At present pt-query-digest cannot auto-detect servers on - port 3306 and also be told to watch a server on a non-standard port. - - - ---[no]zero-admin - - default: yes - - Zero out the Rows_XXX properties for administrator command events. - - - ---[no]zero-bool - - default: yes - - Print 0% boolean values in report. - - - - -*********** -DSN OPTIONS -*********** - - -These DSN options are used to create a DSN. Each option is given like -\ ``option=value``\ . The options are case-sensitive, so P and p are not the -same option. There cannot be whitespace before or after the \ ``=``\ and -if the value contains whitespace it must be quoted. DSN options are -comma-separated. See the percona-toolkit manpage for full details. - - -\* A - - dsn: charset; copy: yes - - Default character set. - - - -\* D - - dsn: database; copy: yes - - Database that contains the query review table. - - - -\* F - - dsn: mysql_read_default_file; copy: yes - - Only read default options from the given file - - - -\* h - - dsn: host; copy: yes - - Connect to host. - - - -\* p - - dsn: password; copy: yes - - Password to use when connecting. - - - -\* P - - dsn: port; copy: yes - - Port number to use for connection. - - - -\* S - - dsn: mysql_socket; copy: yes - - Socket file to use for connection. - - - -\* t - - Table to use as the query review table. - - - -\* u - - dsn: user; copy: yes - - User for login if not current user. - - - - -*********** -ENVIRONMENT -*********** - - -The environment variable \ ``PTDEBUG``\ enables verbose debugging output to STDERR. -To enable debugging and capture all output to a file, run the tool like: - - -.. code-block:: perl - - PTDEBUG=1 pt-query-digest ... > FILE 2>&1 - - -Be careful: debugging output is voluminous and can generate several megabytes -of output. - - -******************* -SYSTEM REQUIREMENTS -******************* - - -You need Perl, DBI, DBD::mysql, and some core packages that ought to be -installed in any reasonably new version of Perl. 
- - -**** -BUGS -**** - - -For a list of known bugs, see `http://www.percona.com/bugs/pt-query-digest `_. - -Please report bugs at `https://bugs.launchpad.net/percona-toolkit `_. -Include the following information in your bug report: - - -\* Complete command-line used to run the tool - - - -\* Tool "--version" - - - -\* MySQL version of all servers involved - - - -\* Output from the tool including STDERR - - - -\* Input files (log/dump/config files, etc.) - - - -If possible, include debugging output by running the tool with \ ``PTDEBUG``\ ; -see "ENVIRONMENT". - - -*********** -DOWNLOADING -*********** - - -Visit `http://www.percona.com/software/percona-toolkit/ `_ to download the -latest release of Percona Toolkit. Or, get the latest release from the -command line: - - -.. code-block:: perl - - wget percona.com/get/percona-toolkit.tar.gz - - wget percona.com/get/percona-toolkit.rpm - - wget percona.com/get/percona-toolkit.deb - - -You can also get individual tools from the latest release: - - -.. code-block:: perl - - wget percona.com/get/TOOL - - -Replace \ ``TOOL``\ with the name of any tool. - - -******* -AUTHORS -******* - - -Baron Schwartz and Daniel Nichter - - -********************* -ABOUT PERCONA TOOLKIT -********************* - - -This tool is part of Percona Toolkit, a collection of advanced command-line -tools developed by Percona for MySQL support and consulting. Percona Toolkit -was forked from two projects in June, 2011: Maatkit and Aspersa. Those -projects were created by Baron Schwartz and developed primarily by him and -Daniel Nichter, both of whom are employed by Percona. Visit -`http://www.percona.com/software/ `_ for more software developed by Percona. - - -******************************** -COPYRIGHT, LICENSE, AND WARRANTY -******************************** - - -This program is copyright 2008-2011 Percona Inc. -Feedback and improvements are welcome. 
- -THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation, version 2; OR the Perl Artistic License. On UNIX and similar -systems, you can issue \`man perlgpl' or \`man perlartistic' to read these -licenses. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA. - - -******* -VERSION -******* - - -pt-query-digest 1.0.1 - diff --git a/docs/user/pt-show-grants.rst b/docs/user/pt-show-grants.rst deleted file mode 100644 index 23e43cba..00000000 --- a/docs/user/pt-show-grants.rst +++ /dev/null @@ -1,534 +0,0 @@ - -############## -pt-show-grants -############## - -.. highlight:: perl - - -**** -NAME -**** - - -pt-show-grants - Canonicalize and print MySQL grants so you can effectively replicate, compare and version-control them. - - -******** -SYNOPSIS -******** - - -Usage: pt-show-grants [OPTION...] [DSN] - -pt-show-grants shows grants (user privileges) from a MySQL server. - -Examples: - - -.. code-block:: perl - - pt-show-grants - - pt-show-grants --separate --revoke | diff othergrants.sql - - - - -***** -RISKS -***** - - -The following section is included to inform users about the potential risks, -whether known or unknown, of using this tool. The two main categories of risks -are those created by the nature of the tool (e.g. read-only tools vs. read-write -tools) and those created by bugs. - -pt-show-grants is read-only by default, and very low-risk. If you specify -"--flush", it will execute \ ``FLUSH PRIVILEGES``\ . - -At the time of this release, we know of no bugs that could cause serious harm to -users. 
- -The authoritative source for updated information is always the online issue -tracking system. Issues that affect this tool will be marked as such. You can -see a list of such issues at the following URL: -`http://www.percona.com/bugs/pt-show-grants `_. - -See also "BUGS" for more information on filing bugs and getting help. - - -*********** -DESCRIPTION -*********** - - -pt-show-grants extracts, orders, and then prints grants for MySQL user -accounts. - -Why would you want this? There are several reasons. - -The first is to easily replicate users from one server to another; you can -simply extract the grants from the first server and pipe the output directly -into another server. - -The second use is to place your grants into version control. If you do a daily -automated grant dump into version control, you'll get lots of spurious -changesets for grants that don't change, because MySQL prints the actual grants -out in a seemingly random order. For instance, one day it'll say - - -.. code-block:: perl - - GRANT DELETE, INSERT, UPDATE ON `test`.* TO 'foo'@'%'; - - -And then another day it'll say - - -.. code-block:: perl - - GRANT INSERT, DELETE, UPDATE ON `test`.* TO 'foo'@'%'; - - -The grants haven't changed, but the order has. This script sorts the grants -within the line, between 'GRANT' and 'ON'. If there are multiple rows from SHOW -GRANTS, it sorts the rows too, except that it always prints the row with the -user's password first, if it exists. This removes three kinds of inconsistency -you'll get from running SHOW GRANTS, and avoids spurious changesets in version -control. - -Third, if you want to diff grants across servers, it will be hard without -"canonicalizing" them, which pt-show-grants does. The output is fully -diff-able. - -With the "--revoke", "--separate" and other options, pt-show-grants -also makes it easy to revoke specific privileges from users. This is tedious -otherwise. 
- - -******* -OPTIONS -******* - - -This tool accepts additional command-line arguments. Refer to the -"SYNOPSIS" and usage information for details. - - ---ask-pass - - Prompt for a password when connecting to MySQL. - - - ---charset - - short form: -A; type: string - - Default character set. If the value is utf8, sets Perl's binmode on - STDOUT to utf8, passes the mysql_enable_utf8 option to DBD::mysql, and - runs SET NAMES UTF8 after connecting to MySQL. Any other value sets - binmode on STDOUT without the utf8 layer, and runs SET NAMES after - connecting to MySQL. - - - ---config - - type: Array - - Read this comma-separated list of config files; if specified, this must be the - first option on the command line. - - - ---database - - short form: -D; type: string - - The database to use for the connection. - - - ---defaults-file - - short form: -F; type: string - - Only read mysql options from the given file. You must give an absolute - pathname. - - - ---drop - - Add DROP USER before each user in the output. - - - ---flush - - Add FLUSH PRIVILEGES after output. - - You might need this on pre-4.1.1 servers if you want to drop a user completely. - - - ---[no]header - - default: yes - - Print dump header. - - The header precedes the dumped grants. It looks like: - - - .. code-block:: perl - - -- Grants dumped by pt-show-grants 1.0.19 - -- Dumped from server Localhost via UNIX socket, MySQL 5.0.82-log at 2009-10-26 10:01:04 - - - See also "--[no]timestamp". - - - ---help - - Show help and exit. - - - ---host - - short form: -h; type: string - - Connect to host. - - - ---ignore - - type: array - - Ignore this comma-separated list of users. - - - ---only - - type: array - - Only show grants for this comma-separated list of users. - - - ---password - - short form: -p; type: string - - Password to use when connecting. - - - ---pid - - type: string - - Create the given PID file. The file contains the process ID of the script. 
- The PID file is removed when the script exits. Before starting, the script - checks if the PID file already exists. If it does not, then the script creates - and writes its own PID to it. If it does, then the script checks the following: - if the file contains a PID and a process is running with that PID, then - the script dies; or, if there is no process running with that PID, then the - script overwrites the file with its own PID and starts; else, if the file - contains no PID, then the script dies. - - - ---port - - short form: -P; type: int - - Port number to use for connection. - - - ---revoke - - Add REVOKE statements for each GRANT statement. - - - ---separate - - List each GRANT or REVOKE separately. - - The default output from MySQL's SHOW GRANTS command lists many privileges on a - single line. With "--flush", places a FLUSH PRIVILEGES after each user, - instead of once at the end of all the output. - - - ---set-vars - - type: string; default: wait_timeout=10000 - - Set these MySQL variables. Immediately after connecting to MySQL, this - string will be appended to SET and executed. - - - ---socket - - short form: -S; type: string - - Socket file to use for connection. - - - ---[no]timestamp - - default: yes - - Add timestamp to the dump header. - - See also "--[no]header". - - - ---user - - short form: -u; type: string - - User for login if not current user. - - - ---version - - Show version and exit. - - - - -*********** -DSN OPTIONS -*********** - - -These DSN options are used to create a DSN. Each option is given like -\ ``option=value``\ . The options are case-sensitive, so P and p are not the -same option. There cannot be whitespace before or after the \ ``=``\ and -if the value contains whitespace it must be quoted. DSN options are -comma-separated. See the percona-toolkit manpage for full details. - - -\* A - - dsn: charset; copy: yes - - Default character set. - - - -\* D - - dsn: database; copy: yes - - Default database. 
- - - -\* F - - dsn: mysql_read_default_file; copy: yes - - Only read default options from the given file - - - -\* h - - dsn: host; copy: yes - - Connect to host. - - - -\* p - - dsn: password; copy: yes - - Password to use when connecting. - - - -\* P - - dsn: port; copy: yes - - Port number to use for connection. - - - -\* S - - dsn: mysql_socket; copy: yes - - Socket file to use for connection. - - - -\* u - - dsn: user; copy: yes - - User for login if not current user. - - - - -*********** -ENVIRONMENT -*********** - - -The environment variable \ ``PTDEBUG``\ enables verbose debugging output to STDERR. -To enable debugging and capture all output to a file, run the tool like: - - -.. code-block:: perl - - PTDEBUG=1 pt-show-grants ... > FILE 2>&1 - - -Be careful: debugging output is voluminous and can generate several megabytes -of output. - - -******************* -SYSTEM REQUIREMENTS -******************* - - -You need Perl, DBI, DBD::mysql, and some core packages that ought to be -installed in any reasonably new version of Perl. - - -**** -BUGS -**** - - -For a list of known bugs, see `http://www.percona.com/bugs/pt-show-grants `_. - -Please report bugs at `https://bugs.launchpad.net/percona-toolkit `_. -Include the following information in your bug report: - - -\* Complete command-line used to run the tool - - - -\* Tool "--version" - - - -\* MySQL version of all servers involved - - - -\* Output from the tool including STDERR - - - -\* Input files (log/dump/config files, etc.) - - - -If possible, include debugging output by running the tool with \ ``PTDEBUG``\ ; -see "ENVIRONMENT". - - -*********** -DOWNLOADING -*********** - - -Visit `http://www.percona.com/software/percona-toolkit/ `_ to download the -latest release of Percona Toolkit. Or, get the latest release from the -command line: - - -.. 
code-block:: perl - - wget percona.com/get/percona-toolkit.tar.gz - - wget percona.com/get/percona-toolkit.rpm - - wget percona.com/get/percona-toolkit.deb - - -You can also get individual tools from the latest release: - - -.. code-block:: perl - - wget percona.com/get/TOOL - - -Replace \ ``TOOL``\ with the name of any tool. - - -******* -AUTHORS -******* - - -Baron Schwartz - - -********************* -ABOUT PERCONA TOOLKIT -********************* - - -This tool is part of Percona Toolkit, a collection of advanced command-line -tools developed by Percona for MySQL support and consulting. Percona Toolkit -was forked from two projects in June, 2011: Maatkit and Aspersa. Those -projects were created by Baron Schwartz and developed primarily by him and -Daniel Nichter, both of whom are employed by Percona. Visit -`http://www.percona.com/software/ `_ for more software developed by Percona. - - -******************************** -COPYRIGHT, LICENSE, AND WARRANTY -******************************** - - -This program is copyright 2007-2011 Baron Schwartz, 2011 Percona Inc. -Feedback and improvements are welcome. - -THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation, version 2; OR the Perl Artistic License. On UNIX and similar -systems, you can issue \`man perlgpl' or \`man perlartistic' to read these -licenses. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA. 
- - -******* -VERSION -******* - - -pt-show-grants 1.0.1 - diff --git a/docs/user/pt-sift.rst b/docs/user/pt-sift.rst deleted file mode 100644 index cc9bdf32..00000000 --- a/docs/user/pt-sift.rst +++ /dev/null @@ -1,273 +0,0 @@ - -####### -pt-sift -####### - -.. highlight:: perl - - -**** -NAME -**** - - -pt-sift - Browses files created by pt-collect. - - -******** -SYNOPSIS -******** - - -Usage: pt-sift FILE|PREFIX|DIRECTORY - -pt-sift browses the files created by pt-collect. If you specify a -FILE or PREFIX, it browses only files with that prefix. If you specify a -DIRECTORY, then it browses all files within that directory. - - -***** -RISKS -***** - - -The following section is included to inform users about the potential risks, -whether known or unknown, of using this tool. The two main categories of risks -are those created by the nature of the tool (e.g. read-only tools vs. read-write -tools) and those created by bugs. - -pt-sift is a read-only tool. It should be very low-risk. - -At the time of this release, we know of no bugs that could cause serious harm -to users. - -The authoritative source for updated information is always the online issue -tracking system. Issues that affect this tool will be marked as such. You can -see a list of such issues at the following URL: -`http://www.percona.com/bugs/pt-sift `_. - -See also "BUGS" for more information on filing bugs and getting help. - - -*********** -DESCRIPTION -*********** - - -pt-sift downloads other tools that it might need, such as pt-diskstats, -and then makes a list of the unique timestamp prefixes of all the files in -the directory, as written by the pt-collect tool. If the user specified -a timestamp on the command line, then it begins with that sample of data; -otherwise it begins by showing a list of the timestamps and prompting for -a selection. Thereafter, it displays a summary of the selected sample, and -the user can navigate and inspect with keystrokes. 
The keystroke commands -you can use are as follows: - - -d - - Sets the action to start the pt-diskstats tool on the sample's disk - performance statistics. - - - -i - - Sets the action to view the first INNODB STATUS sample in less. - - - -m - - Displays the first 4 samples of SHOW STATUS counters side by side with the - pt-mext tool. - - - -n - - Summarizes the first sample of netstat data in two ways: by originating host, - and by connection state. - - - -j - - Select the next timestamp as the active sample. - - - -k - - Select the previous timestamp as the active sample. - - - -q - - Quit the program. - - - -1 - - Sets the action for each sample to the default, which is to view a summary - of the sample. - - - -0 - - Sets the action to just list the files in the sample. - - - -\* - - Sets the action to view all of the sample's files in the less program. - - - - -******* -OPTIONS -******* - - -This tool does not have any command-line options. - - -*********** -ENVIRONMENT -*********** - - -This tool does not use any environment variables. - - -******************* -SYSTEM REQUIREMENTS -******************* - - -This tool requires Bash v3 and the following programs: pt-diskstats, pt-pmp, -pt-mext, and align (from Aspersa). If these programs are not in your PATH, -they will be fetched from the Internet if curl is available. - - -**** -BUGS -**** - - -For a list of known bugs, see `http://www.percona.com/bugs/pt-sift `_. - -Please report bugs at `https://bugs.launchpad.net/percona-toolkit `_. -Include the following information in your bug report: - - -\* Complete command-line used to run the tool - - - -\* Tool "--version" - - - -\* MySQL version of all servers involved - - - -\* Output from the tool including STDERR - - - -\* Input files (log/dump/config files, etc.) - - - -If possible, include debugging output by running the tool with \ ``PTDEBUG``\ ; -see "ENVIRONMENT".
- - -*********** -DOWNLOADING -*********** - - -Visit `http://www.percona.com/software/percona-toolkit/ `_ to download the -latest release of Percona Toolkit. Or, get the latest release from the -command line: - - -.. code-block:: perl - - wget percona.com/get/percona-toolkit.tar.gz - - wget percona.com/get/percona-toolkit.rpm - - wget percona.com/get/percona-toolkit.deb - - -You can also get individual tools from the latest release: - - -.. code-block:: perl - - wget percona.com/get/TOOL - - -Replace \ ``TOOL``\ with the name of any tool. - - -******* -AUTHORS -******* - - -Baron Schwartz - - -********************* -ABOUT PERCONA TOOLKIT -********************* - - -This tool is part of Percona Toolkit, a collection of advanced command-line -tools developed by Percona for MySQL support and consulting. Percona Toolkit -was forked from two projects in June, 2011: Maatkit and Aspersa. Those -projects were created by Baron Schwartz and developed primarily by him and -Daniel Nichter, both of whom are employed by Percona. Visit -`http://www.percona.com/software/ `_ for more software developed by Percona. - - -******************************** -COPYRIGHT, LICENSE, AND WARRANTY -******************************** - - -This program is copyright 2010-2011 Baron Schwartz, 2011 Percona Inc. -Feedback and improvements are welcome. - -THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation, version 2; OR the Perl Artistic License. On UNIX and similar -systems, you can issue \`man perlgpl' or \`man perlartistic' to read these -licenses. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA. - - -******* -VERSION -******* - - -pt-sift 1.0.1 - diff --git a/docs/user/pt-slave-delay.rst b/docs/user/pt-slave-delay.rst deleted file mode 100644 index d272fbb0..00000000 --- a/docs/user/pt-slave-delay.rst +++ /dev/null @@ -1,532 +0,0 @@ - -############## -pt-slave-delay -############## - -.. highlight:: perl - - -**** -NAME -**** - - -pt-slave-delay - Make a MySQL slave server lag behind its master. - - -******** -SYNOPSIS -******** - - -Usage: pt-slave-delay [OPTION...] SLAVE-HOST [MASTER-HOST] - -pt-slave-delay starts and stops a slave server as needed to make it lag -behind the master. The SLAVE-HOST and MASTER-HOST use DSN syntax, and -values are copied from the SLAVE-HOST to the MASTER-HOST if omitted. - -To hold slavehost one minute behind its master for ten minutes: - - -.. code-block:: perl - - pt-slave-delay --delay 1m --interval 15s --run-time 10m slavehost - - - -***** -RISKS -***** - - -The following section is included to inform users about the potential risks, -whether known or unknown, of using this tool. The two main categories of risks -are those created by the nature of the tool (e.g. read-only tools vs. read-write -tools) and those created by bugs. - -pt-slave-delay is generally very low-risk. It simply starts and stops the -replication SQL thread. This might cause monitoring systems to think the slave -is having trouble. - -At the time of this release, we know of no bugs that could cause serious harm to -users. - -The authoritative source for updated information is always the online issue -tracking system. Issues that affect this tool will be marked as such. You can -see a list of such issues at the following URL: -`http://www.percona.com/bugs/pt-slave-delay `_. - -See also "BUGS" for more information on filing bugs and getting help. 
- - -*********** -DESCRIPTION -*********** - - -\ ``pt-slave-delay``\ watches a slave and starts and stops its replication SQL -thread as necessary to hold it at least as far behind the master as you -request. In practice, it will typically cause the slave to lag between -"--delay" and "--delay"+"--interval" behind the master. - -It bases the delay on binlog positions in the slave's relay logs by default, -so there is no need to connect to the master. This works well if the IO -thread doesn't lag the master much, which is typical in most replication -setups; the IO thread lag is usually milliseconds on a fast network. If your -IO thread's lag is too large for your purposes, \ ``pt-slave-delay``\ can also -connect to the master for information about binlog positions. - -If the slave's I/O thread reports that it is waiting for the SQL thread to -free some relay log space, \ ``pt-slave-delay``\ will automatically connect to the -master to find binary log positions. If "--ask-pass" and "--daemonize" -are given, it is possible that this could cause it to ask for a password while -daemonized. In this case, it exits. Therefore, if you think your slave might -encounter this condition, you should be sure to either specify -"--use-master" explicitly when daemonizing, or don't specify "--ask-pass". - -The SLAVE-HOST and optional MASTER-HOST are both DSNs. See "DSN OPTIONS". -Missing MASTER-HOST values are filled in with values from SLAVE-HOST, so you -don't need to specify them in both places. \ ``pt-slave-delay``\ reads all normal -MySQL option files, such as ~/.my.cnf, so you may not need to specify username, -password and other common options at all. - -\ ``pt-slave-delay``\ tries to exit gracefully by trapping signals such as Ctrl-C. -You cannot bypass "--[no]continue" with a trappable signal. - - -********** -PRIVILEGES -********** - - -pt-slave-delay requires the following privileges: PROCESS, REPLICATION CLIENT, -and SUPER. 
- - -****** -OUTPUT -****** - - -If you specify "--quiet", there is no output. Otherwise, the normal output -is a status message consisting of a timestamp and information about what -\ ``pt-slave-delay``\ is doing: starting the slave, stopping the slave, or just -observing. - - -******* -OPTIONS -******* - - -This tool accepts additional command-line arguments. Refer to the -"SYNOPSIS" and usage information for details. - - ---ask-pass - - Prompt for a password when connecting to MySQL. - - - ---charset - - short form: -A; type: string - - Default character set. If the value is utf8, sets Perl's binmode on - STDOUT to utf8, passes the mysql_enable_utf8 option to DBD::mysql, and - runs SET NAMES UTF8 after connecting to MySQL. Any other value sets - binmode on STDOUT without the utf8 layer, and runs SET NAMES after - connecting to MySQL. - - - ---config - - type: Array - - Read this comma-separated list of config files; if specified, this must be the - first option on the command line. - - - ---[no]continue - - default: yes - - Continue replication normally on exit. After exiting, restart the slave's SQL - thread with no UNTIL condition, so it will run as usual and catch up to the - master. This is enabled by default and works even if you terminate - \ ``pt-slave-delay``\ with Control-C. - - - ---daemonize - - Fork to the background and detach from the shell. POSIX - operating systems only. - - - ---defaults-file - - short form: -F; type: string - - Only read mysql options from the given file. You must give an absolute - pathname. - - - ---delay - - type: time; default: 1h - - How far the slave should lag its master. - - - ---help - - Show help and exit. - - - ---host - - short form: -h; type: string - - Connect to host. - - - ---interval - - type: time; default: 1m - - How frequently \ ``pt-slave-delay``\ should check whether the slave needs to be - started or stopped. - - - ---log - - type: string - - Print all output to this file when daemonized. 
- - - ---password - - short form: -p; type: string - - Password to use when connecting. - - - ---pid - - type: string - - Create the given PID file when daemonized. The file contains the process - ID of the daemonized instance. The PID file is removed when the - daemonized instance exits. The program checks for the existence of the - PID file when starting; if it exists and the process with the matching PID - exists, the program exits. - - - ---port - - short form: -P; type: int - - Port number to use for connection. - - - ---quiet - - short form: -q - - Don't print informational messages about operation. See OUTPUT for details. - - - ---run-time - - type: time - - How long \ ``pt-slave-delay``\ should run before exiting. The default is to run - forever. - - - ---set-vars - - type: string; default: wait_timeout=10000 - - Set these MySQL variables. Immediately after connecting to MySQL, this string - will be appended to SET and executed. - - - ---socket - - short form: -S; type: string - - Socket file to use for connection. - - - ---use-master - - Get binlog positions from master, not slave. Don't trust the binlog positions - in the slave's relay log. Connect to the master and get binlog positions - instead. If you specify this option without giving a MASTER-HOST on the command - line, \ ``pt-slave-delay``\ examines the slave's SHOW SLAVE STATUS to determine the - hostname and port for connecting to the master. - - \ ``pt-slave-delay``\ uses only the MASTER_HOST and MASTER_PORT values from SHOW - SLAVE STATUS for the master connection. It does not use the MASTER_USER - value. If you want to specify a different username for the master than the - one you use to connect to the slave, you should specify the MASTER-HOST option - explicitly on the command line. - - - ---user - - short form: -u; type: string - - User for login if not current user. - - - ---version - - Show version and exit. 
- - - - -*********** -DSN OPTIONS -*********** - - -These DSN options are used to create a DSN. Each option is given like -\ ``option=value``\ . The options are case-sensitive, so P and p are not the -same option. There cannot be whitespace before or after the \ ``=``\ and -if the value contains whitespace it must be quoted. DSN options are -comma-separated. See the percona-toolkit manpage for full details. - - -\* A - - dsn: charset; copy: yes - - Default character set. - - - -\* D - - dsn: database; copy: yes - - Default database. - - - -\* F - - dsn: mysql_read_default_file; copy: yes - - Only read default options from the given file - - - -\* h - - dsn: host; copy: yes - - Connect to host. - - - -\* p - - dsn: password; copy: yes - - Password to use when connecting. - - - -\* P - - dsn: port; copy: yes - - Port number to use for connection. - - - -\* S - - dsn: mysql_socket; copy: yes - - Socket file to use for connection. - - - -\* u - - dsn: user; copy: yes - - User for login if not current user. - - - - -*********** -ENVIRONMENT -*********** - - -The environment variable \ ``PTDEBUG``\ enables verbose debugging output to STDERR. -To enable debugging and capture all output to a file, run the tool like: - - -.. code-block:: perl - - PTDEBUG=1 pt-slave-delay ... > FILE 2>&1 - - -Be careful: debugging output is voluminous and can generate several megabytes -of output. - - -******************* -SYSTEM REQUIREMENTS -******************* - - -You need Perl, DBI, DBD::mysql, and some core packages that ought to be -installed in any reasonably new version of Perl. - - -**** -BUGS -**** - - -For a list of known bugs, see `http://www.percona.com/bugs/pt-slave-delay `_. - -Please report bugs at `https://bugs.launchpad.net/percona-toolkit `_. 
-Include the following information in your bug report: - - -\* Complete command-line used to run the tool - - - -\* Tool "--version" - - - -\* MySQL version of all servers involved - - - -\* Output from the tool including STDERR - - - -\* Input files (log/dump/config files, etc.) - - - -If possible, include debugging output by running the tool with \ ``PTDEBUG``\ ; -see "ENVIRONMENT". - - -*********** -DOWNLOADING -*********** - - -Visit `http://www.percona.com/software/percona-toolkit/ `_ to download the -latest release of Percona Toolkit. Or, get the latest release from the -command line: - - -.. code-block:: perl - - wget percona.com/get/percona-toolkit.tar.gz - - wget percona.com/get/percona-toolkit.rpm - - wget percona.com/get/percona-toolkit.deb - - -You can also get individual tools from the latest release: - - -.. code-block:: perl - - wget percona.com/get/TOOL - - -Replace \ ``TOOL``\ with the name of any tool. - - -******* -AUTHORS -******* - - -Sergey Zhuravlev and Baron Schwartz - - -********************* -ABOUT PERCONA TOOLKIT -********************* - - -This tool is part of Percona Toolkit, a collection of advanced command-line -tools developed by Percona for MySQL support and consulting. Percona Toolkit -was forked from two projects in June, 2011: Maatkit and Aspersa. Those -projects were created by Baron Schwartz and developed primarily by him and -Daniel Nichter, both of whom are employed by Percona. Visit -`http://www.percona.com/software/ `_ for more software developed by Percona. - - -******************************** -COPYRIGHT, LICENSE, AND WARRANTY -******************************** - - -This program is copyright 2007-2011 Sergey Zhuravlev and Baron Schwartz, -2011 Percona Inc. -Feedback and improvements are welcome. - -THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation, version 2; OR the Perl Artistic License. On UNIX and similar -systems, you can issue \`man perlgpl' or \`man perlartistic' to read these -licenses. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA. - - -******* -VERSION -******* - - -pt-slave-delay 1.0.1 - diff --git a/docs/user/pt-slave-find.rst b/docs/user/pt-slave-find.rst deleted file mode 100644 index c8e53906..00000000 --- a/docs/user/pt-slave-find.rst +++ /dev/null @@ -1,543 +0,0 @@ - -############# -pt-slave-find -############# - -.. highlight:: perl - - -**** -NAME -**** - - -pt-slave-find - Find and print replication hierarchy tree of MySQL slaves. - - -******** -SYNOPSIS -******** - - -Usage: pt-slave-find [OPTION...] MASTER-HOST - -pt-slave-find finds and prints a hierarchy tree of MySQL slaves. - -Examples: - - -.. code-block:: perl - - pt-slave-find --host master-host - - - -***** -RISKS -***** - - -The following section is included to inform users about the potential risks, -whether known or unknown, of using this tool. The two main categories of risks -are those created by the nature of the tool (e.g. read-only tools vs. read-write -tools) and those created by bugs. - -pt-slave-find is read-only and very low-risk. - -At the time of this release, we know of no bugs that could cause serious harm to -users. - -The authoritative source for updated information is always the online issue -tracking system. Issues that affect this tool will be marked as such. You can -see a list of such issues at the following URL: -`http://www.percona.com/bugs/pt-slave-find `_. - -See also "BUGS" for more information on filing bugs and getting help. 
- - -*********** -DESCRIPTION -*********** - - -pt-slave-find connects to a MySQL replication master and finds its slaves. -Currently the only thing it can do is print a tree-like view of the replication -hierarchy. - -The master host can be specified using one of two methods. The first method is -to use the standard connection-related command line options: -"--defaults-file", "--password", "--host", "--port", "--socket" -or "--user". - -The second method to specify the master host is a DSN. A DSN is a special -syntax that can be either just a hostname (like \ ``server.domain.com``\ or -\ ``1.2.3.4``\ ), or a \ ``key=value,key=value``\ string. Keys are a single letter: - - -.. code-block:: perl - - KEY MEANING - === ======= - h Connect to host - P Port number to use for connection - S Socket file to use for connection - u User for login if not current user - p Password to use when connecting - F Only read default options from the given file - - -\ ``pt-slave-find``\ reads all normal MySQL option files, such as ~/.my.cnf, so -you may not need to specify username, password and other common options at all. - - -*********** -EXIT STATUS -*********** - - -An exit status of 0 (sometimes also called a return value or return code) -indicates success. Any other value represents the exit status of -the Perl process itself. - - -******* -OPTIONS -******* - - -This tool accepts additional command-line arguments. Refer to the -"SYNOPSIS" and usage information for details. - - ---ask-pass - - Prompt for a password when connecting to MySQL. - - - ---charset - - short form: -A; type: string - - Default character set. If the value is utf8, sets Perl's binmode on - STDOUT to utf8, passes the mysql_enable_utf8 option to DBD::mysql, and - runs SET NAMES UTF8 after connecting to MySQL. Any other value sets - binmode on STDOUT without the utf8 layer, and runs SET NAMES after - connecting to MySQL. 
- - - ---config - - type: Array - - Read this comma-separated list of config files; if specified, this must be the - first option on the command line. - - - ---database - - type: string; short form: -D - - Database to use. - - - ---defaults-file - - short form: -F; type: string - - Only read mysql options from the given file. You must give an absolute - pathname. - - - ---help - - Show help and exit. - - - ---host - - short form: -h; type: string - - Connect to host. - - - ---password - - short form: -p; type: string - - Password to use when connecting. - - - ---pid - - type: string - - Create the given PID file. The file contains the process ID of the script. - The PID file is removed when the script exits. Before starting, the script - checks if the PID file already exists. If it does not, then the script creates - and writes its own PID to it. If it does, then the script checks the following: - if the file contains a PID and a process is running with that PID, then - the script dies; or, if there is no process running with that PID, then the - script overwrites the file with its own PID and starts; else, if the file - contains no PID, then the script dies. - - - ---port - - short form: -P; type: int - - Port number to use for connection. - - - ---recurse - - type: int - - Number of levels to recurse in the hierarchy. Default is infinite. - - See "--recursion-method". - - - ---recursion-method - - type: string - - Preferred recursion method used to find slaves. - - Possible methods are: - - - .. code-block:: perl - - METHOD USES - =========== ================ - processlist SHOW PROCESSLIST - hosts SHOW SLAVE HOSTS - - - The processlist method is preferred because SHOW SLAVE HOSTS is not reliable. - However, the hosts method is required if the server uses a non-standard - port (not 3306). Usually pt-slave-find does the right thing and finds - the slaves, but you may give a preferred method and it will be used first. 
- If it doesn't find any slaves, the other methods will be tried. - - - ---report-format - - type: string; default: summary - - Set what information about the slaves is printed. The report format can be - one of the following: - - - \* hostname - - Print just the hostname of the slaves. It looks like: - - - .. code-block:: perl - - 127.0.0.1:12345 - +- 127.0.0.1:12346 - +- 127.0.0.1:12347 - - - - - \* summary - - Print a summary of each slave's settings. This report shows more information - about each slave, like: - - - .. code-block:: perl - - 127.0.0.1:12345 - Version 5.1.34-log - Server ID 12345 - Uptime 04:56 (started 2010-06-17T11:21:22) - Replication Is not a slave, has 1 slaves connected - Filters - Binary logging STATEMENT - Slave status - Slave mode STRICT - Auto-increment increment 1, offset 1 - +- 127.0.0.1:12346 - Version 5.1.34-log - Server ID 12346 - Uptime 04:54 (started 2010-06-17T11:21:24) - Replication Is a slave, has 1 slaves connected - Filters - Binary logging STATEMENT - Slave status 0 seconds behind, running, no errors - Slave mode STRICT - Auto-increment increment 1, offset 1 - - - - - - ---set-vars - - type: string; default: wait_timeout=10000 - - Set these MySQL variables. Immediately after connecting to MySQL, this - string will be appended to SET and executed. - - - ---socket - - short form: -S; type: string - - Socket file to use for connection. - - - ---user - - short form: -u; type: string - - User for login if not current user. - - - ---version - - Show version and exit. - - - - -*********** -DSN OPTIONS -*********** - - -These DSN options are used to create a DSN. Each option is given like -\ ``option=value``\ . The options are case-sensitive, so P and p are not the -same option. There cannot be whitespace before or after the \ ``=``\ and -if the value contains whitespace it must be quoted. DSN options are -comma-separated. See the percona-toolkit manpage for full details.
- - -\* A - - dsn: charset; copy: yes - - Default character set. - - - -\* D - - dsn: database; copy: yes - - Default database. - - - -\* F - - dsn: mysql_read_default_file; copy: yes - - Only read default options from the given file - - - -\* h - - dsn: host; copy: yes - - Connect to host. - - - -\* p - - dsn: password; copy: yes - - Password to use when connecting. - - - -\* P - - dsn: port; copy: yes - - Port number to use for connection. - - - -\* S - - dsn: mysql_socket; copy: yes - - Socket file to use for connection. - - - -\* u - - dsn: user; copy: yes - - User for login if not current user. - - - - -*********** -ENVIRONMENT -*********** - - -The environment variable \ ``PTDEBUG``\ enables verbose debugging output to STDERR. -To enable debugging and capture all output to a file, run the tool like: - - -.. code-block:: perl - - PTDEBUG=1 pt-slave-find ... > FILE 2>&1 - - -Be careful: debugging output is voluminous and can generate several megabytes -of output. - - -******************* -SYSTEM REQUIREMENTS -******************* - - -You need Perl, DBI, DBD::mysql, and some core packages that ought to be -installed in any reasonably new version of Perl. - - -**** -BUGS -**** - - -For a list of known bugs, see `http://www.percona.com/bugs/pt-slave-find `_. - -Please report bugs at `https://bugs.launchpad.net/percona-toolkit `_. -Include the following information in your bug report: - - -\* Complete command-line used to run the tool - - - -\* Tool "--version" - - - -\* MySQL version of all servers involved - - - -\* Output from the tool including STDERR - - - -\* Input files (log/dump/config files, etc.) - - - -If possible, include debugging output by running the tool with \ ``PTDEBUG``\ ; -see "ENVIRONMENT". - - -*********** -DOWNLOADING -*********** - - -Visit `http://www.percona.com/software/percona-toolkit/ `_ to download the -latest release of Percona Toolkit. Or, get the latest release from the -command line: - - -.. 
code-block:: perl - - wget percona.com/get/percona-toolkit.tar.gz - - wget percona.com/get/percona-toolkit.rpm - - wget percona.com/get/percona-toolkit.deb - - -You can also get individual tools from the latest release: - - -.. code-block:: perl - - wget percona.com/get/TOOL - - -Replace \ ``TOOL``\ with the name of any tool. - - -******* -AUTHORS -******* - - -Baron Schwartz and Daniel Nichter - - -********************* -ABOUT PERCONA TOOLKIT -********************* - - -This tool is part of Percona Toolkit, a collection of advanced command-line -tools developed by Percona for MySQL support and consulting. Percona Toolkit -was forked from two projects in June, 2011: Maatkit and Aspersa. Those -projects were created by Baron Schwartz and developed primarily by him and -Daniel Nichter, both of whom are employed by Percona. Visit -`http://www.percona.com/software/ `_ for more software developed by Percona. - - -******************************** -COPYRIGHT, LICENSE, AND WARRANTY -******************************** - - -This program is copyright 2007-2011 Baron Schwartz, 2011 Percona Inc. -Feedback and improvements are welcome. - -THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation, version 2; OR the Perl Artistic License. On UNIX and similar -systems, you can issue \`man perlgpl' or \`man perlartistic' to read these -licenses. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA. 
- - -******* -VERSION -******* - - -pt-slave-find 1.0.1 - diff --git a/docs/user/pt-slave-restart.rst b/docs/user/pt-slave-restart.rst deleted file mode 100644 index b8c18884..00000000 --- a/docs/user/pt-slave-restart.rst +++ /dev/null @@ -1,755 +0,0 @@ - -################ -pt-slave-restart -################ - -.. highlight:: perl - - -**** -NAME -**** - - -pt-slave-restart - Watch and restart MySQL replication after errors. - - -******** -SYNOPSIS -******** - - -Usage: pt-slave-restart [OPTION...] [DSN] - -pt-slave-restart watches one or more MySQL replication slaves for -errors, and tries to restart replication if it stops. - - -***** -RISKS -***** - - -The following section is included to inform users about the potential risks, -whether known or unknown, of using this tool. The two main categories of risks -are those created by the nature of the tool (e.g. read-only tools vs. read-write -tools) and those created by bugs. - -pt-slave-restart is a brute-force way to try to keep a slave server running when -it is having problems with replication. Don't be too hasty to use it unless you -need to. If you use this tool carelessly, you might miss the chance to really -solve the slave server's problems. - -At the time of this release there is a bug that causes an invalid -\ ``CHANGE MASTER TO``\ statement to be executed. - -The authoritative source for updated information is always the online issue -tracking system. Issues that affect this tool will be marked as such. You can -see a list of such issues at the following URL: -`http://www.percona.com/bugs/pt-slave-restart `_. - -See also "BUGS" for more information on filing bugs and getting help. - - -*********** -DESCRIPTION -*********** - - -pt-slave-restart watches one or more MySQL replication slaves and tries to skip -statements that cause errors. It polls slaves intelligently with an -exponentially varying sleep time. You can specify errors to skip and run the -slaves until a certain binlog position. 
- -Note: it has come to my attention that Yahoo! had or has an internal tool -called fix_repl, described to me by a past Yahoo! employee and mentioned in -the first edition of High Performance MySQL. Apparently this tool does the -same thing. Make no mistake, though: this is not a way to "fix replication." -In fact I would not even encourage its use on a regular basis; I use it only -when I have an error I know I just need to skip past. - - -****** -OUTPUT -****** - - -If you specify "--verbose", pt-slave-restart prints a line every time it sees -the slave has an error. See "--verbose" for details. - - -***** -SLEEP -***** - - -pt-slave-restart sleeps intelligently between polling the slave. The current -sleep time varies. - - -\* - - The initial sleep time is given by "--sleep". - - - -\* - - If it checks and finds an error, it halves the previous sleep time. - - - -\* - - If it finds no error, it doubles the previous sleep time. - - - -\* - - The sleep time is bounded below by "--min-sleep" and above by - "--max-sleep". - - - -\* - - Immediately after finding an error, pt-slave-restart assumes another error is - very likely to happen next, so it sleeps the current sleep time or the initial - sleep time, whichever is less. - - - - -*********** -EXIT STATUS -*********** - - -An exit status of 0 (sometimes also called a return value or return code) -indicates success. Any other value represents the exit status of the Perl -process itself, or of the last forked process that exited if there were multiple -servers to monitor. - - -************* -COMPATIBILITY -************* - - -pt-slave-restart should work on many versions of MySQL. Lettercase of many -output columns from SHOW SLAVE STATUS has changed over time, so it treats them -all as lowercase. - - -******* -OPTIONS -******* - - -This tool accepts additional command-line arguments. Refer to the -"SYNOPSIS" and usage information for details. - - ---always - - Start slaves even when there is no error. 
With this option enabled, - pt-slave-restart will not let you stop the slave manually if you want to! - - - ---ask-pass - - Prompt for a password when connecting to MySQL. - - - ---charset - - short form: -A; type: string - - Default character set. If the value is utf8, sets Perl's binmode on - STDOUT to utf8, passes the mysql_enable_utf8 option to DBD::mysql, and - runs SET NAMES UTF8 after connecting to MySQL. Any other value sets - binmode on STDOUT without the utf8 layer, and runs SET NAMES after - connecting to MySQL. - - - ---[no]check-relay-log - - default: yes - - Check the last relay log file and position before checking for slave errors. - - By default pt-slave-restart will not do anything (it will just sleep) - if neither the relay log file nor the relay log position has changed since - the last check. This prevents infinite loops (i.e. restarting the same - error in the same relay log file at the same relay log position). - - For certain slave errors, however, this check needs to be disabled by - specifying \ ``--no-check-relay-log``\ . Do not do this unless you know what - you are doing! - - - ---config - - type: Array - - Read this comma-separated list of config files; if specified, this must be the - first option on the command line. - - - ---daemonize - - Fork to the background and detach from the shell. POSIX - operating systems only. - - - ---database - - short form: -D; type: string - - Database to use. - - - ---defaults-file - - short form: -F; type: string - - Only read mysql options from the given file. You must give an absolute - pathname. - - - ---error-length - - type: int - - Max length of error message to print. When "--verbose" is set high enough to - print the error, this option will truncate the error text to the specified - length. This can be useful to prevent wrapping on the terminal. - - - ---error-numbers - - type: hash - - Only restart this comma-separated list of errors.
Makes pt-slave-restart only - try to restart if the error number is in this comma-separated list of errors. - If it sees an error not in the list, it will exit. - - The error number is in the \ ``last_errno``\ column of \ ``SHOW SLAVE STATUS``\ . - - - ---error-text - - type: string - - Only restart errors that match this pattern. A Perl regular expression against - which the error text, if any, is matched. If the error text exists and matches, - pt-slave-restart will try to restart the slave. If it exists but doesn't match, - pt-slave-restart will exit. - - The error text is in the \ ``last_error``\ column of \ ``SHOW SLAVE STATUS``\ . - - - ---help - - Show help and exit. - - - ---host - - short form: -h; type: string - - Connect to host. - - - ---log - - type: string - - Print all output to this file when daemonized. - - - ---max-sleep - - type: float; default: 64 - - Maximum sleep seconds. - - The maximum time pt-slave-restart will sleep before polling the slave again. - This is also the time that pt-slave-restart will wait for all other running - instances to quit if both "--stop" and "--monitor" are specified. - - See "SLEEP". - - - ---min-sleep - - type: float; default: 0.015625 - - The minimum time pt-slave-restart will sleep before polling the slave again. - See "SLEEP". - - - ---monitor - - Whether to monitor the slave (default). Unless you specify --monitor - explicitly, "--stop" will disable it. - - - ---password - - short form: -p; type: string - - Password to use when connecting. - - - ---pid - - type: string - - Create the given PID file when daemonized. The file contains the process - ID of the daemonized instance. The PID file is removed when the - daemonized instance exits. The program checks for the existence of the - PID file when starting; if it exists and the process with the matching PID - exists, the program exits. - - - ---port - - short form: -P; type: int - - Port number to use for connection. 
- - - ---quiet - - short form: -q - - Suppresses normal output (disables "--verbose"). - - - ---recurse - - type: int; default: 0 - - Watch slaves of the specified server, up to the specified number of servers deep - in the hierarchy. The default depth of 0 means "just watch the slave - specified." - - pt-slave-restart examines \ ``SHOW PROCESSLIST``\ and tries to determine which - connections are from slaves, then connect to them. See "--recursion-method". - - Recursion works by finding all slaves when the program starts, then watching - them. If there is more than one slave, \ ``pt-slave-restart``\ uses \ ``fork()``\ to - monitor them. - - This also works if you have configured your slaves to show up in \ ``SHOW SLAVE - HOSTS``\ . The minimal configuration for this is the \ ``report_host``\ parameter, but - there are other "report" parameters as well for the port, username, and - password. - - - ---recursion-method - - type: string - - Preferred recursion method used to find slaves. - - Possible methods are: - - - .. code-block:: perl - - METHOD USES - =========== ================ - processlist SHOW PROCESSLIST - hosts SHOW SLAVE HOSTS - - - The processlist method is preferred because SHOW SLAVE HOSTS is not reliable. - However, the hosts method is required if the server uses a non-standard - port (not 3306). Usually pt-slave-restart does the right thing and finds - the slaves, but you may give a preferred method and it will be used first. - If it doesn't find any slaves, the other methods will be tried. - - - ---run-time - - type: time - - Time to run before exiting. Causes pt-slave-restart to stop after the specified - time has elapsed. Optional suffix: s=seconds, m=minutes, h=hours, d=days; if no - suffix, s is used. - - - ---sentinel - - type: string; default: /tmp/pt-slave-restart-sentinel - - Exit if this file exists. - - - ---set-vars - - type: string; default: wait_timeout=10000 - - Set these MySQL variables. 
Immediately after connecting to MySQL, this string - will be appended to SET and executed. - - - ---skip-count - - type: int; default: 1 - - Number of statements to skip when restarting the slave. - - - ---sleep - - type: int; default: 1 - - Initial sleep seconds between checking the slave. - - See "SLEEP". - - - ---socket - - short form: -S; type: string - - Socket file to use for connection. - - - ---stop - - Stop running instances by creating the sentinel file. - - Causes \ ``pt-slave-restart``\ to create the sentinel file specified by - "--sentinel". This should have the effect of stopping all running - instances which are watching the same sentinel file. If "--monitor" isn't - specified, \ ``pt-slave-restart``\ will exit after creating the file. If it is - specified, \ ``pt-slave-restart``\ will wait the interval given by - "--max-sleep", then remove the file and continue working. - - You might find this handy to stop cron jobs gracefully if necessary, or to - replace one running instance with another. For example, if you want to stop - and restart \ ``pt-slave-restart``\ every hour (just to make sure that it is - restarted every hour, in case of a server crash or some other problem), you - could use a \ ``crontab``\ line like this: - - - .. code-block:: perl - - 0 * * * * pt-slave-restart --monitor --stop --sentinel /tmp/pt-slave-restartup - - - The non-default "--sentinel" will make sure the hourly \ ``cron``\ job stops - only instances previously started with the same options (that is, from the - same \ ``cron``\ job). - - See also "--sentinel". - - - ---until-master - - type: string - - Run until this master log file and position. Start the slave, and retry if it - fails, until it reaches the given replication coordinates. The coordinates are - the logfile and position on the master, given by relay_master_log_file, - exec_master_log_pos. The argument must be in the format "file,pos". Separate - the filename and position with a single comma and no space. 
- - This will also cause an UNTIL clause to be given to START SLAVE. - - After reaching this point, the slave should be stopped and pt-slave-restart - will exit. - - - ---until-relay - - type: string - - Run until this relay log file and position. Like "--until-master", but in - the slave's relay logs instead. The coordinates are given by relay_log_file, - relay_log_pos. - - - ---user - - short form: -u; type: string - - User for login if not current user. - - - ---verbose - - short form: -v; cumulative: yes; default: 1 - - Be verbose; can specify multiple times. Verbosity 1 outputs connection - information, a timestamp, relay_log_file, relay_log_pos, and last_errno. - Verbosity 2 adds last_error. See also "--error-length". Verbosity 3 prints - the current sleep time each time pt-slave-restart sleeps. - - - ---version - - - -Show version and exit. - - -*********** -DSN OPTIONS -*********** - - -These DSN options are used to create a DSN. Each option is given like -\ ``option=value``\ . The options are case-sensitive, so P and p are not the -same option. There cannot be whitespace before or after the \ ``=``\ and -if the value contains whitespace it must be quoted. DSN options are -comma-separated. See the percona-toolkit manpage for full details. - - -\* A - - dsn: charset; copy: yes - - Default character set. - - - -\* D - - dsn: database; copy: yes - - Default database. - - - -\* F - - dsn: mysql_read_default_file; copy: yes - - Only read default options from the given file - - - -\* h - - dsn: host; copy: yes - - Connect to host. - - - -\* p - - dsn: password; copy: yes - - Password to use when connecting. - - - -\* P - - dsn: port; copy: yes - - Port number to use for connection. - - - -\* S - - dsn: mysql_socket; copy: yes - - Socket file to use for connection. - - - -\* u - - dsn: user; copy: yes - - User for login if not current user. 
- - - - -*********** -ENVIRONMENT -*********** - - -The environment variable \ ``PTDEBUG``\ enables verbose debugging output to STDERR. -To enable debugging and capture all output to a file, run the tool like: - - -.. code-block:: perl - - PTDEBUG=1 pt-slave-restart ... > FILE 2>&1 - - -Be careful: debugging output is voluminous and can generate several megabytes -of output. - - -******************* -SYSTEM REQUIREMENTS -******************* - - -You need Perl, DBI, DBD::mysql, and some core packages that ought to be -installed in any reasonably new version of Perl. - - -**** -BUGS -**** - - -For a list of known bugs, see `http://www.percona.com/bugs/pt-slave-restart `_. - -Please report bugs at `https://bugs.launchpad.net/percona-toolkit `_. -Include the following information in your bug report: - - -\* Complete command-line used to run the tool - - - -\* Tool "--version" - - - -\* MySQL version of all servers involved - - - -\* Output from the tool including STDERR - - - -\* Input files (log/dump/config files, etc.) - - - -If possible, include debugging output by running the tool with \ ``PTDEBUG``\ ; -see "ENVIRONMENT". - - -*********** -DOWNLOADING -*********** - - -Visit `http://www.percona.com/software/percona-toolkit/ `_ to download the -latest release of Percona Toolkit. Or, get the latest release from the -command line: - - -.. code-block:: perl - - wget percona.com/get/percona-toolkit.tar.gz - - wget percona.com/get/percona-toolkit.rpm - - wget percona.com/get/percona-toolkit.deb - - -You can also get individual tools from the latest release: - - -.. code-block:: perl - - wget percona.com/get/TOOL - - -Replace \ ``TOOL``\ with the name of any tool. - - -******* -AUTHORS -******* - - -Baron Schwartz - - -********************* -ABOUT PERCONA TOOLKIT -********************* - - -This tool is part of Percona Toolkit, a collection of advanced command-line -tools developed by Percona for MySQL support and consulting. 
Percona Toolkit -was forked from two projects in June, 2011: Maatkit and Aspersa. Those -projects were created by Baron Schwartz and developed primarily by him and -Daniel Nichter, both of whom are employed by Percona. Visit -`http://www.percona.com/software/ `_ for more software developed by Percona. - - -******************************** -COPYRIGHT, LICENSE, AND WARRANTY -******************************** - - -This program is copyright 2007-2011 Baron Schwartz, 2011 Percona Inc. -Feedback and improvements are welcome. - -THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation, version 2; OR the Perl Artistic License. On UNIX and similar -systems, you can issue \`man perlgpl' or \`man perlartistic' to read these -licenses. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA. - - -******* -VERSION -******* - - -pt-slave-restart 1.0.1 - diff --git a/docs/user/pt-stalk.rst b/docs/user/pt-stalk.rst deleted file mode 100644 index 3d32b0cd..00000000 --- a/docs/user/pt-stalk.rst +++ /dev/null @@ -1,367 +0,0 @@ - -######## -pt-stalk -######## - -.. highlight:: perl - - -**** -NAME -**** - - -pt-stalk - Wait for a condition to occur then begin collecting data. - - -******** -SYNOPSIS -******** - - -Usage: pt-stalk - -pt-stalk watches for a condition to become true, and when it does, executes -a script. By default it executes pt-collect, but that can be customized. -This tool is useful for gathering diagnostic data when an infrequent event -occurs, so an expert person can review the data later. 
- - -***** -RISKS -***** - - -The following section is included to inform users about the potential risks, -whether known or unknown, of using this tool. The two main categories of risks -are those created by the nature of the tool (e.g. read-only tools vs. read-write -tools) and those created by bugs. - -pt-stalk is a read-only tool. It should be very low-risk. - -At the time of this release, we know of no bugs that could cause serious harm -to users. - -The authoritative source for updated information is always the online issue -tracking system. Issues that affect this tool will be marked as such. You can -see a list of such issues at the following URL: -`http://www.percona.com/bugs/pt-stalk `_. - -See also "BUGS" for more information on filing bugs and getting help. - - -*********** -DESCRIPTION -*********** - - -Although pt-stalk comes pre-configured to do a specific thing, in general -this tool is just a skeleton script for the following flow of actions: - - -1. - - Loop infinitely, sleeping between iterations. - - - -2. - - In each iteration, run some command and get the output. - - - -3. - - If the command fails or the output is larger than the threshold, - execute the collection script; but do not execute if the destination disk - is too full. - - - -By default, the tool is configured to execute mysqladmin extended-status and -extract the value of the Threads_connected variable; if this is greater than -100, it runs the collection script. This is really just placeholder code, -and almost certainly needs to be customized! - -If the tool does execute the collection script, it will wait for a while -before checking and executing again. This is to prevent a continuous -condition from causing a huge number of executions to fire off. - -The name 'stalk' is because 'watch' is already taken, and 'stalk' is fun. 
- - -*********** -CONFIGURING -*********** - - -If the file \ *pt-stalk.conf*\ exists in the current working directory, then -"ENVIRONMENT" variables are imported from it. For example, the config -file has the format: - - -.. code-block:: perl - - INTERVAL=10 - GDB=yes - - -See "ENVIRONMENT". - - -******* -OPTIONS -******* - - -This tool does not have any command-line options, but see -"ENVIRONMENT" and "CONFIGURING". - - -*********** -ENVIRONMENT -*********** - - -The following environment variables configure how, what, and when the tool -runs. They are all optional and can be specified either on the command line -or in the \ *pt-stalk.conf*\ config file (see "CONFIGURING"). - - -THRESHOLD (default 100) - - This is the max number of we want to tolerate. - - - -VARIABLE (default Threads_connected} - - This is the thing to check for. - - - -CYCLES (default 1) - - How many times must the condition be met before the script will fire? - - - -GDB (default no) - - Collect GDB stacktraces? - - - -OPROFILE (default yes) - - Collect oprofile data? - - - -STRACE (default no) - - Collect strace data? - - - -TCPDUMP (default yes) - - Collect tcpdump data? - - - -EMAIL - - Send mail to this list of addresses when the script triggers. - - - -MYSQLOPTIONS - - Any options to pass to mysql/mysqladmin, such as -u, -p, etc - - - -INTERVAL (default 30) - - This is the interval between checks. - - - -MAYBE_EMPTY (default no) - - If the command you're running to detect the condition is allowed to return - nothing (e.g. a grep line that might not even exist if there's no problem), - then set this to "yes". - - - -COLLECT (default ${HOME}/bin/pt-collect) - - This is the location of the 'collect' script. - - - -DEST (default ${HOME}/collected/) - - This is where to store the collected data. - - - -DURATION (default 30) - - How long to collect statistics data for? Make sure that this isn't longer - than SLEEP. - - - -SLEEP (default DURATION \* 10) - - How long to sleep after collecting? 
- - - -PCT_THRESHOLD (default 95) - - Bail out if the disk is more than this %full. - - - -MB_THRESHOLD (default 100) - - Bail out if the disk has less than this many MB free. - - - -PURGE (default 30) - - Remove samples after this many days. - - - - -******************* -SYSTEM REQUIREMENTS -******************* - - -This tool requires Bash v3 or newer. - - -**** -BUGS -**** - - -For a list of known bugs, see `http://www.percona.com/bugs/pt-stalk `_. - -Please report bugs at `https://bugs.launchpad.net/percona-toolkit `_. -Include the following information in your bug report: - - -\* Complete command-line used to run the tool - - - -\* Tool "--version" - - - -\* MySQL version of all servers involved - - - -\* Output from the tool including STDERR - - - -\* Input files (log/dump/config files, etc.) - - - -If possible, include debugging output by running the tool with \ ``PTDEBUG``\ ; -see "ENVIRONMENT". - - -*********** -DOWNLOADING -*********** - - -Visit `http://www.percona.com/software/percona-toolkit/ `_ to download the -latest release of Percona Toolkit. Or, get the latest release from the -command line: - - -.. code-block:: perl - - wget percona.com/get/percona-toolkit.tar.gz - - wget percona.com/get/percona-toolkit.rpm - - wget percona.com/get/percona-toolkit.deb - - -You can also get individual tools from the latest release: - - -.. code-block:: perl - - wget percona.com/get/TOOL - - -Replace \ ``TOOL``\ with the name of any tool. - - -******* -AUTHORS -******* - - -Baron Schwartz, Justin Swanhart, and Fernando Ipar - - -********************* -ABOUT PERCONA TOOLKIT -********************* - - -This tool is part of Percona Toolkit, a collection of advanced command-line -tools developed by Percona for MySQL support and consulting. Percona Toolkit -was forked from two projects in June, 2011: Maatkit and Aspersa. Those -projects were created by Baron Schwartz and developed primarily by him and -Daniel Nichter, both of whom are employed by Percona. 
Visit -`http://www.percona.com/software/ `_ for more software developed by Percona. - - -******************************** -COPYRIGHT, LICENSE, AND WARRANTY -******************************** - - -This program is copyright 2010-2011 Baron Schwartz, 2011 Percona Inc. -Feedback and improvements are welcome. - -THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation, version 2; OR the Perl Artistic License. On UNIX and similar -systems, you can issue \`man perlgpl' or \`man perlartistic' to read these -licenses. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA. - - -******* -VERSION -******* - - -pt-stalk 1.0.1 - diff --git a/docs/user/pt-summary.rst b/docs/user/pt-summary.rst deleted file mode 100644 index 1f432587..00000000 --- a/docs/user/pt-summary.rst +++ /dev/null @@ -1,230 +0,0 @@ - -########## -pt-summary -########## - -.. highlight:: perl - - -**** -NAME -**** - - -pt-summary - Summarize system information in a nice way. - - -******** -SYNOPSIS -******** - - -Usage: pt-summary - -pt-summary conveniently summarizes the status and configuration of a server. -It is not a tuning tool or diagnosis tool. It produces a report that is easy -to diff and can be pasted into emails without losing the formatting. This -tool works well on Linux systems. - -Download and run: - - -.. code-block:: perl - - wget http://percona.com/get/pt-summary - bash ./pt-summary - - -Download and run in a single step: - - -.. 
code-block:: perl - - wget -O- http://percona.com/get/summary | bash - - - -***** -RISKS -***** - - -The following section is included to inform users about the potential risks, -whether known or unknown, of using this tool. The two main categories of risks -are those created by the nature of the tool (e.g. read-only tools vs. read-write -tools) and those created by bugs. - -pt-summary is a read-only tool. It should be very low-risk. - -At the time of this release, we know of no bugs that could cause serious harm -to users. - -The authoritative source for updated information is always the online issue -tracking system. Issues that affect this tool will be marked as such. You can -see a list of such issues at the following URL: -`http://www.percona.com/bugs/pt-summary `_. - -See also "BUGS" for more information on filing bugs and getting help. - - -*********** -DESCRIPTION -*********** - - -pt-summary runs a large variety of commands to inspect system status and -configuration, saves the output into files in /tmp, and then runs Unix -commands on these results to format them nicely. It works best when -executed as a privileged user, but will also work without privileges, -although some output might not be possible to generate without root. - - -******* -OPTIONS -******* - - -This tool does not have any command-line options. - - -*********** -ENVIRONMENT -*********** - - -The PT_SUMMARY_SKIP environment variable specifies a comma-separated list -of things to skip: - - -.. code-block:: perl - - MOUNT: Don't print out mounted filesystems and disk fullness. - NETWORK: Don't print out information on network controllers & config. - PROCESS: Don't print out top processes and vmstat information. - - - -******************* -SYSTEM REQUIREMENTS -******************* - - -This tool requires the Bourne shell (\ */bin/sh*\ ). - - -**** -BUGS -**** - - -For a list of known bugs, see `http://www.percona.com/bugs/pt-summary `_. 
- -Please report bugs at `https://bugs.launchpad.net/percona-toolkit `_. -Include the following information in your bug report: - - -\* Complete command-line used to run the tool - - - -\* Tool "--version" - - - -\* MySQL version of all servers involved - - - -\* Output from the tool including STDERR - - - -\* Input files (log/dump/config files, etc.) - - - -If possible, include debugging output by running the tool with \ ``PTDEBUG``\ ; -see "ENVIRONMENT". - - -*********** -DOWNLOADING -*********** - - -Visit `http://www.percona.com/software/percona-toolkit/ `_ to download the -latest release of Percona Toolkit. Or, get the latest release from the -command line: - - -.. code-block:: perl - - wget percona.com/get/percona-toolkit.tar.gz - - wget percona.com/get/percona-toolkit.rpm - - wget percona.com/get/percona-toolkit.deb - - -You can also get individual tools from the latest release: - - -.. code-block:: perl - - wget percona.com/get/TOOL - - -Replace \ ``TOOL``\ with the name of any tool. - - -******* -AUTHORS -******* - - -Baron Schwartz and Kevin van Zonneveld (http://kevin.vanzonneveld.net) - - -********************* -ABOUT PERCONA TOOLKIT -********************* - - -This tool is part of Percona Toolkit, a collection of advanced command-line -tools developed by Percona for MySQL support and consulting. Percona Toolkit -was forked from two projects in June, 2011: Maatkit and Aspersa. Those -projects were created by Baron Schwartz and developed primarily by him and -Daniel Nichter, both of whom are employed by Percona. Visit -`http://www.percona.com/software/ `_ for more software developed by Percona. - - -******************************** -COPYRIGHT, LICENSE, AND WARRANTY -******************************** - - -This program is copyright 2010-2011 Baron Schwartz, 2011 Percona Inc. -Feedback and improvements are welcome. 
- -THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation, version 2; OR the Perl Artistic License. On UNIX and similar -systems, you can issue \`man perlgpl' or \`man perlartistic' to read these -licenses. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA. - - -******* -VERSION -******* - - -pt-summary 1.0.1 - diff --git a/docs/user/pt-table-checksum.rst b/docs/user/pt-table-checksum.rst deleted file mode 100644 index 73ed4d5f..00000000 --- a/docs/user/pt-table-checksum.rst +++ /dev/null @@ -1,2036 +0,0 @@ - -################# -pt-table-checksum -################# - -.. highlight:: perl - - -**** -NAME -**** - - -pt-table-checksum - Perform an online replication consistency check, or checksum MySQL tables efficiently on one or many servers. - - -******** -SYNOPSIS -******** - - -Usage: pt-table-checksum [OPTION...] DSN [DSN...] - -pt-table-checksum checksums MySQL tables efficiently on one or more hosts. -Each host is specified as a DSN and missing values are inherited from the -first host. If you specify multiple hosts, the first is assumed to be the -master. - -\ **STOP!**\ Are you checksumming slaves against a master? Then be sure to learn -what "--replicate" does. It is probably the option you want to use. - -Checksum all slaves against the master: - - -.. code-block:: perl - - pt-table-checksum \ - h=master-host \ - --replicate mydb.checksums - - # Wait for first command to complete and replication to catchup - # on all slaves, then... 
- - pt-table-checksum \ - h=master-host \ - --replicat mydb.checksums \ - --replicate-check 2 - - -Checksum all databases and tables on two servers and print the differences: - - -.. code-block:: perl - - pt-table-checksum h=host1,u=user h=host2 | pt-checksum-filter - - -See "SPECIFYING HOSTS" for more on the syntax of the host arguments. - - -***** -RISKS -***** - - -The following section is included to inform users about the potential risks, -whether known or unknown, of using this tool. The two main categories of risks -are those created by the nature of the tool (e.g. read-only tools vs. read-write -tools) and those created by bugs. - -pt-table-checksum executes queries that cause the MySQL server to checksum its -data. This can cause significant server load. It is read-only unless you use -the "--replicate" option, in which case it inserts a small amount of data -into the specified table. - -At the time of this release, we know of no bugs that could cause serious harm to -users. There are miscellaneous bugs that might be annoying. - -The authoritative source for updated information is always the online issue -tracking system. Issues that affect this tool will be marked as such. You can -see a list of such issues at the following URL: -`http://www.percona.com/bugs/pt-table-checksum `_. - -See also "BUGS" for more information on filing bugs and getting help. - - -*********** -DESCRIPTION -*********** - - -pt-table-checksum generates table checksums for MySQL tables, typically -useful for verifying your slaves are in sync with the master. The checksums -are generated by a query on the server, and there is very little network -traffic as a result. - -Checksums typically take about twice as long as COUNT(\*) on very large InnoDB -tables in my tests. For smaller tables, COUNT(\*) is a good bit faster than -the checksums. See "--algorithm" for more details on performance. 
- -If you specify more than one server, pt-table-checksum assumes the first -server is the master and others are slaves. Checksums are parallelized for -speed, forking off a child process for each table. Duplicate server names are -ignored, but if you want to checksum a server against itself you can use two -different forms of the hostname (for example, "localhost 127.0.0.1", or -"h=localhost,P=3306 h=localhost,P=3307"). - -If you want to compare the tables in one database to those in another database -on the same server, just checksum both databases: - - -.. code-block:: perl - - pt-table-checksum --databases db1,db2 - - -You can then use pt-checksum-filter to compare the results in both databases -easily. - -pt-table-checksum examines table structure only on the first host specified, -so if anything differs on the others, it won't notice. It ignores views. - -The checksums work on MySQL version 3.23.58 through 6.0-alpha. They will not -necessarily produce the same values on all versions. Differences in -formatting and/or space-padding between 4.1 and 5.0, for example, will cause -the checksums to be different. - - -**************** -SPECIFYING HOSTS -**************** - - -Each host is specified on the command line as a DSN. A DSN is a comma-separted -list of \ ``option=value``\ pairs. The most basic DSN is \ ``h=host``\ to specify -the hostname of the server and use default for everything else (port, etc.). -See "DSN OPTIONS" for more information. - -DSN options that are listed as \ ``copy: yes``\ are copied from the first DSN -to subsequent DSNs that do not specify the DSN option. For example, -\ ``h=host1,P=12345 h=host2``\ is equivalent to \ ``h=host1,P=12345 h=host2,P=12345``\ . -This allows you to avoid repeating DSN options that have the same value -for all DSNs. - -Connection-related command-line options like "--user" and "--password" -provide default DSN values for the corresponding DSN options indicated by -the short form of each option. 
For example, the short form of "--user" -is \ ``-u``\ which corresponds to the \ ``u``\ DSN option, so \ ``--user bob h=host``\ -is equivalent to \ ``h=host,u=bob``\ . These defaults apply to all DSNs that -do not specify the DSN option. - -The DSN option value precedence from higest to lowest is: - - -.. code-block:: perl - - * explicit values in each DSN on the command-line - * copied values from the first DSN - * default values from connection-related command-line options - - -If you are confused about how pt-table-checksum will connect to your servers, -use the "--explain-hosts" option and it will tell you. - - -*************** -HOW FAST IS IT? -*************** - - -Speed and efficiency are important, because the typical use case is checksumming -large amounts of data. - -\ ``pt-table-checksum``\ is designed to do very little work itself, and generates -very little network traffic aside from inspecting table structures with \ ``SHOW -CREATE TABLE``\ . The results of checksum queries are typically 40-character or -shorter strings. - -The MySQL server does the bulk of the work, in the form of the checksum queries. -The following benchmarks show the checksum query times for various checksum -algorithms. The first two results are simply running \ ``COUNT(col8)``\ and -\ ``CHECKSUM TABLE``\ on the table. \ ``CHECKSUM TABLE``\ is just \ ``CRC32``\ under the -hood, but it's implemented inside the storage engine layer instead of at the -MySQL layer. - - -.. code-block:: perl - - ALGORITHM HASH FUNCTION EXTRA TIME - ============== ============= ============== ===== - COUNT(col8) 2.3 - CHECKSUM TABLE 5.3 - BIT_XOR FNV_64 12.7 - ACCUM FNV_64 42.4 - BIT_XOR MD5 --optimize-xor 80.0 - ACCUM MD5 87.4 - BIT_XOR SHA1 --optimize-xor 90.1 - ACCUM SHA1 101.3 - BIT_XOR MD5 172.0 - BIT_XOR SHA1 197.3 - - -The tests are entirely CPU-bound. The sample data is an InnoDB table with the -following structure: - - -.. 
code-block:: perl - - CREATE TABLE test ( - col1 int NOT NULL, - col2 date NOT NULL, - col3 int NOT NULL, - col4 int NOT NULL, - col5 int, - col6 decimal(3,1), - col7 smallint unsigned NOT NULL, - col8 timestamp NOT NULL, - PRIMARY KEY (col2, col1), - KEY (col7), - KEY (col1) - ) ENGINE=InnoDB - - -The table has 4303585 rows, 365969408 bytes of data and 173457408 bytes of -indexes. The server is a Dell PowerEdge 1800 with dual 32-bit Xeon 2.8GHz -processors and 2GB of RAM. The tests are fully CPU-bound, and the server is -otherwise idle. The results are generally consistent to within a tenth of a -second on repeated runs. - -\ ``CRC32``\ is the default checksum function to use, and should be enough for most -cases. If you need stronger guarantees that your data is identical, you should -use one of the other functions. - - -******************* -ALGORITHM SELECTION -******************* - - -The "--algorithm" option allows you to specify which algorithm you would -like to use, but it does not guarantee that pt-table-checksum will use this -algorithm. pt-table-checksum will ultimately select the best algorithm possible -given various factors such as the MySQL version and other command line options. - -The three basic algorithms in descending order of preference are CHECKSUM, -BIT_XOR and ACCUM. CHECKSUM cannot be used if any one of these criteria -is true: - - -.. code-block:: perl - - * --where is used - * --since is used - * --chunk-size is used - * --replicate is used - * --count is used - * MySQL version less than 4.1.1 - - -The BIT_XOR algorithm also requires MySQL version 4.1.1 or later. - -After checking these criteria, if the requested "--algorithm" remains then it -is used, otherwise the first remaining algorithm with the highest preference -is used. 
- - -******************** -CONSISTENT CHECKSUMS -******************** - - -If you are using this tool to verify your slaves still have the same data as the -master, which is why I wrote it, you should read this section. - -The best way to do this with replication is to use the "--replicate" option. -When the queries are finished running on the master and its slaves, you can go -to the slaves and issue SQL queries to see if any tables are different from the -master. Try the following: - - -.. code-block:: perl - - SELECT db, tbl, chunk, this_cnt-master_cnt AS cnt_diff, - this_crc <> master_crc OR ISNULL(master_crc) <> ISNULL(this_crc) - AS crc_diff - FROM checksum - WHERE master_cnt <> this_cnt OR master_crc <> this_crc - OR ISNULL(master_crc) <> ISNULL(this_crc); - - -The "--replicate-check" option can do this query for you. If you can't use -this method, try the following: - - -\* - - If your servers are not being written to, you can just run the tool with no - further ado: - - - .. code-block:: perl - - pt-table-checksum server1 server2 ... serverN - - - - -\* - - If the servers are being written to, you need some way to make sure they are - consistent at the moment you run the checksums. For situations other than - master-slave replication, you will have to figure this out yourself. You may be - able to use the "--where" option with a date or time column to only checksum - data that's not recent. - - - -\* - - If you are checksumming a master and slaves, you can do a fast parallel - checksum and assume the slaves are caught up to the master. In practice, this - tends to work well except for tables which are constantly updated. You can - use the "--slave-lag" option to see how far behind each slave was when it - checksummed a given table. This can help you decide whether to investigate - further. - - - -\* - - The next most disruptive technique is to lock the table on the master, then take - checksums. This should prevent changes from propagating to the slaves. 
You can - just lock on the master (with "--lock"), or you can both lock on the master - and wait on the slaves till they reach that point in the master's binlog - ("--wait"). Which is better depends on your workload; only you know that. - - - -\* - - If you decide to make the checksums on the slaves wait until they're guaranteed - to be caught up to the master, the algorithm looks like this: - - - .. code-block:: perl - - For each table, - Master: lock table - Master: get pos - In parallel, - Master: checksum - Slave(s): wait for pos, then checksum - End - Master: unlock table - End - - - - -What I typically do when I'm not using the "--replicate" option is simply run -the tool on all servers with no further options. This runs fast, parallel, -non-blocking checksums simultaneously. If there are tables that look different, -I re-run with "--wait"=600 on the tables in question. This makes the tool -lock on the master as explained above. - - -****** -OUTPUT -****** - - -Output is to STDOUT, one line per server and table, with header lines for each -database. I tried to make the output easy to process with awk. For this reason -columns are always present. If there's no value, pt-table-checksum prints -'NULL'. - -The default is column-aligned output for human readability, but you can change -it to tab-separated if you want. Use the "--tab" option for this. - -Output is unsorted, though all lines for one table should be output together. -For speed, all checksums are done in parallel (as much as possible) and may -complete out of the order in which they were started. You might want to run -them through another script or command-line utility to make sure they are in the -order you want. If you pipe the output through pt-checksum-filter, you -can sort the output and/or avoid seeing output about tables that have no -differences. - -The columns in the output are as follows. The database, table, and chunk come -first so you can sort by them easily (they are the "primary key"). 
- -Output from "--replicate-check" and "--checksum" are different. - - -DATABASE - - The database the table is in. - - - -TABLE - - The table name. - - - -CHUNK - - The chunk (see "--chunk-size"). Zero if you are not doing chunked checksums. - - - -HOST - - The server's hostname. - - - -ENGINE - - The table's storage engine. - - - -COUNT - - The table's row count, unless you specified to skip it. If \ ``OVERSIZE``\ is - printed, the chunk was skipped because the actual number of rows was greater - than "--chunk-size" times "--chunk-size-limit". - - - -CHECKSUM - - The table's checksum, unless you specified to skip it or the table has no rows. - some types of checksums will be 0 if there are no rows; others will print NULL. - - - -TIME - - How long it took to checksum the \ ``CHUNK``\ , not including \ ``WAIT``\ time. - Total checksum time is \ ``WAIT + TIME``\ . - - - -WAIT - - How long the slave waited to catch up to its master before beginning to - checksum. \ ``WAIT``\ is always 0 for the master. See "--wait". - - - -STAT - - The return value of MASTER_POS_WAIT(). \ ``STAT``\ is always \ ``NULL``\ for the - master. - - - -LAG - - How far the slave lags the master, as reported by SHOW SLAVE STATUS. - \ ``LAG``\ is always \ ``NULL``\ for the master. - - - - -*************************** -REPLICATE TABLE MAINTENANCE -*************************** - - -If you use "--replicate" to store and replicate checksums, you may need to -perform maintenance on the replicate table from time to time to remove old -checksums. This section describes when checksums in the replicate table are -deleted automatically by pt-table-checksum and when you must manually delete -them. - -Before starting, pt-table-checksum calculates chunks for each table, even -if "--chunk-size" is not specified (in that case there is one chunk: "1=1"). -Then, before checksumming each table, the tool deletes checksum chunks in the -replicate table greater than the current number of chunks. 
For example, -if a table is chunked into 100 chunks, 0-99, then pt-table-checksum does: - - -.. code-block:: perl - - DELETE FROM replicate table WHERE db=? AND tbl=? AND chunk > 99 - - -That removes any high-end chunks from previous runs which no longer exist. -Currently, this operation cannot be disabled. - -If you use "--resume", "--resume-replicate", or "--modulo", then -you need to be careful that the number of rows in a table does not decrease -so much that the number of chunks decreases too, else some checksum chunks may -be deleted. The one exception is if only rows at the high end of the range -are deleted. In that case, the high-end chunks are deleted and lower chunks -remain unchanged. An increasing number of rows or chunks should not cause -any adverse affects. - -Changing the "--chunk-size" between runs with "--resume", -"--resume-replicate", or "--modulo" can cause odd or invalid checksums. -You should not do this. It won't work with the resume options. With -"--modulo", the safest thing to do is manually delete all the rows in -the replicate table for the table in question and start over. - -If the replicate table becomes cluttered with old or invalid checksums -and the auto-delete operation is not deleting them, then you will need to -manually clean up the replicate table. Alternatively, if you specify -"--empty-replicate-table", then the tool deletes every row in the -replicate table. - - -*********** -EXIT STATUS -*********** - - -An exit status of 0 (sometimes also called a return value or return code) -indicates success. If there is an error checksumming any table, the exit status -is 1. - -When running "--replicate-check", if any slave has chunks that differ from -the master, the exit status is 1. - - -******* -QUERIES -******* - - -If you are using innotop (see `http://code.google.com/p/innotop `_), -mytop, or another tool to watch currently running MySQL queries, you may see -the checksum queries. They look similar to this: - - -.. 
code-block:: perl - - REPLACE /*test.test_tbl:'2'/'5'*/ INTO test.checksum(db, ... - - -Since pt-table-checksum's queries run for a long time and tend to be -textually very long, and thus won't fit on one screen of these monitoring -tools, I've been careful to place a comment at the beginning of the query so -you can see what it is and what it's doing. The comment contains the name of -the table that's being checksummed, the chunk it is currently checksumming, -and how many chunks will be checksummed. In the case above, it is -checksumming chunk 2 of 5 in table test.test_tbl. - - -******* -OPTIONS -******* - - -"--schema" is restricted to option groups Connection, Filter, Output, Help, Config, Safety. - -"--empty-replicate-table", "--resume" and "--resume-replicate" are mutually exclusive. - -This tool accepts additional command-line arguments. Refer to the -"SYNOPSIS" and usage information for details. - - ---algorithm - - type: string - - Checksum algorithm (ACCUM|CHECKSUM|BIT_XOR). - - Specifies which checksum algorithm to use. Valid arguments are CHECKSUM, - BIT_XOR and ACCUM. The latter two do cryptographic hash checksums. - See also "ALGORITHM SELECTION". - - CHECKSUM is built into MySQL, but has some disadvantages. BIT_XOR and ACCUM are - implemented by SQL queries. They use a cryptographic hash of all columns - concatenated together with a separator, followed by a bitmap of each nullable - column that is NULL (necessary because CONCAT_WS() skips NULL columns). - - CHECKSUM is the default. This method uses MySQL's built-in CHECKSUM TABLE - command, which is a CRC32 behind the scenes. It cannot be used before MySQL - 4.1.1, and various options disable it as well. It does not simultaneously count - rows; that requires an extra COUNT(\*) query. This is a good option when you are - using MyISAM tables with live checksums enabled; in this case both the COUNT(\*) - and CHECKSUM queries will run very quickly. 
- - The BIT_XOR algorithm is available for MySQL 4.1.1 and newer. It uses - BIT_XOR(), which is order-independent, to reduce all the rows to a single - checksum. - - ACCUM uses a user variable as an accumulator. It reduces each row to a single - checksum, which is concatenated with the accumulator and re-checksummed. This - technique is order-dependent. If the table has a primary key, it will be used - to order the results for consistency; otherwise it's up to chance. - - The pathological worst case is where identical rows will cancel each other out - in the BIT_XOR. In this case you will not be able to distinguish a table full - of one value from a table full of another value. The ACCUM algorithm will - distinguish them. - - However, the ACCUM algorithm is order-dependent, so if you have two tables - with identical data but the rows are out of order, you'll get different - checksums with ACCUM. - - If a given algorithm won't work for some reason, pt-table-checksum falls back to - another. The least common denominator is ACCUM, which works on MySQL 3.23.2 and - newer. - - - ---arg-table - - type: string - - The database.table with arguments for each table to checksum. - - This table may be named anything you wish. It must contain at least the - following columns: - - - .. code-block:: perl - - CREATE TABLE checksum_args ( - db char(64) NOT NULL, - tbl char(64) NOT NULL, - -- other columns as desired - PRIMARY KEY (db, tbl) - ); - - - In addition to the columns shown, it may contain any of the other columns listed - here (Note: this list is used by the code, MAGIC_overridable_args): - - - .. code-block:: perl - - algorithm chunk-column chunk-index chunk-size columns count crc function lock - modulo use-index offset optimize-xor chunk-size-limit probability separator - save-since single-chunk since since-column sleep sleep-coef trim wait where - - - Each of these columns corresponds to the long form of a command-line option. - Each column should be NULL-able. 
Column names with hyphens should be enclosed - in backticks (e.g. \`chunk-size\`) when the table is created. The data type does - not matter, but it's suggested you use a sensible data type to prevent garbage - data. - - When \ ``pt-table-checksum``\ checksums a table, it will look for a matching entry - in this table. Any column that has a defined value will override the - corresponding command-line argument for the table being currently processed. - In this way it is possible to specify custom command-line arguments for any - table. - - If you add columns to the table that aren't in the above list of allowable - columns, it's an error. The exceptions are \ ``db``\ , \ ``tbl``\ , and \ ``ts``\ . The \ ``ts``\ - column can be used as a timestamp for easy visibility into the last time the - \ ``since``\ column was updated with "--save-since". - - This table is assumed to be located on the first server given on the - command-line. - - - ---ask-pass - - group: Connection - - Prompt for a password when connecting to MySQL. - - - ---check-interval - - type: time; group: Throttle; default: 1s - - How often to check for slave lag if "--check-slave-lag" is given. - - - ---[no]check-replication-filters - - default: yes; group: Safety - - Do not "--replicate" if any replication filters are set. When - --replicate is specified, pt-table-checksum tries to detect slaves and look - for options that filter replication, such as binlog_ignore_db and - replicate_do_db. If it finds any such filters, it aborts with an error. - Replication filtering makes it impossible to be sure that the checksum - queries won't break replication or simply fail to replicate. If you are sure - that it's OK to run the checksum queries, you can negate this option to - disable the checks. See also "--replicate-database". - - - ---check-slave-lag - - type: DSN; group: Throttle - - Pause checksumming until the specified slave's lag is less than "--max-lag". 
- - If this option is specified and "--throttle-method" is set to \ ``slavelag``\ - then "--throttle-method" only checks this slave. - - - ---checksum - - group: Output - - Print checksums and table names in the style of md5sum (disables - "--[no]count"). - - Makes the output behave more like the output of \ ``md5sum``\ . The checksum is - first on the line, followed by the host, database, table, and chunk number, - concatenated with dots. - - - ---chunk-column - - type: string - - Prefer this column for dividing tables into chunks. By default, - pt-table-checksum chooses the first suitable column for each table, preferring - to use the primary key. This option lets you specify a preferred column, which - pt-table-checksum uses if it exists in the table and is chunkable. If not, then - pt-table-checksum will revert to its default behavior. Be careful when using - this option; a poor choice could cause bad performance. This is probably best - to use when you are checksumming only a single table, not an entire server. See - also "--chunk-index". - - - ---chunk-index - - type: string - - Prefer this index for chunking tables. By default, pt-table-checksum chooses an - appropriate index for the "--chunk-column" (even if it chooses the chunk - column automatically). This option lets you specify the index you prefer. If - the index doesn't exist, then pt-table-checksum will fall back to its default - behavior. pt-table-checksum adds the index to the checksum SQL statements in a - \ ``FORCE INDEX``\ clause. Be careful when using this option; a poor choice of - index could cause bad performance. This is probably best to use when you are - checksumming only a single table, not an entire server. - - - ---chunk-range - - type: string; default: open - - Set which ends of the chunk range are open or closed. Possible values are - one of MAGIC_chunk_range: - - - .. 
code-block:: perl - - VALUE OPENS/CLOSES - ========== ====================== - open Both ends are open - openclosed Low end open, high end closed - - - By default pt-table-checksum uses an open range of chunks like: - - - .. code-block:: perl - - `id` < '10' - `id` >= '10' AND `id` < '20' - `id` >= '20' - - - That range is open because the last chunk selects any row with id greater than - (or equal to) 20. An open range can be a problem in cases where a lot of new - rows are inserted with IDs greater than 20 while pt-table-checksum is - running because the final open-ended chunk will select all the newly inserted - rows. (The less common case of inserting rows with IDs less than 10 would - require a \ ``closedopen``\ range but that is not currently implemented.) - Specifying \ ``openclosed``\ will cause the final chunk to be closed like: - - - .. code-block:: perl - - `id` >= '20' AND `id` <= N - - - N is the \ ``MAX(\`id\`)``\ that pt-table-checksum used when it first chunked - the rows. Therefore, it will only chunk the range of rows that existed when - the tool started and not any newly inserted rows (unless those rows happen - to be inserted with IDs less than N). - - See also "--chunk-size-limit". - - - ---chunk-size - - type: string - - Approximate number of rows or size of data to checksum at a time. Allowable - suffixes are k, M, G. Disallows \ ``--algorithm CHECKSUM``\ . - - If you specify a chunk size, pt-table-checksum will try to find an index that - will let it split the table into ranges of approximately "--chunk-size" - rows, based on the table's index statistics. Currently only numeric and date - types can be chunked. - - If the table is chunkable, pt-table-checksum will checksum each range separately - with parameters in the checksum query's WHERE clause. If pt-table-checksum - cannot find a suitable index, it will do the entire table in one chunk as though - you had not specified "--chunk-size" at all.
Each table is handled - individually, so some tables may be chunked and others not. - - The chunks will be approximately sized, and depending on the distribution of - values in the indexed column, some chunks may be larger than the value you - specify. - - If you specify a suffix (one of k, M or G), the parameter is treated as a data - size rather than a number of rows. The output of SHOW TABLE STATUS is then used - to estimate the amount of data the table contains, and convert that to a number - of rows. - - - ---chunk-size-limit - - type: float; default: 2.0; group: Safety - - Do not checksum chunks with this many times more rows than "--chunk-size". - - When "--chunk-size" is given it specifies an ideal size for each chunk - of a chunkable table (in rows; size values are converted to rows). Before - checksumming each chunk, pt-table-checksum checks how many rows are in the - chunk with EXPLAIN. If the number of rows reported by EXPLAIN is this many - times greater than "--chunk-size", then the chunk is skipped and \ ``OVERSIZE``\ - is printed for the \ ``COUNT``\ column of the "OUTPUT". - - For example, if you specify "--chunk-size" 100 and a chunk has 150 rows, - then it is checksummed with the default "--chunk-size-limit" value 2.0 - because 150 is less than 100 \* 2.0. But if the chunk has 205 rows, then it - is not checksummed because 205 is greater than 100 \* 2.0. - - The minimum value for this option is 1 which means that no chunk can be any - larger than "--chunk-size". You probably don't want to specify 1 because - rows reported by EXPLAIN are estimates which can be greater than or less than - the real number of rows in the chunk. If too many chunks are skipped because - they are oversize, you might want to specify a value larger than 2. - - You can disable oversize chunk checking by specifying "--chunk-size-limit" 0. - - See also "--unchunkable-tables". 
- - - ---columns - - short form: -c; type: array; group: Filter - - Checksum only this comma-separated list of columns. - - - ---config - - type: Array; group: Config - - Read this comma-separated list of config files; if specified, this must be the - first option on the command line. - - - ---[no]count - - Count rows in tables. This is built into ACCUM and BIT_XOR, but requires an - extra query for CHECKSUM. - - This is disabled by default to avoid an extra COUNT(\*) query when - "--algorithm" is CHECKSUM. If you have only MyISAM tables and live checksums - are enabled, both CHECKSUM and COUNT will be very fast, but otherwise you may - want to use one of the other algorithms. - - - ---[no]crc - - default: yes - - Do a CRC (checksum) of tables. - - Take the checksum of the rows as well as their count. This is enabled by - default. If you disable it, you'll just get COUNT(\*) queries. - - - ---create-replicate-table - - Create the replicate table given by "--replicate" if it does not exist. - - Normally, if the replicate table given by "--replicate" does not exist, - \ ``pt-table-checksum``\ will die. With this option, however, \ ``pt-table-checksum``\ - will create the replicate table for you, using the database.table name given to - "--replicate". - - The structure of the replicate table is the same as the suggested table - mentioned in "--replicate". Note that since ENGINE is not specified, the - replicate table will use the server's default storage engine. If you want to - use a different engine, you need to create the table yourself. - - - ---databases - - short form: -d; type: hash; group: Filter - - Only checksum this comma-separated list of databases. - - - ---databases-regex - - type: string - - Only checksum databases whose names match this Perl regex. - - - ---defaults-file - - short form: -F; type: string; group: Connection - - Only read mysql options from the given file. You must give an absolute - pathname. 
- - - ---empty-replicate-table - - DELETE all rows in the "--replicate" table before starting. - - Issues a DELETE against the table given by "--replicate" before beginning - work. Ignored if "--replicate" is not specified. This can be useful to - remove entries related to tables that no longer exist, or just to clean out the - results of a previous run. - - If you want to delete entries for specific databases or tables you must - do this manually. - - - ---engines - - short form: -e; type: hash; group: Filter - - Do only this comma-separated list of storage engines. - - - ---explain - - group: Output - - Show, but do not execute, checksum queries (disables "--empty-replicate-table"). - - - ---explain-hosts - - group: Help - - Print full DSNs for each host and exit. This option allows you to see how - pt-table-checksum parses DSNs from the command-line and how it will connect - to those hosts. See "SPECIFYING HOSTS". - - - ---float-precision - - type: int - - Precision for \ ``FLOAT``\ and \ ``DOUBLE``\ number-to-string conversion. Causes FLOAT - and DOUBLE values to be rounded to the specified number of digits after the - decimal point, with the ROUND() function in MySQL. This can help avoid - checksum mismatches due to different floating-point representations of the same - values on different MySQL versions and hardware. The default is no rounding; - the values are converted to strings by the CONCAT() function, and MySQL chooses - the string representation. If you specify a value of 2, for example, then the - values 1.008 and 1.009 will be rounded to 1.01, and will checksum as equal. - - - ---function - - type: string - - Hash function for checksums (FNV1A_64, MURMUR_HASH, SHA1, MD5, CRC32, etc). - - You can use this option to choose the cryptographic hash function used for - "--algorithm"=ACCUM or "--algorithm"=BIT_XOR. 
The default is to use - \ ``CRC32``\ , but \ ``MD5``\ and \ ``SHA1``\ also work, and you can use your own function, - such as a compiled UDF, if you wish. Whatever function you specify is run in - SQL, not in Perl, so it must be available to MySQL. - - The \ ``FNV1A_64``\ UDF mentioned in the benchmarks is much faster than \ ``MD5``\ . The - C++ source code is distributed with Maatkit. It is very simple to compile and - install; look at the header in the source code for instructions. If it is - installed, it is preferred over \ ``MD5``\ . You can also use the MURMUR_HASH - function if you compile and install that as a UDF; the source is also - distributed with Maatkit, and it is faster and has better distribution - than FNV1A_64. - - - ---help - - group: Help - - Show help and exit. - - - ---ignore-columns - - type: Hash; group: Filter - - Ignore this comma-separated list of columns when calculating the checksum. - - This option only affects the checksum when using the ACCUM or BIT_XOR - "--algorithm". - - - ---ignore-databases - - type: Hash; group: Filter - - Ignore this comma-separated list of databases. - - - ---ignore-databases-regex - - type: string - - Ignore databases whose names match this Perl regex. - - - ---ignore-engines - - type: Hash; default: FEDERATED,MRG_MyISAM; group: Filter - - Ignore this comma-separated list of storage engines. - - - ---ignore-tables - - type: Hash; group: Filter - - Ignore this comma-separated list of tables. - - Table names may be qualified with the database name. - - - ---ignore-tables-regex - - type: string - - Ignore tables whose names match the Perl regex. - - - ---lock - - Lock on master until done on slaves (implies "--slave-lag"). - - This option can help you to get a consistent read on a master and many slaves. - If you specify this option, pt-table-checksum will lock the table on the - first server on the command line, which it assumes to be the master. 
It will - keep this lock until the checksums complete on the other servers. - - This option isn't very useful by itself, so you probably want to use "--wait" - instead. - - Note: if you're checksumming a slave against its master, you should use - "--replicate". In that case, there's no need for locking, waiting, or any of - that. - - - ---max-lag - - type: time; group: Throttle; default: 1s - - Suspend checksumming if the slave given by "--check-slave-lag" lags. - - This option causes pt-table-checksum to look at the slave every time it's about - to checksum a chunk. If the slave's lag is greater than the option's value, or - if the slave isn't running (so its lag is NULL), pt-table-checksum sleeps for - "--check-interval" seconds and then looks at the lag again. It repeats until - the slave is caught up, then proceeds to checksum the chunk. - - This option is useful to let you checksum data as fast as the slaves can handle - it, assuming the slave you directed pt-table-checksum to monitor is - representative of all the slaves that may be replicating from this server. It - should eliminate the need for "--sleep" or "--sleep-coef". - - - ---modulo - - type: int - - Do only every Nth chunk on chunked tables. - - This option lets you checksum only some chunks of the table. This is a useful - alternative to "--probability" when you want to be sure you get full coverage - in some specified number of runs; for example, you can do only every 7th chunk, - and then use "--offset" to rotate the modulo every day of the week. - - Just like with "--probability", a table that cannot be chunked is done every - time. - - - ---offset - - type: string; default: 0 - - Modulo offset expression for use with "--modulo". - - The argument may be an SQL expression, such as \ ``WEEKDAY(NOW())``\ (which returns - a number from 0 through 6). The argument is evaluated by MySQL. The result is - used as follows: if chunk_num % "--modulo" == "--offset", the chunk will - be checksummed. 
- - - ---[no]optimize-xor - - default: yes - - Optimize BIT_XOR with user variables. - - This option specifies to use user variables to reduce the number of times each - row must be passed through the cryptographic hash function when you are using - the BIT_XOR algorithm. - - With the optimization, the queries look like this in pseudo-code: - - - .. code-block:: perl - - SELECT CONCAT( - BIT_XOR(SLICE_OF(@user_variable)), - BIT_XOR(SLICE_OF(@user_variable)), - ... - BIT_XOR(SLICE_OF(@user_variable := HASH(col1, col2... colN)))); - - - The exact positioning of user variables and calls to the hash function is - determined dynamically, and will vary between MySQL versions. Without the - optimization, it looks like this: - - - .. code-block:: perl - - SELECT CONCAT( - BIT_XOR(SLICE_OF(MD5(col1, col2... colN))), - BIT_XOR(SLICE_OF(MD5(col1, col2... colN))), - ... - BIT_XOR(SLICE_OF(MD5(col1, col2... colN)))); - - - The difference is the number of times all the columns must be mashed together - and fed through the hash function. If you are checksumming really large - columns, such as BLOB or TEXT columns, this might make a big difference. - - - ---password - - short form: -p; type: string; group: Connection - - Password to use when connecting. - - - ---pid - - type: string - - Create the given PID file. The file contains the process ID of the script. - The PID file is removed when the script exits. Before starting, the script - checks if the PID file already exists. If it does not, then the script creates - and writes its own PID to it. If it does, then the script checks the following: - if the file contains a PID and a process is running with that PID, then - the script dies; or, if there is no process running with that PID, then the - script overwrites the file with its own PID and starts; else, if the file - contains no PID, then the script dies. - - - ---port - - short form: -P; type: int; group: Connection - - Port number to use for connection. 
- - - ---probability - - type: int; default: 100 - - Checksums will be run with this percent probability. - - This is an integer between 1 and 100. If 100, every chunk of every table will - certainly be checksummed. If less than that, there is a chance that some chunks - of some tables will be skipped. This is useful for routine jobs designed to - randomly sample bits of tables without checksumming the whole server. By - default, if a table is not chunkable, it will be checksummed every time even - when the probability is less than 100. You can override this with - "--single-chunk". - - See also "--modulo". - - - ---progress - - type: array; default: time,30 - - Print progress reports to STDERR. Currently, this feature is only for when - "--throttle-method" waits for slaves to catch up. - - The value is a comma-separated list with two parts. The first part can be - percentage, time, or iterations; the second part specifies how often an update - should be printed, in percentage, seconds, or number of iterations. - - - ---quiet - - short form: -q; group: Output - - Do not print checksum results. - - - ---recheck - - Re-checksum chunks that "--replicate-check" found to be different. - - - ---recurse - - type: int; group: Throttle - - Number of levels to recurse in the hierarchy when discovering slaves. - Default is infinite. - - See "--recursion-method". - - - ---recursion-method - - type: string - - Preferred recursion method for discovering slaves. - - Possible methods are: - - - .. code-block:: perl - - METHOD USES - =========== ================ - processlist SHOW PROCESSLIST - hosts SHOW SLAVE HOSTS - - - The processlist method is preferred because SHOW SLAVE HOSTS is not reliable. - However, the hosts method is required if the server uses a non-standard - port (not 3306). Usually pt-table-checksum does the right thing and finds - the slaves, but you may give a preferred method and it will be used first. 
- If it doesn't find any slaves, the other methods will be tried. - - - ---replicate - - type: string - - Replicate checksums to slaves (disallows --algorithm CHECKSUM). - - This option enables a completely different checksum strategy for a consistent, - lock-free checksum across a master and its slaves. Instead of running the - checksum queries on each server, you run them only on the master. You specify a - table, fully qualified in db.table format, to insert the results into. The - checksum queries will insert directly into the table, so they will be replicated - through the binlog to the slaves. - - When the queries are finished replicating, you can run a simple query on each - slave to see which tables have differences from the master. With the - "--replicate-check" option, pt-table-checksum can run the query for you to - make it even easier. See "CONSISTENT CHECKSUMS" for details. - - If you find tables that have differences, you can use the chunk boundaries in a - WHERE clause with pt-table-sync to help repair them more efficiently. See - pt-table-sync for details. - - The table must have at least these columns: db, tbl, chunk, boundaries, - this_crc, master_crc, this_cnt, master_cnt. The table may be named anything you - wish. Here is a suggested table structure, which is automatically used for - "--create-replicate-table" (MAGIC_create_replicate): - - - .. code-block:: perl - - CREATE TABLE checksum ( - db char(64) NOT NULL, - tbl char(64) NOT NULL, - chunk int NOT NULL, - boundaries char(100) NOT NULL, - this_crc char(40) NOT NULL, - this_cnt int NOT NULL, - master_crc char(40) NULL, - master_cnt int NULL, - ts timestamp NOT NULL, - PRIMARY KEY (db, tbl, chunk) - ); - - - Be sure to choose an appropriate storage engine for the checksum table. If you - are checksumming InnoDB tables, for instance, a deadlock will break replication - if the checksum table is non-transactional, because the transaction will still - be written to the binlog. 
It will then replay without a deadlock on the - slave and break replication with "different error on master and slave." This - is not a problem with pt-table-checksum, it's a problem with MySQL - replication, and you can read more about it in the MySQL manual. - - This works only with statement-based replication (pt-table-checksum will switch - the binlog format to STATEMENT for the duration of the session if your server - uses row-based replication). - - In contrast to running the tool against multiple servers at once, using this - option eliminates the complexities of synchronizing checksum queries across - multiple servers, which normally requires locking and unlocking, waiting for - master binlog positions, and so on. Thus, it disables "--lock", "--wait", - and "--slave-lag" (but not "--check-slave-lag", which is a way to throttle - the execution speed). - - The checksum queries actually do a REPLACE into this table, so existing rows - need not be removed before running. However, you may wish to do this anyway to - remove rows related to tables that don't exist anymore. The - "--empty-replicate-table" option does this for you. - - Since the table must be qualified with a database (e.g. \ ``db.checksums``\ ), - pt-table-checksum will only USE this database. This may be important if any - replication options are set because it could affect whether or not changes - to the table are replicated. - - If the slaves have any --replicate-do-X or --replicate-ignore-X options, you - should be careful not to checksum any databases or tables that exist on the - master and not the slaves. Changes to such tables may not normally be executed - on the slaves because of the --replicate options, but the checksum queries - modify the contents of the table that stores the checksums, not the tables whose - data you are checksumming. 
Therefore, these queries will be executed on the - slave, and if the table or database you're checksumming does not exist, the - queries will cause replication to fail. For more information on replication - rules, see `http://dev.mysql.com/doc/en/replication-rules.html <http://dev.mysql.com/doc/en/replication-rules.html>`_. - - The table specified by "--replicate" will never be checksummed itself. - - - ---replicate-check - - type: int - - Check results in "--replicate" table, to the specified depth. You must use - this after you run the tool normally; it skips the checksum step and only checks - results. - - It recursively finds differences recorded in the table given by - "--replicate". It recurses to the depth you specify: 0 is no recursion - (check only the server you specify), 1 is check the server and its slaves, 2 is - check the slaves of its slaves, and so on. - - It finds differences by running the query shown in "CONSISTENT CHECKSUMS", - and prints results, then exits after printing. This is just a convenient way of - running the query so you don't have to do it manually. - - The output is one informational line per slave host, followed by the results - of the query, if any. If "--quiet" is specified, there is no output. If - there are no differences between the master and any slave, there is no output. - If any slave has chunks that differ from the master, pt-table-checksum's - exit status is 1; otherwise it is 0. - - This option makes \ ``pt-table-checksum``\ look for slaves by running \ ``SHOW - PROCESSLIST``\ . If it finds connections that appear to be from slaves, it derives - connection information for each slave with the same default-and-override method - described in "SPECIFYING HOSTS". - - If \ ``SHOW PROCESSLIST``\ doesn't return any rows, \ ``pt-table-checksum``\ looks at - \ ``SHOW SLAVE HOSTS``\ instead. The host and port, and user and password if - available, from \ ``SHOW SLAVE HOSTS``\ are combined into a DSN and used as the - argument.
This requires slaves to be configured with \ ``report-host``\ , - \ ``report-port``\ and so on. - - This requires the @@SERVER_ID system variable, so it works only on MySQL - 3.23.26 or newer. - - - ---replicate-database - - type: string - - \ ``USE``\ only this database with "--replicate". By default, pt-table-checksum - executes USE to set its default database to the database that contains the table - it's currently working on. It changes its default database as it works on - different tables. This is a best effort to avoid problems with replication - filters such as binlog_ignore_db and replicate_ignore_db. However, replication - filters can create a situation where there simply is no one right way to do - things. Some statements might not be replicated, and others might cause - replication to fail on the slaves. In such cases, it is up to the user to - specify a safe default database. This option specifies a default database that - pt-table-checksum selects with USE, and never changes afterwards. See also - "--[no]check-replication-filters". - - - ---resume - - type: string - - Resume checksum using given output file from a previously interrupted run. - - The given output file should be the literal output from a previous run of - \ ``pt-table-checksum``\ . For example: - - - .. code-block:: perl - - pt-table-checksum host1 host2 -C 100 > checksum_results.txt - pt-table-checksum host1 host2 -C 100 --resume checksum_results.txt - - - The command line options given to the first run and the resumed run must - be identical (except, of course, for --resume). If they are not, the result - will be unpredictable and probably wrong. - - "--resume" does not work with "--replicate"; for that, use - "--resume-replicate". - - - ---resume-replicate - - Resume "--replicate". - - This option resumes a previous checksum operation using "--replicate". - It is like "--resume" but does not require an output file.
Instead, - it uses the checksum table given to "--replicate" to determine where to - resume the checksum operation. - - - ---save-since - - When "--arg-table" and "--since" are given, save the current "--since" - value into that table's \ ``since``\ column after checksumming. In this way you can - incrementally checksum tables by starting where the last one finished. - - The value to be saved could be the current timestamp, or it could be the maximum - existing value of the column given by "--since-column". It depends on what - options are in effect. See the description of "--since" to see how - timestamps are different from ordinary values. - - - ---schema - - Checksum \ ``SHOW CREATE TABLE``\ instead of table data. - - - ---separator - - type: string; default: # - - The separator character used for CONCAT_WS(). - - This character is used to join the values of columns when checksumming with - "--algorithm" of BIT_XOR or ACCUM. - - - ---set-vars - - type: string; default: wait_timeout=10000; group: Connection - - Set these MySQL variables. Immediately after connecting to MySQL, this - string will be appended to SET and executed. - - - ---since - - type: string - - Checksum only data newer than this value. - - If the table is chunk-able or nibble-able, this value will apply to the first - column of the chunked or nibbled index. - - This is not too different to "--where", but instead of universally applying a - WHERE clause to every table, it selectively finds the right column to use and - applies it only if such a column is found. See also "--since-column". - - The argument may be an expression, which is evaluated by MySQL. For example, - you can specify \ ``CURRENT_DATE - INTERVAL 7 DAY``\ to get the date of one week - ago. 
- - A special bit of extra magic: if the value is temporal (looks like a date or - datetime), then the table is checksummed only if the create time (or last - modified time, for tables that report the last modified time, such as MyISAM - tables) is newer than the value. In this sense it's not applied as a WHERE - clause at all. - - - ---since-column - - type: string - - The column name to be used for "--since". - - The default is for the tool to choose the best one automatically. If you - specify a value, that will be used if possible; otherwise the best - auto-determined one; otherwise none. If the column doesn't exist in the table, - it is just ignored. - - - ---single-chunk - - Permit skipping with "--probability" if there is only one chunk. - - Normally, if a table isn't split into many chunks, it will always be - checksummed regardless of "--probability". This setting lets the - probabilistic behavior apply to tables that aren't divided into chunks. - - - ---slave-lag - - group: Output - - Report replication delay on the slaves. - - If this option is enabled, the output will show how many seconds behind the - master each slave is. This can be useful when you want a fast, parallel, - non-blocking checksum, and you know your slaves might be delayed relative to the - master. You can inspect the results and make an educated guess whether any - discrepancies on the slave are due to replication delay instead of corrupt data. - - If you're using "--replicate", a slave that is delayed relative to the master - does not invalidate the correctness of the results, so this option is disabled. - - - ---sleep - - type: int; group: Throttle - - Sleep time between checksums. - - If this option is specified, pt-table-checksum will sleep the specified - number of seconds between checksums. That is, it will sleep between every - table, and if you specify "--chunk-size", it will also sleep between chunks. 
- - This is a very crude way to throttle checksumming; see "--sleep-coef" and - "--check-slave-lag" for techniques that permit greater control. - - - ---sleep-coef - - type: float; group: Throttle - - Calculate "--sleep" as a multiple of the last checksum time. - - If this option is specified, pt-table-checksum will sleep the amount of - time elapsed during the previous checksum, multiplied by the specified - coefficient. This option is ignored if "--sleep" is specified. - - This is a slightly more sophisticated way to throttle checksum speed: sleep a - varying amount of time between chunks, depending on how long the chunks are - taking. Even better is to use "--check-slave-lag" if you're checksumming - master/slave replication. - - - ---socket - - short form: -S; type: string; group: Connection - - Socket file to use for connection. - - - ---tab - - group: Output - - Print tab-separated output, not column-aligned output. - - - ---tables - - short form: -t; type: hash; group: Filter - - Do only this comma-separated list of tables. - - Table names may be qualified with the database name. - - - ---tables-regex - - type: string - - Only checksum tables whose names match this Perl regex. - - - ---throttle-method - - type: string; default: none; group: Throttle - - Throttle checksumming when doing "--replicate". - - At present there is only one method: \ ``slavelag``\ . When "--replicate" is - used, pt-table-checksum automatically sets "--throttle-method" to - \ ``slavelag``\ and discovers every slave and throttles checksumming if any slave - lags more than "--max-lag". Specify \ ``--throttle-method none``\ to disable - this behavior completely, or specify "--check-slave-lag" and - pt-table-checksum will only check that slave. - - See also "--recurse" and "--recursion-method". - - - ---trim - - Trim \ ``VARCHAR``\ columns (helps when comparing 4.1 to >= 5.0). - - This option adds a \ ``TRIM()``\ to \ ``VARCHAR``\ columns in \ ``BIT_XOR``\ and \ ``ACCUM``\ - modes.
- - This is useful when you don't care about the trailing space differences between - MySQL versions which vary in their handling of trailing spaces. MySQL 5.0 and - later all retain trailing spaces in \ ``VARCHAR``\ , while previous versions would - remove them. - - - ---unchunkable-tables - - group: Safety - - Checksum tables that cannot be chunked when "--chunk-size" is specified. - - By default pt-table-checksum will not checksum a table that cannot be chunked - when "--chunk-size" is specified because this might result in a huge, - non-chunkable table being checksummed in one huge, memory-intensive chunk. - - Specifying this option allows checksumming tables that cannot be chunked. - Be careful when using this option! Make sure any non-chunkable tables - are not so large that they will cause the tool to consume too much memory - or CPU. - - See also "--chunk-size-limit". - - - ---[no]use-index - - default: yes - - Add FORCE INDEX hints to SQL statements. - - By default \ ``pt-table-checksum``\ adds an index hint (\ ``FORCE INDEX``\ for MySQL - v4.0.9 and newer, \ ``USE INDEX``\ for older MySQL versions) to each SQL statement - to coerce MySQL into using the "--chunk-index" (whether the index is - specified by the option or auto-detected). Specifying \ ``--no-use-index``\ causes - \ ``pt-table-checksum``\ to omit index hints. - - - ---user - - short form: -u; type: string; group: Connection - - User for login if not current user. - - - ---[no]verify - - default: yes - - Verify checksum compatibility across servers. - - This option runs a trivial checksum on all servers to ensure they have - compatible CONCAT_WS() and cryptographic hash functions. - - Versions of MySQL before 4.0.14 will skip empty strings and NULLs in - CONCAT_WS, and others will only skip NULLs. The two kinds of behavior will - produce different results if you have any columns containing the empty string - in your table. 
If you know you don't (for instance, all columns are - integers), you can safely disable this check and you will get a reliable - checksum even on servers with different behavior. - - - ---version - - group: Help - - Show version and exit. - - - ---wait - - short form: -w; type: time - - Wait this long for slaves to catch up to their master (implies "--lock" - "--slave-lag"). - - Note: the best way to verify that a slave is in sync with its master is to use - "--replicate" instead. The "--wait" option is really only useful if - you're trying to compare masters and slaves without using "--replicate", - which is possible but complex and less efficient in some ways. - - This option helps you get a consistent checksum across a master server and its - slaves. It combines locking and waiting to accomplish this. First it locks the - table on the master (the first server on the command line). Then it finds the - master's binlog position. Checksums on slaves will be deferred until they reach - the same binlog position. - - The argument to the option is the number of seconds to wait for the slaves to - catch up to the master. It is actually the argument to MASTER_POS_WAIT(). If - the slaves don't catch up to the master within this time, they will unblock - and go ahead with the checksum. You can tell whether this happened by - examining the STAT column in the output, which is the return value of - MASTER_POS_WAIT(). - - - ---where - - type: string - - Do only rows matching this \ ``WHERE``\ clause (disallows "--algorithm" CHECKSUM). - - You can use this option to limit the checksum to only part of the table. This - is particularly useful if you have append-only tables and don't want to - constantly re-check all rows; you could run a daily job to just check - yesterday's rows, for instance. - - This option is much like the -w option to mysqldump. Do not specify the WHERE - keyword. You may need to quote the value. Here is an example: - - - .. 
code-block:: perl - - pt-table-checksum --where "foo=bar" - - - - ---[no]zero-chunk - - default: yes - - Add a chunk for rows with zero or zero-equivalent values. This only has an - effect when "--chunk-size" is specified. The purpose of the zero chunk - is to capture a potentially large number of zero values that would imbalance - the size of the first chunk. For example, if a lot of negative numbers were - inserted into an unsigned integer column causing them to be stored as zeros, - then these zero values are captured by the zero chunk instead of the first - chunk and all its non-zero values. - - - - -*********** -DSN OPTIONS -*********** - - -These DSN options are used to create a DSN. Each option is given like -\ ``option=value``\ . The options are case-sensitive, so P and p are not the -same option. There cannot be whitespace before or after the \ ``=``\ and -if the value contains whitespace it must be quoted. DSN options are -comma-separated. See the percona-toolkit manpage for full details. - - -\* A - - dsn: charset; copy: yes - - Default character set. - - - -\* D - - dsn: database; copy: yes - - Default database. - - - -\* F - - dsn: mysql_read_default_file; copy: yes - - Only read default options from the given file - - - -\* h - - dsn: host; copy: yes - - Connect to host. - - - -\* p - - dsn: password; copy: yes - - Password to use when connecting. - - - -\* P - - dsn: port; copy: yes - - Port number to use for connection. - - - -\* S - - dsn: mysql_socket; copy: yes - - Socket file to use for connection. - - - -\* u - - dsn: user; copy: yes - - User for login if not current user. - - - - -*********** -ENVIRONMENT -*********** - - -The environment variable \ ``PTDEBUG``\ enables verbose debugging output to STDERR. -To enable debugging and capture all output to a file, run the tool like: - - -.. code-block:: perl - - PTDEBUG=1 pt-table-checksum ... > FILE 2>&1 - - -Be careful: debugging output is voluminous and can generate several megabytes -of output.
- - -******************* -SYSTEM REQUIREMENTS -******************* - - -You need Perl, DBI, DBD::mysql, and some core packages that ought to be -installed in any reasonably new version of Perl. - - -**** -BUGS -**** - - -For a list of known bugs, see `http://www.percona.com/bugs/pt-table-checksum `_. - -Please report bugs at `https://bugs.launchpad.net/percona-toolkit `_. -Include the following information in your bug report: - - -\* Complete command-line used to run the tool - - - -\* Tool "--version" - - - -\* MySQL version of all servers involved - - - -\* Output from the tool including STDERR - - - -\* Input files (log/dump/config files, etc.) - - - -If possible, include debugging output by running the tool with \ ``PTDEBUG``\ ; -see "ENVIRONMENT". - - -*********** -DOWNLOADING -*********** - - -Visit `http://www.percona.com/software/percona-toolkit/ `_ to download the -latest release of Percona Toolkit. Or, get the latest release from the -command line: - - -.. code-block:: perl - - wget percona.com/get/percona-toolkit.tar.gz - - wget percona.com/get/percona-toolkit.rpm - - wget percona.com/get/percona-toolkit.deb - - -You can also get individual tools from the latest release: - - -.. code-block:: perl - - wget percona.com/get/TOOL - - -Replace \ ``TOOL``\ with the name of any tool. - - -******* -AUTHORS -******* - - -Baron Schwartz - - -*************** -ACKNOWLEDGMENTS -*************** - - -Claus Jeppesen, Francois Saint-Jacques, Giuseppe Maxia, Heikki Tuuri, -James Briggs, Martin Friebe, and Sergey Zhuravlev - - -********************* -ABOUT PERCONA TOOLKIT -********************* - - -This tool is part of Percona Toolkit, a collection of advanced command-line -tools developed by Percona for MySQL support and consulting. Percona Toolkit -was forked from two projects in June, 2011: Maatkit and Aspersa. Those -projects were created by Baron Schwartz and developed primarily by him and -Daniel Nichter, both of whom are employed by Percona. 
Visit -`http://www.percona.com/software/ `_ for more software developed by Percona. - - -******************************** -COPYRIGHT, LICENSE, AND WARRANTY -******************************** - - -This program is copyright 2007-2011 Baron Schwartz, 2011 Percona Inc. -Feedback and improvements are welcome. - -THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation, version 2; OR the Perl Artistic License. On UNIX and similar -systems, you can issue \`man perlgpl' or \`man perlartistic' to read these -licenses. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA. - - -******* -VERSION -******* - - -pt-table-checksum 1.0.1 - diff --git a/docs/user/pt-table-sync.rst b/docs/user/pt-table-sync.rst deleted file mode 100644 index b84f0735..00000000 --- a/docs/user/pt-table-sync.rst +++ /dev/null @@ -1,1627 +0,0 @@ - -############# -pt-table-sync -############# - -.. highlight:: perl - - -**** -NAME -**** - - -pt-table-sync - Synchronize MySQL table data efficiently. - - -******** -SYNOPSIS -******** - - -Usage: pt-table-sync [OPTION...] DSN [DSN...] - -pt-table-sync synchronizes data efficiently between MySQL tables. - -This tool changes data, so for maximum safety, you should back up your data -before you use it. When synchronizing a server that is a replication slave with -the --replicate or --sync-to-master methods, it \ **always**\ makes the changes on -the replication master, \ **never**\ the replication slave directly. 
This is in -general the only safe way to bring a replica back in sync with its master; -changes to the replica are usually the source of the problems in the first -place. However, the changes it makes on the master should be no-op changes that -set the data to their current values, and actually affect only the replica. -Please read the detailed documentation that follows to learn more about this. - -Sync db.tbl on host1 to host2: - - -.. code-block:: perl - - pt-table-sync --execute h=host1,D=db,t=tbl h=host2 - - -Sync all tables on host1 to host2 and host3: - - -.. code-block:: perl - - pt-table-sync --execute host1 host2 host3 - - -Make slave1 have the same data as its replication master: - - -.. code-block:: perl - - pt-table-sync --execute --sync-to-master slave1 - - -Resolve differences that pt-table-checksum found on all slaves of master1: - - -.. code-block:: perl - - pt-table-sync --execute --replicate test.checksum master1 - - -Same as above but only resolve differences on slave1: - - -.. code-block:: perl - - pt-table-sync --execute --replicate test.checksum \ - --sync-to-master slave1 - - -Sync master2 in a master-master replication configuration, where master2's copy -of db.tbl is known or suspected to be incorrect: - - -.. code-block:: perl - - pt-table-sync --execute --sync-to-master h=master2,D=db,t=tbl - - -Note that in the master-master configuration, the following will NOT do what you -want, because it will make changes directly on master2, which will then flow -through replication and change master1's data: - - -.. code-block:: perl - - # Don't do this in a master-master setup! - pt-table-sync --execute h=master1,D=db,t=tbl master2 - - - -***** -RISKS -***** - - -The following section is included to inform users about the potential risks, -whether known or unknown, of using this tool. The two main categories of risks -are those created by the nature of the tool (e.g. read-only tools vs. read-write -tools) and those created by bugs. 
- -With great power comes great responsibility! This tool changes data, so it is a -good idea to back up your data. It is also very powerful, which means it is -very complex, so you should run it with the "--dry-run" option to see what it -will do, until you're familiar with its operation. If you want to see which -rows are different, without changing any data, use "--print" instead of -"--execute". - -Be careful when using pt-table-sync in any master-master setup. Master-master -replication is inherently tricky, and it's easy to make mistakes. You need to -be sure you're using the tool correctly for master-master replication. See the -"SYNOPSIS" for the overview of the correct usage. - -Also be careful with tables that have foreign key constraints with \ ``ON DELETE``\ -or \ ``ON UPDATE``\ definitions because these might cause unintended changes on the -child tables. - -In general, this tool is best suited when your tables have a primary key or -unique index. Although it can synchronize data in tables lacking a primary key -or unique index, it might be best to synchronize that data by another means. - -At the time of this release, there is a potential bug using -"--lock-and-rename" with MySQL 5.1, a bug detecting certain differences, -a bug using ROUND() across different platforms, and a bug mixing collations. - -The authoritative source for updated information is always the online issue -tracking system. Issues that affect this tool will be marked as such. You can -see a list of such issues at the following URL: -`http://www.percona.com/bugs/pt-table-sync `_. - -See also "BUGS" for more information on filing bugs and getting help. - - -*********** -DESCRIPTION -*********** - - -pt-table-sync does one-way and bidirectional synchronization of table data. -It does \ **not**\ synchronize table structures, indexes, or any other schema -objects. The following describes one-way synchronization. -"BIDIRECTIONAL SYNCING" is described later. 
- -This tool is complex and functions in several different ways. To use it -safely and effectively, you should understand three things: the purpose -of "--replicate", finding differences, and specifying hosts. These -three concepts are closely related and determine how the tool will run. -The following is the abbreviated logic: - - -.. code-block:: perl - - if DSN has a t part, sync only that table: - if 1 DSN: - if --sync-to-master: - The DSN is a slave. Connect to its master and sync. - if more than 1 DSN: - The first DSN is the source. Sync each DSN in turn. - else if --replicate: - if --sync-to-master: - The DSN is a slave. Connect to its master, find records - of differences, and fix. - else: - The DSN is the master. Find slaves and connect to each, - find records of differences, and fix. - else: - if only 1 DSN and --sync-to-master: - The DSN is a slave. Connect to its master, find tables and - filter with --databases etc, and sync each table to the master. - else: - find tables, filtering with --databases etc, and sync each - DSN to the first. - - -pt-table-sync can run in one of two ways: with "--replicate" or without. -The default is to run without "--replicate" which causes pt-table-sync -to automatically find differences efficiently with one of several -algorithms (see "ALGORITHMS"). Alternatively, the value of -"--replicate", if specified, causes pt-table-sync to use the differences -already found by having previously run pt-table-checksum with its own -\ ``--replicate``\ option. Strictly speaking, you don't need to use -"--replicate" because pt-table-sync can find differences, but many -people use "--replicate" if, for example, they checksum regularly -using pt-table-checksum then fix differences as needed with pt-table-sync. -If you're unsure, read each tool's documentation carefully and decide for -yourself, or consult with an expert. - -Regardless of whether "--replicate" is used or not, you need to specify -which hosts to sync.
There are two ways: with "--sync-to-master" or -without. Specifying "--sync-to-master" makes pt-table-sync expect -one and only one slave DSN on the command line. The tool will automatically -discover the slave's master and sync it so that its data is the same as -its master. This is accomplished by making changes on the master which -then flow through replication and update the slave to resolve its differences. -\ **Be careful though**\ : although this option specifies and syncs a single -slave, if there are other slaves on the same master, they will receive -via replication the changes intended for the slave that you're trying to -sync. - -Alternatively, if you do not specify "--sync-to-master", the first -DSN given on the command line is the source host. There is only ever -one source host. If you do not also specify "--replicate", then you -must specify at least one other DSN as the destination host. There -can be one or more destination hosts. Source and destination hosts -must be independent; they cannot be in the same replication topology. -pt-table-sync will die with an error if it detects that a destination -host is a slave because changes are written directly to destination hosts -(and it's not safe to write directly to slaves). Or, if you specify -"--replicate" (but not "--sync-to-master") then pt-table-sync expects -one and only one master DSN on the command line. The tool will automatically -discover all the master's slaves and sync them to the master. This is -the only way to sync several (all) slaves at once (because -"--sync-to-master" only specifies one slave). - -Each host on the command line is specified as a DSN. The first DSN -(or only DSN for cases like "--sync-to-master") provides default values -for other DSNs, whether those other DSNs are specified on the command line -or auto-discovered by the tool. So in this example, - - -..
code-block:: perl - - pt-table-sync --execute h=host1,u=msandbox,p=msandbox h=host2 - - -the host2 DSN inherits the \ ``u``\ and \ ``p``\ DSN parts from the host1 DSN. -Use the "--explain-hosts" option to see how pt-table-sync will interpret -the DSNs given on the command line. - - -****** -OUTPUT -****** - - -If you specify the "--verbose" option, you'll see information about the -differences between the tables. There is one row per table. Each server is -printed separately. For example, - - -.. code-block:: perl - - # Syncing h=host1,D=test,t=test1 - # DELETE REPLACE INSERT UPDATE ALGORITHM START END EXIT DATABASE.TABLE - # 0 0 3 0 Chunk 13:00:00 13:00:17 2 test.test1 - - -Table test.test1 on host1 required 3 \ ``INSERT``\ statements to synchronize -and it used the Chunk algorithm (see "ALGORITHMS"). The sync operation -for this table started at 13:00:00 and ended 17 seconds later (times taken -from \ ``NOW()``\ on the source host). Because differences were found, its -"EXIT STATUS" was 2. - -If you specify the "--print" option, you'll see the actual SQL statements -that the script uses to synchronize the table if "--execute" is also -specified. - -If you want to see the SQL statements that pt-table-sync is using to select -chunks, nibbles, rows, etc., then specify "--print" once and "--verbose" -twice. Be careful though: this can print a lot of SQL statements. - -There are cases where no combination of \ ``INSERT``\ , \ ``UPDATE``\ or \ ``DELETE``\ -statements can resolve differences without violating some unique key. For -example, suppose there's a primary key on column a and a unique key on column b. -Then there is no way to sync these two tables with straightforward UPDATE -statements: - - -.. code-block:: perl - - +---+---+ +---+---+ - | a | b | | a | b | - +---+---+ +---+---+ - | 1 | 2 | | 1 | 1 | - | 2 | 1 | | 2 | 2 | - +---+---+ +---+---+ - - -The tool rewrites queries to \ ``DELETE``\ and \ ``REPLACE``\ in this case. 
This is -automatically handled after the first index violation, so you don't have to -worry about it. - - -****************** -REPLICATION SAFETY -****************** - - -Synchronizing a replication master and slave safely is a non-trivial problem, in -general. There are all sorts of issues to think about, such as other processes -changing data, trying to change data on the slave, whether the destination and -source are a master-master pair, and much more. - -In general, the safe way to do it is to change the data on the master, and let -the changes flow through replication to the slave like any other changes. -However, this works only if it's possible to REPLACE into the table on the -master. REPLACE works only if there's a unique index on the table (otherwise it -just acts like an ordinary INSERT). - -If your table has unique keys, you should use the "--sync-to-master" and/or -"--replicate" options to sync a slave to its master. This will generally do -the right thing. When there is no unique key on the table, there is no choice -but to change the data on the slave, and pt-table-sync will detect that you're -trying to do so. It will complain and die unless you specify -\ ``--no-check-slave``\ (see "--[no]check-slave"). - -If you're syncing a table without a primary or unique key on a master-master -pair, you must change the data on the destination server. Therefore, you need -to specify \ ``--no-bin-log``\ for safety (see "--[no]bin-log"). If you don't, -the changes you make on the destination server will replicate back to the -source server and change the data there! - -The generally safe thing to do on a master-master pair is to use the -"--sync-to-master" option so you don't change the data on the destination -server. You will also need to specify \ ``--no-check-slave``\ to keep -pt-table-sync from complaining that it is changing data on a slave. 
- - -********** -ALGORITHMS -********** - - -pt-table-sync has a generic data-syncing framework which uses different -algorithms to find differences. The tool automatically chooses the best -algorithm for each table based on indexes, column types, and the algorithm -preferences specified by "--algorithms". The following algorithms are -available, listed in their default order of preference: - - -Chunk - - Finds an index whose first column is numeric (including date and time types), - and divides the column's range of values into chunks of approximately - "--chunk-size" rows. Syncs a chunk at a time by checksumming the entire - chunk. If the chunk differs on the source and destination, checksums each - chunk's rows individually to find the rows that differ. - - It is efficient when the column has sufficient cardinality to make the chunks - end up about the right size. - - The initial per-chunk checksum is quite small and results in minimal network - traffic and memory consumption. If a chunk's rows must be examined, only the - primary key columns and a checksum are sent over the network, not the entire - row. If a row is found to be different, the entire row will be fetched, but not - before. - - - -Nibble - - Finds an index and ascends the index in fixed-size nibbles of "--chunk-size" - rows, using a non-backtracking algorithm (see pt-archiver for more on this - algorithm). It is very similar to "Chunk", but instead of pre-calculating - the boundaries of each piece of the table based on index cardinality, it uses - \ ``LIMIT``\ to define each nibble's upper limit, and the previous nibble's upper - limit to define the lower limit. - - It works in steps: one query finds the row that will define the next nibble's - upper boundary, and the next query checksums the entire nibble. If the nibble - differs between the source and destination, it examines the nibble row-by-row, - just as "Chunk" does. 
- - - -GroupBy - - Selects the entire table grouped by all columns, with a COUNT(\*) column added. - Compares all columns, and if they're the same, compares the COUNT(\*) column's - value to determine how many rows to insert or delete into the destination. - Works on tables with no primary key or unique index. - - - -Stream - - Selects the entire table in one big stream and compares all columns. Selects - all columns. Much less efficient than the other algorithms, but works when - there is no suitable index for them to use. - - - -Future Plans - - Possibilities for future algorithms are TempTable (what I originally called - bottom-up in earlier versions of this tool), DrillDown (what I originally - called top-down), and GroupByPrefix (similar to how SqlYOG Job Agent works). - Each algorithm has strengths and weaknesses. If you'd like to implement your - favorite technique for finding differences between two sources of data on - possibly different servers, I'm willing to help. The algorithms adhere to a - simple interface that makes it pretty easy to write your own. - - - - -********************* -BIDIRECTIONAL SYNCING -********************* - - -Bidirectional syncing is a new, experimental feature. To make it work -reliably there are a number of strict limitations: - - -.. code-block:: perl - - * only works when syncing one server to other independent servers - * does not work in any way with replication - * requires that the table(s) are chunkable with the Chunk algorithm - * is not N-way, only bidirectional between two servers at a time - * does not handle DELETE changes - - -For example, suppose we have three servers: c1, r1, r2. c1 is the central -server, a pseudo-master to the other servers (viz. r1 and r2 are not slaves -to c1). r1 and r2 are remote servers. Rows in table foo are updated and -inserted on all three servers and we want to synchronize all the changes -between all the servers. Table foo has columns: - - -.. 
code-block:: perl - - id int PRIMARY KEY - ts timestamp auto updated - name varchar - - -Auto-increment offsets are used so that new rows from any server do not -create conflicting primary key (id) values. In general, newer rows, as -determined by the ts column, take precedence when a same but differing row -is found during the bidirectional sync. "Same but differing" means that -two rows have the same primary key (id) value but different values for some -other column, like the name column in this example. Same but differing -conflicts are resolved by a "conflict". A conflict compares some column of -the competing rows to determine a "winner". The winning row becomes the -source and its values are used to update the other row. - -There are subtle differences between three columns used to achieve -bidirectional syncing that you should be familiar with: chunk column -("--chunk-column"), comparison column(s) ("--columns"), and conflict -column ("--conflict-column"). The chunk column is only used to chunk the -table; e.g. "WHERE id >= 5 AND id < 10". Chunks are checksummed and when -chunk checksums reveal a difference, the tool selects the rows in that -chunk and checksums the "--columns" for each row. If a column checksum -differs, the rows have one or more conflicting column values. In a -traditional unidirectional sync, the conflict is a moot point because it can -be resolved simply by updating the entire destination row with the source -row's values. In a bidirectional sync, however, the "--conflict-column" -(in accordance with other \ ``--conflict-\*``\ options listed below) is compared -to determine which row is "correct" or "authoritative"; this row becomes -the "source". - -To sync all three servers completely, two runs of pt-table-sync are required. -The first run syncs c1 and r1, then syncs c1 and r2 including any changes -from r1.
At this point c1 and r2 are completely in sync, but r1 is missing -any changes from r2 because c1 didn't have these changes when it and r1 -were synced. So a second run is needed which syncs the servers in the same -order, but this time when c1 and r1 are synced r1 gets r2's changes. - -The tool does not sync N-ways, only bidirectionally between the first DSN -given on the command line and each subsequent DSN in turn. So the tool in -this example would be run twice like: - - -.. code-block:: perl - - pt-table-sync --bidirectional h=c1 h=r1 h=r2 - - -The "--bidirectional" option enables this feature and causes various -sanity checks to be performed. You must specify other options that tell -pt-table-sync how to resolve conflicts for same but differing rows. -These options are: - - -.. code-block:: perl - - * --conflict-column - * --conflict-comparison - * --conflict-value - * --conflict-threshold - * --conflict-error (optional) - - -Use "--print" to test this option before "--execute". The printed -SQL statements will have comments saying on which host the statement -would be executed if you used "--execute". - -Technical side note: the first DSN is always the "left" server and the other -DSNs are always the "right" server. Since either server can become the source -or destination it's confusing to think of them as "src" and "dst". Therefore, -they're generically referred to as left and right. It's easy to remember -this because the first DSN is always to the left of the other server DSNs on -the command line. - - -*********** -EXIT STATUS -*********** - - -The following are the exit statuses (also called return values, or return codes) -when pt-table-sync finishes and exits. - - -.. code-block:: perl - - STATUS MEANING - ====== ======================================================= - 0 Success. - 1 Internal error. - 2 At least one table differed on the destination. - 3 Combination of 1 and 2.
- - - -******* -OPTIONS -******* - - -Specify at least one of "--print", "--execute", or "--dry-run". - -"--where" and "--replicate" are mutually exclusive. - -This tool accepts additional command-line arguments. Refer to the -"SYNOPSIS" and usage information for details. - - ---algorithms - - type: string; default: Chunk,Nibble,GroupBy,Stream - - Algorithm to use when comparing the tables, in order of preference. - - For each table, pt-table-sync will check if the table can be synced with - the given algorithms in the order that they're given. The first algorithm - that can sync the table is used. See "ALGORITHMS". - - - ---ask-pass - - Prompt for a password when connecting to MySQL. - - - ---bidirectional - - Enable bidirectional sync between first and subsequent hosts. - - See "BIDIRECTIONAL SYNCING" for more information. - - - ---[no]bin-log - - default: yes - - Log to the binary log (\ ``SET SQL_LOG_BIN=1``\ ). - - Specifying \ ``--no-bin-log``\ will \ ``SET SQL_LOG_BIN=0``\ . - - - ---buffer-in-mysql - - Instruct MySQL to buffer queries in its memory. - - This option adds the \ ``SQL_BUFFER_RESULT``\ option to the comparison queries. - This causes MySQL to execute the queries and place them in a temporary table - internally before sending the results back to pt-table-sync. The advantage of - this strategy is that pt-table-sync can fetch rows as desired without using a - lot of memory inside the Perl process, while releasing locks on the MySQL table - (to reduce contention with other queries). The disadvantage is that it uses - more memory on the MySQL server instead. - - You probably want to leave "--[no]buffer-to-client" enabled too, because - buffering into a temp table and then fetching it all into Perl's memory is - probably a silly thing to do. This option is most useful for the GroupBy and - Stream algorithms, which may fetch a lot of data from the server. 
- - - ---[no]buffer-to-client - - default: yes - - Fetch rows one-by-one from MySQL while comparing. - - This option enables \ ``mysql_use_result``\ which causes MySQL to hold the selected - rows on the server until the tool fetches them. This allows the tool to use - less memory but may keep the rows locked on the server longer. - - If this option is disabled by specifying \ ``--no-buffer-to-client``\ then - \ ``mysql_store_result``\ is used which causes MySQL to send all selected rows to - the tool at once. This may result in the results "cursor" being held open for - a shorter time on the server, but if the tables are large, it could take a long - time anyway, and use all your memory. - - For most non-trivial data sizes, you want to leave this option enabled. - - This option is disabled when "--bidirectional" is used. - - - ---charset - - short form: -A; type: string - - Default character set. If the value is utf8, sets Perl's binmode on - STDOUT to utf8, passes the mysql_enable_utf8 option to DBD::mysql, and - runs SET NAMES UTF8 after connecting to MySQL. Any other value sets - binmode on STDOUT without the utf8 layer, and runs SET NAMES after - connecting to MySQL. - - - ---[no]check-master - - default: yes - - With "--sync-to-master", try to verify that the detected - master is the real master. - - - ---[no]check-privileges - - default: yes - - Check that user has all necessary privileges on source and destination table. - - - ---[no]check-slave - - default: yes - - Check whether the destination server is a slave. - - If the destination server is a slave, it's generally unsafe to make changes on - it. However, sometimes you have to; "--replace" won't work unless there's a - unique index, for example, so you can't make changes on the master in that - scenario. By default pt-table-sync will complain if you try to change data on - a slave. Specify \ ``--no-check-slave``\ to disable this check. Use it at your own - risk. 
- - - ---[no]check-triggers - - default: yes - - Check that no triggers are defined on the destination table. - - Triggers were introduced in MySQL v5.0.2, so for older versions this option - has no effect because triggers will not be checked. - - - ---chunk-column - - type: string - - Chunk the table on this column. - - - ---chunk-index - - type: string - - Chunk the table using this index. - - - ---chunk-size - - type: string; default: 1000 - - Number of rows or data size per chunk. - - The size of each chunk of rows for the "Chunk" and "Nibble" algorithms. - The size can be either a number of rows, or a data size. Data sizes are - specified with a suffix of k=kibibytes, M=mebibytes, G=gibibytes. Data sizes - are converted to a number of rows by dividing by the average row length. - - - ---columns - - short form: -c; type: array - - Compare this comma-separated list of columns. - - - ---config - - type: Array - - Read this comma-separated list of config files; if specified, this must be the - first option on the command line. - - - ---conflict-column - - type: string - - Compare this column when rows conflict during a "--bidirectional" sync. - - When a same but differing row is found the value of this column from each - row is compared according to "--conflict-comparison", "--conflict-value" - and "--conflict-threshold" to determine which row has the correct data and - becomes the source. The column can be any type for which there is an - appropriate "--conflict-comparison" (this is almost all types except, for - example, blobs). - - This option only works with "--bidirectional". - See "BIDIRECTIONAL SYNCING" for more information. - - - ---conflict-comparison - - type: string - - Choose the "--conflict-column" with this property as the source. - - The option affects how the "--conflict-column" values from the conflicting - rows are compared. Possible comparisons are one of these MAGIC_comparisons: - - - .. 
code-block:: perl - - newest|oldest|greatest|least|equals|matches - - COMPARISON CHOOSES ROW WITH - ========== ========================================================= - newest Newest temporal --conflict-column value - oldest Oldest temporal --conflict-column value - greatest Greatest numerical "--conflict-column value - least Least numerical --conflict-column value - equals --conflict-column value equal to --conflict-value - matches --conflict-column value matching Perl regex pattern - --conflict-value - - - This option only works with "--bidirectional". - See "BIDIRECTIONAL SYNCING" for more information. - - - ---conflict-error - - type: string; default: warn - - How to report unresolvable conflicts and conflict errors - - This option changes how the user is notified when a conflict cannot be - resolved or causes some kind of error. Possible values are: - - - .. code-block:: perl - - * warn: Print a warning to STDERR about the unresolvable conflict - * die: Die, stop syncing, and print a warning to STDERR - - - This option only works with "--bidirectional". - See "BIDIRECTIONAL SYNCING" for more information. - - - ---conflict-threshold - - type: string - - Amount by which one "--conflict-column" must exceed the other. - - The "--conflict-threshold" prevents a conflict from being resolved if - the absolute difference between the two "--conflict-column" values is - less than this amount. For example, if two "--conflict-column" have - timestamp values "2009-12-01 12:00:00" and "2009-12-01 12:05:00" the difference - is 5 minutes. If "--conflict-threshold" is set to "5m" the conflict will - be resolved, but if "--conflict-threshold" is set to "6m" the conflict - will fail to resolve because the difference is not greater than or equal - to 6 minutes. In this latter case, "--conflict-error" will report - the failure. - - This option only works with "--bidirectional". - See "BIDIRECTIONAL SYNCING" for more information. 
- - - ---conflict-value - - type: string - - Use this value for certain "--conflict-comparison". - - This option gives the value for \ ``equals``\ and \ ``matches``\ - "--conflict-comparison". - - This option only works with "--bidirectional". - See "BIDIRECTIONAL SYNCING" for more information. - - - ---databases - - short form: -d; type: hash - - Sync only this comma-separated list of databases. - - A common request is to sync tables from one database with tables from another - database on the same or different server. This is not yet possible. - "--databases" will not do it, and you can't do it with the D part of the DSN - either because in the absence of a table name it assumes the whole server - should be synced and the D part controls only the connection's default database. - - - ---defaults-file - - short form: -F; type: string - - Only read mysql options from the given file. You must give an absolute pathname. - - - ---dry-run - - Analyze, decide the sync algorithm to use, print and exit. - - Implies "--verbose" so you can see the results. The results are in the same - output format that you'll see from actually running the tool, but there will be - zeros for rows affected. This is because the tool actually executes, but stops - before it compares any data and just returns zeros. The zeros do not mean there - are no changes to be made. - - - ---engines - - short form: -e; type: hash - - Sync only this comma-separated list of storage engines. - - - ---execute - - Execute queries to make the tables have identical data. - - This option makes pt-table-sync actually sync table data by executing all - the queries that it created to resolve table differences. Therefore, \ **the - tables will be changed!**\ And unless you also specify "--verbose", the - changes will be made silently. If this is not what you want, see - "--print" or "--dry-run". - - - ---explain-hosts - - Print connection information and exit. 
- - Print out a list of hosts to which pt-table-sync will connect, with all - the various connection options, and exit. - - - ---float-precision - - type: int - - Precision for \ ``FLOAT``\ and \ ``DOUBLE``\ number-to-string conversion. Causes FLOAT - and DOUBLE values to be rounded to the specified number of digits after the - decimal point, with the ROUND() function in MySQL. This can help avoid - checksum mismatches due to different floating-point representations of the same - values on different MySQL versions and hardware. The default is no rounding; - the values are converted to strings by the CONCAT() function, and MySQL chooses - the string representation. If you specify a value of 2, for example, then the - values 1.008 and 1.009 will be rounded to 1.01, and will checksum as equal. - - - ---[no]foreign-key-checks - - default: yes - - Enable foreign key checks (\ ``SET FOREIGN_KEY_CHECKS=1``\ ). - - Specifying \ ``--no-foreign-key-checks``\ will \ ``SET FOREIGN_KEY_CHECKS=0``\ . - - - ---function - - type: string - - Which hash function you'd like to use for checksums. - - The default is \ ``CRC32``\ . Other good choices include \ ``MD5``\ and \ ``SHA1``\ . If you - have installed the \ ``FNV_64``\ user-defined function, \ ``pt-table-sync``\ will detect - it and prefer to use it, because it is much faster than the built-ins. You can - also use MURMUR_HASH if you've installed that user-defined function. Both of - these are distributed with Maatkit. See pt-table-checksum for more - information and benchmarks. - - - ---help - - Show help and exit. - - - ---[no]hex-blob - - default: yes - - \ ``HEX()``\ \ ``BLOB``\ , \ ``TEXT``\ and \ ``BINARY``\ columns. - - When row data from the source is fetched to create queries to sync the - data (i.e. the queries seen with "--print" and executed by "--execute"), - binary columns are wrapped in HEX() so the binary data does not produce - an invalid SQL statement. You can disable this option but you probably - shouldn't. 
- - - ---host - - short form: -h; type: string - - Connect to host. - - - ---ignore-columns - - type: Hash - - Ignore this comma-separated list of column names in comparisons. - - This option causes columns not to be compared. However, if a row is determined - to differ between tables, all columns in that row will be synced, regardless. - (It is not currently possible to exclude columns from the sync process itself, - only from the comparison.) - - - ---ignore-databases - - type: Hash - - Ignore this comma-separated list of databases. - - - ---ignore-engines - - type: Hash; default: FEDERATED,MRG_MyISAM - - Ignore this comma-separated list of storage engines. - - - ---ignore-tables - - type: Hash - - Ignore this comma-separated list of tables. - - Table names may be qualified with the database name. - - - ---[no]index-hint - - default: yes - - Add FORCE/USE INDEX hints to the chunk and row queries. - - By default \ ``pt-table-sync``\ adds a FORCE/USE INDEX hint to each SQL statement - to coerce MySQL into using the index chosen by the sync algorithm or specified - by "--chunk-index". This is usually a good thing, but in rare cases the - index may not be the best for the query so you can suppress the index hint - by specifying \ ``--no-index-hint``\ and let MySQL choose the index. - - This does not affect the queries printed by "--print"; it only affects the - chunk and row queries that \ ``pt-table-sync``\ uses to select and compare rows. - - - ---lock - - type: int - - Lock tables: 0=none, 1=per sync cycle, 2=per table, or 3=globally. - - This uses \ ``LOCK TABLES``\ . This can help prevent tables being changed while - you're examining them. The possible values are as follows: - - - .. code-block:: perl - - VALUE MEANING - ===== ======================================================= - 0 Never lock tables. - 1 Lock and unlock one time per sync cycle (as implemented - by the syncing algorithm). This is the most granular - level of locking available. 
For example, the Chunk - algorithm will lock each chunk of N rows, and then - unlock them if they are the same on the source and the - destination, before moving on to the next chunk. - 2 Lock and unlock before and after each table. - 3 Lock and unlock once for every server (DSN) synced, with - FLUSH TABLES WITH READ LOCK.
If it does, then the script checks the following: - if the file contains a PID and a process is running with that PID, then - the script dies; or, if there is no process running with that PID, then the - script overwrites the file with its own PID and starts; else, if the file - contains no PID, then the script dies. - - - ---port - - short form: -P; type: int - - Port number to use for connection. - - - ---print - - Print queries that will resolve differences. - - If you don't trust \ ``pt-table-sync``\ , or just want to see what it will do, this - is a good way to be safe. These queries are valid SQL and you can run them - yourself if you want to sync the tables manually. - - - ---recursion-method - - type: string - - Preferred recursion method used to find slaves. - - Possible methods are: - - - .. code-block:: perl - - METHOD USES - =========== ================ - processlist SHOW PROCESSLIST - hosts SHOW SLAVE HOSTS - - - The processlist method is preferred because SHOW SLAVE HOSTS is not reliable. - However, the hosts method is required if the server uses a non-standard - port (not 3306). Usually pt-table-sync does the right thing and finds - the slaves, but you may give a preferred method and it will be used first. - If it doesn't find any slaves, the other methods will be tried. - - - ---replace - - Write all \ ``INSERT``\ and \ ``UPDATE``\ statements as \ ``REPLACE``\ . - - This is automatically switched on as needed when there are unique index - violations. - - - ---replicate - - type: string - - Sync tables listed as different in this table. - - Specifies that \ ``pt-table-sync``\ should examine the specified table to find data - that differs. The table is exactly the same as the argument of the same name to - pt-table-checksum. That is, it contains records of which tables (and ranges - of values) differ between the master and slave. 
For each table and range of values that shows differences between the master and - slave, \ ``pt-table-sync``\ will sync that table, with the appropriate \ ``WHERE``\ - clause, to its master.
Causes changes to be made on the master. Sets "--wait" - to 60 by default, sets "--lock" to 1 by default, and disables - "--[no]transaction" by default. See also "--replicate", which changes - this option's behavior. - - - ---tables - - short form: -t; type: hash - - Sync only this comma-separated list of tables. - - Table names may be qualified with the database name. - - - ---timeout-ok - - Keep going if "--wait" fails. - - If you specify "--wait" and the slave doesn't catch up to the master's - position before the wait times out, the default behavior is to abort. This - option makes the tool keep going anyway. \ **Warning**\ : if you are trying to get a - consistent comparison between the two servers, you probably don't want to keep - going after a timeout. - - - ---[no]transaction - - Use transactions instead of \ ``LOCK TABLES``\ . - - The granularity of beginning and committing transactions is controlled by - "--lock". This is enabled by default, but since "--lock" is disabled by - default, it has no effect. - - Most options that enable locking also disable transactions by default, so if - you want to use transactional locking (via \ ``LOCK IN SHARE MODE``\ and \ ``FOR - UPDATE``\ , you must specify \ ``--transaction``\ explicitly. - - If you don't specify \ ``--transaction``\ explicitly \ ``pt-table-sync``\ will decide on - a per-table basis whether to use transactions or table locks. It currently - uses transactions on InnoDB tables, and table locks on all others. - - If \ ``--no-transaction``\ is specified, then \ ``pt-table-sync``\ will not use - transactions at all (not even for InnoDB tables) and locking is controlled - by "--lock". - - When enabled, either explicitly or implicitly, the transaction isolation level - is set \ ``REPEATABLE READ``\ and transactions are started \ ``WITH CONSISTENT - SNAPSHOT``\ . - - - ---trim - - \ ``TRIM()``\ \ ``VARCHAR``\ columns in \ ``BIT_XOR``\ and \ ``ACCUM``\ modes. Helps when - comparing MySQL 4.1 to >= 5.0. 
- - This is useful when you don't care about the trailing space differences between - MySQL versions which vary in their handling of trailing spaces. MySQL 5.0 and - later all retain trailing spaces in \ ``VARCHAR``\ , while previous versions would - remove them. - - - ---[no]unique-checks - - default: yes - - Enable unique key checks (\ ``SET UNIQUE_CHECKS=1``\ ). - - Specifying \ ``--no-unique-checks``\ will \ ``SET UNIQUE_CHECKS=0``\ . - - - ---user - - short form: -u; type: string - - User for login if not current user. - - - ---verbose - - short form: -v; cumulative: yes - - Print results of sync operations. - - See "OUTPUT" for more details about the output. - - - ---version - - Show version and exit. - - - ---wait - - short form: -w; type: time - - How long to wait for slaves to catch up to their master. - - Make the master wait for the slave to catch up in replication before comparing - the tables. The value is the number of seconds to wait before timing out (see - also "--timeout-ok"). Sets "--lock" to 1 and "--[no]transaction" to 0 - by default. If you see an error such as the following, - - - .. code-block:: perl - - MASTER_POS_WAIT returned -1 - - - It means the timeout was exceeded and you need to increase it. - - The default value of this option is influenced by other options. To see what - value is in effect, run with "--help". - - To disable waiting entirely (except for locks), specify "--wait" 0. This - helps when the slave is lagging on tables that are not being synced. - - - ---where - - type: string - - \ ``WHERE``\ clause to restrict syncing to part of the table. - - - ---[no]zero-chunk - - default: yes - - Add a chunk for rows with zero or zero-equivalent values. The only has an - effect when "--chunk-size" is specified. The purpose of the zero chunk - is to capture a potentially large number of zero values that would imbalance - the size of the first chunk. 
For example, if a lot of negative numbers were - inserted into an unsigned integer column causing them to be stored as zeros, - then these zero values are captured by the zero chunk instead of the first - chunk and all its non-zero values. - - - - -*********** -DSN OPTIONS -*********** - - -These DSN options are used to create a DSN. Each option is given like -\ ``option=value``\ . The options are case-sensitive, so P and p are not the -same option. There cannot be whitespace before or after the \ ``=``\ and -if the value contains whitespace it must be quoted. DSN options are -comma-separated. See the percona-toolkit manpage for full details. - - -\* A - - dsn: charset; copy: yes - - Default character set. - - - -\* D - - dsn: database; copy: yes - - Database containing the table to be synced. - - - -\* F - - dsn: mysql_read_default_file; copy: yes - - Only read default options from the given file - - - -\* h - - dsn: host; copy: yes - - Connect to host. - - - -\* p - - dsn: password; copy: yes - - Password to use when connecting. - - - -\* P - - dsn: port; copy: yes - - Port number to use for connection. - - - -\* S - - dsn: mysql_socket; copy: yes - - Socket file to use for connection. - - - -\* t - - copy: yes - - Table to be synced. - - - -\* u - - dsn: user; copy: yes - - User for login if not current user. - - - - -*********** -ENVIRONMENT -*********** - - -The environment variable \ ``PTDEBUG``\ enables verbose debugging output to STDERR. -To enable debugging and capture all output to a file, run the tool like: - - -.. code-block:: perl - - PTDEBUG=1 pt-table-sync ... > FILE 2>&1 - - -Be careful: debugging output is voluminous and can generate several megabytes -of output. - - -******************* -SYSTEM REQUIREMENTS -******************* - - -You need Perl, DBI, DBD::mysql, and some core packages that ought to be -installed in any reasonably new version of Perl. 
- - -**** -BUGS -**** - - -For a list of known bugs, see `http://www.percona.com/bugs/pt-table-sync `_. - -Please report bugs at `https://bugs.launchpad.net/percona-toolkit `_. -Include the following information in your bug report: - - -\* Complete command-line used to run the tool - - - -\* Tool "--version" - - - -\* MySQL version of all servers involved - - - -\* Output from the tool including STDERR - - - -\* Input files (log/dump/config files, etc.) - - - -If possible, include debugging output by running the tool with \ ``PTDEBUG``\ ; -see "ENVIRONMENT". - - -*********** -DOWNLOADING -*********** - - -Visit `http://www.percona.com/software/percona-toolkit/ `_ to download the -latest release of Percona Toolkit. Or, get the latest release from the -command line: - - -.. code-block:: perl - - wget percona.com/get/percona-toolkit.tar.gz - - wget percona.com/get/percona-toolkit.rpm - - wget percona.com/get/percona-toolkit.deb - - -You can also get individual tools from the latest release: - - -.. code-block:: perl - - wget percona.com/get/TOOL - - -Replace \ ``TOOL``\ with the name of any tool. - - -******* -AUTHORS -******* - - -Baron Schwartz - - -*************** -ACKNOWLEDGMENTS -*************** - - -My work is based in part on Giuseppe Maxia's work on distributed databases, -`http://www.sysadminmag.com/articles/2004/0408/ `_ and code derived from that -article. There is more explanation, and a link to the code, at -`http://www.perlmonks.org/?node_id=381053 `_. - -Another programmer extended Maxia's work even further. Fabien Coelho changed -and generalized Maxia's technique, introducing symmetry and avoiding some -problems that might have caused too-frequent checksum collisions. This work -grew into pg_comparator, `http://www.coelho.net/pg_comparator/ `_. Coelho also -explained the technique further in a paper titled "Remote Comparison of Database -Tables" (`http://cri.ensmp.fr/classement/doc/A-375.pdf `_). 
- -This existing literature mostly addressed how to find the differences between -the tables, not how to resolve them once found. I needed a tool that would not -only find them efficiently, but would then resolve them. I first began thinking -about how to improve the technique further with my article -`http://tinyurl.com/mysql-data-diff-algorithm `_, -where I discussed a number of problems with the Maxia/Coelho "bottom-up" -algorithm. After writing that article, I began to write this tool. I wanted to -actually implement their algorithm with some improvements so I was sure I -understood it completely. I discovered it is not what I thought it was, and is -considerably more complex than it appeared to me at first. Fabien Coelho was -kind enough to address some questions over email. - -The first versions of this tool implemented a version of the Coelho/Maxia -algorithm, which I called "bottom-up", and my own, which I called "top-down." -Those algorithms are considerably more complex than the current algorithms and -I have removed them from this tool, and may add them back later. The -improvements to the bottom-up algorithm are my original work, as is the -top-down algorithm. The techniques to actually resolve the differences are -also my own work. - -Another tool that can synchronize tables is the SQLyog Job Agent from webyog. -Thanks to Rohit Nadhani, SJA's author, for the conversations about the general -techniques. There is a comparison of pt-table-sync and SJA at -`http://tinyurl.com/maatkit-vs-sqlyog `_ - -Thanks to the following people and organizations for helping in many ways: - -The Rimm-Kaufman Group `http://www.rimmkaufman.com/ `_, -MySQL AB `http://www.mysql.com/ `_, -Blue Ridge InternetWorks `http://www.briworks.com/ `_, -Percona `http://www.percona.com/ `_, -Fabien Coelho, -Giuseppe Maxia and others at MySQL AB, -Kristian Koehntopp (MySQL AB), -Rohit Nadhani (WebYog), -The helpful monks at Perlmonks, -And others too numerous to mention. 
- - -********************* -ABOUT PERCONA TOOLKIT -********************* - - -This tool is part of Percona Toolkit, a collection of advanced command-line -tools developed by Percona for MySQL support and consulting. Percona Toolkit -was forked from two projects in June, 2011: Maatkit and Aspersa. Those -projects were created by Baron Schwartz and developed primarily by him and -Daniel Nichter, both of whom are employed by Percona. Visit -`http://www.percona.com/software/ `_ for more software developed by Percona. - - -******************************** -COPYRIGHT, LICENSE, AND WARRANTY -******************************** - - -This program is copyright 2007-2011 Baron Schwartz, 2011 Percona Inc. -Feedback and improvements are welcome. - -THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation, version 2; OR the Perl Artistic License. On UNIX and similar -systems, you can issue \`man perlgpl' or \`man perlartistic' to read these -licenses. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA. - - -******* -VERSION -******* - - -pt-table-sync 1.0.1 - diff --git a/docs/user/pt-tcp-model.rst b/docs/user/pt-tcp-model.rst deleted file mode 100644 index ca4f24ed..00000000 --- a/docs/user/pt-tcp-model.rst +++ /dev/null @@ -1,531 +0,0 @@ - -############ -pt-tcp-model -############ - -.. highlight:: perl - - -**** -NAME -**** - - -pt-tcp-model - Transform tcpdump into metrics that permit performance and scalability modeling. - - -******** -SYNOPSIS -******** - - -Usage: pt-tcp-model [OPTION...] 
[FILE] - -pt-tcp-model parses and analyzes tcpdump files. With no FILE, or when -FILE is -, it read standard input. - -Dump TCP requests and responses to a file, capturing only the packet headers to -avoid dropped packets, and ignoring any packets without a payload (such as -ack-only packets). Capture port 3306 (MySQL database traffic). Note that to -avoid line breaking in terminals and man pages, the TCP filtering expression -that follows has a line break at the end of the second line; you should omit -this from your tcpdump command. - - -.. code-block:: perl - - tcpdump -s 384 -i any -nnq -tttt \ - 'tcp port 3306 and (((ip[2:2] - ((ip[0]&0xf)<<2)) - - ((tcp[12]&0xf0)>>2)) != 0)' \ - > /path/to/tcp-file.txt - - -Extract individual response times, sorted by end time: - - -.. code-block:: perl - - pt-tcp-model /path/to/tcp-file.txt > requests.txt - - -Sort the result by arrival time, for input to the next step: - - -.. code-block:: perl - - sort -n -k1,1 requests.txt > sorted.txt - - -Slice the result into 10-second intervals and emit throughput, concurrency, and -response time metrics for each interval: - - -.. code-block:: perl - - pt-tcp-model --type=requests --run-time=10 sorted.txt > sliced.txt - - -Transform the result for modeling with Aspersa's usl tool, discarding the first -and last line of each file if you specify multiple files (the first and last -line are normally incomplete observation periods and are aberrant): - - -.. code-block:: perl - - for f in sliced.txt; do - tail -n +2 "$f" | head -n -1 | awk '{print $2, $3, $7/$4}' - done > usl-input.txt - - - -***** -RISKS -***** - - -The following section is included to inform users about the potential risks, -whether known or unknown, of using this tool. The two main categories of risks -are those created by the nature of the tool (e.g. read-only tools vs. read-write -tools) and those created by bugs. - -pt-tcp-model merely reads and transforms its input, printing it to the output. 
-It should be very low risk. - -At the time of this release, we know of no bugs that could cause serious harm -to users. - -The authoritative source for updated information is always the online issue -tracking system. Issues that affect this tool will be marked as such. You can -see a list of such issues at the following URL: -`http://www.percona.com/bugs/pt-tcp-model `_. - -See also "BUGS" for more information on filing bugs and getting help. - - -*********** -DESCRIPTION -*********** - - -This tool recognizes requests and responses in a TCP stream, and extracts the -"conversations". You can use it to capture the response times of individual -queries to a database, for example. It expects the TCP input to be in the -following format, which should result from the sample shown in the SYNOPSIS: - - -.. code-block:: perl - - IP > : - - -The tool watches for "incoming" packets to the port you specify with the -"--watch-server" option. This begins a request. If multiple inbound packets -follow each other, then by default the last inbound packet seen determines the -time at which the request is assumed to begin. This is logical if one assumes -that a server must receive the whole SQL statement before beginning execution, -for example. - -When the first outbound packet is seen, the server is considered to have -responded to the request. The tool might see an inbound packet, but never see a -response. This can happen when the kernel drops packets, for example. As a -result, the tool never prints a request unless it sees the response to it. -However, the tool actually does not print any request until it sees the "last" -outbound packet. It determines this by waiting for either another inbound -packet, or EOF, and then considers the previous inbound/outbound pair to be -complete. As a result, the tool prints requests in a relatively random order. -Most types of analysis require processing in either arrival or completion order. 
-Therefore, the second type of processing this tool can do requires that you sort -the output from the first stage and supply it as input. - -The second type of processing is selected with the "--type" option set to -"requests". In this mode, the tool reads a group of requests and aggregates -them, then emits the aggregated metrics. - - -****** -OUTPUT -****** - - -In the default mode (parsing tcpdump output), requests are printed out one per -line, in the following format: - - -.. code-block:: perl - - - - -The ID is an incrementing number, assigned in arrival order in the original TCP -traffic. The start and end timestamps, and the elapsed time, can be customized -with the "--start-end" option. - -In "--type=requests" mode, the tool prints out one line per time interval as -defined by "--run-time", with the following columns: ts, concurrency, -throughput, arrivals, completions, busy_time, weighted_time, sum_time, -variance_mean, quantile_time, obs_time. A detailed explanation follows: - - -ts - - The timestamp that defines the beginning of the interval. - - - -concurrency - - The average number of requests resident in the server during the interval. - - - -throughput - - The number of arrivals per second during the interval. - - - -arrivals - - The number of arrivals during the interval. - - - -completions - - The number of completions during the interval. - - - -busy_time - - The total amount of time during which at least one request was resident in - the server during the interval. - - - -weighted_time - - The total response time of all the requests resident in the server during the - interval, including requests that neither arrived nor completed during the - interval. - - - -sum_time - - The total response time of all the requests that arrived in the interval. - - - -variance_mean - - The variance-to-mean ratio (index of dispersion) of the response times of the - requests that arrived in the interval. 
- - - -quantile_time - - The Nth percentile response time for all the requests that arrived in the - interval. See also "--quantile". - - - -obs_time - - The length of the observation time window. This will usually be the same as the - interval length, except for the first and last intervals in a file, which might - have a shorter observation time. - - - - -******* -OPTIONS -******* - - -This tool accepts additional command-line arguments. Refer to the -"SYNOPSIS" and usage information for details. - - ---config - - type: Array - - Read this comma-separated list of config files; if specified, this must be the - first option on the command line. - - - ---help - - Show help and exit. - - - ---progress - - type: array; default: time,30 - - Print progress reports to STDERR. The value is a comma-separated list with two - parts. The first part can be percentage, time, or iterations; the second part - specifies how often an update should be printed, in percentage, seconds, or - number of iterations. - - - ---quantile - - type: float - - The percentile for the last column when "--type" is "requests" (default .99). - - - ---run-time - - type: float - - The size of the aggregation interval in seconds when "--type" is "requests" - (default 1). Fractional values are permitted. - - - ---start-end - - type: Array; default: ts,end - - Define how the arrival and completion timestamps of a query, and thus its - response time (elapsed time) are computed. Recall that there may be multiple - inbound and outbound packets per request and response, and refer to the - following ASCII diagram. Suppose that a client sends a series of three inbound - (I) packets to the server, which computes the result and then sends two outbound - (O) packets back: - - - .. code-block:: perl - - I I I ..................... O O - |<---->|<---response time----->|<-->| - ts0 ts end end1 - - - By default, the query is considered to arrive at time ts, and complete at time - end.
However, this might not be what you want. Perhaps you do not want to - consider the query to have completed until time end1. You can accomplish this - by setting this option to \ ``ts,end1``\ . - - - ---type - - type: string - - The type of input to parse (default tcpdump). The permitted types are - - - tcpdump - - The parser expects the input to be formatted with the following options: \ ``-x -n - -q -tttt``\ . For example, if you want to capture output from your local machine, - you can do something like the following (the port must come last on FreeBSD): - - - .. code-block:: perl - - tcpdump -s 65535 -x -nn -q -tttt -i any -c 1000 port 3306 \ - > mysql.tcp.txt - pt-query-digest --type tcpdump mysql.tcp.txt - - - The other tcpdump parameters, such as -s, -c, and -i, are up to you. Just make - sure the output looks like this (there is a line break in the first line to - avoid man-page problems): - - - .. code-block:: perl - - 2009-04-12 09:50:16.804849 IP 127.0.0.1.42167 - > 127.0.0.1.3306: tcp 37 - - - All MySQL servers running on port 3306 are automatically detected in the - tcpdump output. Therefore, if the tcpdump output contains packets from - multiple servers on port 3306 (for example, 10.0.0.1:3306, 10.0.0.2:3306, - etc.), all packets/queries from all these servers will be analyzed - together as if they were one server. - - If you're analyzing traffic for a protocol that is not running on port - 3306, see "--watch-server". - - - - - ---version - - Show version and exit. - - - ---watch-server - - type: string; default: 10.10.10.10:3306 - - This option tells pt-tcp-model which server IP address and port (such as - "10.0.0.1:3306") to watch when parsing tcpdump for "--type" tcpdump. If you - don't specify it, the tool watches all servers by looking for any IP address - using port 3306. If you're watching a server with a non-standard port, this - won't work, so you must specify the IP address and port to watch.
- - Currently, IP address filtering isn't implemented; so even though you must - specify the option in IP:port form, it ignores the IP and only looks at the port - number. - - - - -*********** -ENVIRONMENT -*********** - - -The environment variable \ ``PTDEBUG``\ enables verbose debugging output to STDERR. -To enable debugging and capture all output to a file, run the tool like: - - -.. code-block:: perl - - PTDEBUG=1 pt-tcp-model ... > FILE 2>&1 - - -Be careful: debugging output is voluminous and can generate several megabytes -of output. - - -******************* -SYSTEM REQUIREMENTS -******************* - - -You need Perl, DBI, DBD::mysql, and some core packages that ought to be -installed in any reasonably new version of Perl. - - -**** -BUGS -**** - - -For a list of known bugs, see `http://www.percona.com/bugs/pt-tcp-model `_. - -Please report bugs at `https://bugs.launchpad.net/percona-toolkit `_. -Include the following information in your bug report: - - -\* Complete command-line used to run the tool - - - -\* Tool "--version" - - - -\* MySQL version of all servers involved - - - -\* Output from the tool including STDERR - - - -\* Input files (log/dump/config files, etc.) - - - -If possible, include debugging output by running the tool with \ ``PTDEBUG``\ ; -see "ENVIRONMENT". - - -*********** -DOWNLOADING -*********** - - -Visit `http://www.percona.com/software/percona-toolkit/ `_ to download the -latest release of Percona Toolkit. Or, get the latest release from the -command line: - - -.. code-block:: perl - - wget percona.com/get/percona-toolkit.tar.gz - - wget percona.com/get/percona-toolkit.rpm - - wget percona.com/get/percona-toolkit.deb - - -You can also get individual tools from the latest release: - - -.. code-block:: perl - - wget percona.com/get/TOOL - - -Replace \ ``TOOL``\ with the name of any tool. 
- - -******* -AUTHORS -******* - - -Baron Schwartz - - -********************* -ABOUT PERCONA TOOLKIT -********************* - - -This tool is part of Percona Toolkit, a collection of advanced command-line -tools developed by Percona for MySQL support and consulting. Percona Toolkit -was forked from two projects in June, 2011: Maatkit and Aspersa. Those -projects were created by Baron Schwartz and developed primarily by him and -Daniel Nichter, both of whom are employed by Percona. Visit -`http://www.percona.com/software/ `_ for more software developed by Percona. - - -******************************** -COPYRIGHT, LICENSE, AND WARRANTY -******************************** - - -This program is copyright 2011 Baron Schwartz, 2011 Percona Inc. -Feedback and improvements are welcome. - -THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation, version 2; OR the Perl Artistic License. On UNIX and similar -systems, you can issue \`man perlgpl' or \`man perlartistic' to read these -licenses. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA. - - -******* -VERSION -******* - - -pt-tcp-model 1.0.1 - diff --git a/docs/user/pt-trend.rst b/docs/user/pt-trend.rst deleted file mode 100644 index acdc9803..00000000 --- a/docs/user/pt-trend.rst +++ /dev/null @@ -1,258 +0,0 @@ - -######## -pt-trend -######## - -.. highlight:: perl - - -**** -NAME -**** - - -pt-trend - Compute statistics over a set of time-series data points. - - -******** -SYNOPSIS -******** - - -Usage: pt-trend [OPTION...] [FILE ...] 
- -pt-trend reads a slow query log and outputs statistics on it. - - -***** -RISKS -***** - - -The following section is included to inform users about the potential risks, -whether known or unknown, of using this tool. The two main categories of risks -are those created by the nature of the tool (e.g. read-only tools vs. read-write -tools) and those created by bugs. - -pt-trend simply reads files given on the command-line. It should be very low-risk. - -At the time of this release, we know of no bugs that could cause serious harm to -users. - -The authoritative source for updated information is always the online issue -tracking system. Issues that affect this tool will be marked as such. You can -see a list of such issues at the following URL: -`http://www.percona.com/bugs/pt-trend `_. - -See also "BUGS" for more information on filing bugs and getting help. - - -*********** -DESCRIPTION -*********** - - -You can specify multiple files on the command line. If you don't specify any, -or if you use the special filename \ ``-``\ , lines are read from standard input. - - -******* -OPTIONS -******* - - -This tool accepts additional command-line arguments. Refer to the -"SYNOPSIS" and usage information for details. - - ---config - - type: Array - - Read this comma-separated list of config files; if specified, this must be the - first option on the command line. - - - ---help - - Show help and exit. - - - ---pid - - type: string - - Create the given PID file. The file contains the process ID of the script. - The PID file is removed when the script exits. Before starting, the script - checks if the PID file already exists. If it does not, then the script creates - and writes its own PID to it.
If it does, then the script checks the following: - if the file contains a PID and a process is running with that PID, then - the script dies; or, if there is no process running with that PID, then the - script overwrites the file with its own PID and starts; else, if the file - contains no PID, then the script dies. - - - ---progress - - type: array; default: time,15 - - Print progress reports to STDERR. The value is a comma-separated list with two - parts. The first part can be percentage, time, or iterations; the second part - specifies how often an update should be printed, in percentage, seconds, or - number of iterations. - - - ---version - - Show version and exit. - - - - -*********** -ENVIRONMENT -*********** - - -The environment variable \ ``PTDEBUG``\ enables verbose debugging output to STDERR. -To enable debugging and capture all output to a file, run the tool like: - - -.. code-block:: perl - - PTDEBUG=1 pt-trend ... > FILE 2>&1 - - -Be careful: debugging output is voluminous and can generate several megabytes -of output. - - -******************* -SYSTEM REQUIREMENTS -******************* - - -You need Perl, DBI, DBD::mysql, and some core packages that ought to be -installed in any reasonably new version of Perl. - - -**** -BUGS -**** - - -For a list of known bugs, see `http://www.percona.com/bugs/pt-trend `_. - -Please report bugs at `https://bugs.launchpad.net/percona-toolkit `_. -Include the following information in your bug report: - - -\* Complete command-line used to run the tool - - - -\* Tool "--version" - - - -\* MySQL version of all servers involved - - - -\* Output from the tool including STDERR - - - -\* Input files (log/dump/config files, etc.) - - - -If possible, include debugging output by running the tool with \ ``PTDEBUG``\ ; -see "ENVIRONMENT". - - -*********** -DOWNLOADING -*********** - - -Visit `http://www.percona.com/software/percona-toolkit/ `_ to download the -latest release of Percona Toolkit. 
Or, get the latest release from the -command line: - - -.. code-block:: perl - - wget percona.com/get/percona-toolkit.tar.gz - - wget percona.com/get/percona-toolkit.rpm - - wget percona.com/get/percona-toolkit.deb - - -You can also get individual tools from the latest release: - - -.. code-block:: perl - - wget percona.com/get/TOOL - - -Replace \ ``TOOL``\ with the name of any tool. - - -******* -AUTHORS -******* - - -Baron Schwartz - - -********************* -ABOUT PERCONA TOOLKIT -********************* - - -This tool is part of Percona Toolkit, a collection of advanced command-line -tools developed by Percona for MySQL support and consulting. Percona Toolkit -was forked from two projects in June, 2011: Maatkit and Aspersa. Those -projects were created by Baron Schwartz and developed primarily by him and -Daniel Nichter, both of whom are employed by Percona. Visit -`http://www.percona.com/software/ `_ for more software developed by Percona. - - -******************************** -COPYRIGHT, LICENSE, AND WARRANTY -******************************** - - -This program is copyright 2010-2011 Baron Schwartz, 2011 Percona Inc. -Feedback and improvements are welcome. - -THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation, version 2; OR the Perl Artistic License. On UNIX and similar -systems, you can issue \`man perlgpl' or \`man perlartistic' to read these -licenses. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA. 
- - -******* -VERSION -******* - - -pt-trend 1.0.1 - diff --git a/docs/user/pt-upgrade.rst b/docs/user/pt-upgrade.rst deleted file mode 100644 index 92cddcd9..00000000 --- a/docs/user/pt-upgrade.rst +++ /dev/null @@ -1,824 +0,0 @@ - -########## -pt-upgrade -########## - -.. highlight:: perl - - -**** -NAME -**** - - -pt-upgrade - Execute queries on multiple servers and check for differences. - - -******** -SYNOPSIS -******** - - -Usage: pt-upgrade [OPTION...] DSN [DSN...] [FILE] - -pt-upgrade compares query execution on two hosts by executing queries in the -given file (or STDIN if no file given) and examining the results, errors, -warnings, etc.produced on each. - -Execute and compare all queries in slow.log on host1 to host2: - - -.. code-block:: perl - - pt-upgrade slow.log h=host1 h=host2 - - -Use pt-query-digest to get, execute and compare queries from tcpdump: - - -.. code-block:: perl - - tcpdump -i eth0 port 3306 -s 65535 -x -n -q -tttt \ - | pt-query-digest --type tcpdump --no-report --print \ - | pt-upgrade h=host1 h=host2 - - -Compare only query times on host1 to host2 and host3: - - -.. code-block:: perl - - pt-upgrade slow.log h=host1 h=host2 h=host3 --compare query_times - - -Compare a single query, no slowlog needed: - - -.. code-block:: perl - - pt-upgrade h=host1 h=host2 --query 'SELECT * FROM db.tbl' - - - -***** -RISKS -***** - - -The following section is included to inform users about the potential risks, -whether known or unknown, of using this tool. The two main categories of risks -are those created by the nature of the tool (e.g. read-only tools vs. read-write -tools) and those created by bugs. - -pt-upgrade is a read-only tool that is meant to be used on non-production -servers. It executes the SQL that you give it as input, which could cause -undesired load on a production server. - -At the time of this release, there is a bug that causes the tool to crash, -and a bug that causes a deadlock. 
- -The authoritative source for updated information is always the online issue -tracking system. Issues that affect this tool will be marked as such. You can -see a list of such issues at the following URL: -`http://www.percona.com/bugs/pt-upgrade `_. - -See also "BUGS" for more information on filing bugs and getting help. - - -*********** -DESCRIPTION -*********** - - -pt-upgrade executes queries from slowlogs on one or more MySQL servers to find -differences in query time, warnings, results, and other aspects of the queries' -execution. This helps evaluate upgrades, migrations and configuration -changes. The comparisons specified by "--compare" determine what -differences can be found. A report is printed which outlines all the -differences found; see "OUTPUT" below. - -The first DSN (host) specified on the command line is authoritative; it defines -the results to which the other DSNs are compared. You can "compare" only one -host, in which case there will be no differences but the output can be saved -to be diffed later against the output of another single host "comparison". - -At present, pt-upgrade only reads slowlogs. Use \ ``pt-query-digest --print``\ to -transform other log formats to slowlog. - -DSNs and slowlog files can be specified in any order. pt-upgrade will -automatically determine if an argument is a DSN or a slowlog file. If no -slowlog files are given and "--query" is not specified then pt-upgrade -will read from \ ``STDIN``\ . - - -****** -OUTPUT -****** - - -TODO - - -******* -OPTIONS -******* - - -This tool accepts additional command-line arguments. Refer to the -"SYNOPSIS" and usage information for details. - - ---ask-pass - - Prompt for a password when connecting to MySQL. - - - ---base-dir - - type: string; default: /tmp - - Save outfiles for the \ ``rows``\ comparison method in this directory. - - See the \ ``rows``\ "--compare-results-method". - - - ---charset - - short form: -A; type: string - - Default character set.
If the value is utf8, sets Perl's binmode on - STDOUT to utf8, passes the mysql_enable_utf8 option to DBD::mysql, and - runs SET NAMES UTF8 after connecting to MySQL. Any other value sets - binmode on STDOUT without the utf8 layer, and runs SET NAMES after - connecting to MySQL. - - - ---[no]clear-warnings - - default: yes - - Clear warnings before each warnings comparison. - - If comparing warnings ("--compare" includes \ ``warnings``\ ), this option - causes pt-upgrade to execute a successful \ ``SELECT``\ statement which clears - any warnings left over from previous queries. This requires a current - database that pt-upgrade usually detects automatically, but in some cases - it might be necessary to specify "--temp-database". If pt-upgrade can't - auto-detect the current database, it will create a temporary table in the - "--temp-database" called \ ``mk_upgrade_clear_warnings``\ . - - - ---clear-warnings-table - - type: string - - Execute \ ``SELECT \* FROM ... LIMIT 1``\ from this table to clear warnings. - - - ---compare - - type: Hash; default: query_times,results,warnings - - What to compare for each query executed on each host. - - Comparisons determine differences when the queries are executed on the hosts. - More comparisons enable more differences to be detected. The following - comparisons are available: - - - query_times - - Compare query execution times. If this comparison is disabled, the queries - are still executed so that other comparisons will work, but the query time - attributes are removed from the events. - - - - results - - Compare result sets to find differences in rows, columns, etc. - - What differences can be found depends on the "--compare-results-method" used. - - - - warnings - - Compare warnings from \ ``SHOW WARNINGS``\ . Requires at least MySQL 4.1. - - - - - ---compare-results-method - - type: string; default: CHECKSUM; group: Comparisons - - Method to use for "--compare" \ ``results``\ . 
This option has no effect - if \ ``--no-compare-results``\ is given. - - Available compare methods (case-insensitive): - - - CHECKSUM - - Do \ ``CREATE TEMPORARY TABLE \`mk_upgrade\` AS query``\ then - \ ``CHECKSUM TABLE \`mk_upgrade\```\ . This method is fast and simple but in - rare cases it might be inaccurate because the MySQL manual says: - - - .. code-block:: perl - - [The] fact that two tables produce the same checksum does not mean that - the tables are identical. - - - Requires at least MySQL 4.1. - - - - rows - - Compare rows one-by-one to find differences. This method has advantages - and disadvantages. Its disadvantages are that it may be slower and it - requires writing and reading outfiles from disk. Its advantages are that - it is universal (works for all versions of MySQL), it doesn't alter the query - in any way, and it can find column value differences. - - The \ ``rows``\ method works as follows: - - - .. code-block:: perl - - 1. Rows from each host are compared one-by-one. - 2. If no differences are found, comparison stops, else... - 3. All remaining rows (after the point where they begin to differ) - are written to outfiles. - 4. The outfiles are loaded into temporary tables with - LOAD DATA LOCAL INFILE. - 5. The temporary tables are analyzed to determine the differences. - - - The outfiles are written to the "--base-dir". - - - - - ---config - - type: Array - - Read this comma-separated list of config files; if specified, this must be the - first option on the command line. - - - ---continue-on-error - - Continue working even if there is an error. - - - ---convert-to-select - - Convert non-SELECT statements to SELECTs and compare. - - By default non-SELECT statements are not allowed. This option causes - non-SELECT statements (like UPDATE, INSERT and DELETE) to be converted - to SELECT statements, executed and compared. - - For example, \ ``DELETE col FROM tbl WHERE id=1``\ is converted to - \ ``SELECT col FROM tbl WHERE id=1``\ .
- - - ---daemonize - - Fork to the background and detach from the shell. POSIX - operating systems only. - - - ---explain-hosts - - Print connection information and exit. - - - ---filter - - type: string - - Discard events for which this Perl code doesn't return true. - - This option is a string of Perl code or a file containing Perl code that gets - compiled into a subroutine with one argument: $event. This is a hashref. - If the given value is a readable file, then pt-upgrade reads the entire - file and uses its contents as the code. The file should not contain - a shebang (#!/usr/bin/perl) line. - - If the code returns true, the chain of callbacks continues; otherwise it ends. - The code is the last statement in the subroutine other than \ ``return $event``\ . - The subroutine template is: - - - .. code-block:: perl - - sub { $event = shift; filter && return $event; } - - - Filters given on the command line are wrapped inside parentheses like - \ ``( filter )``\ . For complex, multi-line filters, you must put the code inside - a file so it will not be wrapped inside parentheses. Either way, the filter - must produce syntactically valid code given the template. For example, an - if-else branch given on the command line would not be valid: - - - .. code-block:: perl - - --filter 'if () { } else { }' # WRONG - - - Since it's given on the command line, the if-else branch would be wrapped inside - parentheses which is not syntactically valid. So to accomplish something more - complex like this would require putting the code in a file, for example - filter.txt: - - - .. code-block:: perl - - my $event_ok; if (...) { $event_ok=1; } else { $event_ok=0; } $event_ok - - - Then specify \ ``--filter filter.txt``\ to read the code from filter.txt. - - If the filter code won't compile, pt-upgrade will die with an error. - If the filter code does compile, an error may still occur at runtime if the - code tries to do something wrong (like pattern match an undefined value).
- pt-upgrade does not provide any safeguards so code carefully! - - An example filter that discards everything but SELECT statements: - - - .. code-block:: perl - - --filter '$event->{arg} =~ m/^select/i' - - - This is compiled into a subroutine like the following: - - - .. code-block:: perl - - sub { $event = shift; ( $event->{arg} =~ m/^select/i ) && return $event; } - - - It is permissible for the code to have side effects (to alter $event). - - You can find an explanation of the structure of $event at - `http://code.google.com/p/maatkit/wiki/EventAttributes `_. - - - ---fingerprints - - Add query fingerprints to the standard query analysis report. This is mostly - useful for debugging purposes. - - - ---float-precision - - type: int - - Round float, double and decimal values to this many places. - - This option helps eliminate false-positives caused by floating-point - imprecision. - - - ---help - - Show help and exit. - - - ---host - - short form: -h; type: string - - Connect to host. - - - ---iterations - - type: int; default: 1 - - How many times to iterate through the collect-and-report cycle. If 0, iterate - to infinity. See also --run-time. - - - ---limit - - type: string; default: 95%:20 - - Limit output to the given percentage or count. - - If the argument is an integer, report only the top N worst queries. If the - argument is an integer followed by the \ ``%``\ sign, report that percentage of the - worst queries. If the percentage is followed by a colon and another integer, - report the top percentage or the number specified by that integer, whichever - comes first. - - - ---log - - type: string - - Print all output to this file when daemonized. - - - ---max-different-rows - - type: int; default: 10 - - Stop comparing rows for \ ``--compare-results-method rows``\ after this many - differences are found. - - - ---order-by - - type: string; default: differences:sum - - Sort events by this attribute and aggregate function. 
- - - ---password - - short form: -p; type: string - - Password to use when connecting. - - - ---pid - - type: string - - Create the given PID file when daemonized. The file contains the process - ID of the daemonized instance. The PID file is removed when the - daemonized instance exits. The program checks for the existence of the - PID file when starting; if it exists and the process with the matching PID - exists, the program exits. - - - ---port - - short form: -P; type: int - - Port number to use for connection. - - - ---query - - type: string - - Execute and compare this single query; ignores files on command line. - - This option allows you to supply a single query on the command line. Any - slowlogs also specified on the command line are ignored. - - - ---reports - - type: Hash; default: queries,differences,errors,statistics - - Print these reports. Valid reports are queries, differences, errors, and - statistics. - - See "OUTPUT" for more information on the various parts of the report. - - - ---run-time - - type: time - - How long to run before exiting. The default is to run forever (you can - interrupt with CTRL-C). - - - ---set-vars - - type: string; default: wait_timeout=10000,query_cache_type=0 - - Set these MySQL variables. Immediately after connecting to MySQL, this - string will be appended to SET and executed. - - - ---shorten - - type: int; default: 1024 - - Shorten long statements in reports. - - Shortens long statements, replacing the omitted portion with a \ ``/\*... omitted - ...\*/``\ comment. This applies only to the output in reports. It prevents a - large statement from causing difficulty in a report. The argument is the - preferred length of the shortened statement. Not all statements can be - shortened, but very large INSERT and similar statements often can; and so - can IN() lists, although only the first such list in the statement will be - shortened. 
- - If it shortens something beyond recognition, you can find the original statement - in the log, at the offset shown in the report header (see "OUTPUT"). - - - ---socket - - short form: -S; type: string - - Socket file to use for connection. - - - ---temp-database - - type: string - - Use this database for creating temporary tables. - - If given, this database is used for creating temporary tables for the - results comparison (see "--compare"). Otherwise, the current - database (from the last event that specified its database) is used. - - - ---temp-table - - type: string; default: mk_upgrade - - Use this table for checksumming results. - - - ---user - - short form: -u; type: string - - User for login if not current user. - - - ---version - - Show version and exit. - - - ---zero-query-times - - Zero the query times in the report. - - - - -*********** -DSN OPTIONS -*********** - - -These DSN options are used to create a DSN. Each option is given like -\ ``option=value``\ . The options are case-sensitive, so P and p are not the -same option. There cannot be whitespace before or after the \ ``=``\ , and -if the value contains whitespace it must be quoted. DSN options are -comma-separated. See the percona-toolkit manpage for full details. - - -\* A - - dsn: charset; copy: yes - - Default character set. - - - -\* D - - dsn: database; copy: yes - - Default database. - - - -\* F - - dsn: mysql_read_default_file; copy: yes - - Only read default options from the given file - - - -\* h - - dsn: host; copy: yes - - Connect to host. - - - -\* p - - dsn: password; copy: yes - - Password to use when connecting. - - - -\* P - - dsn: port; copy: yes - - Port number to use for connection. - - - -\* S - - dsn: mysql_socket; copy: yes - - Socket file to use for connection. - - - -\* u - - dsn: user; copy: yes - - User for login if not current user. - - - - -*********** -ENVIRONMENT -*********** - - -The environment variable \ ``PTDEBUG``\ enables verbose debugging output to STDERR. 
-To enable debugging and capture all output to a file, run the tool like: - - -.. code-block:: perl - - PTDEBUG=1 pt-upgrade ... > FILE 2>&1 - - -Be careful: debugging output is voluminous and can generate several megabytes -of output. - - -******************* -SYSTEM REQUIREMENTS -******************* - - -You need Perl, DBI, DBD::mysql, and some core packages that ought to be -installed in any reasonably new version of Perl. - - -**** -BUGS -**** - - -For a list of known bugs, see `http://www.percona.com/bugs/pt-upgrade `_. - -Please report bugs at `https://bugs.launchpad.net/percona-toolkit `_. -Include the following information in your bug report: - - -\* Complete command-line used to run the tool - - - -\* Tool "--version" - - - -\* MySQL version of all servers involved - - - -\* Output from the tool including STDERR - - - -\* Input files (log/dump/config files, etc.) - - - -If possible, include debugging output by running the tool with \ ``PTDEBUG``\ ; -see "ENVIRONMENT". - - -*********** -DOWNLOADING -*********** - - -Visit `http://www.percona.com/software/percona-toolkit/ `_ to download the -latest release of Percona Toolkit. Or, get the latest release from the -command line: - - -.. code-block:: perl - - wget percona.com/get/percona-toolkit.tar.gz - - wget percona.com/get/percona-toolkit.rpm - - wget percona.com/get/percona-toolkit.deb - - -You can also get individual tools from the latest release: - - -.. code-block:: perl - - wget percona.com/get/TOOL - - -Replace \ ``TOOL``\ with the name of any tool. - - -******* -AUTHORS -******* - - -Daniel Nichter - - -********************* -ABOUT PERCONA TOOLKIT -********************* - - -This tool is part of Percona Toolkit, a collection of advanced command-line -tools developed by Percona for MySQL support and consulting. Percona Toolkit -was forked from two projects in June, 2011: Maatkit and Aspersa. 
Those -projects were created by Baron Schwartz and developed primarily by him and -Daniel Nichter, both of whom are employed by Percona. Visit -`http://www.percona.com/software/ `_ for more software developed by Percona. - - -******************************** -COPYRIGHT, LICENSE, AND WARRANTY -******************************** - - -This program is copyright 2009-2011 Percona Inc. -Feedback and improvements are welcome. - -THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation, version 2; OR the Perl Artistic License. On UNIX and similar -systems, you can issue \`man perlgpl' or \`man perlartistic' to read these -licenses. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA. - - -******* -VERSION -******* - - -pt-upgrade 1.0.1 - diff --git a/docs/user/pt-variable-advisor.rst b/docs/user/pt-variable-advisor.rst deleted file mode 100644 index bf97b224..00000000 --- a/docs/user/pt-variable-advisor.rst +++ /dev/null @@ -1,1100 +0,0 @@ - -################### -pt-variable-advisor -################### - -.. highlight:: perl - - -**** -NAME -**** - - -pt-variable-advisor - Analyze MySQL variables and advise on possible problems. - - -******** -SYNOPSIS -******** - - -Usage: pt-variable-advisor [OPTION...] [DSN] - -pt-variable-advisor analyzes variables and advises on possible problems. - -Get SHOW VARIABLES from localhost: - - -.. code-block:: perl - - pt-variable-advisor localhost - - -Get SHOW VARIABLES output saved in vars.txt: - - -.. 
code-block:: perl - - pt-variable-advisor --source-of-variables vars.txt - - - -***** -RISKS -***** - - -The following section is included to inform users about the potential risks, -whether known or unknown, of using this tool. The two main categories of risks -are those created by the nature of the tool (e.g. read-only tools vs. read-write -tools) and those created by bugs. - -pt-variable-advisor reads MySQL's configuration and examines it and is thus -very low risk. - -At the time of this release, we know of no bugs that could cause serious harm to -users. - -The authoritative source for updated information is always the online issue -tracking system. Issues that affect this tool will be marked as such. You can -see a list of such issues at the following URL: -`http://www.percona.com/bugs/pt-variable-advisor `_. - -See also "BUGS" for more information on filing bugs and getting help. - - -*********** -DESCRIPTION -*********** - - -pt-variable-advisor examines \ ``SHOW VARIABLES``\ for bad values and settings -according to the "RULES" described below. It reports on variables that -match the rules, so you can find bad settings in your MySQL server. - -At the time of this release, pt-variable-advisor only examines -\ ``SHOW VARIABLES``\ , but other input sources are planned like \ ``SHOW STATUS``\ -and \ ``SHOW SLAVE STATUS``\ . - - -***** -RULES -***** - - -These are the rules that pt-variable-advisor will apply to SHOW VARIABLES. -Each rule has three parts: an ID, a severity, and a description. - -The rule's ID is a short, unique name for the rule. It usually relates -to the variable that the rule examines. If a variable is examined by -several rules, then the rules' IDs are numbered like "-1", "-2", "-N". - -The rule's severity is an indication of how important it is that this -rule matched a variable. We use NOTE, WARN, and CRIT to denote these -levels.
- -The rule's description is a textual, human-readable explanation of -what it means when a variable matches this rule. Depending on the -verbosity of the report you generate, you will see more of the text in -the description. By default, you'll see only the first sentence, -which is sort of a terse synopsis of the rule's meaning. At a higher -verbosity, you'll see subsequent sentences. - - -auto_increment - - severity: note - - Are you trying to write to more than one server in a dual-master or - ring replication configuration? This is potentially very dangerous and in - most cases is a serious mistake. Most people's reasons for doing this are - actually not valid at all. - - - -concurrent_insert - - severity: note - - Holes (spaces left by deletes) in MyISAM tables might never be - reused. - - - -connect_timeout - - severity: note - - A large value of this setting can create a denial of service - vulnerability. - - - -debug - - severity: crit - - Servers built with debugging capability should not be used in - production because of the large performance impact. - - - -delay_key_write - - severity: warn - - MyISAM index blocks are never flushed until necessary. If there is - a server crash, data corruption on MyISAM tables can be much worse than - usual. - - - -flush - - severity: warn - - This option might decrease performance greatly. - - - -flush_time - - severity: warn - - This option might decrease performance greatly. - - - -have_bdb - - severity: note - - The BDB engine is deprecated. If you aren't using it, you should - disable it with the skip_bdb option. - - - -init_connect - - severity: note - - The init_connect option is enabled on this server. - - - -init_file - - severity: note - - The init_file option is enabled on this server. - - - -init_slave - - severity: note - - The init_slave option is enabled on this server. - - - -innodb_additional_mem_pool_size - - severity: warn - - This variable generally doesn't need to be larger than 20MB. 
- - - -innodb_buffer_pool_size - - severity: warn - - The InnoDB buffer pool size is unconfigured. In a production - environment it should always be configured explicitly, and the default - 10MB size is not good. - - - -innodb_checksums - - severity: warn - - InnoDB checksums are disabled. Your data is not protected from - hardware corruption or other errors! - - - -innodb_doublewrite - - severity: warn - - InnoDB doublewrite is disabled. Unless you use a filesystem that - protects against partial page writes, your data is not safe! - - - -innodb_fast_shutdown - - severity: warn - - InnoDB's shutdown behavior is not the default. This can lead to - poor performance, or the need to perform crash recovery upon startup. - - - -innodb_flush_log_at_trx_commit-1 - - severity: warn - - InnoDB is not configured in strictly ACID mode. If there - is a crash, some transactions can be lost. - - - -innodb_flush_log_at_trx_commit-2 - - severity: warn - - Setting innodb_flush_log_at_trx_commit to 0 has no performance - benefits over setting it to 2, and more types of data loss are possible. - If you are trying to change it from 1 for performance reasons, you should - set it to 2 instead of 0. - - - -innodb_force_recovery - - severity: warn - - InnoDB is in forced recovery mode! This should be used only - temporarily when recovering from data corruption or other bugs, not for - normal usage. - - - -innodb_lock_wait_timeout - - severity: warn - - This option has an unusually long value, which can cause - system overload if locks are not being released. - - - -innodb_log_buffer_size - - severity: warn - - The InnoDB log buffer size generally should not be set larger than - 16MB. If you are doing large BLOB operations, InnoDB is not really a good - choice of engines anyway. - - - -innodb_log_file_size - - severity: warn - - The InnoDB log file size is set to its default value, which is not - usable on production systems. 
- - - -innodb_max_dirty_pages_pct - - severity: note - - The innodb_max_dirty_pages_pct is lower than the default. This can - cause overly aggressive flushing and add load to the I/O system. - - - -flush_time - - severity: warn - - This setting is likely to cause very bad performance every - flush_time seconds. - - - -key_buffer_size - - severity: warn - - The key buffer size is unconfigured. In a production - environment it should always be configured explicitly, and the default - 8MB size is not good. - - - -large_pages - - severity: note - - Large pages are enabled. - - - -locked_in_memory - - severity: note - - The server is locked in memory with --memlock. - - - -log_warnings-1 - - severity: note - - Log_warnings is disabled, so unusual events such as statements - unsafe for replication and aborted connections will not be logged to the - error log. - - - -log_warnings-2 - - severity: note - - Log_warnings must be set greater than 1 to log unusual events such - as aborted connections. - - - -low_priority_updates - - severity: note - - The server is running with non-default lock priority for updates. - This could cause update queries to wait unexpectedly for read queries. - - - -max_binlog_size - - severity: note - - The max_binlog_size is smaller than the default of 1GB. - - - -max_connect_errors - - severity: note - - max_connect_errors should probably be set as large as your platform - allows. - - - -max_connections - - severity: warn - - If the server ever really has more than a thousand threads running, - then the system is likely to spend more time scheduling threads than - really doing useful work. This variable's value should be considered in - light of your workload. - - - -myisam_repair_threads - - severity: note - - myisam_repair_threads > 1 enables multi-threaded repair, which is - relatively untested and is still listed as beta-quality code in the - official documentation. - - - -old_passwords - - severity: warn - - Old-style passwords are insecure. 
They are sent in plain text - across the wire. - - - -optimizer_prune_level - - severity: warn - - The optimizer will use an exhaustive search when planning complex - queries, which can cause the planning process to take a long time. - - - -port - - severity: note - - The server is listening on a non-default port. - - - -query_cache_size-1 - - severity: note - - The query cache does not scale to large sizes and can cause unstable - performance when larger than 128MB, especially on multi-core machines. - - - -query_cache_size-2 - - severity: warn - - The query cache can cause severe performance problems when it is - larger than 256MB, especially on multi-core machines. - - - -read_buffer_size-1 - - severity: note - - The read_buffer_size variable should generally be left at its - default unless an expert determines it is necessary to change it. - - - -read_buffer_size-2 - - severity: warn - - The read_buffer_size variable should not be larger than 8MB. It - should generally be left at its default unless an expert determines it is - necessary to change it. Making it larger than 2MB can hurt performance - significantly, and can make the server crash, swap to death, or just - become extremely unstable. - - - -read_rnd_buffer_size-1 - - severity: note - - The read_rnd_buffer_size variable should generally be left at its - default unless an expert determines it is necessary to change it. - - - -read_rnd_buffer_size-2 - - severity: warn - - The read_rnd_buffer_size variable should not be larger than 4M. It - should generally be left at its default unless an expert determines it is - necessary to change it. - - - -relay_log_space_limit - - severity: warn - - Setting relay_log_space_limit is relatively rare, and could cause - an increased risk of previously unknown bugs in replication. - - - -slave_net_timeout - - severity: warn - - This variable is set too high. This is too long to wait before - noticing that the connection to the master has failed and retrying. 
This - should probably be set to 60 seconds or less. It is also a good idea to - use pt-heartbeat to ensure that the connection does not appear to time out - when the master is simply idle. - - - -slave_skip_errors - - severity: crit - - You should not set this option. If replication is having errors, - you need to find and resolve the cause of that; it is likely that your - slave's data is different from the master. You can find out with - pt-table-checksum. - - - -sort_buffer_size-1 - - severity: note - - The sort_buffer_size variable should generally be left at its - default unless an expert determines it is necessary to change it. - - - -sort_buffer_size-2 - - severity: note - - The sort_buffer_size variable should generally be left at its - default unless an expert determines it is necessary to change it. Making - it larger than a few MB can hurt performance significantly, and can make - the server crash, swap to death, or just become extremely unstable. - - - -sql_notes - - severity: note - - This server is configured not to log Note level warnings to the - error log. - - - -sync_frm - - severity: warn - - It is best to set sync_frm so that .frm files are flushed safely to - disk in case of a server crash. - - - -tx_isolation-1 - - severity: note - - This server's transaction isolation level is non-default. - - - -tx_isolation-2 - - severity: warn - - Most applications should use the default REPEATABLE-READ transaction - isolation level, or in a few cases READ-COMMITTED. - - - -expire_log_days - - severity: warn - - Binary logs are enabled, but automatic purging is not enabled. If - you do not purge binary logs, your disk will fill up. If you delete - binary logs externally to MySQL, you will cause unwanted behaviors. - Always ask MySQL to purge obsolete logs, never delete them externally. - - - -innodb_file_io_threads - - severity: note - - This option is useless except on Windows. 
- - - -innodb_data_file_path - - severity: note - - Auto-extending InnoDB files can consume a lot of disk space that is - very difficult to reclaim later. Some people prefer to set - innodb_file_per_table and allocate a fixed-size file for ibdata1. - - - -innodb_flush_method - - severity: note - - Most production database servers that use InnoDB should set - innodb_flush_method to O_DIRECT to avoid double-buffering, unless the I/O - system is very low performance. - - - -innodb_locks_unsafe_for_binlog - - severity: warn - - This option makes point-in-time recovery from binary logs, and - replication, untrustworthy if statement-based logging is used. - - - -innodb_support_xa - - severity: warn - - MySQL's internal XA transaction support between InnoDB and the - binary log is disabled. The binary log might not match InnoDB's state - after crash recovery, and replication might drift out of sync due to - out-of-order statements in the binary log. - - - -log_bin - - severity: warn - - Binary logging is disabled, so point-in-time recovery and - replication are not possible. - - - -log_output - - severity: warn - - Directing log output to tables has a high performance impact. - - - -max_relay_log_size - - severity: note - - A custom max_relay_log_size is defined. - - - -myisam_recover_options - - severity: warn - - myisam_recover_options should be set to some value such as - BACKUP,FORCE to ensure that table corruption is noticed. - - - -storage_engine - - severity: note - - The server is using a non-standard storage engine as default. - - - -sync_binlog - - severity: warn - - Binary logging is enabled, but sync_binlog isn't configured so that - every transaction is flushed to the binary log for durability. 
- - - -tmp_table_size - - severity: note - - The effective minimum size of in-memory implicit temporary tables - used internally during query execution is min(tmp_table_size, - max_heap_table_size), so max_heap_table_size should be at least as large - as tmp_table_size. - - - -old mysql version - - severity: warn - - These are the recommended minimum versions for each major release: 3.23, 4.1.20, 5.0.37, 5.1.30. - - - -end-of-life mysql version - - severity: note - - Every release older than 5.1 is now officially end-of-life. - - - - -******* -OPTIONS -******* - - -This tool accepts additional command-line arguments. Refer to the -"SYNOPSIS" and usage information for details. - - ---ask-pass - - Prompt for a password when connecting to MySQL. - - - ---charset - - short form: -A; type: string - - Default character set. If the value is utf8, sets Perl's binmode on - STDOUT to utf8, passes the mysql_enable_utf8 option to DBD::mysql, and - runs SET NAMES UTF8 after connecting to MySQL. Any other value sets - binmode on STDOUT without the utf8 layer, and runs SET NAMES after - connecting to MySQL. - - - ---config - - type: Array - - Read this comma-separated list of config files; if specified, this must be the - first option on the command line. - - - ---daemonize - - Fork to the background and detach from the shell. POSIX - operating systems only. - - - ---defaults-file - - short form: -F; type: string - - Only read mysql options from the given file. You must give an absolute - pathname. - - - ---help - - Show help and exit. - - - ---host - - short form: -h; type: string - - Connect to host. - - - ---ignore-rules - - type: hash - - Ignore these rule IDs. - - Specify a comma-separated list of rule IDs (e.g. port,max_connections,etc.) - to ignore. - - - ---password - - short form: -p; type: string - - Password to use when connecting. - - - ---pid - - type: string - - Create the given PID file when daemonized. The file contains the process - ID of the daemonized instance.
The PID file is removed when the - daemonized instance exits. The program checks for the existence of the - PID file when starting; if it exists and the process with the matching PID - exists, the program exits. - - - ---port - - short form: -P; type: int - - Port number to use for connection. - - - ---set-vars - - type: string; default: wait_timeout=10000 - - Set these MySQL variables. Immediately after connecting to MySQL, this string - will be appended to SET and executed. - - - ---socket - - short form: -S; type: string - - Socket file to use for connection. - - - ---source-of-variables - - type: string; default: mysql - - Read \ ``SHOW VARIABLES``\ from this source. Possible values are "mysql", "none" - or a file name. If "mysql" is specified then you must also specify a DSN - on the command line. - - - ---user - - short form: -u; type: string - - User for login if not current user. - - - ---verbose - - short form: -v; cumulative: yes; default: 1 - - Increase verbosity of output. At the default level of verbosity, the - program prints only the first sentence of each rule's description. At - higher levels, the program prints more of the description. - - - ---version - - Show version and exit. - - - - -*********** -DSN OPTIONS -*********** - - -These DSN options are used to create a DSN. Each option is given like -\ ``option=value``\ . The options are case-sensitive, so P and p are not the -same option. There cannot be whitespace before or after the \ ``=``\ and -if the value contains whitespace it must be quoted. DSN options are -comma-separated. See the percona-toolkit manpage for full details. - - -\* A - - dsn: charset; copy: yes - - Default character set. - - - -\* D - - dsn: database; copy: yes - - Default database. - - - -\* F - - dsn: mysql_read_default_file; copy: yes - - Only read default options from the given file - - - -\* h - - dsn: host; copy: yes - - Connect to host. - - - -\* p - - dsn: password; copy: yes - - Password to use when connecting. 
- - - -\* P - - dsn: port; copy: yes - - Port number to use for connection. - - - -\* S - - dsn: mysql_socket; copy: yes - - Socket file to use for connection. - - - -\* u - - dsn: user; copy: yes - - User for login if not current user. - - - - -*********** -ENVIRONMENT -*********** - - -The environment variable \ ``PTDEBUG``\ enables verbose debugging output to STDERR. -To enable debugging and capture all output to a file, run the tool like: - - -.. code-block:: perl - - PTDEBUG=1 pt-variable-advisor ... > FILE 2>&1 - - -Be careful: debugging output is voluminous and can generate several megabytes -of output. - - -******************* -SYSTEM REQUIREMENTS -******************* - - -You need Perl, DBI, DBD::mysql, and some core packages that ought to be -installed in any reasonably new version of Perl. - - -**** -BUGS -**** - - -For a list of known bugs, see `http://www.percona.com/bugs/pt-variable-advisor `_. - -Please report bugs at `https://bugs.launchpad.net/percona-toolkit `_. -Include the following information in your bug report: - - -\* Complete command-line used to run the tool - - - -\* Tool "--version" - - - -\* MySQL version of all servers involved - - - -\* Output from the tool including STDERR - - - -\* Input files (log/dump/config files, etc.) - - - -If possible, include debugging output by running the tool with \ ``PTDEBUG``\ ; -see "ENVIRONMENT". - - -*********** -DOWNLOADING -*********** - - -Visit `http://www.percona.com/software/percona-toolkit/ `_ to download the -latest release of Percona Toolkit. Or, get the latest release from the -command line: - - -.. code-block:: perl - - wget percona.com/get/percona-toolkit.tar.gz - - wget percona.com/get/percona-toolkit.rpm - - wget percona.com/get/percona-toolkit.deb - - -You can also get individual tools from the latest release: - - -.. code-block:: perl - - wget percona.com/get/TOOL - - -Replace \ ``TOOL``\ with the name of any tool. 
- - -******* -AUTHORS -******* - - -Baron Schwartz and Daniel Nichter - - -********************* -ABOUT PERCONA TOOLKIT -********************* - - -This tool is part of Percona Toolkit, a collection of advanced command-line -tools developed by Percona for MySQL support and consulting. Percona Toolkit -was forked from two projects in June, 2011: Maatkit and Aspersa. Those -projects were created by Baron Schwartz and developed primarily by him and -Daniel Nichter, both of whom are employed by Percona. Visit -`http://www.percona.com/software/ `_ for more software developed by Percona. - - -******************************** -COPYRIGHT, LICENSE, AND WARRANTY -******************************** - - -This program is copyright 2010-2011 Percona Inc. -Feedback and improvements are welcome. - -THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation, version 2; OR the Perl Artistic License. On UNIX and similar -systems, you can issue \`man perlgpl' or \`man perlartistic' to read these -licenses. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA. - - -******* -VERSION -******* - - -pt-variable-advisor 1.0.1 - diff --git a/docs/user/pt-visual-explain.rst b/docs/user/pt-visual-explain.rst deleted file mode 100644 index 2fa40979..00000000 --- a/docs/user/pt-visual-explain.rst +++ /dev/null @@ -1,963 +0,0 @@ - -################# -pt-visual-explain -################# - -.. highlight:: perl - - -**** -NAME -**** - - -pt-visual-explain - Format EXPLAIN output as a tree. 
- - -******** -SYNOPSIS -******** - - -Usage: pt-visual-explain [OPTION...] [FILE...] - -pt-visual-explain transforms EXPLAIN output into a tree representation of -the query plan. If FILE is given, input is read from the file(s). With no -FILE, or when FILE is -, read standard input. - -Examples: - - -.. code-block:: perl - - pt-visual-explain - - pt-visual-explain -c - - mysql -e "explain select * from mysql.user" | pt-visual-explain - - - -***** -RISKS -***** - - -The following section is included to inform users about the potential risks, -whether known or unknown, of using this tool. The two main categories of risks -are those created by the nature of the tool (e.g. read-only tools vs. read-write -tools) and those created by bugs. - -pt-visual-explain is read-only and very low-risk. - -At the time of this release, we know of no bugs that could cause serious harm to -users. - -The authoritative source for updated information is always the online issue -tracking system. Issues that affect this tool will be marked as such. You can -see a list of such issues at the following URL: -`http://www.percona.com/bugs/pt-visual-explain `_. - -See also "BUGS" for more information on filing bugs and getting help. - - -*********** -DESCRIPTION -*********** - - -pt-visual-explain reverse-engineers MySQL's EXPLAIN output into a query -execution plan, which it then formats as a left-deep tree -- the same way the -plan is represented inside MySQL. It is possible to do this by hand, or to read -EXPLAIN's output directly, but it requires patience and expertise. Many people -find a tree representation more understandable. - -You can pipe input into pt-visual-explain or specify a filename at the -command line, including the magical '-' filename, which will read from standard -input. It can do two things with the input: parse it for something that looks -like EXPLAIN output, or connect to a MySQL instance and run EXPLAIN on the -input. 
- -When parsing its input, pt-visual-explain understands three formats: tabular -like that shown in the mysql command-line client, vertical like that created by -using the \G line terminator in the mysql command-line client, and tab -separated. It ignores any lines it doesn't know how to parse. - -When executing the input, pt-visual-explain replaces everything in the input -up to the first SELECT keyword with 'EXPLAIN SELECT,' and then executes the -result. You must specify "--connect" to execute the input as a query. - -Either way, it builds a tree from the result set and prints it to standard -output. For the following query, - - -.. code-block:: perl - - select * from sakila.film_actor join sakila.film using(film_id); - - -pt-visual-explain generates this query plan: - - -.. code-block:: perl - - JOIN - +- Bookmark lookup - | +- Table - | | table film_actor - | | possible_keys idx_fk_film_id - | +- Index lookup - | key film_actor->idx_fk_film_id - | possible_keys idx_fk_film_id - | key_len 2 - | ref sakila.film.film_id - | rows 2 - +- Table scan - rows 952 - +- Table - table film - possible_keys PRIMARY - - -The query plan is left-deep, depth-first search, and the tree's root is the -output node -- the last step in the execution plan. In other words, read it -like this: - - -1 - - Table scan the 'film' table, which accesses an estimated 952 rows. - - - -2 - - For each row, find matching rows by doing an index lookup into the - film_actor->idx_fk_film_id index with the value from sakila.film.film_id, then a - bookmark lookup into the film_actor table. - - - -For more information on how to read EXPLAIN output, please see -`http://dev.mysql.com/doc/en/explain.html `_, and this talk titled "Query -Optimizer Internals and What's New in the MySQL 5.2 Optimizer," from Timour -Katchaounov, one of the MySQL developers: -`http://maatkit.org/presentations/katchaounov_timour.pdf `_. 
- - -******* -MODULES -******* - - -This program is actually a runnable module, not just an ordinary Perl script. -In fact, there are two modules embedded in it. This makes unit testing easy, -but it also makes it easy for you to use the parsing and tree-building -functionality if you want. - -The ExplainParser package accepts a string and parses whatever it thinks looks -like EXPLAIN output from it. The synopsis is as follows: - - -.. code-block:: perl - - require "pt-visual-explain"; - my $p = ExplainParser->new(); - my $rows = $p->parse("some text"); - # $rows is an arrayref of hashrefs. - - -The ExplainTree package accepts a set of rows and turns it into a tree. For -convenience, you can also have it delegate to ExplainParser and parse text for -you. Here's the synopsis: - - -.. code-block:: perl - - require "pt-visual-explain"; - my $e = ExplainTree->new(); - my $tree = $e->parse("some text", \%options); - my $output = $e->pretty_print($tree); - print $output; - - - -********* -ALGORITHM -********* - - -This section explains the algorithm that converts EXPLAIN into a tree. You may -be interested in reading this if you want to understand EXPLAIN more fully, or -are trying to figure out how this works, but otherwise this section will probably -not make your life richer. - -The tree can be built by examining the id, select_type, and table columns of -each row. Here's what I know about them: - -The id column is the sequential number of the select. This does not indicate -nesting; it just comes from counting SELECT from the left of the SQL statement. -It's like capturing parentheses in a regular expression. A UNION RESULT row -doesn't have an id, because it isn't a SELECT. The source code actually refers -to UNIONs as a fake_lex, as I recall. - -If two adjacent rows have the same id value, they are joined with the standard -single-sweep multi-join method.
- - -The select_type column tells a) that a new sub-scope has opened b) what kind -of relationship the row has to the previous row c) what kind of operation the -row represents. - - -\* - - SIMPLE means there are no subqueries or unions in the whole query. - - - -\* - - PRIMARY means there are, but this is the outermost SELECT. - - - -\* - - [DEPENDENT] UNION means this result is UNIONed with the previous result (not - row; a result might encompass more than one row). - - - -\* - - UNION RESULT terminates a set of UNIONed results. - - - -\* - - [DEPENDENT|UNCACHEABLE] SUBQUERY means a new sub-scope is opening. This is the - kind of subquery that happens in a WHERE clause, SELECT list or whatnot; it does - not return a so-called "derived table." - - - -\* - - DERIVED is a subquery in the FROM clause. - - - -Tables that are JOINed all have the same select_type. For example, if you JOIN -three tables inside a dependent subquery, they'll all say the same thing: -DEPENDENT SUBQUERY. - -The table column usually specifies the table name or alias, but may also say -<derivedN> or <unionM,N>. If it says <derivedN>, the row represents an -access to the temporary table that holds the result of the subquery whose id is -N. If it says <unionM,N> it's the same thing, but it refers to the results it -UNIONs together. - -Finally, order matters. If a row's id is less than the one before it, I think -that means it is dependent on something other than the one before it. For -example, - - -.. code-block:: perl - - explain select - (select 1 from sakila.film), - (select 2 from sakila.film_actor), - (select 3 from sakila.actor); - - | id | select_type | table | - +----+-------------+------------+ - | 1 | PRIMARY | NULL | - | 4 | SUBQUERY | actor | - | 3 | SUBQUERY | film_actor | - | 2 | SUBQUERY | film | - - -If the results were in order 2-3-4, I think that would mean 3 is a subquery of -2, 4 is a subquery of 3. As it is, this means 4 is a subquery of the nearest -previous recent row with a smaller id, which is 1.
Likewise for 3 and 2. - -This structure is hard to programmatically build into a tree for the same reason -it's hard to understand by inspection: there are both forward and backward -references. <derivedN> is a forward reference to selectN, while <unionM,N> is a -backward reference to selectM and selectN. That makes recursion and other -tree-building algorithms hard to get right (NOTE: after implementation, I now -see how it would be possible to deal with both forward and backward references, -but I have no motivation to change something that works). Consider the -following: - - -.. code-block:: perl - - select * from ( - select 1 from sakila.actor as actor_1 - union - select 1 from sakila.actor as actor_2 - ) as der_1 - union - select * from ( - select 1 from sakila.actor as actor_3 - union all - select 1 from sakila.actor as actor_4 - ) as der_2; - - | id | select_type | table | - +------+--------------+------------+ - | 1 | PRIMARY | <derived2> | - | 2 | DERIVED | actor_1 | - | 3 | UNION | actor_2 | - | NULL | UNION RESULT | <union2,3> | - | 4 | UNION | <derived5> | - | 5 | DERIVED | actor_3 | - | 6 | UNION | actor_4 | - | NULL | UNION RESULT | <union5,6> | - | NULL | UNION RESULT | <union1,4> | - - -This would be a lot easier to work with if it looked like this (I've -bracketed the id on rows I moved): - - -.. code-block:: perl - - | id | select_type | table | - +------+--------------+------------+ - | [1] | UNION RESULT | <union1,4> | - | 1 | PRIMARY | <derived2> | - | [2] | UNION RESULT | <union2,3> | - | 2 | DERIVED | actor_1 | - | 3 | UNION | actor_2 | - | 4 | UNION | <derived5> | - | [5] | UNION RESULT | <union5,6> | - | 5 | DERIVED | actor_3 | - | 6 | UNION | actor_4 | - - -In fact, why not re-number all the ids, so the PRIMARY row becomes 2, and so on? -That would make it even easier to read. Unfortunately that would also have the -effect of destroying the meaning of the id column, which I think is important to -preserve in the final tree.
Also, though it makes it easier to read, it doesn't -make it easier to manipulate programmatically; so it's fine to leave them -numbered as they are. - -The goal of re-ordering is to make it easier to figure out which rows are -children of which rows in the execution plan. Given the reordered list and some -row whose table is <unionM,N> or <derivedN>, it is easy to find the beginning of -the slice of rows that should be child nodes in the tree: you just look for the -first row whose ID is the same as the first number in the table. - -The next question is how to find the last row that should be a child node of a -UNION or DERIVED. I'll start with DERIVED, because the solution makes UNION -easy. - -Consider how MySQL numbers the SELECTs sequentially according to their position -in the SQL, left-to-right. Since a DERIVED table encloses everything within it -in a scope, which becomes a temporary table, there are only two things to think -about: its child subqueries and unions (if any), and its next siblings in the -scope that encloses it. Its children will all have an id greater than it does, -by definition, so any later rows with a smaller id terminate the scope. - -Here's an example. The middle derived table here has a subquery and a UNION to -make it a little more complex for the example. - - -.. code-block:: perl - - explain select 1 - from ( - select film_id from sakila.film limit 1 - ) as der_1 - join ( - select film_id, actor_id, (select count(*) from sakila.rental) as r - from sakila.film_actor limit 1 - union all - select 1, 1, 1 from sakila.film_actor as dummy - ) as der_2 using (film_id) - join ( - select actor_id from sakila.actor limit 1 - ) as der_3 using (actor_id); - - -Here's the output of EXPLAIN: - - -..
code-block:: perl - - | id | select_type | table | - | 1 | PRIMARY | | - | 1 | PRIMARY | | - | 1 | PRIMARY | | - | 6 | DERIVED | actor | - | 3 | DERIVED | film_actor | - | 4 | SUBQUERY | rental | - | 5 | UNION | dummy | - | NULL | UNION RESULT | | - | 2 | DERIVED | film | - - -The siblings all have id 1, and the middle one I care about is derived3. -(Notice MySQL doesn't execute them in the order I defined them, which is fine). -Now notice that MySQL prints out the rows in the opposite order I defined the -subqueries: 6, 3, 2. It always seems to do this, and there might be other -methods of finding the scope boundaries including looking for the lower boundary -of the next largest sibling, but this is a good enough heuristic. I am forced -to rely on it for non-DERIVED subqueries, so I rely on it here too. Therefore, -I decide that everything greater than or equal to 3 belongs to the DERIVED -scope. - -The rule for UNION is simple: they consume the entire enclosing scope, and to -find the component parts of each one, you find each part's beginning as referred -to in the definition, and its end is either just before the next -one, or if it's the last part, the end is the end of the scope. - -This is only simple because UNION consumes the entire scope, which is either the -entire statement, or the scope of a DERIVED table. This is because a UNION -cannot be a sibling of another UNION or a table, DERIVED or not. (Try writing -such a statement if you don't see it intuitively). Therefore, you can just find -the enclosing scope's boundaries, and the rest is easy. Notice in the example -above, the UNION is over , which includes the row with id 4 -- it -includes every row between 3 and 5. - -Finally, there are non-derived subqueries to deal with as well. In this case I -can't look at siblings to find the end of the scope as I did for DERIVED. I -have to trust that MySQL executes depth-first. Here's an example: - - -.. 
code-block:: perl - - explain - select actor_id, - ( - select count(film_id) - + (select count(*) from sakila.film) - from sakila.film join sakila.film_actor using(film_id) - where exists( - select * from sakila.actor - where sakila.actor.actor_id = sakila.film_actor.actor_id - ) - ) - from sakila.actor; - - | id | select_type | table | - | 1 | PRIMARY | actor | - | 2 | SUBQUERY | film | - | 2 | SUBQUERY | film_actor | - | 4 | DEPENDENT SUBQUERY | actor | - | 3 | SUBQUERY | film | - - -In order, the tree should be built like this: - - -\* - - See row 1. - - - -\* - - See row 2. It's a higher id than 1, so it's a subquery, along with every other - row whose id is greater than 2. - - - -\* - - Inside this scope, see 2 and 2 and JOIN them. See 4. It's a higher id than 2, - so it's again a subquery; recurse. After that, see 3, which is also higher; - recurse. - - - -But the only reason the nested subquery didn't include select 3 is because -select 4 came first. In other words, if EXPLAIN looked like this, - - -.. code-block:: perl - - | id | select_type | table | - | 1 | PRIMARY | actor | - | 2 | SUBQUERY | film | - | 2 | SUBQUERY | film_actor | - | 3 | SUBQUERY | film | - | 4 | DEPENDENT SUBQUERY | actor | - - -I would be forced to assume upon seeing select 3 that select 4 is a subquery -of it, rather than just being the next sibling in the enclosing scope. If this -is ever wrong, then the algorithm is wrong, and I don't see what could be done -about it. - -UNION is a little more complicated than just "the entire scope is a UNION," -because the UNION might itself be inside an enclosing scope that's only -indicated by the first item inside the UNION. There are only three kinds of -enclosing scopes: UNION, DERIVED, and SUBQUERY. A UNION can't enclose a UNION, -and a DERIVED has its own "scope markers," but a SUBQUERY can wholly enclose a -UNION, like this strange example on the empty table t1: - - -.. 
code-block:: perl - - explain select * from t1 where not exists( - (select t11.i from t1 t11) union (select t12.i from t1 t12)); - - | id | select_type | table | Extra | - +------+--------------+------------+--------------------------------+ - | 1 | PRIMARY | t1 | const row not found | - | 2 | SUBQUERY | NULL | No tables used | - | 3 | SUBQUERY | NULL | no matching row in const table | - | 4 | UNION | t12 | const row not found | - | NULL | UNION RESULT | | | - - -The UNION's backward references might make it look like the UNION encloses the -subquery, but studying the query makes it clear this isn't the case. So when a -UNION's first row says SUBQUERY, it is this special case. - -By the way, I don't fully understand this query plan; there are 4 numbered -SELECT in the plan, but only 3 in the query. The parens around the UNIONs are -meaningful. Removing them will make the EXPLAIN different. Please tell me how -and why this works if you know. - -Armed with this knowledge, it's possible to use recursion to turn the -parent-child relationship between all the rows into a tree representing the -execution plan. - -MySQL prints the rows in execution order, even the forward and backward -references. At any given scope, the rows are processed as a left-deep tree. -MySQL does not do "bushy" execution plans. It begins with a table, finds a -matching row in the next table, and continues till the last table, when it emits -a row. When it runs out, it backtracks till it can find the next row and -repeats. There are subtleties of course, but this is the basic plan. This is -why MySQL transforms all RIGHT OUTER JOINs into LEFT OUTER JOINs and cannot do -FULL OUTER JOIN. - -This means in any given scope, say - - -.. code-block:: perl - - | id | select_type | table | - | 1 | SIMPLE | tbl1 | - | 1 | SIMPLE | tbl2 | - | 1 | SIMPLE | tbl3 | - - -The execution plan looks like a depth-first traversal of this tree: - - -.. 
code-block:: perl - - JOIN - / \ - JOIN tbl3 - / \ - tbl1 tbl2 - - -The JOIN might not be a JOIN. It might be a subquery, for example. This comes -from the type column of EXPLAIN. The documentation says this is a "join type," -but I think "access type" is more accurate, because it's "how MySQL accesses -rows." - -pt-visual-explain decorates the tree significantly more than just turning -rows into nodes. Each node may get a series of transformations that turn it -into a subtree of more than one node. For example, an index scan not marked -with 'Using index' must do a bookmark lookup into the table rows; that is a -three-node subtree. However, after the above node-ordering and scoping stuff, -the rest of the process is pretty simple. - - -******* -OPTIONS -******* - - -This tool accepts additional command-line arguments. Refer to the -"SYNOPSIS" and usage information for details. - - ---ask-pass - - Prompt for a password when connecting to MySQL. - - - ---charset - - short form: -A; type: string - - Default character set. If the value is utf8, sets Perl's binmode on - STDOUT to utf8, passes the mysql_enable_utf8 option to DBD::mysql, and - runs SET NAMES UTF8 after connecting to MySQL. Any other value sets - binmode on STDOUT without the utf8 layer, and runs SET NAMES after - connecting to MySQL. - - - ---clustered-pk - - Assume that PRIMARY KEY index accesses don't need to do a bookmark lookup to - retrieve rows. This is the case for InnoDB. - - - ---config - - type: Array - - Read this comma-separated list of config files; if specified, this must be the - first option on the command line. - - - ---connect - - Treat input as a query, and obtain EXPLAIN output by connecting to a MySQL - instance and running EXPLAIN on the query. When this option is given, - pt-visual-explain uses the other connection-specific options such as - "--user" to connect to the MySQL instance. 
If you have a .my.cnf file, - it will read it, so you may not need to specify any connection-specific - options. - - - ---database - - short form: -D; type: string - - Connect to this database. - - - ---defaults-file - - short form: -F; type: string - - Only read mysql options from the given file. You must give an absolute - pathname. - - - ---format - - type: string; default: tree - - Set output format. - - The default is a terse pretty-printed tree. The valid values are: - - - .. code-block:: perl - - Value Meaning - ===== ================================================ - tree Pretty-printed terse tree. - dump Data::Dumper output (see Data::Dumper for more). - - - - ---help - - Show help and exit. - - - ---host - - short form: -h; type: string - - Connect to host. - - - ---password - - short form: -p; type: string - - Password to use when connecting. - - - ---pid - - type: string - - Create the given PID file. The file contains the process ID of the script. - The PID file is removed when the script exits. Before starting, the script - checks if the PID file already exists. If it does not, then the script creates - and writes its own PID to it. If it does, then the script checks the following: - if the file contains a PID and a process is running with that PID, then - the script dies; or, if there is no process running with that PID, then the - script overwrites the file with its own PID and starts; else, if the file - contains no PID, then the script dies. - - - ---port - - short form: -P; type: int - - Port number to use for connection. - - - ---set-vars - - type: string; default: wait_timeout=10000 - - Set these MySQL variables. Immediately after connecting to MySQL, this - string will be appended to SET and executed. - - - ---socket - - short form: -S; type: string - - Socket file to use for connection. - - - ---user - - short form: -u; type: string - - User for login if not current user. - - - ---version - - Show version and exit. 
- - - - -*********** -DSN OPTIONS -*********** - - -These DSN options are used to create a DSN. Each option is given like -\ ``option=value``\ . The options are case-sensitive, so P and p are not the -same option. There cannot be whitespace before or after the \ ``=``\ and -if the value contains whitespace it must be quoted. DSN options are -comma-separated. See the percona-toolkit manpage for full details. - - -\* A - - dsn: charset; copy: yes - - Default character set. - - - -\* D - - dsn: database; copy: yes - - Default database. - - - -\* F - - dsn: mysql_read_default_file; copy: yes - - Only read default options from the given file. - - - -\* h - - dsn: host; copy: yes - - Connect to host. - - - -\* p - - dsn: password; copy: yes - - Password to use when connecting. - - - -\* P - - dsn: port; copy: yes - - Port number to use for connection. - - - -\* S - - dsn: mysql_socket; copy: yes - - Socket file to use for connection. - - - -\* u - - dsn: user; copy: yes - - User for login if not current user. - - - - -*********** -ENVIRONMENT -*********** - - -The environment variable \ ``PTDEBUG``\ enables verbose debugging output to STDERR. -To enable debugging and capture all output to a file, run the tool like: - - -.. code-block:: perl - - PTDEBUG=1 pt-visual-explain ... > FILE 2>&1 - - -Be careful: debugging output is voluminous and can generate several megabytes -of output. - - -******************* -SYSTEM REQUIREMENTS -******************* - - -You need Perl, DBI, DBD::mysql, and some core packages that ought to be -installed in any reasonably new version of Perl. - - -**** -BUGS -**** - - -For a list of known bugs, see `http://www.percona.com/bugs/pt-visual-explain <http://www.percona.com/bugs/pt-visual-explain>`_. - -Please report bugs at `https://bugs.launchpad.net/percona-toolkit <https://bugs.launchpad.net/percona-toolkit>`_. 
-Include the following information in your bug report: - - -\* Complete command-line used to run the tool - - - -\* Tool "--version" - - - -\* MySQL version of all servers involved - - - -\* Output from the tool including STDERR - - - -\* Input files (log/dump/config files, etc.) - - - -If possible, include debugging output by running the tool with \ ``PTDEBUG``\ ; -see "ENVIRONMENT". - - -*********** -DOWNLOADING -*********** - - -Visit `http://www.percona.com/software/percona-toolkit/ <http://www.percona.com/software/percona-toolkit/>`_ to download the -latest release of Percona Toolkit. Or, get the latest release from the -command line: - - -.. code-block:: perl - - wget percona.com/get/percona-toolkit.tar.gz - - wget percona.com/get/percona-toolkit.rpm - - wget percona.com/get/percona-toolkit.deb - - -You can also get individual tools from the latest release: - - -.. code-block:: perl - - wget percona.com/get/TOOL - - -Replace \ ``TOOL``\ with the name of any tool. - - -******* -AUTHORS -******* - - -Baron Schwartz - - -********************* -ABOUT PERCONA TOOLKIT -********************* - - -This tool is part of Percona Toolkit, a collection of advanced command-line -tools developed by Percona for MySQL support and consulting. Percona Toolkit -was forked from two projects in June, 2011: Maatkit and Aspersa. Those -projects were created by Baron Schwartz and developed primarily by him and -Daniel Nichter, both of whom are employed by Percona. Visit -`http://www.percona.com/software/ <http://www.percona.com/software/>`_ for more software developed by Percona. - - -******************************** -COPYRIGHT, LICENSE, AND WARRANTY -******************************** - - -This program is copyright 2007-2011 Baron Schwartz, 2011 Percona Inc. -Feedback and improvements are welcome. - -THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation, version 2; OR the Perl Artistic License. On UNIX and similar -systems, you can issue \`man perlgpl' or \`man perlartistic' to read these -licenses. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA. - - -******* -VERSION -******* - - -pt-visual-explain 1.0.1 - diff --git a/docs/user/release_notes.rst b/docs/user/release_notes.rst deleted file mode 100644 index 5beed767..00000000 --- a/docs/user/release_notes.rst +++ /dev/null @@ -1,41 +0,0 @@ -Release Notes -************* - -v1.0.1 released 2011-09-01 -========================== - -Percona Toolkit 1.0.1 has been released. In July, Baron announced planned -changes to Maatkit and Aspersa development;[1] Percona Toolkit is the -result. In brief, Percona Toolkit is the combined fork of Maatkit and -Aspersa, so although the toolkit is new, the programs are not. That means -Percona Toolkit 1.0.1 is mature, stable, and production-ready. In fact, -it's even a little more stable because we fixed a few bugs in this release. - -Percona Toolkit packages can be downloaded from -http://www.percona.com/downloads/percona-toolkit/ -or the Percona Software Repositories -(http://www.percona.com/docs/wiki/repositories:start). 
- -Although Maatkit and Aspersa development use Google Code, Percona Toolkit -uses Launchpad: https://launchpad.net/percona-toolkit - -[1] http://www.xaprb.com/blog/2011/07/06/planned-change-in-maatkit-aspersa-development/ - -Changelog ---------- - -* Fixed bug 819421: MasterSlave::is_replication_thread() doesn't match all -* Fixed bug 821673: pt-table-checksum doesn't include --where in min max queries -* Fixed bug 821688: pt-table-checksum SELECT MIN MAX for char chunking is wrong -* Fixed bug 838211: pt-collect: line 24: [: : integer expression expected -* Fixed bug 838248: pt-collect creates a "5.1" file - -v0.9.5 released 2011-08-04 -========================== - -Percona Toolkit 0.9.5 represents the completed transition from Maatkit and Aspersa. There are no bug fixes or new features, but some features have been removed (like --save-results from pt-query-digest). This release is the starting point for the 1.0 series where new development will happen, and no more changes will be made to the 0.9 series. - -Changelog ---------- - -* Forked, combined, and rebranded Maatkit and Aspersa as Percona Toolkit. diff --git a/docs/user/system_requirements.rst b/docs/user/system_requirements.rst deleted file mode 100644 index 9ba60830..00000000 --- a/docs/user/system_requirements.rst +++ /dev/null @@ -1,25 +0,0 @@ - -******************* -SYSTEM REQUIREMENTS -******************* - -Most tools require: - -\* Perl v5.8 or newer - -\* Bash v3 or newer - -\* Core Perl modules like Time::HiRes - -Tools that connect to MySQL require: - -\* Perl modules DBI and DBD::mysql - -\* MySQL 5.0 or newer - -Percona Toolkit is only tested on UNIX systems, primarily Debian and -Red Hat derivatives; other operating systems are not supported. - -Tools that connect to MySQL may work with MySQL v4.1, but this is not -tested or supported. 
- diff --git a/docs/user/tools.rst b/docs/user/tools.rst deleted file mode 100644 index 70a99258..00000000 --- a/docs/user/tools.rst +++ /dev/null @@ -1,211 +0,0 @@ - -***** -TOOLS -***** - -This release of Percona Toolkit includes the following tools: - -:doc:`pt-archiver` - - Archive rows from a MySQL table into another table or a file. - - -:doc:`pt-collect` - - Collect information from a server for some period of time. - - -:doc:`pt-config-diff` - - Diff MySQL configuration files and server variables. - - -:doc:`pt-deadlock-logger` - - Extract and log MySQL deadlock information. - - -:doc:`pt-diskstats` - - Aggregate and summarize \ */proc/diskstats*\ . - - -:doc:`pt-duplicate-key-checker` - - Find duplicate indexes and foreign keys on MySQL tables. - - -:doc:`pt-fifo-split` - - Split files and pipe lines to a fifo without really splitting. - - -:doc:`pt-find` - - Find MySQL tables and execute actions, like GNU find. - - -:doc:`pt-fk-error-logger` - - Extract and log MySQL foreign key errors. - - -:doc:`pt-heartbeat` - - Monitor MySQL replication delay. - - -:doc:`pt-index-usage` - - Read queries from a log and analyze how they use indexes. - - -:doc:`pt-kill` - - Kill MySQL queries that match certain criteria. - - -:doc:`pt-log-player` - - Replay MySQL query logs. - - -:doc:`pt-mext` - - Look at many samples of MySQL \ ``SHOW GLOBAL STATUS``\ side-by-side. - - -:doc:`pt-mysql-summary` - - Summarize MySQL information in a nice way. - - -:doc:`pt-online-schema-change` - - Perform online, non-blocking table schema changes. - - -:doc:`pt-pmp` - - Aggregate GDB stack traces for a selected program. - - -:doc:`pt-query-advisor` - - Analyze queries and advise on possible problems. - - -:doc:`pt-query-digest` - - Analyze query execution logs and generate a query report, filter, replay, or transform queries for MySQL, PostgreSQL, memcached, and more. 
- - -:doc:`pt-show-grants` - - Canonicalize and print MySQL grants so you can effectively replicate, compare and version-control them. - - -:doc:`pt-sift` - - Browses files created by pt-collect. - - -:doc:`pt-slave-delay` - - Make a MySQL slave server lag behind its master. - - -:doc:`pt-slave-find` - - Find and print replication hierarchy tree of MySQL slaves. - - -:doc:`pt-slave-restart` - - Watch and restart MySQL replication after errors. - - -:doc:`pt-stalk` - - Wait for a condition to occur then begin collecting data. - - -:doc:`pt-summary` - - Summarize system information in a nice way. - - -:doc:`pt-table-checksum` - - Perform an online replication consistency check, or checksum MySQL tables efficiently on one or many servers. - - -:doc:`pt-table-sync` - - Synchronize MySQL table data efficiently. - - -:doc:`pt-tcp-model` - - Transform tcpdump into metrics that permit performance and scalability modeling. - - -:doc:`pt-trend` - - Compute statistics over a set of time-series data points. - - -:doc:`pt-upgrade` - - Execute queries on multiple servers and check for differences. - - -:doc:`pt-variable-advisor` - - Analyze MySQL variables and advise on possible problems. - - -:doc:`pt-visual-explain` - - Format EXPLAIN output as a tree. - - -For more free, open-source software developed by Percona, visit -`http://www.percona.com/software/ <http://www.percona.com/software/>`_. - -.. 
toctree:: - :hidden: - - pt-archiver - pt-collect - pt-config-diff - pt-deadlock-logger - pt-diskstats - pt-duplicate-key-checker - pt-fifo-split - pt-find - pt-fk-error-logger - pt-heartbeat - pt-index-usage - pt-kill - pt-log-player - pt-mext - pt-mysql-summary - pt-online-schema-change - pt-pmp - pt-query-advisor - pt-query-digest - pt-show-grants - pt-sift - pt-slave-delay - pt-slave-find - pt-slave-restart - pt-stalk - pt-summary - pt-table-checksum - pt-table-sync - pt-tcp-model - pt-trend - pt-upgrade - pt-variable-advisor - pt-visual-explain diff --git a/docs/user/version.rst b/docs/user/version.rst deleted file mode 100644 index 34e5c17a..00000000 --- a/docs/user/version.rst +++ /dev/null @@ -1,7 +0,0 @@ - -******* -VERSION -******* - -Percona Toolkit v1.0.1 released 2011-09-01 - diff --git a/lib/Quoter.pm b/lib/Quoter.pm index 41331007..6cdf9248 100644 --- a/lib/Quoter.pm +++ b/lib/Quoter.pm @@ -149,9 +149,20 @@ sub join_quote { sub serialize_list { my ( $self, @args ) = @_; return unless @args; + + # If the only value is undef, which is NULL for MySQL, then return + # the same. undef/NULL is a valid boundary value, however... return $args[0] if @args == 1 && !defined $args[0]; - die "serialize_list can't handle undef (NULLs) unless they are the only value" + + # ... if there's an undef/NULL value and more than one value, + # then we have no easy way to serialize the values into a list. + # We can't convert undef to "NULL" because "NULL" is a valid + # value itself, and we can't make it "" because a blank string + # is also a valid value. In practice, a boundary value with + # two NULL values should be rare. 
+ die "Cannot serialize multiple values with undef/NULL" if grep { !defined $_ } @args; + return join ',', map { quotemeta } @args; } diff --git a/t/lib/Quoter.t b/t/lib/Quoter.t index 59bb9f09..ca98eefc 100644 --- a/t/lib/Quoter.t +++ b/t/lib/Quoter.t @@ -9,7 +9,7 @@ BEGIN { use strict; use warnings FATAL => 'all'; use English qw(-no_match_vars); -use Test::More tests => 52; +use Test::More tests => 54; use Quoter; use PerconaTest; @@ -121,6 +121,18 @@ is( "Serialize 3 empty strings", ); +is( + $q->serialize_list(undef), + undef, + "Serialize undef string", +); + +is( + $q->deserialize_list(undef), + undef, + "Deserialize undef string", +); + my @serialize_tests = ( [ 'a', 'b', ], [ 'a,', 'b', ], diff --git a/t/pt-table-checksum/char_chunking.t b/t/pt-table-checksum/char_chunking.t index 86a37bb2..9597a74d 100644 --- a/t/pt-table-checksum/char_chunking.t +++ b/t/pt-table-checksum/char_chunking.t @@ -25,7 +25,7 @@ if ( !$master_dbh ) { plan skip_all => 'Cannot connect to sandbox master'; } else { - plan tests => 2; + plan tests => 5; } # The sandbox servers run with lock_wait_timeout=3 and it's not dynamic @@ -58,6 +58,27 @@ ok( "Char chunk ascii, chunk size 20" ); +my $row = $master_dbh->selectrow_arrayref("select lower_boundary, upper_boundary from percona.checksums where db='test' and tbl='ascii' and chunk=1"); +is_deeply( + $row, + [ '', 'burt' ], + "First boundaries" +); + +$row = $master_dbh->selectrow_arrayref("select lower_boundary, upper_boundary from percona.checksums where db='test' and tbl='ascii' and chunk=9"); +is_deeply( + $row, + [ undef, '' ], + "Lower oob boundary" +); + +$row = $master_dbh->selectrow_arrayref("select lower_boundary, upper_boundary from percona.checksums where db='test' and tbl='ascii' and chunk=10"); +is_deeply( + $row, + [ 'ZESUS\!\!\!', undef ], + "Upper oob boundary" +); + # ############################################################################# # Done. 
# #############################################################################