mirror of
https://github.com/percona/percona-toolkit.git
synced 2025-09-08 07:38:06 +00:00
392 lines
14 KiB
Bash
Executable File
392 lines
14 KiB
Bash
Executable File
#!/usr/bin/env bash

# This program is part of Percona Toolkit: http://www.percona.com/software/
# See "COPYRIGHT, LICENSE, AND WARRANTY" at the end of this file for legal
# notices and disclaimers.

# Print a usage message and exit.
# Globals:   OPT_ERR (read) - optional error text printed before the usage
# Outputs:   the error text (if any) and the usage summary, on stdout
# Returns:   never returns; always exits the shell with status 1
usage() {
  if [ -n "${OPT_ERR}" ]; then
    echo "${OPT_ERR}"
  fi
  # The here-document is unquoted on purpose so that $0 expands to the name
  # the tool was invoked with.
  cat <<USAGE
Usage: $0 OPTIONS [MYSQL-OPTIONS]
Collects diagnostic data on a MySQL server and stores it into files.
The MYSQL-OPTIONS are standard options to connect to MySQL: -uhPpS. Any
options or arguments that follow this tool's -dfgimopst options will be treated
as options to pass directly to mysql and mysqladmin.
Options: (required: -dfigmos)
-d DESTINATION Where to store the resulting data; must already exist.
-f PERCENT Exit if the disk is more than this percent full.
-i INTERVAL How many seconds to collect data.
-g <yes/no> Collect GDB stack traces.
-m MEGABYTES Exit unless there are this many megabytes free disk space.
-o <yes/no> Collect oprofile data; disables -s.
-p PREFIX Store the data into files with this prefix (optional).
-s <yes/no> Collect strace data.
-t <yes/no> Collect tcpdump data.
USAGE
  exit 1
}

# Make sure the disk isn't getting too full.  Fails if the filesystem holding
# $3 is $1 percent full or more, or has $2 megabytes or less of free space.
# Globals:   PCT, MB, DEST (written; they mirror $1, $2 and $3 and are kept
#            global because code outside this function reads them)
# Arguments: $1 - fullness limit, in percent
#            $2 - free-space floor, in megabytes
#            $3 - directory whose filesystem is checked
# Outputs:   an explanation on stdout when a limit is hit; a diagnostic on
#            stderr when df's report cannot be interpreted
# Returns:   0 if there is enough room, 1 otherwise
check_disk_space() {
  PCT="$1"
  MB="$2"
  DEST="$3"
  local df_fields avail full

  # Call df once so both figures describe the same instant.  With -P each
  # filesystem is reported on a single line after the header, so take the last
  # line rather than requiring the device name to start with "/" (tmpfs, NFS
  # and ZFS devices do not).
  df_fields=$(df -m -P "${DEST}" \
    | awk 'NR > 1 { line = $4 " " $5 } END { print line }')
  avail="${df_fields%% *}"
  full="${df_fields##* }"
  full="${full%\%}"

  # If df failed or printed something unexpected we cannot prove there is
  # room, so refuse instead of letting the comparison below error out and
  # look like success.
  case "${avail}:${full}" in
    *[!0-9:]* | :* | *:)
      echo "Cannot determine free disk space for ${DEST}" >&2
      return 1
      ;;
  esac

  # Two separate tests instead of the deprecated -o: a threshold that is not
  # an integer only disables its own comparison.
  if [ "${avail}" -le "${MB}" ] || [ "${full}" -ge "${PCT}" ]; then
    echo "Not enough free space (${full}% full, ${avail}MB free)"
    echo "Wanted less than ${PCT}% full and more than ${MB}MB"
    return 1
  fi
  return 0
}

# Parse this tool's own options, which must come first on the command line.
# Parsing stops at "--" (which is consumed) or at the first argument that is
# not one of the tool's options; everything from there on is left untouched so
# it can be handed to mysql and mysqladmin.  Walking the live argument list
# (instead of a snapshot of it) keeps option values that look like options,
# and MySQL options that resemble ours, from being misparsed.
# Globals:   OPT_d OPT_f OPT_g OPT_i OPT_m OPT_o OPT_p OPT_s OPT_t (written)
#            OPT_ARGC (written) - number of leading arguments consumed
# Arguments: the script's command line
# Returns:   0; --help does not return because usage exits
parse_options() {
  OPT_ARGC=0
  while [ $# -gt 0 ]; do
    case "$1" in
      --)
        OPT_ARGC=$((OPT_ARGC + 1))
        return 0
        ;;
      --help)
        usage
        return 1
        ;;
      -[dfgimopst])
        # "-x VALUE" is stored in OPT_x.  A trailing option with no value
        # yields an empty OPT_x.
        printf -v "OPT_${1#-}" '%s' "${2-}"
        if [ $# -ge 2 ]; then
          shift 2
          OPT_ARGC=$((OPT_ARGC + 2))
        else
          shift
          OPT_ARGC=$((OPT_ARGC + 1))
        fi
        ;;
      *)
        # First MySQL option: stop and leave it in place.
        return 0
        ;;
    esac
  done
  return 0
}

parse_options "$@"
# Drop what was consumed so that "$@" holds only the MySQL options.
shift "${OPT_ARGC}"

# Every option the usage text lists as required must have a value.  -f and -m
# are included because check_disk_space compares against them and cannot do
# so with empty thresholds.
for required_value in "${OPT_d}" "${OPT_f}" "${OPT_i}" "${OPT_g}" \
  "${OPT_m}" "${OPT_o}" "${OPT_s}"; do
  if [ -z "${required_value}" ]; then
    OPT_ERR="Missing command-line option."
    usage
  fi
done

# The destination is documented as "must already exist"; say so clearly here
# rather than failing later on the first redirection into it.
if [ ! -d "${OPT_d}" ]; then
  OPT_ERR="Destination directory ${OPT_d} does not exist."
  usage
fi

# $d is the prefix of every file written: the -p value if given, otherwise a
# timestamp with ":" and "-" turned into "_".
if [ -n "${OPT_p}" ]; then
  d="${OPT_p}"
else
  d=$(date +%F-%T | tr :- _)
fi

# Check disk space up-front.
check_disk_space "${OPT_f}" "${OPT_m}" "${OPT_d}" || exit 1

echo "Gathering info for $d"

# Make sure there's only one of me.  The redirection that closes this subshell
# opens file descriptor 200 on the lock file, and flock then waits until no
# other collector holds the lock.  Everything the subshell prints (stdout and
# stderr) is appended to the $d-output file in the destination directory.
(
  flock 200

  # Get pidof mysqld; pidof doesn't exist on some systems.  We try our best...
  p=$(pidof -s mysqld)
  if [ -z "${p}" ]; then
    p=$(pgrep -o -x mysqld)
  fi
  if [ -z "${p}" ]; then
    p=$(ps -eaf | grep 'mysql[d]' | grep -v mysqld_safe | awk '{print $2}' | head -n1)
  fi

  # Get memory allocation info before anything else.
  if [ -n "${p}" ]; then
    if pmap --help 2>&1 | grep -- -x >/dev/null 2>&1; then
      pmap -x "${p}" > "$OPT_d/$d-pmap"
    else
      # Some pmap's apparently don't support -x (issue 116).
      pmap "${p}" > "$OPT_d/$d-pmap"
    fi
  fi

  # Getting a GDB stacktrace can be an intensive operation, so do this only if
  # necessary.
  if [ "${OPT_g}" = "yes" ] && [ -n "${p}" ]; then
    gdb -ex "set pagination 0" -ex "thread apply all bt" --batch -p "${p}" >> "$OPT_d/$d-stacktrace"
  else
    echo "GDB (-g) was not enabled" >> "$OPT_d/$d-stacktrace"
  fi

  # Get MySQL's variables if possible.  Then sleep long enough that we probably
  # complete SHOW VARIABLES if all's well.  (We don't want to run mysql in the
  # foreground, because it could hang.)
  mysql "$@" -e 'SHOW GLOBAL VARIABLES' >> "$OPT_d/$d-variables" 2>&1 &
  sleep .2

  # Get the major.minor version number.  Splitting on "." (rather than taking
  # the first three characters) keeps two-digit major versions intact.
  VER="$(awk '/^version[^_]/{split($2, v, "."); print v[1] "." v[2]; exit}' "$OPT_d/$d-variables")"

  # Servers newer than 5.1 use SHOW ENGINE INNODB MUTEX; older ones, and
  # servers whose version could not be read, get the legacy statement.  The
  # comparison is numeric: inside [ ] a bare ">" is a redirection, which would
  # create a file named "5.1" and merely test that VER is non-empty.
  if printf '%s\n' "${VER}" \
    | awk -F. '{ exit !($1 + 0 > 5 || ($1 + 0 == 5 && $2 + 0 > 1)) }'; then
    mutex_sql='SHOW ENGINE INNODB MUTEX'
  else
    mutex_sql='SHOW MUTEX STATUS'
  fi

  # Is MySQL logging its errors to a file?  If so, tail that file.  Match the
  # variable name exactly so that log_error_verbosity, binlog_error_action and
  # similar variables do not pollute the result.
  errfile="$(awk '$1 == "log_error" {print $2}' "$OPT_d/$d-variables")"
  if [ -z "${errfile}" ] && [ -n "${p}" ]; then
    # Try getting it from the open filehandle (mysqld's stderr)...
    errfile="$(readlink "/proc/${p}/fd/2")"
  fi

  if [ -n "${errfile}" ]; then
    echo "The error file seems to be ${errfile}"
    tail -f "${errfile}" >"$OPT_d/$d-log_error" 2>&1 &
    error_pid=$!
    # Send a mysqladmin debug to the server so we can potentially learn about
    # locking etc.
    mysqladmin debug "$@"
  else
    echo "Could not detect error file; will not tail MySQL's log file"
  fi

  # Get a sample of these right away, so we can get these without interaction
  # with the other commands we're about to run.
  INNOSTAT="SHOW /*!40100 ENGINE*/ INNODB STATUS\G"
  mysql "$@" -e "${INNOSTAT}" >> "$OPT_d/$d-innodbstatus1" 2>&1 &
  mysql "$@" -e 'SHOW FULL PROCESSLIST\G' >> "$OPT_d/$d-processlist1" 2>&1 &
  mysql "$@" -e 'SHOW OPEN TABLES' >> "$OPT_d/$d-opentables1" 2>&1 &
  mysql "$@" -e "${mutex_sql}" >> "$OPT_d/$d-mutex-status1" 2>&1 &

  # If TCP dumping is specified, start that on the server's port.
  if [ "${OPT_t}" = "yes" ]; then
    port=$(awk '/^port/{print $2}' "$OPT_d/$d-variables")
    if [ -n "${port}" ]; then
      tcpdump -i any -s 4096 -w "$OPT_d/$d-tcpdump" port "${port}" &
      tcpdump_pid=$!
    fi
  fi

  # Next, start oprofile gathering data during the whole rest of this process.
  # The --init should be a no-op if it has already been init-ed.
  if [ "${OPT_o}" = "yes" ]; then
    if opcontrol --init; then
      opcontrol --start --no-vmlinux
    else
      OPT_o="no"
    fi
  elif [ "${OPT_s}" = "yes" ] && [ -n "${p}" ]; then
    # Don't run oprofile and strace at the same time.  The trace goes to the
    # destination directory like every other file, and is only started when
    # there is a mysqld PID to attach to.
    strace -T -s 0 -f -p "${p}" > "$OPT_d/$d-strace" 2>&1 &
    strace_pid=$!
  fi

  # Grab a few general things first.  Background all of these so we can start
  # them all up as quickly as possible.  We use mysqladmin -c even though it is
  # buggy and won't stop on its own in 5.1 and newer, because there is a chance
  # that we will get and keep a connection to the database; in troubled times
  # the database tends to exceed max_connections, so reconnecting in the loop
  # tends not to work very well.
  ps -eaf >> "$OPT_d/$d-ps" 2>&1 &
  sysctl -a >> "$OPT_d/$d-sysctl" 2>&1 &
  top -bn1 >> "$OPT_d/$d-top" 2>&1 &
  vmstat 1 "$OPT_i" >> "$OPT_d/$d-vmstat" 2>&1 &
  vmstat "$OPT_i" 2 >> "$OPT_d/$d-vmstat-overall" 2>&1 &
  iostat -dx 1 "$OPT_i" >> "$OPT_d/$d-iostat" 2>&1 &
  iostat -dx "$OPT_i" 2 >> "$OPT_d/$d-iostat-overall" 2>&1 &
  mpstat -P ALL 1 "$OPT_i" >> "$OPT_d/$d-mpstat" 2>&1 &
  mpstat -P ALL "$OPT_i" 1 >> "$OPT_d/$d-mpstat-overall" 2>&1 &
  if [ -n "${p}" ]; then
    lsof -nP -p "${p}" -bw >> "$OPT_d/$d-lsof" 2>&1 &
  fi
  mysqladmin "$@" ext -i1 -c"$OPT_i" >> "$OPT_d/$d-mysqladmin" 2>&1 &
  mysqladmin_pid=$!

  # This loop gathers data for the rest of the duration, and defines the time
  # of the whole job.
  echo "Loop start: $(date +'TS %s.%N %F %T')"
  for a in $(seq 1 "$OPT_i"); do
    # We check the disk, but don't exit, because we need to stop jobs if we
    # need to exit.
    check_disk_space "${OPT_f}" "${OPT_m}" "${OPT_d}" || break

    # Synchronize ourselves onto the clock tick, so the sleeps are 1-second
    sleep "$(date +%s.%N | awk '{print 1 - ($1 % 1)}')"
    ts="$(date +"TS %s.%N %F %T")"

    # Collect the stuff for this cycle; each sample is followed by its
    # timestamp line.
    (cat /proc/diskstats 2>&1; echo "$ts") >> "$OPT_d/$d-diskstats" &
    (cat /proc/stat 2>&1; echo "$ts") >> "$OPT_d/$d-procstat" &
    (cat /proc/vmstat 2>&1; echo "$ts") >> "$OPT_d/$d-procvmstat" &
    (cat /proc/meminfo 2>&1; echo "$ts") >> "$OPT_d/$d-meminfo" &
    (cat /proc/slabinfo 2>&1; echo "$ts") >> "$OPT_d/$d-slabinfo" &
    (cat /proc/interrupts 2>&1; echo "$ts") >> "$OPT_d/$d-interrupts" &
    (df -h 2>&1; echo "$ts") >> "$OPT_d/$d-df" &
    (netstat -antp 2>&1; echo "$ts") >> "$OPT_d/$d-netstat" &
    (netstat -s 2>&1; echo "$ts") >> "$OPT_d/$d-netstat_s" &
  done
  echo "Loop end: $(date +'TS %s.%N %F %T')"

  if [ "${OPT_o}" = "yes" ]; then
    opcontrol --stop
    opcontrol --dump
    # Deliberately unquoted below: pidof may report several PIDs.
    oprofiled_pids=$(pidof oprofiled)
    if [ -n "${oprofiled_pids}" ]; then
      kill ${oprofiled_pids}
    fi
    opcontrol --save="aspersa_collect_$d"

    # Attempt to generate a report; if this fails, then just tell the user how
    # to generate the report.
    path_to_binary=$(command -v mysqld)
    if [ -n "${path_to_binary}" ] && [ -f "${path_to_binary}" ]; then
      opreport --demangle=smart --symbols --merge tgid "session:aspersa_collect_$d" "${path_to_binary}" > "$OPT_d/$d-opreport"
    else
      echo "oprofile data saved to aspersa_collect_$d; you should now be able to get a report" > "$OPT_d/$d-opreport"
      echo "by running something like" >> "$OPT_d/$d-opreport"
      echo "opreport --demangle=smart --symbols --merge tgid session:aspersa_collect_$d /path/to/mysqld" >> "$OPT_d/$d-opreport"
    fi
  elif [ -n "${strace_pid}" ]; then
    # strace was started above: interrupt it, give it a second, then make sure
    # it is gone.  Signal names are used because the numbers differ between
    # platforms.
    kill -s INT "${strace_pid}"
    sleep 1
    kill -s TERM "${strace_pid}"
    # Sometimes strace leaves threads/processes in T status.
    kill -s CONT "${p}"
  fi

  mysql "$@" -e "${INNOSTAT}" >> "$OPT_d/$d-innodbstatus2" 2>&1 &
  mysql "$@" -e 'SHOW FULL PROCESSLIST\G' >> "$OPT_d/$d-processlist2" 2>&1 &
  mysql "$@" -e 'SHOW OPEN TABLES' >> "$OPT_d/$d-opentables2" 2>&1 &
  mysql "$@" -e "${mutex_sql}" >> "$OPT_d/$d-mutex-status2" 2>&1 &

  # Kill backgrounded tasks.
  kill "$mysqladmin_pid"
  if [ -n "$error_pid" ]; then
    kill "$error_pid"
  fi
  if [ -n "$tcpdump_pid" ]; then
    kill "$tcpdump_pid"
  fi

  # Finally, record what system we collected this data from.
  hostname > "$OPT_d/$d-hostname"
) 200>/tmp/aspersa-collect-lockfile >> "$OPT_d/$d-output" 2>&1

# ############################################################################
# Documentation
# ############################################################################
:<<'DOCUMENTATION'
=pod

=head1 NAME

pt-collect - Collect information from a server for some period of time.

=head1 SYNOPSIS

  Usage: pt-collect [OPTION...]

=head1 DESCRIPTION

pt-collect focuses on gathering diagnostic data during a MySQL performance
problem.  It is typically executed by C<stalk>.

=head1 DOWNLOADING

Visit L<http://www.percona.com/software/> to download the latest release of
Percona Toolkit.  Or, to get the latest release from the command line:

  wget percona.com/latest/percona-toolkit/PKG

Replace C<PKG> with C<tar>, C<rpm>, or C<deb> to download the package in that
format.  You can also get individual tools from the latest release:

  wget percona.com/latest/percona-toolkit/TOOL

Replace C<TOOL> with the name of any tool.

=head1 ENVIRONMENT

The environment variable C<PTDEBUG> enables verbose debugging output to STDERR.
To enable debugging and capture all output to a file, run the tool like:

  PTDEBUG=1 pt-collect ... > FILE 2>&1

Be careful: debugging output is voluminous and can generate several megabytes
of output.

=head1 SYSTEM REQUIREMENTS

You need Bash.

=head1 BUGS

For a list of known bugs, see L<http://www.percona.com/bugs/pt-collect>.

Please report bugs at L<https://bugs.launchpad.net/percona-toolkit>.
Include the following information in your bug report:

=over

=item * Complete command-line used to run the tool

=item * Tool L<"--version">

=item * MySQL version of all servers involved

=item * Output from the tool including STDERR

=item * Input files (log/dump/config files, etc.)

=back

If possible, include debugging output by running the tool with C<PTDEBUG>;
see L<"ENVIRONMENT">.

=head1 AUTHORS

Baron Schwartz

=head1 ABOUT PERCONA TOOLKIT

This tool is part of Percona Toolkit, a collection of advanced command-line
tools developed by Percona for MySQL support and consulting.  Percona Toolkit
was forked from two projects in June, 2011: Maatkit and Aspersa.  Those
projects were created by Baron Schwartz and developed primarily by him and
Daniel Nichter, both of whom are employed by Percona.  Visit
L<http://www.percona.com/software/> for more software developed by Percona.

=head1 COPYRIGHT, LICENSE, AND WARRANTY

This program is copyright 2010-2011 Baron Schwartz, 2011 Percona Inc.
Feedback and improvements are welcome.

THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation, version 2; OR the Perl Artistic License.  On UNIX and similar
systems, you can issue `man perlgpl' or `man perlartistic' to read these
licenses.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA  02111-1307  USA.

=head1 VERSION

Percona Toolkit v1.0.0 released 2011-08-01

=cut

DOCUMENTATION