Files
percona-toolkit/bin/pt-stalk
Daniel Nichter b93b7de472 Use tmpdir lib.
2011-11-28 11:22:03 -07:00

601 lines
16 KiB
Bash
Executable File

#!/usr/bin/env bash
# This program is part of Percona Toolkit: http://www.percona.com/software/
# See "COPYRIGHT, LICENSE, AND WARRANTY" at the end of this file for legal
# notices and disclaimers.
# ###########################################################################
# tmpdir package
# This package is a copy without comments from the original. The original
# with comments and its test file can be found in the Bazaar repository at,
# lib/bash/tmpdir.sh
# t/lib/bash/tmpdir.sh
# See https://launchpad.net/percona-toolkit for more information.
# ###########################################################################
# Abort on any reference to an unset variable while the tmpdir package is
# being defined (the script switches this off again after the package).
set -u
# Path of the active temporary directory; empty until mk_tmpdir runs.
TMPDIR=""
# Optional caller-chosen tmpdir. The ":-" default form is required here: a
# bare ":" is a substring expansion, which aborts under "set -u" whenever
# OPT_TMPDIR has not been set by the caller.
OPT_TMPDIR=${OPT_TMPDIR:-""}
# Create the working directory and publish its path in TMPDIR.
# Globals:   OPT_TMPDIR (read), TMPDIR (written)
# Behavior:  when OPT_TMPDIR is non-empty that directory is used, and it is
#            created if it does not exist yet; otherwise a fresh directory
#            is made with mktemp under /tmp, named after the tool and PID.
# Exits through die when the directory cannot be created.
mk_tmpdir() {
  if [ -n "${OPT_TMPDIR:-}" ]; then
    TMPDIR="$OPT_TMPDIR"
    if [ ! -d "$TMPDIR" ]; then
      # Quoted so that a path containing whitespace is created as a single
      # directory instead of being split into several arguments.
      mkdir -- "$TMPDIR" || die "Cannot make $TMPDIR"
    fi
  else
    # Strip the directory part of $0 without spawning basename.
    local tool="${0##*/}"
    local pid="$$"
    TMPDIR=$(mktemp -d "/tmp/${tool}.${pid}.XXXXX") \
      || die "Cannot make secure tmpdir"
  fi
}
# Remove the directory named by TMPDIR (if it is set and exists) and reset
# TMPDIR to the empty string.
# Globals: TMPDIR (read, then cleared)
rm_tmpdir() {
  if [ -n "$TMPDIR" ] && [ -d "$TMPDIR" ]; then
    # Quoted and terminated with "--": an unquoted path containing
    # whitespace would make rm delete unrelated paths instead.
    rm -rf -- "$TMPDIR"
  fi
  TMPDIR=""
}
# ###########################################################################
# End tmpdir package
# ###########################################################################
set +u
# ########################################################################
# Optional settings file: when a file named like this script plus ".conf"
# is present, it is sourced into the current shell.
# ########################################################################
if test -f "${0}.conf"; then
  source "${0}.conf"
fi
# ########################################################################
# Configuration settings.
# ########################################################################
# Each setting keeps a value that is already set and non-empty; otherwise
# the default shown here is assigned.

# Largest tolerated value of the measured quantity.
: "${THRESHOLD:=30}"
# Name of the thing to measure.
: "${VARIABLE:=Threads_running}"
# Number of checks that must meet the condition before the script fires.
: "${CYCLES:=1}"
# Collect GDB stacktraces?
: "${GDB:=no}"
# Collect oprofile data?
: "${OPROFILE:=yes}"
# Collect strace data?
: "${STRACE:=no}"
# Collect tcpdump data?
: "${TCPDUMP:=yes}"
# Send mail to this list of addresses when the script triggers.
# EMAIL=
# Any options to pass to mysql/mysqladmin, such as -u, -p, etc
# MYSQLOPTIONS=""
# Seconds to wait between two checks.
: "${INTERVAL:=30}"
# Set to "yes" when the detection command may legitimately print nothing
# (e.g. a grep for a line that only exists while there is a problem).
: "${MAYBE_EMPTY:=no}"
# Location of the 'collect' script.
: "${COLLECT:=${HOME}/bin/pt-collect}"
# Directory in which collected data is stored.
: "${DEST:=${HOME}/collected/}"
# How long statistics are collected for; keep this no longer than SLEEP.
: "${DURATION:=30}"
# How long to sleep after a collection; ten times DURATION unless set.
[ -n "${SLEEP}" ] || SLEEP=$(($DURATION * 10))
# Bail out if the disk is more than this %full.
: "${PCT_THRESHOLD:=95}"
# Bail out if the disk has less than this many MB free.
: "${MB_THRESHOLD:=100}"
# Samples older than this many days are removed.
: "${PURGE:=30}"
# Trigger function used to obtain the value of VARIABLE.
: "${TRIGGER_FUNCTION:=status}"
# ########################################################################
# End configuration
# ########################################################################
# ########################################################################
# Print a message on STDERR and terminate the script with exit status 1.
# Arguments: $1 - message text, printed verbatim (may be omitted)
# ########################################################################
die() {
  # printf instead of echo: echo would treat a message such as "-n" or
  # "-e" as one of its own options and print nothing useful.
  printf '%s\n' "${1:-}" >&2
  exit 1
}
# Filter a saved processlist table.
# Rows are the lines starting with "|"; the first such row is taken as the
# header and maps column names to field numbers.
# Arguments:
#   $1 - file holding the table
#   $2 - name of the column to examine
#   $3 - optional regex; a row matches when the column matches it
#   $4 - optional number (default 0 = disabled); a row matches when the
#        column value is greater than it
#   $5 - optional; non-zero suppresses printing of matching rows (default 0)
# Outputs: matching rows on stdout unless quiet.
# Returns: 0 when at least one row matched, 1 otherwise.
grep_processlist() {
  local file=$1
  local col=$2
  local pat=${3:-""}
  local gt=${4:-0}
  local quiet=${5:-0}

  # Values are handed to awk with -v instead of being spliced into the
  # program text, so quotes or other awk syntax inside them cannot break or
  # alter the program. The file name is quoted so paths with whitespace work.
  awk -v col="$col" -v pat="$pat" -v gt="$gt" -v quiet="$quiet" '
    BEGIN {
      FS = "|"
      OFS = " | "
      n_cols = 0
      found = 0
    }
    /^\|/ {
      if ( n_cols ) {
        # Data row: look the requested column up by name.
        val = colno_for_name[col]
        if ( (pat != "" && match($val, pat)) || ((gt + 0) && $val > (gt + 0)) ) {
          found++
          if ( !(quiet + 0) ) print $0
        }
      }
      else {
        # Header row: trim each cell and remember its position.
        for (i = 1; i <= NF; i++) {
          gsub(/^[ ]*/, "", $i)
          gsub(/[ ]*$/, "", $i)
          if ( $i != "" ) {
            name_for_colno[i] = $i
            colno_for_name[$i] = i
            n_cols++
          }
        }
      }
    }
    END {
      if ( found )
        exit 0
      exit 1
    }
  ' "$file"
}
# Resolve TRIGGER_FUNCTION into the name of a callable shell function.
# When TRIGGER_FUNCTION names an existing file, that file is sourced as a
# plugin and the function to call becomes "trg_plugin"; otherwise the value
# is treated as a built-in trigger name and prefixed with "trg_".
# Globals: TRIGGER_FUNCTION (read and rewritten)
set_trg_func() {
  if [ -f "$TRIGGER_FUNCTION" ]; then
    # Quoted so that a plugin path containing whitespace is sourced intact.
    source "$TRIGGER_FUNCTION"
    TRIGGER_FUNCTION="trg_plugin"
  else
    TRIGGER_FUNCTION="trg_$TRIGGER_FUNCTION"
  fi
}
# ########################################################################
# Trigger functions
# ########################################################################
# Built-in trigger: print the value of a status variable taken from
# "mysqladmin extended-status".
# Arguments: $1 - variable name to look for (defaults to VARIABLE)
# Globals:   VARIABLE (fallback name), MYSQLOPTIONS (read)
# Outputs:   the 4th whitespace-separated field of every line that grep
#            matches against the name.
trg_status() {
  # Honour the argument; previously it was stored and then ignored in
  # favour of the global.
  local var=${1:-$VARIABLE}
  # MYSQLOPTIONS stays unquoted on purpose so it splits into separate options.
  mysqladmin ${MYSQLOPTIONS} extended-status | grep -- "$var" | awk '{print $4}'
}
# Built-in trigger: print how many processlist rows match.
# Saves "mysqladmin processlist" to a scratch file under TMPDIR, filters it
# with grep_processlist on column $1 using the pattern in MATCH, prints the
# number of matching rows and removes the scratch files.
# Arguments: $1 - processlist column name to examine
# Globals:   TMPDIR, MATCH, MYSQLOPTIONS (all read)
trg_processlist() {
  local var=$1
  local tmpfile="$TMPDIR/processlist"
  # MYSQLOPTIONS stays unquoted on purpose so it splits into separate options.
  mysqladmin ${MYSQLOPTIONS} processlist > "${tmpfile}-1"
  # MATCH is quoted: a pattern such as "Sending data" must stay one argument,
  # and an empty MATCH must not shift the remaining arguments.
  grep_processlist "${tmpfile}-1" "$var" "${MATCH:-}" 0 0 > "${tmpfile}-2"
  wc -l < "${tmpfile}-2" | awk '{print $1}'
  # Remove exactly the two scratch files rather than an unquoted glob.
  rm -f -- "${tmpfile}-1" "${tmpfile}-2"
  return
}
# Placeholder trigger: not implemented, it only prints the literal text
# TODO instead of a measured value.
trg_magic() {
  printf '%s\n' "TODO"
  return
}
# ########################################################################
# Write a message to STDERR; when EMAIL is non-empty, first mail it.
# Arguments: $1 - message text
#            $2 - mail subject; " on <hostname>" is appended to both the
#                 subject and the mailed message
# ########################################################################
log() {
  local message=${1}
  local subject=${2}
  if [ -n "${EMAIL}" ]; then
    # EMAIL is left unquoted so that a list of addresses splits into
    # one recipient argument each.
    printf '%s on %s\n' "${message}" "$(hostname)" \
      | mail -s "${subject} on $(hostname)" ${EMAIL}
  fi
  echo "${message}" >&2
}
# The main code that runs by default. Arguments are the command-line options.
# Flow: make sure DEST exists and is writable, create the tmpdir, resolve the
# trigger function, then loop forever: sample the trigger, run COLLECT once
# the value has been empty (with MAYBE_EMPTY "no") or above THRESHOLD for
# CYCLES consecutive checks, sleep, and purge samples older than PURGE days.
# The loop variables (d, matched, cycles_true, NOTE, PREFIX) are left global.
main() {
  # Make the collection location
  mkdir -p "${DEST}" || die "Can't make the destination directory"
  test -d "${DEST}" || die "${DEST} isn't a directory"
  test -w "${DEST}" || die "${DEST} isn't writable"

  # Test if we have root; warn if not, but it isn't critical.
  if [ "$(id -u)" != "0" ]; then
    echo 'Not running with root privileges!'
  fi

  # Make a secure tmpdir. Any output should be saved only in $TMPDIR/.
  mk_tmpdir

  # We increment this variable every time that the check is true,
  # and set it to 0 if it's false.
  cycles_true=0

  set_trg_func

  # Declared apart from the assignment below so that $? really is the exit
  # status of the trigger function and not that of "local".
  local detected
  local trg_exit

  while true; do
    d=$(date +%F-%T | tr :- _)

    # This is where we decide whether to execute 'collect'.
    # The idea is to generate a number and store into $detected,
    # and if $detected > $THRESHOLD, then we'll execute pt-collect.
    detected=$("${TRIGGER_FUNCTION}" "${VARIABLE}")
    trg_exit=$?

    if [ -z "${detected}" ] && [ "${MAYBE_EMPTY}" = "no" ]; then
      # Oops, couldn't connect, maybe max_connections problem?
      echo "$d The detected value is empty; something failed? Exit status is ${trg_exit}"
      matched="yes"
      cycles_true=$(($cycles_true + 1))
    elif [ "${detected:-0}" -gt "${THRESHOLD}" ]; then
      matched="yes"
      cycles_true=$(($cycles_true + 1))
    else
      matched="no"
      cycles_true=0
    fi

    NOTE="$d check results: ${VARIABLE} = ${detected}, matched = ${matched}, cycles_true = ${cycles_true}"

    # Actually execute the collection script.
    if [ "${matched:-no}" = "yes" ] && [ "${cycles_true}" -ge "${CYCLES}" ]; then
      log "${NOTE}" "${COLLECT} triggered"
      PREFIX="$(date +%F-%T | tr :- _)"
      echo "${NOTE}" > "${DEST}/${PREFIX}-trigger"
      # COLLECT and MYSQLOPTIONS stay unquoted so they split into words,
      # exactly as before.
      ${COLLECT} -d "${DEST}" -i "${DURATION}" -g "${GDB}" -o "${OPROFILE}" -p "${PREFIX}" -s "${STRACE}" -t "${TCPDUMP}" -f "${PCT_THRESHOLD}" -m "${MB_THRESHOLD}" -- ${MYSQLOPTIONS}
      echo "$d sleeping ${SLEEP} seconds to avoid DOS attack"
      sleep "${SLEEP}"
    else
      # Quoted so the note is printed verbatim (no word splitting/globbing).
      echo "${NOTE}"
      sleep "${INTERVAL}"
    fi

    # Delete things more than $PURGE days old
    find "${DEST}" -type f -mtime "+${PURGE}" -exec rm -f '{}' \;
    # The oprofile matches are directories, which "rm -f" cannot remove, so
    # "rm -rf" is used. -depth comes first because it is a global option,
    # and the search is skipped when the samples directory does not exist.
    if [ -d "/var/lib/oprofile/samples" ]; then
      find "/var/lib/oprofile/samples" -depth -type d -name 'pt_collect_*' \
        -mtime "+${PURGE}" -exec rm -rf '{}' \;
    fi
  done

  # Remove the secure tmpdir. This is not actually called because
  # this tool runs forever.
  rm_tmpdir
}
# Execute the program if it was not included from another file.
# This makes it possible to include without executing, and thus test.
# main is called with the script's arguments when the basename of $0 is
# "pt-stalk", or when that basename is "bash" and "$_" equals "$0".
# NOTE(review): "$_" depends on the command executed just before it is
# expanded, so the second condition is sensitive to statement order here;
# confirm the intended invocation before changing or reordering this test.
if [ "$(basename "$0")" = "pt-stalk" ] || [ "$(basename "$0")" = "bash" -a "$_" = "$0" ]; then
main "$@"
fi
# ############################################################################
# Documentation
# ############################################################################
:<<'DOCUMENTATION'
=pod
=head1 NAME
pt-stalk - Wait for a condition to occur then begin collecting data.
=head1 SYNOPSIS
Usage: pt-stalk
pt-stalk watches for a condition to become true, and when it does, executes
a script. By default it executes L<pt-collect>, but that can be customized.
This tool is useful for gathering diagnostic data when an infrequent event
occurs, so an expert person can review the data later.
=head1 RISKS
The following section is included to inform users about the potential risks,
whether known or unknown, of using this tool. The two main categories of risks
are those created by the nature of the tool (e.g. read-only tools vs. read-write
tools) and those created by bugs.
pt-stalk is a read-only tool. It should be very low-risk.
At the time of this release, we know of no bugs that could cause serious harm
to users.
The authoritative source for updated information is always the online issue
tracking system. Issues that affect this tool will be marked as such. You can
see a list of such issues at the following URL:
L<http://www.percona.com/bugs/pt-stalk>.
See also L<"BUGS"> for more information on filing bugs and getting help.
=head1 DESCRIPTION
Although pt-stalk comes pre-configured to do a specific thing, in general
this tool is just a skeleton script for the following flow of actions:
=over
=item 1.
Loop infinitely, sleeping between iterations.
=item 2.
In each iteration, run some command and get the output.
=item 3.
If the command fails or the output is larger than the threshold,
execute the collection script; but do not execute if the destination disk
is too full.
=back
By default, the tool is configured to execute mysqladmin extended-status and
extract the value of the Threads_running variable; if this is greater than
30, it runs the collection script. This is really just placeholder code,
and almost certainly needs to be customized!
If the tool does execute the collection script, it will wait for a while
before checking and executing again. This is to prevent a continuous
condition from causing a huge number of executions to fire off.
The name 'stalk' is because 'watch' is already taken, and 'stalk' is fun.
=head1 CONFIGURING
If a file with the same path as the tool plus a F<.conf> suffix exists (for
example F<pt-stalk.conf> next to F<pt-stalk>), then L<"ENVIRONMENT"> variables
are imported from it. For example, the config file has the format:
INTERVAL=10
GDB=yes
See L<"ENVIRONMENT">.
=head1 OPTIONS
This tool does not have any command-line options, but see
L<"ENVIRONMENT"> and L<"CONFIGURING">.
=head1 ENVIRONMENT
The following environment variables configure how, what, and when the tool
runs. They are all optional and can be specified either on the command line
or in the F<pt-stalk.conf> config file (see L<"CONFIGURING">).
=over
=item THRESHOLD (default 30)
This is the max number of <whatever> we want to tolerate.
=item VARIABLE (default Threads_running)
This is the thing to check for.
=item TRIGGER_FUNCTION (default status)
Built-in function name or plugin file name which returns the value of C<VARIABLE>. Possible values are:
=over
=item * status
Grep the value of C<VARIABLE> from C<mysqladmin extended-status>.
=item * processlist
Count the number of processes in C<mysqladmin processlist> whose
C<VARIABLE> column matches C<MATCH>. For example:
TRIGGER_FUNCTION="processlist" \
VARIABLE="State" \
MATCH="statistics" \
THRESHOLD="10"
The above triggers when more than 10 processes are in the "statistics" state.
C<MATCH> must be specified for this trigger function.
=item * magic
TODO
=item * plugin file name
A plugin file allows you to specify a custom trigger function. The plugin
file must contain a function called C<trg_plugin>. For example:
trg_plugin() {
# Do some stuff.
echo "$value"
}
The last output of the function (its "return value") must be a number.
This number is compared to C<THRESHOLD>. All L<"ENVIRONMENT"> variables
are available to the function.
Do not alter the tool's existing global variables. Prefix any plugin-specific
global variables with "PLUGIN_".
=back
=item CYCLES (default 1)
How many times must the condition be met before the script will fire?
=item GDB (default no)
Collect GDB stacktraces?
=item OPROFILE (default yes)
Collect oprofile data?
=item STRACE (default no)
Collect strace data?
=item TCPDUMP (default yes)
Collect tcpdump data?
=item EMAIL
Send mail to this list of addresses when the script triggers.
=item MYSQLOPTIONS
Any options to pass to mysql/mysqladmin, such as -u, -p, etc
=item INTERVAL (default 30)
This is the interval between checks.
=item MAYBE_EMPTY (default no)
If the command you're running to detect the condition is allowed to return
nothing (e.g. a grep line that might not even exist if there's no problem),
then set this to "yes".
=item COLLECT (default ${HOME}/bin/pt-collect)
This is the location of the 'collect' script.
=item DEST (default ${HOME}/collected/)
This is where to store the collected data.
=item DURATION (default 30)
How long to collect statistics data for? Make sure that this isn't longer
than SLEEP.
=item SLEEP (default DURATION * 10)
How long to sleep after collecting?
=item PCT_THRESHOLD (default 95)
Bail out if the disk is more than this %full.
=item MB_THRESHOLD (default 100)
Bail out if the disk has less than this many MB free.
=item PURGE (default 30)
Remove samples after this many days.
=back
=head1 SYSTEM REQUIREMENTS
This tool requires Bash v3 or newer.
=head1 BUGS
For a list of known bugs, see L<http://www.percona.com/bugs/pt-stalk>.
Please report bugs at L<https://bugs.launchpad.net/percona-toolkit>.
Include the following information in your bug report:
=over
=item * Complete command-line used to run the tool
=item * Tool L<"--version">
=item * MySQL version of all servers involved
=item * Output from the tool including STDERR
=item * Input files (log/dump/config files, etc.)
=back
If possible, include debugging output by running the tool with C<PTDEBUG>;
see L<"ENVIRONMENT">.
=head1 DOWNLOADING
Visit L<http://www.percona.com/software/percona-toolkit/> to download the
latest release of Percona Toolkit. Or, get the latest release from the
command line:
wget percona.com/get/percona-toolkit.tar.gz
wget percona.com/get/percona-toolkit.rpm
wget percona.com/get/percona-toolkit.deb
You can also get individual tools from the latest release:
wget percona.com/get/TOOL
Replace C<TOOL> with the name of any tool.
=head1 AUTHORS
Baron Schwartz, Justin Swanhart, Fernando Ipar, and Daniel Nichter
=head1 ABOUT PERCONA TOOLKIT
This tool is part of Percona Toolkit, a collection of advanced command-line
tools developed by Percona for MySQL support and consulting. Percona Toolkit
was forked from two projects in June, 2011: Maatkit and Aspersa. Those
projects were created by Baron Schwartz and developed primarily by him and
Daniel Nichter, both of whom are employed by Percona. Visit
L<http://www.percona.com/software/> for more software developed by Percona.
=head1 COPYRIGHT, LICENSE, AND WARRANTY
This program is copyright 2010-2011 Baron Schwartz, 2011 Percona Inc.
Feedback and improvements are welcome.
THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
systems, you can issue `man perlgpl' or `man perlartistic' to read these
licenses.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA.
=head1 VERSION
pt-stalk 2.0.0
=cut
DOCUMENTATION