mirror of
https://github.com/percona/percona-toolkit.git
synced 2026-03-07 02:00:50 +08:00
PT-2037 - option --system-only for pt-stalk
This commit: (1) moves all changes to the proper place, lib/bash/collect.sh; (2) refactors pt-stalk so that it is more modular; (3) moves the fix for PT-1734 to its proper place, lib/bash/collect.sh; and (4) fixes the tests for PT-1336.
This commit is contained in:
@@ -49,10 +49,77 @@ collect() {
|
||||
local d="$1" # directory to save results in
|
||||
local p="$2" # prefix for each result file
|
||||
|
||||
local mysqld_pid=""
|
||||
local cnt=$(($OPT_RUN_TIME / $OPT_SLEEP_COLLECT))
|
||||
|
||||
if [ ! "$OPT_SYSTEM_ONLY" ]; then
|
||||
local mysqld_pid=""
|
||||
local mysql_version=""
|
||||
local mysql_error_log=""
|
||||
local tail_error_log_pid=""
|
||||
local have_lock_waits_table=""
|
||||
local have_oprofile=""
|
||||
local mysqladmin_pid=""
|
||||
local mutex=""
|
||||
local tcpdump_pid=""
|
||||
local ps_instrumentation_enabled=""
|
||||
|
||||
collect_mysql_data_one
|
||||
fi
|
||||
|
||||
# Grab a few general things first. Background all of these so we can start
|
||||
# them all up as quickly as possible.
|
||||
if [ ! "$OPT_MYSQL_ONLY" ]; then
|
||||
collect_system_data
|
||||
fi
|
||||
|
||||
# This loop gathers data for the rest of the duration, and defines the time
|
||||
# of the whole job.
|
||||
log "Loop start: $(date +'TS %s.%N %F %T')"
|
||||
local start_time=$(date +'%s')
|
||||
local curr_time=$start_time
|
||||
local ts="$(date +"TS %s.%N %F %T")"
|
||||
|
||||
while [ $((curr_time - start_time)) -lt $OPT_RUN_TIME ]; do
|
||||
if [ ! "$OPT_MYSQL_ONLY" ]; then
|
||||
collect_system_data_loop
|
||||
fi
|
||||
|
||||
if [ ! "$OPT_SYSTEM_ONLY" ]; then
|
||||
collect_mysql_data_loop
|
||||
fi
|
||||
|
||||
curr_time=$(date +'%s')
|
||||
done
|
||||
log "Loop end: $(date +'TS %s.%N %F %T')"
|
||||
|
||||
if [ ! "$OPT_SYSTEM_ONLY" ]; then
|
||||
collect_mysql_data_two
|
||||
fi
|
||||
|
||||
# Finally, record what system we collected this data from.
|
||||
hostname > "$d/$p-hostname"
|
||||
|
||||
# Remove "empty" files, i.e. ones that are truly empty or
|
||||
# just contain timestamp lines. When a command above fails,
|
||||
# it may leave an empty file. But first wait another --run-time
|
||||
# seconds for any slow process to finish:
|
||||
# https://bugs.launchpad.net/percona-toolkit/+bug/1047701
|
||||
wait_for_subshells $OPT_RUN_TIME
|
||||
kill_all_subshells
|
||||
for file in "$d/$p-"*; do
|
||||
# If there's not at least 1 line that's not a TS,
|
||||
# then the file is empty.
|
||||
if [ -z "$(grep -v '^TS ' --max-count 10 "$file")" ]; then
|
||||
log "Removing empty file $file";
|
||||
rm "$file"
|
||||
fi
|
||||
done
|
||||
}
|
||||
|
||||
collect_mysql_data_one() {
|
||||
# Get pidof mysqld.
|
||||
if [ ! "$OPT_MYSQL_ONLY" ]; then
|
||||
port=$(mysql -ss -e 'SELECT @@port')
|
||||
port=$($CMD_MYSQL $EXT_ARGV -ss -e 'SELECT @@port')
|
||||
mysqld_pid=$(lsof -i ":${port}" | grep -i listen | cut -f 3 -d" ")
|
||||
fi
|
||||
|
||||
@@ -84,16 +151,16 @@ collect() {
|
||||
|
||||
# Get the major.minor version number. Version 3.23 doesn't matter for our
|
||||
# purposes, and other releases have x.x.x* version conventions so far.
|
||||
local mysql_version="$(awk '/^version[^_]/{print substr($2,1,3)}' "$d/$p-variables")"
|
||||
mysql_version="$(awk '/^version[^_]/{print substr($2,1,3)}' "$d/$p-variables")"
|
||||
|
||||
# Is MySQL logging its errors to a file? If so, tail that file.
|
||||
local mysql_error_log="$(awk '/^log_error/{print $2}' "$d/$p-variables")"
|
||||
mysql_error_log="$(awk '/^log_error\s/{print $2}' "$d/$p-variables")"
|
||||
if [ -z "$mysql_error_log" -a "$mysqld_pid" ]; then
|
||||
log $mysqld_pid
|
||||
# Try getting it from the open filehandle...
|
||||
mysql_error_log="$(ls -l /proc/$mysqld_pid/fd | awk '/ 2 ->/{print $NF}')"
|
||||
fi
|
||||
|
||||
local tail_error_log_pid=""
|
||||
if [ "$mysql_error_log" -a ! "$OPT_MYSQL_ONLY" ]; then
|
||||
log "The MySQL error log seems to be $mysql_error_log"
|
||||
tail -f "$mysql_error_log" >"$d/$p-log_error" &
|
||||
@@ -108,9 +175,9 @@ collect() {
|
||||
# Get a sample of these right away, so we can get these without interaction
|
||||
# with the other commands we're about to run.
|
||||
if [ "${mysql_version}" '>' "5.1" ]; then
|
||||
local mutex="SHOW ENGINE INNODB MUTEX"
|
||||
mutex="SHOW ENGINE INNODB MUTEX"
|
||||
else
|
||||
local mutex="SHOW MUTEX STATUS"
|
||||
mutex="SHOW MUTEX STATUS"
|
||||
fi
|
||||
innodb_status 1
|
||||
tokudb_status 1
|
||||
@@ -120,7 +187,6 @@ collect() {
|
||||
open_tables >> "$d/$p-opentables1" &
|
||||
|
||||
# If TCP dumping is specified, start that on the server's port.
|
||||
local tcpdump_pid=""
|
||||
if [ "$CMD_TCPDUMP" -a "$OPT_COLLECT_TCPDUMP" ]; then
|
||||
local port=$(awk '/^port/{print $2}' "$d/$p-variables")
|
||||
if [ "$port" ]; then
|
||||
@@ -131,7 +197,6 @@ collect() {
|
||||
|
||||
# Next, start oprofile gathering data during the whole rest of this process.
|
||||
# The --init should be a no-op if it has already been init-ed.
|
||||
local have_oprofile=""
|
||||
if [ "$CMD_OPCONTROL" -a "$OPT_COLLECT_OPROFILE" ]; then
|
||||
if $CMD_OPCONTROL --init; then
|
||||
$CMD_OPCONTROL --start --no-vmlinux
|
||||
@@ -143,38 +208,11 @@ collect() {
|
||||
local strace_pid=$!
|
||||
fi
|
||||
|
||||
# Grab a few general things first. Background all of these so we can start
|
||||
# them all up as quickly as possible.
|
||||
if [ ! "$OPT_MYSQL_ONLY" ]; then
|
||||
ps -eaF >> "$d/$p-ps" &
|
||||
top -bn${OPT_RUN_TIME} >> "$d/$p-top" &
|
||||
|
||||
[ "$mysqld_pid" ] && _lsof $mysqld_pid >> "$d/$p-lsof" &
|
||||
|
||||
if [ "$CMD_SYSCTL" ]; then
|
||||
$CMD_SYSCTL -a >> "$d/$p-sysctl" &
|
||||
fi
|
||||
|
||||
# collect dmesg events from 60 seconds ago until present
|
||||
if [ "$CMD_DMESG" ]; then
|
||||
local UPTIME=`cat /proc/uptime | awk '{ print $1 }'`
|
||||
local START_TIME=$(echo "$UPTIME 60" | awk '{print ($1 - $2)}')
|
||||
$CMD_DMESG | perl -ne 'm/\[\s*(\d+)\./; if ($1 > '${START_TIME}') { print }' >> "$d/$p-dmesg" &
|
||||
fi
|
||||
|
||||
local cnt=$(($OPT_RUN_TIME / $OPT_SLEEP_COLLECT))
|
||||
if [ "$CMD_VMSTAT" ]; then
|
||||
$CMD_VMSTAT $OPT_SLEEP_COLLECT $cnt >> "$d/$p-vmstat" &
|
||||
$CMD_VMSTAT $OPT_RUN_TIME 2 >> "$d/$p-vmstat-overall" &
|
||||
fi
|
||||
if [ "$CMD_IOSTAT" ]; then
|
||||
$CMD_IOSTAT -dx $OPT_SLEEP_COLLECT $cnt >> "$d/$p-iostat" &
|
||||
$CMD_IOSTAT -dx $OPT_RUN_TIME 2 >> "$d/$p-iostat-overall" &
|
||||
fi
|
||||
if [ "$CMD_MPSTAT" ]; then
|
||||
$CMD_MPSTAT -P ALL $OPT_SLEEP_COLLECT $cnt >> "$d/$p-mpstat" &
|
||||
$CMD_MPSTAT -P ALL $OPT_RUN_TIME 1 >> "$d/$p-mpstat-overall" &
|
||||
fi
|
||||
$CMD_MYSQL $EXT_ARGV -e "SHOW TABLES FROM INFORMATION_SCHEMA" \
|
||||
| grep -i "INNODB_LOCK_WAITS" >/dev/null 2>&1
|
||||
if [ $? -eq 0 ]; then
|
||||
have_lock_waits_table="yes"
|
||||
fi
|
||||
|
||||
# Collect multiple snapshots of the status variables. We use
|
||||
# mysqladmin -c even though it is buggy and won't stop on its
|
||||
@@ -183,92 +221,110 @@ collect() {
|
||||
# the database tends to exceed max_connections, so reconnecting
|
||||
# in the loop tends not to work very well.
|
||||
$CMD_MYSQLADMIN $EXT_ARGV ext -i$OPT_SLEEP_COLLECT -c$cnt >>"$d/$p-mysqladmin" &
|
||||
local mysqladmin_pid=$!
|
||||
fi
|
||||
mysqladmin_pid=$!
|
||||
|
||||
local have_lock_waits_table=""
|
||||
$CMD_MYSQL $EXT_ARGV -e "SHOW TABLES FROM INFORMATION_SCHEMA" \
|
||||
| grep -i "INNODB_LOCK_WAITS" >/dev/null 2>&1
|
||||
if [ $? -eq 0 ]; then
|
||||
have_lock_waits_table="yes"
|
||||
fi
|
||||
|
||||
# This loop gathers data for the rest of the duration, and defines the time
|
||||
# of the whole job.
|
||||
log "Loop start: $(date +'TS %s.%N %F %T')"
|
||||
local start_time=$(date +'%s')
|
||||
local curr_time=$start_time
|
||||
local ps_instrumentation_enabled=$($CMD_MYSQL $EXT_ARGV -e 'SELECT ENABLED FROM performance_schema.setup_instruments WHERE NAME = "transaction";' \
|
||||
ps_instrumentation_enabled=$($CMD_MYSQL $EXT_ARGV -e 'SELECT ENABLED FROM performance_schema.setup_instruments WHERE NAME = "transaction";' \
|
||||
| sed "2q;d" | sed 'y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/')
|
||||
|
||||
if [ $ps_instrumentation_enabled != "yes" ]; then
|
||||
log "Performance Schema instrumentation is disabled"
|
||||
fi
|
||||
}
|
||||
|
||||
while [ $((curr_time - start_time)) -lt $OPT_RUN_TIME ]; do
|
||||
if [ ! "$OPT_MYSQL_ONLY" ]; then
|
||||
# We check the disk, but don't exit, because we need to stop jobs if we
|
||||
# need to exit.
|
||||
disk_space $d > $d/$p-disk-space
|
||||
check_disk_space \
|
||||
$d/$p-disk-space \
|
||||
"$OPT_DISK_BYTES_FREE" \
|
||||
"$OPT_DISK_PCT_FREE" \
|
||||
|| break
|
||||
collect_system_data() {
|
||||
ps -eaF >> "$d/$p-ps" &
|
||||
top -bn${OPT_RUN_TIME} >> "$d/$p-top" &
|
||||
|
||||
# Sleep between collect cycles.
|
||||
# Synchronize ourselves onto the clock tick, so the sleeps are 1-second
|
||||
sleep $(date +'%s.%N' | awk "{print $OPT_SLEEP_COLLECT - (\$1 % $OPT_SLEEP_COLLECT)}")
|
||||
local ts="$(date +"TS %s.%N %F %T")"
|
||||
[ "$mysqld_pid" ] && _lsof $mysqld_pid >> "$d/$p-lsof" &
|
||||
|
||||
# #####################################################################
|
||||
# Collect data for this cycle.
|
||||
# #####################################################################
|
||||
if [ -d "/proc" ]; then
|
||||
if [ -f "/proc/diskstats" ]; then
|
||||
(echo $ts; cat /proc/diskstats) >> "$d/$p-diskstats" &
|
||||
fi
|
||||
if [ -f "/proc/stat" ]; then
|
||||
(echo $ts; cat /proc/stat) >> "$d/$p-procstat" &
|
||||
fi
|
||||
if [ -f "/proc/vmstat" ]; then
|
||||
(echo $ts; cat /proc/vmstat) >> "$d/$p-procvmstat" &
|
||||
fi
|
||||
if [ -f "/proc/meminfo" ]; then
|
||||
(echo $ts; cat /proc/meminfo) >> "$d/$p-meminfo" &
|
||||
fi
|
||||
if [ -f "/proc/slabinfo" ]; then
|
||||
(echo $ts; cat /proc/slabinfo) >> "$d/$p-slabinfo" &
|
||||
fi
|
||||
if [ -f "/proc/interrupts" ]; then
|
||||
(echo $ts; cat /proc/interrupts) >> "$d/$p-interrupts" &
|
||||
fi
|
||||
fi
|
||||
(echo $ts; df -k) >> "$d/$p-df" &
|
||||
(echo $ts; netstat -antp) >> "$d/$p-netstat" &
|
||||
(echo $ts; netstat -s) >> "$d/$p-netstat_s" &
|
||||
if [ "$CMD_SYSCTL" ]; then
|
||||
$CMD_SYSCTL -a >> "$d/$p-sysctl" &
|
||||
fi
|
||||
|
||||
# collect dmesg events from 60 seconds ago until present
|
||||
if [ "$CMD_DMESG" ]; then
|
||||
local UPTIME=`cat /proc/uptime | awk '{ print $1 }'`
|
||||
local START_TIME=$(echo "$UPTIME 60" | awk '{print ($1 - $2)}')
|
||||
$CMD_DMESG | perl -ne 'm/\[\s*(\d+)\./; if ($1 > '${START_TIME}') { print }' >> "$d/$p-dmesg" &
|
||||
fi
|
||||
|
||||
if [ "$CMD_VMSTAT" ]; then
|
||||
$CMD_VMSTAT $OPT_SLEEP_COLLECT $cnt >> "$d/$p-vmstat" &
|
||||
$CMD_VMSTAT $OPT_RUN_TIME 2 >> "$d/$p-vmstat-overall" &
|
||||
fi
|
||||
if [ "$CMD_IOSTAT" ]; then
|
||||
$CMD_IOSTAT -dx $OPT_SLEEP_COLLECT $cnt >> "$d/$p-iostat" &
|
||||
$CMD_IOSTAT -dx $OPT_RUN_TIME 2 >> "$d/$p-iostat-overall" &
|
||||
fi
|
||||
if [ "$CMD_MPSTAT" ]; then
|
||||
$CMD_MPSTAT -P ALL $OPT_SLEEP_COLLECT $cnt >> "$d/$p-mpstat" &
|
||||
$CMD_MPSTAT -P ALL $OPT_RUN_TIME 1 >> "$d/$p-mpstat-overall" &
|
||||
fi
|
||||
}
|
||||
|
||||
# Collect one cycle of MySQL samples.  Called once per iteration of the
# sampling loop in collect() unless --system-only is in effect.
#
# Takes no arguments; everything is read from the calling scope:
#   d, p                        - output directory and file prefix
#   ts                          - "TS ..." timestamp line for this cycle
#   have_lock_waits_table       - non-empty enables lock-wait sampling
#   mysql_version               - major.minor string, e.g. "5.7"
#   ps_instrumentation_enabled  - "yes" enables the P_S locks sample
#   CMD_MYSQL, EXT_ARGV         - client command and its extra arguments
#
# Most samples are started in the background and appended to their files;
# the caller is responsible for waiting on / reaping them.
collect_mysql_data_loop() {
   # $CMD_MYSQL and $EXT_ARGV are deliberately unquoted: they may carry
   # several words that must be split into separate arguments.
   (echo "$ts"; $CMD_MYSQL $EXT_ARGV -e "SHOW FULL PROCESSLIST\G") \
      >> "$d/$p-processlist" &

   if [ "$have_lock_waits_table" ]; then
      (echo "$ts"; lock_waits)   >>"$d/$p-lock-waits" &
      (echo "$ts"; transactions) >>"$d/$p-transactions" &
   fi

   # NOTE: '>' inside [ ] is a string (lexical) comparison, not a numeric one.
   # The flag is quoted so that an empty value (nothing came back from the
   # performance_schema query) is simply "not yes" instead of a
   # "unary operator expected" error on every cycle.
   if [ "${mysql_version}" '>' "5.6" ] \
      && [ "$ps_instrumentation_enabled" = "yes" ]; then
      ps_locks_transactions "$d/$p-ps-locks-transactions"
   fi

   if [ "${mysql_version}" '>' "5.6" ]; then
      (echo "$ts"; ps_prepared_statements) >> "$d/$p-prepared-statements" &
   fi

   slave_status "$d/$p-slave-status" "${mysql_version}"
}
|
||||
|
||||
collect_system_data_loop() {
|
||||
# We check the disk, but don't exit, because we need to stop jobs if we
|
||||
# need to exit.
|
||||
disk_space $d > $d/$p-disk-space
|
||||
check_disk_space \
|
||||
$d/$p-disk-space \
|
||||
"$OPT_DISK_BYTES_FREE" \
|
||||
"$OPT_DISK_PCT_FREE" \
|
||||
|| break
|
||||
|
||||
# Sleep between collect cycles.
|
||||
# Synchronize ourselves onto the clock tick, so the sleeps are 1-second
|
||||
sleep $(date +'%s.%N' | awk "{print $OPT_SLEEP_COLLECT - (\$1 % $OPT_SLEEP_COLLECT)}")
|
||||
ts="$(date +"TS %s.%N %F %T")"
|
||||
|
||||
# #####################################################################
|
||||
# Collect data for this cycle.
|
||||
# #####################################################################
|
||||
if [ -d "/proc" ]; then
|
||||
if [ -f "/proc/diskstats" ]; then
|
||||
(echo $ts; cat /proc/diskstats) >> "$d/$p-diskstats" &
|
||||
fi
|
||||
(echo $ts; $CMD_MYSQL $EXT_ARGV -e "SHOW FULL PROCESSLIST\G") \
|
||||
>> "$d/$p-processlist" &
|
||||
if [ "$have_lock_waits_table" ]; then
|
||||
(echo $ts; lock_waits) >>"$d/$p-lock-waits" &
|
||||
(echo $ts; transactions) >>"$d/$p-transactions" &
|
||||
if [ -f "/proc/stat" ]; then
|
||||
(echo $ts; cat /proc/stat) >> "$d/$p-procstat" &
|
||||
fi
|
||||
|
||||
if [ "${mysql_version}" '>' "5.6" ] && [ $ps_instrumentation_enabled == "yes" ]; then
|
||||
ps_locks_transactions "$d/$p-ps-locks-transactions"
|
||||
if [ -f "/proc/vmstat" ]; then
|
||||
(echo $ts; cat /proc/vmstat) >> "$d/$p-procvmstat" &
|
||||
fi
|
||||
|
||||
if [ "${mysql_version}" '>' "5.6" ]; then
|
||||
(echo $ts; ps_prepared_statements) >> "$d/$p-prepared-statements" &
|
||||
if [ -f "/proc/meminfo" ]; then
|
||||
(echo $ts; cat /proc/meminfo) >> "$d/$p-meminfo" &
|
||||
fi
|
||||
if [ -f "/proc/slabinfo" ]; then
|
||||
(echo $ts; cat /proc/slabinfo) >> "$d/$p-slabinfo" &
|
||||
fi
|
||||
if [ -f "/proc/interrupts" ]; then
|
||||
(echo $ts; cat /proc/interrupts) >> "$d/$p-interrupts" &
|
||||
fi
|
||||
fi
|
||||
(echo $ts; df -k) >> "$d/$p-df" &
|
||||
(echo $ts; netstat -antp) >> "$d/$p-netstat" &
|
||||
(echo $ts; netstat -s) >> "$d/$p-netstat_s" &
|
||||
}
|
||||
|
||||
slave_status "$d/$p-slave-status" "${mysql_version}"
|
||||
|
||||
curr_time=$(date +'%s')
|
||||
done
|
||||
log "Loop end: $(date +'TS %s.%N %F %T')"
|
||||
|
||||
collect_mysql_data_two() {
|
||||
if [ "$have_oprofile" ]; then
|
||||
$CMD_OPCONTROL --stop
|
||||
$CMD_OPCONTROL --dump
|
||||
@@ -316,28 +372,9 @@ collect() {
|
||||
open_tables >> "$d/$p-opentables2" &
|
||||
|
||||
# Kill backgrounded tasks.
|
||||
kill $mysqladmin_pid
|
||||
[ "$mysqladmin_pid" ] && kill $mysqladmin_pid
|
||||
[ "$tail_error_log_pid" ] && kill $tail_error_log_pid
|
||||
[ "$tcpdump_pid" ] && kill $tcpdump_pid
|
||||
|
||||
# Finally, record what system we collected this data from.
|
||||
hostname > "$d/$p-hostname"
|
||||
|
||||
# Remove "empty" files, i.e. ones that are truly empty or
|
||||
# just contain timestamp lines. When a command above fails,
|
||||
# it may leave an empty file. But first wait another --run-time
|
||||
# seconds for any slow process to finish:
|
||||
# https://bugs.launchpad.net/percona-toolkit/+bug/1047701
|
||||
wait_for_subshells $OPT_RUN_TIME
|
||||
kill_all_subshells
|
||||
for file in "$d/$p-"*; do
|
||||
# If there's not at least 1 line that's not a TS,
|
||||
# then the file is empty.
|
||||
if [ -z "$(grep -v '^TS ' --max-count 10 "$file")" ]; then
|
||||
log "Removing empty file $file";
|
||||
rm "$file"
|
||||
fi
|
||||
done
|
||||
}
|
||||
|
||||
open_tables() {
|
||||
@@ -391,8 +428,13 @@ transactions() {
|
||||
# Save the output of SHOW ENGINE TOKUDB STATUS, but only on servers that
# list a TokuDB engine in SHOW ENGINES.
#
# Arguments:
#   $1 - suffix appended to the output file name
# Reads from the calling scope: d (output directory), p (file prefix),
# CMD_MYSQL and EXT_ARGV (client command and its extra arguments).
# Output: appends to "$d/$p-tokudbstatus$1"; the file is removed again if
# the status query fails.
tokudb_status() {
   local n=$1

   # Query the status once, and only when the engine is present.  Running
   # it unconditionally as well would duplicate the sample and would issue
   # a failing query on servers without TokuDB.
   # $CMD_MYSQL and $EXT_ARGV are deliberately unquoted so that they split
   # into separate arguments.
   if $CMD_MYSQL $EXT_ARGV -e "SHOW ENGINES" \
      | grep -i 'tokudb' >/dev/null 2>&1; then
      $CMD_MYSQL $EXT_ARGV -e "SHOW ENGINE TOKUDB STATUS\G" \
         >> "$d/$p-tokudbstatus$n" || rm -f "$d/$p-tokudbstatus$n"
   fi
}
|
||||
|
||||
innodb_status() {
|
||||
@@ -475,11 +517,11 @@ slave_status() {
|
||||
echo -e "\n$sql\n" >> $outfile
|
||||
$CMD_MYSQL $EXT_ARGV -e "$sql" >> $outfile
|
||||
|
||||
sql="SELECT * FROM replication_connection_status\G"
|
||||
sql="SELECT * FROM performance_schema.replication_connection_status\G"
|
||||
echo -e "\n$sql\n" >> $outfile
|
||||
$CMD_MYSQL $EXT_ARGV -e "$sql" >> $outfile
|
||||
|
||||
sql="SELECT * FROM replication_applier_status JOIN replication_applier_status_by_coordinator USING(channel_name)\G"
|
||||
sql="SELECT * FROM performance_schema.replication_applier_status JOIN performance_schema.replication_applier_status_by_coordinator USING(channel_name)\G"
|
||||
echo -e "\n$sql\n" >> $outfile
|
||||
$CMD_MYSQL $EXT_ARGV -e "$sql" >> $outfile
|
||||
fi
|
||||
|
||||
Reference in New Issue
Block a user