#!/usr/bin/env bash
# This program is part of Percona Toolkit: http://www.percona.com/software/
# See "COPYRIGHT, LICENSE, AND WARRANTY" at the end of this file for legal
# notices and disclaimers.

# ########################################################################
# A bunch of snippets of awk code, to be reused in the functions below.
#
# Each snippet is a shell string holding awk statements.  The strings are
# double-quoted, so awk field references are written \$N and awk string
# literals are written \"...\".  Every awk comment must sit on its own line
# inside the string: awk comments run to end-of-line, so joining lines would
# silently comment out the statements that follow.
# ########################################################################

# Splits one device line into named awk variables.
awk_parse_line="
   # The entries in each stat line are as follows:
   # 1 major
   # 2 minor
   # 3 device name
   dev = \$3;
   # 4 reads
   reads = \$4;
   # 5 reads merged
   reads_merged = \$5;
   # 6 read sectors
   read_sectors = \$6;
   # 7 ms spent reading
   ms_spent_reading = \$7;
   # 8 writes
   writes = \$8;
   # 9 writes merged
   writes_merged = \$9;
   # 10 written sectors
   written_sectors = \$10;
   # 11 ms spent writing
   ms_spent_writing = \$11;
   # 12 IOs in progress
   ios_in_progress = \$12;
   # 13 ms spent doing io
   ms_spent_doing_io = \$13;
   # 14 ms spent, weighted by ios_in_progress
   ms_weighted = \$14;
"

# NOTE: this one is necessary in order to get the device listing.  NOTE: the
# 'devs' variable is initialized to 0, but it is pre-incremented, so a) it will
# reflect the accurate number of devices found (after filtering); b) iteration
# must be from 1 to devs, not from 0 to devs-1.
awk_save_sample_to_first="
   # Keep track of the natural order of the devices, so we can print them out
   # nicely later; and also keep the first-ever line of output.  This only
   # executes the first time through.
   devices[dev]++;
   if ( devices[dev] == 1 ) {
      devsort[++devs] = dev;
      first[dev \"_reads\"] = reads;
      first[dev \"_reads_merged\"] = reads_merged;
      first[dev \"_read_sectors\"] = read_sectors;
      first[dev \"_ms_spent_reading\"] = ms_spent_reading;
      first[dev \"_writes\"] = writes;
      first[dev \"_writes_merged\"] = writes_merged;
      first[dev \"_written_sectors\"] = written_sectors;
      first[dev \"_ms_spent_writing\"] = ms_spent_writing;
      first[dev \"_ios_in_progress\"] = ios_in_progress;
      first[dev \"_ms_spent_doing_io\"] = ms_spent_doing_io;
      first[dev \"_ms_weighted\"] = ms_weighted;
   }
"

# Runs on a timestamp line: counts it and remembers the first timestamp seen.
awk_set_iterations_and_timestamp="
   iterations++;
   curr_ts = \$2;
   if ( iterations == 1 ) {
      first[\"ts\"] = curr_ts;
   }
"

# Stores the parsed device line as the current sample for that device.
awk_save_sample_to_curr="
   curr[dev \"_reads\"] = reads;
   curr[dev \"_reads_merged\"] = reads_merged;
   curr[dev \"_read_sectors\"] = read_sectors;
   curr[dev \"_ms_spent_reading\"] = ms_spent_reading;
   curr[dev \"_writes\"] = writes;
   curr[dev \"_writes_merged\"] = writes_merged;
   curr[dev \"_written_sectors\"] = written_sectors;
   curr[dev \"_ms_spent_writing\"] = ms_spent_writing;
   curr[dev \"_ios_in_progress\"] = ios_in_progress;
   curr[dev \"_ms_spent_doing_io\"] = ms_spent_doing_io;
   curr[dev \"_ms_weighted\"] = ms_weighted;
"

# Copies the current sample into prev, and accumulates the per-device running
# sum of in-progress I/Os in prev[dev "_sum_ios_in_progress"].
awk_save_curr_as_prev="
   curr[\"ts\"] = curr_ts;
   for (i in curr) {
      prev[i] = curr[i];
   }
   for ( i = 1; i <= devs; i++ ) {
      dev = devsort[i];
      prev[dev \"_sum_ios_in_progress\"] += curr[dev \"_ios_in_progress\"];
   }
   ts = curr_ts;
"

# Sets mdev to the longest device name seen, with a minimum of 6.
awk_find_max_device_name_length="
   mdev = 6;
   for ( i = 1; i <= devs; i++ ) {
      dlen = length(devsort[i]);
      if ( dlen > mdev ) {
         mdev = dlen;
      }
   }
"

# Differences between the current sample and the FIRST sample for dev.
awk_get_overall_increments="
   # Get incremental numbers.
   reads = curr[dev \"_reads\"] - first[dev \"_reads\"];
   reads_merged = curr[dev \"_reads_merged\"] - first[dev \"_reads_merged\"];
   read_sectors = curr[dev \"_read_sectors\"] - first[dev \"_read_sectors\"];
   ms_spent_reading = curr[dev \"_ms_spent_reading\"] - first[dev \"_ms_spent_reading\"];
   writes = curr[dev \"_writes\"] - first[dev \"_writes\"];
   writes_merged = curr[dev \"_writes_merged\"] - first[dev \"_writes_merged\"];
   written_sectors = curr[dev \"_written_sectors\"] - first[dev \"_written_sectors\"];
   ms_spent_writing = curr[dev \"_ms_spent_writing\"] - first[dev \"_ms_spent_writing\"];
   ms_spent_doing_io = curr[dev \"_ms_spent_doing_io\"] - first[dev \"_ms_spent_doing_io\"];
   ms_weighted = curr[dev \"_ms_weighted\"] - first[dev \"_ms_weighted\"];
   in_progress = curr[dev \"_ios_in_progress\"];
   tot_in_progress = prev[dev \"_sum_ios_in_progress\"];
"

# Differences between the current sample and the PREVIOUS saved sample for dev.
awk_compute_incremental_stats="
   # Get incremental numbers.
   reads = curr[dev \"_reads\"] - prev[dev \"_reads\"];
   reads_merged = curr[dev \"_reads_merged\"] - prev[dev \"_reads_merged\"];
   read_sectors = curr[dev \"_read_sectors\"] - prev[dev \"_read_sectors\"];
   ms_spent_reading = curr[dev \"_ms_spent_reading\"] - prev[dev \"_ms_spent_reading\"];
   writes = curr[dev \"_writes\"] - prev[dev \"_writes\"];
   writes_merged = curr[dev \"_writes_merged\"] - prev[dev \"_writes_merged\"];
   written_sectors = curr[dev \"_written_sectors\"] - prev[dev \"_written_sectors\"];
   ms_spent_writing = curr[dev \"_ms_spent_writing\"] - prev[dev \"_ms_spent_writing\"];
   ms_spent_doing_io = curr[dev \"_ms_spent_doing_io\"] - prev[dev \"_ms_spent_doing_io\"];
   ms_weighted = curr[dev \"_ms_weighted\"] - prev[dev \"_ms_weighted\"];
   in_progress = curr[dev \"_ios_in_progress\"];
   tot_in_progress = curr[dev \"_sum_ios_in_progress\"];
"

# Zeroes every t_ accumulator, including t_in_progress.
awk_reset_accumulators="
   t_reads = 0;
   t_reads_merged = 0;
   t_read_sectors = 0;
   t_ms_spent_reading = 0;
   t_writes = 0;
   t_writes_merged = 0;
   t_written_sectors = 0;
   t_ms_spent_writing = 0;
   t_ms_spent_doing_io = 0;
   t_ms_weighted = 0;
   t_in_progress = 0;
"

# Copies the incremental variables into the t_ accumulators.  t_in_progress is
# not set here; the code that uses this snippet assigns it separately.
awk_copy_variables_to_accumulators="
   t_reads = reads;
   t_reads_merged = reads_merged;
   t_read_sectors = read_sectors;
   t_ms_spent_reading = ms_spent_reading;
   t_writes = writes;
   t_writes_merged = writes_merged;
   t_written_sectors = written_sectors;
   t_ms_spent_writing = ms_spent_writing;
   t_ms_spent_doing_io = ms_spent_doing_io;
   t_ms_weighted = ms_weighted;
"

# Turns the t_ accumulators into the printable statistics.  Expects 'elapsed'
# and 'devs_in_group' to have been set by the surrounding awk code.
awk_compute_read_write_stats="
   # Compute the per-second stats for reads, writes, and overall.
   reads_sec = t_reads / elapsed;
   read_requests = t_reads_merged + t_reads;
   mbytes_read_sec = t_read_sectors / elapsed / 2048;
   read_conc = t_ms_spent_reading / elapsed / 1000 / devs_in_group;
   if ( t_reads > 0 ) {
      read_rtime = t_ms_spent_reading / t_reads;
      avg_read_sz = t_read_sectors / t_reads;
   }
   else {
      read_rtime = 0;
      avg_read_sz = 0;
   }
   if ( read_requests > 0 ) {
      read_merge_pct = 100 * t_reads_merged / read_requests;
   }
   else {
      read_merge_pct = 0;
   }
   writes_sec = t_writes / elapsed;
   write_requests = t_writes_merged + t_writes;
   mbytes_written_sec = t_written_sectors / elapsed / 2048;
   write_conc = t_ms_spent_writing / elapsed / 1000 / devs_in_group;
   if ( t_writes > 0 ) {
      write_rtime = t_ms_spent_writing / t_writes;
      avg_write_sz = t_written_sectors / t_writes;
   }
   else {
      write_rtime = 0;
      avg_write_sz = 0;
   }
   if ( write_requests > 0 ) {
      write_merge_pct = 100 * t_writes_merged / write_requests;
   }
   else {
      write_merge_pct = 0;
   }
   # Compute the numbers for reads and writes together, the things for
   # which we do not have separate statistics.
   # Busy is what iostat calls %util.  This is the percent of
   # wall-clock time during which the device has I/O happening.
   busy = 100 * t_ms_spent_doing_io / (1000 * elapsed * devs_in_group);
   if ( first[\"ts\"] > 0 ) {
      line_ts = sprintf(\"%5.1f\", curr_ts - first[\"ts\"]);
   }
   else {
      line_ts = sprintf(\"%5.1f\", 0);
   }
"

# Returns true if the column should be displayed.
col_ok() {
   # $1 is a column name.  It is matched against the awk regex in $OPT_c
   # (an empty or unset OPT_c matches everything).  awk prints 0 on a match
   # and nothing otherwise, so the return status is 0 (true) on a match and
   # 1 (false) when there is none.
   local result
   result=$(printf '%s\n' "$1" | awk "/${OPT_c:-.}/{print 0}")
   return ${result:-1}
}

# Based on which columns match $OPT_c, designs a header and line printf format,
# and a printf statement to print the lines.  Sets the globals
# awk_print_header and awk_print_line (fmt, hdr and vars are scratch globals).
design_print_formats() {
   # For each device, print out the following: The timestamp offset and
   # device name.  Must embed the mdev Awk variable here, because the device
   # name is variable-length.
   fmt="\"%5s %-\" mdev \"s";
   hdr="${fmt}";
   vars="";

   # Each header label is padded so that it lines up with the width of the
   # matching printf conversion (e.g. ' %7.1f' is eight characters wide).

   # The per-second reads, read size (kB), per-second MB read, read merged pct, read
   # concurrency, and average response time for each read.
   if col_ok rd_s ; then
      fmt="${fmt} %7.1f"; hdr="${hdr}    rd_s"; vars="${vars}, reads_sec";
   fi
   if col_ok rd_avkb ; then
      fmt="${fmt} %7.1f"; hdr="${hdr} rd_avkb"; vars="${vars}, avg_read_sz";
   fi
   if col_ok rd_mb_s ; then
      fmt="${fmt} %7.1f"; hdr="${hdr} rd_mb_s"; vars="${vars}, mbytes_read_sec";
   fi
   if col_ok rd_mrg ; then
      fmt="${fmt} %5.0f%%"; hdr="${hdr} rd_mrg"; vars="${vars}, read_merge_pct";
   fi
   if col_ok rd_cnc ; then
      fmt="${fmt} %6.1f"; hdr="${hdr} rd_cnc"; vars="${vars}, read_conc";
   fi
   if col_ok rd_rt ; then
      fmt="${fmt} %7.1f"; hdr="${hdr}   rd_rt"; vars="${vars}, read_rtime";
   fi

   # The same for writes.
   if col_ok wr_s ; then
      fmt="${fmt} %7.1f"; hdr="${hdr}    wr_s"; vars="${vars}, writes_sec";
   fi
   if col_ok wr_avkb ; then
      fmt="${fmt} %7.1f"; hdr="${hdr} wr_avkb"; vars="${vars}, avg_write_sz";
   fi
   if col_ok wr_mb_s ; then
      fmt="${fmt} %7.1f"; hdr="${hdr} wr_mb_s"; vars="${vars}, mbytes_written_sec";
   fi
   if col_ok wr_mrg ; then
      fmt="${fmt} %5.0f%%"; hdr="${hdr} wr_mrg"; vars="${vars}, write_merge_pct";
   fi
   if col_ok wr_cnc ; then
      fmt="${fmt} %6.1f"; hdr="${hdr} wr_cnc"; vars="${vars}, write_conc";
   fi
   if col_ok wr_rt ; then
      fmt="${fmt} %7.1f"; hdr="${hdr}   wr_rt"; vars="${vars}, write_rtime";
   fi

   # Then busy%, in-progress, and line-ending.
   if col_ok busy ; then
      fmt="${fmt} %3.0f%%"; hdr="${hdr} busy"; vars="${vars}, busy";
   fi
   if col_ok in_prg ; then
      fmt="${fmt} %6d"; hdr="${hdr} in_prg"; vars="${vars}, t_in_progress";
   fi
   fmt="${fmt}\n\"";
   hdr="${hdr}\n\"";
   awk_print_header="printf(${hdr}, \"#ts\", \"device\");";
   awk_print_line="printf(${fmt}, line_ts, dev${vars});";
}

# Prints out one line for each disk, summing over the interval from first to
# last sample.  Arguments are passed to awk as its input files (none means
# awk reads STDIN).
#
# NOTE(review): the BEGIN block and the device-line rule below were missing
# from the damaged source and have been reconstructed from the snippets this
# function relies on; please confirm against the upstream release.
# NOTE(review): the awk program is written to the fixed path
# /tmp/aspersa.awk, which main() removes on exit; a predictable name in /tmp
# is unsafe on shared hosts and is a candidate for mktemp.
group_by_disk () {
   [ -z "${awk_print_line}" ] && design_print_formats
   cat > /tmp/aspersa.awk <<EOF
   BEGIN {
      devs    = 0;
      devname = "${OPT_d}";
   }
   \$1 !~ /TS/ && \$3 ~ devname {
      ${awk_parse_line}
      ${awk_save_sample_to_first}
      ${awk_save_sample_to_curr}
   }
   \$1 ~ /TS/ && NR > 1 {
      ${awk_set_iterations_and_timestamp}
   }
   END {
      if ( iterations < 2 ) {
         exit;
      }
      ${awk_find_max_device_name_length}
      ${awk_print_header}
      elapsed = curr_ts - first["ts"];
      for ( i = 1; i <= devs; i++ ) {
         dev = devsort[i];
         ${awk_get_overall_increments}
         ${awk_copy_variables_to_accumulators}
         # The in-progress operations needs to be averaged.
         t_in_progress = (tot_in_progress / (iterations - 1));
         devs_in_group = 1;
         ${awk_compute_read_write_stats}
         line_ts="{" (iterations - 1) "}";
         ${awk_print_line}
      }
   }
EOF
   awk -f /tmp/aspersa.awk "$@"
}

# Prints out one line for each sample, summing up all disks together.
# Arguments are passed to awk as its input files.
#
# NOTE(review): the BEGIN block and the device-line rule below were missing
# from the damaged source and have been reconstructed; please confirm against
# the upstream release.
group_by_sample() {
   [ -z "${awk_print_line}" ] && design_print_formats
   cat > /tmp/aspersa.awk <<EOF
   BEGIN {
      devs    = 0;
      devname = "${OPT_d}";
   }
   \$1 !~ /TS/ && \$3 ~ devname {
      ${awk_parse_line}
      ${awk_save_sample_to_first}
      ${awk_save_sample_to_curr}
   }
   \$1 ~ /TS/ && NR > 1 {
      ${awk_set_iterations_and_timestamp}
      printed_a_line = 0;
      if ( iterations == 1 ) {
         # The second time we see a timestamp we are ready to print a header.
         mdev = 6;
         if ( devs == 1 ) {
            ${awk_find_max_device_name_length}
         }
         ${awk_print_header}
      }
      elapsed = curr_ts - ts;
      if ( ts > 0 && elapsed > ${OPT_i:-0} ) {
         # Reset the t_ variables to zero.
         ${awk_reset_accumulators}
         for ( i = 1; i <= devs; i++ ) {
            dev = devsort[i];
            # Save the incrementals into named variables.
            ${awk_compute_incremental_stats}
            # Add the increments to the accumulators.
            t_reads += reads;
            t_reads_merged += reads_merged;
            t_read_sectors += read_sectors;
            t_ms_spent_reading += ms_spent_reading;
            t_writes += writes;
            t_writes_merged += writes_merged;
            t_written_sectors += written_sectors;
            t_ms_spent_writing += ms_spent_writing;
            t_ms_spent_doing_io += ms_spent_doing_io;
            t_ms_weighted += ms_weighted;
            t_in_progress += in_progress;
         }
         devs_in_group = devs;
         ${awk_compute_read_write_stats}
         if ( devs > 1 ) {
            dev = "{" devs "}";
         }
         else {
            dev = devsort[1];
         }
         ${awk_print_line}
         printed_a_line = 1;
      }
      if ( iterations == 1 || printed_a_line == 1 ) {
         # We don't save "curr" as "prev" on every sample we see, because if the
         # interval of printing is more than one sample, we want "prev" to be
         # the first sample in the interval, not the previous sample seen.
         ${awk_save_curr_as_prev}
      }
   }
EOF
   awk -f /tmp/aspersa.awk "$@"
}

# Prints out one line for each sample, for each disk that matches the pattern.
# Arguments are passed to awk as its input files.
# TODO: omits the first sample.
#
# NOTE(review): the BEGIN block and the device-line rule below were missing
# from the damaged source and have been reconstructed; please confirm against
# the upstream release.
group_by_all () {
   [ -z "${awk_print_line}" ] && design_print_formats
   cat > /tmp/aspersa.awk <<EOF
   BEGIN {
      devs    = 0;
      devname = "${OPT_d}";
   }
   \$1 !~ /TS/ && \$3 ~ devname {
      ${awk_parse_line}
      ${awk_save_sample_to_first}
      ${awk_save_sample_to_curr}
   }
   \$1 ~ /TS/ && NR > 1 {
      ${awk_set_iterations_and_timestamp}
      ${awk_find_max_device_name_length}
      if ( iterations > 1 ) {
         if ( devs > 1 || iterations == 2 ) {
            ${awk_print_header}
         }
         ${awk_reset_accumulators}
         elapsed = curr_ts - prev["ts"];
         for ( i = 1; i <= devs; i++ ) {
            dev = devsort[i];
            ${awk_compute_incremental_stats}
            ${awk_copy_variables_to_accumulators}
            t_in_progress = curr[dev "_ios_in_progress"];
            devs_in_group = 1;
            ${awk_compute_read_write_stats}
            ${awk_print_line}
         }
      }
      ${awk_save_curr_as_prev}
   }
EOF
   awk -f /tmp/aspersa.awk "$@"
}

# Show current help and settings
print_help() {
   cat <<-HELP
   You can control this program by key presses:
   ------------------- Key ------------------- ---- Current Setting ----
   A, D, S) Set the group-by mode              ${OPT_g:-(none)}
   c) Enter an awk regex to match column names ${OPT_c:-(none)}
   d) Enter an awk regex to match disk names   ${OPT_d:-(none)}
   i) Set the sample size in seconds           ${OPT_i:-(none)}
   s) Set the redisplay interval in seconds    ${OPT_s:-(none)}
   p) Pause the program
   q) Quit the program
   ------------------- Press any key to continue -----------------------
HELP
}

# Prints $OPT_ERR (if set) and the usage text, then exits with status 1.
usage() {
   if [ "${OPT_ERR}" ]; then
      echo "${OPT_ERR}"
   fi
   cat <<-USAGE
Usage: $0 [OPTIONS] [FILE]
   $0 does two things: 1) get /proc/diskstats periodically 2) aggregate the result.
   If you specify a FILE, or send input to STDIN, then step 1) is not performed.
Options:
   -c COLS     Awk regex of which columns to include (default cnc|rt|mb|busy|prg).
   -d DEVICES  Awk regex of which devices to include.
   -g GROUPBY  Group-by mode (default disk); specify one of the following:
               disk)   # Each line of output shows one disk device.
               sample) # Each line of output shows one sample of statistics.
               all)    # Each line of output shows one sample and one disk device.
   -i INTERVAL In -g sample mode, include INTERVAL seconds per sample.
   -k KEEPFILE File to save diskstats samples in (default /tmp/aspersa).
               If a non-default filename is used, it will be saved for later analysis.
   -n SAMPLES  When in interactive mode, stop after N samples.
   -s INTERVAL Sample /proc/diskstats every N seconds (default 1).
USAGE
   exit 1
}

# The main code that runs by default.  Arguments are the command-line options.
main() {
   # Get command-line options.  'for o' walks a snapshot of the arguments
   # while the shifts keep \$1 pointing at the value of the option being
   # handled, so that whatever is left in "\$@" afterwards is the file list.
   for o; do
      case "${o}" in
         --)
            shift; break;
            ;;
         --help)
            usage;
            ;;
         -c)
            shift; OPT_c="${1}"; shift;
            ;;
         -d)
            shift; OPT_d="${1}"; shift;
            ;;
         -g)
            shift; OPT_g="${1}"; shift;
            case "${OPT_g}" in
               disk)
                  ;;
               sample)
                  ;;
               all)
                  ;;
               *)
                  OPT_ERR="Bad option value";
                  usage
                  ;;
            esac
            ;;
         -i)
            shift; OPT_i="${1}"; shift;
            ;;
         -k)
            shift; OPT_k="${1}"; shift;
            ;;
         -n)
            shift; OPT_n="${1}"; shift;
            ;;
         -s)
            shift; OPT_s="${1}"; shift;
            ;;
         -*)
            OPT_ERR="Unknown option ${o}."
            usage
            ;;
      esac
   done
   OPT_i="${OPT_i:-}"; export OPT_i;
   OPT_k="${OPT_k:-/tmp/aspersa}"; export OPT_k;
   OPT_n="${OPT_n:-}"; export OPT_n;
   OPT_c="${OPT_c:-cnc|rt|mb|busy|prg}"; export OPT_c;
   OPT_d="${OPT_d:-}"; export OPT_d;
   OPT_s="${OPT_s:-1}"; export OPT_s;
   OPT_g="${OPT_g:-disk}"; export OPT_g;

   # We need to "do the right thing."  The user might invoke any of several
   # ways; we get samples every now and then unless there is data on STDIN or a
   # file to read.  The STDIN check must name /dev/stdin: a bare '-p 1' tests
   # a file called "1" in the current directory, not a file descriptor.
   if [ $# -gt 0 ] || [ -p /dev/stdin ]; then
      READ_FILE=1
   fi

   # If we are interactive and there's no file, we gather stats to play with.
   if [ -z "${READ_FILE}" ]; then
      PARENT=$$
      loops=1
      while true; do
         cat /proc/diskstats >> "${OPT_k}"
         date +"TS %s.%N %F %T" >> "${OPT_k}"
         if ! ps -p "${PARENT}" >/dev/null 2>&1 ; then
            # The parent process doesn't exist anymore -- quit.
            finished="yes"
         elif [ "${OPT_n}" ]; then
            if [ "${loops}" -gt "${OPT_n}" ] ; then
               finished="yes"
            fi
         fi
         if [ "${finished}" ]; then
            if [ "${OPT_k}" = "/tmp/aspersa" ]; then
               rm -f /tmp/aspersa
            fi
            break;
         fi
         sleep "${OPT_s}"
         loops=$(($loops + 1))
      done &

      # Sleep until the loop has gathered 2 samples.  The keep-file may not
      # exist yet on the first pass; treat that as zero samples rather than
      # letting an empty count break the numeric test and end the wait early.
      while true; do
         samples=$(grep -c TS "${OPT_k}" 2>/dev/null)
         if [ "${samples:-0}" -ge 2 ]; then
            break
         fi
         sleep .5
      done
   fi

   if [ -z "${READ_FILE}" ]; then
      group_by_${OPT_g} "${OPT_k}"
   else
      group_by_${OPT_g} "$@"
   fi

   # Don't be "interactive" unless the user actually has control.
   if [ ! -t 0 -o ! -t 1 ]; then
      exit;
   fi

   # We use this in iterative-loop mode
   if [ -z "${READ_FILE}" ]; then
      TAIL_LINES=$(wc -l < /proc/diskstats)
   fi

   while [ -z "${OPT_n}" -o "${i:-0}" -le "${OPT_n:-0}" ]; do
      i=$(( ${i:-1} + 1 ))

      # Re-decide the timeout every loop
      if [ -z "${READ_FILE}" ]; then
         TIMEOUT="-t ${OPT_s}"
      fi
      cmd="" # Must reset, some bash won't clear it after a read times out.
      # TIMEOUT is deliberately unquoted: it must split into '-t' and the
      # number, or vanish entirely when it is unset.
      read $TIMEOUT -n 1 -s cmd junk
      case "${cmd}" in
         A)
            OPT_g="all"
            FIRST_LOOP="1"
            ;;
         d)
            # -r keeps backslashes in the regex the user types.
            read -r -p "Enter a disk/device pattern: " OPT_d
            FIRST_LOOP="1"
            ;;
         D)
            OPT_g="disk"
            FIRST_LOOP="1"
            ;;
         c)
            # -r keeps backslashes in the regex the user types.
            read -r -p "Enter a column pattern: " OPT_c
            FIRST_LOOP="1"
            awk_print_line="" # Make it re-compute the column headers
            ;;
         i)
            read -p "Enter a sample size: " OPT_i
            FIRST_LOOP="1"
            ;;
         p)
            read -n 1 -p "Paused - press any key to continue"
            ;;
         q)
            break
            ;;
         s)
            read -p "Enter a redisplay interval: " OPT_s
            FIRST_LOOP="1"
            ;;
         S)
            OPT_g="sample"
            FIRST_LOOP="1"
            ;;
         '?')
            print_help;
            read -n1 -s
            ;;
      esac

      if [ -z "${READ_FILE}" ]; then
         if [ -z "${FIRST_LOOP}" ]; then
            # We only print out what's new since last printout
            N=$(($TAIL_LINES * 2 + 2)) # Extra is for TS lines
            tail -n $N "${OPT_k}" 2>/dev/null | group_by_${OPT_g} | tail -n +2
         else
            group_by_${OPT_g} "${OPT_k}"
         fi
         FIRST_LOOP=""
      else
         group_by_${OPT_g} "$@"
      fi
   done

   if [ "${OPT_k}" = "/tmp/aspersa" ]; then
      rm -f "/tmp/aspersa"
   fi
   rm -f /tmp/aspersa.awk
}

# Execute the program if it was not included from another file.  This makes it
# possible to include without executing, and thus test.
if [ "$(basename "$0")" = "pt-diskstats" ] || [ "$(basename "$0")" = "bash" -a "$_" = "$0" ]; then
   main "$@"
fi

# ############################################################################
# Documentation
# ############################################################################
:<<'DOCUMENTATION'
=pod

=head1 NAME

pt-diskstats - Read a file generated from /proc/diskstats and summarize it.

=head1 SYNOPSIS

Usage: pt-diskstats [OPTION...]

=head1 DESCRIPTION

The file read should look like this:

   <contents of /proc/diskstats>
   TS <timestamp>
   <contents of /proc/diskstats>
   TS <timestamp>
   ... et cetera
   TS <timestamp>  <-- must end with a TS line.

=head1 AUTHORS

Baron Schwartz

=head1 COPYRIGHT, LICENSE, AND WARRANTY

This program is copyright 2010-2011 Baron Schwartz, 2011 Percona Inc.
Feedback and improvements are welcome.

THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation, version 2; OR the Perl Artistic License.  On UNIX and similar
systems, you can issue `man perlgpl' or `man perlartistic' to read these
licenses.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA  02111-1307  USA.

legal.pod (END)

=cut

DOCUMENTATION