From d3ef9edaaa93c5adeb8a673b5182aa854295aeb2 Mon Sep 17 00:00:00 2001 From: Brian Fraser Date: Thu, 22 Dec 2011 19:24:56 -0300 Subject: [PATCH] Several corrections as per Daniel and Baron's feedback. Still TODO: Attributes still have a single method that doubles as a getter and setter. The constructor for Diskstats is still weird -- A tad more stricter than it should be, if anything. ->print_rest is still rest, even though that's hardly memorable, mostly because of a lack of ideas on what to rename it. The main loop in the Menu is still a while (1). As a nice perk, it's nearly twice as fast now! It also adds a _very_ experimental --memory-for-speed argument, which turns on memoization for the current biggest bottleneck. --- bin/pt-diskstats-shell | 895 ---------------------------------- lib/Diskstats.pm | 489 +++++++++++-------- lib/DiskstatsGroupByAll.pm | 45 +- lib/DiskstatsGroupByDisk.pm | 46 +- lib/DiskstatsGroupBySample.pm | 104 ++-- lib/DiskstatsMenu.pm | 219 ++++----- lib/ReadKeyMini.pm | 60 +-- lib/pt_diskstats.pm | 39 +- t/lib/Diskstats.t | 61 ++- t/pt-diskstats/pt-diskstats.t | 2 +- 10 files changed, 619 insertions(+), 1341 deletions(-) delete mode 100755 bin/pt-diskstats-shell diff --git a/bin/pt-diskstats-shell b/bin/pt-diskstats-shell deleted file mode 100755 index 489b00fa..00000000 --- a/bin/pt-diskstats-shell +++ /dev/null @@ -1,895 +0,0 @@ -#!/usr/bin/env bash - -# This program is part of Percona Toolkit: http://www.percona.com/software/ -# See "COPYRIGHT, LICENSE, AND WARRANTY" at the end of this file for legal -# notices and disclaimers. - -usage() { - if [ "${OPT_ERR}" ]; then - echo "${OPT_ERR}" >&2 - fi - echo "Usage: pt-diskstats [OPTIONS] [FILES]" >&2 - echo "For more information, 'man pt-diskstats' or 'perldoc $0'" >&2 - exit 1 -} - -# Show current help and settings -print_help() { - cat <<-HELP - You can control this program by key presses: - ------------------- Key ------------------- ---- Current Setting ---- - A, D, S) Set the group-by mode ${OPT_g:-(none)} - c) Enter an awk regex to match column names ${OPT_c:-(none)} - d) Enter an awk regex to match disk names ${OPT_d:-(none)} - i) Set the sample size in seconds ${OPT_i:-(none)} - s) Set the redisplay interval in seconds ${OPT_s:-(none)} - p) Pause the program - q) Quit the program - ------------------- Press any key to continue ----------------------- - HELP -} - -# ######################################################################## -# A bunch of snippets of awk code, to be reused in the functions below. -# ######################################################################## -awk_parse_line=" - # The entries in each stat line are as follows: - # 1 major - # 2 minor - # 3 device name - dev = \$3; - # 4 reads - reads = \$4; - # 5 reads merged - reads_merged = \$5; - # 6 read sectors - read_sectors = \$6; - # 7 ms spent reading - ms_spent_reading = \$7; - # 8 writes - writes = \$8; - # 9 writes merged - writes_merged = \$9; - # 10 written sectors - written_sectors = \$10; - # 11 ms spent writing - ms_spent_writing = \$11; - # 12 IOs in progress - ios_in_progress = \$12; - # 13 ms spent doing io - ms_spent_doing_io = \$13; - # 14 ms spent, weighted by ios_in_progress - ms_weighted = \$14; -" - -# NOTE: this one is necessary in order to get the device listing. NOTE: the -# 'devs' variable is initialized to 0, but it is pre-incremented, so a) it will -# reflect the accurate number of devices found (after filtering); b) iteration -# must be from 1 to devs, not from 0 to devs-1. -awk_save_sample_to_first=" - # Keep track of the natural order of the devices, so we can print them out - # nicely later; and also keep the first-ever line of output. This only - # executes the first time through. - devices[dev]++; - if ( devices[dev] == 1 ) { - devsort[++devs] = dev; - first[dev \"_reads\"] = reads; - first[dev \"_reads_merged\"] = reads_merged; - first[dev \"_read_sectors\"] = read_sectors; - first[dev \"_ms_spent_reading\"] = ms_spent_reading; - first[dev \"_writes\"] = writes; - first[dev \"_writes_merged\"] = writes_merged; - first[dev \"_written_sectors\"] = written_sectors; - first[dev \"_ms_spent_writing\"] = ms_spent_writing; - first[dev \"_ios_in_progress\"] = ios_in_progress; - first[dev \"_ms_spent_doing_io\"] = ms_spent_doing_io; - first[dev \"_ms_weighted\"] = ms_weighted; - } -" - -awk_set_iterations_and_timestamp=" - iterations++; - curr_ts = \$2; - if ( iterations == 1 ) { - first[\"ts\"] = curr_ts; - } -" - -awk_save_sample_to_curr=" - curr[dev \"_reads\"] = reads; - curr[dev \"_reads_merged\"] = reads_merged; - curr[dev \"_read_sectors\"] = read_sectors; - curr[dev \"_ms_spent_reading\"] = ms_spent_reading; - curr[dev \"_writes\"] = writes; - curr[dev \"_writes_merged\"] = writes_merged; - curr[dev \"_written_sectors\"] = written_sectors; - curr[dev \"_ms_spent_writing\"] = ms_spent_writing; - curr[dev \"_ios_in_progress\"] = ios_in_progress; - curr[dev \"_ms_spent_doing_io\"] = ms_spent_doing_io; - curr[dev \"_ms_weighted\"] = ms_weighted; -" - -awk_save_curr_as_prev=" - curr[\"ts\"] = curr_ts; - for (i in curr) { - prev[i] = curr[i]; - } - for ( i = 1; i <= devs; i++ ) { - dev = devsort[i]; - prev[dev \"_sum_ios_in_progress\"] += curr[dev \"_ios_in_progress\"]; - } - ts = curr_ts; -" - -awk_find_max_device_name_length=" - mdev = 6; - for ( i = 1; i <= devs; i++ ) { - dlen = length(devsort[i]); - if ( dlen > mdev ) { - mdev = dlen; - } - } -" - -awk_get_overall_increments=" - # Get incremental numbers. - reads = curr[dev \"_reads\"] - first[dev \"_reads\"]; - reads_merged = curr[dev \"_reads_merged\"] - first[dev \"_reads_merged\"]; - read_sectors = curr[dev \"_read_sectors\"] - first[dev \"_read_sectors\"]; - ms_spent_reading = curr[dev \"_ms_spent_reading\"] - first[dev \"_ms_spent_reading\"]; - writes = curr[dev \"_writes\"] - first[dev \"_writes\"]; - writes_merged = curr[dev \"_writes_merged\"] - first[dev \"_writes_merged\"]; - written_sectors = curr[dev \"_written_sectors\"] - first[dev \"_written_sectors\"]; - ms_spent_writing = curr[dev \"_ms_spent_writing\"] - first[dev \"_ms_spent_writing\"]; - ms_spent_doing_io = curr[dev \"_ms_spent_doing_io\"] - first[dev \"_ms_spent_doing_io\"]; - ms_weighted = curr[dev \"_ms_weighted\"] - first[dev \"_ms_weighted\"]; - in_progress = curr[dev \"_ios_in_progress\"]; - tot_in_progress = prev[dev \"_sum_ios_in_progress\"]; -" - -awk_compute_incremental_stats=" - # Get incremental numbers. - reads = curr[dev \"_reads\"] - prev[dev \"_reads\"]; - reads_merged = curr[dev \"_reads_merged\"] - prev[dev \"_reads_merged\"]; - read_sectors = curr[dev \"_read_sectors\"] - prev[dev \"_read_sectors\"]; - ms_spent_reading = curr[dev \"_ms_spent_reading\"] - prev[dev \"_ms_spent_reading\"]; - writes = curr[dev \"_writes\"] - prev[dev \"_writes\"]; - writes_merged = curr[dev \"_writes_merged\"] - prev[dev \"_writes_merged\"]; - written_sectors = curr[dev \"_written_sectors\"] - prev[dev \"_written_sectors\"]; - ms_spent_writing = curr[dev \"_ms_spent_writing\"] - prev[dev \"_ms_spent_writing\"]; - ms_spent_doing_io = curr[dev \"_ms_spent_doing_io\"] - prev[dev \"_ms_spent_doing_io\"]; - ms_weighted = curr[dev \"_ms_weighted\"] - prev[dev \"_ms_weighted\"]; - in_progress = curr[dev \"_ios_in_progress\"]; - tot_in_progress = curr[dev \"_sum_ios_in_progress\"]; -" - -awk_reset_accumulators=" - t_reads = 0; - t_reads_merged = 0; - t_read_sectors = 0; - t_ms_spent_reading = 0; - t_writes = 0; - t_writes_merged = 0; - t_written_sectors = 0; - t_ms_spent_writing = 0; - t_ms_spent_doing_io = 0; - t_ms_weighted = 0; - t_in_progress = 0; -" - -awk_copy_variables_to_accumulators=" - t_reads = reads; - t_reads_merged = reads_merged; - t_read_sectors = read_sectors; - t_ms_spent_reading = ms_spent_reading; - t_writes = writes; - t_writes_merged = writes_merged; - t_written_sectors = written_sectors; - t_ms_spent_writing = ms_spent_writing; - t_ms_spent_doing_io = ms_spent_doing_io; - t_ms_weighted = ms_weighted; -" - -awk_compute_read_write_stats=" - # Compute the per-second stats for reads, writes, and overall. - reads_sec = t_reads / elapsed; - read_requests = t_reads_merged + t_reads; - mbytes_read_sec = (t_read_sectors * 512 / 1024) / elapsed / 1024; - read_conc = t_ms_spent_reading / elapsed / 1000 / devs_in_group; - if ( t_reads > 0 ) { - read_rtime = t_ms_spent_reading / t_reads; - avg_read_sz = (t_read_sectors * 512 / 1024) / t_reads; - } - else { - read_rtime = 0; - avg_read_sz = 0; - } - if ( read_requests > 0 ) { - read_merge_pct = 100 * t_reads_merged / read_requests; - } - else { - read_merge_pct = 0; - } - writes_sec = t_writes / elapsed; - write_requests = t_writes_merged + t_writes; - mbytes_written_sec = (t_written_sectors * 512 / 1024) / elapsed / 1024; - write_conc = t_ms_spent_writing / elapsed / 1000 / devs_in_group; - if ( t_writes > 0 ) { - write_rtime = t_ms_spent_writing / t_writes; - avg_write_sz = (t_written_sectors * 512 / 1024) / t_writes; - } - else { - write_rtime = 0; - avg_write_sz = 0; - } - if ( write_requests > 0 ) { - write_merge_pct = 100 * t_writes_merged / write_requests; - } - else { - write_merge_pct = 0; - } - # Compute the numbers for reads and writes together, the things for - # which we do not have separate statistics. - # Busy is what iostat calls %util. This is the percent of - # wall-clock time during which the device has I/O happening. - busy = 100 * t_ms_spent_doing_io / (1000 * elapsed * devs_in_group); - if ( first[\"ts\"] > 0 ) { - line_ts = sprintf(\"%5.1f\", curr_ts - first[\"ts\"]); - } - else { - line_ts = sprintf(\"%5.1f\", 0); - } -" - -# Returns true if the column should be displayed. -col_ok() { - result=$(echo $1 | awk "/${OPT_c:-.}/{print 0}") - return ${result:-1} -} - -# Based on which columns match $OPT_c, designs a header and line printf format, -# and a printf statement to print the lines. -design_print_formats() { - # For each device, print out the following: The timestamp offset and - # device name. Must embed the mdev Awk variable here, because the device - # name is variable-length. - fmt="\"%5s %-\" mdev \"s"; - hdr="${fmt}"; - vars=""; - # The per-second reads, read size (kB), per-second MB read, read merged pct, read - # concurrency, and average response time for each read. - if col_ok rd_s ; then fmt="${fmt} %7.1f"; hdr="${hdr} rd_s"; vars="${vars}, reads_sec"; fi - if col_ok rd_avkb ; then fmt="${fmt} %7.1f"; hdr="${hdr} rd_avkb"; vars="${vars}, avg_read_sz"; fi - if col_ok rd_mb_s ; then fmt="${fmt} %7.1f"; hdr="${hdr} rd_mb_s"; vars="${vars}, mbytes_read_sec"; fi - if col_ok rd_mrg ; then fmt="${fmt} %5.0f%%"; hdr="${hdr} rd_mrg"; vars="${vars}, read_merge_pct"; fi - if col_ok rd_cnc ; then fmt="${fmt} %6.1f"; hdr="${hdr} rd_cnc"; vars="${vars}, read_conc"; fi - if col_ok rd_rt ; then fmt="${fmt} %7.1f"; hdr="${hdr} rd_rt"; vars="${vars}, read_rtime"; fi - # The same for writes. - if col_ok wr_s ; then fmt="${fmt} %7.1f"; hdr="${hdr} wr_s"; vars="${vars}, writes_sec"; fi - if col_ok wr_avkb ; then fmt="${fmt} %7.1f"; hdr="${hdr} wr_avkb"; vars="${vars}, avg_write_sz"; fi - if col_ok wr_mb_s ; then fmt="${fmt} %7.1f"; hdr="${hdr} wr_mb_s"; vars="${vars}, mbytes_written_sec"; fi - if col_ok wr_mrg ; then fmt="${fmt} %5.0f%%"; hdr="${hdr} wr_mrg"; vars="${vars}, write_merge_pct"; fi - if col_ok wr_cnc ; then fmt="${fmt} %6.1f"; hdr="${hdr} wr_cnc"; vars="${vars}, write_conc"; fi - if col_ok wr_rt ; then fmt="${fmt} %7.1f"; hdr="${hdr} wr_rt"; vars="${vars}, write_rtime"; fi - # Then busy%, in-progress, and line-ending. - if col_ok busy ; then fmt="${fmt} %3.0f%%"; hdr="${hdr} busy"; vars="${vars}, busy"; fi - if col_ok in_prg ; then fmt="${fmt} %6d"; hdr="${hdr} in_prg"; vars="${vars}, t_in_progress"; fi - fmt="${fmt}\n\""; - hdr="${hdr}\n\""; - awk_print_header="printf(${hdr}, \"#ts\", \"device\");"; - awk_print_line="printf(${fmt}, line_ts, dev${vars});"; -} - -# Prints out one line for each disk, summing over the interval from first to -# last sample. -group_by_disk () { - [ -z "${awk_print_line}" ] && design_print_formats - awk " - BEGIN { - devs = 0; - devname = \"${OPT_d}\"; - } - \$1 !~ /TS/ && \$3 ~ devname { - ${awk_parse_line} - ${awk_save_sample_to_first} - ${awk_save_sample_to_curr} - } - \$1 ~ /TS/ && NR > 1 { - ${awk_set_iterations_and_timestamp} - } - END { - if ( iterations < 2 ) { - exit; - } - ${awk_find_max_device_name_length} - ${awk_print_header} - elapsed = curr_ts - first[\"ts\"]; - for ( i = 1; i <= devs; i++ ) { - dev = devsort[i]; - ${awk_get_overall_increments} - ${awk_copy_variables_to_accumulators} - # The in-progress operations needs to be averaged. - t_in_progress = (tot_in_progress / (iterations - 1)); - devs_in_group = 1; - ${awk_compute_read_write_stats} - line_ts=\"{\" (iterations - 1) \"}\"; - ${awk_print_line} - } - } " "$@" -} - -# Prints out one line for each sample, summing up all disks together. -group_by_sample() { - [ -z "${awk_print_line}" ] && design_print_formats - awk " - BEGIN { - devs = 0; - devname = \"${OPT_d}\"; - } - \$1 !~ /TS/ && \$3 ~ devname { - ${awk_parse_line} - ${awk_save_sample_to_first} - ${awk_save_sample_to_curr} - } - \$1 ~ /TS/ && NR > 1 { - ${awk_set_iterations_and_timestamp} - printed_a_line = 0; - if ( iterations == 1 ) { - # The second time we see a timestamp we are ready to print a header. - mdev = 6; - if ( devs == 1 ) { - ${awk_find_max_device_name_length} - } - ${awk_print_header} - } - elapsed = curr_ts - ts; - if ( ts > 0 && elapsed > ${OPT_i:-0} ) { - # Reset the t_ variables to zero. - ${awk_reset_accumulators} - for ( i = 1; i <= devs; i++ ) { - dev = devsort[i]; - # Save the incrementals into named variables. - ${awk_compute_incremental_stats} - # Add the increments to the accumulators. - t_reads += reads; - t_reads_merged += reads_merged; - t_read_sectors += read_sectors; - t_ms_spent_reading += ms_spent_reading; - t_writes += writes; - t_writes_merged += writes_merged; - t_written_sectors += written_sectors; - t_ms_spent_writing += ms_spent_writing; - t_ms_spent_doing_io += ms_spent_doing_io; - t_ms_weighted += ms_weighted; - t_in_progress += in_progress; - } - devs_in_group = devs; - ${awk_compute_read_write_stats} - if ( devs > 1 ) { - dev = \"{\" devs \"}\"; - } - else { - dev = devsort[1]; - } - ${awk_print_line} - printed_a_line = 1; - } - if ( iterations == 1 || printed_a_line == 1 ) { - # We don't save curr as prev on every sample we see, because if the - # interval of printing is more than one sample, we want prev to be - # the first sample in the interval, not the previous sample seen. - ${awk_save_curr_as_prev} - } - } " "$@" -} - -# Prints out one line for each sample, for each disk that matches the pattern. -# TODO: omits the first sample. -group_by_all () { - [ -z "${awk_print_line}" ] && design_print_formats - cat > /tmp/percona-toolkit.awk < 1 { - ${awk_set_iterations_and_timestamp} - ${awk_find_max_device_name_length} - if ( iterations > 1 ) { - if ( devs > 1 || iterations == 2 ) { - ${awk_print_header} - } - ${awk_reset_accumulators} - elapsed = curr_ts - prev["ts"]; - for ( i = 1; i <= devs; i++ ) { - dev = devsort[i]; - ${awk_compute_incremental_stats} - ${awk_copy_variables_to_accumulators} - t_in_progress = curr[dev "_ios_in_progress"]; - devs_in_group = 1; - ${awk_compute_read_write_stats} - ${awk_print_line} - } - } - ${awk_save_curr_as_prev} - } -EOF - awk -f /tmp/percona-toolkit.awk "$@" -} - - -# The main code that runs by default. Arguments are the command-line options. -main() { - - # Get command-line options. - for o; do - case "${o}" in - --) - shift; break; - ;; - --help) - usage; - ;; - -c) - shift; OPT_c="${1}"; shift; - ;; - -d) - shift; OPT_d="${1}"; shift; - ;; - -g) - shift; OPT_g="${1}"; shift; - case "${OPT_g}" in - disk) - ;; - sample) - ;; - all) - ;; - *) - OPT_ERR="Bad option value"; - usage - ;; - esac - ;; - -i) - shift; OPT_i="${1}"; shift; - ;; - -k) - shift; OPT_k="${1}"; shift; - ;; - -n) - shift; OPT_n="${1}"; shift; - ;; - -s) - shift; OPT_s="${1}"; shift; - ;; - -*) - OPT_ERR="Unknown option ${o}." - usage - ;; - esac - done - OPT_i="${OPT_i:-}"; export OPT_i; - OPT_k="${OPT_k:-/tmp/diskstats-samples}"; export OPT_k; - OPT_n="${OPT_n:-}"; export OPT_n; - OPT_c="${OPT_c:-cnc|rt|mb|busy|prg}"; export OPT_c; - OPT_d="${OPT_d:-}"; export OPT_d; - OPT_s="${OPT_s:-1}"; export OPT_s; - OPT_g="${OPT_g:-disk}"; export OPT_g; - - # We need to "do the right thing." The user might invoke any of several - # ways; we get samples every now and then unless there is data on STDIN or a - # file to read. - if [ $# -gt 0 -o -p 1 ]; then - READ_FILE=1 - fi - - # If we are interactive and there's no file, we gather stats to play with. - if [ -z "${READ_FILE}" ]; then - PARENT=$$ - loops=1 - while true; do - cat /proc/diskstats >> "${OPT_k}" - date +"TS %s.%N %F %T" >> "${OPT_k}" - if ! ps -p ${PARENT} >/dev/null 2>&1 ; then - # The parent process doesn't exist anymore -- quit. - finished="yes" - elif [ "${OPT_n}" ]; then - if [ "${loops}" -gt "${OPT_n}" ] ; then - finished="yes" - fi - fi - if [ "${finished}" ]; then - if [ "${OPT_k}" = "/tmp/diskstats-samples" ]; then - rm -f /tmp/diskstats-samples - fi - break; - fi - sleep ${OPT_s} - loops=$(($loops + 1)) - done & - - # Sleep until the loop has gathered 2 samples. - while [ "$(grep -c TS "${OPT_k}")" -lt "2" ]; do - sleep .5 - done - fi - - if [ -z "${READ_FILE}" ]; then - group_by_${OPT_g} "${OPT_k}" - else - group_by_${OPT_g} "$@" - fi - - # Don't be "interactive" unless the user actually has control. - if [ ! -t 0 -o ! -t 1 ]; then - exit; - fi - - # We use this in iterative-loop mode - if [ -z "${READ_FILE}" ]; then - TAIL_LINES=$(cat /proc/diskstats | wc -l) - fi - - while [ -z "${OPT_n}" -o "${i:-0}" -le "${OPT_n:-0}" ]; do - i=$(( ${i:-1} + 1 )) - - # Re-decide the timeout every loop - if [ -z "${READ_FILE}" ]; then - TIMEOUT="-t ${OPT_s}" - fi - cmd="" # Must reset, some bash won't clear it after a read times out. - read $TIMEOUT -n 1 -s cmd junk - case "${cmd}" in - A) - OPT_g="all" - FIRST_LOOP="1" - ;; - d) - read -p "Enter a disk/device pattern: " OPT_d - FIRST_LOOP="1" - ;; - D) - OPT_g="disk" - FIRST_LOOP="1" - ;; - c) - read -p "Enter a column pattern: " OPT_c - FIRST_LOOP="1" - awk_print_line="" # Make it re-compute the column headers - ;; - i) - read -p "Enter a sample size: " OPT_i - FIRST_LOOP="1" - ;; - p) - read -n 1 -p "Paused - press any key to continue" - ;; - q) - break - ;; - s) - read -p "Enter a redisplay interval: " OPT_s - FIRST_LOOP="1" - ;; - S) - OPT_g="sample" - FIRST_LOOP="1" - ;; - '?') - print_help; read -n1 -s - ;; - esac - - if [ -z "${READ_FILE}" ]; then - if [ -z "${FIRST_LOOP}" ]; then - # We only print out what's new since last printout - N=$(($TAIL_LINES * 2 + 2)) # Extra is for TS lines - tail -n $N "${OPT_k}" 2>/dev/null | group_by_${OPT_g} | tail -n +2 - else - group_by_${OPT_g} "${OPT_k}" - fi - FIRST_LOOP="" - else - group_by_${OPT_g} "$@" - fi - - done - - if [ "${OPT_k}" = "/tmp/diskstats-samples" ]; then - rm -f "/tmp/diskstats-samples" - fi - rm -f /tmp/percona-toolkit.awk -} - -# Execute the program if it was not included from another file. This makes it -# possible to include without executing, and thus test. -if [ "$(basename "$0")" = "pt-diskstats" ] || [ "$(basename "$0")" = "bash" -a "$_" = "$0" ]; then - main "$@" -fi - -# ############################################################################ -# Documentation -# ############################################################################ -:<<'DOCUMENTATION' -=pod - -=head1 NAME - -pt-diskstats - Aggregate and summarize F. - -=head1 SYNOPSIS - -Usage: pt-diskstats [OPTIONS] [FILES] - -pt-diskstats reads F periodically, or files with the -contents of F, aggregates the data, and prints it nicely. - -=head1 RISKS - -The following section is included to inform users about the potential risks, -whether known or unknown, of using this tool. The two main categories of risks -are those created by the nature of the tool (e.g. read-only tools vs. read-write -tools) and those created by bugs. - -pt-diskstats is a read-only tool. It should be very low-risk. - -At the time of this release, we know of no bugs that could cause serious harm -to users. - -The authoritative source for updated information is always the online issue -tracking system. Issues that affect this tool will be marked as such. You can -see a list of such issues at the following URL: -L. - -See also L<"BUGS"> for more information on filing bugs and getting help. - -=head1 DESCRIPTION - -pt-diskstats tool is similar to iostat, but has some advantages. It separates -reads and writes, for example, and computes some things that iostat does in -either incorrect or confusing ways. It is also menu-driven and interactive -with several different ways to aggregate the data, and integrates well with -the L tool. These properties make it very convenient for quickly -drilling down into I/O performance at the desired level of granularity. - -This program works in two main modes. One way is to process a file with saved -disk statistics, which you specify on the command line. The other way is to -start a background process gathering samples at intervals and saving them into -a file, and process this file in the foreground. In both cases, the tool is -interactively controlled by keystrokes, so you can redisplay and slice the -data flexibly and easily. If the tool is not attached to a terminal, it -doesn't run interactively; it just processes and prints its output, then exits. -Otherwise it loops until you exit with the 'q' key. - -If you press the '?' key, you will bring up the interactive help menu that -shows which keys control the program. - -Files should have this format: - - - TS - - ... et cetera - TS <-- must end with a TS line. - -See L for a detailed -example of using the tool. - -=head1 OUTPUT - -The columns are as follows: - -=over - -=item #ts - -The number of seconds of samples in the line. If there is only one, then -the timestamp itself is shown, without the {curly braces}. - -=item device - -The device name. If there is more than one device, then instead the number -of devices aggregated into the line is shown, in {curly braces}. - -=item rd_mb_s - -The number of megabytes read per second, average, during the sampled interval. - -=item rd_cnc - -The average concurrency of the read operations, as computed by Little's Law -(a.k.a. queueing theory). - -=item rd_rt - -The average response time of the read operations, in milliseconds. - -=item wr_mb_s - -Megabytes written per second, average. - -=item wr_cnc - -Write concurrency, similar to read concurrency. - -=item wr_rt - -Write response time, similar to read response time. - -=item busy - -The fraction of time that the device had at least one request in progress; -this is what iostat calls %util (which is a misleading name). - -=item in_prg - -The number of requests that were in progress. Unlike the read and write -concurrencies, which are averages that are generated from reliable numbers, -this number is an instantaneous sample, and you can see that it might -represent a spike of requests, rather than the true long-term average. - -=back - -In addition to the above columns, there are a few columns that are hidden by -default. If you press the 'c' key, and then press Enter, you will blank out -the regular expression pattern that selects columns to display, and you will -then see the extra columns: - -=over - -=item rd_s - -The number of reads per second. - -=item rd_avkb - -The average size of the reads, in kilobytes. - -=item rd_mrg - -The percentage of read requests that were merged together in the disk -scheduler before reaching the device. - -=item wr_s, wr_avgkb, and wr_mrg - -These are analogous to their C cousins. - -=back - -=head1 OPTIONS - -Options must precede files on the command line. - -=over - -=item -c COLS - -Awk regex of which columns to include (default cnc|rt|mb|busy|prg). - -=item -d DEVICES - -Awk regex of which devices to include. - -=item -g GROUPBY - -Group-by mode (default disk); specify one of the following: - - disk - Each line of output shows one disk device. - sample - Each line of output shows one sample of statistics. - all - Each line of output shows one sample and one disk device. - -=item -i INTERVAL - -In -g sample mode, include INTERVAL seconds per sample. - -=item -k KEEPFILE - -File to save diskstats samples in (default /tmp/diskstats-samples). -If a non-default filename is used, it will be saved for later analysis. - -=item -n SAMPLES - -When in interactive mode, stop after N samples. - -=item -s INTERVAL - -Sample /proc/diskstats every N seconds (default 1). - -=back - -=head1 ENVIRONMENT - -This tool does not use any environment variables. - -=head1 SYSTEM REQUIREMENTS - -This tool requires Bash v3 or newer and the F filesystem unless -reading from files. - -=head1 BUGS - -For a list of known bugs, see L. - -Please report bugs at L. -Include the following information in your bug report: - -=over - -=item * Complete command-line used to run the tool - -=item * Tool L<"--version"> - -=item * MySQL version of all servers involved - -=item * Output from the tool including STDERR - -=item * Input files (log/dump/config files, etc.) - -=back - -If possible, include debugging output by running the tool with C; -see L<"ENVIRONMENT">. - -=head1 DOWNLOADING - -Visit L to download the -latest release of Percona Toolkit. Or, get the latest release from the -command line: - - wget percona.com/get/percona-toolkit.tar.gz - - wget percona.com/get/percona-toolkit.rpm - - wget percona.com/get/percona-toolkit.deb - -You can also get individual tools from the latest release: - - wget percona.com/get/TOOL - -Replace C with the name of any tool. - -=head1 AUTHORS - -Baron Schwartz - -=head1 ABOUT PERCONA TOOLKIT - -This tool is part of Percona Toolkit, a collection of advanced command-line -tools developed by Percona for MySQL support and consulting. Percona Toolkit -was forked from two projects in June, 2011: Maatkit and Aspersa. Those -projects were created by Baron Schwartz and developed primarily by him and -Daniel Nichter, both of whom are employed by Percona. Visit -L for more software developed by Percona. - -=head1 COPYRIGHT, LICENSE, AND WARRANTY - -This program is copyright 2010-2011 Baron Schwartz, 2011 Percona Inc. -Feedback and improvements are welcome. - -THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation, version 2; OR the Perl Artistic License. On UNIX and similar -systems, you can issue `man perlgpl' or `man perlartistic' to read these -licenses. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA. - -=head1 VERSION - -pt-diskstats 1.0.1 - -=cut - -DOCUMENTATION diff --git a/lib/Diskstats.pm b/lib/Diskstats.pm index a3db9e44..0a12b355 100644 --- a/lib/Diskstats.pm +++ b/lib/Diskstats.pm @@ -32,39 +32,15 @@ use constant MKDEBUG => $ENV{MKDEBUG} || 0; use IO::Handle; use List::Util qw( max first ); -BEGIN { - # This BEGIN block checks if we can use Storable::dclone: If we can't, - # it clobbers this package's dclone glob (*{ __PACKAGE__ . "::dclone" }) - # with an anonymous function that provides more or less what we need. - my $have_storable = eval { require Storable }; - - if ( $have_storable ) { - Storable->import(qw(dclone)); - } - else { - require Scalar::Util; - - # An extrenely poor man's dclone. - # Nevermind the prototype. dclone has it, so it's here only it for - # the sake of completeness. - *dclone = sub ($) { - my ($ref) = @_; - my $reftype = Scalar::Util::reftype($ref) || ''; - - if ( $reftype eq ref({}) ) { - # Only one level of depth. Not worth making it any deeper/recursive, I think. - return { map { $_ => {%{$ref->{$_}}} } keys %$ref }; - } - else { - die "This basic dclone does not support [$reftype]"; - } - }; - } -} - sub new { my ( $class, %args ) = @_; + my @required_args = qw(OptionParser); + foreach my $arg ( @required_args ) { + die "I need a $arg argument" unless $args{$arg}; + } + my ($o) = @args{@required_args}; + my $self = { # Defaults filename => '/proc/diskstats', @@ -72,12 +48,12 @@ sub new { device_regex => qr/(?=)/, block_size => 512, out_fh => \*STDOUT, - filter_zeroed_rows => 0, - sample_time => 0, + filter_zeroed_rows => $o->get('zero-rows') ? undef : 1, + sample_time => $o->get('sample-time') || 0, interactive => 0, _stats_for => {}, - _sorted_devs => [], + _ordered_devs => [], _ts => {}, _first => 1, @@ -86,6 +62,24 @@ sub new { _print_header => 1, }; + if ( $o->get('memory-for-speed') ) { + eval { + require Memoize; Memoize::memoize('_parse_diskstats_line') + }; + if ($EVAL_ERROR) { + warn "Can't trade memory for speed: $EVAL_ERROR. Continuing as usual."; + } + } + + my %pod_to_attribute = ( + columns => 'column_regex', + devices => 'device_regex' + ); + for my $key ( grep { defined $o->get($_) } keys %pod_to_attribute ) { + my $re = $o->get($key) || '(?=)'; + $self->{ $pod_to_attribute{$key} } = qr/$re/i; + } + # If they passed us an attribute explicitly, we use those. for my $attribute ( grep { !/^_/ && defined $args{$_} } keys %$self ) { $self->{$attribute} = $args{$attribute}; @@ -104,14 +98,14 @@ sub _ts_common { return $self->{_ts}->{$key}; } -sub current_ts { +sub curr_ts { my ($self, $val) = @_; - return $self->_ts_common("current", $val); + return $self->_ts_common("curr", $val); } -sub previous_ts { +sub prev_ts { my ($self, $val) = @_; - return $self->_ts_common("previous", $val); + return $self->_ts_common("prev", $val); } sub first_ts { @@ -179,10 +173,10 @@ sub device_regex { sub filename { my ( $self, $new_filename ) = @_; - if ( defined $new_filename ) { + if ( $new_filename ) { return $self->{filename} = $new_filename; } - return $self->{filename} || '/proc/diskstats'; + return $self->{filename}; } sub block_size { @@ -190,19 +184,24 @@ sub block_size { return $self->{block_size}; } -sub sorted_devs { - my ( $self, $new_dev ) = @_; - if ( $new_dev && ref($new_dev) eq ref( [] ) ) { - $self->{_sorted_devs} = $new_dev; +# Returns a list of devices seen. You may pass an arrayref argument to +# replace the internal list, but consider using clear_ordered_devs and +# add_ordered_dev instead. + +sub ordered_devs { + my ( $self, $replacement_list ) = @_; + if ( $replacement_list ) { + $self->{_ordered_devs} = $replacement_list; } - return @{ $self->{_sorted_devs} }; + return @{ $self->{_ordered_devs} }; } -sub add_sorted_devs { +sub add_ordered_dev { my ( $self, $new_dev ) = @_; if ( !$self->{_seen_devs}->{$new_dev}++ ) { - push @{ $self->{_sorted_devs} }, $new_dev; + push @{ $self->{_ordered_devs} }, $new_dev; } + return; } # clear_stuff methods. Like the name says, they clear state stored inside @@ -212,11 +211,11 @@ sub clear_state { my ($self) = @_; $self->{_first} = 1; $self->{_print_header} = 1; - $self->clear_current_stats(); - $self->clear_previous_stats(); + $self->clear_curr_stats(); + $self->clear_prev_stats(); $self->clear_first_stats(); $self->clear_ts(); - $self->clear_sorted_devs(); + $self->clear_ordered_devs(); } sub clear_ts { @@ -224,10 +223,10 @@ sub clear_ts { $self->{_ts} = {}; } -sub clear_sorted_devs { +sub clear_ordered_devs { my $self = shift; $self->{_seen_devs} = {}; - $self->sorted_devs( [] ); + $self->ordered_devs( [] ); } sub _clear_stats_common { @@ -242,14 +241,14 @@ sub _clear_stats_common { } } -sub clear_current_stats { +sub clear_curr_stats { my ( $self, @args ) = @_; $self->_clear_stats_common( "_stats_for", @args ); } -sub clear_previous_stats { +sub clear_prev_stats { my ( $self, @args ) = @_; - $self->_clear_stats_common( "_previous_stats_for", @args ); + $self->_clear_stats_common( "_prev_stats_for", @args ); } sub clear_first_stats { @@ -271,9 +270,9 @@ sub stats_for { $self->_stats_for_common( $dev, '_stats_for' ); } -sub previous_stats_for { +sub prev_stats_for { my ( $self, $dev ) = @_; - $self->_stats_for_common( $dev, '_previous_stats_for' ); + $self->_stats_for_common( $dev, '_prev_stats_for' ); } sub first_stats_for { @@ -283,39 +282,47 @@ sub first_stats_for { sub has_stats { my ($self) = @_; + my $stats = $self->stats_for; - return $self->stats_for - && scalar grep 1, @{ $self->stats_for }{ $self->sorted_devs }; -} - -sub _save_current_as_previous { - my ( $self, $curr_hashref ) = @_; - - if ( $self->{_save_curr_as_prev} ) { - $self->{_previous_stats_for} = $curr_hashref; - for my $dev (keys %$curr_hashref) { - $self->{_previous_stats_for}->{$dev}->{sum_ios_in_progress} += - $curr_hashref->{$dev}->{ios_in_progress}; - } - $self->previous_ts($self->current_ts()); + for my $key ( keys %$stats ) { + return 1 if $stats->{$key} && %{ $stats->{$key} } } return; } -sub _save_current_as_first { - my ($self, $curr_hashref) = @_; +sub _save_curr_as_prev { + my ( $self, $curr ) = @_; + + if ( $self->{_save_curr_as_prev} ) { + $self->{_prev_stats_for} = $curr; + for my $dev (keys %$curr) { + $self->{_prev_stats_for}->{$dev}->{sum_ios_in_progress} += + $curr->{$dev}->{ios_in_progress}; + } + $self->prev_ts($self->curr_ts()); + } + + return; +} + +sub _save_curr_as_first { + my ($self, $curr) = @_; if ( $self->{_first} ) { - $self->{_first_stats_for} = $curr_hashref; - $self->first_ts($self->current_ts()); + $self->{_first_stats_for} = { + # 1-level deep copy of the original structure. Should + # be enough. + map { $_ => {%{$curr->{$_}}} } keys %$curr + }; + $self->first_ts($self->curr_ts()); $self->{_first} = undef; } } sub _save_stats { - my ( $self, $hashref ) = @_; - $self->{_stats_for} = $hashref; + my ( $self, $stats ) = @_; + return $self->{_stats_for} = $stats; } sub trim { @@ -327,13 +334,13 @@ sub trim { sub col_ok { my ( $self, $column ) = @_; - my $regex = $self->column_regex; - return $column =~ $regex || trim($column) =~ $regex; + my $regex = $self->column_regex(); + return ($column =~ $regex) || (trim($column) =~ $regex); } sub dev_ok { my ( $self, $device ) = @_; - my $regex = $self->device_regex; + my $regex = $self->device_regex(); return $device =~ $regex; } @@ -383,22 +390,23 @@ my @columns_in_order = ( } # Method: design_print_formats() -# What says on the label. Returns three things: the format for the header and the -# data, and an arrayref of the columns used to make it. +# What says on the label. Returns three things: the format for the header +# and the data, and an arrayref of the columns used to make it. # # Parameters: # %args - Arguments # # Optional Arguments: -# columns - An arrayref with column names. If absent, uses ->col_ok to -# decide which columns to use. -# max_device_length - How much space to leave for device names. Defaults at 6. +# columns - An arrayref with column names. If absent, +# uses ->col_ok to decide which columns to use. +# max_device_length - How much space to leave for device names. +# Defaults to 6. # sub design_print_formats { my ( $self, %args ) = @_; my ( $dev_length, $columns ) = @args{qw( max_device_length columns )}; - $dev_length ||= max 6, map length, $self->sorted_devs; + $dev_length ||= max 6, map length, $self->ordered_devs; my ( $header, $format ); # For each device, print out the following: The timestamp offset and @@ -418,46 +426,69 @@ sub design_print_formats { return ( $header, $format, $columns ); } -sub parse_diskstats_line { - my ( $self, $line, $block_size ) = @_; - my @keys = qw( - reads reads_merged read_sectors ms_spent_reading - writes writes_merged written_sectors ms_spent_writing - ios_in_progress ms_spent_doing_io ms_weighted - ); - my ( $dev, %dev_stats ); +{ +# This is hot code. In any given run it could end up being called +# thousands of times, so beware: Here could be dragons. +my @diskstats_fields = qw( + reads reads_merged read_sectors ms_spent_reading + writes writes_merged written_sectors ms_spent_writing + ios_in_progress ms_spent_doing_io ms_weighted +); +# This allows parse_diskstats_line() to be overriden, but also to be +# memoized without a normalization function. - if ( ( @dev_stats{qw( major minor )}, $dev, @dev_stats{@keys} ) = - $line =~ /^ - # Disk format - \s* (\d+) # major - \s+ (\d+) # minor - \s+ (.+?) # Device name - \s+ (\d+) # # of reads issued - \s+ (\d+) # # of reads merged - \s+ (\d+) # # of sectors read - \s+ (\d+) # # of milliseconds spent reading - \s+ (\d+) # # of writes completed - \s+ (\d+) # # of writes merged - \s+ (\d+) # # of sectors written - \s+ (\d+) # # of milliseconds spent writing - \s+ (\d+) # # of IOs currently in progress - \s+ (\d+) # # of milliseconds spent doing IOs - \s+ (\d+) # weighted # of milliseconds spent doing IOs - \s*$/x - ) +# Magic goto, removes this function from the return stack. Haven't +# benchmarked it, but ostensibly faster. +sub parse_diskstats_line { shift; goto &_parse_diskstats_line } +sub _parse_diskstats_line { + my ( $line, $block_size ) = @_; + my $dev; + keys my %dev_stats = 30; # Pre-expand the amount of buckets for this hash. + +# The following split replaces this: +# $line =~ /^ +# # Disk format +# \s* (\d+) # major +# \s+ (\d+) # minor +# \s+ (.+?) # Device name +# \s+ (\d+) # # of reads issued +# \s+ (\d+) # # of reads merged +# \s+ (\d+) # # of sectors read +# \s+ (\d+) # # of milliseconds spent reading +# \s+ (\d+) # # of writes completed +# \s+ (\d+) # # of writes merged +# \s+ (\d+) # # of sectors written +# \s+ (\d+) # # of milliseconds spent writing +# \s+ (\d+) # # of IOs currently in progress +# \s+ (\d+) # # of milliseconds spent doing IOs +# \s+ (\d+) # weighted # of milliseconds spent doing IOs +# \s*$/x +# +# Since we assume that device names can't have spaces. + + # Assigns the first two elements of the list created by split() into + # %dev_stats as the major and minor, the third element into $dev, + # and the remaining elements back into %dev_stats. + if ( 14 == (( @dev_stats{qw( major minor )}, $dev, @dev_stats{@diskstats_fields} ) = + split " ", $line, 14 ) ) { - $dev_stats{read_bytes} = $dev_stats{read_sectors} * $block_size; - $dev_stats{written_bytes} = - $dev_stats{written_sectors} * $block_size; - $dev_stats{read_kbs} = $dev_stats{read_bytes} / 1024; - $dev_stats{written_kbs} = $dev_stats{written_bytes} / 1024; - $dev_stats{ios_requested} += $dev_stats{reads} + $dev_stats{writes}; - $dev_stats{ios_in_bytes} += $dev_stats{read_bytes} + $dev_stats{written_bytes}; + $dev_stats{read_kbs} = + ( $dev_stats{read_bytes} = $dev_stats{read_sectors} + * $block_size ) / 1024; + $dev_stats{written_kbs} = + ( $dev_stats{written_bytes} = $dev_stats{written_sectors} + * $block_size ) / 1024; + $dev_stats{ios_requested} = $dev_stats{reads} + + $dev_stats{writes}; + + $dev_stats{ios_in_bytes} = $dev_stats{read_bytes} + + $dev_stats{written_bytes}; return ( $dev, \%dev_stats ); } - elsif ((@dev_stats{qw( major minor )}, $dev, @dev_stats{ qw( reads read_sectors writes written_sectors ) }) = $line =~ /^ + elsif ((@dev_stats{qw( major minor )}, $dev, + @dev_stats{ qw( reads read_sectors writes written_sectors ) }) = + $line =~ /^ # Partition format \s* (\d+) # major \s+ (\d+) # minor @@ -466,18 +497,21 @@ sub parse_diskstats_line { \s+ (\d+) # # of sectors read \s+ (\d+) # # of writes issued \s+ (\d+) # # of sectors written - \s*$/x) { - for my $key ( @keys ) { + \s*$/x) + { + for my $key ( @diskstats_fields ) { $dev_stats{$key} ||= 0; } - # Copypaste from above, abstract? + # Copypaste from above, should probably abstract, but it would make + # the common case slower. $dev_stats{read_bytes} = $dev_stats{read_sectors} * $block_size; $dev_stats{written_bytes} = $dev_stats{written_sectors} * $block_size; $dev_stats{read_kbs} = $dev_stats{read_bytes} / 1024; $dev_stats{written_kbs} = $dev_stats{written_bytes} / 1024; - $dev_stats{ttreq} += $dev_stats{reads} + $dev_stats{writes}; - $dev_stats{ttbyt} += $dev_stats{read_bytes} + $dev_stats{written_bytes}; + $dev_stats{ios_requested} = $dev_stats{reads} + $dev_stats{writes}; + $dev_stats{ios_in_bytes} = $dev_stats{read_bytes} + + $dev_stats{written_bytes}; return ( $dev, \%dev_stats ); } @@ -485,6 +519,7 @@ sub parse_diskstats_line { return; } } +} # Method: parse_from() # Parses data from one of the sources. @@ -493,32 +528,36 @@ sub parse_diskstats_line { # %args - Arguments # # Optional Arguments: -# filehandle - Reads data from a filehandle by calling readline() on it. +# filehandle - Reads data from a filehandle by calling readline() +# on it. # data - Reads data one line at a time. -# filename - Opens a filehandle to the file and reads it one line at a time. -# sample_callback - Called each time a sample is processed, passed the latest timestamp. +# filename - Opens a filehandle to the file and reads it one +# line at a time. +# sample_callback - Called each time a sample is processed, passed +# the latest timestamp. # sub parse_from { - my ( $self, %args ) = @_; + my ( $self, %args ) = @_; - my $lines_read = $args{filehandle} - ? $self->parse_from_filehandle( @args{qw( filehandle sample_callback )} ) : - $args{data} - ? $self->parse_from_data( @args{qw( data sample_callback )} ) : - $self->parse_from_filename( @args{qw( filename sample_callback )} ); - return $lines_read; + my $lines_read = $args{filehandle} + ? $self->parse_from_filehandle( @args{qw( filehandle sample_callback )} ) + : $args{data} + ? $self->parse_from_data( @args{qw( data sample_callback )} ) + : $self->parse_from_filename( @args{qw( filename sample_callback )} ); + return $lines_read; } + sub parse_from_filename { my ( $self, $filename, $sample_callback ) = @_; - $filename ||= $self->filename; + $filename ||= $self->filename(); open my $fh, "<", $filename - or die "Couldn't open ", $filename, ": $OS_ERROR"; + or die "Cannot parse $filename: $OS_ERROR"; my $lines_read = $self->parse_from_filehandle( $fh, $sample_callback ); - close($fh) or die "Couldn't close: $OS_ERROR"; + close $fh or die "Cannot close: $OS_ERROR"; return $lines_read; } @@ -531,47 +570,58 @@ sub parse_from_filename { # # Parameters: # filehandle - -# sample_callback - Called each time a sample is processed, passed the latest timestamp. +# sample_callback - Called each time a sample is processed, passed +# the latest timestamp. # sub parse_from_filehandle { my ( $self, $filehandle, $sample_callback ) = @_; - return $self->_load( $filehandle, $sample_callback );; + return $self->_load( $filehandle, $sample_callback ); } +# Method: parse_from_data() +# Similar to parse_from_filehandle, but uses a reference to a scalar +# as a filehandle +# +# Parameters: +# data - A normal Perl scalar, or a ref to a scalar. +# sample_callback - Same as parse_from_filehandle. +# sub parse_from_data { my ( $self, $data, $sample_callback ) = @_; - open my $fh, "<", \$data - or die "Couldn't open scalar as filehandle: $OS_ERROR"; + open( my $fh, "<", ref($data) ? $data : \$data ) + or die "Couldn't parse data: $OS_ERROR"; my $lines_read = $self->parse_from_filehandle( $fh, $sample_callback ); - close($fh); + close $fh or die ""; return $lines_read; } -# Method: INTERNAL: _load() +# Method: _load() +# !!!!INTERNAL!!!!! # Reads from the filehandle, either saving the data as needed if dealing # with a diskstats-formatted line, or if it finds a TS line and has a # callback, defering to that. sub _load { my ( $self, $fh, $sample_callback ) = @_; - my $block_size = $self->block_size; + my $block_size = $self->block_size(); my $current_ts = 0; my $new_cur = {}; while ( my $line = <$fh> ) { - if ( my ( $dev, $dev_stats ) = $self->parse_diskstats_line($line, $block_size) ) { + if ( my ( $dev, $dev_stats ) = $self->parse_diskstats_line($line, $block_size) ) + { $new_cur->{$dev} = $dev_stats; - $self->add_sorted_devs($dev); + $self->add_ordered_dev($dev); } elsif ( my ($new_ts) = $line =~ /TS\s+([0-9]+(?:\.[0-9]+)?)/ ) { - if ( $current_ts && %{$new_cur} ) { - $self->_save_current_as_previous( $self->stats_for() ); + if ( $current_ts && %$new_cur ) { + $self->_save_curr_as_prev( $self->stats_for() ); $self->_save_stats($new_cur); - $self->current_ts($current_ts); - $self->_save_current_as_first( dclone($self->stats_for) ); + $self->curr_ts($current_ts); + $self->_save_curr_as_first( $new_cur ); $new_cur = {}; } if ($sample_callback) { @@ -581,16 +631,16 @@ sub _load { } else { chomp($line); - die "Line [$line] isn't in the diskstats format"; + warn "Line $INPUT_LINE_NUMBER: [$line] isn't in the diskstats format"; } } - if ( eof $fh && $current_ts ) { + if ( $current_ts ) { if ( %{$new_cur} ) { - $self->_save_current_as_previous( $self->stats_for() ); + $self->_save_curr_as_prev( $self->stats_for() ); $self->_save_stats($new_cur); - $self->current_ts($current_ts); - $self->_save_current_as_first( dclone($self->stats_for) ); + $self->curr_ts($current_ts); + $self->_save_curr_as_first( $new_cur ); $new_cur = {}; } if ($sample_callback) { @@ -602,7 +652,13 @@ sub _load { } sub _calc_read_stats { - my ( $self, $delta_for, $elapsed, $devs_in_group ) = @_; + my ( $self, %args ) = @_; + + my @required_args = qw( delta_for elapsed devs_in_group ); + foreach my $arg ( @required_args ) { + die "I need a $arg argument" unless $args{$arg}; + } + my ($delta_for, $elapsed, $devs_in_group) = @args{ @required_args }; my %read_stats = ( reads_sec => $delta_for->{reads} / $elapsed, @@ -633,7 +689,13 @@ sub _calc_read_stats { } sub _calc_write_stats { - my ( $self, $delta_for, $elapsed, $devs_in_group ) = @_; + my ( $self, %args ) = @_; + + my @required_args = qw( delta_for elapsed devs_in_group ); + foreach my $arg ( @required_args ) { + die "I need a $arg argument" unless $args{$arg}; + } + my ($delta_for, $elapsed, $devs_in_group) = @args{ @required_args }; my %write_stats = ( writes_sec => $delta_for->{writes} / $elapsed, @@ -667,74 +729,104 @@ sub _calc_write_stats { # Compute the numbers for reads and writes together, the things for # which we do not have separate statistics. -# Busy is what iostat calls %util. This is the percent of -# wall-clock time during which the device has I/O happening. sub _calc_misc_stats { - my ( $self, $delta_for, $elapsed, $devs_in_group, $stats ) = @_; + my ( $self, %args ) = @_; + + my @required_args = qw( delta_for elapsed devs_in_group stats ); + foreach my $arg ( @required_args ) { + die "I need a $arg argument" unless $args{$arg}; + } + my ($delta_for, $elapsed, $devs_in_group, $stats) = @args{ @required_args }; my %extra_stats; + # Busy is what iostat calls %util. This is the percent of + # wall-clock time during which the device has I/O happening. $extra_stats{busy} = 100 * $delta_for->{ms_spent_doing_io} / ( 1000 * $elapsed * $devs_in_group ); - my $number_of_ios = $stats->{ios_requested}; - my $total_ms_spent_on_io = $delta_for->{ms_spent_reading} + $delta_for->{ms_spent_writing}; + my $number_of_ios = $stats->{ios_requested}; + my $total_ms_spent_on_io = $delta_for->{ms_spent_reading} + + $delta_for->{ms_spent_writing}; - $extra_stats{qtime} = $number_of_ios ? $total_ms_spent_on_io / $number_of_ios : 0; - $extra_stats{stime} = $number_of_ios ? $delta_for->{ms_spent_doing_io} / $number_of_ios : 0; + if ( $number_of_ios ) { + $extra_stats{qtime} = $total_ms_spent_on_io / $number_of_ios; + $extra_stats{stime} = $delta_for->{ms_spent_doing_io} / $number_of_ios; + } + else { + $extra_stats{qtime} = 0; + $extra_stats{stime} = 0; + } $extra_stats{s_spent_doing_io} = $total_ms_spent_on_io / 1000; $extra_stats{line_ts} = $self->compute_line_ts( first_ts => $self->first_ts(), - current_ts => $self->current_ts(), + curr_ts => $self->curr_ts(), ); return %extra_stats; } sub _calc_delta_for { - my ( $self, $current, $against ) = @_; - return { - map { ( $_ => $current->{$_} - $against->{$_} ) } + my ( $self, $curr, $against ) = @_; + my %deltas = ( + map { ( $_ => ($curr->{$_} || 0) - ($against->{$_} || 0) ) } qw( reads reads_merged read_sectors ms_spent_reading writes writes_merged written_sectors ms_spent_writing read_kbs written_kbs ms_spent_doing_io ms_weighted ) - }; + ); + return \%deltas; } sub _calc_stats_for_deltas { my ( $self, $elapsed ) = @_; my @end_stats; + my @devices = $self->ordered_devs(); - for my $dev ( grep { $self->dev_ok($_) && $self->stats_for($_) } $self->sorted_devs ) { - my $curr = $self->stats_for($dev); - my $against = $self->delta_against($dev); + my $devs_in_group = $self->compute_devs_in_group(); - my $delta_for = $self->_calc_delta_for( $curr, $against ); + # Read "For each device that passes the regex, and we have stats for" + foreach my $dev ( + grep { $self->dev_ok($_) && $self->stats_for($_) } + @devices ) + { + my $curr = $self->stats_for($dev); + my $against = $self->delta_against($dev); - my $in_progress = $curr->{"ios_in_progress"}; + my $delta_for = $self->_calc_delta_for( $curr, $against ); + my $in_progress = $curr->{"ios_in_progress"}; my $tot_in_progress = $against->{"sum_ios_in_progress"} || 0; - my $devs_in_group = $self->compute_devs_in_group; - # Compute the per-second stats for reads, writes, and overall. my %stats = ( - $self->_calc_read_stats( $delta_for, $elapsed, $devs_in_group ), - $self->_calc_write_stats( $delta_for, $elapsed, $devs_in_group ), + $self->_calc_read_stats( + delta_for => $delta_for, + elapsed => $elapsed, + devs_in_group => $devs_in_group, + ), + $self->_calc_write_stats( + delta_for => $delta_for, + elapsed => $elapsed, + devs_in_group => $devs_in_group, + ), in_progress => $self->compute_in_progress( $in_progress, $tot_in_progress ), ); - my %extras = $self->_calc_misc_stats( $delta_for, $elapsed, $devs_in_group, \%stats ); - while ( my ($k, $v) = each %extras ) { - $stats{$k} = $v; - } + my %extras = $self->_calc_misc_stats( + delta_for => $delta_for, + elapsed => $elapsed, + devs_in_group => $devs_in_group, + stats => \%stats, + ); + + @stats{ keys %extras } = values %extras; $stats{dev} = $dev; @@ -746,7 +838,7 @@ sub _calc_stats_for_deltas { sub _calc_deltas { my ( $self ) = @_; - my $elapsed = $self->current_ts() - $self->delta_against_ts(); + my $elapsed = $self->curr_ts() - $self->delta_against_ts(); die "Time elapsed is [$elapsed]" unless $elapsed; return $self->_calc_stats_for_deltas($elapsed); @@ -755,14 +847,23 @@ sub _calc_deltas { sub print_header { my ($self, $header, @args) = @_; if ( $self->{_print_header} ) { - printf { $self->out_fh } $header . "\n", @args; + printf { $self->out_fh() } $header . "\n", @args; } } sub print_rest { my ($self, $format, $cols, $stat) = @_; if ( $self->filter_zeroed_rows() ) { - return unless grep { sprintf("%7.1f", $_) != 0 } @{$stat}{ grep { $self->col_ok($_) } @$cols }; + # Conundrum: What is "zero"? + # Is 0.000001 zero? How about 0.1? + # Here the answer is "it looks like zero after formatting"; + # unfortunately, we lack the formats at this point. We could + # fetch them again, but that's a pain, so instead we use + # %7.1f, which is what most of them are anyway, and should + # work for nearly all cases. + return unless grep { + sprintf("%7.1f", $_) != 0 + } @{$stat}{ @$cols }; } printf { $self->out_fh() } $format . "\n", @{$stat}{ qw( line_ts dev ), @$cols }; @@ -778,18 +879,18 @@ sub print_deltas { return unless $self->delta_against_ts(); @$cols = map { $self->_column_to_key($_) } @$cols; - my ( $header_cb, $rest_cb ) = @args{qw( header_cb rest_cb )}; + my ( $header_callback, $rest_callback ) = @args{qw( header_callback rest_callback )}; - if ( $header_cb ) { - $self->$header_cb( $header, "#ts", "device" ); + if ( $header_callback ) { + $self->$header_callback( $header, "#ts", "device" ); } else { $self->print_header( $header, "#ts", "device" ); } for my $stat ( $self->_calc_deltas() ) { - if ($rest_cb) { - $self->$rest_cb( $format, $cols, $stat ); + if ($rest_callback) { + $self->$rest_callback( $format, $cols, $stat ); } else { $self->print_rest( $format, $cols, $stat ); @@ -799,9 +900,9 @@ sub print_deltas { sub compute_line_ts { my ( $self, %args ) = @_; - return $args{first_ts} > 0 - ? sprintf( "%5.1f", $args{current_ts} - $args{first_ts} ) - : sprintf( "%5.1f", 0 ); + return sprintf( "%5.1f", $args{first_ts} > 0 + ? $args{curr_ts} - $args{first_ts} + : 0 ); } sub compute_in_progress { diff --git a/lib/DiskstatsGroupByAll.pm b/lib/DiskstatsGroupByAll.pm index 028f89e3..295e2068 100644 --- a/lib/DiskstatsGroupByAll.pm +++ b/lib/DiskstatsGroupByAll.pm @@ -30,23 +30,17 @@ use constant MKDEBUG => $ENV{MKDEBUG} || 0; use base qw( Diskstats ); -sub group_by { - my $self = shift; - $self->group_by_all(@_); -} - sub group_by_all { my ($self, %args) = @_; - if ( !$args{clear_state} ) { - $self->clear_state(); - } + $self->clear_state(); if (!$self->interactive) { $self->parse_from( sample_callback => sub { $self->print_deltas( - map { ( $_ => $args{$_} ) } qw( header_cb rest_cb ), + map { ( $_ => $args{$_} ) } + qw( header_callback rest_callback ), ); }, map( { ($_ => $args{$_}) } qw(filehandle filename data) ), @@ -57,20 +51,20 @@ sub group_by_all { $self->parse_from( sample_callback => sub { $self->print_deltas( - header_cb => sub { + header_callback => sub { my $self = shift; if ( $self->{_print_header} ) { - my $meth = $args{header_cb} || "print_header"; + my $meth = $args{header_callback} || "print_header"; $self->$meth(@_); } $self->{_print_header} = undef; }, - rest_cb => $args{rest_cb}, + rest_callback => $args{rest_callback}, ); }, map( { ($_ => $args{$_}) } qw(filehandle filename data) ), ); - if (!$self->previous_ts) { + if (!$self->prev_ts) { seek $args{filehandle}, $orig, 0; } return; @@ -78,6 +72,13 @@ sub group_by_all { $self->clear_state(); } +# The next methods are all overrides! + +sub group_by { + my $self = shift; + $self->group_by_all(@_); +} + sub clear_state { my $self = shift; if (!$self->interactive()) { @@ -92,12 +93,26 @@ sub clear_state { sub delta_against { my ($self, $dev) = @_; - return $self->previous_stats_for($dev); + return $self->prev_stats_for($dev); } sub delta_against_ts { my ($self) = @_; - return $self->previous_ts(); + return $self->prev_ts(); +} + +sub compute_line_ts { + my ($self, %args) = @_; + if ( $self->interactive() ) { + # In interactive mode, we always compare against the previous sample, + # but the default is to compare against the first. + # This is generally a non-issue, because it can only happen + # when there are more than two samples left to parse in the file, + # which can only happen when someone sets a redisplay or sampling + # interval (or both) too high. + $args{first_ts} = $self->prev_ts(); + } + return $self->SUPER::compute_line_ts(%args); } 1; diff --git a/lib/DiskstatsGroupByDisk.pm b/lib/DiskstatsGroupByDisk.pm index 51f48dba..4122db24 100644 --- a/lib/DiskstatsGroupByDisk.pm +++ b/lib/DiskstatsGroupByDisk.pm @@ -33,25 +33,23 @@ use base qw( Diskstats ); sub new { my ($class, %args) = @_; my $self = $class->SUPER::new(%args); - $self->{_iterations} = 0; + $self->{_iterations} = 0; $self->{_print_header} = 1; return $self; } sub group_by { - my $self = shift; - $self->group_by_disk(@_); + my ($self, @args) = @_; + $self->group_by_disk(@args); } # Prints out one line for each disk, summing over the interval from first to # last sample. sub group_by_disk { - my ($self, %args) = @_; - my ($header_cb, $rest_cb) = $args{ qw( header_cb rest_cb ) }; + my ($self, %args) = @_; + my ($header_callback, $rest_callback) = $args{ qw( header_callback rest_callback ) }; - if (!$self->interactive()) { - $self->clear_state(); - } + $self->clear_state() unless $self->interactive(); my $original_offset = $args{filehandle} ? tell($args{filehandle}) : undef; @@ -59,37 +57,40 @@ sub group_by_disk { sample_callback => sub { my ($self, $ts) = @_; - if ( $self->has_stats ) { + if ( $self->has_stats() ) { $self->{_iterations}++; if ($self->interactive() && $self->{_iterations} >= 2) { - my $elapsed = - ( $self->current_ts() || 0 ) - - ( $self->first_ts() || 0 ); + my $elapsed = ( $self->curr_ts() || 0 ) + - ( $self->first_ts() || 0 ); if ( $ts > 0 && $elapsed >= $self->sample_time() ) { $self->print_deltas( - header_cb => sub { + header_callback => sub { my ($self, @args) = @_; if ( $self->{_print_header} ) { - my $meth = $args{header_cb} || "print_header"; - $self->$meth(@args); + my $method = $args{header_callback} + || "print_header"; + $self->$method(@args); } $self->{_print_header} = undef; }, - rest_cb => $args{rest_cb}, + rest_callback => $args{rest_callback}, ); $self->{_iterations} = -1; - return "Stop interactive reading"; + return; } } } }, - map({ ($_ => $args{$_}) } qw(filehandle filename data)), + filehandle => $args{filehandle}, + filename => $args{filename}, + data => $args{data}, ); if ($self->interactive) { - if ($self->{_iterations} == -1 && defined($original_offset) && eof($args{filehandle})) { + if ($self->{_iterations} == -1 && defined($original_offset) + && eof($args{filehandle})) { $self->clear_state; seek $args{filehandle}, $original_offset, 0; } @@ -100,9 +101,12 @@ sub group_by_disk { return; } - $self->print_deltas( map( { ( $_ => $args{$_} ) } qw( header_cb rest_cb ) ) ); + $self->print_deltas( + header_callback => $args{header_callback}, + rest_callback => $args{rest_callback}, + ); - $self->clear_state; + $self->clear_state(); return $lines_read; } diff --git a/lib/DiskstatsGroupBySample.pm b/lib/DiskstatsGroupBySample.pm index 77258120..78bc2cc4 100644 --- a/lib/DiskstatsGroupBySample.pm +++ b/lib/DiskstatsGroupBySample.pm @@ -48,50 +48,57 @@ sub group_by { # last sample. sub group_by_sample { my ( $self, %args ) = @_; - my ( $header_cb, $rest_cb ) = $args{qw( header_cb rest_cb )}; + my ( $header_callback, $rest_callback ) = $args{qw( header_callback rest_callback )}; - if (!$self->interactive) { - $self->clear_state; - } + $self->clear_state() unless $self->interactive(); $self->parse_from( + # ->can comes from UNIVERSAL. Returns a coderef to the method, if found. + # undef otherwise. + # Basically \&func, but always in runtime, and allows overriding + # the method in child classes. sample_callback => $self->can("_sample_callback"), - map( { ( $_ => $args{$_} ) } qw(filehandle filename data) ), + filehandle => $args{filehandle}, + filename => $args{filename}, + data => $args{data}, ); - if (!$self->interactive) { - $self->clear_state; - } + $self->clear_state() unless $self->interactive(); + return; } sub _sample_callback { my ( $self, $ts, %args ) = @_; my $printed_a_line = 0; - if ( $self->has_stats ) { + if ( $self->has_stats() ) { $self->{_iterations}++; } - my $elapsed = - ( $self->current_ts() || 0 ) - - ( $self->previous_ts() || 0 ); + my $elapsed = ($self->curr_ts() || 0) + - ($self->prev_ts() || 0); if ( $ts > 0 && $elapsed >= $self->sample_time() ) { $self->print_deltas( - max_device_length => 6, - header_cb => sub { + # When grouping by samples, we don't usually show the device names, + # only a count of how many devices each sample has, which causes the + # columns' width change depending on simple invisible. That's uncalled + # for, so we hardcode the width here + # (6 is what the shell version used). + max_device_length => 6, + header_callback => sub { my ( $self, $header, @args ) = @_; if ( $self->{_print_header} ) { - my $method = $args{header_cb} || "print_header"; + my $method = $args{header_callback} || "print_header"; $self->$method( $header, @args ); $self->{_print_header} = undef; } }, - rest_cb => sub { + rest_callback => sub { my ( $self, $format, $cols, $stat ) = @_; - my $method = $args{rest_cb} || "print_rest"; + my $method = $args{rest_callback} || "print_rest"; $self->$method( $format, $cols, $stat ); $printed_a_line = 1; } @@ -99,23 +106,24 @@ sub _sample_callback { } if ( $self->{_iterations} == 1 || $printed_a_line == 1 ) { $self->{_save_curr_as_prev} = 1; - $self->_save_current_as_previous( $self->stats_for() ); + $self->_save_curr_as_prev( $self->stats_for() ); $self->{_save_curr_as_prev} = 0; } + return; } sub delta_against { my ( $self, $dev ) = @_; - return $self->previous_stats_for($dev); + return $self->prev_stats_for($dev); } sub delta_against_ts { my ( $self ) = @_; - return $self->previous_ts(); + return $self->prev_ts(); } sub clear_state { - my ( $self, @args ) = @_; + my ( $self, @args ) = @_; $self->{_iterations} = 0; $self->{_save_curr_as_prev} = 0; $self->{_print_header} = 1; @@ -124,17 +132,20 @@ sub clear_state { sub compute_devs_in_group { my ($self) = @_; + my $stats = $self->stats_for(); + my $re = $self->device_regex(); return scalar grep { - # Got stats for that device, and we want to print it - $self->stats_for($_) && $self->dev_ok($_) - } $self->sorted_devs; + # Got stats for that device, and it matches the devices re + $stats->{$_} && $_ =~ $re + } $self->ordered_devs; } sub compute_dev { - my ( $self, $dev ) = @_; - return $self->compute_devs_in_group() > 1 - ? "{" . $self->compute_devs_in_group() . "}" - : ( $self->sorted_devs )[0]; + my ( $self, $devs ) = @_; + $devs ||= $self->compute_devs_in_group(); + return $devs > 1 + ? "{" . $devs . "}" + : ( $self->ordered_devs )[0]; } # Terrible breach of encapsulation, but it'll have to do for the moment. @@ -143,7 +154,7 @@ sub _calc_stats_for_deltas { my $delta_for; - for my $dev ( grep { $self->dev_ok($_) } $self->sorted_devs ) { + foreach my $dev ( grep { $self->dev_ok($_) } $self->ordered_devs ) { my $curr = $self->stats_for($dev); my $against = $self->delta_against($dev); @@ -154,24 +165,35 @@ sub _calc_stats_for_deltas { } } - my $in_progress = $delta_for->{ios_in_progress}; #$curr->{"ios_in_progress"}; - my $tot_in_progress = 0; #$against->{"sum_ios_in_progress"} || 0; - - my $devs_in_group = $self->compute_devs_in_group() || 1; + my $in_progress = $delta_for->{ios_in_progress}; + my $tot_in_progress = 0; + my $devs_in_group = $self->compute_devs_in_group() || 1; my %stats = ( - $self->_calc_read_stats( $delta_for, $elapsed, $devs_in_group ), - $self->_calc_write_stats( $delta_for, $elapsed, $devs_in_group ), + $self->_calc_read_stats( + delta_for => $delta_for, + elapsed => $elapsed, + devs_in_group => $devs_in_group, + ), + $self->_calc_write_stats( + delta_for => $delta_for, + elapsed => $elapsed, + devs_in_group => $devs_in_group, + ), in_progress => - $self->compute_in_progress( $in_progress, $tot_in_progress ), + $self->compute_in_progress( $in_progress, $tot_in_progress ), ); - my %extras = $self->_calc_misc_stats( $delta_for, $elapsed, $devs_in_group, \%stats ); - while ( my ($k, $v) = each %extras ) { - $stats{$k} = $v; - } + my %extras = $self->_calc_misc_stats( + delta_for => $delta_for, + elapsed => $elapsed, + devs_in_group => $devs_in_group, + stats => \%stats, + ); - $stats{dev} = $self->compute_dev( \%stats ); + @stats{ keys %extras } = values %extras; + + $stats{dev} = $self->compute_dev( $devs_in_group ); return \%stats; } diff --git a/lib/DiskstatsMenu.pm b/lib/DiskstatsMenu.pm index 6ea47e3c..99b79032 100644 --- a/lib/DiskstatsMenu.pm +++ b/lib/DiskstatsMenu.pm @@ -37,19 +37,26 @@ require DiskstatsGroupByAll; require DiskstatsGroupByDisk; require DiskstatsGroupBySample; -our $VERSION = '0.01'; - my %actions = ( 'A' => \&group_by, 'D' => \&group_by, 'S' => \&group_by, 'i' => \&hide_inactive_disks, - 'd' => get_new_value_for( "redisplay_interval", "Enter a new redisplay interval in seconds: " ), - 'z' => get_new_value_for( "sample_time", "Enter a new interval between samples in seconds: " ), - 'c' => get_new_regex_for( "column_regex", "Enter a column pattern: " ), - '/' => get_new_regex_for( "device_regex", "Enter a disk/device pattern: " ), + 'd' => get_new_value_for( "redisplay_interval", + "Enter a new redisplay interval in seconds: " ), + 'z' => get_new_value_for( "sample_time", + "Enter a new interval between samples in seconds: " ), + 'c' => get_new_regex_for( "column_regex", + "Enter a column pattern: " ), + '/' => get_new_regex_for( "device_regex", + "Enter a disk/device pattern: " ), + # Magical return value. 'q' => sub { return 'last' }, - 'p' => \&pause, + 'p' => sub { + print "Paused - press any key to continue\n"; + pause(@_); + return; + }, '?' => \&help, ); @@ -59,52 +66,39 @@ my %option_to_object = ( S => "DiskstatsGroupBySample", ); -my %object_to_option = reverse %option_to_object; +sub new { + bless {}, shift; +} sub run_interactive { my ($self, %args) = @_; - die "I need an [o] argument" unless $args{o} && blessed($args{o}) - && ( - $args{o}->isa("OptionParser") - || $args{o}->can("get") - ); - my $o = $args{o}; + my @required_args = qw(OptionParser); + foreach my $arg ( @required_args ) { + die "I need a $arg argument" unless $args{$arg}; + } + my ($o) = @args{@required_args}; my %opts = ( - save_samples => $o->get('save-samples') || undef, - samples_to_gather => $o->get('iterations') || undef, - sampling_interval => $o->get('interval') || 1, - redisplay_interval => 1, - sample_time => $o->get('sample-time') || 1, - column_regex => $o->get('columns') || undef, - device_regex => $o->get('devices') || undef, interactive => 1, - filter_zeroed_rows => !$o->get('zero-rows'), + OptionParser => $o, ); - for my $re_key ( grep { $opts{$_} } qw( column_regex device_regex ) ) { - $opts{$re_key} = qr/$opts{$re_key}/i; - } - my ($tmp_fh, $filename, $child_pid, $child_fh); # Here's a big crux of the program. If we have a filename, we don't # need to fork and create a child, just read from it. - if ( $args{filename} ) { - $filename = $args{filename}; - open $tmp_fh, "<", $filename or die "Couldn't open [$filename]: $OS_ERROR"; + if ( $filename = $args{filename} ) { + open $tmp_fh, "<", $filename or die "Cannot open $filename: $OS_ERROR"; } else { - ($tmp_fh, $filename) = file_to_use( $opts{save_samples} ); + ($tmp_fh, $filename) = file_to_use( $o->get('save-samples') ); # fork(), but future-proofing it in case we ever need to speak to # the child $child_pid = open $child_fh, "|-"; - if (not defined $child_pid) { - die "Couldn't fork: $OS_ERROR"; - } + die "Cannot fork: $OS_ERROR" unless defined $child_pid; if ( !$child_pid ) { # Child @@ -113,52 +107,55 @@ sub run_interactive { # so it's easier to track in things like ps. local $PROGRAM_NAME = "$PROGRAM_NAME (data-gathering daemon)"; - close($tmp_fh); - - open my $fh, ">>", $filename or die $!; + close $tmp_fh; gather_samples( gather_while => sub { getppid() }, - samples_to_gather => $opts{samples_to_gather}, - sampling_interval => $opts{sampling_interval}, - filehandle => $fh, + samples_to_gather => $o->get('iterations'), + sampling_interval => $o->get('interval'), + filename => $filename, ); - close $fh or die $!; - unlink $filename unless $opts{save_samples}; + unlink $filename unless $o->get('save-samples'); exit(0); } } + # I don't think either of these are needed actually, since piped opens + # are supposed to deal with children on their own, but it doesn't hurt. local $SIG{CHLD} = 'IGNORE'; local $SIG{PIPE} = 'IGNORE'; STDOUT->autoflush; STDIN->blocking(0); - my $sel = IO::Select->new(\*STDIN); - my $class = $option_to_object{ substr ucfirst($o->get('group-by') || 'Disk'), 0, 1 }; - $opts{obj} = $class->new( %opts ); + my $sel = IO::Select->new(\*STDIN); + my $group_by = $o->get('group-by') || 'disk'; + my $class = $group_by =~ m/disk/i ? 'DiskstatsGroupByDisk' + : $group_by =~ m/sample/i ? 'DiskstatsGroupBySample' + : $group_by =~ m/all/i ? 'DiskstatsGroupByAll' + : die "Invalid --group-by: $group_by"; + $opts{obj} = $class->new( %opts ); if ( $args{filename} ) { group_by( - header_cb => sub { shift->print_header(@_) }, + header_callback => sub { shift->print_header(@_) }, select_obj => $sel, options => \%opts, filehandle => $tmp_fh, - got => substr(ucfirst($o->get('group-by') || 'Disk'), 0, 1), + input => substr(ucfirst($group_by), 0, 1), ); } ReadKeyMini::cbreak(); MAIN_LOOP: while (1) { - if ( my $got = read_command_timeout($sel, $opts{redisplay_interval} ) ) { - if ($actions{$got}) { - my $ret = $actions{$got}->( + if ( my $input = read_command_timeout($sel, $o->get('redisplay-interval') ) ) { + if ($actions{$input}) { + my $ret = $actions{$input}->( select_obj => $sel, options => \%opts, - got => $got, + input => $input, filehandle => $tmp_fh, ) || ''; last MAIN_LOOP if $ret eq 'last'; @@ -167,13 +164,15 @@ sub run_interactive { # As a possible source of confusion, note that this calls the group_by # _method_ in DiskstatsGroupBySomething, not the group_by _function_ # defined below. - $opts{obj}->group_by( filehandle => $tmp_fh, clear_state => 0 ) || 0; + $opts{obj}->group_by( filehandle => $tmp_fh ) || 0; if ( eof $tmp_fh ) { - # If we are gathering samples (don't have a filename), and we have a sample - # limit (set by --iterations), the child process just calls it quits once - # it gathers enough samples. When that happens, we are also done. - if ( !$args{filename} && $opts{samples_to_gather} && kill 0, $child_pid ) { + # If we are gathering samples (don't have a filename), and + # we have a sample limit (set by --iterations), the child + # process just calls it quits once it gathers enough samples. + # When that happens, we are also done. + if ( !$args{filename} && $o->get('iterations') + && kill 0, $child_pid ) { last MAIN_LOOP; } @@ -189,8 +188,8 @@ sub run_interactive { waitpid $child_pid, 0; } - close($tmp_fh) or die "Couldn't close: $OS_ERROR"; - return; + close $tmp_fh or die "Cannot close: $OS_ERROR"; + return 0; # Exit status } sub read_command_timeout { @@ -206,26 +205,31 @@ sub gather_samples { my $samples = 0; STDIN->blocking(0); - my $sel = IO::Select->new(\*STDIN); + my $sel = IO::Select->new(\*STDIN); + my $filename = $opts{filename}; GATHER_DATA: while ( $opts{gather_while}->() ) { if ( read_command_timeout( $sel, $opts{sampling_interval} ) ) { last GATHER_DATA; } + + open my $fh, ">>", $filename or die $OS_ERROR; open my $diskstats_fh, "<", "/proc/diskstats" - or die $!; + or die $OS_ERROR; my @to_print = `date +'TS %s.%N %F %T'`; push @to_print, <$diskstats_fh>; # Lovely little method from IO::Handle: turns on autoflush, # prints, and then restores the original autoflush state. - $opts{filehandle}->printflush(@to_print); - close $diskstats_fh or die $!; + $fh->printflush(@to_print); + close $diskstats_fh or die $OS_ERROR; + close $fh or die $OS_ERROR; $samples++; - if ( defined($opts{samples_to_gather}) && $samples >= $opts{samples_to_gather} ) { + if ( defined($opts{samples_to_gather}) + && $samples >= $opts{samples_to_gather} ) { last GATHER_DATA; } } @@ -234,17 +238,18 @@ sub gather_samples { sub group_by { my (%args) = @_; + my $input = $args{input}; - my $got = $args{got}; - - if ( ref( $args{options}->{obj} ) ne $option_to_object{$got} ) { + if ( ref( $args{options}->{obj} ) ne $option_to_object{$input} ) { # Particularly important! Otherwise we would depend on the # object's ->new being smart about discarding unrecognized # values. delete $args{options}->{obj}; # This would fail on a stricter constructor, so it probably # needs fixing. - $args{options}->{obj} = $option_to_object{$got}->new( %{$args{options}}); + $args{options}->{obj} = $option_to_object{$input}->new( + %{$args{options}} + ); } seek $args{filehandle}, 0, 0; @@ -259,7 +264,7 @@ sub group_by { $obj->group_by( filehandle => $args{filehandle}, # Only print the header once, as if in interactive. - header_cb => $args{header_cb} || sub { + header_callback => $args{header_callback} || sub { my $print_header; return sub { unless ($print_header++) { @@ -273,38 +278,18 @@ sub group_by { } } -# regexp_pattern is used for pretty-printing regexen, since they can stringify to -# different things depending on the version of Perl. Unfortunately, 5.8 -# lacks this, so in that version, we put in a facsimile. -BEGIN { - local $EVAL_ERROR; - - eval { require re; re::regexp_pattern(qr//) }; - if ( $EVAL_ERROR ) { - *regexp_pattern = sub { - my ($re) = @_; - (my $string_re = $re) =~ s/\A\(\?[^:]*?:(.*)\)\z/$1/sm; - return $string_re; - }; - } - else { - re->import("regexp_pattern"); - } -} - sub help { my (%args) = @_; my $obj = $args{options}->{obj}; - my $mode = $object_to_option{ref($obj)}; - my ($column_re) = regexp_pattern( $obj->column_regex() ); - my ($device_re) = regexp_pattern( $obj->device_regex() ); + my $mode = substr ref($obj), 16, 1; + my $column_re = $args{options}->{OptionParser}->get('columns'); + my $device_re = $args{options}->{OptionParser}->get('devices'); my $interval = $obj->sample_time() || '(none)'; - my $disp_int = $args{options}->{redisplay_interval} || '(none)'; + my $disp_int = $args{options}->{OptionParser}->get('redisplay-interval'); my $inact_disk = $obj->filter_zeroed_rows() ? 'yes' : 'no'; for my $re ( $column_re, $device_re ) { $re ||= '(none)'; - $re =~ s/^\Q(?=)\E$/(none)/; } print <<"HELP"; @@ -321,6 +306,7 @@ sub help { ------------------- Press any key to continue ----------------------- HELP pause(@_); + return; } sub file_to_use { @@ -332,13 +318,15 @@ sub file_to_use { if ( $filename ) { open my $fh, "<", $filename - or die "Couldn't open $filename: $OS_ERROR"; + or die "Cannot open $filename: $OS_ERROR"; return $fh, $filename; } else { local $EVAL_ERROR; if ( !eval { require File::Temp } ) { - die "Can't call mktemp nor load File::Temp. Install either of those, or pass in an explicit filename through --save-samples."; + die "Can't call mktemp nor load File::Temp. + Install either of those, or pass in an explicit + filename through --save-samples."; } my $dir = File::Temp::tempdir( CLEANUP => 1 ); return File::Temp::tempfile( @@ -350,7 +338,7 @@ sub file_to_use { } } -sub get_input { +sub get_blocking_input { my ($message) = @_; STDIN->blocking(1); @@ -365,55 +353,58 @@ sub get_input { } sub hide_inactive_disks { - my (%args) = @_; - my $new_val = !!get_input("Filter inactive rows? (Leave blank for 'No') "); + my (%args) = @_; + my $new_val = get_blocking_input("Filter inactive rows? (Leave blank for 'No') "); - $args{options}->{filter_zeroed_rows} = $new_val; + # Eeep. In OptionParser, "true" means show; in Diskstats, "true" means hide. + # Thus !$new_val for OptionParser + $args{options}->{OptionParser}->set('zero-rows', !$new_val); $args{options}->{obj}->filter_zeroed_rows($new_val); + return; } sub get_new_value_for { my ($looking_for, $message) = @_; + (my $looking_for_o = $looking_for) =~ tr/_/-/; return sub { my (%args) = @_; - my $new_interval = get_input($message); + my $new_interval = get_blocking_input($message) || 0; - $new_interval ||= 0; - - if ( looks_like_number($new_interval) ) { - if ( $args{options}->{obj}->can($looking_for) ) { - $args{options}->{obj}->$looking_for($new_interval); - } - return $args{options}->{$looking_for} = $new_interval; - } - else { - die("invalid timeout specification"); + die "invalid timeout specification" unless looks_like_number($new_interval); + + if ( $args{options}->{obj}->can($looking_for) ) { + $args{options}->{obj}->$looking_for($new_interval); } + $args{options}->{OptionParser}->set($looking_for_o, $new_interval); + return $new_interval; }; } sub get_new_regex_for { my ($looking_for, $message) = @_; + (my $looking_for_o = $looking_for) =~ s/_.*$/s/; return sub { - my (%args) = @_; - my $new_regex = get_input($message); + my (%args) = @_; + my $new_regex = get_blocking_input($message); local $EVAL_ERROR; if ( $new_regex && (my $re = eval { qr/$new_regex/i }) ) { - $args{options}->{$looking_for} = $re; + $args{options}->{obj}->$looking_for( $re ); + $args{options}->{OptionParser}->set($looking_for_o, $new_regex); } elsif ( !$EVAL_ERROR && !$new_regex ) { # This might seem weird, but an empty pattern is # somewhat magical, and basically just asking for trouble. # Instead we give them what awk would, a pattern that always # matches. - $args{options}->{$looking_for} = qr/(?=)/; + $args{options}->{obj}->$looking_for( qr/(?=)/ ); + $args{options}->{OptionParser}->set($looking_for_o, ''); } else { - die("invalid regex specification: $EVAL_ERROR"); + die "invalid regex specification: $EVAL_ERROR"; } - $args{options}->{obj}->$looking_for( $args{options}->{$looking_for} ); + return; }; } diff --git a/lib/ReadKeyMini.pm b/lib/ReadKeyMini.pm index 2fe94116..182f2486 100644 --- a/lib/ReadKeyMini.pm +++ b/lib/ReadKeyMini.pm @@ -39,7 +39,6 @@ use strict; use English qw(-no_match_vars); use constant MKDEBUG => $ENV{MKDEBUG} || 0; -use Carp qw( croak ); use POSIX qw( :termios_h ); use base qw( Exporter ); @@ -52,13 +51,15 @@ BEGIN { Term::ReadKey->import(@EXPORT_OK); } else { + # If we don't have Term::ReadKey, fake it. We clobber our own glob, + # ReadKeyMini::Function, and the Term::ReadKey glob, so callers can + # both import it if requested, or even use the fully-qualified name + # without issues. *ReadMode = *Term::ReadKey::ReadMode = \&_ReadMode; *GetTerminalSize = *Term::ReadKey::GetTerminalSize = \&_GetTerminalSize; } } -our $VERSION = '0.01'; - my %modes = ( original => 0, restore => 0, @@ -76,21 +77,20 @@ my %modes = ( my $fd_stdin = fileno(STDIN); my $term = POSIX::Termios->new(); $term->getattr($fd_stdin); - my $oterm = $term->getlflag(); - - my $echo = ECHO | ECHOK | ICANON; - my $noecho = $oterm & ~$echo; + my $oterm = $term->getlflag(); + my $echo = ECHO | ECHOK | ICANON; + my $noecho = $oterm & ~$echo; sub _ReadMode { my $mode = $modes{ $_[0] }; if ( $mode == $modes{normal} ) { - cooked(); + cooked(); } elsif ( $mode == $modes{cbreak} || $mode == $modes{noecho} ) { - cbreak( $mode == $modes{noecho} ? $noecho : $oterm ); + cbreak( $mode == $modes{noecho} ? $noecho : $oterm ); } else { - croak("ReadMore('$_[0]') not supported"); + die("ReadMore('$_[0]') not supported"); } } @@ -112,29 +112,31 @@ my %modes = ( } sub readkey { - my $key = ''; - cbreak(); - sysread(STDIN, $key, 1); - my $timeout = 0.1; - if ( $key eq "\033" ) { # Ugly and broken hack, but good enough for the two minutes it took to write. - { - my $x = ''; - STDIN->blocking(0); - sysread(STDIN, $x, 2); - STDIN->blocking(1); - $key .= $x; - redo if $key =~ /\[[0-2](?:[0-9];)?$/ - } - } - cooked(); - return $key; + my $key = ''; + cbreak(); + sysread(STDIN, $key, 1); + my $timeout = 0.1; + if ( $key eq "\033" ) { # Ugly and broken hack, but good enough for the two minutes it took to write. + # Namely, Ctrl escapes, the F keys, and other stuff you can send from the keyboard + # take more than one "character" to represent, and wrong be wrong to break into pieces. + { + my $x = ''; + STDIN->blocking(0); + sysread(STDIN, $x, 2); + STDIN->blocking(1); + $key .= $x; + redo if $key =~ /\[[0-2](?:[0-9];)?$/ + } + } + cooked(); + return $key; } # As per perlfaq8: sub _GetTerminalSize { if ( @_ ) { - croak "My::Term::ReadKey doesn't implement GetTerminalSize with arguments"; + die "My::Term::ReadKey doesn't implement GetTerminalSize with arguments"; } eval { require 'sys/ioctl.ph' }; if ( !defined &TIOCGWINSZ ) { @@ -147,10 +149,10 @@ sub _GetTerminalSize { : 0x40087468; }; } - open( TTY, "+<", "/dev/tty" ) or croak "No tty: $OS_ERROR"; + open( TTY, "+<", "/dev/tty" ) or die "No tty: $OS_ERROR"; my $winsize = ''; unless ( ioctl( TTY, &TIOCGWINSZ, $winsize ) ) { - croak sprintf "$0: ioctl TIOCGWINSZ (%08x: $OS_ERROR)\n", &TIOCGWINSZ; + die sprintf "$0: ioctl TIOCGWINSZ (%08x: $OS_ERROR)\n", &TIOCGWINSZ; } my ( $row, $col, $xpixel, $ypixel ) = unpack( 'S4', $winsize ); return ( $col, $row, $xpixel, $ypixel ); diff --git a/lib/pt_diskstats.pm b/lib/pt_diskstats.pm index 80160033..6f3ee81b 100644 --- a/lib/pt_diskstats.pm +++ b/lib/pt_diskstats.pm @@ -1,4 +1,12 @@ { +# ########################################################################### +# This is a combination of modules and programs in one -- a runnable module. +# http://www.perl.com/pub/a/2006/07/13/lightning-articles.html?page=last +# Or, look it up in the Camel book on pages 642 and 643 in the 3rd edition. +# +# Check at the end of this package for the call to main() which actually runs +# the program. +# ########################################################################### package pt_diskstats; use strict; @@ -17,27 +25,36 @@ local $SIG{__DIE__} = sub { } if MKDEBUG; sub main { - shift; local @ARGV = @_; # set global ARGV for this package # ######################################################################## # Get configuration information. # ######################################################################## - my $o = OptionParser->new( file => __FILE__ ); + my $o = new OptionParser file => __FILE__; $o->get_specs(); $o->get_opts(); - # Interactive mode. Delegate to Diskstats::Menu - return DiskstatsMenu->run_interactive( o => $o, filename => $ARGV[0] ); + my $diskstats = new DiskstatsMenu; + + # Interactive mode. Delegate to DiskstatsMenu::run_interactive + return $diskstats->run_interactive( OptionParser => $o, filename => $ARGV[0] ); } # Somewhat important if STDOUT is tied to a terminal. END { close STDOUT or die "Couldn't close stdout: $OS_ERROR" } -__PACKAGE__->main(@ARGV) unless caller; +# ############################################################################ +# Run the program. +# ############################################################################ +if ( !caller ) { exit main(@ARGV); } 1; } + +# ############################################################################# +# Documentation. +# ############################################################################# + =pod =head1 NAME @@ -247,6 +264,12 @@ type: int When in interactive mode, stop after N samples. +=item --redisplay-interval + +type: int; default: 1 + +When in interactive mode, wait N seconds before printing to the screen. + =item --interval type: int; default: 1 @@ -257,6 +280,10 @@ Sample /proc/diskstats every N seconds. Show rows with all zero values. +=item --memory-for-speed + +XXX TODO INTERNAL yadda + =item --help Show help and exit. @@ -320,7 +347,7 @@ Replace C with the name of any tool. =head1 AUTHORS -Baron Schwartz +Baron Schwartz, Brian Fraser, and Daniel Nichter =head1 ABOUT PERCONA TOOLKIT diff --git a/t/lib/Diskstats.t b/t/lib/Diskstats.t index 8dc56267..bbf5d7ba 100644 --- a/t/lib/Diskstats.t +++ b/t/lib/Diskstats.t @@ -22,14 +22,18 @@ BEGIN { use_ok "DiskstatsGroupBySample"; } +sub FakeParser::get {}; + { -my $obj = new_ok("Diskstats"); +my $o = bless {}, "FakeParser"; + +my $obj = new_ok(Diskstats => [OptionParser => $o]); can_ok( $obj, qw( out_fh column_regex device_regex filename - block_size sorted_devs clear_state clear_sorted_devs - stats_for previous_stats_for first_stats_for + block_size ordered_devs clear_state clear_ordered_devs + stats_for prev_stats_for first_stats_for has_stats design_print_formats parse_diskstats_line parse_from print_deltas ) ); @@ -47,7 +51,7 @@ for my $attr ( ) { my $attribute = $attr->[0]; my $value = $attr->[1]; - my $test_obj = Diskstats->new( @$attr ); + my $test_obj = Diskstats->new( @$attr, OptionParser => $o ); is( $test_obj->$attribute(), @@ -80,8 +84,8 @@ my %expected_results = ( 'ms_spent_doing_io' => 6869437, 'ms_weighted' => 20744582, - 'ttbyt' => 434566047232, - 'ttreq' => 20139567, + 'ios_requested' => 20139567, + 'ios_in_bytes' => 434566047232, ); # Copypasted from Diskstats.pm. If the one in there changes so should this. @@ -107,7 +111,8 @@ my ($dev, $res) = $obj->parse_diskstats_line($line, $obj->block_size); is_deeply( $res, \%expected_results, "parse_diskstats_line works" ); -$obj->column_regex(qr/./); + # qr/ \A (?!.*io_s$|\s*[qs]time$) /x +$obj->column_regex(qr/cnc|rt|busy|prg|[mk]b|[dr]_s|mrg/); my ($header, $rest, $cols) = $obj->design_print_formats(); is($header, join(" ", q{%5s %-6s}, map { $_->[0] } @columns_in_order), "design_print_formats: sanity check for defaults"); @@ -128,7 +133,7 @@ is( "design_print_formats respects column_regex" ); -$obj->column_regex(qr//); +$obj->column_regex(qr/./); ($header, $rest, $cols) = $obj->design_print_formats( max_device_length => 10, columns => [] @@ -160,7 +165,7 @@ throws_ok( sub { $obj->design_print_formats( columns => {} ) }, qr/The columns argument to design_print_formats should be an arrayref/, "design_print_formats dies when passed an invalid columns argument"); -for my $meth ( qw( current_ts previous_ts first_ts ) ) { +for my $meth ( qw( curr_ts prev_ts first_ts ) ) { ok(!$obj->$meth(), "Diskstats->$meth is initially false"); $obj->$meth(10); @@ -180,34 +185,34 @@ is($obj->out_fh(), \*STDOUT, "and if we close the set filehandle, it reverts to is_deeply( - [ $obj->sorted_devs() ], + [ $obj->ordered_devs() ], [], - "sorted_devs starts empty" + "ordered_devs starts empty" ); -$obj->add_sorted_devs("sda"); +$obj->add_ordered_dev("sda"); is_deeply( - [ $obj->sorted_devs() ], + [ $obj->ordered_devs() ], [ qw( sda ) ], "We can add devices just fine," ); -$obj->add_sorted_devs("sda"); +$obj->add_ordered_dev("sda"); is_deeply( - [ $obj->sorted_devs() ], + [ $obj->ordered_devs() ], [ qw( sda ) ], "...And duplicates get detected and discarded" ); -$obj->clear_sorted_devs(); +$obj->clear_ordered_devs(); is_deeply( - [ $obj->sorted_devs() ], + [ $obj->ordered_devs() ], [], - "clear_sorted_devs does as advertized," + "clear_ordered_devs does as advertized," ); -$obj->add_sorted_devs("sda"); +$obj->add_ordered_dev("sda"); is_deeply( - [ $obj->sorted_devs() ], + [ $obj->ordered_devs() ], [ qw( sda ) ], "...And clears the internal duplicate-checking list" ); @@ -252,14 +257,20 @@ is( "INTERNAL: _print_header works" ); -throws_ok( +my $output = output( sub { $obj->parse_from_data( "ASMFHNASJNFASKLFLKHNSKD" ); }, + stderr => 1, +); + +like( + $output, qr/isn't in the diskstats format/, "->parse_from and friends fail on malformed data" ); } # Common tests for all three subclasses +my $o = bless {}, "FakeParser"; for my $test ( { class => "DiskstatsGroupByAll", @@ -276,11 +287,11 @@ for my $test ( method => "group_by_sample", results_file_prefix => "sample", }) { - my $obj = $test->{class}->new(); + my $obj = $test->{class}->new(OptionParser => $o, filter_zeroed_rows => 0); my $method = $test->{method}; my $prefix = $test->{results_file_prefix}; - $obj->column_regex(qr/ ^ (?!.*io_s$|\s*[qs]time$) /x); + $obj->column_regex(qr/ \A (?!.*io_s$|\s*[qs]time$) /x); for my $filename ( map "diskstats-00$_.txt", 1..5 ) { my $file = File::Spec->catfile( "t", "pt-diskstats", "samples", $filename ); @@ -347,8 +358,8 @@ EOF ok(!$got, "$method: 1 line of data between two TS lines results in no output"); - $obj->current_ts(0); - $obj->previous_ts(0); + $obj->curr_ts(0); + $obj->prev_ts(0); $obj->first_ts(0); throws_ok( diff --git a/t/pt-diskstats/pt-diskstats.t b/t/pt-diskstats/pt-diskstats.t index 21ed97cc..54dd6cd4 100644 --- a/t/pt-diskstats/pt-diskstats.t +++ b/t/pt-diskstats/pt-diskstats.t @@ -43,7 +43,7 @@ for my $ext ( qw( all disk sample ) ) { tie local *STDIN, "Test"; my $file = File::Spec->catfile( $trunk, "t", "pt-diskstats", "samples", $filename ); - pt_diskstats->main( + pt_diskstats::main( "--group-by" => $ext, "--columns" => "cnc|rt|mb|busy|prg", "--zero-rows",