#!/usr/bin/env bash # This program is part of Percona Toolkit: http://www.percona.com/software/ # See "COPYRIGHT, LICENSE, AND WARRANTY" at the end of this file for legal # notices and disclaimers. usage() { if [ "${OPT_ERR}" ]; then echo "${OPT_ERR}" >&2 fi echo "Usage: pt-usl [OPTIONS] [FILES]" >&2 echo "For more information, 'man pt-usl' or 'perldoc $0'." >&2 exit 1 } # Converts SHOW GLOBAL STATUS into concurrency-vs-throughput. There are # basically three things we are interested in from SHOW GLOBAL STATUS. # Questions 118357171 # Threads_running 8 # Uptime 614909 # # Command-line options (not optional) # -i The time interval over which to aggregate. # -n The number of slaves connected to the server, and # thus running Binlog Dump commands. # -m The max number of threads possible (to filter outliers). # # XXX In the future, when we see Uptime, we need to check whether it is greater # than what we saw the first time. If we see it decrease or stay the same, then # this output probably came from "mysqladmin ext -ri" For now we assume not. convert_globalstatus() { # Get the -n command-line option. for o; do case "${o}" in --) break; ;; -i) shift; INTERVAL="${1}"; shift; ;; -m) shift; THREADS_MAX="${1}"; shift; ;; -n) shift; NUM_SLAVES="${1}"; shift; ;; esac done cat > /tmp/aspersa <<-EOF BEGIN { threads = 0; threads_sum = 0; threads_max = ${THREADS_MAX}; questions = 0; start_questions = 0; samples = 0; skipped = 0; start = 0; } /Threads_running/ { # There will always be at least 1 thread running, the one doing SHOW # STATUS. And if there are slaves doing Binlog Dump, they really do # not count as concurrency, so we remove them too. threads = \$2 - 1 - ${NUM_SLAVES}; if ( threads_max > 0 && threads > threads_max ) { # We do not count these. Later we'll adjust the denominator of the # average thread count accordingly. skipped++; } else { threads_sum += threads; } } /Questions/ { questions = \$2; if ( start_questions == 0 ) { # Initial condition, runs only once. start_questions = questions; } } /Uptime/ { end = \$2; if ( start == 0 ) { # Initial condition, runs only once. start = end; } # This is where the main work takes place. We compute the concurrency # over the interval as the average of Threads_running. elapsed = end - start; if ( elapsed > 0 && elapsed >= ${INTERVAL} && samples > skipped ) { concurrency = threads_sum / (samples - skipped + 1); throughput = (questions - start_questions) / elapsed; printf "%f %f\\n", concurrency, throughput; start_questions = questions; start = end; samples = 0; skipped = 0; threads_sum = threads; if ( threads_max > 0 && threads_sum > threads_max ) { threads_sum = threads_max; } } samples++; } EOF awk -f /tmp/aspersa "$@" } # Converts tcpdump into concurrency-vs-throughput. Use a tcpdump command line # such as the following: # sudo tcpdump -s 384 -i any -nnq -tttt -c 10 'tcp port 3306 and (((ip[2:2] - ((ip[0]&0xf)<<2)) - ((tcp[12]&0xf0)>>2)) != 0)' # # Command-line options (not optional) # -P The TCP port that the server is listening on. convert_tcpdump() { # For ease of testing, this process is split into two parts, and this # function is just a wrapper around helpers that do the main work. convert_tcpdump_tabulate "$@" | convert_tcpdump_tabulated_to_1sec } # Accepts the same input as convert_tcpdump(). Output fields: # 1 - date # 2 - time # 3 - timestamp as a number (NOTE: doesn't handle wrapping past midnight) # 4 - client IP address and port number # 5 - response time # 6 - number of requests still pending / in progress # 7 - total time spent serving requests (increases while #6 is nonzero) # 8 - weighted time spent serving requests (incremented by #6 * elapsed) convert_tcpdump_tabulate() { # Get the command-line options. for o; do case "${o}" in --) break; ;; -P) shift; PORT="${1}"; shift; ;; esac done cat > /tmp/aspersa-convert_tcpdump_tabulate <<-EOF BEGIN { watch_port = ${PORT}; from_pat = "[.]" watch_port "$"; # Matches IP.port combination to_pat = "[.]" watch_port ":$"; pending = 0; # The number of requests that haven't been replied yet. # Other global variables: # current # The queries that have been sent to the server. # last_ts # Used for computing weighted time spent doing queries. # busy # Like Field 10 in /proc/diskstats: increases if pending > 0. # weighted # The time spent doing IO, like Field 11 in /proc/diskstats. } # Ignore any zero-length, it's usually just an ack or something. # ts = \$1 \$2 # from = \$4 # to = \$6 \$NF > 0 { ts = (3600 * substr(\$2, 1, 2)) + (60 * substr(\$2, 4, 2)) + substr(\$2, 7); # Packets from the client. if ( \$6 ~ to_pat ) { client = \$4; if ( !current[client] ) { if ( ${MKDEBUG:-0} > 0 ) { printf "MKDEBUG: new request from '%s' at line %d\\n", client, NR; } current[client] = ts; if ( last_ts ) { elapsed = ts - last_ts; if ( ${MKDEBUG:-0} > 0 ) { printf "MKDEBUG: weighted (%.6f) += %.6f\\n", weighted, elapsed; } weighted += pending * elapsed; if ( pending > 0 ) { if ( ${MKDEBUG:-0} > 0 ) { printf "MKDEBUG: busy (%.6f) += %.6f\\n", busy, elapsed; } busy += (ts - last_ts); } } last_ts = ts; pending++; } else if ( ${MKDEBUG:-0} > 0 ) { print "MKDEBUG: existing request for client"; } } # Packets from the server we're watching to the client. else if ( \$4 ~ from_pat ) { client = substr(\$6, 1, length(\$6) - 1); if ( current[client] ) { if ( ${MKDEBUG:-0} > 0 ) { printf "MKDEBUG: reply to '%s' at line %d\\n", client, NR; } rt = ts - current[client]; weighted += pending * (ts - last_ts); busy += (ts - last_ts); last_ts = ts; pending--; delete current[client]; printf("%s %s %12.6f %-21s %10.6f %3d %10.6f %10.6f\\n", \$1, \$2, ts, client, rt, pending, busy, weighted); } else if ( ${MKDEBUG:-0} > 0 ) { printf "MKDEBUG: reply to '%s' at line %d (DNE)\\n", client, NR; } } } END { if ( ${MKDEBUG:-0} > 0 ) { printf "MKDEBUG: %d sessions currently open\\n", pending; for (c in current) { if ( current[c] ) { printf "MKDEBUG: client '%s' started %.6f\\n", c, current[c]; } } } } EOF awk -f /tmp/aspersa-convert_tcpdump_tabulate "$@" } # Takes in the output of convert_tcpdump_tabulate() and outputs one line per # second, showing the concurrency and throughput for that second. convert_tcpdump_tabulated_to_1sec() { cat > /tmp/aspersa-convert_tcpdump_tabulated_to_1sec <<-EOF { if ( !ts ) { # Initial condition ts = \$3; # timestamp ct = 0; # count of queries bt = \$7; # busy time wt = \$8; # weighted busy time } if ( \$3 >= ts + 1 ) { concurrency = (\$8 - wt) / (\$7 - bt); throughput = ct / (\$7 - bt); printf "%.6f %.6f %s %s\\n", concurrency, throughput, \$1, \$2; ts = \$3; # timestamp ct = 0; # count of queries bt = \$7; # busy time wt = \$8; # weighted busy time } else { ct++; } } EOF awk -f /tmp/aspersa-convert_tcpdump_tabulated_to_1sec "$@" } # To find the deviation from linearity, we have to find the C(N) for N=1 (or if # 1 is not available, then we interpolate, which may require human judgment, as # in Gunther p.94). We take an average. Input columns must be N and C # (throughput). The command-line parameter is the N value we wish to look for. find_C_of_one() { cat > /tmp/aspersa <<-EOF /^[^#]/ { if ( \$1 == $1 ) { count++; sum += \$2; } } END { if ( count > 0 ) { print (sum/count) / $1; } else { print 0; } } EOF awk -f /tmp/aspersa "$2" } # The main code that runs by default. Arguments are the command-line options. main() { echo "# Command-line: $0 $@" # Get command-line options. for o; do case "${o}" in --) break; ;; -a) shift; X_AXIS="node count"; ;; -c) shift; CONV="${1}"; shift; ;; -d) DEL="0"; shift; ;; -e) shift; ERRL="1"; ;; -i) shift; INT="${1}"; shift; ;; -k) shift; KEEP="${1}"; shift; ;; -l) shift; LIM="${1}"; shift; ;; -L) shift; LTY="${1}"; shift; ;; -m) shift; MXT="${1}"; shift; ;; -n) shift; ADJ="${1}"; shift; ;; -o) shift; ONLY="${1}"; shift; ;; -p) shift; PRE="${1}"; shift; ;; -P) shift; PORT="${1}"; shift; ;; -r) shift; COLOR="color"; ;; -R) shift; REFIT=0; ;; -t) shift; TYP="${1}"; shift; ;; -T) shift; PTY="${1}"; shift; ;; -x) shift; XAD="${1}"; shift; ;; -X) shift; FITC1=", C1"; ;; -*) OPT_ERR="Unknown option '${o}'." usage 1 ;; esac done ERRL=${ERRL:-0} ADJ=${ADJ:-0} DEL=${DEL:-1} INT=${INT:-0} MXT=${MXT:-0} TYP=${TYP:-png} XAD=${XAD:-1} FILE="${KEEP:-/tmp/aspersa-N-C}" EXT="${TYP}" LIM="${LIM:-0}" X_AXIS="${X_AXIS:-concurrency}" PORT="${PORT:-3306}" PTY="${PTY:-6}" REFIT="${REFIT:-1}" FITC1="${FITC1:-}" if [ "${COLOR}" ]; then LTY="${LTY:-lt rgb \"#8B0000\"}" BLUE=" lt rgb \"#0000FF\""; fi # ###################################################################### # Set up the gnuplot instructions for filetype. # ###################################################################### case "${TYP}" in png) GNUPLOT_TERM="set terminal ${TYP}" GNUPLOT_SIZE="" GNUPLOT_SIGMA='sigma' GNUPLOT_KAPPA='kappa' ;; eps|pdf) # We make PDFs as EPSs and then convert them. GNUPLOT_TERM="set terminal postscript eps enhanced ${COLOR} solid" GNUPLOT_SIZE="set size 0.6, 0.6" GNUPLOT_SIGMA='{/Symbol s}' GNUPLOT_KAPPA='{/Symbol k}' if [ "${TYP}" = "pdf" ]; then EXT="eps" fi ;; *) echo "Unknown -t value ${TYP}" usage exit 1 ;; esac GNUPLOT_LABEL="set xlabel 'N (${X_AXIS})'; set ylabel 'C (throughput)'" # ###################################################################### # Convert the input file if needed. # ###################################################################### if [ "${CONV}" ]; then case "${CONV}" in globalstatus) convert_globalstatus -i ${INT} -m ${MXT} -n ${ADJ} "$@" \ > "${FILE}" ;; tcpdump) convert_tcpdump -P $PORT "$@" > "${FILE}" ;; *) echo "Unknown -c value ${CONV}" usage exit 1 esac else cat "$@" > "${FILE}" fi # ###################################################################### # From here on, we have an input file with N in the first column, # and C in the second column. # ###################################################################### # We need to find some data points such as C(1) for subsequent operations. min_N=$(awk 'BEGIN{min=999999}/^[^#]/{if($10){min=$1}}END{print min}' \ "${FILE}"); max_N=$(awk '/^[^#]/{if($1>max){max=$1}}END{print max}' "${FILE}"); max_C=$(awk '/^[^#]/{if($2>max){max=$2}}END{print max}' "${FILE}"); # Find C(1) if it exists, else use C(min(N)). N_one=$(awk '{if($1 == 1) {print 1; exit}}' "${FILE}"); if [ "$N_one" = "1" ]; then C_of_one="$(find_C_of_one 1 "${FILE}")" else C_of_one="$(find_C_of_one ${min_N} "${FILE}")" fi echo "# Using $(gnuplot -V)" echo "# Parameters to the model:" echo "min(N) ${min_N}" echo "max(N) ${max_N}" echo "max(C) ${max_C}" echo "C(1) ${C_of_one} (pre-adjustment by ${XAD})" echo "N=1 ??? ${N_one:-no}" # ###################################################################### # Use gnuplot to 1) find the scalability law parameters and 2) plot the # original data with the model. We have to go about the fitting in a funny # way, because gnuplot can get stuck at a local maximum with the fitting if # we don't give it good starting parameters. So we use the quadratic # equation to figure out the starting parameters, and then refit with the USL # equation for the final result. This results in a better fit. # ###################################################################### # Sets up parameters. We'll always load this file. if [ "${XAD}" != "1" ]; then echo "# Adjusting C(1) by ${XAD}" C_of_one="$(echo ${C_of_one} | awk "{print \$1 * ${XAD}}")"; fi cat > /tmp/aspersa-gnuplot0 <<-EOF ${GNUPLOT_TERM} ${GNUPLOT_SIZE} ${GNUPLOT_LABEL} C1 = ${C_of_one} max_N = ${max_N} max_C = ${max_C} max(a,b) = (a > b) ? a : b min(a,b) = (a > b) ? b : a f(x) = a*x*x + b*x g(x) = x*C1/(1+sigma*(x-1) + kappa*x*(x-1)) EOF # Plot the efficiency relative to C(1). cat > /tmp/aspersa-gnuplot1 <<-EOF set output "${PRE}usl-efficiency.${EXT}" set ylabel "Relative Efficiency" plot 1 title 'Unity', \\ "${FILE}" using 1:(\$2/C1/\$1) pt ${PTY}${BLUE} title 'Computed Efficiency' EOF gnuplot /tmp/aspersa-gnuplot0 /tmp/aspersa-gnuplot1 # Fit the deviation from linearity relative to C(1). echo "# Fitting the transformed data against a 2nd-degree polynomial." cat > /tmp/aspersa-gnuplot1 <<-EOF set fit logfile "/dev/null" # defines a_err and b_err set fit errorvariables fit f(x) '${FILE}' using (\$1-1):(\$1/(\$2/C1)-1) via a, b EOF gnuplot /tmp/aspersa-gnuplot0 /tmp/aspersa-gnuplot1 2> /tmp/aspersa-qfit.log # If everything went OK, then the fit should have converged. if ! grep 'the fit converged' /tmp/aspersa-qfit.log >/dev/null ; then echo "The quadratic regression failed, check /tmp/aspersa-qfit.log" exit 1; fi # Now we can check the log and extract the parameters from it. sed -n -e '/the fit converged/,/^b /p' /tmp/aspersa-qfit.log > /tmp/aspersa.params PARAM_A="$(awk '/^a / { print $3 }' /tmp/aspersa.params)" PARAM_A_ERR="$(awk '/^a / { print $5 }' /tmp/aspersa.params)" PARAM_A_PCT="$(awk '/^a / { print substr($6, 2, length($6)-3) }' /tmp/aspersa.params)" PARAM_B="$(awk '/^b / { print $3 }' /tmp/aspersa.params)" PARAM_B_ERR="$(awk '/^b / { print $5 }' /tmp/aspersa.params)" PARAM_B_PCT="$(awk '/^b / { print substr($6, 2, length($6)-3) }' /tmp/aspersa.params)" echo " a ${PARAM_A} +/- ${PARAM_A_ERR} (${PARAM_A_PCT}%)" echo " b ${PARAM_B} +/- ${PARAM_B_ERR} (${PARAM_B_PCT}%)" # Now find the coefficient of determination, R^2. Although gnuplot # documentation says it is not the best metric, it's the one everyone is used # to, and it needs to be on the graph. FYI, gnuplot prints the # sum_of_squared_errors as the "final sum of squares of residuals." cat > /tmp/aspersa <<-EOF /^[^#]/{ x = \$1 - 1; y = \$1 / (\$2/${C_of_one}) -1; f = ${PARAM_A}*x*x + ${PARAM_B}*x; sum_of_squares += y * y; sum_of_squared_errors += (f - y) * (f - y); } END { print 1 - (sum_of_squared_errors / sum_of_squares); } EOF R_SQUARED="$(awk -f /tmp/aspersa "${FILE}")" echo " R^2 ${R_SQUARED}" cat > /tmp/aspersa-gnuplot1 <<-EOF set output "${PRE}usl-deviation.${EXT}" set ylabel "Deviation From Linearity" set label "R^2 = ${R_SQUARED}" at graph .1, .5 set label "b = ${PARAM_B}" at graph .1, .57 set label "a = ${PARAM_A}" at graph .1, .64 set key left a = ${PARAM_A} b = ${PARAM_B} plot f(x) title 'Modeled' w lines${BLUE}, \\ "${FILE}" using (\$1-1):(\$1/(\$2/C1)-1) pt ${PTY} ${LTY} title 'Measured' # '' using (\$1-1):(\$1/(\$2/C1)):1 w labels EOF gnuplot /tmp/aspersa-gnuplot0 /tmp/aspersa-gnuplot1 # Plot the residual errors to look for a pattern in them. cat > /tmp/aspersa-gnuplot1 <<-EOF set key off set ylabel "Residual Errors" set output "${PRE}usl-quadratic-residuals.${EXT}" a = ${PARAM_A} b = ${PARAM_B} plot "${FILE}" using (\$1-1):((\$1/(\$2/C1)-1)-f(\$1-1)) pt ${PTY}${BLUE} EOF gnuplot /tmp/aspersa-gnuplot0 /tmp/aspersa-gnuplot1 # Plot the residuals squared, for finding outliers and removing them. cat > /tmp/aspersa-gnuplot1 <<-EOF set key off set ylabel "Residual Squared" set output "${PRE}usl-quadratic-squared.${EXT}" a = ${PARAM_A} b = ${PARAM_B} plot "${FILE}" using (\$1-1):((\$1/(\$2/C1)-1)-f(\$1-1))**2:1 pt ${PTY}${BLUE}, \\ '' using 1:((\$1/(\$2/C1)-1)-f(\$1-1))**2:1 w labels EOF gnuplot /tmp/aspersa-gnuplot0 /tmp/aspersa-gnuplot1 if [ "${REFIT}" = "1" ]; then # Re-fit against the Universal Scalability Law -- it will be more # accurate. Put the C(1) point into a new file for this purpose. echo "# Re-fitting against the USL with (a, b-a) as a starting point." echo "# Treating (1, ${C_of_one}) as a point in original measurements." echo "1 ${C_of_one}" > /tmp/aspersa-usl-input-extended cat "${FILE}" >> /tmp/aspersa-usl-input-extended cat > /tmp/aspersa-gnuplot1 <<-EOF a = ${PARAM_A} b = ${PARAM_B} kappa = a sigma = b - kappa set fit logfile "/dev/null" # defines sigma_err and kappa_err set fit errorvariables fit g(x) '/tmp/aspersa-usl-input-extended' using 1:2 via sigma, kappa${FITC1} EOF gnuplot /tmp/aspersa-gnuplot0 /tmp/aspersa-gnuplot1 2> /tmp/aspersa-ufit.log # If everything went OK, then the fit should have converged. if ! grep 'the fit converged' /tmp/aspersa-ufit.log >/dev/null ; then echo "The USL regression failed, check /tmp/aspersa-ufit.log" exit 1; fi # Now check the log again, and extract the parameters from it. if [ "${FITC1}" ]; then sed -n -e '/the fit converged/,/^C1 /p' /tmp/aspersa-ufit.log > /tmp/aspersa.params PARAM_SIGMA="$(awk '/^sigma / { print $3 }' /tmp/aspersa.params)" PARAM_SIGMA_ERR="$(awk '/^sigma / { print $5 }' /tmp/aspersa.params)" PARAM_SIGMA_PCT="$(awk '/^sigma / { print substr($6, 2, length($6)-3) }' /tmp/aspersa.params)" PARAM_KAPPA="$(awk '/^kappa / { print $3 }' /tmp/aspersa.params)" PARAM_KAPPA_ERR="$(awk '/^kappa / { print $5 }' /tmp/aspersa.params)" PARAM_KAPPA_PCT="$(awk '/^kappa / { print substr($6, 2, length($6)-3) }' /tmp/aspersa.params)" PARAM_C_ONE="$(awk '/^C1 / { print $3 }' /tmp/aspersa.params)" PARAM_C_ONE_ERR="$(awk '/^C1 / { print $5 }' /tmp/aspersa.params)" PARAM_C_ONE_PCT="$(awk '/^C1 / { print substr($6, 2, length($6)-3) }' /tmp/aspersa.params)" echo " sigma ${PARAM_SIGMA} +/- ${PARAM_SIGMA_ERR} (${PARAM_SIGMA_PCT}%)" echo " kappa ${PARAM_KAPPA} +/- ${PARAM_KAPPA_ERR} (${PARAM_KAPPA_PCT}%)" echo " C(1) ${PARAM_C_ONE} +/- ${PARAM_C_ONE_ERR} (${PARAM_C_ONE_PCT}%)" else sed -n -e '/the fit converged/,/^kappa /p' /tmp/aspersa-ufit.log > /tmp/aspersa.params PARAM_SIGMA="$(awk '/^sigma / { print $3 }' /tmp/aspersa.params)" PARAM_SIGMA_ERR="$(awk '/^sigma / { print $5 }' /tmp/aspersa.params)" PARAM_SIGMA_PCT="$(awk '/^sigma / { print substr($6, 2, length($6)-3) }' /tmp/aspersa.params)" PARAM_KAPPA="$(awk '/^kappa / { print $3 }' /tmp/aspersa.params)" PARAM_KAPPA_ERR="$(awk '/^kappa / { print $5 }' /tmp/aspersa.params)" PARAM_KAPPA_PCT="$(awk '/^kappa / { print substr($6, 2, length($6)-3) }' /tmp/aspersa.params)" PARAM_C_ONE="${C_of_one}" echo " sigma ${PARAM_SIGMA} +/- ${PARAM_SIGMA_ERR} (${PARAM_SIGMA_PCT}%)" echo " kappa ${PARAM_KAPPA} +/- ${PARAM_KAPPA_ERR} (${PARAM_KAPPA_PCT}%)" echo " C(1) ${PARAM_C_ONE} (not a regression parameter)" fi # Now find the coefficient of determination again, this time against the # USL model. cat > /tmp/aspersa <<-EOF /^[^#]/{ x = \$1; y = \$2; g = x*${PARAM_C_ONE}/(1+${PARAM_SIGMA}*(x-1) + ${PARAM_KAPPA}*x*(x-1)); sum_of_squares += y * y; sum_of_squared_errors += (g - y) * (g - y); } END { print 1 - (sum_of_squared_errors / sum_of_squares); } EOF R_SQUARED="$(awk -f /tmp/aspersa "/tmp/aspersa-usl-input-extended")" echo " R^2 ${R_SQUARED}" # Plot the USL residuals squared, for finding outliers and removing them. cat > /tmp/aspersa-gnuplot1 <<-EOF set key off set ylabel "Residual Errors Squared" set output "${PRE}usl-residuals-squared.${EXT}" sigma = ${PARAM_SIGMA} kappa = ${PARAM_KAPPA} plot "/tmp/aspersa-usl-input-extended" using 1:((g(\$1)-\$2)**2) pt ${PTY}${BLUE}, \\ '' using (\$1+1):((g(\$1)-\$2)**2):1 w labels EOF gnuplot /tmp/aspersa-gnuplot0 /tmp/aspersa-gnuplot1 else # We need to set the PARAM_SIGMA and PARAM_KAPPA stuff. PARAM_C_ONE="${C_of_one}" echo | awk "BEGIN{ a = ${PARAM_A}; a_err = ${PARAM_A_ERR}; b = ${PARAM_B}; b_err = ${PARAM_B_ERR}; printf \"PARAM_SIGMA=%.6f\\n\", b - a; printf \"PARAM_KAPPA=%.6f\\n\", a; print \"PARAM_SIGMA_ERR=0\"; print \"PARAM_SIGMA_PCT=0\"; print \"PARAM_KAPPA_ERR=0\"; print \"PARAM_KAPPA_PCT=0\"; }" > /tmp/aspersa . /tmp/aspersa echo " sigma ${PARAM_SIGMA} +/- ${PARAM_SIGMA_ERR} (${PARAM_SIGMA_PCT}%)" echo " kappa ${PARAM_KAPPA} +/- ${PARAM_KAPPA_ERR} (${PARAM_KAPPA_PCT}%)" fi # Plot the original points and the Universal Scalability Law's predictions. cat > /tmp/aspersa-gnuplot1 <<-EOF sigma = ${PARAM_SIGMA} kappa = ${PARAM_KAPPA} C1 = ${PARAM_C_ONE} sigma_best = sigma - ${PARAM_SIGMA_ERR}; sigma_worst = sigma + ${PARAM_SIGMA_ERR}; kappa_best = kappa - ${PARAM_KAPPA}; kappa_worst = kappa + ${PARAM_KAPPA_ERR}; set output "${PRE}usl-model-vs-actual.${EXT}" N_star = floor(sqrt((1-sigma)/kappa)) modelC = N_star*C1 / (1 + sigma*(N_star-1) + kappa*N_star*(N_star-1)) set label sprintf("Peak capacity is C=%d at N=%d", modelC, N_star) at graph 0.1, 0.9 set label sprintf('${GNUPLOT_SIGMA} = %f', sigma) at graph .5, .3 set label sprintf('${GNUPLOT_KAPPA} = %f', kappa) at graph .5, .25 set label sprintf('R^2 = %f', ${R_SQUARED}) at graph .5, .2 set key bottom xlimit = ${LIM} if ( xlimit == 0 ) xlimit = max(N_star, max_N) * 2 set xrange [0.0:xlimit] set yrange [0:max(max_C, modelC * 1.3)] if ( ${ERRL} == 1 ) plot \\ x*C1/(1+sigma_best*(x-1) + kappa_best*x*(x-1)) title '',\\ x*C1/(1+sigma_worst*(x-1) + kappa_worst*x*(x-1)) title '',\\ x*C1/(1+sigma*(x-1) + kappa*x*(x-1)) title 'Modeled' w lines${BLUE}, \\ "${FILE}" using 1:2 pt ${PTY} ${LTY} title 'Measured' if ( ${ERRL} == 0 ) plot \\ x*C1/(1+sigma*(x-1) + kappa*x*(x-1)) title 'Modeled' w lines${BLUE}, \\ "${FILE}" using 1:2 pt ${PTY} ${LTY} title 'Measured' EOF gnuplot /tmp/aspersa-gnuplot0 /tmp/aspersa-gnuplot1 # Remove files not mentioned in -o. if [ "${ONLY}" ]; then for f in deviation efficiency quadratic-residuals residuals-squared \ quadratic-squared model-vs-actual; do if [[ "${ONLY}" != *$f* ]]; then file="${PRE}usl-${f}.${EXT}" rm -f "${file}" fi done fi # The type "pdf" is really just eps, and now we need to convert it to pdf. if [ "${TYP}" = "pdf" ]; then for f in deviation efficiency quadratic-residuals residuals-squared \ quadratic-squared model-vs-actual; do file="${PRE}usl-${f}.eps" if [ -e "${file}" ]; then epstopdf "${file}" rm -f "${file}" fi done fi # Remove temp files. if [ "${DEL}" = "1" ]; then rm -f /tmp/aspersa{,-N-C,.params,-gnuplot0,-gnuplot1,-qfit.log,-ufit.log,-usl-input-extended} fi } # Execute the program if it was not included from another file. This makes it # possible to include without executing, and thus test. if [ "$(basename "$0")" = "usl" ] || [ "$(basename "$0")" = "bash" -a "$_" = "$0" ]; then main "$@" fi # ############################################################################ # Documentation # ############################################################################ :<<'DOCUMENTATION' =pod =head1 NAME pt-usl - Model Universal Scalability Law. =head1 SYNOPSIS Usage: pt-usl [OPTIONS] [FILES] =head1 DESCRIPTION This tool is based on Neil Gunther's book Guerrilla Capacity Planning. It expects as input a file with columns N (load; concurrency; independentvar) and C (throughput). The file may contain comment lines beginning with the # character. The tool outputs .png images with the deviation, efficiency, residuals, and model-vs-actual data. It also prints out information from the process of fitting the curve to the data, such as gnuplot's error estimates. =head1 OPTIONS Options must precede files on the command line. -a Set X-axis label to 'node count' (default is concurrency). -c CONVERSION Converts the input file into N-vs-C format as specified: globalstatus Convert from MySQL's SHOW GLOBAL STATUS tcpdump Convert from 'tcpdump -tttt -nnq' format -d Don't delete the gnuplot files used to generate the charts. -e Draw error lines on the final plot. -i INTERVAL When -c is given, group the input into -i second intervals. -k KEEPFILE Save the N-vs-C data in the specified file. -l LIMIT X-axis limit for the final plot. -L COLOR The color for plotting points (default lt rgb "#8B0000"). -m THREADS When -c is given, max valid value of N, to filter outliers. -n ADJUSTMENT Adjust the N variable downwards to compensate for known errors such as error of observation. -o ONLY Only produce plots specified in this comma-separated list. -p PREFIX Prefix for the generated image file names. -P PORT TCP port for when -c tcpdump is used (default 3306). -r Render pdf and eps plots in color. -R Don't re-fit the data; use the results from quadratic fit. -t FILETYPE Type for the image files (png, pdf, eps). -T POINTTYPE Point-type and options for the plots (default 6). -x ADJUSTMENT Multiply the C(1) regression parameter by this factor. -X Include C(1) as a fit parameter for USL regression. =head1 TODO * Need to make it optionally make logarithmic X axis graph. Also, apply -i and -n and so on in the main body, not in the converter itself, so that I can convert a file and then manipulate it separately. * I want it to entirely skip samples that have too-large concurrency, as defined by -m. I don't want it to just average the concurrency across the other samples; it will introduce skew into the throughput for that sample, too. =head1 ENVIRONMENT This tool does not use any environment variables. =head1 SYSTEM REQUIREMENTS This tool requires Bash v3 or newer and gnuplot. =head1 BUGS For a list of known bugs, see L. Please report bugs at L. Include the following information in your bug report: =over =item * Complete command-line used to run the tool =item * Tool L<"--version"> =item * MySQL version of all servers involved =item * Output from the tool including STDERR =item * Input files (log/dump/config files, etc.) =back If possible, include debugging output by running the tool with C; see L<"ENVIRONMENT">. =head1 DOWNLOADING Visit L to download the latest release of Percona Toolkit. Or, get the latest release from the command line: wget percona.com/get/percona-toolkit.tar.gz wget percona.com/get/percona-toolkit.rpm wget percona.com/get/percona-toolkit.deb You can also get individual tools from the latest release: wget percona.com/get/TOOL Replace C with the name of any tool. =head1 AUTHORS Baron Schwartz =head1 ABOUT PERCONA TOOLKIT This tool is part of Percona Toolkit, a collection of advanced command-line tools developed by Percona for MySQL support and consulting. Percona Toolkit was forked from two projects in June, 2011: Maatkit and Aspersa. Those projects were created by Baron Schwartz and developed primarily by him and Daniel Nichter, both of whom are employed by Percona. Visit L for more software developed by Percona. =head1 COPYRIGHT, LICENSE, AND WARRANTY This program is copyright 2010-2011 Baron Schwartz, 2011 Percona Inc. Feedback and improvements are welcome. THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2; OR the Perl Artistic License. On UNIX and similar systems, you can issue `man perlgpl' or `man perlartistic' to read these licenses. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. =head1 VERSION Percona Toolkit v1.0.0 released 2011-08-01 =cut DOCUMENTATION