# Files
# percona-toolkit/bin/pt-usl
#
# 888 lines
# 31 KiB
# Bash
# Executable File
#
#!/usr/bin/env bash
# This program is part of Percona Toolkit: http://www.percona.com/software/
# See "COPYRIGHT, LICENSE, AND WARRANTY" at the end of this file for legal
# notices and disclaimers.
usage() {
  # Print an optional error message (in OPT_ERR) followed by the usage
  # synopsis, all on stderr, then terminate the program with status 1.
  [ -n "${OPT_ERR}" ] && printf '%s\n' "${OPT_ERR}" >&2
  printf '%s\n' "Usage: pt-usl [OPTIONS] [FILES]" >&2
  printf '%s\n' "For more information, 'man pt-usl' or 'perldoc $0'." >&2
  exit 1
}
# Converts SHOW GLOBAL STATUS into concurrency-vs-throughput. There are
# basically three things we are interested in from SHOW GLOBAL STATUS.
# Questions 118357171
# Threads_running 8
# Uptime 614909
#
# Command-line options (not optional)
# -i The time interval over which to aggregate.
# -n The number of slaves connected to the server, and
# thus running Binlog Dump commands.
# -m The max number of threads possible (to filter outliers).
#
# XXX In the future, when we see Uptime, we need to check whether it is greater
# than what we saw the first time. If we see it decrease or stay the same, then
# this output probably came from "mysqladmin ext -ri". For now we assume not.
# Read SHOW GLOBAL STATUS samples (from FILES or stdin) and print one
# "concurrency throughput" pair per aggregation interval.
convert_globalstatus() {
# Get the -i, -m and -n command-line options. The shifts consume the
# option/value pairs so that only the input FILES remain in "$@" for awk;
# note that "for o" iterates over a saved copy of the original argument
# list, so the option *values* are also visited but match no case arm.
for o; do
case "${o}" in
--)
break;
;;
-i)
shift; INTERVAL="${1}"; shift;
;;
-m)
shift; THREADS_MAX="${1}"; shift;
;;
-n)
shift; NUM_SLAVES="${1}"; shift;
;;
esac
done
# Generate the awk program into a temp file. INTERVAL, THREADS_MAX and
# NUM_SLAVES are interpolated by the shell at this point; awk's own
# field references are escaped (\$2) so they survive heredoc expansion.
cat > /tmp/aspersa <<-EOF
BEGIN {
threads = 0;
threads_sum = 0;
threads_max = ${THREADS_MAX};
questions = 0;
start_questions = 0;
samples = 0;
skipped = 0;
start = 0;
}
/Threads_running/ {
# There will always be at least 1 thread running, the one doing SHOW
# STATUS. And if there are slaves doing Binlog Dump, they really do
# not count as concurrency, so we remove them too.
threads = \$2 - 1 - ${NUM_SLAVES};
if ( threads_max > 0 && threads > threads_max ) {
# We do not count these. Later we'll adjust the denominator of the
# average thread count accordingly.
skipped++;
}
else {
threads_sum += threads;
}
}
/Questions/ {
questions = \$2;
if ( start_questions == 0 ) { # Initial condition, runs only once.
start_questions = questions;
}
}
/Uptime/ {
end = \$2;
if ( start == 0 ) { # Initial condition, runs only once.
start = end;
}
# This is where the main work takes place. We compute the concurrency
# over the interval as the average of Threads_running.
elapsed = end - start;
if ( elapsed > 0 && elapsed >= ${INTERVAL} && samples > skipped ) {
concurrency = threads_sum / (samples - skipped + 1);
throughput = (questions - start_questions) / elapsed;
printf "%f %f\\n", concurrency, throughput;
start_questions = questions;
start = end;
samples = 0;
skipped = 0;
threads_sum = threads;
if ( threads_max > 0 && threads_sum > threads_max ) {
threads_sum = threads_max;
}
}
samples++;
}
EOF
# After the option shifts above, "$@" holds only the input file names
# (with none left, awk reads stdin).
awk -f /tmp/aspersa "$@"
}
# Converts tcpdump into concurrency-vs-throughput. Use a tcpdump command line
# such as the following:
# sudo tcpdump -s 384 -i any -nnq -tttt -c 10 'tcp port 3306 and (((ip[2:2] - ((ip[0]&0xf)<<2)) - ((tcp[12]&0xf0)>>2)) != 0)'
#
# Command-line options (not optional)
# -P The TCP port that the server is listening on.
convert_tcpdump() {
  # Two-stage wrapper: first tabulate the raw tcpdump lines, then fold
  # the tabulated stream into one-second concurrency/throughput samples.
  # The split into two helpers keeps each stage testable on its own.
  convert_tcpdump_tabulated_to_1sec < <(convert_tcpdump_tabulate "$@")
}
# Accepts the same input as convert_tcpdump(). Output fields:
# 1 - date
# 2 - time
# 3 - timestamp as a number (NOTE: doesn't handle wrapping past midnight)
# 4 - client IP address and port number
# 5 - response time
# 6 - number of requests still pending / in progress
# 7 - total time spent serving requests (increases while #6 is nonzero)
# 8 - weighted time spent serving requests (incremented by #6 * elapsed)
# Tabulate raw "tcpdump -tttt -nnq" lines into the 8-column format
# described in the comment above.
convert_tcpdump_tabulate() {
# Get the command-line options.
for o; do
case "${o}" in
--)
break;
;;
-P)
shift; PORT="${1}"; shift;
;;
esac
done
# Generate the awk program. PORT and MKDEBUG are shell-interpolated;
# escaped \$ references are left for awk. The script tracks at most one
# in-flight request per client address (keyed by IP.port) and emits one
# output line per server response.
cat > /tmp/aspersa-convert_tcpdump_tabulate <<-EOF
BEGIN {
watch_port = ${PORT};
from_pat = "[.]" watch_port "$"; # Matches IP.port combination
to_pat = "[.]" watch_port ":$";
pending = 0; # The number of requests that haven't been replied yet.
# Other global variables:
# current # The queries that have been sent to the server.
# last_ts # Used for computing weighted time spent doing queries.
# busy # Like Field 10 in /proc/diskstats: increases if pending > 0.
# weighted # The time spent doing IO, like Field 11 in /proc/diskstats.
}
# Ignore any zero-length, it's usually just an ack or something.
# ts = \$1 \$2
# from = \$4
# to = \$6
\$NF > 0 {
ts = (3600 * substr(\$2, 1, 2)) + (60 * substr(\$2, 4, 2)) + substr(\$2, 7);
# Packets from the client.
if ( \$6 ~ to_pat ) {
client = \$4;
if ( !current[client] ) {
if ( ${MKDEBUG:-0} > 0 ) {
printf "MKDEBUG: new request from '%s' at line %d\\n", client, NR;
}
current[client] = ts;
if ( last_ts ) {
elapsed = ts - last_ts;
if ( ${MKDEBUG:-0} > 0 ) {
printf "MKDEBUG: weighted (%.6f) += %.6f\\n", weighted, elapsed;
}
weighted += pending * elapsed;
if ( pending > 0 ) {
if ( ${MKDEBUG:-0} > 0 ) {
printf "MKDEBUG: busy (%.6f) += %.6f\\n", busy, elapsed;
}
busy += (ts - last_ts);
}
}
last_ts = ts;
pending++;
}
else if ( ${MKDEBUG:-0} > 0 ) {
print "MKDEBUG: existing request for client";
}
}
# Packets from the server we're watching to the client.
else if ( \$4 ~ from_pat ) {
client = substr(\$6, 1, length(\$6) - 1);
if ( current[client] ) {
if ( ${MKDEBUG:-0} > 0 ) {
printf "MKDEBUG: reply to '%s' at line %d\\n", client, NR;
}
rt = ts - current[client];
weighted += pending * (ts - last_ts);
busy += (ts - last_ts);
last_ts = ts;
pending--;
delete current[client];
printf("%s %s %12.6f %-21s %10.6f %3d %10.6f %10.6f\\n", \$1, \$2, ts, client, rt, pending, busy, weighted);
}
else if ( ${MKDEBUG:-0} > 0 ) {
printf "MKDEBUG: reply to '%s' at line %d (DNE)\\n", client, NR;
}
}
}
END {
if ( ${MKDEBUG:-0} > 0 ) {
printf "MKDEBUG: %d sessions currently open\\n", pending;
for (c in current) {
if ( current[c] ) {
printf "MKDEBUG: client '%s' started %.6f\\n", c, current[c];
}
}
}
}
EOF
# "$@" now holds only file names; the options were shifted away above.
awk -f /tmp/aspersa-convert_tcpdump_tabulate "$@"
}
# Takes in the output of convert_tcpdump_tabulate() and outputs one line per
# second, showing the concurrency and throughput for that second.
# Fold the tabulated stream into one line per second of busy time, each
# holding concurrency, throughput, date and time.
convert_tcpdump_tabulated_to_1sec() {
# Generate the awk program; escaped \$ references are left for awk.
cat > /tmp/aspersa-convert_tcpdump_tabulated_to_1sec <<-EOF
{
if ( !ts ) { # Initial condition
ts = \$3; # timestamp
ct = 0; # count of queries
bt = \$7; # busy time
wt = \$8; # weighted busy time
}
if ( \$3 >= ts + 1 ) {
concurrency = (\$8 - wt) / (\$7 - bt);
throughput = ct / (\$7 - bt);
printf "%.6f %.6f %s %s\\n", concurrency, throughput, \$1, \$2;
ts = \$3; # timestamp
ct = 0; # count of queries
bt = \$7; # busy time
wt = \$8; # weighted busy time
}
else {
ct++;
}
}
EOF
# NOTE(review): if the busy time (field 7) has not advanced when the
# 1-second boundary is crossed, the divisions above are by zero --
# presumably that cannot happen with real captures; confirm.
awk -f /tmp/aspersa-convert_tcpdump_tabulated_to_1sec "$@"
}
# To find the deviation from linearity, we have to find the C(N) for N=1 (or if
# 1 is not available, then we interpolate, which may require human judgment, as
# in Gunther p.94). We take an average. Input columns must be N and C
# (throughput). The command-line parameter is the N value we wish to look for.
find_C_of_one() {
# Average the throughput (column 2) over every non-comment row whose N
# (column 1) equals the requested value ($1), then divide by that N.
# $1 is the N to look for, $2 is the data file; prints 0 if no row
# matches. Writes its awk helper to /tmp/aspersa, which main() cleans up.
cat > /tmp/aspersa <<-EOF
/^[^#]/ && \$1 == $1 { count++; sum += \$2; }
END {
print (count > 0 ? (sum / count) / $1 : 0);
}
EOF
awk -f /tmp/aspersa "$2"
}
# The main code that runs by default. Arguments are the command-line options.
main() {
# Parse options, convert the input into N-vs-C form, fit the USL model
# with gnuplot (quadratic fit first for starting values), and emit the
# plots plus the fitted parameters on stdout.
echo "# Command-line: $0 $@"
# Get command-line options.
for o; do
case "${o}" in
--)
break;
;;
-a)
shift; X_AXIS="node count";
;;
-c)
shift; CONV="${1}"; shift;
;;
-d)
DEL="0"; shift;
;;
-e)
shift; ERRL="1";
;;
-i)
shift; INT="${1}"; shift;
;;
-k)
shift; KEEP="${1}"; shift;
;;
-l)
shift; LIM="${1}"; shift;
;;
-L)
shift; LTY="${1}"; shift;
;;
-m)
shift; MXT="${1}"; shift;
;;
-n)
shift; ADJ="${1}"; shift;
;;
-o)
shift; ONLY="${1}"; shift;
;;
-p)
shift; PRE="${1}"; shift;
;;
-P)
shift; PORT="${1}"; shift;
;;
-r)
shift; COLOR="color";
;;
-R)
shift; REFIT=0;
;;
-t)
shift; TYP="${1}"; shift;
;;
-T)
shift; PTY="${1}"; shift;
;;
-x)
shift; XAD="${1}"; shift;
;;
-X)
shift; FITC1=", C1";
;;
-*)
OPT_ERR="Unknown option '${o}'."
usage 1
;;
esac
done
# Defaults for anything not given on the command line.
ERRL=${ERRL:-0}
ADJ=${ADJ:-0}
DEL=${DEL:-1}
INT=${INT:-0}
MXT=${MXT:-0}
TYP=${TYP:-png}
XAD=${XAD:-1}
FILE="${KEEP:-/tmp/aspersa-N-C}"
EXT="${TYP}"
LIM="${LIM:-0}"
X_AXIS="${X_AXIS:-concurrency}"
PORT="${PORT:-3306}"
PTY="${PTY:-6}"
REFIT="${REFIT:-1}"
FITC1="${FITC1:-}"
if [ "${COLOR}" ]; then
LTY="${LTY:-lt rgb \"#8B0000\"}"
BLUE=" lt rgb \"#0000FF\"";
fi
# ######################################################################
# Set up the gnuplot instructions for filetype.
# ######################################################################
case "${TYP}" in
png)
GNUPLOT_TERM="set terminal ${TYP}"
GNUPLOT_SIZE=""
GNUPLOT_SIGMA='sigma'
GNUPLOT_KAPPA='kappa'
;;
eps|pdf)
# We make PDFs as EPSs and then convert them.
GNUPLOT_TERM="set terminal postscript eps enhanced ${COLOR} solid"
GNUPLOT_SIZE="set size 0.6, 0.6"
GNUPLOT_SIGMA='{/Symbol s}'
GNUPLOT_KAPPA='{/Symbol k}'
if [ "${TYP}" = "pdf" ]; then
EXT="eps"
fi
;;
*)
echo "Unknown -t value ${TYP}"
usage
exit 1
;;
esac
GNUPLOT_LABEL="set xlabel 'N (${X_AXIS})'; set ylabel 'C (throughput)'"
# ######################################################################
# Convert the input file if needed.
# ######################################################################
if [ "${CONV}" ]; then
case "${CONV}" in
globalstatus)
convert_globalstatus -i ${INT} -m ${MXT} -n ${ADJ} "$@" \
> "${FILE}"
;;
tcpdump)
convert_tcpdump -P $PORT "$@" > "${FILE}"
;;
*)
echo "Unknown -c value ${CONV}"
usage
exit 1
esac
else
cat "$@" > "${FILE}"
fi
# ######################################################################
# From here on, we have an input file with N in the first column,
# and C in the second column.
# ######################################################################
# We need to find some data points such as C(1) for subsequent operations.
min_N=$(awk 'BEGIN{min=999999}/^[^#]/{if($1<min&&$1>0){min=$1}}END{print min}' \
"${FILE}");
max_N=$(awk '/^[^#]/{if($1>max){max=$1}}END{print max}' "${FILE}");
max_C=$(awk '/^[^#]/{if($2>max){max=$2}}END{print max}' "${FILE}");
# Find C(1) if it exists, else use C(min(N)).
N_one=$(awk '{if($1 == 1) {print 1; exit}}' "${FILE}");
if [ "$N_one" = "1" ]; then
C_of_one="$(find_C_of_one 1 "${FILE}")"
else
C_of_one="$(find_C_of_one ${min_N} "${FILE}")"
fi
echo "# Using $(gnuplot -V)"
echo "# Parameters to the model:"
echo "min(N) ${min_N}"
echo "max(N) ${max_N}"
echo "max(C) ${max_C}"
echo "C(1) ${C_of_one} (pre-adjustment by ${XAD})"
echo "N=1 ??? ${N_one:-no}"
# ######################################################################
# Use gnuplot to 1) find the scalability law parameters and 2) plot the
# original data with the model. We have to go about the fitting in a funny
# way, because gnuplot can get stuck at a local maximum with the fitting if
# we don't give it good starting parameters. So we use the quadratic
# equation to figure out the starting parameters, and then refit with the USL
# equation for the final result. This results in a better fit.
# ######################################################################
# Sets up parameters. We'll always load this file.
if [ "${XAD}" != "1" ]; then
echo "# Adjusting C(1) by ${XAD}"
C_of_one="$(echo ${C_of_one} | awk "{print \$1 * ${XAD}}")";
fi
cat > /tmp/aspersa-gnuplot0 <<-EOF
${GNUPLOT_TERM}
${GNUPLOT_SIZE}
${GNUPLOT_LABEL}
C1 = ${C_of_one}
max_N = ${max_N}
max_C = ${max_C}
max(a,b) = (a > b) ? a : b
min(a,b) = (a > b) ? b : a
f(x) = a*x*x + b*x
g(x) = x*C1/(1+sigma*(x-1) + kappa*x*(x-1))
EOF
# Plot the efficiency relative to C(1).
cat > /tmp/aspersa-gnuplot1 <<-EOF
set output "${PRE}usl-efficiency.${EXT}"
set ylabel "Relative Efficiency"
plot 1 title 'Unity', \\
"${FILE}" using 1:(\$2/C1/\$1) pt ${PTY}${BLUE} title 'Computed Efficiency'
EOF
gnuplot /tmp/aspersa-gnuplot0 /tmp/aspersa-gnuplot1
# Fit the deviation from linearity relative to C(1).
echo "# Fitting the transformed data against a 2nd-degree polynomial."
cat > /tmp/aspersa-gnuplot1 <<-EOF
set fit logfile "/dev/null"
# defines a_err and b_err
set fit errorvariables
fit f(x) '${FILE}' using (\$1-1):(\$1/(\$2/C1)-1) via a, b
EOF
gnuplot /tmp/aspersa-gnuplot0 /tmp/aspersa-gnuplot1 2> /tmp/aspersa-qfit.log
# If everything went OK, then the fit should have converged.
if ! grep 'the fit converged' /tmp/aspersa-qfit.log >/dev/null ; then
echo "The quadratic regression failed, check /tmp/aspersa-qfit.log"
exit 1;
fi
# Now we can check the log and extract the parameters from it.
sed -n -e '/the fit converged/,/^b /p' /tmp/aspersa-qfit.log > /tmp/aspersa.params
PARAM_A="$(awk '/^a / { print $3 }' /tmp/aspersa.params)"
PARAM_A_ERR="$(awk '/^a / { print $5 }' /tmp/aspersa.params)"
PARAM_A_PCT="$(awk '/^a / { print substr($6, 2, length($6)-3) }' /tmp/aspersa.params)"
PARAM_B="$(awk '/^b / { print $3 }' /tmp/aspersa.params)"
PARAM_B_ERR="$(awk '/^b / { print $5 }' /tmp/aspersa.params)"
PARAM_B_PCT="$(awk '/^b / { print substr($6, 2, length($6)-3) }' /tmp/aspersa.params)"
echo " a ${PARAM_A} +/- ${PARAM_A_ERR} (${PARAM_A_PCT}%)"
echo " b ${PARAM_B} +/- ${PARAM_B_ERR} (${PARAM_B_PCT}%)"
# Now find the coefficient of determination, R^2. Although gnuplot
# documentation says it is not the best metric, it's the one everyone is used
# to, and it needs to be on the graph. FYI, gnuplot prints the
# sum_of_squared_errors as the "final sum of squares of residuals."
cat > /tmp/aspersa <<-EOF
/^[^#]/{
x = \$1 - 1;
y = \$1 / (\$2/${C_of_one}) -1;
f = ${PARAM_A}*x*x + ${PARAM_B}*x;
sum_of_squares += y * y;
sum_of_squared_errors += (f - y) * (f - y);
}
END {
print 1 - (sum_of_squared_errors / sum_of_squares);
}
EOF
R_SQUARED="$(awk -f /tmp/aspersa "${FILE}")"
echo " R^2 ${R_SQUARED}"
cat > /tmp/aspersa-gnuplot1 <<-EOF
set output "${PRE}usl-deviation.${EXT}"
set ylabel "Deviation From Linearity"
set label "R^2 = ${R_SQUARED}" at graph .1, .5
set label "b = ${PARAM_B}" at graph .1, .57
set label "a = ${PARAM_A}" at graph .1, .64
set key left
a = ${PARAM_A}
b = ${PARAM_B}
plot f(x) title 'Modeled' w lines${BLUE}, \\
"${FILE}" using (\$1-1):(\$1/(\$2/C1)-1) pt ${PTY} ${LTY} title 'Measured'
# '' using (\$1-1):(\$1/(\$2/C1)):1 w labels
EOF
gnuplot /tmp/aspersa-gnuplot0 /tmp/aspersa-gnuplot1
# Plot the residual errors to look for a pattern in them.
cat > /tmp/aspersa-gnuplot1 <<-EOF
set key off
set ylabel "Residual Errors"
set output "${PRE}usl-quadratic-residuals.${EXT}"
a = ${PARAM_A}
b = ${PARAM_B}
plot "${FILE}" using (\$1-1):((\$1/(\$2/C1)-1)-f(\$1-1)) pt ${PTY}${BLUE}
EOF
gnuplot /tmp/aspersa-gnuplot0 /tmp/aspersa-gnuplot1
# Plot the residuals squared, for finding outliers and removing them.
cat > /tmp/aspersa-gnuplot1 <<-EOF
set key off
set ylabel "Residual Squared"
set output "${PRE}usl-quadratic-squared.${EXT}"
a = ${PARAM_A}
b = ${PARAM_B}
plot "${FILE}" using (\$1-1):((\$1/(\$2/C1)-1)-f(\$1-1))**2:1 pt ${PTY}${BLUE}, \\
'' using 1:((\$1/(\$2/C1)-1)-f(\$1-1))**2:1 w labels
EOF
gnuplot /tmp/aspersa-gnuplot0 /tmp/aspersa-gnuplot1
if [ "${REFIT}" = "1" ]; then
# Re-fit against the Universal Scalability Law -- it will be more
# accurate. Put the C(1) point into a new file for this purpose.
echo "# Re-fitting against the USL with (a, b-a) as a starting point."
echo "# Treating (1, ${C_of_one}) as a point in original measurements."
echo "1 ${C_of_one}" > /tmp/aspersa-usl-input-extended
cat "${FILE}" >> /tmp/aspersa-usl-input-extended
cat > /tmp/aspersa-gnuplot1 <<-EOF
a = ${PARAM_A}
b = ${PARAM_B}
kappa = a
sigma = b - kappa
set fit logfile "/dev/null"
# defines sigma_err and kappa_err
set fit errorvariables
fit g(x) '/tmp/aspersa-usl-input-extended' using 1:2 via sigma, kappa${FITC1}
EOF
gnuplot /tmp/aspersa-gnuplot0 /tmp/aspersa-gnuplot1 2> /tmp/aspersa-ufit.log
# If everything went OK, then the fit should have converged.
if ! grep 'the fit converged' /tmp/aspersa-ufit.log >/dev/null ; then
echo "The USL regression failed, check /tmp/aspersa-ufit.log"
exit 1;
fi
# Now check the log again, and extract the parameters from it.
if [ "${FITC1}" ]; then
sed -n -e '/the fit converged/,/^C1 /p' /tmp/aspersa-ufit.log > /tmp/aspersa.params
PARAM_SIGMA="$(awk '/^sigma / { print $3 }' /tmp/aspersa.params)"
PARAM_SIGMA_ERR="$(awk '/^sigma / { print $5 }' /tmp/aspersa.params)"
PARAM_SIGMA_PCT="$(awk '/^sigma / { print substr($6, 2, length($6)-3) }' /tmp/aspersa.params)"
PARAM_KAPPA="$(awk '/^kappa / { print $3 }' /tmp/aspersa.params)"
PARAM_KAPPA_ERR="$(awk '/^kappa / { print $5 }' /tmp/aspersa.params)"
PARAM_KAPPA_PCT="$(awk '/^kappa / { print substr($6, 2, length($6)-3) }' /tmp/aspersa.params)"
PARAM_C_ONE="$(awk '/^C1 / { print $3 }' /tmp/aspersa.params)"
PARAM_C_ONE_ERR="$(awk '/^C1 / { print $5 }' /tmp/aspersa.params)"
PARAM_C_ONE_PCT="$(awk '/^C1 / { print substr($6, 2, length($6)-3) }' /tmp/aspersa.params)"
echo " sigma ${PARAM_SIGMA} +/- ${PARAM_SIGMA_ERR} (${PARAM_SIGMA_PCT}%)"
echo " kappa ${PARAM_KAPPA} +/- ${PARAM_KAPPA_ERR} (${PARAM_KAPPA_PCT}%)"
echo " C(1) ${PARAM_C_ONE} +/- ${PARAM_C_ONE_ERR} (${PARAM_C_ONE_PCT}%)"
else
sed -n -e '/the fit converged/,/^kappa /p' /tmp/aspersa-ufit.log > /tmp/aspersa.params
PARAM_SIGMA="$(awk '/^sigma / { print $3 }' /tmp/aspersa.params)"
PARAM_SIGMA_ERR="$(awk '/^sigma / { print $5 }' /tmp/aspersa.params)"
PARAM_SIGMA_PCT="$(awk '/^sigma / { print substr($6, 2, length($6)-3) }' /tmp/aspersa.params)"
PARAM_KAPPA="$(awk '/^kappa / { print $3 }' /tmp/aspersa.params)"
PARAM_KAPPA_ERR="$(awk '/^kappa / { print $5 }' /tmp/aspersa.params)"
PARAM_KAPPA_PCT="$(awk '/^kappa / { print substr($6, 2, length($6)-3) }' /tmp/aspersa.params)"
PARAM_C_ONE="${C_of_one}"
echo " sigma ${PARAM_SIGMA} +/- ${PARAM_SIGMA_ERR} (${PARAM_SIGMA_PCT}%)"
echo " kappa ${PARAM_KAPPA} +/- ${PARAM_KAPPA_ERR} (${PARAM_KAPPA_PCT}%)"
echo " C(1) ${PARAM_C_ONE} (not a regression parameter)"
fi
# Now find the coefficient of determination again, this time against the
# USL model.
cat > /tmp/aspersa <<-EOF
/^[^#]/{
x = \$1;
y = \$2;
g = x*${PARAM_C_ONE}/(1+${PARAM_SIGMA}*(x-1) + ${PARAM_KAPPA}*x*(x-1));
sum_of_squares += y * y;
sum_of_squared_errors += (g - y) * (g - y);
}
END {
print 1 - (sum_of_squared_errors / sum_of_squares);
}
EOF
R_SQUARED="$(awk -f /tmp/aspersa "/tmp/aspersa-usl-input-extended")"
echo " R^2 ${R_SQUARED}"
# Plot the USL residuals squared, for finding outliers and removing them.
cat > /tmp/aspersa-gnuplot1 <<-EOF
set key off
set ylabel "Residual Errors Squared"
set output "${PRE}usl-residuals-squared.${EXT}"
sigma = ${PARAM_SIGMA}
kappa = ${PARAM_KAPPA}
plot "/tmp/aspersa-usl-input-extended" using 1:((g(\$1)-\$2)**2) pt ${PTY}${BLUE}, \\
'' using (\$1+1):((g(\$1)-\$2)**2):1 w labels
EOF
gnuplot /tmp/aspersa-gnuplot0 /tmp/aspersa-gnuplot1
else
# We need to set the PARAM_SIGMA and PARAM_KAPPA stuff.
PARAM_C_ONE="${C_of_one}"
echo | awk "BEGIN{
a = ${PARAM_A};
a_err = ${PARAM_A_ERR};
b = ${PARAM_B};
b_err = ${PARAM_B_ERR};
printf \"PARAM_SIGMA=%.6f\\n\", b - a;
printf \"PARAM_KAPPA=%.6f\\n\", a;
print \"PARAM_SIGMA_ERR=0\";
print \"PARAM_SIGMA_PCT=0\";
print \"PARAM_KAPPA_ERR=0\";
print \"PARAM_KAPPA_PCT=0\";
}" > /tmp/aspersa
. /tmp/aspersa
echo " sigma ${PARAM_SIGMA} +/- ${PARAM_SIGMA_ERR} (${PARAM_SIGMA_PCT}%)"
echo " kappa ${PARAM_KAPPA} +/- ${PARAM_KAPPA_ERR} (${PARAM_KAPPA_PCT}%)"
fi
# Plot the original points and the Universal Scalability Law's predictions.
# The *_best/*_worst values bracket the model by one standard error of
# each parameter and are used for the -e error lines; kappa_best must
# subtract the error estimate (not kappa itself), mirroring sigma_best.
cat > /tmp/aspersa-gnuplot1 <<-EOF
sigma = ${PARAM_SIGMA}
kappa = ${PARAM_KAPPA}
C1 = ${PARAM_C_ONE}
sigma_best = sigma - ${PARAM_SIGMA_ERR};
sigma_worst = sigma + ${PARAM_SIGMA_ERR};
kappa_best = kappa - ${PARAM_KAPPA_ERR};
kappa_worst = kappa + ${PARAM_KAPPA_ERR};
set output "${PRE}usl-model-vs-actual.${EXT}"
N_star = floor(sqrt((1-sigma)/kappa))
modelC = N_star*C1 / (1 + sigma*(N_star-1) + kappa*N_star*(N_star-1))
set label sprintf("Peak capacity is C=%d at N=%d", modelC, N_star) at graph 0.1, 0.9
set label sprintf('${GNUPLOT_SIGMA} = %f', sigma) at graph .5, .3
set label sprintf('${GNUPLOT_KAPPA} = %f', kappa) at graph .5, .25
set label sprintf('R^2 = %f', ${R_SQUARED}) at graph .5, .2
set key bottom
xlimit = ${LIM}
if ( xlimit == 0 ) xlimit = max(N_star, max_N) * 2
set xrange [0.0:xlimit]
set yrange [0:max(max_C, modelC * 1.3)]
if ( ${ERRL} == 1 ) plot \\
x*C1/(1+sigma_best*(x-1) + kappa_best*x*(x-1)) title '',\\
x*C1/(1+sigma_worst*(x-1) + kappa_worst*x*(x-1)) title '',\\
x*C1/(1+sigma*(x-1) + kappa*x*(x-1)) title 'Modeled' w lines${BLUE}, \\
"${FILE}" using 1:2 pt ${PTY} ${LTY} title 'Measured'
if ( ${ERRL} == 0 ) plot \\
x*C1/(1+sigma*(x-1) + kappa*x*(x-1)) title 'Modeled' w lines${BLUE}, \\
"${FILE}" using 1:2 pt ${PTY} ${LTY} title 'Measured'
EOF
gnuplot /tmp/aspersa-gnuplot0 /tmp/aspersa-gnuplot1
# Remove files not mentioned in -o.
if [ "${ONLY}" ]; then
for f in deviation efficiency quadratic-residuals residuals-squared \
quadratic-squared model-vs-actual; do
if [[ "${ONLY}" != *$f* ]]; then
file="${PRE}usl-${f}.${EXT}"
rm -f "${file}"
fi
done
fi
# The type "pdf" is really just eps, and now we need to convert it to pdf.
if [ "${TYP}" = "pdf" ]; then
for f in deviation efficiency quadratic-residuals residuals-squared \
quadratic-squared model-vs-actual; do
file="${PRE}usl-${f}.eps"
if [ -e "${file}" ]; then
epstopdf "${file}"
rm -f "${file}"
fi
done
fi
# Remove temp files.
if [ "${DEL}" = "1" ]; then
rm -f /tmp/aspersa{,-N-C,.params,-gnuplot0,-gnuplot1,-qfit.log,-ufit.log,-usl-input-extended}
fi
}
# Execute the program if it was not included from another file. This makes it
# possible to include without executing, and thus test.
# NOTE: the tool was renamed from Aspersa's "usl" to "pt-usl"; match both
# names, otherwise running the installed pt-usl directly does nothing.
if [ "$(basename "$0")" = "pt-usl" ] || [ "$(basename "$0")" = "usl" ] || [ "$(basename "$0")" = "bash" -a "$_" = "$0" ]; then
main "$@"
fi
# ############################################################################
# Documentation
# ############################################################################
:<<'DOCUMENTATION'
=pod
=head1 NAME
pt-usl - Model Universal Scalability Law.
=head1 SYNOPSIS
Usage: pt-usl [OPTIONS] [FILES]
=head1 DESCRIPTION
This tool is based on Neil Gunther's book Guerrilla Capacity Planning.
It expects as input a file with columns N (load; concurrency; independent var)
and C (throughput). The file may contain comment lines beginning with the #
character. The tool outputs .png images with the deviation, efficiency,
residuals, and model-vs-actual data. It also prints out information from the
process of fitting the curve to the data, such as gnuplot's error estimates.
=head1 OPTIONS
Options must precede files on the command line.
-a Set X-axis label to 'node count' (default is concurrency).
-c CONVERSION Converts the input file into N-vs-C format as specified:
globalstatus Convert from MySQL's SHOW GLOBAL STATUS
tcpdump Convert from 'tcpdump -tttt -nnq' format
-d Don't delete the gnuplot files used to generate the charts.
-e Draw error lines on the final plot.
-i INTERVAL When -c is given, group the input into -i second intervals.
-k KEEPFILE Save the N-vs-C data in the specified file.
-l LIMIT X-axis limit for the final plot.
-L COLOR The color for plotting points (default lt rgb "#8B0000").
-m THREADS When -c is given, max valid value of N, to filter outliers.
-n ADJUSTMENT Adjust the N variable downwards to compensate for known
errors such as error of observation.
-o ONLY Only produce plots specified in this comma-separated list.
-p PREFIX Prefix for the generated image file names.
-P PORT TCP port for when -c tcpdump is used (default 3306).
-r Render pdf and eps plots in color.
-R Don't re-fit the data; use the results from quadratic fit.
-t FILETYPE Type for the image files (png, pdf, eps).
-T POINTTYPE Point-type and options for the plots (default 6).
-x ADJUSTMENT Multiply the C(1) regression parameter by this factor.
-X Include C(1) as a fit parameter for USL regression.
=head1 TODO
* Need to make it optionally make logarithmic X axis graph. Also, apply
-i and -n and so on in the main body, not in the converter itself,
so that I can convert a file and then manipulate it separately.
* I want it to entirely skip samples that have too-large concurrency, as
defined by -m. I don't want it to just average the concurrency across the
other samples; it will introduce skew into the throughput for that sample,
too.
=head1 ENVIRONMENT
This tool reads the C<MKDEBUG> environment variable: set it to a value greater
than 0 to enable debugging output in the tcpdump converter. It uses no other
environment variables.
=head1 SYSTEM REQUIREMENTS
This tool requires Bash v3 or newer and gnuplot.
=head1 BUGS
For a list of known bugs, see L<http://www.percona.com/bugs/pt-usl>.
Please report bugs at L<https://bugs.launchpad.net/percona-toolkit>.
Include the following information in your bug report:
=over
=item * Complete command-line used to run the tool
=item * Tool L<"--version">
=item * MySQL version of all servers involved
=item * Output from the tool including STDERR
=item * Input files (log/dump/config files, etc.)
=back
If possible, include debugging output by running the tool with C<PTDEBUG>;
see L<"ENVIRONMENT">.
=head1 DOWNLOADING
Visit L<http://www.percona.com/software/percona-toolkit/> to download the
latest release of Percona Toolkit. Or, get the latest release from the
command line:
wget percona.com/get/percona-toolkit.tar.gz
wget percona.com/get/percona-toolkit.rpm
wget percona.com/get/percona-toolkit.deb
You can also get individual tools from the latest release:
wget percona.com/get/TOOL
Replace C<TOOL> with the name of any tool.
=head1 AUTHORS
Baron Schwartz
=head1 ABOUT PERCONA TOOLKIT
This tool is part of Percona Toolkit, a collection of advanced command-line
tools developed by Percona for MySQL support and consulting. Percona Toolkit
was forked from two projects in June, 2011: Maatkit and Aspersa. Those
projects were created by Baron Schwartz and developed primarily by him and
Daniel Nichter, both of whom are employed by Percona. Visit
L<http://www.percona.com/software/> for more software developed by Percona.
=head1 COPYRIGHT, LICENSE, AND WARRANTY
This program is copyright 2010-2011 Baron Schwartz, 2011 Percona Inc.
Feedback and improvements are welcome.
THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
systems, you can issue `man perlgpl' or `man perlartistic' to read these
licenses.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA.
=head1 VERSION
Percona Toolkit v1.0.0 released 2011-08-01
=cut
DOCUMENTATION