Remove pt-usl.

This commit is contained in:
Daniel Nichter
2011-07-22 16:05:14 -06:00
parent d0580626a8
commit be739c7c72
10 changed files with 0 additions and 4955 deletions

View File

@@ -1,887 +0,0 @@
#!/usr/bin/env bash
# This program is part of Percona Toolkit: http://www.percona.com/software/
# See "COPYRIGHT, LICENSE, AND WARRANTY" at the end of this file for legal
# notices and disclaimers.
usage() {
  # Print an optional error message (from the OPT_ERR global) followed by
  # a short usage summary, both on stderr, then abort with status 1.
  [ -n "${OPT_ERR}" ] && echo "${OPT_ERR}" >&2
  echo "Usage: pt-usl [OPTIONS] [FILES]" >&2
  echo "For more information, 'man pt-usl' or 'perldoc $0'." >&2
  exit 1
}
# Converts SHOW GLOBAL STATUS into concurrency-vs-throughput. There are
# basically three things we are interested in from SHOW GLOBAL STATUS.
# Questions 118357171
# Threads_running 8
# Uptime 614909
#
# Command-line options (not optional)
# -i The time interval over which to aggregate.
# -n The number of slaves connected to the server, and
# thus running Binlog Dump commands.
# -m The max number of threads possible (to filter outliers).
#
# XXX In the future, when we see Uptime, we need to check whether it is greater
# than what we saw the first time. If we see it decrease or stay the same, then
# this output probably came from "mysqladmin ext -ri" For now we assume not.
convert_globalstatus() {
# Parse the -i (aggregation interval), -m (max threads, outlier filter) and
# -n (number of slaves) options into the INTERVAL, THREADS_MAX and
# NUM_SLAVES globals, which are interpolated into the awk program below.
# This works because options precede the input files on the command line:
# "for o" iterates a copy of the original argument list while the shifts
# consume the option words, leaving only the file names in "$@".
for o; do
case "${o}" in
--)
break;
;;
-i)
shift; INTERVAL="${1}"; shift;
;;
-m)
shift; THREADS_MAX="${1}"; shift;
;;
-n)
shift; NUM_SLAVES="${1}"; shift;
;;
esac
done
# Generate the awk program into a temp file.  Shell variables expand inside
# the heredoc; awk's own field references are escaped as \$2 etc.
# NOTE(review): the concurrency denominator is (samples - skipped + 1), and
# samples++ runs only after the interval check — confirm the off-by-one is
# intentional before changing it.
cat > /tmp/aspersa <<-EOF
BEGIN {
threads = 0;
threads_sum = 0;
threads_max = ${THREADS_MAX};
questions = 0;
start_questions = 0;
samples = 0;
skipped = 0;
start = 0;
}
/Threads_running/ {
# There will always be at least 1 thread running, the one doing SHOW
# STATUS. And if there are slaves doing Binlog Dump, they really do
# not count as concurrency, so we remove them too.
threads = \$2 - 1 - ${NUM_SLAVES};
if ( threads_max > 0 && threads > threads_max ) {
# We do not count these. Later we'll adjust the denominator of the
# average thread count accordingly.
skipped++;
}
else {
threads_sum += threads;
}
}
/Questions/ {
questions = \$2;
if ( start_questions == 0 ) { # Initial condition, runs only once.
start_questions = questions;
}
}
/Uptime/ {
end = \$2;
if ( start == 0 ) { # Initial condition, runs only once.
start = end;
}
# This is where the main work takes place. We compute the concurrency
# over the interval as the average of Threads_running.
elapsed = end - start;
if ( elapsed > 0 && elapsed >= ${INTERVAL} && samples > skipped ) {
concurrency = threads_sum / (samples - skipped + 1);
throughput = (questions - start_questions) / elapsed;
printf "%f %f\\n", concurrency, throughput;
start_questions = questions;
start = end;
samples = 0;
skipped = 0;
threads_sum = threads;
if ( threads_max > 0 && threads_sum > threads_max ) {
threads_sum = threads_max;
}
}
samples++;
}
EOF
# Run the generated program over the remaining arguments (the input files).
awk -f /tmp/aspersa "$@"
}
# Converts tcpdump into concurrency-vs-throughput. Use a tcpdump command line
# such as the following:
# sudo tcpdump -s 384 -i any -nnq -tttt -c 10 'tcp port 3306 and (((ip[2:2] - ((ip[0]&0xf)<<2)) - ((tcp[12]&0xf0)>>2)) != 0)'
#
# Command-line options (not optional)
# -P The TCP port that the server is listening on.
convert_tcpdump() {
# Convert tcpdump output into concurrency-vs-throughput pairs.
# For ease of testing, this process is split into two parts, and this
# function is just a wrapper around helpers that do the main work:
# tabulate per-request response times, then collapse them into 1-second
# concurrency/throughput samples.  All arguments (including -P PORT)
# are forwarded to the tabulation step.
convert_tcpdump_tabulate "$@" | convert_tcpdump_tabulated_to_1sec
}
# Accepts the same input as convert_tcpdump(). Output fields:
# 1 - date
# 2 - time
# 3 - timestamp as a number (NOTE: doesn't handle wrapping past midnight)
# 4 - client IP address and port number
# 5 - response time
# 6 - number of requests still pending / in progress
# 7 - total time spent serving requests (increases while #6 is nonzero)
# 8 - weighted time spent serving requests (incremented by #6 * elapsed)
convert_tcpdump_tabulate() {
# Parse the -P option (the server's TCP port) into the PORT global, which
# is interpolated into the generated awk program below.  Works because
# options precede file names; remaining "$@" entries are the input files.
for o; do
case "${o}" in
--)
break;
;;
-P)
shift; PORT="${1}"; shift;
;;
esac
done
# Generate the awk program that pairs each client request packet with the
# server's reply and emits one line per completed request.  Shell variables
# (PORT, MKDEBUG) expand inside the heredoc; awk fields are escaped \$N.
# NOTE(review): the timestamp is seconds-since-midnight, so sessions that
# cross midnight will compute negative elapsed times — known limitation.
cat > /tmp/aspersa-convert_tcpdump_tabulate <<-EOF
BEGIN {
watch_port = ${PORT};
from_pat = "[.]" watch_port "$"; # Matches IP.port combination
to_pat = "[.]" watch_port ":$";
pending = 0; # The number of requests that haven't been replied yet.
# Other global variables:
# current # The queries that have been sent to the server.
# last_ts # Used for computing weighted time spent doing queries.
# busy # Like Field 10 in /proc/diskstats: increases if pending > 0.
# weighted # The time spent doing IO, like Field 11 in /proc/diskstats.
}
# Ignore any zero-length, it's usually just an ack or something.
# ts = \$1 \$2
# from = \$4
# to = \$6
\$NF > 0 {
ts = (3600 * substr(\$2, 1, 2)) + (60 * substr(\$2, 4, 2)) + substr(\$2, 7);
# Packets from the client.
if ( \$6 ~ to_pat ) {
client = \$4;
if ( !current[client] ) {
if ( ${MKDEBUG:-0} > 0 ) {
printf "MKDEBUG: new request from '%s' at line %d\\n", client, NR;
}
current[client] = ts;
if ( last_ts ) {
elapsed = ts - last_ts;
if ( ${MKDEBUG:-0} > 0 ) {
printf "MKDEBUG: weighted (%.6f) += %.6f\\n", weighted, elapsed;
}
weighted += pending * elapsed;
if ( pending > 0 ) {
if ( ${MKDEBUG:-0} > 0 ) {
printf "MKDEBUG: busy (%.6f) += %.6f\\n", busy, elapsed;
}
busy += (ts - last_ts);
}
}
last_ts = ts;
pending++;
}
else if ( ${MKDEBUG:-0} > 0 ) {
print "MKDEBUG: existing request for client";
}
}
# Packets from the server we're watching to the client.
else if ( \$4 ~ from_pat ) {
client = substr(\$6, 1, length(\$6) - 1);
if ( current[client] ) {
if ( ${MKDEBUG:-0} > 0 ) {
printf "MKDEBUG: reply to '%s' at line %d\\n", client, NR;
}
rt = ts - current[client];
weighted += pending * (ts - last_ts);
busy += (ts - last_ts);
last_ts = ts;
pending--;
delete current[client];
printf("%s %s %12.6f %-21s %10.6f %3d %10.6f %10.6f\\n", \$1, \$2, ts, client, rt, pending, busy, weighted);
}
else if ( ${MKDEBUG:-0} > 0 ) {
printf "MKDEBUG: reply to '%s' at line %d (DNE)\\n", client, NR;
}
}
}
END {
if ( ${MKDEBUG:-0} > 0 ) {
printf "MKDEBUG: %d sessions currently open\\n", pending;
for (c in current) {
if ( current[c] ) {
printf "MKDEBUG: client '%s' started %.6f\\n", c, current[c];
}
}
}
}
EOF
# Run the generated program over the input files.
awk -f /tmp/aspersa-convert_tcpdump_tabulate "$@"
}
# Takes in the output of convert_tcpdump_tabulate() and outputs one line per
# second, showing the concurrency and throughput for that second.
convert_tcpdump_tabulated_to_1sec() {
# Collapse the tabulated per-request lines (see convert_tcpdump_tabulate
# for the 8-column format) into one "concurrency throughput date time"
# line per second.  Concurrency is the ratio of weighted-busy-time to
# busy-time over the window; throughput is requests per busy second.
cat > /tmp/aspersa-convert_tcpdump_tabulated_to_1sec <<-EOF
{
if ( !ts ) { # Initial condition
ts = \$3; # timestamp
ct = 0; # count of queries
bt = \$7; # busy time
wt = \$8; # weighted busy time
}
if ( \$3 >= ts + 1 ) {
concurrency = (\$8 - wt) / (\$7 - bt);
throughput = ct / (\$7 - bt);
printf "%.6f %.6f %s %s\\n", concurrency, throughput, \$1, \$2;
ts = \$3; # timestamp
ct = 0; # count of queries
bt = \$7; # busy time
wt = \$8; # weighted busy time
}
else {
ct++;
}
}
EOF
awk -f /tmp/aspersa-convert_tcpdump_tabulated_to_1sec "$@"
}
# To find the deviation from linearity, we have to find the C(N) for N=1 (or if
# 1 is not available, then we interpolate, which may require human judgment, as
# in Gunther p.94). We take an average. Input columns must be N and C
# (throughput). The command-line parameter is the N value we wish to look for.
find_C_of_one() {
# Estimate C(1) from an N-vs-C file: average the throughput (column 2) of
# every non-comment row whose N (column 1) equals the first argument, then
# divide by that N.  Prints 0 when no matching rows exist.
# Arguments: $1 - the N value to look for; $2 - the N-vs-C input file.
# In the heredoc, unescaped $1 is the SHELL positional (the target N),
# while \$1 and \$2 are awk field references.
cat > /tmp/aspersa <<-EOF
/^[^#]/ {
if ( \$1 == $1 ) {
count++;
sum += \$2;
}
}
END {
if ( count > 0 ) {
print (sum/count) / $1;
}
else {
print 0;
}
}
EOF
awk -f /tmp/aspersa "$2"
}
# The main code that runs by default. Arguments are the command-line options.
main() {
# Entry point.  Parses command-line options, optionally converts the input
# into N-vs-C format, fits the quadratic and USL models with gnuplot, and
# writes the plots and fitted parameters.  Arguments: the command line.
echo "# Command-line: $0 $@"
# Get command-line options.
for o; do
case "${o}" in
--)
break;
;;
-a)
shift; X_AXIS="node count";
;;
-c)
shift; CONV="${1}"; shift;
;;
-d)
DEL="0"; shift;
;;
-e)
shift; ERRL="1";
;;
-i)
shift; INT="${1}"; shift;
;;
-k)
shift; KEEP="${1}"; shift;
;;
-l)
shift; LIM="${1}"; shift;
;;
-L)
shift; LTY="${1}"; shift;
;;
-m)
shift; MXT="${1}"; shift;
;;
-n)
shift; ADJ="${1}"; shift;
;;
-o)
shift; ONLY="${1}"; shift;
;;
-p)
shift; PRE="${1}"; shift;
;;
-P)
shift; PORT="${1}"; shift;
;;
-r)
shift; COLOR="color";
;;
-R)
shift; REFIT=0;
;;
-t)
shift; TYP="${1}"; shift;
;;
-T)
shift; PTY="${1}"; shift;
;;
-x)
shift; XAD="${1}"; shift;
;;
-X)
shift; FITC1=", C1";
;;
-*)
OPT_ERR="Unknown option '${o}'."
usage 1
;;
esac
done
# Apply defaults for anything not given on the command line.
ERRL=${ERRL:-0}
ADJ=${ADJ:-0}
DEL=${DEL:-1}
INT=${INT:-0}
MXT=${MXT:-0}
TYP=${TYP:-png}
XAD=${XAD:-1}
FILE="${KEEP:-/tmp/aspersa-N-C}"
EXT="${TYP}"
LIM="${LIM:-0}"
X_AXIS="${X_AXIS:-concurrency}"
PORT="${PORT:-3306}"
PTY="${PTY:-6}"
REFIT="${REFIT:-1}"
FITC1="${FITC1:-}"
if [ "${COLOR}" ]; then
LTY="${LTY:-lt rgb \"#8B0000\"}"
BLUE=" lt rgb \"#0000FF\"";
fi
# ######################################################################
# Set up the gnuplot instructions for filetype.
# ######################################################################
case "${TYP}" in
png)
GNUPLOT_TERM="set terminal ${TYP}"
GNUPLOT_SIZE=""
GNUPLOT_SIGMA='sigma'
GNUPLOT_KAPPA='kappa'
;;
eps|pdf)
# We make PDFs as EPSs and then convert them.
GNUPLOT_TERM="set terminal postscript eps enhanced ${COLOR} solid"
GNUPLOT_SIZE="set size 0.6, 0.6"
GNUPLOT_SIGMA='{/Symbol s}'
GNUPLOT_KAPPA='{/Symbol k}'
if [ "${TYP}" = "pdf" ]; then
EXT="eps"
fi
;;
*)
echo "Unknown -t value ${TYP}"
usage
exit 1
;;
esac
GNUPLOT_LABEL="set xlabel 'N (${X_AXIS})'; set ylabel 'C (throughput)'"
# ######################################################################
# Convert the input file if needed.
# ######################################################################
if [ "${CONV}" ]; then
case "${CONV}" in
globalstatus)
convert_globalstatus -i ${INT} -m ${MXT} -n ${ADJ} "$@" \
> "${FILE}"
;;
tcpdump)
convert_tcpdump -P $PORT "$@" > "${FILE}"
;;
*)
echo "Unknown -c value ${CONV}"
usage
exit 1
esac
else
cat "$@" > "${FILE}"
fi
# ######################################################################
# From here on, we have an input file with N in the first column,
# and C in the second column.
# ######################################################################
# We need to find some data points such as C(1) for subsequent operations.
min_N=$(awk 'BEGIN{min=999999}/^[^#]/{if($1<min&&$1>0){min=$1}}END{print min}' \
"${FILE}");
max_N=$(awk '/^[^#]/{if($1>max){max=$1}}END{print max}' "${FILE}");
max_C=$(awk '/^[^#]/{if($2>max){max=$2}}END{print max}' "${FILE}");
# Find C(1) if it exists, else use C(min(N)).
N_one=$(awk '{if($1 == 1) {print 1; exit}}' "${FILE}");
if [ "$N_one" = "1" ]; then
C_of_one="$(find_C_of_one 1 "${FILE}")"
else
C_of_one="$(find_C_of_one ${min_N} "${FILE}")"
fi
echo "# Using $(gnuplot -V)"
echo "# Parameters to the model:"
echo "min(N) ${min_N}"
echo "max(N) ${max_N}"
echo "max(C) ${max_C}"
echo "C(1) ${C_of_one} (pre-adjustment by ${XAD})"
echo "N=1 ??? ${N_one:-no}"
# ######################################################################
# Use gnuplot to 1) find the scalability law parameters and 2) plot the
# original data with the model. We have to go about the fitting in a funny
# way, because gnuplot can get stuck at a local maximum with the fitting if
# we don't give it good starting parameters. So we use the quadratic
# equation to figure out the starting parameters, and then refit with the USL
# equation for the final result. This results in a better fit.
# ######################################################################
# Sets up parameters. We'll always load this file.
if [ "${XAD}" != "1" ]; then
echo "# Adjusting C(1) by ${XAD}"
C_of_one="$(echo ${C_of_one} | awk "{print \$1 * ${XAD}}")";
fi
cat > /tmp/aspersa-gnuplot0 <<-EOF
${GNUPLOT_TERM}
${GNUPLOT_SIZE}
${GNUPLOT_LABEL}
C1 = ${C_of_one}
max_N = ${max_N}
max_C = ${max_C}
max(a,b) = (a > b) ? a : b
min(a,b) = (a > b) ? b : a
f(x) = a*x*x + b*x
g(x) = x*C1/(1+sigma*(x-1) + kappa*x*(x-1))
EOF
# Plot the efficiency relative to C(1).
cat > /tmp/aspersa-gnuplot1 <<-EOF
set output "${PRE}usl-efficiency.${EXT}"
set ylabel "Relative Efficiency"
plot 1 title 'Unity', \\
"${FILE}" using 1:(\$2/C1/\$1) pt ${PTY}${BLUE} title 'Computed Efficiency'
EOF
gnuplot /tmp/aspersa-gnuplot0 /tmp/aspersa-gnuplot1
# Fit the deviation from linearity relative to C(1).
echo "# Fitting the transformed data against a 2nd-degree polynomial."
cat > /tmp/aspersa-gnuplot1 <<-EOF
set fit logfile "/dev/null"
# defines a_err and b_err
set fit errorvariables
fit f(x) '${FILE}' using (\$1-1):(\$1/(\$2/C1)-1) via a, b
EOF
gnuplot /tmp/aspersa-gnuplot0 /tmp/aspersa-gnuplot1 2> /tmp/aspersa-qfit.log
# If everything went OK, then the fit should have converged.
if ! grep 'the fit converged' /tmp/aspersa-qfit.log >/dev/null ; then
echo "The quadratic regression failed, check /tmp/aspersa-qfit.log"
exit 1;
fi
# Now we can check the log and extract the parameters from it.
sed -n -e '/the fit converged/,/^b /p' /tmp/aspersa-qfit.log > /tmp/aspersa.params
PARAM_A="$(awk '/^a / { print $3 }' /tmp/aspersa.params)"
PARAM_A_ERR="$(awk '/^a / { print $5 }' /tmp/aspersa.params)"
PARAM_A_PCT="$(awk '/^a / { print substr($6, 2, length($6)-3) }' /tmp/aspersa.params)"
PARAM_B="$(awk '/^b / { print $3 }' /tmp/aspersa.params)"
PARAM_B_ERR="$(awk '/^b / { print $5 }' /tmp/aspersa.params)"
PARAM_B_PCT="$(awk '/^b / { print substr($6, 2, length($6)-3) }' /tmp/aspersa.params)"
echo " a ${PARAM_A} +/- ${PARAM_A_ERR} (${PARAM_A_PCT}%)"
echo " b ${PARAM_B} +/- ${PARAM_B_ERR} (${PARAM_B_PCT}%)"
# Now find the coefficient of determination, R^2. Although gnuplot
# documentation says it is not the best metric, it's the one everyone is used
# to, and it needs to be on the graph. FYI, gnuplot prints the
# sum_of_squared_errors as the "final sum of squares of residuals."
cat > /tmp/aspersa <<-EOF
/^[^#]/{
x = \$1 - 1;
y = \$1 / (\$2/${C_of_one}) -1;
f = ${PARAM_A}*x*x + ${PARAM_B}*x;
sum_of_squares += y * y;
sum_of_squared_errors += (f - y) * (f - y);
}
END {
print 1 - (sum_of_squared_errors / sum_of_squares);
}
EOF
R_SQUARED="$(awk -f /tmp/aspersa "${FILE}")"
echo " R^2 ${R_SQUARED}"
cat > /tmp/aspersa-gnuplot1 <<-EOF
set output "${PRE}usl-deviation.${EXT}"
set ylabel "Deviation From Linearity"
set label "R^2 = ${R_SQUARED}" at graph .1, .5
set label "b = ${PARAM_B}" at graph .1, .57
set label "a = ${PARAM_A}" at graph .1, .64
set key left
a = ${PARAM_A}
b = ${PARAM_B}
plot f(x) title 'Modeled' w lines${BLUE}, \\
"${FILE}" using (\$1-1):(\$1/(\$2/C1)-1) pt ${PTY} ${LTY} title 'Measured'
# '' using (\$1-1):(\$1/(\$2/C1)):1 w labels
EOF
gnuplot /tmp/aspersa-gnuplot0 /tmp/aspersa-gnuplot1
# Plot the residual errors to look for a pattern in them.
cat > /tmp/aspersa-gnuplot1 <<-EOF
set key off
set ylabel "Residual Errors"
set output "${PRE}usl-quadratic-residuals.${EXT}"
a = ${PARAM_A}
b = ${PARAM_B}
plot "${FILE}" using (\$1-1):((\$1/(\$2/C1)-1)-f(\$1-1)) pt ${PTY}${BLUE}
EOF
gnuplot /tmp/aspersa-gnuplot0 /tmp/aspersa-gnuplot1
# Plot the residuals squared, for finding outliers and removing them.
cat > /tmp/aspersa-gnuplot1 <<-EOF
set key off
set ylabel "Residual Squared"
set output "${PRE}usl-quadratic-squared.${EXT}"
a = ${PARAM_A}
b = ${PARAM_B}
plot "${FILE}" using (\$1-1):((\$1/(\$2/C1)-1)-f(\$1-1))**2:1 pt ${PTY}${BLUE}, \\
'' using 1:((\$1/(\$2/C1)-1)-f(\$1-1))**2:1 w labels
EOF
gnuplot /tmp/aspersa-gnuplot0 /tmp/aspersa-gnuplot1
if [ "${REFIT}" = "1" ]; then
# Re-fit against the Universal Scalability Law -- it will be more
# accurate. Put the C(1) point into a new file for this purpose.
echo "# Re-fitting against the USL with (a, b-a) as a starting point."
echo "# Treating (1, ${C_of_one}) as a point in original measurements."
echo "1 ${C_of_one}" > /tmp/aspersa-usl-input-extended
cat "${FILE}" >> /tmp/aspersa-usl-input-extended
cat > /tmp/aspersa-gnuplot1 <<-EOF
a = ${PARAM_A}
b = ${PARAM_B}
kappa = a
sigma = b - kappa
set fit logfile "/dev/null"
# defines sigma_err and kappa_err
set fit errorvariables
fit g(x) '/tmp/aspersa-usl-input-extended' using 1:2 via sigma, kappa${FITC1}
EOF
gnuplot /tmp/aspersa-gnuplot0 /tmp/aspersa-gnuplot1 2> /tmp/aspersa-ufit.log
# If everything went OK, then the fit should have converged.
if ! grep 'the fit converged' /tmp/aspersa-ufit.log >/dev/null ; then
echo "The USL regression failed, check /tmp/aspersa-ufit.log"
exit 1;
fi
# Now check the log again, and extract the parameters from it.
if [ "${FITC1}" ]; then
sed -n -e '/the fit converged/,/^C1 /p' /tmp/aspersa-ufit.log > /tmp/aspersa.params
PARAM_SIGMA="$(awk '/^sigma / { print $3 }' /tmp/aspersa.params)"
PARAM_SIGMA_ERR="$(awk '/^sigma / { print $5 }' /tmp/aspersa.params)"
PARAM_SIGMA_PCT="$(awk '/^sigma / { print substr($6, 2, length($6)-3) }' /tmp/aspersa.params)"
PARAM_KAPPA="$(awk '/^kappa / { print $3 }' /tmp/aspersa.params)"
PARAM_KAPPA_ERR="$(awk '/^kappa / { print $5 }' /tmp/aspersa.params)"
PARAM_KAPPA_PCT="$(awk '/^kappa / { print substr($6, 2, length($6)-3) }' /tmp/aspersa.params)"
PARAM_C_ONE="$(awk '/^C1 / { print $3 }' /tmp/aspersa.params)"
PARAM_C_ONE_ERR="$(awk '/^C1 / { print $5 }' /tmp/aspersa.params)"
PARAM_C_ONE_PCT="$(awk '/^C1 / { print substr($6, 2, length($6)-3) }' /tmp/aspersa.params)"
echo " sigma ${PARAM_SIGMA} +/- ${PARAM_SIGMA_ERR} (${PARAM_SIGMA_PCT}%)"
echo " kappa ${PARAM_KAPPA} +/- ${PARAM_KAPPA_ERR} (${PARAM_KAPPA_PCT}%)"
echo " C(1) ${PARAM_C_ONE} +/- ${PARAM_C_ONE_ERR} (${PARAM_C_ONE_PCT}%)"
else
sed -n -e '/the fit converged/,/^kappa /p' /tmp/aspersa-ufit.log > /tmp/aspersa.params
PARAM_SIGMA="$(awk '/^sigma / { print $3 }' /tmp/aspersa.params)"
PARAM_SIGMA_ERR="$(awk '/^sigma / { print $5 }' /tmp/aspersa.params)"
PARAM_SIGMA_PCT="$(awk '/^sigma / { print substr($6, 2, length($6)-3) }' /tmp/aspersa.params)"
PARAM_KAPPA="$(awk '/^kappa / { print $3 }' /tmp/aspersa.params)"
PARAM_KAPPA_ERR="$(awk '/^kappa / { print $5 }' /tmp/aspersa.params)"
PARAM_KAPPA_PCT="$(awk '/^kappa / { print substr($6, 2, length($6)-3) }' /tmp/aspersa.params)"
PARAM_C_ONE="${C_of_one}"
echo " sigma ${PARAM_SIGMA} +/- ${PARAM_SIGMA_ERR} (${PARAM_SIGMA_PCT}%)"
echo " kappa ${PARAM_KAPPA} +/- ${PARAM_KAPPA_ERR} (${PARAM_KAPPA_PCT}%)"
echo " C(1) ${PARAM_C_ONE} (not a regression parameter)"
fi
# Now find the coefficient of determination again, this time against the
# USL model.
cat > /tmp/aspersa <<-EOF
/^[^#]/{
x = \$1;
y = \$2;
g = x*${PARAM_C_ONE}/(1+${PARAM_SIGMA}*(x-1) + ${PARAM_KAPPA}*x*(x-1));
sum_of_squares += y * y;
sum_of_squared_errors += (g - y) * (g - y);
}
END {
print 1 - (sum_of_squared_errors / sum_of_squares);
}
EOF
R_SQUARED="$(awk -f /tmp/aspersa "/tmp/aspersa-usl-input-extended")"
echo " R^2 ${R_SQUARED}"
# Plot the USL residuals squared, for finding outliers and removing them.
cat > /tmp/aspersa-gnuplot1 <<-EOF
set key off
set ylabel "Residual Errors Squared"
set output "${PRE}usl-residuals-squared.${EXT}"
sigma = ${PARAM_SIGMA}
kappa = ${PARAM_KAPPA}
plot "/tmp/aspersa-usl-input-extended" using 1:((g(\$1)-\$2)**2) pt ${PTY}${BLUE}, \\
'' using (\$1+1):((g(\$1)-\$2)**2):1 w labels
EOF
gnuplot /tmp/aspersa-gnuplot0 /tmp/aspersa-gnuplot1
else
# We need to set the PARAM_SIGMA and PARAM_KAPPA stuff.
PARAM_C_ONE="${C_of_one}"
echo | awk "BEGIN{
a = ${PARAM_A};
a_err = ${PARAM_A_ERR};
b = ${PARAM_B};
b_err = ${PARAM_B_ERR};
printf \"PARAM_SIGMA=%.6f\\n\", b - a;
printf \"PARAM_KAPPA=%.6f\\n\", a;
print \"PARAM_SIGMA_ERR=0\";
print \"PARAM_SIGMA_PCT=0\";
print \"PARAM_KAPPA_ERR=0\";
print \"PARAM_KAPPA_PCT=0\";
}" > /tmp/aspersa
. /tmp/aspersa
echo " sigma ${PARAM_SIGMA} +/- ${PARAM_SIGMA_ERR} (${PARAM_SIGMA_PCT}%)"
echo " kappa ${PARAM_KAPPA} +/- ${PARAM_KAPPA_ERR} (${PARAM_KAPPA_PCT}%)"
fi
# Plot the original points and the Universal Scalability Law's predictions.
cat > /tmp/aspersa-gnuplot1 <<-EOF
sigma = ${PARAM_SIGMA}
kappa = ${PARAM_KAPPA}
C1 = ${PARAM_C_ONE}
sigma_best = sigma - ${PARAM_SIGMA_ERR};
sigma_worst = sigma + ${PARAM_SIGMA_ERR};
# BUG FIX: this previously interpolated PARAM_KAPPA (the value itself),
# i.e. "kappa_best = kappa - kappa", which made kappa_best always 0 and
# drew a wrong best-case error line with -e.  Use the error term instead,
# mirroring sigma_best/sigma_worst above.
kappa_best = kappa - ${PARAM_KAPPA_ERR};
kappa_worst = kappa + ${PARAM_KAPPA_ERR};
set output "${PRE}usl-model-vs-actual.${EXT}"
N_star = floor(sqrt((1-sigma)/kappa))
modelC = N_star*C1 / (1 + sigma*(N_star-1) + kappa*N_star*(N_star-1))
set label sprintf("Peak capacity is C=%d at N=%d", modelC, N_star) at graph 0.1, 0.9
set label sprintf('${GNUPLOT_SIGMA} = %f', sigma) at graph .5, .3
set label sprintf('${GNUPLOT_KAPPA} = %f', kappa) at graph .5, .25
set label sprintf('R^2 = %f', ${R_SQUARED}) at graph .5, .2
set key bottom
xlimit = ${LIM}
if ( xlimit == 0 ) xlimit = max(N_star, max_N) * 2
set xrange [0.0:xlimit]
set yrange [0:max(max_C, modelC * 1.3)]
if ( ${ERRL} == 1 ) plot \\
x*C1/(1+sigma_best*(x-1) + kappa_best*x*(x-1)) title '',\\
x*C1/(1+sigma_worst*(x-1) + kappa_worst*x*(x-1)) title '',\\
x*C1/(1+sigma*(x-1) + kappa*x*(x-1)) title 'Modeled' w lines${BLUE}, \\
"${FILE}" using 1:2 pt ${PTY} ${LTY} title 'Measured'
if ( ${ERRL} == 0 ) plot \\
x*C1/(1+sigma*(x-1) + kappa*x*(x-1)) title 'Modeled' w lines${BLUE}, \\
"${FILE}" using 1:2 pt ${PTY} ${LTY} title 'Measured'
EOF
gnuplot /tmp/aspersa-gnuplot0 /tmp/aspersa-gnuplot1
# Remove files not mentioned in -o.
if [ "${ONLY}" ]; then
for f in deviation efficiency quadratic-residuals residuals-squared \
quadratic-squared model-vs-actual; do
if [[ "${ONLY}" != *$f* ]]; then
file="${PRE}usl-${f}.${EXT}"
rm -f "${file}"
fi
done
fi
# The type "pdf" is really just eps, and now we need to convert it to pdf.
if [ "${TYP}" = "pdf" ]; then
for f in deviation efficiency quadratic-residuals residuals-squared \
quadratic-squared model-vs-actual; do
file="${PRE}usl-${f}.eps"
if [ -e "${file}" ]; then
epstopdf "${file}"
rm -f "${file}"
fi
done
fi
# Remove temp files.
if [ "${DEL}" = "1" ]; then
rm -f /tmp/aspersa{,-N-C,.params,-gnuplot0,-gnuplot1,-qfit.log,-ufit.log,-usl-input-extended}
fi
}
# Execute the program if it was not included from another file. This makes it
# possible to include without executing, and thus test.
# Execute the program if it was not included from another file. This makes it
# possible to include without executing, and thus test.
# BUG FIX: the guard only checked for the pre-rename name "usl" (from
# Aspersa), so running the renamed "pt-usl" directly never invoked main.
# Accept both names for backward compatibility.
if [ "$(basename "$0")" = "pt-usl" ] || [ "$(basename "$0")" = "usl" ] || [ "$(basename "$0")" = "bash" -a "$_" = "$0" ]; then
main "$@"
fi
# ############################################################################
# Documentation
# ############################################################################
:<<'DOCUMENTATION'
=pod
=head1 NAME
pt-usl - Model Universal Scalability Law.
=head1 SYNOPSIS
Usage: pt-usl [OPTIONS] [FILES]
=head1 DESCRIPTION
This tool is based on Neil Gunther's book Guerrilla Capacity Planning.
It expects as input a file with columns N (load; concurrency; independent variable)
and C (throughput). The file may contain comment lines beginning with the #
character. The tool outputs .png images with the deviation, efficiency,
residuals, and model-vs-actual data. It also prints out information from the
process of fitting the curve to the data, such as gnuplot's error estimates.
=head1 OPTIONS
Options must precede files on the command line.
-a Set X-axis label to 'node count' (default is concurrency).
-c CONVERSION Converts the input file into N-vs-C format as specified:
globalstatus Convert from MySQL's SHOW GLOBAL STATUS
tcpdump Convert from 'tcpdump -tttt -nnq' format
-d Don't delete the gnuplot files used to generate the charts.
-e Draw error lines on the final plot.
-i INTERVAL When -c is given, group the input into -i second intervals.
-k KEEPFILE Save the N-vs-C data in the specified file.
-l LIMIT X-axis limit for the final plot.
-L COLOR The color for plotting points (default lt rgb "#8B0000").
-m THREADS When -c is given, max valid value of N, to filter outliers.
-n ADJUSTMENT Adjust the N variable downwards to compensate for known
errors such as error of observation.
-o ONLY Only produce plots specified in this comma-separated list.
-p PREFIX Prefix for the generated image file names.
-P PORT TCP port for when -c tcpdump is used (default 3306).
-r Render pdf and eps plots in color.
-R Don't re-fit the data; use the results from quadratic fit.
-t FILETYPE Type for the image files (png, pdf, eps).
-T POINTTYPE Point-type and options for the plots (default 6).
-x ADJUSTMENT Multiply the C(1) regression parameter by this factor.
-X Include C(1) as a fit parameter for USL regression.
=head1 TODO
* Need to make it optionally make logarithmic X axis graph. Also, apply
-i and -n and so on in the main body, not in the converter itself,
so that I can convert a file and then manipulate it separately.
* I want it to entirely skip samples that have too-large concurrency, as
defined by -m. I don't want it to just average the concurrency across the
other samples; it will introduce skew into the throughput for that sample,
too.
=head1 ENVIRONMENT
This tool does not use any environment variables.
=head1 SYSTEM REQUIREMENTS
This tool requires Bash v3 or newer and gnuplot.
=head1 BUGS
For a list of known bugs, see L<http://www.percona.com/bugs/pt-usl>.
Please report bugs at L<https://bugs.launchpad.net/percona-toolkit>.
Include the following information in your bug report:
=over
=item * Complete command-line used to run the tool
=item * Tool L<"--version">
=item * MySQL version of all servers involved
=item * Output from the tool including STDERR
=item * Input files (log/dump/config files, etc.)
=back
If possible, include debugging output by running the tool with C<PTDEBUG>;
see L<"ENVIRONMENT">.
=head1 DOWNLOADING
Visit L<http://www.percona.com/software/percona-toolkit/> to download the
latest release of Percona Toolkit. Or, get the latest release from the
command line:
wget percona.com/get/percona-toolkit.tar.gz
wget percona.com/get/percona-toolkit.rpm
wget percona.com/get/percona-toolkit.deb
You can also get individual tools from the latest release:
wget percona.com/get/TOOL
Replace C<TOOL> with the name of any tool.
=head1 AUTHORS
Baron Schwartz
=head1 ABOUT PERCONA TOOLKIT
This tool is part of Percona Toolkit, a collection of advanced command-line
tools developed by Percona for MySQL support and consulting. Percona Toolkit
was forked from two projects in June, 2011: Maatkit and Aspersa. Those
projects were created by Baron Schwartz and developed primarily by him and
Daniel Nichter, both of whom are employed by Percona. Visit
L<http://www.percona.com/software/> for more software developed by Percona.
=head1 COPYRIGHT, LICENSE, AND WARRANTY
This program is copyright 2010-2011 Baron Schwartz, 2011 Percona Inc.
Feedback and improvements are welcome.
THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
systems, you can issue `man perlgpl' or `man perlartistic' to read these
licenses.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA.
=head1 VERSION
Percona Toolkit v1.0.0 released 2011-08-01
=cut
DOCUMENTATION

View File

@@ -1,20 +0,0 @@
#!/bin/bash
# Writes the expected output of:
#convert_globalstatus -i 0 -n 3 -m 0 samples/N-vs-X-003.txt
# to the file named by $1.  The redirect target is quoted so paths
# containing spaces work (it was previously unquoted).
cat <<EOF > "$1"
3.000000 655.000000
1.000000 594.400000
1.000000 625.090909
1.500000 570.000000
2.000000 586.545455
3.500000 614.200000
2.000000 590.300000
0.500000 519.000000
1.500000 606.100000
1.500000 503.090909
1.000000 544.600000
2.000000 640.545455
2.000000 764.800000
0.500000 655.500000
1.000000 590.727273
EOF

View File

@@ -1,49 +0,0 @@
#!/bin/bash
# Writes the expected output of:
#convert_globalstatus -i 300 -n 3 -m 0 samples/N-vs-X-002.txt
# to the file named by $1.  The redirect target is quoted so paths
# containing spaces work (it was previously unquoted).
cat <<EOF > "$1"
1.700000 610.429043
2.333333 621.303630
3.333333 591.254125
2.000000 635.986842
2.666667 685.423841
2.366667 658.056106
3.100000 760.286184
2.133333 705.463576
3.600000 765.470395
3.166667 779.663366
2.433333 779.122112
2.600000 800.455446
3.466667 915.521452
4.266667 914.595395
3.833333 952.924092
5.333333 958.417763
5.400000 1023.608553
5.300000 992.983553
5.500000 1086.019802
4.800000 1071.555921
7.033333 1198.287129
7.700000 1237.222951
9.933333 1220.671053
9.466667 1229.255738
7.900000 1172.654605
6.266667 1179.848185
7.566667 1230.125413
8.800000 1255.257426
8.866667 1276.825658
7.366667 1195.782895
8.833333 1252.013158
8.000000 1308.296053
6.733333 1406.128289
10.900000 1317.398026
8.500000 1310.421053
9.366667 1371.562500
8.066667 1280.009836
7.833333 1223.884868
6.900000 1224.726974
6.800000 1238.796053
7.533333 1196.743421
5.833333 1145.574257
5.666667 1119.325658
5.133333 1075.766447
EOF

View File

@@ -1,7 +0,0 @@
#!/bin/bash
# Writes the expected output of:
#convert_tcpdump_tabulate -P 5141 samples/mysql-tcp-conversation-001.txt
# to the file named by $1.  The redirect target is quoted so paths
# containing spaces work (it was previously unquoted).
cat <<EOF > "$1"
2010-10-24 14:25:48.108945 51948.108945 127.0.0.1.51187 0.000271 0 0.000271 0.000271
2010-10-24 14:25:54.405323 51954.405323 127.0.0.1.51187 1.000576 0 1.000847 1.000847
EOF

View File

@@ -1,7 +0,0 @@
#!/bin/bash
# Writes the expected output of:
#convert_tcpdump_tabulate -P 5141 samples/mysql-tcp-conversation-002.txt
# to the file named by $1.  The redirect target is quoted so paths
# containing spaces work (it was previously unquoted).
cat <<EOF > "$1"
2010-10-24 14:33:40.088173 52420.088173 127.0.0.1.41161 0.000210 1 1.600519 1.600729
2010-10-24 14:33:48.488121 52428.488121 127.0.0.1.51187 10.000467 0 10.000467 10.000677
EOF

View File

@@ -1,17 +0,0 @@
#!/usr/bin/env perl
# BUG FIX: shebang was "#!/usr/bin/evn perl" (typo for "env"), which would
# make direct execution of this test fail with "bad interpreter".
# Test driver: delegates to util/test-bash-tool to exercise pt-usl.
BEGIN {
die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n"
unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH};
unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib";
};
use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use PerconaTest;
system("$trunk/util/test-bash-tool pt-usl @ARGV");
exit;

File diff suppressed because it is too large Load Diff

View File

@@ -1,50 +0,0 @@
# This sample is from Percona customer issue 12189
Questions 118357171
Threads_running 8
Uptime 614909
Questions 118364376
Threads_running 6
Uptime 614920
Questions 118370320
Threads_running 4
Uptime 614930
Questions 118377196
Threads_running 6
Uptime 614941
Questions 118382896
Threads_running 5
Uptime 614951
Questions 118389348
Threads_running 7
Uptime 614962
Questions 118395490
Threads_running 8
Uptime 614972
Questions 118401393
Threads_running 4
Uptime 614982
Questions 118407102
Threads_running 5
Uptime 614993
Questions 118413163
Threads_running 6
Uptime 615003
Questions 118418697
Threads_running 5
Uptime 615014
Questions 118424143
Threads_running 5
Uptime 615024
Questions 118431189
Threads_running 7
Uptime 615035
Questions 118438837
Threads_running 5
Uptime 615045
Questions 118445392
Threads_running 4
Uptime 615055
Questions 118451890
Threads_running 6
Uptime 615066
Questions 118457893

View File

@@ -1,7 +0,0 @@
2010-10-24 14:25:48.108674 IP 127.0.0.1.51187 > 127.0.0.1.5141: tcp 13
2010-10-24 14:25:48.108945 IP 127.0.0.1.5141 > 127.0.0.1.51187: tcp 56
2010-10-24 14:25:48.109019 IP 127.0.0.1.51187 > 127.0.0.1.5141: tcp 0
2010-10-24 14:25:53.404747 IP 127.0.0.1.51187 > 127.0.0.1.5141: tcp 20
2010-10-24 14:25:53.444621 IP 127.0.0.1.5141 > 127.0.0.1.51187: tcp 0
2010-10-24 14:25:54.405323 IP 127.0.0.1.5141 > 127.0.0.1.51187: tcp 63
2010-10-24 14:25:54.405416 IP 127.0.0.1.51187 > 127.0.0.1.5141: tcp 0

View File

@@ -1,7 +0,0 @@
2010-10-24 14:33:38.487654 IP 127.0.0.1.51187 > 127.0.0.1.5141: tcp 21
2010-10-24 14:33:38.487730 IP 127.0.0.1.5141 > 127.0.0.1.51187: tcp 0
2010-10-24 14:33:40.087963 IP 127.0.0.1.41161 > 127.0.0.1.5141: tcp 13
2010-10-24 14:33:40.088173 IP 127.0.0.1.5141 > 127.0.0.1.41161: tcp 56
2010-10-24 14:33:40.088237 IP 127.0.0.1.41161 > 127.0.0.1.5141: tcp 0
2010-10-24 14:33:48.488121 IP 127.0.0.1.5141 > 127.0.0.1.51187: tcp 64
2010-10-24 14:33:48.488212 IP 127.0.0.1.51187 > 127.0.0.1.5141: tcp 0