This commit is contained in:
Carlos Salguero
2017-04-01 23:06:23 -03:00
parent 4ff8e664ae
commit 248c74e6b7
6 changed files with 385 additions and 289 deletions

View File

@@ -0,0 +1,115 @@
package fingerprinter
import (
"encoding/json"
"fmt"
"regexp"
"sort"
"strings"
"github.com/percona/percona-toolkit/src/go/mongolib/util"
)
var (
MAX_DEPTH_LEVEL = 10
DEFAULT_KEY_FILTERS = []string{"^shardVersion$", "^\\$"}
)
type Fingerprinter interface {
Fingerprint(query map[string]interface{}) (string, error)
}
type Fingerprint struct {
keyFilters []string
}
func NewFingerprinter(keyFilters []string) *Fingerprint {
return &Fingerprint{
keyFilters: keyFilters,
}
}
// Query is the top level map query element
// Example for MongoDB 3.2+
// "query" : {
// "find" : "col1",
// "filter" : {
// "s2" : {
// "$lt" : "54701",
// "$gte" : "73754"
// }
// },
// "sort" : {
// "user_id" : 1
// }
// }
func (f *Fingerprint) Fingerprint(query map[string]interface{}) (string, error) {
realQuery, err := util.GetQueryField(query)
if err != nil {
// Try to encode doc.Query as json for prettiness
if buf, err := json.Marshal(realQuery); err == nil {
return "", fmt.Errorf("%v for query %s", err, string(buf))
}
// If we cannot encode as json, return just the error message without the query
return "", err
}
retKeys := keys(realQuery, f.keyFilters)
sort.Strings(retKeys)
// if there is a sort clause in the query, we have to add all fields in the sort
// fields list that are not in the query keys list (retKeys)
if sortKeys, ok := query["sort"]; ok {
if sortKeysMap, ok := sortKeys.(map[string]interface{}); ok {
sortKeys := keys(sortKeysMap, f.keyFilters)
for _, sortKey := range sortKeys {
if !inSlice(sortKey, retKeys) {
retKeys = append(retKeys, sortKey)
}
}
}
}
return strings.Join(retKeys, ","), nil
}
func inSlice(str string, list []string) bool {
for _, v := range list {
if v == str {
return true
}
}
return false
}
func keys(query map[string]interface{}, keyFilters []string) []string {
return getKeys(query, keyFilters, 0)
}
func getKeys(query map[string]interface{}, keyFilters []string, level int) []string {
ks := []string{}
for key, value := range query {
if shouldSkipKey(key, keyFilters) {
continue
}
ks = append(ks, key)
if m, ok := value.(map[string]interface{}); ok {
level++
if level <= MAX_DEPTH_LEVEL {
ks = append(ks, getKeys(m, keyFilters, level)...)
}
}
}
sort.Strings(ks)
return ks
}
func shouldSkipKey(key string, keyFilters []string) bool {
for _, filter := range keyFilters {
if matched, _ := regexp.MatchString(filter, key); matched {
return true
}
}
return false
}

View File

@@ -0,0 +1,176 @@
package profiler
import (
"crypto/md5"
"errors"
"fmt"
"time"
"github.com/percona/percona-toolkit/src/go/mongolib/fingerprinter"
"github.com/percona/percona-toolkit/src/go/mongolib/proto"
"github.com/percona/percona-toolkit/src/go/mongolib/util"
"github.com/percona/percona-toolkit/src/go/pt-mongodb-query-digest/filter"
"github.com/prometheus/common/log"
)
var (
MAX_DEPTH_LEVEL = 10
CANNOT_GET_QUERY_ERROR = errors.New("cannot get query field from the profile document (it is not a map)")
)
type Times []time.Time
func (a Times) Len() int { return len(a) }
func (a Times) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a Times) Less(i, j int) bool { return a[i].Before(a[j]) }
type StatsGroupKey struct {
Operation string
Fingerprint string
Namespace string
}
type Stat struct {
BlockedTime Times
Count int
Fingerprint string
FirstSeen time.Time
ID string
LastSeen time.Time
LockTime Times
NReturned []float64
NScanned []float64
Namespace string
Operation string
Query map[string]interface{}
QueryTime []float64 // in milliseconds
ResponseLength []float64
TableScan bool
}
type Iter interface {
All(result interface{}) error
Close() error
Err() error
For(result interface{}, f func() error) (err error)
Next(result interface{}) bool
Timeout() bool
}
type Profiler interface {
StatsChan() chan []Stat
Start()
Stop()
}
type Profile struct {
filters []filter.Filter
iterator Iter
ticker chan time.Time
statsChan chan []Stat
stopChan chan bool
stats []Stat
keyFilters []string
fingerprinter fingerprinter.Fingerprinter
}
func NewProfiler(iterator Iter, filters []filter.Filter, ticker chan time.Time, fp fingerprinter.Fingerprinter) Profiler {
return &Profile{
filters: filters,
fingerprinter: fp,
iterator: iterator,
ticker: ticker,
statsChan: make(chan []Stat),
stats: make([]Stat, 100),
keyFilters: []string{"^shardVersion$", "^\\$"},
}
}
func (p *Profile) StatsChan() chan []Stat {
return p.statsChan
}
func (p *Profile) Start() {
go p.getData()
}
func (p *Profile) Stop() {
p.stopChan <- true
}
func (p *Profile) getData() {
var doc proto.SystemProfile
stop := false
stats := make(map[StatsGroupKey]*Stat)
for !stop && p.iterator.Next(&doc) && p.iterator.Err() == nil {
valid := true
for _, filter := range p.filters {
if filter(doc) == false {
valid = false
break
}
}
if !valid {
continue
}
select {
case <-p.ticker:
p.statsChan <- statsToArray(stats)
case <-p.stopChan:
stop = true
continue
default:
if len(doc.Query) > 0 {
fp, err := p.fingerprinter.Fingerprint(doc.Query)
if err != nil {
log.Errorf("cannot get fingerprint: %s", err.Error())
continue
}
var s *Stat
var ok bool
key := StatsGroupKey{
Operation: doc.Op,
Fingerprint: fp,
Namespace: doc.Ns,
}
if s, ok = stats[key]; !ok {
realQuery, _ := util.GetQueryField(doc.Query)
s = &Stat{
ID: fmt.Sprintf("%x", md5.Sum([]byte(fmt.Sprintf("%s", key)))),
Operation: doc.Op,
Fingerprint: fp,
Namespace: doc.Ns,
TableScan: false,
Query: realQuery,
}
stats[key] = s
}
s.Count++
s.NScanned = append(s.NScanned, float64(doc.DocsExamined))
s.NReturned = append(s.NReturned, float64(doc.Nreturned))
s.QueryTime = append(s.QueryTime, float64(doc.Millis))
s.ResponseLength = append(s.ResponseLength, float64(doc.ResponseLength))
var zeroTime time.Time
if s.FirstSeen == zeroTime || s.FirstSeen.After(doc.Ts) {
s.FirstSeen = doc.Ts
}
if s.LastSeen == zeroTime || s.LastSeen.Before(doc.Ts) {
s.LastSeen = doc.Ts
}
}
}
}
p.statsChan <- statsToArray(stats)
}
func statsToArray(stats map[StatsGroupKey]*Stat) []Stat {
sa := make([]Stat, len(stats))
for _, s := range stats {
sa = append(sa, *s)
}
return sa
}

View File

@@ -12,6 +12,10 @@ import (
"gopkg.in/mgo.v2/bson"
)
var (
CANNOT_GET_QUERY_ERROR = errors.New("cannot get query field from the profile document (it is not a map)")
)
func GetReplicasetMembers(dialer pmgo.Dialer, di *pmgo.DialInfo) ([]proto.Members, error) {
hostnames, err := GetHostnames(dialer, di)
if err != nil {
@@ -232,3 +236,22 @@ func GetServerStatus(dialer pmgo.Dialer, di *pmgo.DialInfo, hostname string) (pr
return ss, nil
}
func GetQueryField(query map[string]interface{}) (map[string]interface{}, error) {
// MongoDB 3.0
if squery, ok := query["$query"]; ok {
// just an extra check to ensure this type assertion won't fail
if ssquery, ok := squery.(map[string]interface{}); ok {
return ssquery, nil
}
return nil, CANNOT_GET_QUERY_ERROR
}
// MongoDB 3.2+
if squery, ok := query["filter"]; ok {
if ssquery, ok := squery.(map[string]interface{}); ok {
return ssquery, nil
}
return nil, CANNOT_GET_QUERY_ERROR
}
return query, nil
}

View File

@@ -0,0 +1,23 @@
package filter
import (
"strings"
"github.com/percona/percona-toolkit/src/go/mongolib/proto"
)
type Filter func(proto.SystemProfile) bool
// This func receives a doc from the profiler and returns:
// true : the document must be considered
// false: the document must be skipped
func NewFilterByCollection(collectionsToSkip []string) func(proto.SystemProfile) bool {
return func(doc proto.SystemProfile) bool {
for _, collection := range collectionsToSkip {
if strings.HasSuffix(doc.Ns, collection) {
return false
}
}
return true
}
}

View File

@@ -1,25 +1,24 @@
package main
import (
"crypto/md5"
"encoding/json"
"errors"
"fmt"
"os"
"regexp"
"sort"
"strings"
"text/template"
"time"
"github.com/howeyc/gopass"
"github.com/kr/pretty"
"github.com/montanaflynn/stats"
"github.com/pborman/getopt"
"github.com/percona/percona-toolkit/src/go/lib/config"
"github.com/percona/percona-toolkit/src/go/lib/versioncheck"
"github.com/percona/percona-toolkit/src/go/mongolib/fingerprinter"
"github.com/percona/percona-toolkit/src/go/mongolib/profiler"
"github.com/percona/percona-toolkit/src/go/mongolib/proto"
"github.com/percona/percona-toolkit/src/go/mongolib/util"
"github.com/percona/percona-toolkit/src/go/pt-mongodb-query-digest/filter"
"github.com/percona/pmgo"
log "github.com/sirupsen/logrus"
"gopkg.in/mgo.v2"
@@ -28,7 +27,6 @@ import (
const (
TOOLNAME = "pt-mongodb-query-digest"
MAX_DEPTH_LEVEL = 10
DEFAULT_AUTHDB = "admin"
DEFAULT_HOST = "localhost:27017"
@@ -41,24 +39,8 @@ var (
Build string = "01-01-1980"
GoVersion string = "1.8"
Version string = "3.0.1"
CANNOT_GET_QUERY_ERROR = errors.New("cannot get query field from the profile document (it is not a map)")
// This is a regexp array to filter out the keys we don't want in the fingerprint
keyFilters = func() []string {
return []string{"^shardVersion$", "^\\$"}
}
)
type iter interface {
All(result interface{}) error
Close() error
Err() error
For(result interface{}, f func() error) (err error)
Next(result interface{}) bool
Timeout() bool
}
type options struct {
AuthDB string
Database string
@@ -82,42 +64,6 @@ type options struct {
// false: the document must be skipped
type docsFilter func(proto.SystemProfile) bool
type statsArray []stat
func (a statsArray) Len() int { return len(a) }
func (a statsArray) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a statsArray) Less(i, j int) bool { return a[i].Count < a[j].Count }
type times []time.Time
func (a times) Len() int { return len(a) }
func (a times) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a times) Less(i, j int) bool { return a[i].Before(a[j]) }
type stat struct {
ID string
Operation string
Fingerprint string
Namespace string
Query map[string]interface{}
Count int
TableScan bool
NScanned []float64
NReturned []float64
QueryTime []float64 // in milliseconds
ResponseLength []float64
LockTime times
BlockedTime times
FirstSeen time.Time
LastSeen time.Time
}
type groupKey struct {
Operation string
Fingerprint string
Namespace string
}
type statistics struct {
Pct float64
Total float64
@@ -215,36 +161,22 @@ func main() {
fmt.Println("Using those documents for the stats")
}
filters := []docsFilter{}
opts.SkipCollections = sanitizeSkipCollections(opts.SkipCollections)
filters := []filter.Filter{}
if len(opts.SkipCollections) > 0 {
// Sanitize the param. using --skip-collections="" will produce an 1 element array but
// that element will be empty. The same would be using --skip-collections=a,,d
cols := []string{}
for _, c := range opts.SkipCollections {
if strings.TrimSpace(c) != "" {
cols = append(cols, c)
}
}
if len(cols) > 0 {
// This func receives a doc from the profiler and returns:
// true : the document must be considered
// false: the document must be skipped
filterSystemProfile := func(doc proto.SystemProfile) bool {
for _, collection := range cols {
if strings.HasSuffix(doc.Ns, collection) {
return false
}
}
return true
}
filters = append(filters, filterSystemProfile)
}
filters = append(filters, filter.NewFilterByCollection(opts.SkipCollections))
}
query := bson.M{"op": bson.M{"$nin": []string{"getmore", "delete"}}}
i := session.DB(di.Database).C("system.profile").Find(query).Sort("-$natural").Iter()
queries := sortQueries(getData(i, filters), opts.OrderBy)
fp := fingerprinter.NewFingerprinter(fingerprinter.DEFAULT_KEY_FILTERS)
prof := profiler.NewProfiler(i, filters, nil, fp)
prof.Start()
queries := <-prof.StatsChan()
queries = sortQueries(queries, opts.OrderBy)
uptime := uptime(session)
@@ -306,9 +238,9 @@ func uptime(session pmgo.SessionManager) int64 {
return ss.Uptime
}
func calcTotalQueryStats(queries []stat, uptime int64) queryInfo {
func calcTotalQueryStats(queries []profiler.Stat, uptime int64) queryInfo {
qi := queryInfo{}
qs := stat{}
qs := profiler.Stat{}
_, totalScanned, totalReturned, totalQueryTime, totalBytes := calcTotals(queries)
for _, query := range queries {
qs.NScanned = append(qs.NScanned, query.NScanned...)
@@ -342,7 +274,7 @@ func calcTotalQueryStats(queries []stat, uptime int64) queryInfo {
return qi
}
func calcQueryStats(queries []stat, uptime int64) []queryInfo {
func calcQueryStats(queries []profiler.Stat, uptime int64) []queryInfo {
queryStats := []queryInfo{}
_, totalScanned, totalReturned, totalQueryTime, totalBytes := calcTotals(queries)
for rank, query := range queries {
@@ -383,9 +315,9 @@ func calcQueryStats(queries []stat, uptime int64) []queryInfo {
return queryStats
}
func getTotals(queries []stat) stat {
func getTotals(queries []profiler.Stat) profiler.Stat {
qt := stat{}
qt := profiler.Stat{}
for _, query := range queries {
qt.NScanned = append(qt.NScanned, query.NScanned...)
qt.NReturned = append(qt.NReturned, query.NReturned...)
@@ -396,7 +328,7 @@ func getTotals(queries []stat) stat {
}
func calcTotals(queries []stat) (totalCount int, totalScanned, totalReturned, totalQueryTime, totalBytes float64) {
func calcTotals(queries []profiler.Stat) (totalCount int, totalScanned, totalReturned, totalQueryTime, totalBytes float64) {
for _, query := range queries {
totalCount += query.Count
@@ -428,78 +360,6 @@ func calcStats(samples []float64) statistics {
return s
}
func getData(i iter, filters []docsFilter) []stat {
var doc proto.SystemProfile
stats := make(map[groupKey]*stat)
log.Debug(`Documents returned by db.getSiblinfDB("<dbnamehere>").system.profile.Find({"op": {"$nin": []string{"getmore", "delete"}}).Sort("-$natural")`)
for i.Next(&doc) && i.Err() == nil {
valid := true
for _, filter := range filters {
if filter(doc) == false {
valid = false
break
}
}
if !valid {
continue
}
log.Debugln("====================================================================================================")
log.Debug(pretty.Sprint(doc))
if len(doc.Query) > 0 {
fp, err := fingerprint(doc.Query)
if err != nil {
log.Errorf("cannot get fingerprint: %s", err.Error())
continue
}
var s *stat
var ok bool
key := groupKey{
Operation: doc.Op,
Fingerprint: fp,
Namespace: doc.Ns,
}
if s, ok = stats[key]; !ok {
realQuery, _ := getQueryField(doc.Query)
s = &stat{
ID: fmt.Sprintf("%x", md5.Sum([]byte(fmt.Sprintf("%s", key)))),
Operation: doc.Op,
Fingerprint: fp,
Namespace: doc.Ns,
TableScan: false,
Query: realQuery,
}
stats[key] = s
}
s.Count++
s.NScanned = append(s.NScanned, float64(doc.DocsExamined))
s.NReturned = append(s.NReturned, float64(doc.Nreturned))
s.QueryTime = append(s.QueryTime, float64(doc.Millis))
s.ResponseLength = append(s.ResponseLength, float64(doc.ResponseLength))
var zeroTime time.Time
if s.FirstSeen == zeroTime || s.FirstSeen.After(doc.Ts) {
s.FirstSeen = doc.Ts
}
if s.LastSeen == zeroTime || s.LastSeen.Before(doc.Ts) {
s.LastSeen = doc.Ts
}
}
}
// We need to sort the data but a hash cannot be sorted so, convert the hash having
// the results to a slice
sa := statsArray{}
for _, s := range stats {
sa = append(sa, *s)
}
sort.Sort(sa)
return sa
}
func getOptions() (*options, error) {
opts := &options{
Host: DEFAULT_HOST,
@@ -600,121 +460,6 @@ func getDialInfo(opts *options) *pmgo.DialInfo {
return pmgoDialInfo
}
func getQueryField(query map[string]interface{}) (map[string]interface{}, error) {
// MongoDB 3.0
if squery, ok := query["$query"]; ok {
// just an extra check to ensure this type assertion won't fail
if ssquery, ok := squery.(map[string]interface{}); ok {
return ssquery, nil
}
return nil, CANNOT_GET_QUERY_ERROR
}
// MongoDB 3.2+
if squery, ok := query["filter"]; ok {
if ssquery, ok := squery.(map[string]interface{}); ok {
return ssquery, nil
}
return nil, CANNOT_GET_QUERY_ERROR
}
return query, nil
}
// Query is the top level map query element
// Example for MongoDB 3.2+
// "query" : {
// "find" : "col1",
// "filter" : {
// "s2" : {
// "$lt" : "54701",
// "$gte" : "73754"
// }
// },
// "sort" : {
// "user_id" : 1
// }
// }
func fingerprint(query map[string]interface{}) (string, error) {
realQuery, err := getQueryField(query)
if err != nil {
// Try to encode doc.Query as json for prettiness
if buf, err := json.Marshal(realQuery); err == nil {
return "", fmt.Errorf("%v for query %s", err, string(buf))
}
// If we cannot encode as json, return just the error message without the query
return "", err
}
retKeys := keys(realQuery, 0)
sort.Strings(retKeys)
// if there is a sort clause in the query, we have to add all fields in the sort
// fields list that are not in the query keys list (retKeys)
if sortKeys, ok := query["sort"]; ok {
if sortKeysMap, ok := sortKeys.(map[string]interface{}); ok {
sortKeys := mapKeys(sortKeysMap, 0)
for _, sortKey := range sortKeys {
if !inSlice(sortKey, retKeys) {
retKeys = append(retKeys, sortKey)
}
}
}
}
return strings.Join(retKeys, ","), nil
}
func inSlice(str string, list []string) bool {
for _, v := range list {
if v == str {
return true
}
}
return false
}
func keys(query map[string]interface{}, level int) []string {
ks := []string{}
for key, value := range query {
if shouldSkipKey(key) {
continue
}
ks = append(ks, key)
if m, ok := value.(map[string]interface{}); ok {
level++
if level <= MAX_DEPTH_LEVEL {
ks = append(ks, keys(m, level)...)
}
}
}
sort.Strings(ks)
return ks
}
func mapKeys(query map[string]interface{}, level int) []string {
ks := []string{}
for key, value := range query {
ks = append(ks, key)
if m, ok := value.(map[string]interface{}); ok {
level++
if level <= MAX_DEPTH_LEVEL {
ks = append(ks, keys(m, level)...)
}
}
}
sort.Strings(ks)
return ks
}
func shouldSkipKey(key string) bool {
for _, filter := range keyFilters() {
if matched, _ := regexp.MatchString(filter, key); matched {
return true
}
}
return false
}
func printHeader(opts *options) {
fmt.Printf("%s - %s\n", TOOLNAME, time.Now().Format(time.RFC1123Z))
fmt.Printf("Host: %s\n", opts.Host)
@@ -760,15 +505,15 @@ func getTotalsTemplate() string {
return t
}
type lessFunc func(p1, p2 *stat) bool
type lessFunc func(p1, p2 *profiler.Stat) bool
type multiSorter struct {
queries []stat
queries []profiler.Stat
less []lessFunc
}
// Sort sorts the argument slice according to the less functions passed to OrderedBy.
func (ms *multiSorter) Sort(queries []stat) {
func (ms *multiSorter) Sort(queries []profiler.Stat) {
ms.queries = queries
sort.Sort(ms)
}
@@ -817,23 +562,23 @@ func (ms *multiSorter) Less(i, j int) bool {
return ms.less[k](p, q)
}
func sortQueries(queries []stat, orderby []string) []stat {
func sortQueries(queries []profiler.Stat, orderby []string) []profiler.Stat {
sortFuncs := []lessFunc{}
for _, field := range orderby {
var f lessFunc
switch field {
//
case "count":
f = func(c1, c2 *stat) bool {
f = func(c1, c2 *profiler.Stat) bool {
return c1.Count < c2.Count
}
case "-count":
f = func(c1, c2 *stat) bool {
f = func(c1, c2 *profiler.Stat) bool {
return c1.Count > c2.Count
}
case "ratio":
f = func(c1, c2 *stat) bool {
f = func(c1, c2 *profiler.Stat) bool {
ns1, _ := stats.Max(c1.NScanned)
ns2, _ := stats.Max(c2.NScanned)
nr1, _ := stats.Max(c1.NReturned)
@@ -843,7 +588,7 @@ func sortQueries(queries []stat, orderby []string) []stat {
return ratio1 < ratio2
}
case "-ratio":
f = func(c1, c2 *stat) bool {
f = func(c1, c2 *profiler.Stat) bool {
ns1, _ := stats.Max(c1.NScanned)
ns2, _ := stats.Max(c2.NScanned)
nr1, _ := stats.Max(c1.NReturned)
@@ -855,13 +600,13 @@ func sortQueries(queries []stat, orderby []string) []stat {
//
case "query-time":
f = func(c1, c2 *stat) bool {
f = func(c1, c2 *profiler.Stat) bool {
qt1, _ := stats.Max(c1.QueryTime)
qt2, _ := stats.Max(c2.QueryTime)
return qt1 < qt2
}
case "-query-time":
f = func(c1, c2 *stat) bool {
f = func(c1, c2 *profiler.Stat) bool {
qt1, _ := stats.Max(c1.QueryTime)
qt2, _ := stats.Max(c2.QueryTime)
return qt1 > qt2
@@ -869,13 +614,13 @@ func sortQueries(queries []stat, orderby []string) []stat {
//
case "docs-scanned":
f = func(c1, c2 *stat) bool {
f = func(c1, c2 *profiler.Stat) bool {
ns1, _ := stats.Max(c1.NScanned)
ns2, _ := stats.Max(c2.NScanned)
return ns1 < ns2
}
case "-docs-scanned":
f = func(c1, c2 *stat) bool {
f = func(c1, c2 *profiler.Stat) bool {
ns1, _ := stats.Max(c1.NScanned)
ns2, _ := stats.Max(c2.NScanned)
return ns1 > ns2
@@ -883,13 +628,13 @@ func sortQueries(queries []stat, orderby []string) []stat {
//
case "docs-returned":
f = func(c1, c2 *stat) bool {
f = func(c1, c2 *profiler.Stat) bool {
nr1, _ := stats.Max(c1.NReturned)
nr2, _ := stats.Max(c2.NReturned)
return nr1 < nr2
}
case "-docs-returned":
f = func(c1, c2 *stat) bool {
f = func(c1, c2 *profiler.Stat) bool {
nr1, _ := stats.Max(c1.NReturned)
nr2, _ := stats.Max(c2.NReturned)
return nr1 > nr2
@@ -951,3 +696,17 @@ func isReplicasetEnabled(session pmgo.SessionManager) bool {
}
return true
}
// Sanitize the param. using --skip-collections="" will produce an 1 element array but
// that element will be empty. The same would be using --skip-collections=a,,d
func sanitizeSkipCollections(skipCollections []string) []string {
cols := []string{}
if len(skipCollections) > 0 {
for _, c := range skipCollections {
if strings.TrimSpace(c) != "" {
cols = append(cols, c)
}
}
}
return cols
}

Binary file not shown.