From 10fbe646491af43460229733fe8a691e991fcdaa Mon Sep 17 00:00:00 2001 From: Carlos Salguero Date: Sat, 2 Apr 2022 19:39:35 -0300 Subject: [PATCH] PT-1978 WIP --- go.mod | 5 +- go.sum | 5 + .../indexes/duplicated.go | 96 +++++++++++++++ .../indexes/duplicated_test.go | 113 ++++++++++++++++++ .../pt-mongodb-index-check/indexes/unused.go | 46 +++++++ .../indexes/unused_test.go | 76 ++++++++++++ src/go/pt-mongodb-index-check/main.go | 60 ++++++++++ .../templates/duplicated_indexes.go | 16 +++ 8 files changed, 415 insertions(+), 2 deletions(-) create mode 100644 src/go/pt-mongodb-index-check/indexes/duplicated.go create mode 100644 src/go/pt-mongodb-index-check/indexes/duplicated_test.go create mode 100644 src/go/pt-mongodb-index-check/indexes/unused.go create mode 100644 src/go/pt-mongodb-index-check/indexes/unused_test.go create mode 100644 src/go/pt-mongodb-index-check/main.go create mode 100644 src/go/pt-mongodb-summary/templates/duplicated_indexes.go diff --git a/go.mod b/go.mod index 01194fc6..d8a0a613 100644 --- a/go.mod +++ b/go.mod @@ -19,7 +19,7 @@ require ( github.com/pkg/errors v0.9.1 github.com/shirou/gopsutil v2.20.8+incompatible github.com/sirupsen/logrus v1.6.0 - github.com/stretchr/testify v1.6.1 + github.com/stretchr/testify v1.7.0 go.mongodb.org/mongo-driver v1.7.1 golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9 gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22 @@ -29,6 +29,7 @@ require ( require ( github.com/AlekSi/pointer v1.2.0 // indirect github.com/StackExchange/wmi v0.0.0-20180116203802-5d049714c4a6 // indirect + github.com/alecthomas/kong v0.5.0 // indirect github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 // indirect github.com/alecthomas/units v0.0.0-20210208195552-ff826a37aa15 // indirect github.com/davecgh/go-spew v1.1.1 // indirect @@ -59,7 +60,7 @@ require ( gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/ini.v1 v1.62.0 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect - gopkg.in/yaml.v3 
v3.0.0-20200615113413-eeeca48fe776 // indirect + gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect k8s.io/apimachinery v0.18.6 // indirect k8s.io/klog v1.0.0 // indirect sigs.k8s.io/structured-merge-diff/v3 v3.0.0 // indirect diff --git a/go.sum b/go.sum index 3ecdc949..90edf12f 100644 --- a/go.sum +++ b/go.sum @@ -10,6 +10,9 @@ github.com/StackExchange/wmi v0.0.0-20180116203802-5d049714c4a6 h1:fLjPD/aNc3UIO github.com/StackExchange/wmi v0.0.0-20180116203802-5d049714c4a6/go.mod h1:3eOhrUMpNV+6aFIbp5/iudMxNCF27Vw2OZgy4xEx0Fg= github.com/alecthomas/kingpin v2.2.6+incompatible h1:5svnBTFgJjZvGKyYBtMB0+m5wvrbUHiqye8wRJMlnYI= github.com/alecthomas/kingpin v2.2.6+incompatible/go.mod h1:59OFYbFVLKQKq+mqrL6Rw5bR0c3ACQaawgXx0QYndlE= +github.com/alecthomas/kong v0.5.0 h1:u8Kdw+eeml93qtMZ04iei0CFYve/WPcA5IFh+9wSskE= +github.com/alecthomas/kong v0.5.0/go.mod h1:uzxf/HUh0tj43x1AyJROl3JT7SgsZ5m+icOv1csRhc0= +github.com/alecthomas/repr v0.0.0-20210801044451-80ca428c5142/go.mod h1:2kn6fqh/zIyPLmm3ugklbEi5hg5wS435eygvNfaDQL8= github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 h1:JYp7IbQjafoB+tBA3gMyHYHrpOtNuDiK/uB5uXxq5wM= github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20210208195552-ff826a37aa15 h1:AUNCr9CiJuwrRYS3XieqF+Z9B9gNxo/eANAJCF2eiN4= @@ -172,6 +175,7 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/tidwall/pretty v1.0.0 h1:HsD+QiTn7sK6flMKIvNmpqz1qrpP3Ps6jOKIKMooyg4= github.com/tidwall/pretty v1.0.0/go.mod 
h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= github.com/xdg-go/pbkdf2 v1.0.0 h1:Su7DPu48wXMwC3bs7MCNG+z4FhcyEuz5dlvchbq0B0c= @@ -258,6 +262,7 @@ gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776 h1:tQIYjPdBoyREyB9XMu+nnTclpTYkz2zFM+lzLJFO4gQ= gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= k8s.io/api v0.18.6 h1:osqrAXbOQjkKIWDTjrqxWQ3w0GkKb1KA1XkUGHHYpeE= k8s.io/api v0.18.6/go.mod h1:eeyxr+cwCjMdLAmr2W3RyDI0VvTawSg/3RFFBEnmZGI= k8s.io/apimachinery v0.18.6 h1:RtFHnfGNfd1N0LeSrKCUznz5xtUP1elRGvHJbL3Ntag=
diff --git a/src/go/pt-mongodb-index-check/indexes/duplicated.go b/src/go/pt-mongodb-index-check/indexes/duplicated.go
new file mode 100644
index 00000000..b69a9525
--- /dev/null
+++ b/src/go/pt-mongodb-index-check/indexes/duplicated.go
@@ -0,0 +1,135 @@
+package indexes
+
+import (
+	"context"
+	"fmt"
+	"sort"
+	"strings"
+
+	"github.com/pkg/errors"
+	"go.mongodb.org/mongo-driver/bson/primitive"
+	"go.mongodb.org/mongo-driver/mongo"
+)
+
+// collectionIndex holds the fields of an index specification, as returned when
+// listing the indexes of a collection, that are needed to compare indexes.
+type collectionIndex struct {
+	Name      string      `bson:"name"`
+	Namespace string      `bson:"ns"`
+	V         int         `bson:"v"`
+	Key       primitive.D `bson:"key"`
+}
+
+// keySign returns the marker used to render one index field: "+" for ascending and
+// "-" for descending. Index keys are not always numeric directions: special index
+// types (text, hashed, 2dsphere, ...) store a string. Those are rendered as "(type)"
+// instead of panicking on a failed type assertion, and so they never compare equal to
+// a regular ascending or descending field.
+func keySign(value interface{}) string {
+	switch v := value.(type) {
+	case int32:
+		if v < 0 {
+			return "-"
+		}
+	case int64:
+		if v < 0 {
+			return "-"
+		}
+	case float64:
+		if v < 0 {
+			return "-"
+		}
+	case string:
+		return "(" + v + ")"
+	default:
+		return fmt.Sprintf("(%v)", v)
+	}
+
+	return "+"
+}
+
+// ComparableKey returns the index key as a string suitable for sorting and for prefix
+// comparison. Every field is terminated by a NUL byte, which is not allowed in MongoDB
+// field names, so that the key +f1 is not mistaken for a prefix of the key +f10.
+func (ci collectionIndex) ComparableKey() string {
+	var sb strings.Builder
+
+	for _, elem := range ci.Key {
+		sb.WriteString(keySign(elem.Value))
+		sb.WriteString(elem.Key)
+		sb.WriteByte(0)
+	}
+
+	return sb.String()
+}
+
+// IndexKey holds the list of fields that are part of an index, along with the field order.
+type IndexKey []primitive.E
+
+// String returns the index fields as a string. The + sign means ascending on this field
+// and a - sign indicates a descending order for that field.
+func (ik IndexKey) String() string {
+	var sb strings.Builder
+
+	for _, elem := range ik {
+		sb.WriteString(keySign(elem.Value))
+		sb.WriteString(elem.Key)
+		sb.WriteByte(' ')
+	}
+
+	return sb.String()
+}
+
+// Duplicate represents a duplicated index pair.
+// An index is considered a duplicate of another one if it is its prefix.
+// Example: the index +f1-f2 is the prefix of +f1-f2+f3.
+type Duplicate struct {
+	Namespace     string
+	Name          string
+	Key           IndexKey
+	ContainerName string
+	ContainerKey  IndexKey
+}
+
+// FindDuplicated returns the duplicated indexes of the given collection. Every returned
+// entry pairs an index (Name, Key) with another index of the same collection
+// (ContainerName, ContainerKey) whose key starts with the same fields in the same order.
+// The error of listing or decoding the indexes is returned to the caller.
+func FindDuplicated(ctx context.Context, client *mongo.Client, database, collection string) ([]Duplicate, error) {
+	cursor, err := client.Database(database).Collection(collection).Indexes().List(ctx)
+	if err != nil {
+		return nil, errors.Wrap(err, "cannot list the indexes")
+	}
+
+	var results []collectionIndex
+	// Use the caller's context so its deadline/cancellation also applies while reading.
+	if err = cursor.All(ctx, &results); err != nil {
+		return nil, errors.Wrap(err, "cannot get the list of indexes")
+	}
+
+	// After sorting, an index always comes before the indexes it is a prefix of.
+	sort.Slice(results, func(i, j int) bool {
+		return results[i].ComparableKey() < results[j].ComparableKey()
+	})
+
+	di := []Duplicate{}
+
+	for i := 0; i < len(results)-1; i++ {
+		for j := i + 1; j < len(results); j++ {
+			if strings.HasPrefix(results[j].ComparableKey(), results[i].ComparableKey()) {
+				idx := Duplicate{
+					Namespace:     database + "." + collection,
+					Name:          results[i].Name,
+					Key:           make([]primitive.E, len(results[i].Key)),
+					ContainerName: results[j].Name,
+					ContainerKey:  make([]primitive.E, len(results[j].Key)),
+				}
+				copy(idx.Key, results[i].Key)
+				copy(idx.ContainerKey, results[j].Key)
+				di = append(di, idx)
+			}
+		}
+	}
+
+	return di, nil
+}
diff --git a/src/go/pt-mongodb-index-check/indexes/duplicated_test.go b/src/go/pt-mongodb-index-check/indexes/duplicated_test.go
new file mode 100644
index 00000000..e94f782f
--- /dev/null
+++ b/src/go/pt-mongodb-index-check/indexes/duplicated_test.go
@@ -0,0 +1,135 @@
+package indexes
+
+import (
+	"context"
+	"fmt"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/AlekSi/pointer"
+	tu "github.com/percona/percona-toolkit/src/go/internal/testutils"
+	"github.com/stretchr/testify/assert"
+	"go.mongodb.org/mongo-driver/bson/primitive"
+	"go.mongodb.org/mongo-driver/mongo"
+	"go.mongodb.org/mongo-driver/mongo/options"
+	"gopkg.in/mgo.v2/bson"
+)
+
+func TestComparableKey(t *testing.T) {
+	f1 := collectionIndex{Key: primitive.D{{Key: "f1", Value: int32(1)}}}
+	f10 := collectionIndex{Key: primitive.D{{Key: "f10", Value: int32(1)}}}
+	f1f2 := collectionIndex{Key: primitive.D{{Key: "f1", Value: int32(1)}, {Key: "f2", Value: int32(-1)}}}
+	text := collectionIndex{Key: primitive.D{{Key: "_fts", Value: "text"}, {Key: "_ftsx", Value: int32(1)}}}
+
+	// f1 is a prefix of (f1, f2) but not of f10.
+	assert.True(t, strings.HasPrefix(f1f2.ComparableKey(), f1.ComparableKey()))
+	assert.False(t, strings.HasPrefix(f10.ComparableKey(), f1.ComparableKey()))
+
+	// Keys holding an index type instead of a direction must not panic.
+	assert.NotPanics(t, func() { _ = text.ComparableKey() })
+	assert.NotPanics(t, func() { _ = IndexKey(text.Key).String() })
+}
+
+func TestDuplicateIndexes(t *testing.T) {
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
+	client, err := tu.TestClient(ctx, tu.MongoDBShard1PrimaryPort)
+	if err != nil {
+		t.Fatalf("cannot get a new MongoDB client: %s", err)
+	}
+
+	dbname := "test_db"
+	collname := "test_col"
+
+	database := client.Database(dbname)
+	database.Drop(ctx)       //nolint:errcheck
+	defer database.Drop(ctx) //nolint:errcheck
+
+	_, err = database.Collection(collname).InsertOne(ctx, bson.M{"f1": 1, "f2": "2", "f3": "a", "f4": "c"})
+	assert.NoError(t, err)
+
+	testCases := []primitive.D{
+		{{"f1", 1}, {"f2", -1}, {"f3", 1}, {"f4", 1}},
+		{{"f1", 1}, {"f2", -1}, {"f3", 1}, {"f4", 1}}, // this will throw a duplicate index error
+		{{"f1", 1}, {"f2", -1}, {"f3", 1}},
+		{{"f1", 1}, {"f2", -1}},
+		{{"f3", -1}},
+	}
+
+	errCount := 0
+	for i, tc := range testCases {
+		mod := mongo.IndexModel{
+			Keys: tc,
+			Options: &options.IndexOptions{
+				Name: pointer.ToString(fmt.Sprintf("idx_%02d", i)),
+			},
+		}
+		_, err := database.Collection(collname).Indexes().CreateOne(ctx, mod)
+		if err != nil {
+			errCount++
+		}
+	}
+	/*
+		At this point we have 5 indexes: _id: (MongoDB's default), idx_00, idx_02, idx_03, idx_04.
+		idx_01 wasn't created since it duplicates idx_00 and errCount=1.
+	*/
+
+	assert.Equal(t, 1, errCount)
+
+	// FindDuplicated fills the namespace in as <database>.<collection>.
+	ns := dbname + "." + collname
+
+	want := []Duplicate{
+		{
+			Namespace: ns,
+			Name:      "idx_03",
+			Key: IndexKey{
+				{Key: "f1", Value: int32(1)},
+				{Key: "f2", Value: int32(-1)},
+			},
+			ContainerName: "idx_02",
+			ContainerKey: IndexKey{
+				{Key: "f1", Value: int32(1)},
+				{Key: "f2", Value: int32(-1)},
+				{Key: "f3", Value: int32(1)},
+			},
+		},
+		{
+			Namespace: ns,
+			Name:      "idx_03",
+			Key: IndexKey{
+				{Key: "f1", Value: int32(1)},
+				{Key: "f2", Value: int32(-1)},
+			},
+			ContainerName: "idx_00",
+			ContainerKey: IndexKey{
+				{Key: "f1", Value: int32(1)},
+				{Key: "f2", Value: int32(-1)},
+				{Key: "f3", Value: int32(1)},
+				{Key: "f4", Value: int32(1)},
+			},
+		},
+		{
+			Namespace: ns,
+			Name:      "idx_02",
+			Key: IndexKey{
+				{Key: "f1", Value: int32(1)},
+				{Key: "f2", Value: int32(-1)},
+				{Key: "f3", Value: int32(1)},
+			},
+			ContainerName: "idx_00",
+			ContainerKey: IndexKey{
+				{Key: "f1", Value: int32(1)},
+				{Key: "f2", Value: int32(-1)},
+				{Key: "f3", Value: int32(1)},
+				{Key: "f4", Value: int32(1)},
+			},
+		},
+	}
+
+	di, err := FindDuplicated(ctx, client, dbname, collname)
+	assert.NoError(t, err)
+	assert.Equal(t, want, di)
+}
diff --git a/src/go/pt-mongodb-index-check/indexes/unused.go b/src/go/pt-mongodb-index-check/indexes/unused.go
new file mode 100644
index 00000000..a7d7a5dc
--- /dev/null
+++ b/src/go/pt-mongodb-index-check/indexes/unused.go
@@ -0,0 +1,46 @@
+package indexes
+
+import (
+	"context"
+
+	"github.com/pkg/errors"
+	"go.mongodb.org/mongo-driver/bson/primitive"
+	"go.mongodb.org/mongo-driver/mongo"
+)
+
+// IndexStat holds the usage statistics of an index.
+type IndexStat struct {
+	Accesses struct {
+		Ops   int64              `bson:"ops"`
+		Since primitive.DateTime `bson:"since"`
+	} `bson:"accesses"`
+	Spec struct {
+		Name      string      `bson:"name"`
+		Namespace string      `bson:"ns"`
+		V         int32       `bson:"v"`
+		Key       primitive.D `bson:"key"`
+	} `bson:"spec"`
+	Name string      `bson:"name"`
+	Key  primitive.D `bson:"key"`
+	Host string      `bson:"host"`
+}
+
+// FindUnusedIndexes returns a list of unused indexes for the given database and collection.
+func FindUnusedIndexes(ctx context.Context, client *mongo.Client, database, collection string) ([]IndexStat, error) {
+	aggregation := mongo.Pipeline{
+		{{Key: "$indexStats", Value: primitive.M{}}},
+		{{Key: "$match", Value: primitive.M{"accesses.ops": 0}}},
+	}
+
+	cursor, err := client.Database(database).Collection(collection).Aggregate(ctx, aggregation)
+	if err != nil {
+		return nil, errors.Wrap(err, "cannot run $indexStats for unused indexes")
+	}
+
+	var stats []IndexStat
+	if err = cursor.All(ctx, &stats); err != nil {
+		return nil, errors.Wrap(err, "cannot get $indexStats for unused indexes")
+	}
+
+	return stats, nil
+}
diff --git a/src/go/pt-mongodb-index-check/indexes/unused_test.go b/src/go/pt-mongodb-index-check/indexes/unused_test.go
new file mode 100644
index 00000000..6f39268d
--- /dev/null
+++ b/src/go/pt-mongodb-index-check/indexes/unused_test.go
@@ -0,0 +1,73 @@
+package indexes
+
+import (
+	"context"
+	"fmt"
+	"math/rand"
+	"sort"
+	"testing"
+	"time"
+
+	"github.com/AlekSi/pointer"
+	tu "github.com/percona/percona-toolkit/src/go/internal/testutils"
+	"github.com/stretchr/testify/assert"
+	"go.mongodb.org/mongo-driver/bson/primitive"
+	"go.mongodb.org/mongo-driver/mongo"
+	"go.mongodb.org/mongo-driver/mongo/options"
+	"gopkg.in/mgo.v2/bson"
+)
+
+func TestUnusedIndexes(t *testing.T) {
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
+	client, err := tu.TestClient(ctx, tu.MongoDBShard1PrimaryPort)
+	if err != nil {
+		t.Fatalf("cannot get a new MongoDB client: %s", err)
+	}
+
+	dbname := "test_db"
+	collname := "test_col"
+
+	database := client.Database(dbname)
+	database.Drop(ctx)       //nolint:errcheck
+	defer database.Drop(ctx) //nolint:errcheck
+
+	testCases := []primitive.D{
+		{{"f1", 1}, {"f2", -1}, {"f3", 1}, {"f4", 1}},
+		{{"f3", -1}},
+		{{"f4", -1}},
+	}
+
+	for i, tc := range testCases {
+		mod := mongo.IndexModel{
+			Keys: tc,
+			Options: &options.IndexOptions{
+				Name: pointer.ToString(fmt.Sprintf("idx_%02d", i)),
+			},
+		}
+		_, err := database.Collection(collname).Indexes().CreateOne(ctx, mod)
+		// Every index listed in want below must exist, so a creation failure is a test failure.
+		assert.NoError(t, err)
+	}
+
+	for i := 0; i < 100; i++ {
+		_, err = database.Collection(collname).InsertOne(ctx,
+			bson.M{"f1": rand.Int63n(1000), "f2": rand.Int63n(1000), "f3": rand.Int63n(1000), "f4": rand.Int63n(1000)})
+		assert.NoError(t, err)
+	}
+
+	want := []string{"_id_", "idx_00", "idx_01", "idx_02"}
+
+	ui, err := FindUnusedIndexes(ctx, client, dbname, collname)
+	assert.NoError(t, err)
+
+	got := make([]string, 0, len(ui))
+	for _, idx := range ui {
+		// compare only names because the index struct has a timestamp in it and it is variable.
+		got = append(got, idx.Name)
+	}
+
+	sort.Strings(got)
+
+	assert.Equal(t, want, got)
+}
diff --git a/src/go/pt-mongodb-index-check/main.go b/src/go/pt-mongodb-index-check/main.go
new file mode 100644
index 00000000..f595d1d8
--- /dev/null
+++ b/src/go/pt-mongodb-index-check/main.go
@@ -0,0 +1,150 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"log"
+	"time"
+
+	"github.com/alecthomas/kong"
+	"github.com/percona/percona-toolkit/src/go/pt-mongodb-index-check/indexes"
+	"github.com/pkg/errors"
+	"go.mongodb.org/mongo-driver/bson/primitive"
+	"go.mongodb.org/mongo-driver/mongo"
+	"go.mongodb.org/mongo-driver/mongo/options"
+)
+
+// Command names, as declared in the cmd tags of cmdlineArgs.
+const (
+	cmdCheckUnused     = "check-unused"
+	cmdCheckDuplicates = "check-duplicates"
+	cmdCheckAll        = "check-all"
+)
+
+// cmdlineArgs declares the commands and flags accepted on the command line.
+type cmdlineArgs struct {
+	CheckUnused     struct{} `cmd:"" name:"check-unused" help:"Check for unused indexes."`
+	CheckDuplicated struct{} `cmd:"" name:"check-duplicates" help:"Check for duplicated indexes."`
+	CheckAll        struct{} `cmd:"" name:"check-all" help:"Check for unused and duplicated indexes."`
+	ShowHelp        struct{} `cmd:"" default:"1"`
+
+	AllDatabases bool     `name:"all-databases" xor:"db" help:"Check in all databases excluding system dbs"`
+	Databases    []string `name:"databases" xor:"db" help:"Comma separated list of databases to check"`
+
+	AllCollections bool     `name:"all-collections" xor:"colls" help:"Check in all collections in the selected databases."`
+	Collections    []string `name:"collections" xor:"colls" help:"Comma separated list of collections to check"`
+	URI            string   `name:"mongodb.uri" help:"Connection URI"`
+}
+
+// response collects the findings for all the checked collections.
+type response struct {
+	Unused     []indexes.IndexStat
+	Duplicated []indexes.Duplicate
+}
+
+func main() {
+	var args cmdlineArgs
+	kongctx := kong.Parse(&args, kong.UsageOnError())
+
+	command := kongctx.Command()
+
+	switch command {
+	case cmdCheckUnused, cmdCheckDuplicates, cmdCheckAll:
+	default:
+		// No check was requested: show the help without connecting to the database.
+		if err := kong.DefaultHelpPrinter(kong.HelpOptions{}, kongctx); err != nil {
+			log.Fatalf("Cannot print the help: %s", err)
+		}
+
+		return
+	}
+
+	resp, err := run(command, &args)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	printResponse(resp)
+}
+
+// run connects to MongoDB and runs the checks selected by command on every selected
+// collection. It is separated from main so the deferred cleanups run before exiting.
+func run(command string, args *cmdlineArgs) (*response, error) {
+	ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second)
+	defer cancel()
+
+	client, err := mongo.Connect(ctx, options.Client().ApplyURI(args.URI))
+	if err != nil {
+		return nil, errors.Wrap(err, "cannot connect to the database")
+	}
+	defer client.Disconnect(ctx) //nolint:errcheck
+
+	databases := args.Databases
+	if args.AllDatabases {
+		// System databases are excluded, as stated in the help of --all-databases.
+		filter := primitive.M{"name": primitive.M{"$nin": []string{"admin", "config", "local"}}}
+
+		if databases, err = client.ListDatabaseNames(ctx, filter); err != nil {
+			return nil, errors.Wrap(err, "cannot list the databases")
+		}
+	}
+
+	resp := &response{}
+
+	for _, database := range databases {
+		collections := args.Collections
+		if args.AllCollections {
+			collections, err = client.Database(database).ListCollectionNames(ctx, primitive.M{"type": "collection"})
+			if err != nil {
+				return nil, errors.Wrapf(err, "cannot list the collections of %q", database)
+			}
+		}
+
+		for _, collection := range collections {
+			if err := check(ctx, client, command, database, collection, resp); err != nil {
+				return nil, err
+			}
+		}
+	}
+
+	return resp, nil
+}
+
+// check runs the checks selected by command on one collection and adds the findings to resp.
+func check(ctx context.Context, client *mongo.Client, command, database, collection string, resp *response) error {
+	if command == cmdCheckUnused || command == cmdCheckAll {
+		unused, err := indexes.FindUnusedIndexes(ctx, client, database, collection)
+		if err != nil {
+			return errors.Wrapf(err, "cannot check the unused indexes of %s.%s", database, collection)
+		}
+
+		resp.Unused = append(resp.Unused, unused...)
+	}
+
+	if command == cmdCheckDuplicates || command == cmdCheckAll {
+		dups, err := indexes.FindDuplicated(ctx, client, database, collection)
+		if err != nil {
+			return errors.Wrapf(err, "cannot check the duplicated indexes of %s.%s", database, collection)
+		}
+
+		resp.Duplicated = append(resp.Duplicated, dups...)
+	}
+
+	return nil
+}
+
+// printResponse writes the findings to the standard output.
+func printResponse(resp *response) {
+	fmt.Println("Unused indexes:")
+
+	for _, idx := range resp.Unused {
+		fmt.Printf("  %s (%s)\n", idx.Name, indexes.IndexKey(idx.Key))
+	}
+
+	fmt.Println("Duplicated indexes:")
+
+	for _, dup := range resp.Duplicated {
+		fmt.Printf("  %s: %s (%s) is a prefix of %s (%s)\n",
+			dup.Namespace, dup.Name, dup.Key, dup.ContainerName, dup.ContainerKey)
+	}
+}
diff --git a/src/go/pt-mongodb-summary/templates/duplicated_indexes.go b/src/go/pt-mongodb-summary/templates/duplicated_indexes.go
new file mode 100644
index 00000000..c9debb1b
--- /dev/null
+++ b/src/go/pt-mongodb-summary/templates/duplicated_indexes.go
@@ -0,0 +1,21 @@
+package templates
+
+// Security is a text template that renders a Security section: users, roles, auth,
+// SSL, port, bind IP and the optional warning messages.
+//
+// NOTE(review): this file is named duplicated_indexes.go but declares a constant named
+// Security; it looks like a placeholder. If package templates already declares Security
+// this will not compile — confirm.
+const Security = `
+# Security ###############################################################################################
+Users : {{.Users}}
+Roles : {{.Roles}}
+Auth : {{.Auth}}
+SSL : {{.SSL}}
+Port : {{.Port}}
+Bind IP: {{.BindIP}}
+{{- if .WarningMsgs -}}
+{{- range .WarningMsgs }}
+{{ . }}
+{{end}}
+{{end }}
+ `