Compare commits

...

16 Commits

Author SHA1 Message Date
Max Dudin
6596920085 Merge pull request #452 from percona/CLOUD-535
PT-1865, PT-1866 Added pt-k8s-debug-collector
2020-08-05 18:41:48 +03:00
Max Dudin
08c9621c14 PT-1865 fix typo 2020-08-05 18:00:30 +03:00
Max Dudin
8ed3b1cdc9 CLOUD-535 Updated Gopkg.lock 2020-08-05 14:01:34 +03:00
Max Dudin
61ba3c729e CLOUD-535 gopath added, changed suumary func name 2020-08-05 13:59:46 +03:00
Max Dudin
212d2032ed PT-1865 small changes 2020-08-04 00:00:54 +03:00
Max Dudin
9b90408451 PT-1865 Changed tool name 2020-08-03 20:39:12 +03:00
Max Dudin
0d2a2f8fdc PT-1865 Updated README 2020-07-17 14:29:17 +03:00
Max Dudin
309bb24f8a PT-1865 Added pt-mysql-summury and pt-mobgodb-summary 2020-07-17 14:28:13 +03:00
Max Dudin
57c7769c5b PT-1865 Update readme, rename tool 2020-07-07 12:06:01 +03:00
Max Dudin
dbb1982321 PT-1865 Clean-up 2020-07-06 12:19:32 +03:00
Max Dudin
36e9a2f07f PT-1865 add README file 2020-07-03 12:12:45 +03:00
Max Dudin
118110a671 PT-1865 small changes 2020-07-03 12:02:30 +03:00
Max Dudin
faffa70867 PT-1865 Remove archive package 2020-07-01 10:53:28 +03:00
Max Dudin
a65dbefbd9 CLOUD-535 Add contexts to errors, small fixes 2020-06-19 16:22:24 +03:00
Max Dudin
1d6da2fdca CLOUD-535 Rework archive and dumper 2020-06-19 15:53:55 +03:00
Max Dudin
136e506549 CLOUD-535 Add cluster debug collector 2020-06-17 16:14:55 +03:00
4 changed files with 548 additions and 1 deletions

115
Gopkg.lock generated
View File

@@ -52,6 +52,14 @@
revision = "c85607071cf08ca1adaf48319cd1aa322e81d8c1"
version = "v1.42.0"
[[projects]]
digest = "1:fd2ee29b7807f198e72dbd6371267b34d05aa83151c8c81b2ade14854e50f4ee"
name = "github.com/go-logr/logr"
packages = ["."]
pruneopts = ""
revision = "d18fcbf02861580d05a1f23601145b272c4e7b4b"
version = "v0.2.0"
[[projects]]
digest = "1:b6581f9180e0f2d5549280d71819ab951db9d511478c87daca95669589d505c0"
name = "github.com/go-ole/go-ole"
@@ -63,6 +71,17 @@
revision = "97b6244175ae18ea6eef668034fd6565847501c9"
version = "v1.2.4"
[[projects]]
digest = "1:d69d2ba23955582a64e367ff2b0808cdbd048458c178cea48f11ab8c40bd7aea"
name = "github.com/gogo/protobuf"
packages = [
"proto",
"sortkeys",
]
pruneopts = ""
revision = "5628607bb4c51c3157aacc3a50f0ab707582b805"
version = "v1.3.1"
[[projects]]
digest = "1:530233672f656641b365f8efb38ed9fba80e420baff2ce87633813ab3755ed6d"
name = "github.com/golang/mock"
@@ -71,6 +90,14 @@
revision = "51421b967af1f557f93a59e0057aaf15ca02e29c"
version = "v1.2.0"
[[projects]]
digest = "1:16ecf9e89b8b1310d9566a53484c31c5241bb47c32162eba780b46c0dfb58fef"
name = "github.com/google/gofuzz"
packages = ["."]
pruneopts = ""
revision = "db92cf7ae75e4a7a28abc005addab2b394362888"
version = "v1.1.0"
[[projects]]
branch = "master"
digest = "1:b759103c9b4135568253c17d2866064cde398e93764b611caabf5aa8e3059685"
@@ -216,7 +243,13 @@
branch = "master"
digest = "1:adcb9e84ce154ef1d45851b57c40f8a211db3e36373a65b7c4f10c79b7428718"
name = "golang.org/x/net"
packages = ["context"]
packages = [
"context",
"http/httpguts",
"http2",
"http2/hpack",
"idna",
]
pruneopts = ""
revision = "74de082e2cca95839e88aa0aeee5aadf6ce7710f"
@@ -231,6 +264,39 @@
pruneopts = ""
revision = "9eb1bfa1ce65ae8a6ff3114b0aaf9a41a6cf3560"
[[projects]]
digest = "1:fccda34e4c58111b1908d8d69bf8d57c41c8e2542bc18ec8cd38c4fa21057f71"
name = "golang.org/x/text"
packages = [
"collate",
"collate/build",
"internal/colltab",
"internal/gen",
"internal/language",
"internal/language/compact",
"internal/tag",
"internal/triegen",
"internal/ucd",
"language",
"secure/bidirule",
"transform",
"unicode/bidi",
"unicode/cldr",
"unicode/norm",
"unicode/rangetable",
]
pruneopts = ""
revision = "23ae387dee1f90d29a23c0e87ee0b46038fbed0e"
version = "v0.3.3"
[[projects]]
digest = "1:75fb3fcfc73a8c723efde7777b40e8e8ff9babf30d8c56160d01beffea8a95a6"
name = "gopkg.in/inf.v0"
packages = ["."]
pruneopts = ""
revision = "d2d2541c53f18d2a059457998ce2876cc8e67cbf"
version = "v0.9.1"
[[projects]]
branch = "v2"
digest = "1:f54ba71a035aac92ced3e902d2bff3734a15d1891daff73ec0f90ef236750139"
@@ -254,6 +320,52 @@
pruneopts = ""
revision = "d5d1b5820637886def9eef33e03a27a9f166942c"
[[projects]]
digest = "1:a249e341b9bf261a982ab262c69f08223e839302d0a21cfe6e00f2ef2e8695a2"
name = "k8s.io/api"
packages = ["core/v1"]
pruneopts = ""
revision = "f822fed505d4c9dd4eb2c5f4ca2f4c49c19ea394"
version = "v0.18.6"
[[projects]]
digest = "1:74eeecf1188777314a92348555adcb977912d530269130143daf7fc0e80bb512"
name = "k8s.io/apimachinery"
packages = [
"pkg/api/resource",
"pkg/apis/meta/v1",
"pkg/conversion",
"pkg/conversion/queryparams",
"pkg/fields",
"pkg/labels",
"pkg/runtime",
"pkg/runtime/schema",
"pkg/selection",
"pkg/types",
"pkg/util/errors",
"pkg/util/intstr",
"pkg/util/json",
"pkg/util/naming",
"pkg/util/net",
"pkg/util/runtime",
"pkg/util/sets",
"pkg/util/validation",
"pkg/util/validation/field",
"pkg/watch",
"third_party/forked/golang/reflect",
]
pruneopts = ""
revision = "fbe88689c3c2735e949f67884a4f58cb99379159"
version = "v0.17.9"
[[projects]]
digest = "1:5ad0a3bf1b13f9b8bd99f4079c635cb813d87b70db65b98fe5503762e1d39735"
name = "k8s.io/klog"
packages = ["."]
pruneopts = ""
revision = "b5c3182dac44f851522e32c97c86ac32755c296d"
version = "v2.3.0"
[solve-meta]
analyzer-name = "dep"
analyzer-version = 1
@@ -280,6 +392,7 @@
"gopkg.in/mgo.v2",
"gopkg.in/mgo.v2/bson",
"gopkg.in/mgo.v2/dbtest",
"k8s.io/api/core/v1",
]
solver-name = "gps-cdcl"
solver-version = 1

View File

@@ -0,0 +1,44 @@
# Debug collector tool
Collects debug data (logs, resource statuses, etc.) from a k8s/OpenShift cluster. The data is packed into the "cluster-dump.tar.gz" archive in the current working directory.
### Data that will be collected
"pods",
"replicasets",
"deployments",
"statefulsets",
"replicationcontrollers",
"events",
"configmaps",
"secrets",
"cronjobs",
"jobs",
"podsecuritypolicies",
"poddisruptionbudgets",
"perconaxtradbbackups",
"perconaxtradbclusterbackups",
"perconaxtradbclusterrestores",
"perconaxtradbclusters",
"clusterrolebindings",
"clusterroles",
"rolebindings",
"roles",
"storageclasses",
"persistentvolumeclaims",
"persistentvolumes",
"nodes",
"your-custom-resource" (depends on the `--resource` flag)
### Usage
`pt-k8s-debug-collector <flags>`
Flags:
`--resource` targeted custom resource name (default "pxc")
`--namespace` targeted namespace. By default data will be collected from all namespaces
`--cluster` targeted pxc/psmdb cluster. By default data from all available clusters will be collected
### Requirements
Installed and configured 'kubectl'
Installed and configured 'pt-mysql-summary'
Installed and configured 'pt-mongodb-summary'

View File

@@ -0,0 +1,355 @@
package dumper
import (
"archive/tar"
"bytes"
"compress/gzip"
"encoding/base64"
"encoding/json"
"fmt"
"log"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
"github.com/pkg/errors"
corev1 "k8s.io/api/core/v1"
)
// Dumper carries the configuration and the accumulated error log used while
// dumping a cluster.
type Dumper struct {
	// cmd is the executable run for every cluster query.
	cmd string
	// namespace limits collection to a single namespace; when empty the
	// namespaces are listed from the cluster instead.
	namespace string
	// crType is the custom resource name the Dumper was created for.
	crType string
	// resources lists the resource kinds fetched for each namespace.
	resources []string

	// location is the archive base name and the root directory inside it.
	location string
	// mode is the permission mode written into every tar header.
	mode int64

	// errors collects one line per failed command; it is stored in the
	// archive as errors.txt.
	errors string
}
// New returns a Dumper that queries the cluster with kubectl.
//
// location is the base name of the archive (the ".tar.gz" suffix is added by
// DumpCluster) and the root directory inside it; when empty, "cluster-dump"
// is used. namespace restricts collection to one namespace; when empty all
// namespaces are dumped. resource, when non-empty, is appended to the list of
// collected resource kinds and stored as the Dumper's custom resource type.
func New(location, namespace, resource string) Dumper {
	resources := []string{
		"pods",
		"replicasets",
		"deployments",
		"statefulsets",
		"replicationcontrollers",
		"events",
		"configmaps",
		"secrets",
		"cronjobs",
		"jobs",
		"podsecuritypolicies",
		"poddisruptionbudgets",
		"perconaxtradbbackups",
		"perconaxtradbclusterbackups",
		"perconaxtradbclusterrestores",
		"perconaxtradbclusters",
		"clusterrolebindings",
		"clusterroles",
		"rolebindings",
		"roles",
		"storageclasses",
		"persistentvolumeclaims",
		"persistentvolumes",
	}
	if len(resource) > 0 {
		resources = append(resources, resource)
	}

	// Honour the caller's location; callers passing "" keep getting the
	// default "cluster-dump" base name.
	if location == "" {
		location = "cluster-dump"
	}

	return Dumper{
		cmd:       "kubectl",
		resources: resources,
		location:  location,
		mode:      int64(0777),
		namespace: namespace,
		crType:    resource,
	}
}
// k8sPods is the part of a `kubectl get pods -o json` response that
// DumpCluster decodes: the list of pods found under the "items" key.
type k8sPods struct {
Items []corev1.Pod `json:"items"`
}
// namespaces is the part of a `kubectl get namespaces -o json` response that
// DumpCluster decodes: the list of namespaces found under the "items" key.
// When a single namespace is requested, DumpCluster fills Items by hand
// instead of querying the cluster.
type namespaces struct {
Items []corev1.Namespace `json:"items"`
}
// DumpCluster writes a dump of the cluster into the gzip-compressed tar
// archive "<Dumper.location>.tar.gz".
//
// For every target namespace (the configured one, or all namespaces reported
// by the cluster) it stores the logs of each pod, a pt-summary for pods whose
// "app.kubernetes.io/component" label matches the configured custom resource
// type, and the YAML of every resource kind in Dumper.resources. Finally the
// cluster-wide "nodes" resource is stored. Failures of individual commands are
// recorded in errors.txt inside the archive and do not abort the dump.
//
// An error is returned when the archive cannot be created, the namespaces
// cannot be listed, the nodes entry cannot be written, or the archive cannot
// be finalized (closing the tar, gzip or file layer failed).
func (d *Dumper) DumpCluster() (err error) {
	file, err := os.Create(d.location + ".tar.gz")
	if err != nil {
		return errors.Wrap(err, "create tar file")
	}
	zr := gzip.NewWriter(file)
	tw := tar.NewWriter(zr)

	defer func() {
		if aerr := addToArchive(d.location+"/errors.txt", d.mode, []byte(d.errors), tw); aerr != nil {
			log.Println("Error: add errors.txt to archive:", aerr)
		}
		// Always close every layer, innermost first: stopping at the first
		// failure would leak the remaining writers and the file handle.
		// Closing flushes buffered data, so a failure here means the archive
		// is incomplete; report the first such error unless the dump already
		// failed for another reason.
		if cerr := tw.Close(); cerr != nil {
			log.Println("close tar writer", cerr)
			if err == nil {
				err = errors.Wrap(cerr, "close tar writer")
			}
		}
		if cerr := zr.Close(); cerr != nil {
			log.Println("close gzip writer", cerr)
			if err == nil {
				err = errors.Wrap(cerr, "close gzip writer")
			}
		}
		if cerr := file.Close(); cerr != nil {
			log.Println("close file", cerr)
			if err == nil {
				err = errors.Wrap(cerr, "close file")
			}
		}
	}()

	var nss namespaces
	if len(d.namespace) > 0 {
		ns := corev1.Namespace{}
		ns.Name = d.namespace
		nss.Items = append(nss.Items, ns)
	} else {
		args := []string{"get", "namespaces", "-o", "json"}
		output, cmdErr := d.runCmd(args...)
		if cmdErr != nil {
			d.logError(cmdErr.Error(), args...)
			return errors.Wrap(cmdErr, "get namespaces")
		}
		if uerr := json.Unmarshal(output, &nss); uerr != nil {
			d.logError(uerr.Error(), "unmarshal namespaces")
			return errors.Wrap(uerr, "unmarshal namespaces")
		}
	}

	for _, ns := range nss.Items {
		args := []string{"get", "pods", "-o", "json", "--namespace", ns.Name}
		output, cmdErr := d.runCmd(args...)
		if cmdErr != nil {
			d.logError(cmdErr.Error(), args...)
			continue
		}

		var pods k8sPods
		if uerr := json.Unmarshal(output, &pods); uerr != nil {
			// Not fatal: the namespace's resources are still collected below.
			d.logError(uerr.Error(), "unmarshal pods from namespace", ns.Name)
			log.Printf("Error: unmarshal pods in namespace %s: %v", ns.Name, uerr)
		}

		for _, pod := range pods.Items {
			location := filepath.Join(d.location, ns.Name, pod.Name, "logs.txt")
			logArgs := []string{"logs", pod.Name, "--namespace", ns.Name, "--all-containers"}
			logs, logErr := d.runCmd(logArgs...)
			if logErr != nil {
				// Store the failure text in place of the logs.
				d.logError(logErr.Error(), logArgs...)
				if aerr := addToArchive(location, d.mode, []byte(logErr.Error()), tw); aerr != nil {
					log.Printf("Error: create archive with logs for pod %s in namespace %s: %v", pod.Name, ns.Name, aerr)
				}
				continue
			}
			if aerr := addToArchive(location, d.mode, logs, tw); aerr != nil {
				d.logError(aerr.Error(), "create archive for pod "+pod.Name)
				log.Printf("Error: create archive for pod %s: %v", pod.Name, aerr)
			}

			if len(pod.Labels) == 0 {
				continue
			}

			// Only database pods get a summary; psmdb pods carry the
			// component label "mongod" rather than the resource name.
			component := d.crType
			if d.crType == "psmdb" {
				component = "mongod"
			}
			if pod.Labels["app.kubernetes.io/component"] != component {
				continue
			}

			location = filepath.Join(d.location, ns.Name, pod.Name, "pt-summary.txt")
			summary, sumErr := d.getPodSummary(d.crType, pod.Name, pod.Labels["app.kubernetes.io/instance"], tw)
			if sumErr != nil {
				d.logError(sumErr.Error(), d.crType, pod.Name)
				if aerr := addToArchive(location, d.mode, []byte(sumErr.Error()), tw); aerr != nil {
					log.Printf("Error: create pt-summary errors archive for pod %s in namespace %s: %v", pod.Name, ns.Name, aerr)
				}
				continue
			}
			if aerr := addToArchive(location, d.mode, summary, tw); aerr != nil {
				d.logError(aerr.Error(), "create pt-summary archive for pod "+pod.Name)
				log.Printf("Error: create pt-summary archive for pod %s: %v", pod.Name, aerr)
			}
		}

		for _, resource := range d.resources {
			if rerr := d.getResource(resource, ns.Name, tw); rerr != nil {
				log.Printf("Error: get %s resource: %v", resource, rerr)
			}
		}
	}

	if err = d.getResource("nodes", "", tw); err != nil {
		return errors.Wrapf(err, "get nodes")
	}

	return nil
}
// runCmd executes Dumper.cmd with the given arguments and returns what the
// command wrote to stdout.
//
// The call is treated as failed when the command returns an error or when it
// writes anything to stderr; in both cases the returned error carries the run
// error together with the captured stderr and stdout.
func (d *Dumper) runCmd(args ...string) ([]byte, error) {
	var stdout, stderr bytes.Buffer

	command := exec.Command(d.cmd, args...)
	command.Stdout = &stdout
	command.Stderr = &stderr

	runErr := command.Run()
	if runErr == nil && stderr.Len() == 0 {
		return stdout.Bytes(), nil
	}

	return nil, errors.Errorf("error: %v, stderr: %s, stdout: %s", runErr, stderr.String(), stdout.String())
}
// getResource fetches the YAML of the resource kind name and stores it in the
// archive as "<name>.yaml". With a non-empty namespace the query is limited to
// that namespace and the entry is placed in the namespace's directory;
// otherwise it goes directly under Dumper.location.
//
// When the query fails, the failure is recorded and its text is archived in
// place of the YAML. The returned error only reflects writing to the archive.
func (d *Dumper) getResource(name, namespace string, tw *tar.Writer) error {
	args := []string{"get", name, "-o", "yaml"}
	dir := d.location
	if namespace != "" {
		args = append(args, "--namespace", namespace)
		dir = filepath.Join(d.location, namespace)
	}
	target := filepath.Join(dir, name+".yaml")

	content, err := d.runCmd(args...)
	if err != nil {
		d.logError(err.Error(), args...)
		log.Printf("Error: get resource %s in namespace %s: %v", name, namespace, err)
		content = []byte(err.Error())
	}

	return addToArchive(target, d.mode, content, tw)
}
// logError appends one line to the Dumper's error log in the form
// "<cmd> <args joined by spaces>: <err>".
func (d *Dumper) logError(err string, args ...string) {
	line := fmt.Sprintf("%s %s: %s\n", d.cmd, strings.Join(args, " "), err)
	d.errors += line
}
// addToArchive writes content to tw as a single entry named location with the
// given permission mode.
//
// It returns an error when either the header or the content cannot be written.
func addToArchive(location string, mode int64, content []byte, tw *tar.Writer) error {
	hdr := &tar.Header{
		// Callers build location with filepath.Join, which yields backslashes
		// on Windows; tar entry names are expected to use forward slashes.
		Name: filepath.ToSlash(location),
		Mode: mode,
		Size: int64(len(content)),
		// Without a modification time the entry is stamped with the Unix
		// epoch, which extraction tools may flag as implausibly old.
		ModTime: time.Now(),
	}
	if err := tw.WriteHeader(hdr); err != nil {
		return errors.Wrapf(err, "write header to %s", location)
	}
	if _, err := tw.Write(content); err != nil {
		return errors.Wrapf(err, "write content to %s", location)
	}

	return nil
}
// crSecrets is the part of a custom resource's JSON that getCR decodes: the
// names of the secrets holding the database credentials.
type crSecrets struct {
Spec struct {
// SecretName is read from "spec.secretsName"; getPodSummary uses it for
// pxc resources.
SecretName string `json:"secretsName,omitempty"`
Secrets struct {
// Users is read from "spec.secrets.users"; getPodSummary uses it for
// psmdb resources.
Users string `json:"users,omitempty"`
} `json:"secrets,omitempty"`
} `json:"spec"`
}
// getPodSummary runs the summary tool matching resource against the pod
// podName and returns the tool's stderr and stdout formatted as
// "stderr: ..., stdout: ...".
//
// For "pxc" it runs pt-mysql-summary as root through a forwarded port 3306;
// for "psmdb" it runs pt-mongodb-summary as clusterAdmin through a forwarded
// port 27017. The password is read from the secret referenced by the custom
// resource crName. Any other resource is rejected with an error. tw is
// accepted for signature compatibility and is not used.
//
// NOTE(review): none of the kubectl calls made here (port-forward, get cr,
// get secret) pass --namespace, so they presumably run against the current
// context's namespace — confirm this is intended when dumping all namespaces.
func (d *Dumper) getPodSummary(resource, podName, crName string, tw *tar.Writer) ([]byte, error) {
	var (
		summCmdName string
		ports       string
		summCmdArgs []string
	)

	switch resource {
	case "pxc":
		cr, err := d.getCR("pxc/" + crName)
		if err != nil {
			return nil, errors.Wrap(err, "get cr")
		}
		pass, err := d.getDataFromSecret(cr.Spec.SecretName, "root")
		if err != nil {
			return nil, errors.Wrap(err, "get password from pxc users secret")
		}
		ports = "3306:3306"
		summCmdName = "pt-mysql-summary"
		summCmdArgs = []string{"--host=127.0.0.1", "--port=3306", "--user=root", "--password=" + pass}
	case "psmdb":
		cr, err := d.getCR("psmdb/" + crName)
		if err != nil {
			return nil, errors.Wrap(err, "get cr")
		}
		pass, err := d.getDataFromSecret(cr.Spec.Secrets.Users, "MONGODB_CLUSTER_ADMIN_PASSWORD")
		if err != nil {
			return nil, errors.Wrap(err, "get password from psmdb users secret")
		}
		ports = "27017:27017"
		summCmdName = "pt-mongodb-summary"
		summCmdArgs = []string{"--username=clusterAdmin", "--password=" + pass, "--authenticationDatabase=admin", "127.0.0.1:27017"}
	default:
		// Without a known tool there is nothing to forward to or to run.
		return nil, errors.Errorf("no summary tool for resource %q", resource)
	}

	// Start the port-forward synchronously so a failure to launch it is
	// reported here instead of leaving Process nil for the cleanup below.
	cmdPortFwd := exec.Command(d.cmd, "port-forward", "pod/"+podName, ports)
	if err := cmdPortFwd.Start(); err != nil {
		return nil, errors.Wrap(err, "start port-forward")
	}
	defer func() {
		killErr := cmdPortFwd.Process.Kill()
		// Wait reaps the child and releases its resources.
		waitErr := cmdPortFwd.Wait()
		if killErr != nil {
			// Kill fails when port-forward already exited on its own; in that
			// case its exit status is the interesting error. After a
			// successful Kill the wait error is expected and ignored.
			d.logError(killErr.Error(), "kill port-forward")
			if waitErr != nil {
				d.logError(waitErr.Error(), "port-forward")
			}
		}
	}()

	time.Sleep(3 * time.Second) // wait for port-forward command

	var outb, errb bytes.Buffer
	cmd := exec.Command(summCmdName, summCmdArgs...)
	cmd.Stdout = &outb
	cmd.Stderr = &errb
	if err := cmd.Run(); err != nil {
		return nil, errors.Errorf("error: %v, stderr: %s, stdout: %s", err, errb.String(), outb.String())
	}

	return []byte(fmt.Sprintf("stderr: %s, stdout: %s", errb.String(), outb.String())), nil
}
// getCR fetches the custom resource crName (given as "<type>/<name>") as JSON
// and decodes the secret references from its spec.
//
// On any error the zero crSecrets value is returned together with the error.
func (d *Dumper) getCR(crName string) (crSecrets, error) {
	output, err := d.runCmd("get", crName, "-o", "json")
	if err != nil {
		return crSecrets{}, errors.Wrap(err, "get "+crName)
	}

	var cr crSecrets
	if err := json.Unmarshal(output, &cr); err != nil {
		// Name the actual resource: this function serves pxc as well as psmdb.
		return crSecrets{}, errors.Wrap(err, "unmarshal "+crName+" cr")
	}

	return cr, nil
}
// getDataFromSecret reads the entry dataName from the data section of the
// secret secretName and returns its base64-decoded value.
//
// An error is returned when the secret cannot be queried or when the value is
// not valid standard base64.
func (d *Dumper) getDataFromSecret(secretName, dataName string) (string, error) {
	template := "--template={{.data." + dataName + "}}"

	encoded, err := d.runCmd("get", "secrets/"+secretName, template)
	if err != nil {
		return "", errors.Wrap(err, "run get secret cmd")
	}

	decoded, err := base64.StdEncoding.DecodeString(string(encoded))
	if err != nil {
		return "", errors.Wrap(err, "decode data")
	}

	return string(decoded), nil
}

View File

@@ -0,0 +1,35 @@
package main
import (
"flag"
"log"
"os"
"github.com/percona/percona-toolkit/src/go/pt-k8s-debug-collector/dumper"
)
// main parses the command-line flags, builds a Dumper for the requested
// namespace and resource and dumps the cluster. When a cluster name is given
// it is appended to the resource as "<resource>/<cluster>". The process exits
// with status 1 if the dump fails.
func main() {
	namespace := flag.String("namespace", "", "Namespace for collecting data. If empty data will be collected from all namespaces")
	resource := flag.String("resource", "pxc", "Resource name. Default value - 'pxc'")
	clusterName := flag.String("cluster", "", "Cluster name")
	flag.Parse()

	target := *resource
	if *clusterName != "" {
		target += "/" + *clusterName
	}

	d := dumper.New("", *namespace, target)
	log.Println("Start collecting cluster data")

	if err := d.DumpCluster(); err != nil {
		log.Println("Error:", err)
		os.Exit(1)
	}

	log.Println("Done")
}