diff --git a/src/go/pt-galera-log-explainer/regex/idents.go b/src/go/pt-galera-log-explainer/regex/idents.go index a446a85e..d41ba3e7 100644 --- a/src/go/pt-galera-log-explainer/regex/idents.go +++ b/src/go/pt-galera-log-explainer/regex/idents.go @@ -72,7 +72,7 @@ var IdentsMap = types.RegexMap{ "RegexMemberCount": &types.LogRegex{ Regex: regexp.MustCompile("members.[0-9]+.:"), - InternalRegex: regexp.MustCompile(regexMembers), + InternalRegex: regexp.MustCompile("members." + regexMembers + ".:"), Handler: func(submatches map[string]string, logCtx types.LogCtx, log string, date time.Time) (types.LogCtx, types.LogDisplayer) { members := submatches[groupMembers] diff --git a/src/go/pt-galera-log-explainer/regex/idents_test.go b/src/go/pt-galera-log-explainer/regex/idents_test.go index 48225b0e..214c7d8a 100644 --- a/src/go/pt-galera-log-explainer/regex/idents_test.go +++ b/src/go/pt-galera-log-explainer/regex/idents_test.go @@ -194,6 +194,14 @@ func TestIdentsRegex(t *testing.T) { }, key: "RegexMemberCount", }, + { + log: "{\"log\":\"2001-01-01T01:01:01.000000Z 10 [Note] [MY-000000] [Galera] ================================================\\nView:\\n id: 9f191762-2542-11ee-89be-13bdb1218f0e:9339113\\n status: primary\\n protocol_version: 4\\n capabilities: MULTI-MASTER, CERTIFICATION, PARALLEL_APPLYING, REPLAY, ISOLATION, PAUSE, CAUSAL_READ, INCREMENTAL_WS, UNORDERED, PREORDERED, STREAMING, NBO\\n final: no\\n own_index: 1\\n members(2):\\n\\t0: 45406e8d-2de0-11ee-95fc-f29a5fdf1ee0, cluster1-0\\n\\t1: 5bf18376-2de0-11ee-8333-6e755a3456ca, cluster1-2\\n=================================================\\n\",\"file\":\"/var/lib/mysql/mysqld-error.log\"}", + expectedOut: "view member count: 2", + expected: regexTestState{ + LogCtx: types.LogCtx{MemberCount: 2}, + }, + key: "RegexMemberCount", + }, { log: "2001-01-01T01:01:01.000000Z 1 [Note] [MY-000000] [Galera] ####### My UUID: 60205de0-5cf6-11ec-8884-3a01908be11a", diff --git 
a/src/go/pt-galera-log-explainer/regex/operator.go b/src/go/pt-galera-log-explainer/regex/operator.go index 4d613be1..b220da8c 100644 --- a/src/go/pt-galera-log-explainer/regex/operator.go +++ b/src/go/pt-galera-log-explainer/regex/operator.go @@ -61,7 +61,7 @@ var PXCOperatorMap = types.RegexMap{ // so this regex is about capturing subgroups to re-handle each them to the appropriate existing IdentsMap regex "RegexOperatorMemberAssociations": &types.LogRegex{ Regex: regexp.MustCompile("================================================.*View:"), - InternalRegex: regexp.MustCompile("own_index: " + regexIdx + ".*(?P<memberlog>" + IdentsMap["RegexMemberCount"].Regex.String() + ")(?P<compiledAssociations>(....-?[0-9]{1,2}(\\.-?[0-9])?: [a-z0-9]+-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]+, [a-zA-Z0-9-_\\.]+)+)"), + InternalRegex: regexp.MustCompile("own_index: " + regexIdx + ".*" + IdentsMap["RegexMemberCount"].Regex.String() + "(?P<compiledAssociations>(....-?[0-9]{1,2}(\\.-?[0-9])?: [a-z0-9]+-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]+, [a-zA-Z0-9-_\\.]+)+)"), Handler: func(submatches map[string]string, logCtx types.LogCtx, log string, date time.Time) (types.LogCtx, types.LogDisplayer) { logCtx.MyIdx = submatches[groupIdx] @@ -71,12 +71,10 @@ var PXCOperatorMap = types.RegexMap{ msg string ) - logCtx, displayer = IdentsMap["RegexMemberCount"].Handle(logCtx, submatches["memberlog"], date) - msg += displayer(logCtx) + "; " - subAssociations := strings.Split(submatches["compiledAssociations"], "\\n\\t") + // if it only has a single element, the regular non-operator logRegex will trigger normally already if len(subAssociations) < 2 { - return logCtx, types.SimpleDisplayer(msg) + return logCtx, types.SimpleDisplayer("") } for _, subAssociation := range subAssociations[1:] { // better to reuse the idents regex diff --git a/src/go/pt-galera-log-explainer/regex/operator_test.go b/src/go/pt-galera-log-explainer/regex/operator_test.go index 8bab386b..253bd22f 100644 --- a/src/go/pt-galera-log-explainer/regex/operator_test.go +++ 
b/src/go/pt-galera-log-explainer/regex/operator_test.go @@ -21,15 +21,14 @@ func TestPXCOperatorRegex(t *testing.T) { }, expected: regexTestState{ LogCtx: types.LogCtx{ - MyIdx: "0", - MemberCount: 3, - OwnHashes: []string{"45406e8d-95fc"}, - OwnNames: []string{"cluster1-0"}, + MyIdx: "0", + OwnHashes: []string{"45406e8d-95fc"}, + OwnNames: []string{"cluster1-0"}, }, HashToNodeNames: map[string]string{"45406e8d-95fc": "cluster1-0", "5bf18376-8333": "cluster1-2", "66e2b7bf-8000": "cluster1-1"}, State: "PRIMARY", }, - expectedOut: "view member count: 3; 45406e8d-95fc is cluster1-0; 5bf18376-8333 is cluster1-2; 66e2b7bf-8000 is cluster1-1; ", + expectedOut: "45406e8d-95fc is cluster1-0; 5bf18376-8333 is cluster1-2; 66e2b7bf-8000 is cluster1-1; ", key: "RegexOperatorMemberAssociations", }, diff --git a/src/go/pt-galera-log-explainer/translate/translate.go b/src/go/pt-galera-log-explainer/translate/translate.go index 295833dc..1492bfa7 100644 --- a/src/go/pt-galera-log-explainer/translate/translate.go +++ b/src/go/pt-galera-log-explainer/translate/translate.go @@ -17,7 +17,7 @@ type translationUnit struct { type translationsDB struct { // 1 hash: only 1 IP. 
wsrep_node_address is not dynamic // if there's a restart, the hash will change as well anyway - HashToIP map[string]translationUnit + HashToIP map[string]*translationUnit // wsrep_node_name is dynamic HashToNodeNames map[string][]translationUnit @@ -38,7 +38,7 @@ func init() { func initTranslationsDB() { db = translationsDB{ - HashToIP: map[string]translationUnit{}, + HashToIP: map[string]*translationUnit{}, HashToNodeNames: map[string][]translationUnit{}, IPToMethods: map[string][]translationUnit{}, IPToNodeNames: map[string][]translationUnit{}, @@ -59,55 +59,78 @@ func GetDB() translationsDB { return db } +func (tu *translationUnit) UpdateTimestamp(ts time.Time) { + // we want to avoid gap of information, so the earliest proof should be kept + if tu.Timestamp.After(ts) { + tu.Timestamp = ts + } +} + func AddHashToIP(hash, ip string, ts time.Time) { db.rwlock.Lock() defer db.rwlock.Unlock() - db.HashToIP[hash] = translationUnit{Value: ip, Timestamp: ts} + latestValue, ok := db.HashToIP[hash] + if !ok { + db.HashToIP[hash] = &translationUnit{Value: ip, Timestamp: ts} + } else { + latestValue.UpdateTimestamp(ts) + } } -func sameAsLatestValue(m map[string][]translationUnit, key string, newvalue string) bool { - return len(m[key]) > 0 && m[key][len(m[key])-1].Value == newvalue +func getLatestValue(m map[string][]translationUnit, key string) *translationUnit { + if len(m[key]) == 0 { + return nil + } + return &m[key][len(m[key])-1] +} + +func upsertToMap(m map[string][]translationUnit, key string, tu translationUnit) { + + latestValue := getLatestValue(m, key) + if latestValue == nil || latestValue.Value != tu.Value { + m[key] = append(m[key], tu) + return + } + // we want to avoid gap of information, so the earliest proof should be kept + if latestValue.Timestamp.After(tu.Timestamp) { + latestValue.Timestamp = tu.Timestamp + } } func AddHashToNodeName(hash, name string, ts time.Time) { db.rwlock.Lock() defer db.rwlock.Unlock() name = utils.ShortNodeName(name) - if 
sameAsLatestValue(db.HashToNodeNames, hash, name) { - return - } - db.HashToNodeNames[hash] = append(db.HashToNodeNames[hash], translationUnit{Value: name, Timestamp: ts}) + upsertToMap(db.HashToNodeNames, hash, translationUnit{Value: name, Timestamp: ts}) } func AddIPToNodeName(ip, name string, ts time.Time) { db.rwlock.Lock() defer db.rwlock.Unlock() name = utils.ShortNodeName(name) - if sameAsLatestValue(db.IPToNodeNames, ip, name) { - return - } - db.IPToNodeNames[ip] = append(db.IPToNodeNames[ip], translationUnit{Value: name, Timestamp: ts}) + upsertToMap(db.IPToNodeNames, ip, translationUnit{Value: name, Timestamp: ts}) } func AddIPToMethod(ip, method string, ts time.Time) { db.rwlock.Lock() defer db.rwlock.Unlock() - if sameAsLatestValue(db.IPToMethods, ip, method) { - return - } - db.IPToMethods[ip] = append(db.IPToMethods[ip], translationUnit{Value: method, Timestamp: ts}) + upsertToMap(db.IPToMethods, ip, translationUnit{Value: method, Timestamp: ts}) } func GetIPFromHash(hash string) string { db.rwlock.RLock() defer db.rwlock.RUnlock() - return db.HashToIP[hash].Value + ip, ok := db.HashToIP[hash] + if ok { + return ip.Value + } + return "" } -func mostAppropriateValueFromTS(units []translationUnit, ts time.Time) string { +func mostAppropriateValueFromTS(units []translationUnit, ts time.Time) translationUnit { if len(units) == 0 { - return "" + return translationUnit{} } // We start from the first unit, this ensures we can retroactively use information that were @@ -119,28 +142,28 @@ func mostAppropriateValueFromTS(units []translationUnit, ts time.Time) string { cur = unit } } - return cur.Value + return cur } func GetNodeNameFromHash(hash string, ts time.Time) string { db.rwlock.RLock() names := db.HashToNodeNames[hash] db.rwlock.RUnlock() - return mostAppropriateValueFromTS(names, ts) + return mostAppropriateValueFromTS(names, ts).Value } func GetNodeNameFromIP(ip string, ts time.Time) string { db.rwlock.RLock() names := db.IPToNodeNames[ip] 
db.rwlock.RUnlock() - return mostAppropriateValueFromTS(names, ts) + return mostAppropriateValueFromTS(names, ts).Value } func GetMethodFromIP(ip string, ts time.Time) string { db.rwlock.RLock() methods := db.IPToMethods[ip] db.rwlock.RUnlock() - return mostAppropriateValueFromTS(methods, ts) + return mostAppropriateValueFromTS(methods, ts).Value } func (db *translationsDB) getHashSliceFromIP(ip string) []translationUnit { @@ -162,7 +185,7 @@ func (db *translationsDB) getHashSliceFromIP(ip string) []translationUnit { func (db *translationsDB) getHashFromIP(ip string, ts time.Time) string { units := db.getHashSliceFromIP(ip) - return mostAppropriateValueFromTS(units, ts) + return mostAppropriateValueFromTS(units, ts).Value } // SimplestInfoFromIP is useful to get the most easily to read string for a given IP diff --git a/src/go/pt-galera-log-explainer/translate/translate_test.go b/src/go/pt-galera-log-explainer/translate/translate_test.go index 1d69aa0f..7f9fcb03 100644 --- a/src/go/pt-galera-log-explainer/translate/translate_test.go +++ b/src/go/pt-galera-log-explainer/translate/translate_test.go @@ -96,7 +96,7 @@ func testMostAppropriateValueFromTS(t *testing.T) { for i, test := range tests { out := mostAppropriateValueFromTS(test.inputunits, test.inputts) - if out != test.expected { + if out.Value != test.expected { t.Errorf("test %d, expected: %s, got: %s", i, test.expected, out) } } diff --git a/src/go/pt-galera-log-explainer/translate/whois.go b/src/go/pt-galera-log-explainer/translate/whois.go index 916983ae..c597643a 100644 --- a/src/go/pt-galera-log-explainer/translate/whois.go +++ b/src/go/pt-galera-log-explainer/translate/whois.go @@ -148,6 +148,11 @@ func (n *WhoisNode) FilterDBUsingUUID() { func (n *WhoisNode) FilterDBUsingNodeName() { for nodename, valueData := range n.Values { + // "unspecified" will sometimes appear in some failures + // using it will lead to nonsense data as it can bridge the rest of the whole graph + if nodename == "unspecified" { 
+ continue + } for uuid, nodenames2 := range db.HashToNodeNames { for _, nodename2 := range nodenames2 { if nodename == nodename2.Value { diff --git a/src/go/pt-galera-log-explainer/types/logctx.go b/src/go/pt-galera-log-explainer/types/logctx.go index 5c246cd9..d0c0c9b7 100644 --- a/src/go/pt-galera-log-explainer/types/logctx.go +++ b/src/go/pt-galera-log-explainer/types/logctx.go @@ -107,11 +107,11 @@ func (logCtx *LogCtx) AddOwnName(name string, date time.Time) { return } logCtx.OwnNames = append(logCtx.OwnNames, name) - for _, hash := range logCtx.OwnHashes { - translate.AddHashToNodeName(hash, name, date) - } - for _, ip := range logCtx.OwnIPs { - translate.AddIPToNodeName(ip, name, date) + + // because we frequently lack ip=>nodename clear associations, propagating is important + // we only infer the last verified ip will be associated to the verified name as it's enough + if lenIPs := len(logCtx.OwnIPs); lenIPs > 0 { + translate.AddIPToNodeName(logCtx.OwnIPs[lenIPs-1], name, date) } } @@ -122,11 +122,15 @@ func (logCtx *LogCtx) AddOwnHash(hash string, date time.Time) { } logCtx.OwnHashes = append(logCtx.OwnHashes, hash) - for _, ip := range logCtx.OwnIPs { - translate.AddHashToIP(hash, ip, date) + // optimistically assume this new hash will have the same ip/name + // it may be wrong in some situations (all operator related, it will be overridden eventually in those) + // but it will also bridge the gap in sparse on-premise logs + // why only the last one: the earliest information may be obsolete + if lenIPs := len(logCtx.OwnIPs); lenIPs > 0 { + translate.AddHashToIP(hash, logCtx.OwnIPs[lenIPs-1], date) } - for _, name := range logCtx.OwnNames { - translate.AddHashToNodeName(hash, name, date) + if lenNodeNames := len(logCtx.OwnNames); lenNodeNames > 0 { + translate.AddHashToNodeName(hash, logCtx.OwnNames[lenNodeNames-1], date) } } @@ -137,8 +141,10 @@ func (logCtx *LogCtx) AddOwnIP(ip string, date time.Time) { return } logCtx.OwnIPs = append(logCtx.OwnIPs, ip) 
- for _, name := range logCtx.OwnNames { - translate.AddIPToNodeName(ip, name, date) + + // see note in AddOwnName + if lenNodeNames := len(logCtx.OwnNames); lenNodeNames > 0 { + translate.AddIPToNodeName(ip, logCtx.OwnNames[lenNodeNames-1], date) } }