pt-galera-log-explainer: fixes: operator identity, keeping oldest translations, avoiding unspecified names loops for whois command

Those bugs were not breaking behavior, but they were causing variations in results: the same timestamps were not always stored, which sometimes broke tests.
Most of the randomness comes from regex map iteration order.
Yoann La Cancellera
2024-03-07 18:45:44 +01:00
parent 417a4ab9f4
commit 6636265ef2
8 changed files with 87 additions and 48 deletions
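The nondeterminism mentioned in the message comes from Go's randomized map iteration order: the RegexMap types are maps, so the regexes are visited in a different order on each pass, and whichever handler runs first decides which timestamp or translation gets stored first. A minimal standalone sketch (not part of the repository, key names borrowed from the diffs below) of the effect:

```go
package main

import "fmt"

func main() {
	// map iteration order in Go is intentionally randomized,
	// so two passes over the same map may visit keys differently
	regexes := map[string]string{
		"RegexMemberCount":                "members.[0-9]+.:",
		"RegexOperatorMemberAssociations": "================================================.*View:",
	}

	for name := range regexes {
		fmt.Println("first pass:", name)
	}
	for name := range regexes {
		fmt.Println("second pass:", name)
	}
}
```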

View File

@@ -72,7 +72,7 @@ var IdentsMap = types.RegexMap{
"RegexMemberCount": &types.LogRegex{
Regex: regexp.MustCompile("members.[0-9]+.:"),
InternalRegex: regexp.MustCompile(regexMembers),
InternalRegex: regexp.MustCompile("members." + regexMembers + ".:"),
Handler: func(submatches map[string]string, logCtx types.LogCtx, log string, date time.Time) (types.LogCtx, types.LogDisplayer) {
members := submatches[groupMembers]

View File

@@ -194,6 +194,14 @@ func TestIdentsRegex(t *testing.T) {
},
key: "RegexMemberCount",
},
{
log: "{\"log\":\"2001-01-01T01:01:01.000000Z 10 [Note] [MY-000000] [Galera] ================================================\\nView:\\n id: 9f191762-2542-11ee-89be-13bdb1218f0e:9339113\\n status: primary\\n protocol_version: 4\\n capabilities: MULTI-MASTER, CERTIFICATION, PARALLEL_APPLYING, REPLAY, ISOLATION, PAUSE, CAUSAL_READ, INCREMENTAL_WS, UNORDERED, PREORDERED, STREAMING, NBO\\n final: no\\n own_index: 1\\n members(2):\\n\\t0: 45406e8d-2de0-11ee-95fc-f29a5fdf1ee0, cluster1-0\\n\\t1: 5bf18376-2de0-11ee-8333-6e755a3456ca, cluster1-2\\n=================================================\\n\",\"file\":\"/var/lib/mysql/mysqld-error.log\"}",
expectedOut: "view member count: 2",
expected: regexTestState{
LogCtx: types.LogCtx{MemberCount: 2},
},
key: "RegexMemberCount",
},
{
log: "2001-01-01T01:01:01.000000Z 1 [Note] [MY-000000] [Galera] ####### My UUID: 60205de0-5cf6-11ec-8884-3a01908be11a",

View File

@@ -61,7 +61,7 @@ var PXCOperatorMap = types.RegexMap{
// so this regex is about capturing subgroups to re-handle each of them with the appropriate existing IdentsMap regex
"RegexOperatorMemberAssociations": &types.LogRegex{
Regex: regexp.MustCompile("================================================.*View:"),
InternalRegex: regexp.MustCompile("own_index: " + regexIdx + ".*(?P<memberlog>" + IdentsMap["RegexMemberCount"].Regex.String() + ")(?P<compiledAssociations>(....-?[0-9]{1,2}(\\.-?[0-9])?: [a-z0-9]+-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]+, [a-zA-Z0-9-_\\.]+)+)"),
InternalRegex: regexp.MustCompile("own_index: " + regexIdx + ".*" + IdentsMap["RegexMemberCount"].Regex.String() + "(?P<compiledAssociations>(....-?[0-9]{1,2}(\\.-?[0-9])?: [a-z0-9]+-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]+, [a-zA-Z0-9-_\\.]+)+)"),
Handler: func(submatches map[string]string, logCtx types.LogCtx, log string, date time.Time) (types.LogCtx, types.LogDisplayer) {
logCtx.MyIdx = submatches[groupIdx]
@@ -71,12 +71,10 @@ var PXCOperatorMap = types.RegexMap{
msg string
)
logCtx, displayer = IdentsMap["RegexMemberCount"].Handle(logCtx, submatches["memberlog"], date)
msg += displayer(logCtx) + "; "
subAssociations := strings.Split(submatches["compiledAssociations"], "\\n\\t")
// if it only has a single element, the regular non-operator logRegex will trigger normally already
if len(subAssociations) < 2 {
return logCtx, types.SimpleDisplayer(msg)
return logCtx, types.SimpleDisplayer("")
}
for _, subAssociation := range subAssociations[1:] {
// better to reuse the idents regex

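In operator (JSON) logs the member list arrives with escaped control characters, so the handler splits the captured "compiledAssociations" on the literal two-character sequences `\n` and `\t`, and index 0 is always the empty prefix before the first separator, which is why only subAssociations[1:] is re-handled. A standalone sketch using the member data from the tests (illustrative only, not the actual handler):

```go
package main

import (
	"fmt"
	"strings"
)

func main() {
	// literal backslash-n backslash-t, as captured from the escaped JSON log
	compiledAssociations := `\n\t0: 45406e8d-2de0-11ee-95fc-f29a5fdf1ee0, cluster1-0\n\t1: 5bf18376-2de0-11ee-8333-6e755a3456ca, cluster1-2`

	subAssociations := strings.Split(compiledAssociations, `\n\t`)

	// element 0 is the empty text before the first separator
	fmt.Printf("%d elements, first is %q\n", len(subAssociations), subAssociations[0])
	for _, sub := range subAssociations[1:] {
		fmt.Println(sub) // each element is then re-handled by the IdentsMap regexes
	}
}
```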
View File

@@ -22,14 +22,13 @@ func TestPXCOperatorRegex(t *testing.T) {
expected: regexTestState{
LogCtx: types.LogCtx{
MyIdx: "0",
MemberCount: 3,
OwnHashes: []string{"45406e8d-95fc"},
OwnNames: []string{"cluster1-0"},
},
HashToNodeNames: map[string]string{"45406e8d-95fc": "cluster1-0", "5bf18376-8333": "cluster1-2", "66e2b7bf-8000": "cluster1-1"},
State: "PRIMARY",
},
expectedOut: "view member count: 3; 45406e8d-95fc is cluster1-0; 5bf18376-8333 is cluster1-2; 66e2b7bf-8000 is cluster1-1; ",
expectedOut: "45406e8d-95fc is cluster1-0; 5bf18376-8333 is cluster1-2; 66e2b7bf-8000 is cluster1-1; ",
key: "RegexOperatorMemberAssociations",
},

View File

@@ -17,7 +17,7 @@ type translationUnit struct {
type translationsDB struct {
// 1 hash: only 1 IP. wsrep_node_address is not dynamic
// if there's a restart, the hash will change as well anyway
HashToIP map[string]translationUnit
HashToIP map[string]*translationUnit
// wsrep_node_name is dynamic
HashToNodeNames map[string][]translationUnit
@@ -38,7 +38,7 @@ func init() {
func initTranslationsDB() {
db = translationsDB{
HashToIP: map[string]translationUnit{},
HashToIP: map[string]*translationUnit{},
HashToNodeNames: map[string][]translationUnit{},
IPToMethods: map[string][]translationUnit{},
IPToNodeNames: map[string][]translationUnit{},
@@ -59,55 +59,78 @@ func GetDB() translationsDB {
return db
}
func (tu *translationUnit) UpdateTimestamp(ts time.Time) {
// we want to avoid gaps of information, so the earliest proof should be kept
if tu.Timestamp.After(ts) {
tu.Timestamp = ts
}
}
func AddHashToIP(hash, ip string, ts time.Time) {
db.rwlock.Lock()
defer db.rwlock.Unlock()
db.HashToIP[hash] = translationUnit{Value: ip, Timestamp: ts}
latestValue, ok := db.HashToIP[hash]
if !ok {
db.HashToIP[hash] = &translationUnit{Value: ip, Timestamp: ts}
} else {
latestValue.UpdateTimestamp(ts)
}
}
func sameAsLatestValue(m map[string][]translationUnit, key string, newvalue string) bool {
return len(m[key]) > 0 && m[key][len(m[key])-1].Value == newvalue
func getLatestValue(m map[string][]translationUnit, key string) *translationUnit {
if len(m[key]) == 0 {
return nil
}
return &m[key][len(m[key])-1]
}
func upsertToMap(m map[string][]translationUnit, key string, tu translationUnit) {
latestValue := getLatestValue(m, key)
if latestValue == nil || latestValue.Value != tu.Value {
m[key] = append(m[key], tu)
return
}
// we want to avoid gaps of information, so the earliest proof should be kept
if latestValue.Timestamp.After(tu.Timestamp) {
latestValue.Timestamp = tu.Timestamp
}
}
func AddHashToNodeName(hash, name string, ts time.Time) {
db.rwlock.Lock()
defer db.rwlock.Unlock()
name = utils.ShortNodeName(name)
if sameAsLatestValue(db.HashToNodeNames, hash, name) {
return
}
db.HashToNodeNames[hash] = append(db.HashToNodeNames[hash], translationUnit{Value: name, Timestamp: ts})
upsertToMap(db.HashToNodeNames, hash, translationUnit{Value: name, Timestamp: ts})
}
func AddIPToNodeName(ip, name string, ts time.Time) {
db.rwlock.Lock()
defer db.rwlock.Unlock()
name = utils.ShortNodeName(name)
if sameAsLatestValue(db.IPToNodeNames, ip, name) {
return
}
db.IPToNodeNames[ip] = append(db.IPToNodeNames[ip], translationUnit{Value: name, Timestamp: ts})
upsertToMap(db.IPToNodeNames, ip, translationUnit{Value: name, Timestamp: ts})
}
func AddIPToMethod(ip, method string, ts time.Time) {
db.rwlock.Lock()
defer db.rwlock.Unlock()
if sameAsLatestValue(db.IPToMethods, ip, method) {
return
}
db.IPToMethods[ip] = append(db.IPToMethods[ip], translationUnit{Value: method, Timestamp: ts})
upsertToMap(db.IPToMethods, ip, translationUnit{Value: method, Timestamp: ts})
}
func GetIPFromHash(hash string) string {
db.rwlock.RLock()
defer db.rwlock.RUnlock()
return db.HashToIP[hash].Value
ip, ok := db.HashToIP[hash]
if ok {
return ip.Value
}
return ""
}
func mostAppropriateValueFromTS(units []translationUnit, ts time.Time) string {
func mostAppropriateValueFromTS(units []translationUnit, ts time.Time) translationUnit {
if len(units) == 0 {
return ""
return translationUnit{}
}
// We start from the first unit, this ensures we can retroactively use information that were
@@ -119,28 +142,28 @@ func mostAppropriateValueFromTS(units []translationUnit, ts time.Time) string {
cur = unit
}
}
return cur.Value
return cur
}
func GetNodeNameFromHash(hash string, ts time.Time) string {
db.rwlock.RLock()
names := db.HashToNodeNames[hash]
db.rwlock.RUnlock()
return mostAppropriateValueFromTS(names, ts)
return mostAppropriateValueFromTS(names, ts).Value
}
func GetNodeNameFromIP(ip string, ts time.Time) string {
db.rwlock.RLock()
names := db.IPToNodeNames[ip]
db.rwlock.RUnlock()
return mostAppropriateValueFromTS(names, ts)
return mostAppropriateValueFromTS(names, ts).Value
}
func GetMethodFromIP(ip string, ts time.Time) string {
db.rwlock.RLock()
methods := db.IPToMethods[ip]
db.rwlock.RUnlock()
return mostAppropriateValueFromTS(methods, ts)
return mostAppropriateValueFromTS(methods, ts).Value
}
func (db *translationsDB) getHashSliceFromIP(ip string) []translationUnit {
@@ -162,7 +185,7 @@ func (db *translationsDB) getHashSliceFromIP(ip string) []translationUnit {
func (db *translationsDB) getHashFromIP(ip string, ts time.Time) string {
units := db.getHashSliceFromIP(ip)
return mostAppropriateValueFromTS(units, ts)
return mostAppropriateValueFromTS(units, ts).Value
}
// SimplestInfoFromIP is useful to get the most easily to read string for a given IP

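The "keeping oldest" part of the fix is concentrated in UpdateTimestamp and upsertToMap above: when an identical value is seen again, the earlier timestamp wins, so processing log files in a different order can no longer change the stored timestamps. A simplified standalone restatement of that rule (keepEarliest is a hypothetical helper name, the struct mirrors translationUnit):

```go
package main

import (
	"fmt"
	"time"
)

// simplified copy of the unit stored in the translations DB
type translationUnit struct {
	Value     string
	Timestamp time.Time
}

// keepEarliest mirrors the rule used by UpdateTimestamp/upsertToMap:
// for an identical value, only the earliest proof of existence is kept
func keepEarliest(tu *translationUnit, ts time.Time) {
	if tu.Timestamp.After(ts) {
		tu.Timestamp = ts
	}
}

func main() {
	older := time.Date(2001, 1, 1, 1, 0, 0, 0, time.UTC)
	newer := time.Date(2001, 1, 1, 2, 0, 0, 0, time.UTC)

	tu := &translationUnit{Value: "172.17.0.2", Timestamp: newer}

	// seeing the same value again with an older timestamp moves it back...
	keepEarliest(tu, older)
	fmt.Println(tu.Timestamp)

	// ...but a newer timestamp never overrides the earlier one
	keepEarliest(tu, newer)
	fmt.Println(tu.Timestamp)
}
```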
View File

@@ -96,7 +96,7 @@ func testMostAppropriateValueFromTS(t *testing.T) {
for i, test := range tests {
out := mostAppropriateValueFromTS(test.inputunits, test.inputts)
if out != test.expected {
if out.Value != test.expected {
t.Errorf("test %d, expected: %s, got: %s", i, test.expected, out)
}
}

View File

@@ -148,6 +148,11 @@ func (n *WhoisNode) FilterDBUsingUUID() {
func (n *WhoisNode) FilterDBUsingNodeName() {
for nodename, valueData := range n.Values {
// "unspecified" will sometimes appear in some failure scenarios
// using it would lead to nonsense data, as it can bridge otherwise unrelated parts of the graph
if nodename == "unspecified" {
continue
}
for uuid, nodenames2 := range db.HashToNodeNames {
for _, nodename2 := range nodenames2 {
if nodename == nodename2.Value {

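Skipping "unspecified" matters because that placeholder name can be reported by several distinct nodes: a name-based reverse lookup would merge their hashes into one identity and let the whois graph walk keep expanding. A tiny standalone sketch of the effect (hypothetical data, maps simplified to plain strings instead of translationUnit slices):

```go
package main

import "fmt"

func main() {
	// simplified hash => node name translations;
	// two unrelated nodes both ended up logged as "unspecified"
	hashToNodeName := map[string]string{
		"45406e8d-95fc": "cluster1-0",
		"5bf18376-8333": "unspecified",
		"66e2b7bf-8000": "unspecified",
	}

	// naive name-based reverse lookup, similar in spirit to FilterDBUsingNodeName
	matches := func(name string) []string {
		var hashes []string
		for hash, n := range hashToNodeName {
			if n == name {
				hashes = append(hashes, hash)
			}
		}
		return hashes
	}

	fmt.Println(matches("cluster1-0"))  // one hash
	fmt.Println(matches("unspecified")) // two unrelated hashes merged into one identity
}
```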
View File

@@ -107,11 +107,11 @@ func (logCtx *LogCtx) AddOwnName(name string, date time.Time) {
return
}
logCtx.OwnNames = append(logCtx.OwnNames, name)
for _, hash := range logCtx.OwnHashes {
translate.AddHashToNodeName(hash, name, date)
}
for _, ip := range logCtx.OwnIPs {
translate.AddIPToNodeName(ip, name, date)
// because we frequently lack clear ip=>nodename associations, propagating is important
// we only infer that the last verified ip is associated with the verified name, as that is enough
if lenIPs := len(logCtx.OwnIPs); lenIPs > 0 {
translate.AddIPToNodeName(logCtx.OwnIPs[lenIPs-1], name, date)
}
}
@@ -122,11 +122,15 @@ func (logCtx *LogCtx) AddOwnHash(hash string, date time.Time) {
}
logCtx.OwnHashes = append(logCtx.OwnHashes, hash)
for _, ip := range logCtx.OwnIPs {
translate.AddHashToIP(hash, ip, date)
// optimistically assume this new hash will have the same ip/name
// it may be wrong in some situations (all operator related, it will be overridden eventually in those)
// but it will also bridge the gap in sparse on-premise logs
// why only the last one: the earliest information may be obsolete
if lenIPs := len(logCtx.OwnIPs); lenIPs > 0 {
translate.AddHashToIP(hash, logCtx.OwnIPs[lenIPs-1], date)
}
for _, name := range logCtx.OwnNames {
translate.AddHashToNodeName(hash, name, date)
if lenNodeNames := len(logCtx.OwnNames); lenNodeNames > 0 {
translate.AddHashToNodeName(hash, logCtx.OwnNames[lenNodeNames-1], date)
}
}
@@ -137,8 +141,10 @@ func (logCtx *LogCtx) AddOwnIP(ip string, date time.Time) {
return
}
logCtx.OwnIPs = append(logCtx.OwnIPs, ip)
for _, name := range logCtx.OwnNames {
translate.AddIPToNodeName(ip, name, date)
// see note in AddOwnName
if lenNodeNames := len(logCtx.OwnNames); lenNodeNames > 0 {
translate.AddIPToNodeName(ip, logCtx.OwnNames[lenNodeNames-1], date)
}
}
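The AddOwn* changes above stop propagating a new hash, IP, or name to every previously seen counterpart and only use the most recent one, since the earliest entries may belong to an obsolete identity. A minimal standalone sketch of that "latest only" rule (values are illustrative, not taken from real logs):

```go
package main

import "fmt"

func main() {
	// previously known IPs for this node, oldest first
	ownIPs := []string{"172.17.0.2", "172.17.0.9"}

	newHash := "60205de0-8884"

	// old behavior: associate the new hash with every known IP,
	// which can resurrect an obsolete identity
	for _, ip := range ownIPs {
		fmt.Println("old:", newHash, "=>", ip)
	}

	// new behavior: only the last verified IP is trusted for the new hash
	if lenIPs := len(ownIPs); lenIPs > 0 {
		fmt.Println("new:", newHash, "=>", ownIPs[lenIPs-1])
	}
}
```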