diff --git a/src/go/pt-galera-log-explainer/conflicts.go b/src/go/pt-galera-log-explainer/conflicts.go index 310d10c8..4fa6983a 100644 --- a/src/go/pt-galera-log-explainer/conflicts.go +++ b/src/go/pt-galera-log-explainer/conflicts.go @@ -22,7 +22,7 @@ func (c *conflicts) Help() string { func (c *conflicts) Run() error { regexes := regex.IdentsMap.Merge(regex.ApplicativeMap) - timeline, err := timelineFromPaths(c.Paths, regexes, CLI.Since, CLI.Until) + timeline, err := timelineFromPaths(c.Paths, regexes) if err != nil { return err } diff --git a/src/go/pt-galera-log-explainer/ctx.go b/src/go/pt-galera-log-explainer/ctx.go index 3cf4ab1a..486effdc 100644 --- a/src/go/pt-galera-log-explainer/ctx.go +++ b/src/go/pt-galera-log-explainer/ctx.go @@ -22,7 +22,7 @@ func (c *ctx) Run() error { return errors.New("Can only use 1 path at a time for ctx subcommand") } - timeline, err := timelineFromPaths(c.Paths, regex.AllRegexes(), CLI.Since, CLI.Until) + timeline, err := timelineFromPaths(c.Paths, regex.AllRegexes()) if err != nil { return err } diff --git a/src/go/pt-galera-log-explainer/internal.go b/src/go/pt-galera-log-explainer/internal.go new file mode 100644 index 00000000..d983bc9a --- /dev/null +++ b/src/go/pt-galera-log-explainer/internal.go @@ -0,0 +1,202 @@ +package main + +import ( + "bufio" + "os/exec" + "runtime" + "strings" + + "github.com/percona/percona-toolkit/src/go/pt-galera-log-explainer/regex" + "github.com/percona/percona-toolkit/src/go/pt-galera-log-explainer/types" + "github.com/percona/percona-toolkit/src/go/pt-galera-log-explainer/utils" + "github.com/pkg/errors" + "github.com/rs/zerolog/log" +) + +var logger = log.With().Str("component", "extractor").Logger() + +func init() { + + if CLI.Since != nil { + logger = logger.With().Time("since", *CLI.Since).Logger() + } + if CLI.Until != nil { + logger = logger.With().Time("until", *CLI.Until).Logger() + } +} + +// timelineFromPaths takes every path, search them using a list of regexes +// and organize them in a timeline that will be ready to aggregate or read +func timelineFromPaths(paths []string, regexes types.RegexMap) (types.Timeline, error) { + timeline := make(types.Timeline) + found := false + + compiledRegex := prepareGrepArgument(regexes) + + for _, path := range paths { + stdout := make(chan string) + + go func() { + err := execGrepAndIterate(path, compiledRegex, stdout) + if err != nil { + logger.Error().Str("path", path).Err(err).Msg("execGrepAndIterate returned error") + } + }() + + // it will iterate on stdout pipe results + localTimeline, err := iterateOnGrepResults(path, regexes, stdout) + if err != nil { + logger.Warn().Err(err).Msg("Failed to iterate on results") + } + found = true + logger.Debug().Str("path", path).Msg("Finished searching") + + // Why it should not just identify using the file path: + // so that we are able to merge files that belong to the same nodes + // we wouldn't want them to be shown as from different nodes + if CLI.PxcOperator { + timeline[path] = localTimeline + } else if CLI.MergeByDirectory { + timeline.MergeByDirectory(path, localTimeline) + } else { + timeline.MergeByIdentifier(localTimeline) + } + } + if !found { + return nil, errors.New("Could not find data") + } + return timeline, nil +} + +func prepareGrepArgument(regexes types.RegexMap) string { + + regexToSendSlice := regexes.Compile() + + grepRegex := "^" + if CLI.PxcOperator { + // special case + // I'm not adding pxcoperator map the same way others are used, because they do not have the same formats and same place + // it needs to be put on the front so that it's not 'merged' with the '{"log":"' json prefix + // this is to keep things as close as '^' as possible to keep doing prefix searches + grepRegex += "((" + strings.Join(regex.PXCOperatorMap.Compile(), "|") + ")|^{\"log\":\"" + regexes.Merge(regex.PXCOperatorMap) + } + if CLI.Since != nil { + grepRegex += "(" + regex.BetweenDateRegex(CLI.Since, CLI.PxcOperator) + "|" + regex.NoDatesRegex(CLI.PxcOperator) + ")" + } + grepRegex += ".*" + grepRegex += "(" + strings.Join(regexToSendSlice, "|") + ")" + if CLI.PxcOperator { + grepRegex += ")" + } + logger.Debug().Str("grepArg", grepRegex).Msg("Compiled grep arguments") + return grepRegex +} + +func execGrepAndIterate(path, compiledRegex string, stdout chan<- string) error { + + defer close(stdout) + + // A first pass is done, with every regexes we want compiled in a single one. + + /* + Regular grep is actually used + + There are no great alternatives, even less as golang libraries. + grep itself do not have great alternatives: they are less performant for common use-cases, or are not easily portable, or are costlier to execute. + grep is everywhere, grep is good enough, it even enable to use the stdout pipe. + + The usual bottleneck with grep is that it is single-threaded, but we actually benefit + from a sequential scan here as we will rely on the log order. + + Also, being sequential also ensure this program is light enough to run without too much impacts + It also helps to be transparent and not provide an obscure tool that work as a blackbox + */ + if runtime.GOOS == "darwin" && CLI.GrepCmd == "grep" { + logger.Warn().Msg("On Darwin systems, use 'pt-galera-log-explainer --grep-cmd=ggrep' as it requires grep v3") + } + + cmd := exec.Command(CLI.GrepCmd, CLI.GrepArgs, compiledRegex, path) + + out, _ := cmd.StdoutPipe() + defer out.Close() + + err := cmd.Start() + if err != nil { + return errors.Wrapf(err, "failed to search in %s", path) + } + + // grep treatment + s := bufio.NewScanner(out) + for s.Scan() { + stdout <- s.Text() + } + + // double-check it stopped correctly + if err = cmd.Wait(); err != nil { + if exiterr, ok := err.(*exec.ExitError); ok && exiterr.ExitCode() == 1 { + return errors.New("Found nothing") + } + return errors.Wrap(err, "grep subprocess error") + } + + return nil +} + +func sanitizeLine(s string) string { + if len(s) > 0 && s[0] == '\t' { + return s[1:] + } + return s +} + +// iterateOnGrepResults will take line by line each logs that matched regex +// it will iterate on every regexes in slice, and apply the handler for each +// it also filters out --since and --until rows +func iterateOnGrepResults(path string, regexes types.RegexMap, grepStdout <-chan string) (types.LocalTimeline, error) { + + var ( + lt types.LocalTimeline + recentEnough bool + displayer types.LogDisplayer + ) + ctx := types.NewLogCtx() + ctx.FilePath = path + + for line := range grepStdout { + line = sanitizeLine(line) + + var date *types.Date + t, layout, ok := regex.SearchDateFromLog(line) + if ok { + date = types.NewDate(t, layout) + } + + // If it's recentEnough, it means we already validated a log: every next logs necessarily happened later + // this is useful because not every logs have a date attached, and some without date are very useful + if !recentEnough && CLI.Since != nil && (date == nil || (date != nil && CLI.Since.After(date.Time))) { + continue + } + if CLI.Until != nil && date != nil && CLI.Until.Before(date.Time) { + return lt, nil + } + recentEnough = true + + filetype := regex.FileType(line, CLI.PxcOperator) + ctx.FileType = filetype + + // We have to find again what regex worked to get this log line + // it can match multiple regexes + for key, regex := range regexes { + if !regex.Regex.MatchString(line) || utils.SliceContains(CLI.ExcludeRegexes, key) { + continue + } + ctx, displayer = regex.Handle(ctx, line) + li := types.NewLogInfo(date, displayer, line, regex, key, ctx, filetype) + + lt = lt.Add(li) + } + + } + return lt, nil +} diff --git a/src/go/pt-galera-log-explainer/list.go b/src/go/pt-galera-log-explainer/list.go index 3e29015f..3e099eda 100644 --- a/src/go/pt-galera-log-explainer/list.go +++ b/src/go/pt-galera-log-explainer/list.go @@ -41,7 +41,7 @@ func (l *list) Run() error { toCheck := l.regexesToUse() - timeline, err := timelineFromPaths(CLI.List.Paths, toCheck, CLI.Since, CLI.Until) + timeline, err := timelineFromPaths(CLI.List.Paths, toCheck) if err != nil { return errors.Wrap(err, "Could not list events") } diff --git a/src/go/pt-galera-log-explainer/main.go b/src/go/pt-galera-log-explainer/main.go index 3850d11e..ad1321bf 100644 --- a/src/go/pt-galera-log-explainer/main.go +++ b/src/go/pt-galera-log-explainer/main.go @@ -1,19 +1,13 @@ package main import ( - "bufio" "fmt" "os" - "os/exec" - "runtime" - "strings" "time" "github.com/alecthomas/kong" - "github.com/percona/percona-toolkit/src/go/pt-galera-log-explainer/regex" "github.com/percona/percona-toolkit/src/go/pt-galera-log-explainer/types" "github.com/percona/percona-toolkit/src/go/pt-galera-log-explainer/utils" - "github.com/pkg/errors" "github.com/rs/zerolog" "github.com/rs/zerolog/log" ) @@ -46,6 +40,16 @@ var CLI struct { GrepArgs string `help:"'grep' arguments. perl regexp (-P) is necessary. -o will break the tool" default:"-P"` } +type versioncmd struct{} + +func (v *versioncmd) Help() string { + return "" +} +func (v *versioncmd) Run() error { + fmt.Printf("version: %s, commit:%s, built at %s\n", version, commit, date) + return nil +} + func main() { ctx := kong.Parse(&CLI, kong.Name("pt-galera-log-explainer"), @@ -64,212 +68,3 @@ func main() { err := ctx.Run() ctx.FatalIfErrorf(err) } - -type versioncmd struct{} - -func (v *versioncmd) Help() string { - return "" -} -func (v *versioncmd) Run() error { - fmt.Printf("version: %s, commit:%s, built at %s\n", version, commit, date) - return nil -} - -// timelineFromPaths takes every path, search them using a list of regexes -// and organize them in a timeline that will be ready to aggregate or read -func timelineFromPaths(paths []string, toCheck types.RegexMap, since, until *time.Time) (types.Timeline, error) { - timeline := make(types.Timeline) - found := false - - for _, path := range paths { - - extr := newExtractor(path, toCheck, since, until) - - localTimeline, err := extr.search() - if err != nil { - extr.logger.Warn().Err(err).Msg("Search failed") - continue - } - found = true - extr.logger.Debug().Str("path", path).Msg("Finished searching") - - // Why it should not just identify using the file path: - // so that we are able to merge files that belong to the same nodes - // we wouldn't want them to be shown as from different nodes - if CLI.PxcOperator { - timeline[path] = localTimeline - } else if CLI.MergeByDirectory { - timeline.MergeByDirectory(path, localTimeline) - } else { - timeline.MergeByIdentifier(localTimeline) - } - } - if !found { - return nil, errors.New("Could not find data") - } - return timeline, nil -} - -// extractor is an utility struct to store what needs to be done -type extractor struct { - regexes types.RegexMap - path string - since, until *time.Time - logger zerolog.Logger -} - -func newExtractor(path string, toCheck types.RegexMap, since, until *time.Time) extractor { - e := extractor{regexes: toCheck, path: path, since: since, until: until} - e.logger = log.With().Str("component", "extractor").Str("path", e.path).Logger() - if since != nil { - e.logger = e.logger.With().Time("since", *e.since).Logger() - } - if until != nil { - e.logger = e.logger.With().Time("until", *e.until).Logger() - } - e.logger.Debug().Msg("new extractor") - - return e -} - -func (e *extractor) grepArgument() string { - - regexToSendSlice := e.regexes.Compile() - - grepRegex := "^" - if CLI.PxcOperator { - // special case - // I'm not adding pxcoperator map the same way others are used, because they do not have the same formats and same place - // it needs to be put on the front so that it's not 'merged' with the '{"log":"' json prefix - // this is to keep things as close as '^' as possible to keep doing prefix searches - grepRegex += "((" + strings.Join(regex.PXCOperatorMap.Compile(), "|") + ")|^{\"log\":\"" - e.regexes.Merge(regex.PXCOperatorMap) - } - if e.since != nil { - grepRegex += "(" + regex.BetweenDateRegex(e.since, CLI.PxcOperator) + "|" + regex.NoDatesRegex(CLI.PxcOperator) + ")" - } - grepRegex += ".*" - grepRegex += "(" + strings.Join(regexToSendSlice, "|") + ")" - if CLI.PxcOperator { - grepRegex += ")" - } - e.logger.Debug().Str("grepArg", grepRegex).Msg("Compiled grep arguments") - return grepRegex -} - -// search is the main function to search what we want in a file -func (e *extractor) search() (types.LocalTimeline, error) { - - // A first pass is done, with every regexes we want compiled in a single one. - grepRegex := e.grepArgument() - - /* - Regular grep is actually used - - There are no great alternatives, even less as golang libraries. - grep itself do not have great alternatives: they are less performant for common use-cases, or are not easily portable, or are costlier to execute. - grep is everywhere, grep is good enough, it even enable to use the stdout pipe. - - The usual bottleneck with grep is that it is single-threaded, but we actually benefit - from a sequential scan here as we will rely on the log order. - - Also, being sequential also ensure this program is light enough to run without too much impacts - It also helps to be transparent and not provide an obscure tool that work as a blackbox - */ - if runtime.GOOS == "darwin" && CLI.GrepCmd == "grep" { - e.logger.Warn().Msg("On Darwin systems, use 'pt-galera-log-explainer --grep-cmd=ggrep' as it requires grep v3") - } - - cmd := exec.Command(CLI.GrepCmd, CLI.GrepArgs, grepRegex, e.path) - - out, _ := cmd.StdoutPipe() - defer out.Close() - - err := cmd.Start() - if err != nil { - return nil, errors.Wrapf(err, "failed to search in %s", e.path) - } - - // grep treatment - s := bufio.NewScanner(out) - - // it will iterate on stdout pipe results - lt, err := e.iterateOnResults(s) - if err != nil { - e.logger.Warn().Err(err).Msg("Failed to iterate on results") - } - - // double-check it stopped correctly - if err = cmd.Wait(); err != nil { - if exiterr, ok := err.(*exec.ExitError); ok && exiterr.ExitCode() == 1 { - return nil, errors.New("Found nothing") - } - return nil, errors.Wrap(err, "grep subprocess error") - } - - if len(lt) == 0 { - return nil, errors.New("Found nothing") - } - - return lt, nil -} - -func (e *extractor) sanitizeLine(s string) string { - if len(s) > 0 && s[0] == '\t' { - return s[1:] - } - return s -} - -// iterateOnResults will take line by line each logs that matched regex -// it will iterate on every regexes in slice, and apply the handler for each -// it also filters out --since and --until rows -func (e *extractor) iterateOnResults(s *bufio.Scanner) ([]types.LogInfo, error) { - - var ( - line string - lt types.LocalTimeline - recentEnough bool - displayer types.LogDisplayer - ) - ctx := types.NewLogCtx() - ctx.FilePath = e.path - - for s.Scan() { - line = e.sanitizeLine(s.Text()) - - var date *types.Date - t, layout, ok := regex.SearchDateFromLog(line) - if ok { - d := types.NewDate(t, layout) - date = &d - } - - // If it's recentEnough, it means we already validated a log: every next logs necessarily happened later - // this is useful because not every logs have a date attached, and some without date are very useful - if !recentEnough && e.since != nil && (date == nil || (date != nil && e.since.After(date.Time))) { - continue - } - if e.until != nil && date != nil && e.until.Before(date.Time) { - return lt, nil - } - recentEnough = true - - filetype := regex.FileType(line, CLI.PxcOperator) - ctx.FileType = filetype - - // We have to find again what regex worked to get this log line - // it can match multiple regexes - for key, regex := range e.regexes { - if !regex.Regex.MatchString(line) || utils.SliceContains(CLI.ExcludeRegexes, key) { - continue - } - ctx, displayer = regex.Handle(ctx, line) - li := types.NewLogInfo(date, displayer, line, regex, key, ctx, filetype) - - lt = lt.Add(li) - } - - } - return lt, nil -} diff --git a/src/go/pt-galera-log-explainer/sed.go b/src/go/pt-galera-log-explainer/sed.go index 13453bcf..3612b495 100644 --- a/src/go/pt-galera-log-explainer/sed.go +++ b/src/go/pt-galera-log-explainer/sed.go @@ -29,7 +29,7 @@ You can also simply call the command to get a generated sed command to review an func (s *sed) Run() error { toCheck := regex.AllRegexes() - timeline, err := timelineFromPaths(s.Paths, toCheck, CLI.Since, CLI.Until) + timeline, err := timelineFromPaths(s.Paths, toCheck) if err != nil { return errors.Wrap(err, "Found nothing worth replacing") } diff --git a/src/go/pt-galera-log-explainer/types/loginfo.go b/src/go/pt-galera-log-explainer/types/loginfo.go index 9cd923f5..a31e110c 100644 --- a/src/go/pt-galera-log-explainer/types/loginfo.go +++ b/src/go/pt-galera-log-explainer/types/loginfo.go @@ -79,8 +79,8 @@ type Date struct { Layout string } -func NewDate(t time.Time, layout string) Date { - return Date{ +func NewDate(t time.Time, layout string) *Date { + return &Date{ Time: t, Layout: layout, DisplayTime: t.Format(layout), diff --git a/src/go/pt-galera-log-explainer/whois.go b/src/go/pt-galera-log-explainer/whois.go index f8a7b500..c057cf9c 100644 --- a/src/go/pt-galera-log-explainer/whois.go +++ b/src/go/pt-galera-log-explainer/whois.go @@ -24,7 +24,7 @@ It will list known node name(s), IP(s), hostname(s), and other known node's UUID func (w *whois) Run() error { toCheck := regex.AllRegexes() - timeline, err := timelineFromPaths(CLI.Whois.Paths, toCheck, CLI.Since, CLI.Until) + timeline, err := timelineFromPaths(CLI.Whois.Paths, toCheck) if err != nil { return errors.Wrap(err, "Found nothing to translate") }