Simplified the main loop

Yoann La Cancellera
2023-09-08 12:07:27 +02:00
parent 5bbb11df17
commit 7f8071ce3f
8 changed files with 219 additions and 222 deletions

View File

@@ -22,7 +22,7 @@ func (c *conflicts) Help() string {
func (c *conflicts) Run() error {
regexes := regex.IdentsMap.Merge(regex.ApplicativeMap)
timeline, err := timelineFromPaths(c.Paths, regexes, CLI.Since, CLI.Until)
timeline, err := timelineFromPaths(c.Paths, regexes)
if err != nil {
return err
}
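
Every subcommand call site in this commit changes the same way: in each hunk, the first timelineFromPaths line is the old call and the second is the new one. A minimal before/after sketch (paths and regexes are placeholders; the time range is now read from the global CLI struct inside the new extractor file):

// before: the caller threaded --since/--until through explicitly
timeline, err := timelineFromPaths(paths, regexes, CLI.Since, CLI.Until)
// after: the extractor reads CLI.Since and CLI.Until itself
timeline, err := timelineFromPaths(paths, regexes)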

View File

@@ -22,7 +22,7 @@ func (c *ctx) Run() error {
return errors.New("Can only use 1 path at a time for ctx subcommand")
}
timeline, err := timelineFromPaths(c.Paths, regex.AllRegexes(), CLI.Since, CLI.Until)
timeline, err := timelineFromPaths(c.Paths, regex.AllRegexes())
if err != nil {
return err
}

View File

@@ -0,0 +1,202 @@
package main
import (
"bufio"
"os/exec"
"runtime"
"strings"
"github.com/percona/percona-toolkit/src/go/pt-galera-log-explainer/regex"
"github.com/percona/percona-toolkit/src/go/pt-galera-log-explainer/types"
"github.com/percona/percona-toolkit/src/go/pt-galera-log-explainer/utils"
"github.com/pkg/errors"
"github.com/rs/zerolog/log"
)
var logger = log.With().Str("component", "extractor").Logger()
func init() {
if CLI.Since != nil {
logger = logger.With().Time("since", *CLI.Since).Logger()
}
if CLI.Until != nil {
logger = logger.With().Time("until", *CLI.Until).Logger()
}
}
// timelineFromPaths takes every path, searches them using a list of regexes,
// and organizes the results into a timeline ready to aggregate or read
func timelineFromPaths(paths []string, regexes types.RegexMap) (types.Timeline, error) {
timeline := make(types.Timeline)
found := false
compiledRegex := prepareGrepArgument(regexes)
for _, path := range paths {
stdout := make(chan string)
go func() {
err := execGrepAndIterate(path, compiledRegex, stdout)
if err != nil {
logger.Error().Str("path", path).Err(err).Msg("execGrepAndIterate returned error")
}
}()
// it will iterate on stdout pipe results
localTimeline, err := iterateOnGrepResults(path, regexes, stdout)
if err != nil {
logger.Warn().Err(err).Msg("Failed to iterate on results")
}
found = true
logger.Debug().Str("path", path).Msg("Finished searching")
// Why identification should not rely on the file path alone:
// files that belong to the same node can then be merged,
// instead of being shown as coming from different nodes
if CLI.PxcOperator {
timeline[path] = localTimeline
} else if CLI.MergeByDirectory {
timeline.MergeByDirectory(path, localTimeline)
} else {
timeline.MergeByIdentifier(localTimeline)
}
}
if !found {
return nil, errors.New("Could not find data")
}
return timeline, nil
}
func prepareGrepArgument(regexes types.RegexMap) string {
regexToSendSlice := regexes.Compile()
grepRegex := "^"
if CLI.PxcOperator {
// special case
// the pxcoperator map is not added the same way as the other maps, because its formats and positions differ
// it has to be put at the front so that it is not 'merged' with the '{"log":"' JSON prefix
// this keeps the expression as close to '^' as possible, so grep can keep doing prefix searches
grepRegex += "((" + strings.Join(regex.PXCOperatorMap.Compile(), "|") + ")|^{\"log\":\""
regexes.Merge(regex.PXCOperatorMap)
}
if CLI.Since != nil {
grepRegex += "(" + regex.BetweenDateRegex(CLI.Since, CLI.PxcOperator) + "|" + regex.NoDatesRegex(CLI.PxcOperator) + ")"
}
grepRegex += ".*"
grepRegex += "(" + strings.Join(regexToSendSlice, "|") + ")"
if CLI.PxcOperator {
grepRegex += ")"
}
logger.Debug().Str("grepArg", grepRegex).Msg("Compiled grep arguments")
return grepRegex
}
func execGrepAndIterate(path, compiledRegex string, stdout chan<- string) error {
defer close(stdout)
// A first pass is done, with every regex we want compiled into a single one.
/*
Regular grep is actually used.
There are no great alternatives, even fewer as Go libraries.
grep itself has no great alternatives: they are less performant for common use-cases, not easily portable, or costlier to execute.
grep is everywhere, grep is good enough, and it lets us read results through the stdout pipe.
The usual bottleneck with grep is that it is single-threaded, but we actually benefit
from a sequential scan here as we rely on the log order.
Being sequential also keeps this program light enough to run without much impact.
It also helps to stay transparent rather than providing an obscure tool that works as a black box.
*/
if runtime.GOOS == "darwin" && CLI.GrepCmd == "grep" {
logger.Warn().Msg("On Darwin systems, use 'pt-galera-log-explainer --grep-cmd=ggrep' as it requires grep v3")
}
cmd := exec.Command(CLI.GrepCmd, CLI.GrepArgs, compiledRegex, path)
out, _ := cmd.StdoutPipe()
defer out.Close()
err := cmd.Start()
if err != nil {
return errors.Wrapf(err, "failed to search in %s", path)
}
// stream grep's output line by line into the channel
s := bufio.NewScanner(out)
for s.Scan() {
stdout <- s.Text()
}
// double-check it stopped correctly
if err = cmd.Wait(); err != nil {
if exiterr, ok := err.(*exec.ExitError); ok && exiterr.ExitCode() == 1 {
return errors.New("Found nothing")
}
return errors.Wrap(err, "grep subprocess error")
}
return nil
}
func sanitizeLine(s string) string {
if len(s) > 0 && s[0] == '\t' {
return s[1:]
}
return s
}
// iterateOnGrepResults takes, line by line, each log that matched the grep regex
// it iterates over every regex in the map and applies the matching handlers
// it also filters out rows excluded by --since and --until
func iterateOnGrepResults(path string, regexes types.RegexMap, grepStdout <-chan string) (types.LocalTimeline, error) {
var (
lt types.LocalTimeline
recentEnough bool
displayer types.LogDisplayer
)
ctx := types.NewLogCtx()
ctx.FilePath = path
for line := range grepStdout {
line = sanitizeLine(line)
var date *types.Date
t, layout, ok := regex.SearchDateFromLog(line)
if ok {
date = types.NewDate(t, layout)
}
// If recentEnough is set, we already validated a log: every following log necessarily happened later
// this is useful because not every log has a date attached, and some without a date are very useful
if !recentEnough && CLI.Since != nil && (date == nil || (date != nil && CLI.Since.After(date.Time))) {
continue
}
if CLI.Until != nil && date != nil && CLI.Until.Before(date.Time) {
return lt, nil
}
recentEnough = true
filetype := regex.FileType(line, CLI.PxcOperator)
ctx.FileType = filetype
// We have to find again which regex matched this log line
// it can match multiple regexes
for key, regex := range regexes {
if !regex.Regex.MatchString(line) || utils.SliceContains(CLI.ExcludeRegexes, key) {
continue
}
ctx, displayer = regex.Handle(ctx, line)
li := types.NewLogInfo(date, displayer, line, regex, key, ctx, filetype)
lt = lt.Add(li)
}
}
return lt, nil
}
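
To summarize the flow of the new file above: a goroutine runs grep and streams its stdout into a channel, while the caller consumes the lines as they arrive. A minimal, standalone sketch of that producer/consumer shape (not the tool's code; the shell command is a stand-in for CLI.GrepCmd with the compiled pattern):

package main

import (
	"bufio"
	"fmt"
	"os/exec"
)

func main() {
	lines := make(chan string)
	go func() {
		// producer: run the subprocess and stream each stdout line into the channel
		defer close(lines) // closing the channel ends the consumer's range loop below
		cmd := exec.Command("sh", "-c", "printf 'line one\\nline two\\n'") // stand-in for the grep call
		out, err := cmd.StdoutPipe()
		if err != nil {
			return
		}
		if err := cmd.Start(); err != nil {
			return
		}
		s := bufio.NewScanner(out)
		for s.Scan() {
			lines <- s.Text()
		}
		_ = cmd.Wait()
	}()
	// consumer: handle results as they arrive, as iterateOnGrepResults does
	for line := range lines {
		fmt.Println(line)
	}
}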

View File

@@ -41,7 +41,7 @@ func (l *list) Run() error {
toCheck := l.regexesToUse()
timeline, err := timelineFromPaths(CLI.List.Paths, toCheck, CLI.Since, CLI.Until)
timeline, err := timelineFromPaths(CLI.List.Paths, toCheck)
if err != nil {
return errors.Wrap(err, "Could not list events")
}

View File

@@ -1,19 +1,13 @@
package main
import (
"bufio"
"fmt"
"os"
"os/exec"
"runtime"
"strings"
"time"
"github.com/alecthomas/kong"
"github.com/percona/percona-toolkit/src/go/pt-galera-log-explainer/regex"
"github.com/percona/percona-toolkit/src/go/pt-galera-log-explainer/types"
"github.com/percona/percona-toolkit/src/go/pt-galera-log-explainer/utils"
"github.com/pkg/errors"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
)
@@ -46,6 +40,16 @@ var CLI struct {
GrepArgs string `help:"'grep' arguments. perl regexp (-P) is necessary. -o will break the tool" default:"-P"`
}
type versioncmd struct{}
func (v *versioncmd) Help() string {
return ""
}
func (v *versioncmd) Run() error {
fmt.Printf("version: %s, commit:%s, built at %s\n", version, commit, date)
return nil
}
func main() {
ctx := kong.Parse(&CLI,
kong.Name("pt-galera-log-explainer"),
@@ -64,212 +68,3 @@ func main() {
err := ctx.Run()
ctx.FatalIfErrorf(err)
}
type versioncmd struct{}
func (v *versioncmd) Help() string {
return ""
}
func (v *versioncmd) Run() error {
fmt.Printf("version: %s, commit:%s, built at %s\n", version, commit, date)
return nil
}
// timelineFromPaths takes every path, searches them using a list of regexes,
// and organizes the results into a timeline ready to aggregate or read
func timelineFromPaths(paths []string, toCheck types.RegexMap, since, until *time.Time) (types.Timeline, error) {
timeline := make(types.Timeline)
found := false
for _, path := range paths {
extr := newExtractor(path, toCheck, since, until)
localTimeline, err := extr.search()
if err != nil {
extr.logger.Warn().Err(err).Msg("Search failed")
continue
}
found = true
extr.logger.Debug().Str("path", path).Msg("Finished searching")
// Why identification should not rely on the file path alone:
// files that belong to the same node can then be merged,
// instead of being shown as coming from different nodes
if CLI.PxcOperator {
timeline[path] = localTimeline
} else if CLI.MergeByDirectory {
timeline.MergeByDirectory(path, localTimeline)
} else {
timeline.MergeByIdentifier(localTimeline)
}
}
if !found {
return nil, errors.New("Could not find data")
}
return timeline, nil
}
// extractor is a utility struct to store what needs to be done
type extractor struct {
regexes types.RegexMap
path string
since, until *time.Time
logger zerolog.Logger
}
func newExtractor(path string, toCheck types.RegexMap, since, until *time.Time) extractor {
e := extractor{regexes: toCheck, path: path, since: since, until: until}
e.logger = log.With().Str("component", "extractor").Str("path", e.path).Logger()
if since != nil {
e.logger = e.logger.With().Time("since", *e.since).Logger()
}
if until != nil {
e.logger = e.logger.With().Time("until", *e.until).Logger()
}
e.logger.Debug().Msg("new extractor")
return e
}
func (e *extractor) grepArgument() string {
regexToSendSlice := e.regexes.Compile()
grepRegex := "^"
if CLI.PxcOperator {
// special case
// the pxcoperator map is not added the same way as the other maps, because its formats and positions differ
// it has to be put at the front so that it is not 'merged' with the '{"log":"' JSON prefix
// this keeps the expression as close to '^' as possible, so grep can keep doing prefix searches
grepRegex += "((" + strings.Join(regex.PXCOperatorMap.Compile(), "|") + ")|^{\"log\":\""
e.regexes.Merge(regex.PXCOperatorMap)
}
if e.since != nil {
grepRegex += "(" + regex.BetweenDateRegex(e.since, CLI.PxcOperator) + "|" + regex.NoDatesRegex(CLI.PxcOperator) + ")"
}
grepRegex += ".*"
grepRegex += "(" + strings.Join(regexToSendSlice, "|") + ")"
if CLI.PxcOperator {
grepRegex += ")"
}
e.logger.Debug().Str("grepArg", grepRegex).Msg("Compiled grep arguments")
return grepRegex
}
// search is the main function to search what we want in a file
func (e *extractor) search() (types.LocalTimeline, error) {
// A first pass is done, with every regex we want compiled into a single one.
grepRegex := e.grepArgument()
/*
Regular grep is actually used.
There are no great alternatives, even fewer as Go libraries.
grep itself has no great alternatives: they are less performant for common use-cases, not easily portable, or costlier to execute.
grep is everywhere, grep is good enough, and it lets us read results through the stdout pipe.
The usual bottleneck with grep is that it is single-threaded, but we actually benefit
from a sequential scan here as we rely on the log order.
Being sequential also keeps this program light enough to run without much impact.
It also helps to stay transparent rather than providing an obscure tool that works as a black box.
*/
if runtime.GOOS == "darwin" && CLI.GrepCmd == "grep" {
e.logger.Warn().Msg("On Darwin systems, use 'pt-galera-log-explainer --grep-cmd=ggrep' as it requires grep v3")
}
cmd := exec.Command(CLI.GrepCmd, CLI.GrepArgs, grepRegex, e.path)
out, _ := cmd.StdoutPipe()
defer out.Close()
err := cmd.Start()
if err != nil {
return nil, errors.Wrapf(err, "failed to search in %s", e.path)
}
// grep treatment
s := bufio.NewScanner(out)
// it will iterate on stdout pipe results
lt, err := e.iterateOnResults(s)
if err != nil {
e.logger.Warn().Err(err).Msg("Failed to iterate on results")
}
// double-check it stopped correctly
if err = cmd.Wait(); err != nil {
if exiterr, ok := err.(*exec.ExitError); ok && exiterr.ExitCode() == 1 {
return nil, errors.New("Found nothing")
}
return nil, errors.Wrap(err, "grep subprocess error")
}
if len(lt) == 0 {
return nil, errors.New("Found nothing")
}
return lt, nil
}
func (e *extractor) sanitizeLine(s string) string {
if len(s) > 0 && s[0] == '\t' {
return s[1:]
}
return s
}
// iterateOnResults takes, line by line, each log that matched the grep regex
// it iterates over every regex in the map and applies the matching handlers
// it also filters out rows excluded by --since and --until
func (e *extractor) iterateOnResults(s *bufio.Scanner) ([]types.LogInfo, error) {
var (
line string
lt types.LocalTimeline
recentEnough bool
displayer types.LogDisplayer
)
ctx := types.NewLogCtx()
ctx.FilePath = e.path
for s.Scan() {
line = e.sanitizeLine(s.Text())
var date *types.Date
t, layout, ok := regex.SearchDateFromLog(line)
if ok {
d := types.NewDate(t, layout)
date = &d
}
// If recentEnough is set, we already validated a log: every following log necessarily happened later
// this is useful because not every log has a date attached, and some without a date are very useful
if !recentEnough && e.since != nil && (date == nil || (date != nil && e.since.After(date.Time))) {
continue
}
if e.until != nil && date != nil && e.until.Before(date.Time) {
return lt, nil
}
recentEnough = true
filetype := regex.FileType(line, CLI.PxcOperator)
ctx.FileType = filetype
// We have to find again which regex matched this log line
// it can match multiple regexes
for key, regex := range e.regexes {
if !regex.Regex.MatchString(line) || utils.SliceContains(CLI.ExcludeRegexes, key) {
continue
}
ctx, displayer = regex.Handle(ctx, line)
li := types.NewLogInfo(date, displayer, line, regex, key, ctx, filetype)
lt = lt.Add(li)
}
}
return lt, nil
}
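
Both prepareGrepArgument (new file) and the removed grepArgument above build the same kind of single pattern handed to grep. A standalone sketch of its shape, assuming two already-compiled patterns and neither --since nor --pxc-operator (the patterns are made up for illustration):

package main

import (
	"fmt"
	"strings"
)

func main() {
	// hypothetical, already-compiled patterns (regexes.Compile() in the tool)
	compiled := []string{"WSREP: .*Quorum results", "Flow-control interval"}
	grepRegex := "^"  // stay anchored so grep can do cheap prefix matching
	grepRegex += ".*" // allow any prefix (timestamp, thread id, ...)
	grepRegex += "(" + strings.Join(compiled, "|") + ")"
	fmt.Println(grepRegex) // ^.*(WSREP: .*Quorum results|Flow-control interval)
}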

View File

@@ -29,7 +29,7 @@ You can also simply call the command to get a generated sed command to review an
func (s *sed) Run() error {
toCheck := regex.AllRegexes()
timeline, err := timelineFromPaths(s.Paths, toCheck, CLI.Since, CLI.Until)
timeline, err := timelineFromPaths(s.Paths, toCheck)
if err != nil {
return errors.Wrap(err, "Found nothing worth replacing")
}

View File

@@ -79,8 +79,8 @@ type Date struct {
Layout string
}
func NewDate(t time.Time, layout string) Date {
return Date{
func NewDate(t time.Time, layout string) *Date {
return &Date{
Time: t,
Layout: layout,
DisplayTime: t.Format(layout),
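
With NewDate now returning *Date, its caller no longer needs a temporary to take an address of; the two forms appear earlier in this diff (old main.go versus new extractor.go):

// before: NewDate returned a value, so the caller took its address
d := types.NewDate(t, layout)
date = &d
// after: NewDate returns *types.Date and is assigned directly
date = types.NewDate(t, layout)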

View File

@@ -24,7 +24,7 @@ It will list known node name(s), IP(s), hostname(s), and other known node's UUID
func (w *whois) Run() error {
toCheck := regex.AllRegexes()
timeline, err := timelineFromPaths(CLI.Whois.Paths, toCheck, CLI.Since, CLI.Until)
timeline, err := timelineFromPaths(CLI.Whois.Paths, toCheck)
if err != nil {
return errors.Wrap(err, "Found nothing to translate")
}