From c97adf0dc4c3e07f44cfc9d7e4ee2a6b852cee37 Mon Sep 17 00:00:00 2001 From: JuanLeon Lahoz Date: Wed, 18 Aug 2021 08:23:06 +0200 Subject: [PATCH] feature: Implement the common-term plot --- .github/workflows/test.yml | 4 +- Cargo.lock | 4 +- Cargo.toml | 2 +- README.md | 47 +++++++++++++------- src/app.rs | 28 ++++++++++-- src/main.rs | 40 +++++++++++++++-- src/plot/histogram.rs | 2 +- src/plot/mod.rs | 2 + src/plot/terms.rs | 90 ++++++++++++++++++++++++++++++++++++++ src/read/buckets.rs | 53 ++++++++++++++++++++-- src/read/dateparser.rs | 6 +-- src/read/times.rs | 2 +- 12 files changed, 244 insertions(+), 36 deletions(-) create mode 100644 src/plot/terms.rs diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 15164f4..b01948b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -21,5 +21,7 @@ jobs: run: cargo test -- --test-threads=1 - name: Check format run: cargo fmt -- --check + - name: Get clippy version + run: cargo clippy -V - name: Run clippy - run: cargo clippy -- -D clippy + run: cargo clippy -- -D clippy::all diff --git a/Cargo.lock b/Cargo.lock index af06c9c..4678e14 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,5 +1,7 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. +version = 3 + [[package]] name = "aho-corasick" version = "0.7.15" @@ -294,7 +296,7 @@ dependencies = [ [[package]] name = "lowcharts" -version = "0.4.1" +version = "0.4.2" dependencies = [ "assert_cmd", "atty", diff --git a/Cargo.toml b/Cargo.toml index d4dadd7..62307ba 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "lowcharts" -version = "0.4.1" +version = "0.4.2" authors = ["JuanLeon Lahoz "] edition = "2018" description = "Tool to draw low-resolution graphs in terminal" diff --git a/README.md b/README.md index dd64c18..121e42e 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ terminal. 
Type `lowcharts --help`, or `lowcharts PLOT-TYPE --help` for a complete list of options. -Currently five basic types of plots are supported: +Currently six basic types of plots are supported: #### Bar chart for matches in the input @@ -33,7 +33,7 @@ This chart is generated using `lowcharts matches database.log SELECT UPDATE DELE [![Simple bar chart with lowcharts](resources/matches-example.png)](resources/matches-example.png) -#### Histogram +#### Histogram for numerical inputs This chart is generated using `python3 -c 'import random; [print(random.normalvariate(5, 5)) for _ in range(100000)]' | lowcharts hist`: @@ -85,19 +85,6 @@ each ∎ represents a count of 228 [0.044 .. 0.049] [ 183] ``` -#### X-Y Plot - -This chart is generated using `cat ram-usage | lowcharts plot --height 20 --width 50`: - -[![Sample plot with lowcharts](resources/plot-example.png)](resources/plot-example.png) - -Note that x axis is not labelled. The tool splits the input data by chunks of a -fixed size and then the chart display the averages of those chunks. In other -words: grouping data by time is not (yet?) supported; you can see the evolution -of a metric over time, but not the speed of that evolution. - -There is regex support for this type of plots. - #### Time Histogram This chart is generated using `strace -tt ls -lR * 2>&1 | lowcharts timehist --intervals 10`: @@ -109,7 +96,7 @@ similar way, and would give you a glimpse of when and how many 404s are being triggered in your server. The idea is to depict the frequency of logs that match a regex (by default any -log that is read by the tool). The sub-command can autodetect the more common +log that is read by the tool). The sub-command can autodetect the most common (in my personal and biased experience) datetime/timestamp formats: rfc 3339, rfc 2822, python `%(asctime)s`, golang default log format, nginx, rabbitmq, strace -t (or -tt, or -ttt),ltrace,... as long as the timestamp is present in the first @@ -130,12 +117,38 @@ timezones). 
This adds up the time histogram and bar chart in a single visualization. -This chart is generated using `strace -tt ls -lR 2>&1 | lowcharts split-timehist open mmap close read write --intervals 10`: +This chart is generated using `strace -tt ls -lR 2>&1 | lowcharts split-timehist open mmap close read write --intervals 10`: [![Sample plot with lowcharts](resources/split-timehist-example.png)](resources/split-timehist-example.png) This graph depicts the relative frequency of search terms in time. +#### Common terms histogram + +Useful for plotting the most common terms in input lines. + +This sample chart is generated using `strace ls -l 2>&1 | lowcharts common-terms --lines 8 -R '(.*?)\('`: + +[![Sample plot with lowcharts](resources/common-terms-example.png)](resources/common-terms-example.png) + +The graph depicts the 8 syscalls most used by the `ls -l` command, along with +their number of uses, sorted by frequency. In general, using `lowcharts common-terms` is a +handy substitute for commands of the form `awk ... | sort | uniq -c | sort -rn | +head`. + +#### X-Y Plot + +This chart is generated using `cat ram-usage | lowcharts plot --height 20 --width 50`: + +[![Sample plot with lowcharts](resources/plot-example.png)](resources/plot-example.png) + +Note that the x axis is not labelled. The tool splits the input data by chunks of a +fixed size and then the chart displays the averages of those chunks. In other +words: grouping data by time is not (yet?) supported; you can see the evolution +of a metric over time, but not the speed of that evolution. + +There is regex support for this type of plot. + ### Installing #### Via release diff --git a/src/app.rs b/src/app.rs index d5fac64..87f7a40 100644 --- a/src/app.rs +++ b/src/app.rs @@ -46,8 +46,7 @@ lines. By default this will use a capture group named `value`. If not present, it will use first capture group. -If no regex is used, a number per line is expected (something that can be parsed -as float). 
+If no regex is used, the whole input lines will be matched. Examples of regex are ' 200 \\d+ ([0-9.]+)' (where there is one anonymous capture group) and 'a(a)? (?P[0-9.]+)' (where there are two capture groups, and @@ -68,7 +67,7 @@ fn add_non_capturing_regex(app: App) -> App { Arg::new("regex") .long("regex") .short('R') - .about("Filter out lines where regex is notr present") + .about("Filter out lines where regex is not present") .takes_value(true), ) } @@ -170,6 +169,19 @@ pub fn get_app() -> App<'static> { .multiple(true), ); + let mut common_terms = App::new("common-terms") + .version(clap::crate_version!()) + .setting(AppSettings::ColoredHelp) + .about("Plot histogram with most common terms in input lines"); + common_terms = add_input(add_regex(add_width(common_terms))).arg( + Arg::new("lines") + .long("lines") + .short('l') + .about("Display that many lines, sorting by most frequent") + .default_value("10") + .takes_value(true), + ); + App::new("lowcharts") .author(clap::crate_authors!()) .version(clap::crate_version!()) @@ -198,6 +210,7 @@ pub fn get_app() -> App<'static> { .subcommand(matches) .subcommand(timehist) .subcommand(splittimehist) + .subcommand(common_terms) } #[cfg(test)] @@ -279,4 +292,13 @@ mod tests { sub_m.values_of("match").unwrap().collect::>() ); } + + #[test] + fn terms_subcommand_arg_parsing() { + let arg_vec = vec!["lowcharts", "common-terms", "--regex", "foo", "some"]; + let m = get_app().get_matches_from(arg_vec); + let sub_m = m.subcommand_matches("common-terms").unwrap(); + assert_eq!("some", sub_m.value_of("input").unwrap()); + assert_eq!("foo", sub_m.value_of("regex").unwrap()); + } } diff --git a/src/main.rs b/src/main.rs index 732007d..267a389 100644 --- a/src/main.rs +++ b/src/main.rs @@ -85,7 +85,7 @@ fn get_float_reader(matches: &ArgMatches) -> Result { builder.range(min..max); } if let Some(string) = matches.value_of("regex") { - match Regex::new(&string) { + match Regex::new(string) { Ok(re) => { builder.regex(re); } @@ 
-100,7 +100,7 @@ fn get_float_reader(matches: &ArgMatches) -> Result { /// Implements the hist cli-subcommand fn histogram(matches: &ArgMatches) -> i32 { - let reader = match get_float_reader(&matches) { + let reader = match get_float_reader(matches) { Ok(r) => r, _ => return 2, }; @@ -122,7 +122,7 @@ fn histogram(matches: &ArgMatches) -> i32 { /// Implements the plot cli-subcommand fn plot(matches: &ArgMatches) -> i32 { - let reader = match get_float_reader(&matches) { + let reader = match get_float_reader(matches) { Ok(r) => r, _ => return 2, }; @@ -155,11 +155,42 @@ fn matchbar(matches: &ArgMatches) -> i32 { 0 } +/// Implements the common-terms cli-subcommand +fn common_terms(matches: &ArgMatches) -> i32 { + let mut builder = read::DataReaderBuilder::default(); + if let Some(string) = matches.value_of("regex") { + match Regex::new(string) { + Ok(re) => { + builder.regex(re); + } + _ => { + error!("Failed to parse regex {}", string); + return 1; + } + }; + } else { + builder.regex(Regex::new("(.*)").unwrap()); + }; + let reader = builder.build().unwrap(); + let width = matches.value_of_t("width").unwrap(); + let lines = matches.value_of_t("lines").unwrap(); + if lines < 1 { + error!("You should specify a positive number of lines"); + return 2; + }; + print!( + "{:width$}", + reader.read_terms(matches.value_of("input").unwrap(), lines), + width = width + ); + 0 +} + /// Implements the timehist cli-subcommand fn timehist(matches: &ArgMatches) -> i32 { let mut builder = read::TimeReaderBuilder::default(); if let Some(string) = matches.value_of("regex") { - match Regex::new(&string) { + match Regex::new(string) { Ok(re) => { builder.regex(re); } @@ -236,6 +267,7 @@ fn main() { Some(("plot", subcommand_matches)) => plot(subcommand_matches), Some(("matches", subcommand_matches)) => matchbar(subcommand_matches), Some(("timehist", subcommand_matches)) => timehist(subcommand_matches), + Some(("common-terms", subcommand_matches)) => common_terms(subcommand_matches), 
Some(("split-timehist", subcommand_matches)) => splittime(subcommand_matches), _ => unreachable!("Invalid subcommand"), }); diff --git a/src/plot/histogram.rs b/src/plot/histogram.rs index ba424a9..23905fd 100644 --- a/src/plot/histogram.rs +++ b/src/plot/histogram.rs @@ -77,7 +77,7 @@ impl fmt::Display for Histogram { let writer = HistWriter { width: f.width().unwrap_or(110), }; - writer.write(f, &self) + writer.write(f, self) } } diff --git a/src/plot/mod.rs b/src/plot/mod.rs index 1850e7f..a904e85 100644 --- a/src/plot/mod.rs +++ b/src/plot/mod.rs @@ -1,12 +1,14 @@ pub use self::histogram::Histogram; pub use self::matchbar::{MatchBar, MatchBarRow}; pub use self::splittimehist::SplitTimeHistogram; +pub use self::terms::CommonTerms; pub use self::timehist::TimeHistogram; pub use self::xy::XyPlot; mod histogram; mod matchbar; mod splittimehist; +mod terms; mod timehist; mod xy; diff --git a/src/plot/terms.rs b/src/plot/terms.rs new file mode 100644 index 0000000..2032cc4 --- /dev/null +++ b/src/plot/terms.rs @@ -0,0 +1,90 @@ +use std::collections::HashMap; +use std::fmt; + +use yansi::Color::{Blue, Green, Red}; + +#[derive(Debug)] +pub struct CommonTerms { + pub terms: HashMap, + lines: usize, +} + +impl CommonTerms { + pub fn new(lines: usize) -> CommonTerms { + CommonTerms { + terms: HashMap::new(), + lines, + } + } + + pub fn observe(&mut self, term: String) { + *self.terms.entry(term).or_insert(0) += 1 + } +} + +impl fmt::Display for CommonTerms { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let width = f.width().unwrap_or(100); + let mut counts: Vec<(&String, &usize)> = self.terms.iter().collect(); + if counts.is_empty() { + writeln!(f, "No data")?; + return Ok(()); + } + counts.sort_by(|a, b| b.1.cmp(a.1)); + let values = &counts[..self.lines.min(counts.len())]; + let label_width = values.iter().fold(1, |acc, x| acc.max(x.0.len())); + let divisor = 1.max(counts[0].1 / width); + let width_count = format!("{}", counts[0].1).len(); + writeln!( + f, 
+ "Each {} represents a count of {}", + Red.paint("∎"), + Blue.paint(divisor.to_string()), + )?; + for (term, count) in values.iter() { + writeln!( + f, + "[{label}] [{count}] {bar}", + label = Blue.paint(format!("{:>width$}", term, width = label_width)), + count = Green.paint(format!("{:width$}", count, width = width_count)), + bar = Red.paint(format!("{:∎ { - if let Some(n) = line_parser(&self, &as_string) { + if let Some(n) = line_parser(self, &as_string) { match &self.range { Some(range) => { if range.contains(&n) { @@ -55,9 +55,9 @@ impl DataReader { match self.regex.as_ref().unwrap().captures(line) { Some(cap) => { if let Some(name) = cap.name("value") { - self.parse_float(&name.as_str()) + self.parse_float(name.as_str()) } else if let Some(capture) = cap.get(1) { - self.parse_float(&capture.as_str()) + self.parse_float(capture.as_str()) } else { None } @@ -86,6 +86,26 @@ impl DataReader { } MatchBar::new(rows) } + + pub fn read_terms(&self, path: &str, lines: usize) -> CommonTerms { + let mut terms = CommonTerms::new(lines); + let regex = self.regex.as_ref().unwrap(); + for line in open_file(path).lines() { + match line { + Ok(as_string) => { + if let Some(cap) = regex.captures(&as_string) { + if let Some(name) = cap.name("value") { + terms.observe(String::from(name.as_str())) + } else if let Some(capture) = cap.get(1) { + terms.observe(String::from(capture.as_str())) + } + }; + } + Err(error) => error!("{}", error), + } + } + terms + } } #[cfg(test)] @@ -180,4 +200,29 @@ mod tests { assert_eq!(mb.vec[2].label, "data"); assert_eq!(mb.vec[2].count, 2); } + + #[test] + fn basic_term_reader() { + let re = Regex::new("^foo ([0-9.-]+) (?P[0-9.-]+)").unwrap(); + let reader = DataReaderBuilder::default().regex(re).build().unwrap(); + let mut file = NamedTempFile::new().unwrap(); + writeln!(file, "foo 1.1 1.6").unwrap(); + writeln!(file, "foo 1.2 1.5").unwrap(); + writeln!(file, "foo 1.3 1.6").unwrap(); + writeln!(file, "foo 1.4 1.7").unwrap(); + let ct = 
reader.read_terms(file.path().to_str().unwrap(), 10); + assert_eq!(ct.terms.len(), 3); + assert_eq!(*ct.terms.get(&String::from("1.5")).unwrap(), 1); + assert_eq!(*ct.terms.get(&String::from("1.6")).unwrap(), 2); + assert_eq!(*ct.terms.get(&String::from("1.7")).unwrap(), 1); + // Now, with no named capture group + let re = Regex::new("^foo ([0-9.-]+) ([0-9.-]+)").unwrap(); + let reader = DataReaderBuilder::default().regex(re).build().unwrap(); + let ct = reader.read_terms(file.path().to_str().unwrap(), 10); + assert_eq!(ct.terms.len(), 4); + assert_eq!(*ct.terms.get(&String::from("1.1")).unwrap(), 1); + assert_eq!(*ct.terms.get(&String::from("1.2")).unwrap(), 1); + assert_eq!(*ct.terms.get(&String::from("1.3")).unwrap(), 1); + assert_eq!(*ct.terms.get(&String::from("1.4")).unwrap(), 1); + } } diff --git a/src/read/dateparser.rs b/src/read/dateparser.rs index efe94e7..2e61fbd 100644 --- a/src/read/dateparser.rs +++ b/src/read/dateparser.rs @@ -30,8 +30,8 @@ pub struct LogDateParser { impl LogDateParser { pub fn new(log_line: &str, format_string: &Option) -> Result { match format_string { - Some(ts_format) => Self::new_with_format(&log_line, &ts_format), - None => Self::new_with_guess(&log_line), + Some(ts_format) => Self::new_with_format(log_line, ts_format), + None => Self::new_with_guess(log_line), } } @@ -98,7 +98,7 @@ impl LogDateParser { return Some(Box::new(DateTime::parse_from_rfc3339)); } else if DateTime::parse_from_rfc2822(s).is_ok() { return Some(Box::new(DateTime::parse_from_rfc2822)); - } else if Self::looks_like_timestamp(&s) { + } else if Self::looks_like_timestamp(s) { return Some(Box::new(|string: &str| { let dot = match string.find('.') { Some(x) => x, diff --git a/src/read/times.rs b/src/read/times.rs index bc61702..bae0ee9 100644 --- a/src/read/times.rs +++ b/src/read/times.rs @@ -82,7 +82,7 @@ impl TimeReader { } } if let Some(re) = &self.regex { - if re.is_match(&line) { + if re.is_match(line) { vec.push(d); } } else {