diff --git a/Cargo.lock b/Cargo.lock index 3d08e68..1e64561 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -44,6 +44,19 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "chrono" +version = "0.4.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" +dependencies = [ + "libc", + "num-integer", + "num-traits", + "time", + "winapi", +] + [[package]] name = "clap" version = "3.0.0-beta.2" @@ -234,6 +247,7 @@ checksum = "9385f66bf6105b241aa65a61cb923ef20efc665cb9f9bb50ac2f0c4b7f378d41" name = "lowcharts" version = "0.2.0" dependencies = [ + "chrono", "clap", "derive_builder", "float_eq", @@ -249,6 +263,25 @@ version = "2.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525" +[[package]] +name = "num-integer" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db" +dependencies = [ + "autocfg", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" +dependencies = [ + "autocfg", +] + [[package]] name = "os_str_bytes" version = "2.4.0" @@ -433,6 +466,16 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "time" +version = "0.1.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "unicode-segmentation" version = "1.7.1" diff --git a/Cargo.toml b/Cargo.toml index b9c2ff6..54214f0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ 
-11,6 +11,7 @@ yansi = "0.5.0" isatty = "0.1" derive_builder = "0.10.0" regex = "1.4.5" +chrono = "0.4" [dev-dependencies] float_eq = "0.5.0" diff --git a/README.md b/README.md index 975a577..6051833 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ terminal. Type `lowcharts --help`, or `lowcharts PLOT-TYPE --help` for a complete list of options. -Currently three basic types of plots are supported: +Currently four basic types of plots are supported: #### Bar chart for matches in the input @@ -95,6 +95,34 @@ of a metric over time, but not the speed of that evolution. There is regex support for this type of plots. +#### Time Histogram + +This chart is generated using `strace -tt ls -lR * 2>&1 | lowcharts timehist --intervals 10`: + +[![Sample plot with lowcharts](resources/timehist-example.png)](resources/timehist-example.png) + +Things like `lowcharts timehist --regex ' 404 ' nginx.log` should work in a +similar way, and would give you a glimpse of when and how many 404s are being +triggered in your server. + +The idea is to depict the frequency of logs that match a regex (by default any +log that is read by the tool). The sub-command can autodetect the most common +(in my personal and biased experience) datetime/timestamp formats: rfc 3339, rfc +2822, python `%(asctime)s`, golang default log format, nginx, rabbitmq, strace +-t (or -tt, or -ttt), ltrace, ... as long as the timestamp is present in the first +line in the log and the format is consistent in all the lines that contain a +timestamp. It is ok to have lines with no timestamp. The consistency is +required for performance reasons: the 1st log line is the only one that +triggers the heuristics needed to create a specialized datetime parser on the +fly. + +However, if you have a format that lowcharts cannot autodetect, you can +specify it via a command line flag. For instance, `--format +'%d-%b-%Y::%H:%M:%S'`. 
Note that, as of today, you need to leave out the +timezone part of the format string (the autodetection works fine with +timezones). + + ### Installing #### Via release diff --git a/resources/timehist-example.png b/resources/timehist-example.png new file mode 100644 index 0000000..50b4d50 Binary files /dev/null and b/resources/timehist-example.png differ diff --git a/src/app.rs b/src/app.rs index 6ccc893..87f6873 100644 --- a/src/app.rs +++ b/src/app.rs @@ -1,6 +1,32 @@ use clap::{self, App, AppSettings, Arg}; -fn add_common_options(app: App) -> App { +fn add_input(app: App) -> App { + app.arg( + Arg::new("input") + .about("Input file") + .default_value("-") + .long_about("If not present or a single dash, standard input will be used"), + ) +} + +fn add_min_max(app: App) -> App { + app.arg( + Arg::new("max") + .long("max") + .short('M') + .about("Filter out values bigger than this") + .takes_value(true), + ) + .arg( + Arg::new("min") + .long("min") + .short('m') + .about("Filter out values smaller than this") + .takes_value(true), + ) +} + +fn add_regex(app: App) -> App { const LONG_RE_ABOUT: &str = "\ A regular expression used for capturing the values to be plotted inside input lines. @@ -15,22 +41,18 @@ Examples of regex are ' 200 \\d+ ([0-9.]+)' (where there is one anonymous captur group) and 'a(a)? (?P[0-9.]+)' (where there are two capture groups, and the named one will be used). "; - app.arg( - Arg::new("max") - .long("max") - .short('M') - .about("Filter out values bigger than this") - .takes_value(true), - ) - .arg( - Arg::new("min") - .long("min") - .short('m') - .about("Filter out values smaller than this") + Arg::new("regex") + .long("regex") + .short('R') + .about("Use a regex to capture input values") + .long_about(LONG_RE_ABOUT) .takes_value(true), ) - .arg( +} + +fn add_width(app: App) -> App { + app.arg( Arg::new("width") .long("width") .short('w') @@ -38,37 +60,25 @@ the named one will be used). 
.default_value("110") .takes_value(true), ) - .arg( - Arg::new("regex") - .long("regex") - .short('R') - .about("Use a regex to capture input values") - .long_about(LONG_RE_ABOUT) +} + +fn add_intervals(app: App) -> App { + app.arg( + Arg::new("intervals") + .long("intervals") + .short('i') + .about("Use no more than this amount of buckets to classify data") + .default_value("20") .takes_value(true), ) - .arg( - Arg::new("input") - .about("Input file") - .default_value("-") - .long_about("If not present or a single dash, standard input will be used"), - ) } pub fn get_app() -> App<'static> { let mut hist = App::new("hist") .version(clap::crate_version!()) .setting(AppSettings::ColoredHelp) - .about("Plot an histogram from input values") - .arg( - Arg::new("intervals") - .long("intervals") - .short('i') - .about("Use no more than this amount of buckets to classify data") - .default_value("20") - .takes_value(true), - ); - - hist = add_common_options(hist); + .about("Plot an histogram from input values"); + hist = add_input(add_regex(add_width(add_min_max(add_intervals(hist))))); let mut plot = App::new("plot") .version(clap::crate_version!()) @@ -82,34 +92,33 @@ pub fn get_app() -> App<'static> { .default_value("40") .takes_value(true), ); - plot = add_common_options(plot); + plot = add_input(add_regex(add_width(add_min_max(plot)))); - let matches = App::new("matches") + let mut matches = App::new("matches") .version(clap::crate_version!()) .setting(AppSettings::ColoredHelp) .setting(AppSettings::AllowMissingPositional) - .about("Plot barchar with counts of occurences of matches params") + .about("Plot barchar with counts of occurences of matches params"); + matches = add_input(add_width(matches)).arg( + Arg::new("match") + .about("Count maches for those strings") + .required(true) + .takes_value(true) + .multiple(true), + ); + + let mut timehist = App::new("timehist") + .version(clap::crate_version!()) + .setting(AppSettings::ColoredHelp) + .about("Plot histogram 
with amount of matches over time") .arg( - Arg::new("width") - .long("width") - .short('w') - .about("Use this many characters as terminal width") - .default_value("110") + Arg::new("format") + .long("format") + .short('f') + .about("Use this string formatting") .takes_value(true), - ) - .arg( - Arg::new("input") - .about("Input file") - .required(true) - .long_about("If not present or a single dash, standard input will be used"), - ) - .arg( - Arg::new("match") - .about("Count maches for those strings") - .required(true) - .takes_value(true) - .multiple(true), ); + timehist = add_input(add_width(add_regex(add_intervals(timehist)))); App::new("lowcharts") .author(clap::crate_authors!()) @@ -136,6 +145,7 @@ pub fn get_app() -> App<'static> { .subcommand(hist) .subcommand(plot) .subcommand(matches) + .subcommand(timehist) } #[cfg(test)] @@ -183,4 +193,32 @@ mod tests { assert!(false, "Subcommand `plot` not detected"); } } + + #[test] + fn matches_subcommand_arg_parsing() { + let arg_vec = vec!["lowcharts", "matches", "-", "A", "B", "C"]; + let m = get_app().get_matches_from(arg_vec); + if let Some(sub_m) = m.subcommand_matches("matches") { + assert_eq!("-", sub_m.value_of("input").unwrap()); + assert_eq!( + // vec![String::from("A"), String::from("B"), String::from("C")], + vec!["A", "B", "C"], + sub_m.values_of("match").unwrap().collect::>() + ); + } else { + assert!(false, "Subcommand `matches` not detected"); + } + } + + #[test] + fn timehist_subcommand_arg_parsing() { + let arg_vec = vec!["lowcharts", "timehist", "--regex", "foo", "some"]; + let m = get_app().get_matches_from(arg_vec); + if let Some(sub_m) = m.subcommand_matches("timehist") { + assert_eq!("some", sub_m.value_of("input").unwrap()); + assert_eq!("foo", sub_m.value_of("regex").unwrap()); + } else { + assert!(false, "Subcommand `timehist` not detected"); + } + } } diff --git a/src/dateparser.rs b/src/dateparser.rs new file mode 100644 index 0000000..e33bc2d --- /dev/null +++ b/src/dateparser.rs @@ 
-0,0 +1,367 @@ +use std::ops::Range; + +use chrono::{DateTime, FixedOffset, Local, NaiveDateTime, NaiveTime, ParseError, TimeZone, Utc}; +use regex::Regex; + +type DateParsingFun = fn(s: &str) -> Result, ParseError>; + +// Those are some date formats that are common for my personal (and biased) +// experience. So, there is logic to detect and parse them. +const PARSE_SPECIFIERS: &[&str] = &[ + "%Y-%m-%d %H:%M:%S,%3f", // python %(asctime)s + "%Y-%m-%d %H:%M:%S", + "%Y/%m/%d %H:%M:%S", // Seen in some nginx logs + "%d-%b-%Y::%H:%M:%S", // Seen in rabbitmq logs + "%H:%M:%S", // strace -t + "%H:%M:%S.%6f", // strace -tt (-ttt generates timestamps) +]; + +// Max length that a timestamp can have +const MAX_LEN: usize = 28; + +pub struct LogDateParser<'a> { + range: Range, + parser: Option, + ts_format: Option<&'a str>, +} + +impl<'a> LogDateParser<'a> { + pub fn new_with_guess(log_line: &str) -> Result, String> { + if let Some(x) = Self::from_brackets(log_line) { + Ok(x) + } else if let Some(x) = Self::from_heuristic(log_line) { + Ok(x) + } else { + Err(format!("Could not parse a timestamp in {}", log_line)) + } + } + + pub fn new_with_format( + log_line: &str, + format_string: &'a str, + ) -> Result, String> { + // Brute-force search for the timestamp in the 1st log line, capping the + // scanned length. `get` (unlike indexing) tolerates offsets inside a char. + for i in 0..log_line.len() { + for j in (i..(i + (MAX_LEN * 2)).min(log_line.len() + 1)).rev() { + let candidate = log_line.get(i..j).unwrap_or(""); + if NaiveDateTime::parse_from_str(candidate, format_string).is_ok() { + // I would like to capture ts_format in a closure and assign it to parser, + // but I cannot coerce a capturing closure to a typed fn. I still need to + // learn the idiomatic way of dealing with this. 
+ return Ok(LogDateParser { + range: i..j, + parser: None, + ts_format: Some(format_string), + }); + } + } + } + Err(format!( + "Could not locate a '{}' timestamp in '{}'", + format_string, log_line + )) + } + + pub fn parse(&self, s: &str) -> Result, ParseError> { + let range = self.range.start.min(s.len())..self.range.end.min(s.len()); + // Lines without a timestamp may hold multi-byte chars at these offsets: + // `get` yields None there (indexing would panic) and "" fails to parse. + let chunk = s.get(range).unwrap_or(""); + match self.parser { + Some(p) => p(chunk), + None => NaiveDateTime::parse_from_str(chunk, self.ts_format.unwrap()).map(|naive| { + let date_time: DateTime = Local.from_local_datetime(&naive).unwrap(); + date_time.with_timezone(&TimeZone::from_offset(&FixedOffset::west(0))) + }), + } + } + + fn guess_parser(s: &str) -> Option { + if DateTime::parse_from_rfc3339(s).is_ok() { + Some(DateTime::parse_from_rfc3339) + } else if DateTime::parse_from_rfc2822(s).is_ok() { + Some(DateTime::parse_from_rfc2822) + } else if Self::looks_like_timestamp(&s) { + Some(|string: &str| { + let dot = match string.find('.') { + Some(x) => x, + None => string.len(), + }; + let nanosecs = if dot < string.len() { + let missing_zeros = (10 + dot - string.len()) as u32; + match string[dot + 1..].parse::() { + Ok(x) => x * 10_u32.pow(missing_zeros), + _ => 0, + } + } else { + 0 + }; + match string[..dot].parse::() { + Ok(secs) => { + let naive = NaiveDateTime::from_timestamp(secs, nanosecs); + let date_time: DateTime = Local.from_local_datetime(&naive).unwrap(); + Ok(date_time.with_timezone(&TimeZone::from_offset(&FixedOffset::west(0)))) + } + Err(_) => DateTime::parse_from_rfc3339(""), + } + }) + } else if NaiveDateTime::parse_from_str(s, PARSE_SPECIFIERS[0]).is_ok() { + // TODO: All of this stuff below should be rewritten using macros. + // Reason for "repeating myself" is that I cannot coerce closures to + // fn types if they capture variables (an index to PARSE_SPECIFIERS, + // for instance). 
+ Some( + |string: &str| match NaiveDateTime::parse_from_str(string, PARSE_SPECIFIERS[0]) { + Ok(naive) => { + let date_time: DateTime = Local.from_local_datetime(&naive).unwrap(); + Ok(date_time.with_timezone(&TimeZone::from_offset(&FixedOffset::west(0)))) + } + Err(err) => Err(err), + }, + ) + } else if NaiveDateTime::parse_from_str(s, PARSE_SPECIFIERS[1]).is_ok() { + Some( + |string: &str| match NaiveDateTime::parse_from_str(string, PARSE_SPECIFIERS[1]) { + Ok(naive) => { + let date_time: DateTime = Local.from_local_datetime(&naive).unwrap(); + Ok(date_time.with_timezone(&TimeZone::from_offset(&FixedOffset::west(0)))) + } + Err(err) => Err(err), + }, + ) + } else if NaiveDateTime::parse_from_str(s, PARSE_SPECIFIERS[2]).is_ok() { + Some( + |string: &str| match NaiveDateTime::parse_from_str(string, PARSE_SPECIFIERS[2]) { + Ok(naive) => { + let date_time: DateTime = Local.from_local_datetime(&naive).unwrap(); + Ok(date_time.with_timezone(&TimeZone::from_offset(&FixedOffset::west(0)))) + } + Err(err) => Err(err), + }, + ) + } else if NaiveDateTime::parse_from_str(s, PARSE_SPECIFIERS[3]).is_ok() { + Some( + |string: &str| match NaiveDateTime::parse_from_str(string, PARSE_SPECIFIERS[3]) { + Ok(naive) => { + let date_time: DateTime = Local.from_local_datetime(&naive).unwrap(); + Ok(date_time.with_timezone(&TimeZone::from_offset(&FixedOffset::west(0)))) + } + Err(err) => Err(err), + }, + ) + } else if NaiveTime::parse_from_str(s, PARSE_SPECIFIERS[4]).is_ok() { + Some( + |string: &str| match NaiveTime::parse_from_str(string, PARSE_SPECIFIERS[4]) { + Ok(naive_time) => Ok(Utc::today() + .and_time(naive_time) + .unwrap() + .with_timezone(&TimeZone::from_offset(&FixedOffset::west(0)))), + Err(err) => Err(err), + }, + ) + } else if NaiveTime::parse_from_str(s, PARSE_SPECIFIERS[5]).is_ok() { + Some( + |string: &str| match NaiveTime::parse_from_str(string, PARSE_SPECIFIERS[5]) { + Ok(naive_time) => Ok(Utc::today() + .and_time(naive_time) + .unwrap() + 
.with_timezone(&TimeZone::from_offset(&FixedOffset::west(0)))), + Err(err) => Err(err), + }, + ) + } else { + None + } + } + + fn from_brackets(s: &str) -> Option { + match s.chars().next() { + Some('[') => { + if let Some(x) = s.find(']') { + match Self::guess_parser(&s[1..x]) { + Some(parser) => Some(LogDateParser { + range: 1..x, + parser: Some(parser), + ts_format: None, + }), + _ => None, + } + } else { + None + } + } + _ => None, + } + } + + fn from_heuristic(s: &str) -> Option { + // First we locate the byte offset of the first digit (not its char index) + for (i, c) in s.char_indices() { + if c.is_digit(10) { + for j in (i..(i + MAX_LEN).min(s.len() + 1)).rev() { + if let Some(parser) = s.get(i..j).and_then(Self::guess_parser) { + return Some(LogDateParser { + range: i..j, + parser: Some(parser), + ts_format: None, + }); + } + } + break; + } + } + None + } + + // Returns true if string looks like a unix-like timestamp of arbitrary + // precision + fn looks_like_timestamp(s: &str) -> bool { + Regex::new(r"^[0-9]{10}(\.[0-9]{1,9})?$") + .unwrap() + .is_match(s) + } +} + +#[cfg(test)] +mod tests { + + use super::*; + + #[test] + fn test_rfc3339_brackets() { + let r = LogDateParser::new_with_guess("[1996-12-19T16:39:57-08:00] foobar").unwrap(); + assert_eq!( + r.parse("[2096-11-19T16:39:57-08:00]"), + DateTime::parse_from_rfc3339("2096-11-19T16:39:57-08:00") + ); + } + + #[test] + fn test_rfc3339_no_brackets() { + let r = LogDateParser::new_with_guess("2021-04-25T16:57:15.337Z foobar").unwrap(); + assert_eq!( + r.parse("2031-04-25T16:57:15.337Z"), + DateTime::parse_from_rfc3339("2031-04-25T16:57:15.337Z") + ); + } + + #[test] + fn test_rfc2822() { + let r = LogDateParser::new_with_guess("12 Jul 2003 10:52:37 +0200 foobar").unwrap(); + assert_eq!( + r.parse("22 Jun 2003 10:52:37 +0500"), + DateTime::parse_from_rfc2822("22 Jun 2003 10:52:37 +0500") + ); + } + + #[test] + fn test_bad_bracket() { + let r = LogDateParser::new_with_guess("[12 Jul 2003 10:52:37 +0200 foobar").unwrap(); + assert_eq!( + r.parse("[22 
Jun 2003 10:52:37 +0500"), + DateTime::parse_from_rfc2822("22 Jun 2003 10:52:37 +0500") + ); + } + + #[test] + fn test_prefix() { + let r = LogDateParser::new_with_guess("foobar 1996-12-19T16:39:57-08:00 foobar").unwrap(); + assert_eq!( + r.parse("foobar 2096-11-19T16:39:57-08:00"), + DateTime::parse_from_rfc3339("2096-11-19T16:39:57-08:00") + ); + } + + #[test] + fn test_bad_format() { + assert!(LogDateParser::new_with_guess("996-12-19T16:39:57-08:00 foobar").is_err()); + } + + #[test] + fn test_short_line() { + assert!(LogDateParser::new_with_guess("9").is_err()); + } + + #[test] + fn test_empty_line() { + assert!(LogDateParser::new_with_guess("").is_err()); + } + + #[test] + #[ignore] // need to make code LocalTime agnostic + fn test_timestamps() { + let r = LogDateParser::new_with_guess("ts 1619688527.018165").unwrap(); + assert_eq!( + r.parse("ts 1619655527.888165"), + DateTime::parse_from_rfc3339("2021-04-28T22:18:47.888165+00:00") + ); + let r = LogDateParser::new_with_guess("1619688527.123").unwrap(); + assert_eq!( + r.parse("1619655527.123"), + DateTime::parse_from_rfc3339("2021-04-28T22:18:47.123+00:00") + ); + } + + #[test] + #[ignore] // need to make code LocalTime agnostic + fn test_known_formats() { + let r = LogDateParser::new_with_guess("2021-04-28 06:25:24,321").unwrap(); + assert_eq!( + r.parse("2021-04-28 06:25:24,321"), + DateTime::parse_from_rfc3339("2021-04-28T04:25:24.321+00:00") + ); + let r = LogDateParser::new_with_guess("2021-04-28 06:25:24").unwrap(); + assert_eq!( + r.parse("2021-04-28 06:25:24"), + DateTime::parse_from_rfc3339("2021-04-28T04:25:24+00:00") + ); + let r = LogDateParser::new_with_guess("28-Apr-2021::12:10:42").unwrap(); + assert_eq!( + r.parse("28-Apr-2021::12:10:42"), + DateTime::parse_from_rfc3339("2021-04-28T10:10:42+00:00") + ); + let r = LogDateParser::new_with_guess("2019/12/19 05:01:02").unwrap(); + assert_eq!( + r.parse("2019/12/19 05:01:02"), + DateTime::parse_from_rfc3339("2019-12-19T04:01:02+00:00") + ); + let 
r = LogDateParser::new_with_guess("11:29:13.120535").unwrap(); + let now_as_date = format!("{}", Utc::today()); + assert_eq!( + r.parse("11:29:13.120535"), + DateTime::parse_from_rfc3339(&format!( + "{}{}", + &now_as_date[..10], + "T11:29:13.120535+00:00" + )) + ); + let r = LogDateParser::new_with_guess("11:29:13").unwrap(); + assert_eq!( + r.parse("11:29:13.120535"), + DateTime::parse_from_rfc3339(&format!("{}{}", &now_as_date[..10], "T11:29:13+00:00")) + ); + } + + #[test] + fn test_tricky_line() { + let r = LogDateParser::new_with_guess("[1996-12-19T16:39:57-08:00] foobar").unwrap(); + assert!(r.parse("nothing").is_err()); + } + + #[test] + #[ignore] // need to make code LocalTime agnostic + fn test_custom_format() { + assert!(LogDateParser::new_with_format( + "[1996-12-19T16:39:57-08:00] foobar", + "%Y-%m-%d %H:%M:%S" + ) + .is_err()); + let r = LogDateParser::new_with_format("[1996-12-19 16-39-57] foobar", "%Y-%m-%d %H-%M-%S") + .unwrap(); + assert_eq!( + r.parse("[2096-11-19 04-25-24]"), + DateTime::parse_from_rfc3339("2096-11-19T03:25:24+00:00") + ); + } +} diff --git a/src/main.rs b/src/main.rs index 5095452..74689a3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -10,11 +10,13 @@ use yansi::Paint; extern crate derive_builder; mod app; +mod dateparser; mod histogram; mod matchbar; mod plot; mod reader; mod stats; +mod timehist; fn disable_color_if_needed(option: &str) { match option { @@ -107,6 +109,35 @@ fn matchbar(matches: &ArgMatches) { ); } +fn timehist(matches: &ArgMatches) { + let mut builder = reader::TimeReaderBuilder::default(); + if let Some(string) = matches.value_of("regex") { + match Regex::new(&string) { + Ok(re) => { + builder.regex(re); + } + _ => { + eprintln!("[{}]: Failed to parse regex {}", Red.paint("ERROR"), string); + std::process::exit(1); + } + }; + } + if let Some(as_str) = matches.value_of("format") { + builder.ts_format(as_str.to_string()); + } + let width = matches.value_of_t("width").unwrap(); + let reader = 
builder.build().unwrap(); + let vec = reader.read(matches.value_of("input").unwrap()); + if vec.len() <= 1 { + eprintln!("[{}] Not enough data to process", Yellow.paint("WARN")); + std::process::exit(0); + } + let mut timehist = timehist::TimeHistogram::new(matches.value_of_t("intervals").unwrap(), &vec); + timehist.load(&vec); + + print!("{:width$}", timehist, width = width); +} + fn main() { let matches = app::get_app().get_matches(); let verbose = matches.is_present("verbose"); @@ -123,6 +154,9 @@ fn main() { Some(("matches", subcommand_matches)) => { matchbar(subcommand_matches); } + Some(("timehist", subcommand_matches)) => { + timehist(subcommand_matches); + } _ => unreachable!("Invalid subcommand"), }; } diff --git a/src/reader.rs b/src/reader.rs index e004999..094fd9b 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -2,9 +2,11 @@ use std::fs::File; use std::io::{self, BufRead, BufReader}; use std::ops::Range; +use chrono::{DateTime, FixedOffset}; use regex::Regex; use yansi::Color::{Magenta, Red}; +use crate::dateparser::LogDateParser; use crate::matchbar::{MatchBar, MatchBarRow}; #[derive(Debug, Default, Builder)] @@ -104,6 +106,73 @@ impl DataReader { } } +#[derive(Default, Builder)] +pub struct TimeReader { + #[builder(setter(strip_option), default)] + regex: Option, + #[builder(setter(strip_option), default)] + ts_format: Option, +} + +impl TimeReader { + pub fn read(&self, path: &str) -> Vec> { + let mut vec: Vec> = Vec::new(); + let mut iterator = open_file(path).lines(); + let first_line = match iterator.next() { + Some(Ok(as_string)) => as_string, + Some(Err(error)) => { + eprintln!("[{}]: {}", Red.paint("ERROR"), error); + return vec; + } + _ => return vec, + }; + let parser = match &self.ts_format { + Some(ts_format) => match LogDateParser::new_with_format(&first_line, &ts_format) { + Ok(p) => p, + Err(error) => { + eprintln!( + "[{}]: Could not figure out parsing strategy: {}", + Red.paint("ERROR"), + error + ); + return vec; + } + }, + None => 
match LogDateParser::new_with_guess(&first_line) { + Ok(p) => p, + Err(error) => { + eprintln!( + "[{}]: Could not figure out parsing strategy: {}", + Red.paint("ERROR"), + error + ); + return vec; + } + }, + }; + // The first line was consumed to build the parser: chain a copy of it back + // so that it goes through the same regex filter as every other line. + for line in std::iter::once(Ok(first_line.clone())).chain(iterator) { + match line { + Ok(string) => { + if let Ok(x) = parser.parse(&string) { + // Without a regex every timestamped line is a match. + let wanted = match &self.regex { + Some(re) => re.is_match(&string), + None => true, + }; + if wanted { + vec.push(x); + } + } + } + Err(error) => eprintln!("[{}]: {}", Red.paint("ERROR"), error), + } + } + vec + } +} + fn open_file(path: &str) -> Box { match path { "-" => Box::new(BufReader::new(io::stdin())), @@ -238,4 +307,59 @@ mod tests { Err(_) => assert!(false, "Could not create temp file"), } } + + #[test] + fn time_reader_guessing_with_regex() { + let mut builder = TimeReaderBuilder::default(); + builder.regex(Regex::new("f.o").unwrap()); + let reader = builder.build().unwrap(); + match NamedTempFile::new() { + Ok(ref mut file) => { + writeln!(file, "[2021-04-15T06:25:31+00:00] foobar").unwrap(); + writeln!(file, "[2021-04-15T06:26:31+00:00] bar").unwrap(); + writeln!(file, "[2021-04-15T06:27:31+00:00] foobar").unwrap(); + writeln!(file, "[2021-04-15T06:28:31+00:00] foobar").unwrap(); + writeln!(file, "none").unwrap(); + let ts = reader.read(file.path().to_str().unwrap()); + assert_eq!(ts.len(), 3); + assert_eq!( + ts[0], + DateTime::parse_from_rfc3339("2021-04-15T06:25:31+00:00").unwrap() + ); + assert_eq!( + ts[2], + DateTime::parse_from_rfc3339("2021-04-15T06:28:31+00:00").unwrap() + ); + } + Err(_) => assert!(false, "Could not create temp file"), + } + } + + #[test] + #[ignore] // need to make code LocalTime agnostic + fn time_reader_with_format() { + let mut builder = TimeReaderBuilder::default(); + builder.ts_format(String::from("%Y_%m_%d %H:%M")); + let reader = builder.build().unwrap(); + match NamedTempFile::new() { + Ok(ref mut file) => { + writeln!(file, "_2021_04_15 06:25] 
foobar").unwrap(); + writeln!(file, "_2021_04_15 06:26] bar").unwrap(); + writeln!(file, "_2021_04_15 06:27] foobar").unwrap(); + writeln!(file, "_2021_04_15 06:28] foobar").unwrap(); + writeln!(file, "none").unwrap(); + let ts = reader.read(file.path().to_str().unwrap()); + assert_eq!(ts.len(), 4); + assert_eq!( + ts[0], + DateTime::parse_from_rfc3339("2021-04-15T04:25:00+00:00").unwrap() + ); + assert_eq!( + ts[3], + DateTime::parse_from_rfc3339("2021-04-15T04:28:00+00:00").unwrap() + ); + } + Err(_) => assert!(false, "Could not create temp file"), + } + } } diff --git a/src/timehist.rs b/src/timehist.rs new file mode 100644 index 0000000..45b4271 --- /dev/null +++ b/src/timehist.rs @@ -0,0 +1,172 @@ +use std::fmt; + +use chrono::{DateTime, Duration, FixedOffset}; +use yansi::Color::{Blue, Green, Red}; + +#[derive(Debug)] +struct TimeBucket { + start: DateTime, + count: usize, +} + +// TODO: use trait for Bucket and TimeBucket +impl TimeBucket { + fn new(start: DateTime) -> TimeBucket { + TimeBucket { start, count: 0 } + } + + fn inc(&mut self) { + self.count += 1; + } +} + +#[derive(Debug)] +pub struct TimeHistogram { + vec: Vec, + min: DateTime, + max: DateTime, + step: Duration, + top: usize, + last: usize, + nanos: u64, +} + +// TODO: use trait for Histogram and TimeHistogram +impl TimeHistogram { + pub fn new(size: usize, ts: &[DateTime]) -> TimeHistogram { + let mut vec = Vec::::with_capacity(size); + let min = ts.iter().min().unwrap().clone(); + let max = ts.iter().max().unwrap().clone(); + let step = max - min; + let inc = step / size as i32; + for i in 0..size { + vec.push(TimeBucket::new(min + (inc * i as i32))); + } + TimeHistogram { + vec, + min, + max, + step, + top: 0, + last: size - 1, + nanos: (max - min).num_microseconds().unwrap() as u64, + } + } + + pub fn load(&mut self, vec: &[DateTime]) { + for x in vec { + self.add(*x); + } + } + + pub fn add(&mut self, ts: DateTime) { + if let Some(slot) = self.find_slot(ts) { + self.vec[slot].inc(); + 
self.top = self.top.max(self.vec[slot].count); + } + } + + fn find_slot(&self, ts: DateTime) -> Option { + if ts < self.min || ts > self.max { + return None; + } + let x = (ts - self.min).num_microseconds().unwrap() as u64 * self.vec.len() as u64; + // checked_div: a zero-width span (all timestamps equal) maps to slot 0. + Some((x.checked_div(self.nanos).unwrap_or(0) as usize).min(self.last)) + } + + fn date_fmt_string(&self) -> &str { + match self.step.num_seconds() { + x if x > 86400 => "%Y-%m-%d %H:%M:%S", + x if x > 300 => "%H:%M:%S", + x if x > 1 => "%H:%M:%S%.3f", + _ => "%H:%M:%S%.6f", + } + } +} + +impl fmt::Display for TimeHistogram { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let width = f.width().unwrap_or(100); + let divisor = 1.max(self.top / width); + let width_count = format!("{}", self.top).len(); + writeln!( + f, + "Matches: {}.", + Blue.paint(format!( + "{}", + self.vec.iter().map(|r| r.count).sum::() + )), + )?; + writeln!( + f, + "Each {} represents a count of {}", + Red.paint("∎"), + Blue.paint(divisor.to_string()), + )?; + let fmt = self.date_fmt_string(); + for row in self.vec.iter() { + // println!("ROW"); + // println!("COUNT {}", row.count); + // println!("WIDTH {}", row.count / divisor); + // println!("WIDTH2 {:A>::new(); + vec.push(DateTime::parse_from_rfc3339("2021-04-15T04:25:00+00:00").unwrap()); + vec.push(DateTime::parse_from_rfc3339("2022-04-15T04:25:00+00:00").unwrap()); + vec.push(DateTime::parse_from_rfc3339("2022-04-15T04:25:00+00:00").unwrap()); + vec.push(DateTime::parse_from_rfc3339("2022-04-15T04:25:00+00:00").unwrap()); + vec.push(DateTime::parse_from_rfc3339("2023-04-15T04:25:00+00:00").unwrap()); + let mut th = TimeHistogram::new(3, &vec); + th.load(&vec); + println!("{}", th); + let display = format!("{}", th); + assert!(display.contains("Matches: 5")); + assert!(display.contains("represents a count of 1")); + assert!(display.contains("[2021-04-15 04:25:00] [1] ∎\n")); + assert!(display.contains("[2021-12-14 12:25:00] [3] ∎∎∎\n")); + assert!(display.contains("[2022-08-14 20:25:00] [1] ∎\n")); + } 
+ + #[test] + fn test_small_time_interval() { + Paint::disable(); + let mut vec = Vec::>::new(); + vec.push(DateTime::parse_from_rfc3339("2022-04-15T04:25:00.001+00:00").unwrap()); + vec.push(DateTime::parse_from_rfc3339("2022-04-15T04:25:00.002+00:00").unwrap()); + vec.push(DateTime::parse_from_rfc3339("2022-04-15T04:25:00.006+00:00").unwrap()); + let mut th = TimeHistogram::new(4, &vec); + th.load(&vec); + println!("{}", th); + println!("{:#?}", th); + let display = format!("{}", th); + assert!(display.contains("Matches: 3")); + assert!(display.contains("represents a count of 1")); + assert!(display.contains("[04:25:00.001000] [2] ∎∎\n")); + assert!(display.contains("[04:25:00.002250] [0] \n")); + assert!(display.contains("[04:25:00.003500] [0] \n")); + assert!(display.contains("[04:25:00.004750] [1] ∎\n")); + } +}