Browse Source

Implement timehist subcommand

pull/2/head v0.3.0
JuanLeon Lahoz 5 years ago
parent
commit
980442786c
  1. 43
      Cargo.lock
  2. 1
      Cargo.toml
  3. 30
      README.md
  4. BIN
      resources/timehist-example.png
  5. 146
      src/app.rs
  6. 367
      src/dateparser.rs
  7. 34
      src/main.rs
  8. 124
      src/reader.rs
  9. 172
      src/timehist.rs

43
Cargo.lock generated

@ -44,6 +44,19 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chrono"
version = "0.4.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73"
dependencies = [
"libc",
"num-integer",
"num-traits",
"time",
"winapi",
]
[[package]]
name = "clap"
version = "3.0.0-beta.2"
@ -234,6 +247,7 @@ checksum = "9385f66bf6105b241aa65a61cb923ef20efc665cb9f9bb50ac2f0c4b7f378d41"
name = "lowcharts"
version = "0.2.0"
dependencies = [
"chrono",
"clap",
"derive_builder",
"float_eq",
@ -249,6 +263,25 @@ version = "2.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525"
[[package]]
name = "num-integer"
version = "0.1.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db"
dependencies = [
"autocfg",
"num-traits",
]
[[package]]
name = "num-traits"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290"
dependencies = [
"autocfg",
]
[[package]]
name = "os_str_bytes"
version = "2.4.0"
@ -433,6 +466,16 @@ dependencies = [
"unicode-width",
]
[[package]]
name = "time"
version = "0.1.43"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438"
dependencies = [
"libc",
"winapi",
]
[[package]]
name = "unicode-segmentation"
version = "1.7.1"

1
Cargo.toml

@ -11,6 +11,7 @@ yansi = "0.5.0"
isatty = "0.1"
derive_builder = "0.10.0"
regex = "1.4.5"
chrono = "0.4"
[dev-dependencies]
float_eq = "0.5.0"

30
README.md

@ -20,7 +20,7 @@ terminal.
Type `lowcharts --help`, or `lowcharts PLOT-TYPE --help` for a complete list of
options.
Currently three basic types of plots are supported:
Currently four basic types of plots are supported:
#### Bar chart for matches in the input
@ -95,6 +95,34 @@ of a metric over time, but not the speed of that evolution.
There is regex support for this type of plots.
#### Time Histogram
This chart is generated using `strace -tt ls -lR * 2>&1 | lowcharts timehist --intervals 10`:
[![Sample plot with lowcharts](resources/timehist-example.png)](resources/timehist-example.png)
Things like `lowcharts timehist --regex ' 404 ' nginx.log` should work in a
similar way, and would give you a glimpse of when and how many 404s are being
triggered in your server.
The idea is to depict the frequency of logs that match a regex (by default any
log that is read by the tool). The sub-command can autodetect the most common
(in my personal and biased experience) datetime/timestamp formats: rfc 3339, rfc
2822, python `%(asctime)s`, golang default log format, nginx, rabbitmq, strace
-t (or -tt, or -ttt), ltrace, ... as long as the timestamp is present in the first
line in the log and the format is consistent in all the lines that contain
timestamp. It is ok to have lines with no timestamp. The consistency is
required because of performance reasons: the 1st log line is the only one that
triggers the heuristics needed to create a specialized datetime parser on the
fly.
However, if you have a format that lowcharts cannot autodetect, you can
specify it via command line flag. For instance, `--format
'%d-%b-%Y::%H:%M:%S'`. Note that, as of today, you need to leave out the
timezone part of the format string (the autodetection works fine with
timezones).
### Installing
#### Via release

BIN
resources/timehist-example.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 27 KiB

146
src/app.rs

@ -1,6 +1,32 @@
use clap::{self, App, AppSettings, Arg};
fn add_common_options(app: App) -> App {
/// Registers the positional `input` argument on `app`.
///
/// The argument defaults to `-`; its long help states that a missing value or
/// a single dash means standard input.
fn add_input(app: App) -> App {
    let input = Arg::new("input")
        .about("Input file")
        .default_value("-")
        .long_about("If not present or a single dash, standard input will be used");
    app.arg(input)
}
/// Registers the `--max`/`-M` and `--min`/`-m` value options on `app`.
///
/// Both options take a value and, per their help text, are used to filter out
/// values above / below the given bound.
fn add_min_max(app: App) -> App {
    let upper_bound = Arg::new("max")
        .long("max")
        .short('M')
        .about("Filter out values bigger than this")
        .takes_value(true);
    let lower_bound = Arg::new("min")
        .long("min")
        .short('m')
        .about("Filter out values smaller than this")
        .takes_value(true);
    app.arg(upper_bound).arg(lower_bound)
}
fn add_regex(app: App) -> App {
const LONG_RE_ABOUT: &str = "\
A regular expression used for capturing the values to be plotted inside input
lines.
@ -15,22 +41,18 @@ Examples of regex are ' 200 \\d+ ([0-9.]+)' (where there is one anonymous captur
group) and 'a(a)? (?P<value>[0-9.]+)' (where there are two capture groups, and
the named one will be used).
";
app.arg(
Arg::new("max")
.long("max")
.short('M')
.about("Filter out values bigger than this")
.takes_value(true),
)
.arg(
Arg::new("min")
.long("min")
.short('m')
.about("Filter out values smaller than this")
Arg::new("regex")
.long("regex")
.short('R')
.about("Use a regex to capture input values")
.long_about(LONG_RE_ABOUT)
.takes_value(true),
)
.arg(
}
fn add_width(app: App) -> App {
app.arg(
Arg::new("width")
.long("width")
.short('w')
@ -38,37 +60,25 @@ the named one will be used).
.default_value("110")
.takes_value(true),
)
.arg(
Arg::new("regex")
.long("regex")
.short('R')
.about("Use a regex to capture input values")
.long_about(LONG_RE_ABOUT)
.takes_value(true),
)
.arg(
Arg::new("input")
.about("Input file")
.default_value("-")
.long_about("If not present or a single dash, standard input will be used"),
)
}
pub fn get_app() -> App<'static> {
let mut hist = App::new("hist")
.version(clap::crate_version!())
.setting(AppSettings::ColoredHelp)
.about("Plot an histogram from input values")
.arg(
fn add_intervals(app: App) -> App {
app.arg(
Arg::new("intervals")
.long("intervals")
.short('i')
.about("Use no more than this amount of buckets to classify data")
.default_value("20")
.takes_value(true),
);
)
}
hist = add_common_options(hist);
pub fn get_app() -> App<'static> {
let mut hist = App::new("hist")
.version(clap::crate_version!())
.setting(AppSettings::ColoredHelp)
.about("Plot an histogram from input values");
hist = add_input(add_regex(add_width(add_min_max(add_intervals(hist)))));
let mut plot = App::new("plot")
.version(clap::crate_version!())
@ -82,28 +92,14 @@ pub fn get_app() -> App<'static> {
.default_value("40")
.takes_value(true),
);
plot = add_common_options(plot);
plot = add_input(add_regex(add_width(add_min_max(plot))));
let matches = App::new("matches")
let mut matches = App::new("matches")
.version(clap::crate_version!())
.setting(AppSettings::ColoredHelp)
.setting(AppSettings::AllowMissingPositional)
.about("Plot barchar with counts of occurences of matches params")
.arg(
Arg::new("width")
.long("width")
.short('w')
.about("Use this many characters as terminal width")
.default_value("110")
.takes_value(true),
)
.arg(
Arg::new("input")
.about("Input file")
.required(true)
.long_about("If not present or a single dash, standard input will be used"),
)
.arg(
.about("Plot barchar with counts of occurences of matches params");
matches = add_input(add_width(matches)).arg(
Arg::new("match")
.about("Count maches for those strings")
.required(true)
@ -111,6 +107,19 @@ pub fn get_app() -> App<'static> {
.multiple(true),
);
let mut timehist = App::new("timehist")
.version(clap::crate_version!())
.setting(AppSettings::ColoredHelp)
.about("Plot histogram with amount of matches over time")
.arg(
Arg::new("format")
.long("format")
.short('f')
.about("Use this string formatting")
.takes_value(true),
);
timehist = add_input(add_width(add_regex(add_intervals(timehist))));
App::new("lowcharts")
.author(clap::crate_authors!())
.version(clap::crate_version!())
@ -136,6 +145,7 @@ pub fn get_app() -> App<'static> {
.subcommand(hist)
.subcommand(plot)
.subcommand(matches)
.subcommand(timehist)
}
#[cfg(test)]
@ -183,4 +193,32 @@ mod tests {
assert!(false, "Subcommand `plot` not detected");
}
}
/// The `matches` subcommand takes the input file first and then every string
/// whose occurrences must be counted.
#[test]
fn matches_subcommand_arg_parsing() {
    let cli = get_app().get_matches_from(vec!["lowcharts", "matches", "-", "A", "B", "C"]);
    match cli.subcommand_matches("matches") {
        Some(sub_m) => {
            assert_eq!("-", sub_m.value_of("input").unwrap());
            let wanted: Vec<&str> = sub_m.values_of("match").unwrap().collect();
            assert_eq!(vec!["A", "B", "C"], wanted);
        }
        None => assert!(false, "Subcommand `matches` not detected"),
    }
}
/// The `timehist` subcommand accepts `--regex` together with a positional
/// input file.
#[test]
fn timehist_subcommand_arg_parsing() {
    let cli = get_app().get_matches_from(vec!["lowcharts", "timehist", "--regex", "foo", "some"]);
    match cli.subcommand_matches("timehist") {
        Some(sub_m) => {
            assert_eq!("some", sub_m.value_of("input").unwrap());
            assert_eq!("foo", sub_m.value_of("regex").unwrap());
        }
        None => assert!(false, "Subcommand `timehist` not detected"),
    }
}
}

367
src/dateparser.rs

@ -0,0 +1,367 @@
use std::ops::Range;
use chrono::{DateTime, FixedOffset, Local, NaiveDateTime, NaiveTime, ParseError, TimeZone, Utc};
use regex::Regex;
// Signature shared by every timestamp parser that autodetection can select.
// It is a plain `fn` pointer, so only functions and non-capturing closures can
// be stored in it.
type DateParsingFun = fn(s: &str) -> Result<DateTime<FixedOffset>, ParseError>;
// Those are some date formats that are common for my personal (and biased)
// experience. So, there is logic to detect and parse them.
// NOTE: `guess_parser` refers to these entries by index, so their order
// matters. The last two entries carry no date and are parsed as time-only.
const PARSE_SPECIFIERS: &[&str] = &[
"%Y-%m-%d %H:%M:%S,%3f", // python %(asctime)s
"%Y-%m-%d %H:%M:%S",
"%Y/%m/%d %H:%M:%S", // Seen in some nginx logs
"%d-%b-%Y::%H:%M:%S", // Seen in rabbitmq logs
"%H:%M:%S", // strace -t
"%H:%M:%S.%6f", // strace -tt (-ttt generates timestamps)
];
// Max length that a timestamp can have
// It bounds the window scanned by `from_heuristic`; `new_with_format` scans
// up to twice this length.
// NOTE(review): an RFC 3339 timestamp with microseconds and a numeric offset
// (e.g. `2021-04-25T16:57:15.123456+02:00`, 32 bytes) is longer than this cap,
// so the heuristic scan cannot match it -- confirm whether that is intended.
const MAX_LEN: usize = 28;
/// Extracts timestamps from log lines that all share one layout.
///
/// An instance is built from a sample line (see `new_with_guess` and
/// `new_with_format`) and then reused for every other line through `parse`.
pub struct LogDateParser<'a> {
// Range used to slice the timestamp out of each line handed to `parse`.
range: Range<usize>,
// Parsing function picked by autodetection; `None` when a format string is
// used instead.
parser: Option<DateParsingFun>,
// Caller supplied format string; `None` when `parser` is set.
ts_format: Option<&'a str>,
}
impl<'a> LogDateParser<'a> {
pub fn new_with_guess(log_line: &str) -> Result<LogDateParser<'_>, String> {
if let Some(x) = Self::from_brackets(log_line) {
Ok(x)
} else if let Some(x) = Self::from_heuristic(log_line) {
Ok(x)
} else {
Err(format!("Could not parse a timestamp in {}", log_line))
}
}
pub fn new_with_format(
log_line: &str,
format_string: &'a str,
) -> Result<LogDateParser<'a>, String> {
// We look for where the timestamp is in logs using a brute force
// approach with 1st log line, but capping the max length we scan for
for i in 0..log_line.len() {
for j in (i..(i + (MAX_LEN * 2)).min(log_line.len() + 1)).rev() {
if NaiveDateTime::parse_from_str(&log_line[i..j], format_string).is_ok() {
// I would like to capture ts_format in a closure and assign
// it to parser, but I cannot coerce a capturing closure to
// a typed fn. I still need to learn the idiomatic way of
// dealing with this.
return Ok(LogDateParser {
range: i..j,
parser: None,
ts_format: Some(format_string),
});
}
}
}
Err(format!(
"Could locate a '{}' timestamp in '{}'",
format_string, log_line
))
}
pub fn parse(&self, s: &str) -> Result<DateTime<FixedOffset>, ParseError> {
let range = self.range.start.min(s.len())..self.range.end.min(s.len());
match self.parser {
Some(p) => p(&s[range]),
None => match NaiveDateTime::parse_from_str(&s[range], self.ts_format.unwrap()) {
Ok(naive) => {
let date_time: DateTime<Local> = Local.from_local_datetime(&naive).unwrap();
Ok(date_time.with_timezone(&TimeZone::from_offset(&FixedOffset::west(0))))
}
Err(err) => Err(err),
},
}
}
fn guess_parser(s: &str) -> Option<DateParsingFun> {
if DateTime::parse_from_rfc3339(s).is_ok() {
Some(DateTime::parse_from_rfc3339)
} else if DateTime::parse_from_rfc2822(s).is_ok() {
Some(DateTime::parse_from_rfc2822)
} else if Self::looks_like_timestamp(&s) {
Some(|string: &str| {
let dot = match string.find('.') {
Some(x) => x,
None => string.len(),
};
let nanosecs = if dot < string.len() {
let missing_zeros = (10 + dot - string.len()) as u32;
match string[dot + 1..].parse::<u32>() {
Ok(x) => x * 10_u32.pow(missing_zeros),
_ => 0,
}
} else {
0
};
match string[..dot].parse::<i64>() {
Ok(secs) => {
let naive = NaiveDateTime::from_timestamp(secs, nanosecs);
let date_time: DateTime<Local> = Local.from_local_datetime(&naive).unwrap();
Ok(date_time.with_timezone(&TimeZone::from_offset(&FixedOffset::west(0))))
}
Err(_) => DateTime::parse_from_rfc3339(""),
}
})
} else if NaiveDateTime::parse_from_str(s, PARSE_SPECIFIERS[0]).is_ok() {
// TODO: All of this stuff below should be rewritten using macros.
// Reason for "repeating myself" is that I cannot coerce closures to
// fn types if they capture variables (an index to PARSE_SPECIFIERS,
// for instance).
Some(
|string: &str| match NaiveDateTime::parse_from_str(string, PARSE_SPECIFIERS[0]) {
Ok(naive) => {
let date_time: DateTime<Local> = Local.from_local_datetime(&naive).unwrap();
Ok(date_time.with_timezone(&TimeZone::from_offset(&FixedOffset::west(0))))
}
Err(err) => Err(err),
},
)
} else if NaiveDateTime::parse_from_str(s, PARSE_SPECIFIERS[1]).is_ok() {
Some(
|string: &str| match NaiveDateTime::parse_from_str(string, PARSE_SPECIFIERS[1]) {
Ok(naive) => {
let date_time: DateTime<Local> = Local.from_local_datetime(&naive).unwrap();
Ok(date_time.with_timezone(&TimeZone::from_offset(&FixedOffset::west(0))))
}
Err(err) => Err(err),
},
)
} else if NaiveDateTime::parse_from_str(s, PARSE_SPECIFIERS[2]).is_ok() {
Some(
|string: &str| match NaiveDateTime::parse_from_str(string, PARSE_SPECIFIERS[2]) {
Ok(naive) => {
let date_time: DateTime<Local> = Local.from_local_datetime(&naive).unwrap();
Ok(date_time.with_timezone(&TimeZone::from_offset(&FixedOffset::west(0))))
}
Err(err) => Err(err),
},
)
} else if NaiveDateTime::parse_from_str(s, PARSE_SPECIFIERS[3]).is_ok() {
Some(
|string: &str| match NaiveDateTime::parse_from_str(string, PARSE_SPECIFIERS[3]) {
Ok(naive) => {
let date_time: DateTime<Local> = Local.from_local_datetime(&naive).unwrap();
Ok(date_time.with_timezone(&TimeZone::from_offset(&FixedOffset::west(0))))
}
Err(err) => Err(err),
},
)
} else if NaiveTime::parse_from_str(s, PARSE_SPECIFIERS[4]).is_ok() {
Some(
|string: &str| match NaiveTime::parse_from_str(string, PARSE_SPECIFIERS[4]) {
Ok(naive_time) => Ok(Utc::today()
.and_time(naive_time)
.unwrap()
.with_timezone(&TimeZone::from_offset(&FixedOffset::west(0)))),
Err(err) => Err(err),
},
)
} else if NaiveTime::parse_from_str(s, PARSE_SPECIFIERS[5]).is_ok() {
Some(
|string: &str| match NaiveTime::parse_from_str(string, PARSE_SPECIFIERS[5]) {
Ok(naive_time) => Ok(Utc::today()
.and_time(naive_time)
.unwrap()
.with_timezone(&TimeZone::from_offset(&FixedOffset::west(0)))),
Err(err) => Err(err),
},
)
} else {
None
}
}
fn from_brackets(s: &str) -> Option<LogDateParser> {
match s.chars().next() {
Some('[') => {
if let Some(x) = s.find(']') {
match Self::guess_parser(&s[1..x]) {
Some(parser) => Some(LogDateParser {
range: 1..x,
parser: Some(parser),
ts_format: None,
}),
_ => None,
}
} else {
None
}
}
_ => None,
}
}
fn from_heuristic(s: &str) -> Option<LogDateParser> {
// First we locate the first digit
for (i, c) in s.chars().enumerate() {
if c.is_digit(10) {
for j in (i..(i + MAX_LEN).min(s.len() + 1)).rev() {
if let Some(parser) = Self::guess_parser(&s[i..j]) {
return Some(LogDateParser {
range: i..j,
parser: Some(parser),
ts_format: None,
});
}
}
break;
}
}
None
}
// Returns true if string looks like a unix-like timestamp of arbitrary
// precision
fn looks_like_timestamp(s: &str) -> bool {
Regex::new(r"^[0-9]{10}(\.[0-9]{1,9})?$")
.unwrap()
.is_match(s)
}
}
// Tests for timestamp autodetection and parsing. Each test builds a parser
// from a sample line and then checks it against another line with the same
// layout.
#[cfg(test)]
mod tests {
use super::*;
// A timestamp enclosed in brackets at the start of the line.
#[test]
fn test_rfc3339_brackets() {
let r = LogDateParser::new_with_guess("[1996-12-19T16:39:57-08:00] foobar").unwrap();
assert_eq!(
r.parse("[2096-11-19T16:39:57-08:00]"),
DateTime::parse_from_rfc3339("2096-11-19T16:39:57-08:00")
);
}
#[test]
fn test_rfc3339_no_brackets() {
let r = LogDateParser::new_with_guess("2021-04-25T16:57:15.337Z foobar").unwrap();
assert_eq!(
r.parse("2031-04-25T16:57:15.337Z"),
DateTime::parse_from_rfc3339("2031-04-25T16:57:15.337Z")
);
}
#[test]
fn test_rfc2822() {
let r = LogDateParser::new_with_guess("12 Jul 2003 10:52:37 +0200 foobar").unwrap();
assert_eq!(
r.parse("22 Jun 2003 10:52:37 +0500"),
DateTime::parse_from_rfc2822("22 Jun 2003 10:52:37 +0500")
);
}
// An opening bracket without a closing one: bracket detection gives up and
// the scan starting at the first digit finds the timestamp.
#[test]
fn test_bad_bracket() {
let r = LogDateParser::new_with_guess("[12 Jul 2003 10:52:37 +0200 foobar").unwrap();
assert_eq!(
r.parse("[22 Jun 2003 10:52:37 +0500"),
DateTime::parse_from_rfc2822("22 Jun 2003 10:52:37 +0500")
);
}
// Text before the timestamp is skipped as long as it contains no digits.
#[test]
fn test_prefix() {
let r = LogDateParser::new_with_guess("foobar 1996-12-19T16:39:57-08:00 foobar").unwrap();
assert_eq!(
r.parse("foobar 2096-11-19T16:39:57-08:00"),
DateTime::parse_from_rfc3339("2096-11-19T16:39:57-08:00")
);
}
#[test]
fn test_bad_format() {
assert!(LogDateParser::new_with_guess("996-12-19T16:39:57-08:00 foobar").is_err());
}
#[test]
fn test_short_line() {
assert!(LogDateParser::new_with_guess("9").is_err());
}
#[test]
fn test_empty_line() {
assert!(LogDateParser::new_with_guess("").is_err());
}
// NOTE(review): 1619655527 seconds since the epoch is 2021-04-29T00:18:47Z.
// The expected values below are two hours earlier, i.e. they presumably
// assume the epoch value is read as local time in a UTC+2 zone -- confirm
// before enabling this test.
#[test]
#[ignore] // need to make code LocalTime agnostic
fn test_timestamps() {
let r = LogDateParser::new_with_guess("ts 1619688527.018165").unwrap();
assert_eq!(
r.parse("ts 1619655527.888165"),
DateTime::parse_from_rfc3339("2021-04-28T22:18:47.888165+00:00")
);
let r = LogDateParser::new_with_guess("1619688527.123").unwrap();
assert_eq!(
r.parse("1619655527.123"),
DateTime::parse_from_rfc3339("2021-04-28T22:18:47.123+00:00")
);
}
// NOTE(review): the expected values are offset from the inputs by two hours
// for the April dates and by one hour for the December date, so they only
// hold in a local time zone with those offsets (presumably the author's).
#[test]
#[ignore] // need to make code LocalTime agnostic
fn test_known_formats() {
let r = LogDateParser::new_with_guess("2021-04-28 06:25:24,321").unwrap();
assert_eq!(
r.parse("2021-04-28 06:25:24,321"),
DateTime::parse_from_rfc3339("2021-04-28T04:25:24.321+00:00")
);
let r = LogDateParser::new_with_guess("2021-04-28 06:25:24").unwrap();
assert_eq!(
r.parse("2021-04-28 06:25:24"),
DateTime::parse_from_rfc3339("2021-04-28T04:25:24+00:00")
);
let r = LogDateParser::new_with_guess("28-Apr-2021::12:10:42").unwrap();
assert_eq!(
r.parse("28-Apr-2021::12:10:42"),
DateTime::parse_from_rfc3339("2021-04-28T10:10:42+00:00")
);
let r = LogDateParser::new_with_guess("2019/12/19 05:01:02").unwrap();
assert_eq!(
r.parse("2019/12/19 05:01:02"),
DateTime::parse_from_rfc3339("2019-12-19T04:01:02+00:00")
);
// Time-only formats are placed on today's date, so the expectation is built
// from the current date.
let r = LogDateParser::new_with_guess("11:29:13.120535").unwrap();
let now_as_date = format!("{}", Utc::today());
assert_eq!(
r.parse("11:29:13.120535"),
DateTime::parse_from_rfc3339(&format!(
"{}{}",
&now_as_date[..10],
"T11:29:13.120535+00:00"
))
);
// The parser only reads the range learnt from the sample line, so the
// fractional part of the longer input is left out.
let r = LogDateParser::new_with_guess("11:29:13").unwrap();
assert_eq!(
r.parse("11:29:13.120535"),
DateTime::parse_from_rfc3339(&format!("{}{}", &now_as_date[..10], "T11:29:13+00:00"))
);
}
// A line without a timestamp must produce an error, not a panic.
#[test]
fn test_tricky_line() {
let r = LogDateParser::new_with_guess("[1996-12-19T16:39:57-08:00] foobar").unwrap();
assert!(r.parse("nothing").is_err());
}
// NOTE(review): the expected value is one hour earlier than the input, so it
// only holds in a local time zone at UTC+1 on that date.
#[test]
#[ignore] // need to make code LocalTime agnostic
fn test_custom_format() {
assert!(LogDateParser::new_with_format(
"[1996-12-19T16:39:57-08:00] foobar",
"%Y-%m-%d %H:%M:%S"
)
.is_err());
let r = LogDateParser::new_with_format("[1996-12-19 16-39-57] foobar", "%Y-%m-%d %H-%M-%S")
.unwrap();
assert_eq!(
r.parse("[2096-11-19 04-25-24]"),
DateTime::parse_from_rfc3339("2096-11-19T03:25:24+00:00")
);
}
}

34
src/main.rs

@ -10,11 +10,13 @@ use yansi::Paint;
extern crate derive_builder;
mod app;
mod dateparser;
mod histogram;
mod matchbar;
mod plot;
mod reader;
mod stats;
mod timehist;
fn disable_color_if_needed(option: &str) {
match option {
@ -107,6 +109,35 @@ fn matchbar(matches: &ArgMatches) {
);
}
fn timehist(matches: &ArgMatches) {
let mut builder = reader::TimeReaderBuilder::default();
if let Some(string) = matches.value_of("regex") {
match Regex::new(&string) {
Ok(re) => {
builder.regex(re);
}
_ => {
eprintln!("[{}]: Failed to parse regex {}", Red.paint("ERROR"), string);
std::process::exit(1);
}
};
}
if let Some(as_str) = matches.value_of("format") {
builder.ts_format(as_str.to_string());
}
let width = matches.value_of_t("width").unwrap();
let reader = builder.build().unwrap();
let vec = reader.read(matches.value_of("input").unwrap());
if vec.len() <= 1 {
eprintln!("[{}] Not enough data to process", Yellow.paint("WARN"));
std::process::exit(0);
}
let mut timehist = timehist::TimeHistogram::new(matches.value_of_t("intervals").unwrap(), &vec);
timehist.load(&vec);
print!("{:width$}", timehist, width = width);
}
fn main() {
let matches = app::get_app().get_matches();
let verbose = matches.is_present("verbose");
@ -123,6 +154,9 @@ fn main() {
Some(("matches", subcommand_matches)) => {
matchbar(subcommand_matches);
}
Some(("timehist", subcommand_matches)) => {
timehist(subcommand_matches);
}
_ => unreachable!("Invalid subcommand"),
};
}

124
src/reader.rs

@ -2,9 +2,11 @@ use std::fs::File;
use std::io::{self, BufRead, BufReader};
use std::ops::Range;
use chrono::{DateTime, FixedOffset};
use regex::Regex;
use yansi::Color::{Magenta, Red};
use crate::dateparser::LogDateParser;
use crate::matchbar::{MatchBar, MatchBarRow};
#[derive(Debug, Default, Builder)]
@ -104,6 +106,73 @@ impl DataReader {
}
}
/// Reads a log file and collects the timestamps of its lines (see `read`).
///
/// Instances are created through the derived `TimeReaderBuilder`; both
/// settings are optional.
#[derive(Default, Builder)]
pub struct TimeReader {
// Optional regex used by `read` to select which lines contribute a timestamp.
#[builder(setter(strip_option), default)]
regex: Option<Regex>,
// Optional timestamp format string; when absent, `read` autodetects the
// format from the first line.
#[builder(setter(strip_option), default)]
ts_format: Option<String>,
}
impl TimeReader {
    /// Reads `path` line by line and returns the timestamp of every line that
    /// holds one and, when a regex is configured, matches that regex.
    ///
    /// The first line decides how timestamps are located and parsed: with the
    /// configured format when there is one, by autodetection otherwise. Lines
    /// without a parsable timestamp are skipped. Problems are reported on
    /// stderr; when no parsing strategy can be established an empty vector is
    /// returned.
    pub fn read(&self, path: &str) -> Vec<DateTime<FixedOffset>> {
        let mut vec: Vec<DateTime<FixedOffset>> = Vec::new();
        let mut iterator = open_file(path).lines();
        let first_line = match iterator.next() {
            Some(Ok(as_string)) => as_string,
            Some(Err(error)) => {
                eprintln!("[{}]: {}", Red.paint("ERROR"), error);
                return vec;
            }
            None => return vec,
        };
        let strategy = match &self.ts_format {
            Some(ts_format) => LogDateParser::new_with_format(&first_line, ts_format),
            None => LogDateParser::new_with_guess(&first_line),
        };
        let parser = match strategy {
            Ok(p) => p,
            Err(error) => {
                eprintln!(
                    "[{}]: Could not figure out parsing strategy: {}",
                    Red.paint("ERROR"),
                    error
                );
                return vec;
            }
        };
        let regex = self.regex.as_ref();
        // Every line goes through the same filter, the first one included:
        // it must not be counted when it does not match the regex.
        let mut collect = |line: &str| {
            if let Ok(x) = parser.parse(line) {
                if regex.map_or(true, |re| re.is_match(line)) {
                    vec.push(x);
                }
            }
        };
        collect(&first_line);
        for line in iterator {
            match line {
                Ok(string) => collect(&string),
                Err(error) => eprintln!("[{}]: {}", Red.paint("ERROR"), error),
            }
        }
        vec
    }
}
fn open_file(path: &str) -> Box<dyn io::BufRead> {
match path {
"-" => Box::new(BufReader::new(io::stdin())),
@ -238,4 +307,59 @@ mod tests {
Err(_) => assert!(false, "Could not create temp file"),
}
}
/// With an autodetected format, only the lines matching the regex are kept and
/// lines without a timestamp are ignored.
#[test]
fn time_reader_guessing_with_regex() {
    let reader = {
        let mut builder = TimeReaderBuilder::default();
        builder.regex(Regex::new("f.o").unwrap());
        builder.build().unwrap()
    };
    let log_lines = [
        "[2021-04-15T06:25:31+00:00] foobar",
        "[2021-04-15T06:26:31+00:00] bar",
        "[2021-04-15T06:27:31+00:00] foobar",
        "[2021-04-15T06:28:31+00:00] foobar",
        "none",
    ];
    match NamedTempFile::new() {
        Ok(ref mut file) => {
            for log_line in log_lines.iter() {
                writeln!(file, "{}", log_line).unwrap();
            }
            let ts = reader.read(file.path().to_str().unwrap());
            let first = DateTime::parse_from_rfc3339("2021-04-15T06:25:31+00:00").unwrap();
            let last = DateTime::parse_from_rfc3339("2021-04-15T06:28:31+00:00").unwrap();
            assert_eq!(ts.len(), 3);
            assert_eq!(ts[0], first);
            assert_eq!(ts[2], last);
        }
        Err(_) => assert!(false, "Could not create temp file"),
    }
}
/// With an explicit format string, every line holding a timestamp in that
/// format is kept.
#[test]
#[ignore] // need to make code LocalTime agnostic
fn time_reader_with_format() {
    let reader = {
        let mut builder = TimeReaderBuilder::default();
        builder.ts_format(String::from("%Y_%m_%d %H:%M"));
        builder.build().unwrap()
    };
    let log_lines = [
        "_2021_04_15 06:25] foobar",
        "_2021_04_15 06:26] bar",
        "_2021_04_15 06:27] foobar",
        "_2021_04_15 06:28] foobar",
        "none",
    ];
    match NamedTempFile::new() {
        Ok(ref mut file) => {
            for log_line in log_lines.iter() {
                writeln!(file, "{}", log_line).unwrap();
            }
            let ts = reader.read(file.path().to_str().unwrap());
            let first = DateTime::parse_from_rfc3339("2021-04-15T04:25:00+00:00").unwrap();
            let last = DateTime::parse_from_rfc3339("2021-04-15T04:28:00+00:00").unwrap();
            assert_eq!(ts.len(), 4);
            assert_eq!(ts[0], first);
            assert_eq!(ts[3], last);
        }
        Err(_) => assert!(false, "Could not create temp file"),
    }
}
}

172
src/timehist.rs

@ -0,0 +1,172 @@
use std::fmt;
use chrono::{DateTime, Duration, FixedOffset};
use yansi::Color::{Blue, Green, Red};
/// One bar of the time histogram.
#[derive(Debug)]
struct TimeBucket {
// Timestamp at which this bucket begins.
start: DateTime<FixedOffset>,
// Number of timestamps recorded in this bucket so far (see `inc`).
count: usize,
}
// TODO: use trait for Bucket and TimeBucket
impl TimeBucket {
fn new(start: DateTime<FixedOffset>) -> TimeBucket {
TimeBucket { start, count: 0 }
}
fn inc(&mut self) {
self.count += 1;
}
}
/// Histogram of timestamps split into equally sized time buckets.
#[derive(Debug)]
pub struct TimeHistogram {
// Buckets in chronological order.
vec: Vec<TimeBucket>,
// Earliest timestamp handed to `new`.
min: DateTime<FixedOffset>,
// Latest timestamp handed to `new`.
max: DateTime<FixedOffset>,
// Whole time span covered (`max - min`); despite the name this is not the
// width of a single bucket.
step: Duration,
// Highest count held by any bucket so far.
top: usize,
// Index of the last bucket.
last: usize,
// Time span covered, expressed in microseconds (despite the name).
nanos: u64,
}
// TODO: use trait for Histogram and TimeHistogram
impl TimeHistogram {
pub fn new(size: usize, ts: &[DateTime<FixedOffset>]) -> TimeHistogram {
let mut vec = Vec::<TimeBucket>::with_capacity(size);
let min = ts.iter().min().unwrap().clone();
let max = ts.iter().max().unwrap().clone();
let step = max - min;
let inc = step / size as i32;
for i in 0..size {
vec.push(TimeBucket::new(min + (inc * i as i32)));
}
TimeHistogram {
vec,
min,
max,
step,
top: 0,
last: size - 1,
nanos: (max - min).num_microseconds().unwrap() as u64,
}
}
pub fn load(&mut self, vec: &[DateTime<FixedOffset>]) {
for x in vec {
self.add(*x);
}
}
pub fn add(&mut self, ts: DateTime<FixedOffset>) {
if let Some(slot) = self.find_slot(ts) {
self.vec[slot].inc();
self.top = self.top.max(self.vec[slot].count);
}
}
fn find_slot(&self, ts: DateTime<FixedOffset>) -> Option<usize> {
if ts < self.min || ts > self.max {
None
} else {
let x = (ts - self.min).num_microseconds().unwrap() as u64;
Some(((x * self.vec.len() as u64 / self.nanos) as usize).min(self.last))
}
}
fn date_fmt_string(&self) -> &str {
match self.step.num_seconds() {
x if x > 86400 => "%Y-%m-%d %H:%M:%S",
x if x > 300 => "%H:%M:%S",
x if x > 1 => "%H:%M:%S%.3f",
_ => "%H:%M:%S%.6f",
}
}
}
impl fmt::Display for TimeHistogram {
    /// Renders a summary header followed by one line per bucket with its start
    /// time, its count and a bar.
    ///
    /// The formatter width (`{:width$}`, 100 when absent) selects how many
    /// matches each bar character stands for.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // A zero width would make the divisor computation divide by zero.
        let width = f.width().unwrap_or(100).max(1);
        let divisor = 1.max(self.top / width);
        let width_count = self.top.to_string().len();
        let total: usize = self.vec.iter().map(|r| r.count).sum();
        writeln!(f, "Matches: {}.", Blue.paint(total.to_string()))?;
        writeln!(
            f,
            "Each {} represents a count of {}",
            Red.paint("∎"),
            Blue.paint(divisor.to_string()),
        )?;
        let label_format = self.date_fmt_string();
        for row in self.vec.iter() {
            writeln!(
                f,
                "[{label}] [{count}] {bar}",
                label = Blue.paint(row.start.format(label_format).to_string()),
                count = Green.paint(format!("{:width$}", row.count, width = width_count)),
                bar = Red.paint("∎".repeat(row.count / divisor))
            )?;
        }
        Ok(())
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use yansi::Paint;

    /// Parses an RFC 3339 literal used as test input.
    fn rfc3339(text: &str) -> DateTime<FixedOffset> {
        DateTime::parse_from_rfc3339(text).unwrap()
    }

    /// Two years of data in three buckets: labels carry the date.
    #[test]
    fn test_big_time_interval() {
        Paint::disable();
        let vec = vec![
            rfc3339("2021-04-15T04:25:00+00:00"),
            rfc3339("2022-04-15T04:25:00+00:00"),
            rfc3339("2022-04-15T04:25:00+00:00"),
            rfc3339("2022-04-15T04:25:00+00:00"),
            rfc3339("2023-04-15T04:25:00+00:00"),
        ];
        let mut th = TimeHistogram::new(3, &vec);
        th.load(&vec);
        println!("{}", th);
        let display = th.to_string();
        assert!(display.contains("Matches: 5"));
        assert!(display.contains("represents a count of 1"));
        assert!(display.contains("[2021-04-15 04:25:00] [1] ∎\n"));
        assert!(display.contains("[2021-12-14 12:25:00] [3] ∎∎∎\n"));
        assert!(display.contains("[2022-08-14 20:25:00] [1] ∎\n"));
    }

    /// Five milliseconds of data in four buckets: labels have microsecond
    /// precision.
    #[test]
    fn test_small_time_interval() {
        Paint::disable();
        let vec = vec![
            rfc3339("2022-04-15T04:25:00.001+00:00"),
            rfc3339("2022-04-15T04:25:00.002+00:00"),
            rfc3339("2022-04-15T04:25:00.006+00:00"),
        ];
        let mut th = TimeHistogram::new(4, &vec);
        th.load(&vec);
        println!("{}", th);
        println!("{:#?}", th);
        let display = th.to_string();
        assert!(display.contains("Matches: 3"));
        assert!(display.contains("represents a count of 1"));
        assert!(display.contains("[04:25:00.001000] [2] ∎∎\n"));
        assert!(display.contains("[04:25:00.002250] [0] \n"));
        assert!(display.contains("[04:25:00.003500] [0] \n"));
        assert!(display.contains("[04:25:00.004750] [1] ∎\n"));
    }
}
Loading…
Cancel
Save