9 changed files with 866 additions and 59 deletions
|
After Width: | Height: | Size: 27 KiB |
@ -0,0 +1,367 @@
|
||||
use std::ops::Range; |
||||
|
||||
use chrono::{DateTime, FixedOffset, Local, NaiveDateTime, NaiveTime, ParseError, TimeZone, Utc}; |
||||
use regex::Regex; |
||||
|
||||
type DateParsingFun = fn(s: &str) -> Result<DateTime<FixedOffset>, ParseError>; |
||||
|
||||
// These are some date formats that are common in my personal (and biased)
// experience, so there is logic to detect and parse them.
|
||||
const PARSE_SPECIFIERS: &[&str] = &[ |
||||
"%Y-%m-%d %H:%M:%S,%3f", // python %(asctime)s
|
||||
"%Y-%m-%d %H:%M:%S", |
||||
"%Y/%m/%d %H:%M:%S", // Seen in some nginx logs
|
||||
"%d-%b-%Y::%H:%M:%S", // Seen in rabbitmq logs
|
||||
"%H:%M:%S", // strace -t
|
||||
"%H:%M:%S.%6f", // strace -tt (-ttt generates timestamps)
|
||||
]; |
||||
|
||||
// Max length that a timestamp can have
|
||||
const MAX_LEN: usize = 28; |
||||
|
||||
pub struct LogDateParser<'a> { |
||||
range: Range<usize>, |
||||
parser: Option<DateParsingFun>, |
||||
ts_format: Option<&'a str>, |
||||
} |
||||
|
||||
impl<'a> LogDateParser<'a> { |
||||
pub fn new_with_guess(log_line: &str) -> Result<LogDateParser<'_>, String> { |
||||
if let Some(x) = Self::from_brackets(log_line) { |
||||
Ok(x) |
||||
} else if let Some(x) = Self::from_heuristic(log_line) { |
||||
Ok(x) |
||||
} else { |
||||
Err(format!("Could not parse a timestamp in {}", log_line)) |
||||
} |
||||
} |
||||
|
||||
pub fn new_with_format( |
||||
log_line: &str, |
||||
format_string: &'a str, |
||||
) -> Result<LogDateParser<'a>, String> { |
||||
// We look for where the timestamp is in logs using a brute force
|
||||
// approach with 1st log line, but capping the max length we scan for
|
||||
for i in 0..log_line.len() { |
||||
for j in (i..(i + (MAX_LEN * 2)).min(log_line.len() + 1)).rev() { |
||||
if NaiveDateTime::parse_from_str(&log_line[i..j], format_string).is_ok() { |
||||
// I would like to capture ts_format in a closure and assign
|
||||
// it to parser, but I cannot coerce a capturing closure to
|
||||
// a typed fn. I still need to learn the idiomatic way of
|
||||
// dealing with this.
|
||||
return Ok(LogDateParser { |
||||
range: i..j, |
||||
parser: None, |
||||
ts_format: Some(format_string), |
||||
}); |
||||
} |
||||
} |
||||
} |
||||
Err(format!( |
||||
"Could locate a '{}' timestamp in '{}'", |
||||
format_string, log_line |
||||
)) |
||||
} |
||||
|
||||
pub fn parse(&self, s: &str) -> Result<DateTime<FixedOffset>, ParseError> { |
||||
let range = self.range.start.min(s.len())..self.range.end.min(s.len()); |
||||
match self.parser { |
||||
Some(p) => p(&s[range]), |
||||
None => match NaiveDateTime::parse_from_str(&s[range], self.ts_format.unwrap()) { |
||||
Ok(naive) => { |
||||
let date_time: DateTime<Local> = Local.from_local_datetime(&naive).unwrap(); |
||||
Ok(date_time.with_timezone(&TimeZone::from_offset(&FixedOffset::west(0)))) |
||||
} |
||||
Err(err) => Err(err), |
||||
}, |
||||
} |
||||
} |
||||
|
||||
fn guess_parser(s: &str) -> Option<DateParsingFun> { |
||||
if DateTime::parse_from_rfc3339(s).is_ok() { |
||||
Some(DateTime::parse_from_rfc3339) |
||||
} else if DateTime::parse_from_rfc2822(s).is_ok() { |
||||
Some(DateTime::parse_from_rfc2822) |
||||
} else if Self::looks_like_timestamp(&s) { |
||||
Some(|string: &str| { |
||||
let dot = match string.find('.') { |
||||
Some(x) => x, |
||||
None => string.len(), |
||||
}; |
||||
let nanosecs = if dot < string.len() { |
||||
let missing_zeros = (10 + dot - string.len()) as u32; |
||||
match string[dot + 1..].parse::<u32>() { |
||||
Ok(x) => x * 10_u32.pow(missing_zeros), |
||||
_ => 0, |
||||
} |
||||
} else { |
||||
0 |
||||
}; |
||||
match string[..dot].parse::<i64>() { |
||||
Ok(secs) => { |
||||
let naive = NaiveDateTime::from_timestamp(secs, nanosecs); |
||||
let date_time: DateTime<Local> = Local.from_local_datetime(&naive).unwrap(); |
||||
Ok(date_time.with_timezone(&TimeZone::from_offset(&FixedOffset::west(0)))) |
||||
} |
||||
Err(_) => DateTime::parse_from_rfc3339(""), |
||||
} |
||||
}) |
||||
} else if NaiveDateTime::parse_from_str(s, PARSE_SPECIFIERS[0]).is_ok() { |
||||
// TODO: All of this stuff below should be rewritten using macros.
|
||||
// Reason for "repeating myself" is that I cannot coerce closures to
|
||||
// fn types if they capture variables (an index to PARSE_SPECIFIERS,
|
||||
// for instance).
|
||||
Some( |
||||
|string: &str| match NaiveDateTime::parse_from_str(string, PARSE_SPECIFIERS[0]) { |
||||
Ok(naive) => { |
||||
let date_time: DateTime<Local> = Local.from_local_datetime(&naive).unwrap(); |
||||
Ok(date_time.with_timezone(&TimeZone::from_offset(&FixedOffset::west(0)))) |
||||
} |
||||
Err(err) => Err(err), |
||||
}, |
||||
) |
||||
} else if NaiveDateTime::parse_from_str(s, PARSE_SPECIFIERS[1]).is_ok() { |
||||
Some( |
||||
|string: &str| match NaiveDateTime::parse_from_str(string, PARSE_SPECIFIERS[1]) { |
||||
Ok(naive) => { |
||||
let date_time: DateTime<Local> = Local.from_local_datetime(&naive).unwrap(); |
||||
Ok(date_time.with_timezone(&TimeZone::from_offset(&FixedOffset::west(0)))) |
||||
} |
||||
Err(err) => Err(err), |
||||
}, |
||||
) |
||||
} else if NaiveDateTime::parse_from_str(s, PARSE_SPECIFIERS[2]).is_ok() { |
||||
Some( |
||||
|string: &str| match NaiveDateTime::parse_from_str(string, PARSE_SPECIFIERS[2]) { |
||||
Ok(naive) => { |
||||
let date_time: DateTime<Local> = Local.from_local_datetime(&naive).unwrap(); |
||||
Ok(date_time.with_timezone(&TimeZone::from_offset(&FixedOffset::west(0)))) |
||||
} |
||||
Err(err) => Err(err), |
||||
}, |
||||
) |
||||
} else if NaiveDateTime::parse_from_str(s, PARSE_SPECIFIERS[3]).is_ok() { |
||||
Some( |
||||
|string: &str| match NaiveDateTime::parse_from_str(string, PARSE_SPECIFIERS[3]) { |
||||
Ok(naive) => { |
||||
let date_time: DateTime<Local> = Local.from_local_datetime(&naive).unwrap(); |
||||
Ok(date_time.with_timezone(&TimeZone::from_offset(&FixedOffset::west(0)))) |
||||
} |
||||
Err(err) => Err(err), |
||||
}, |
||||
) |
||||
} else if NaiveTime::parse_from_str(s, PARSE_SPECIFIERS[4]).is_ok() { |
||||
Some( |
||||
|string: &str| match NaiveTime::parse_from_str(string, PARSE_SPECIFIERS[4]) { |
||||
Ok(naive_time) => Ok(Utc::today() |
||||
.and_time(naive_time) |
||||
.unwrap() |
||||
.with_timezone(&TimeZone::from_offset(&FixedOffset::west(0)))), |
||||
Err(err) => Err(err), |
||||
}, |
||||
) |
||||
} else if NaiveTime::parse_from_str(s, PARSE_SPECIFIERS[5]).is_ok() { |
||||
Some( |
||||
|string: &str| match NaiveTime::parse_from_str(string, PARSE_SPECIFIERS[5]) { |
||||
Ok(naive_time) => Ok(Utc::today() |
||||
.and_time(naive_time) |
||||
.unwrap() |
||||
.with_timezone(&TimeZone::from_offset(&FixedOffset::west(0)))), |
||||
Err(err) => Err(err), |
||||
}, |
||||
) |
||||
} else { |
||||
None |
||||
} |
||||
} |
||||
|
||||
fn from_brackets(s: &str) -> Option<LogDateParser> { |
||||
match s.chars().next() { |
||||
Some('[') => { |
||||
if let Some(x) = s.find(']') { |
||||
match Self::guess_parser(&s[1..x]) { |
||||
Some(parser) => Some(LogDateParser { |
||||
range: 1..x, |
||||
parser: Some(parser), |
||||
ts_format: None, |
||||
}), |
||||
_ => None, |
||||
} |
||||
} else { |
||||
None |
||||
} |
||||
} |
||||
_ => None, |
||||
} |
||||
} |
||||
|
||||
fn from_heuristic(s: &str) -> Option<LogDateParser> { |
||||
// First we locate the first digit
|
||||
for (i, c) in s.chars().enumerate() { |
||||
if c.is_digit(10) { |
||||
for j in (i..(i + MAX_LEN).min(s.len() + 1)).rev() { |
||||
if let Some(parser) = Self::guess_parser(&s[i..j]) { |
||||
return Some(LogDateParser { |
||||
range: i..j, |
||||
parser: Some(parser), |
||||
ts_format: None, |
||||
}); |
||||
} |
||||
} |
||||
break; |
||||
} |
||||
} |
||||
None |
||||
} |
||||
|
||||
// Returns true if string looks like a unix-like timestamp of arbitrary
|
||||
// precision
|
||||
fn looks_like_timestamp(s: &str) -> bool { |
||||
Regex::new(r"^[0-9]{10}(\.[0-9]{1,9})?$") |
||||
.unwrap() |
||||
.is_match(s) |
||||
} |
||||
} |
||||
|
||||
#[cfg(test)]
mod tests {

    use super::*;

    #[test]
    fn test_rfc3339_brackets() {
        let parser = LogDateParser::new_with_guess("[1996-12-19T16:39:57-08:00] foobar").unwrap();
        let expected = DateTime::parse_from_rfc3339("2096-11-19T16:39:57-08:00");
        assert_eq!(parser.parse("[2096-11-19T16:39:57-08:00]"), expected);
    }

    #[test]
    fn test_rfc3339_no_brackets() {
        let parser = LogDateParser::new_with_guess("2021-04-25T16:57:15.337Z foobar").unwrap();
        let expected = DateTime::parse_from_rfc3339("2031-04-25T16:57:15.337Z");
        assert_eq!(parser.parse("2031-04-25T16:57:15.337Z"), expected);
    }

    #[test]
    fn test_rfc2822() {
        let parser = LogDateParser::new_with_guess("12 Jul 2003 10:52:37 +0200 foobar").unwrap();
        let expected = DateTime::parse_from_rfc2822("22 Jun 2003 10:52:37 +0500");
        assert_eq!(parser.parse("22 Jun 2003 10:52:37 +0500"), expected);
    }

    #[test]
    fn test_bad_bracket() {
        // No closing bracket: the guess falls back to the first-digit scan.
        let parser = LogDateParser::new_with_guess("[12 Jul 2003 10:52:37 +0200 foobar").unwrap();
        let expected = DateTime::parse_from_rfc2822("22 Jun 2003 10:52:37 +0500");
        assert_eq!(parser.parse("[22 Jun 2003 10:52:37 +0500"), expected);
    }

    #[test]
    fn test_prefix() {
        let parser =
            LogDateParser::new_with_guess("foobar 1996-12-19T16:39:57-08:00 foobar").unwrap();
        let expected = DateTime::parse_from_rfc3339("2096-11-19T16:39:57-08:00");
        assert_eq!(parser.parse("foobar 2096-11-19T16:39:57-08:00"), expected);
    }

    #[test]
    fn test_bad_format() {
        let outcome = LogDateParser::new_with_guess("996-12-19T16:39:57-08:00 foobar");
        assert!(outcome.is_err());
    }

    #[test]
    fn test_short_line() {
        let outcome = LogDateParser::new_with_guess("9");
        assert!(outcome.is_err());
    }

    #[test]
    fn test_empty_line() {
        let outcome = LogDateParser::new_with_guess("");
        assert!(outcome.is_err());
    }

    #[test]
    #[ignore] // need to make code LocalTime agnostic
    fn test_timestamps() {
        let parser = LogDateParser::new_with_guess("ts 1619688527.018165").unwrap();
        let expected = DateTime::parse_from_rfc3339("2021-04-28T22:18:47.888165+00:00");
        assert_eq!(parser.parse("ts 1619655527.888165"), expected);

        let parser = LogDateParser::new_with_guess("1619688527.123").unwrap();
        let expected = DateTime::parse_from_rfc3339("2021-04-28T22:18:47.123+00:00");
        assert_eq!(parser.parse("1619655527.123"), expected);
    }

    #[test]
    #[ignore] // need to make code LocalTime agnostic
    fn test_known_formats() {
        let parser = LogDateParser::new_with_guess("2021-04-28 06:25:24,321").unwrap();
        let expected = DateTime::parse_from_rfc3339("2021-04-28T04:25:24.321+00:00");
        assert_eq!(parser.parse("2021-04-28 06:25:24,321"), expected);

        let parser = LogDateParser::new_with_guess("2021-04-28 06:25:24").unwrap();
        let expected = DateTime::parse_from_rfc3339("2021-04-28T04:25:24+00:00");
        assert_eq!(parser.parse("2021-04-28 06:25:24"), expected);

        let parser = LogDateParser::new_with_guess("28-Apr-2021::12:10:42").unwrap();
        let expected = DateTime::parse_from_rfc3339("2021-04-28T10:10:42+00:00");
        assert_eq!(parser.parse("28-Apr-2021::12:10:42"), expected);

        let parser = LogDateParser::new_with_guess("2019/12/19 05:01:02").unwrap();
        let expected = DateTime::parse_from_rfc3339("2019-12-19T04:01:02+00:00");
        assert_eq!(parser.parse("2019/12/19 05:01:02"), expected);

        // Time-only formats are completed with today's date.
        let parser = LogDateParser::new_with_guess("11:29:13.120535").unwrap();
        let now_as_date = format!("{}", Utc::today());
        let today = &now_as_date[..10];
        let expected =
            DateTime::parse_from_rfc3339(&format!("{}{}", today, "T11:29:13.120535+00:00"));
        assert_eq!(parser.parse("11:29:13.120535"), expected);

        let parser = LogDateParser::new_with_guess("11:29:13").unwrap();
        let expected = DateTime::parse_from_rfc3339(&format!("{}{}", today, "T11:29:13+00:00"));
        assert_eq!(parser.parse("11:29:13.120535"), expected);
    }

    #[test]
    fn test_tricky_line() {
        let parser = LogDateParser::new_with_guess("[1996-12-19T16:39:57-08:00] foobar").unwrap();
        let outcome = parser.parse("nothing");
        assert!(outcome.is_err());
    }

    #[test]
    #[ignore] // need to make code LocalTime agnostic
    fn test_custom_format() {
        let mismatch = LogDateParser::new_with_format(
            "[1996-12-19T16:39:57-08:00] foobar",
            "%Y-%m-%d %H:%M:%S",
        );
        assert!(mismatch.is_err());

        let parser =
            LogDateParser::new_with_format("[1996-12-19 16-39-57] foobar", "%Y-%m-%d %H-%M-%S")
                .unwrap();
        let expected = DateTime::parse_from_rfc3339("2096-11-19T03:25:24+00:00");
        assert_eq!(parser.parse("[2096-11-19 04-25-24]"), expected);
    }
}
||||
@ -0,0 +1,172 @@
|
||||
use std::fmt; |
||||
|
||||
use chrono::{DateTime, Duration, FixedOffset}; |
||||
use yansi::Color::{Blue, Green, Red}; |
||||
|
||||
#[derive(Debug)] |
||||
struct TimeBucket { |
||||
start: DateTime<FixedOffset>, |
||||
count: usize, |
||||
} |
||||
|
||||
// TODO: use trait for Bucket and TimeBucket
|
||||
impl TimeBucket { |
||||
fn new(start: DateTime<FixedOffset>) -> TimeBucket { |
||||
TimeBucket { start, count: 0 } |
||||
} |
||||
|
||||
fn inc(&mut self) { |
||||
self.count += 1; |
||||
} |
||||
} |
||||
|
||||
#[derive(Debug)] |
||||
pub struct TimeHistogram { |
||||
vec: Vec<TimeBucket>, |
||||
min: DateTime<FixedOffset>, |
||||
max: DateTime<FixedOffset>, |
||||
step: Duration, |
||||
top: usize, |
||||
last: usize, |
||||
nanos: u64, |
||||
} |
||||
|
||||
// TODO: use trait for Histogram and TimeHistogram
|
||||
impl TimeHistogram { |
||||
pub fn new(size: usize, ts: &[DateTime<FixedOffset>]) -> TimeHistogram { |
||||
let mut vec = Vec::<TimeBucket>::with_capacity(size); |
||||
let min = ts.iter().min().unwrap().clone(); |
||||
let max = ts.iter().max().unwrap().clone(); |
||||
let step = max - min; |
||||
let inc = step / size as i32; |
||||
for i in 0..size { |
||||
vec.push(TimeBucket::new(min + (inc * i as i32))); |
||||
} |
||||
TimeHistogram { |
||||
vec, |
||||
min, |
||||
max, |
||||
step, |
||||
top: 0, |
||||
last: size - 1, |
||||
nanos: (max - min).num_microseconds().unwrap() as u64, |
||||
} |
||||
} |
||||
|
||||
pub fn load(&mut self, vec: &[DateTime<FixedOffset>]) { |
||||
for x in vec { |
||||
self.add(*x); |
||||
} |
||||
} |
||||
|
||||
pub fn add(&mut self, ts: DateTime<FixedOffset>) { |
||||
if let Some(slot) = self.find_slot(ts) { |
||||
self.vec[slot].inc(); |
||||
self.top = self.top.max(self.vec[slot].count); |
||||
} |
||||
} |
||||
|
||||
fn find_slot(&self, ts: DateTime<FixedOffset>) -> Option<usize> { |
||||
if ts < self.min || ts > self.max { |
||||
None |
||||
} else { |
||||
let x = (ts - self.min).num_microseconds().unwrap() as u64; |
||||
Some(((x * self.vec.len() as u64 / self.nanos) as usize).min(self.last)) |
||||
} |
||||
} |
||||
|
||||
fn date_fmt_string(&self) -> &str { |
||||
match self.step.num_seconds() { |
||||
x if x > 86400 => "%Y-%m-%d %H:%M:%S", |
||||
x if x > 300 => "%H:%M:%S", |
||||
x if x > 1 => "%H:%M:%S%.3f", |
||||
_ => "%H:%M:%S%.6f", |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl fmt::Display for TimeHistogram { |
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
||||
let width = f.width().unwrap_or(100); |
||||
let divisor = 1.max(self.top / width); |
||||
let width_count = format!("{}", self.top).len(); |
||||
writeln!( |
||||
f, |
||||
"Matches: {}.", |
||||
Blue.paint(format!( |
||||
"{}", |
||||
self.vec.iter().map(|r| r.count).sum::<usize>() |
||||
)), |
||||
)?; |
||||
writeln!( |
||||
f, |
||||
"Each {} represents a count of {}", |
||||
Red.paint("∎"), |
||||
Blue.paint(divisor.to_string()), |
||||
)?; |
||||
let fmt = self.date_fmt_string(); |
||||
for row in self.vec.iter() { |
||||
// println!("ROW");
|
||||
// println!("COUNT {}", row.count);
|
||||
// println!("WIDTH {}", row.count / divisor);
|
||||
// println!("WIDTH2 {:A<width$}", "", width = row.count / divisor);
|
||||
// println!("LABEL1 {}", row.start);
|
||||
// println!("LABEFMT {}", self.date_fmt_string());
|
||||
// println!("LABEL2 {}", row.start.format(self.date_fmt_string()));
|
||||
writeln!( |
||||
f, |
||||
"[{label}] [{count}] {bar}", |
||||
label = Blue.paint(format!("{}", row.start.format(fmt))), |
||||
count = Green.paint(format!("{:width$}", row.count, width = width_count)), |
||||
bar = Red.paint(format!("{:∎<width$}", "", width = row.count / divisor)) |
||||
)?; |
||||
} |
||||
Ok(()) |
||||
} |
||||
} |
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use yansi::Paint;

    #[test]
    fn test_big_time_interval() {
        Paint::disable();
        let stamps: Vec<DateTime<FixedOffset>> = vec![
            DateTime::parse_from_rfc3339("2021-04-15T04:25:00+00:00").unwrap(),
            DateTime::parse_from_rfc3339("2022-04-15T04:25:00+00:00").unwrap(),
            DateTime::parse_from_rfc3339("2022-04-15T04:25:00+00:00").unwrap(),
            DateTime::parse_from_rfc3339("2022-04-15T04:25:00+00:00").unwrap(),
            DateTime::parse_from_rfc3339("2023-04-15T04:25:00+00:00").unwrap(),
        ];
        let mut histogram = TimeHistogram::new(3, &stamps);
        histogram.load(&stamps);
        println!("{}", histogram);
        let rendered = format!("{}", histogram);
        for expected in &[
            "Matches: 5",
            "represents a count of 1",
            "[2021-04-15 04:25:00] [1] ∎\n",
            "[2021-12-14 12:25:00] [3] ∎∎∎\n",
            "[2022-08-14 20:25:00] [1] ∎\n",
        ] {
            assert!(rendered.contains(expected));
        }
    }

    #[test]
    fn test_small_time_interval() {
        Paint::disable();
        let stamps: Vec<DateTime<FixedOffset>> = vec![
            DateTime::parse_from_rfc3339("2022-04-15T04:25:00.001+00:00").unwrap(),
            DateTime::parse_from_rfc3339("2022-04-15T04:25:00.002+00:00").unwrap(),
            DateTime::parse_from_rfc3339("2022-04-15T04:25:00.006+00:00").unwrap(),
        ];
        let mut histogram = TimeHistogram::new(4, &stamps);
        histogram.load(&stamps);
        println!("{}", histogram);
        println!("{:#?}", histogram);
        let rendered = format!("{}", histogram);
        for expected in &[
            "Matches: 3",
            "represents a count of 1",
            "[04:25:00.001000] [2] ∎∎\n",
            "[04:25:00.002250] [0] \n",
            "[04:25:00.003500] [0] \n",
            "[04:25:00.004750] [1] ∎\n",
        ] {
            assert!(rendered.contains(expected));
        }
    }
}
||||
Loading…
Reference in new issue