Browse Source

refactor: Use DRY principle when iterating potential date/time formats

pull/2/head
JuanLeon Lahoz 5 years ago
parent
commit
fb3d527996
  1. 165
      src/dateparser.rs

165
src/dateparser.rs

@ -3,30 +3,32 @@ use std::ops::Range;
use chrono::{DateTime, FixedOffset, NaiveDateTime, NaiveTime, ParseError, TimeZone, Utc};
use regex::Regex;
type DateParsingFun = fn(s: &str) -> Result<DateTime<FixedOffset>, ParseError>;
type DateParsingFun = dyn Fn(&str) -> Result<DateTime<FixedOffset>, ParseError>;
// Those are some date formats that are common for my personal (and biased)
// experience. So, there is logic to detect and parse them.
const PARSE_SPECIFIERS: &[&str] = &[
const DATE_FORMATS: &[&str] = &[
"%Y-%m-%d %H:%M:%S,%3f", // python %(asctime)s
"%Y-%m-%d %H:%M:%S",
"%Y/%m/%d %H:%M:%S", // Seen in some nginx logs
"%d-%b-%Y::%H:%M:%S", // Seen in rabbitmq logs
"%H:%M:%S", // strace -t
"%H:%M:%S.%6f", // strace -tt (-ttt generates timestamps)
];
const TIME_FORMATS: &[&str] = &[
"%H:%M:%S", // strace -t
"%H:%M:%S.%6f", // strace -tt (-ttt generates timestamps)
];
// Max length that a timestamp can have
const MAX_LEN: usize = 28;
pub struct LogDateParser<'a> {
pub struct LogDateParser {
range: Range<usize>,
parser: Option<DateParsingFun>,
ts_format: Option<&'a str>,
parser: Box<DateParsingFun>,
}
impl<'a> LogDateParser<'a> {
pub fn new_with_guess(log_line: &str) -> Result<LogDateParser<'_>, String> {
impl LogDateParser {
pub fn new_with_guess(log_line: &str) -> Result<LogDateParser, String> {
if let Some(x) = Self::from_brackets(log_line) {
Ok(x)
} else if let Some(x) = Self::from_heuristic(log_line) {
@ -36,23 +38,27 @@ impl<'a> LogDateParser<'a> {
}
}
pub fn new_with_format(
log_line: &str,
format_string: &'a str,
) -> Result<LogDateParser<'a>, String> {
pub fn new_with_format(log_line: &str, format_string: &str) -> Result<LogDateParser, String> {
// We look for where the timestamp is in logs using a brute force
// approach with 1st log line, but capping the max length we scan for
for i in 0..log_line.len() {
for j in (i..(i + (MAX_LEN * 2)).min(log_line.len() + 1)).rev() {
if NaiveDateTime::parse_from_str(&log_line[i..j], format_string).is_ok() {
// I would like to capture ts_format in a closure and assign
// it to parser, but I cannot coerce a capturing closure to
// a typed fn. I still need to learn the idiomatic way of
// dealing with this.
let fmt = Box::new(format_string.to_string());
return Ok(LogDateParser {
range: i..j,
parser: None,
ts_format: Some(format_string),
parser: Box::new(move |string: &str| {
match NaiveDateTime::parse_from_str(string, &*fmt) {
Ok(naive) => {
let date_time: DateTime<Utc> =
Utc.from_local_datetime(&naive).unwrap();
Ok(date_time.with_timezone(&TimeZone::from_offset(
&FixedOffset::west(0),
)))
}
Err(err) => Err(err),
}
}),
});
}
}
@ -65,25 +71,16 @@ impl<'a> LogDateParser<'a> {
pub fn parse(&self, s: &str) -> Result<DateTime<FixedOffset>, ParseError> {
let range = self.range.start.min(s.len())..self.range.end.min(s.len());
match self.parser {
Some(p) => p(&s[range]),
None => match NaiveDateTime::parse_from_str(&s[range], self.ts_format.unwrap()) {
Ok(naive) => {
let date_time: DateTime<Utc> = Utc.from_local_datetime(&naive).unwrap();
Ok(date_time.with_timezone(&TimeZone::from_offset(&FixedOffset::west(0))))
}
Err(err) => Err(err),
},
}
(self.parser)(&s[range])
}
fn guess_parser(s: &str) -> Option<DateParsingFun> {
fn guess_parser(s: &str) -> Option<Box<DateParsingFun>> {
if DateTime::parse_from_rfc3339(s).is_ok() {
Some(DateTime::parse_from_rfc3339)
return Some(Box::new(DateTime::parse_from_rfc3339));
} else if DateTime::parse_from_rfc2822(s).is_ok() {
Some(DateTime::parse_from_rfc2822)
return Some(Box::new(DateTime::parse_from_rfc2822));
} else if Self::looks_like_timestamp(&s) {
Some(|string: &str| {
return Some(Box::new(|string: &str| {
let dot = match string.find('.') {
Some(x) => x,
None => string.len(),
@ -105,74 +102,36 @@ impl<'a> LogDateParser<'a> {
}
Err(_) => DateTime::parse_from_rfc3339(""),
}
})
} else if NaiveDateTime::parse_from_str(s, PARSE_SPECIFIERS[0]).is_ok() {
// TODO: All of this stuff below should be rewritten using macros.
// Reason for "repeating myself" is that I cannot coerce closures to
// fn types if they capture variables (an index to PARSE_SPECIFIERS,
// for instance).
Some(
|string: &str| match NaiveDateTime::parse_from_str(string, PARSE_SPECIFIERS[0]) {
Ok(naive) => {
let date_time: DateTime<Utc> = Utc.from_local_datetime(&naive).unwrap();
Ok(date_time.with_timezone(&TimeZone::from_offset(&FixedOffset::west(0))))
}
Err(err) => Err(err),
},
)
} else if NaiveDateTime::parse_from_str(s, PARSE_SPECIFIERS[1]).is_ok() {
Some(
|string: &str| match NaiveDateTime::parse_from_str(string, PARSE_SPECIFIERS[1]) {
Ok(naive) => {
let date_time: DateTime<Utc> = Utc.from_local_datetime(&naive).unwrap();
Ok(date_time.with_timezone(&TimeZone::from_offset(&FixedOffset::west(0))))
}
Err(err) => Err(err),
},
)
} else if NaiveDateTime::parse_from_str(s, PARSE_SPECIFIERS[2]).is_ok() {
Some(
|string: &str| match NaiveDateTime::parse_from_str(string, PARSE_SPECIFIERS[2]) {
Ok(naive) => {
let date_time: DateTime<Utc> = Utc.from_local_datetime(&naive).unwrap();
Ok(date_time.with_timezone(&TimeZone::from_offset(&FixedOffset::west(0))))
}
Err(err) => Err(err),
},
)
} else if NaiveDateTime::parse_from_str(s, PARSE_SPECIFIERS[3]).is_ok() {
Some(
|string: &str| match NaiveDateTime::parse_from_str(string, PARSE_SPECIFIERS[3]) {
Ok(naive) => {
let date_time: DateTime<Utc> = Utc.from_local_datetime(&naive).unwrap();
Ok(date_time.with_timezone(&TimeZone::from_offset(&FixedOffset::west(0))))
}
Err(err) => Err(err),
},
)
} else if NaiveTime::parse_from_str(s, PARSE_SPECIFIERS[4]).is_ok() {
Some(
|string: &str| match NaiveTime::parse_from_str(string, PARSE_SPECIFIERS[4]) {
Ok(naive_time) => Ok(Utc::today()
.and_time(naive_time)
.unwrap()
.with_timezone(&TimeZone::from_offset(&FixedOffset::west(0)))),
Err(err) => Err(err),
},
)
} else if NaiveTime::parse_from_str(s, PARSE_SPECIFIERS[5]).is_ok() {
Some(
|string: &str| match NaiveTime::parse_from_str(string, PARSE_SPECIFIERS[5]) {
Ok(naive_time) => Ok(Utc::today()
.and_time(naive_time)
.unwrap()
.with_timezone(&TimeZone::from_offset(&FixedOffset::west(0)))),
Err(err) => Err(err),
},
)
} else {
None
}));
}
for format in DATE_FORMATS.iter() {
if NaiveDateTime::parse_from_str(s, format).is_ok() {
return Some(Box::new(
move |string: &str| match NaiveDateTime::parse_from_str(string, format) {
Ok(naive) => {
let date_time: DateTime<Utc> = Utc.from_local_datetime(&naive).unwrap();
Ok(date_time
.with_timezone(&TimeZone::from_offset(&FixedOffset::west(0))))
}
Err(err) => Err(err),
},
));
}
}
for format in TIME_FORMATS.iter() {
if NaiveTime::parse_from_str(s, format).is_ok() {
return Some(Box::new(
move |string: &str| match NaiveTime::parse_from_str(string, format) {
Ok(naive_time) => Ok(Utc::today()
.and_time(naive_time)
.unwrap()
.with_timezone(&TimeZone::from_offset(&FixedOffset::west(0)))),
Err(err) => Err(err),
},
));
}
}
None
}
fn from_brackets(s: &str) -> Option<LogDateParser> {
@ -182,8 +141,7 @@ impl<'a> LogDateParser<'a> {
match Self::guess_parser(&s[1..x]) {
Some(parser) => Some(LogDateParser {
range: 1..x,
parser: Some(parser),
ts_format: None,
parser,
}),
_ => None,
}
@ -203,8 +161,7 @@ impl<'a> LogDateParser<'a> {
if let Some(parser) = Self::guess_parser(&s[i..j]) {
return Some(LogDateParser {
range: i..j,
parser: Some(parser),
ts_format: None,
parser,
});
}
}

Loading…
Cancel
Save