You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

227 lines
6.9 KiB

use std::fmt;
use chrono::{DateTime, Duration, FixedOffset};
use yansi::Color::{Blue, Cyan, Green, Magenta, Red};
use crate::plot::date_fmt_string;
/// Colors assigned to the terms, by term index, when painting their counts,
/// their bar segments and their names in the summary.
const COLORS: &[yansi::Color] = &[Red, Blue, Magenta, Green, Cyan];
#[derive(Debug)]
/// One time slot of the histogram, holding a separate counter per term.
struct TimeBucket {
/// Timestamp at which this slot begins; printed as the row label.
start: DateTime<FixedOffset>,
/// Occurrences recorded in this slot, indexed by term index.
count: Vec<usize>,
}
impl TimeBucket {
fn new(start: DateTime<FixedOffset>, counts: usize) -> TimeBucket {
TimeBucket {
start,
count: vec![0; counts],
}
}
fn inc(&mut self, index: usize) {
self.count[index] += 1;
}
fn total(&self) -> usize {
self.count.iter().sum::<usize>()
}
}
#[derive(Debug)]
/// A struct holding data to plot a split time histogram, where the display
/// shows the frequency of selected terms over time.
pub struct SplitTimeHistogram {
/// Time slots, created in increasing order of their start timestamp.
vec: Vec<TimeBucket>,
/// The terms whose frequency is displayed; a term's position here is the
/// index used in every bucket's counters.
strings: Vec<String>,
/// Earliest timestamp of the data given at construction; earlier timestamps
/// are not counted.
min: DateTime<FixedOffset>,
/// Latest timestamp of the data given at construction; later timestamps are
/// not counted.
max: DateTime<FixedOffset>,
/// Whole time span covered (`max - min`), not the width of a single slot;
/// its length in seconds selects the timestamp display format.
step: Duration,
/// Index of the last slot (number of slots minus one).
last: usize,
/// Whole time span covered, expressed in microseconds (despite the field
/// name, it is filled from `num_microseconds`).
nanos: u64,
}
impl SplitTimeHistogram {
    /// Creates a `SplitTimeHistogram` from a vector of `strings` (the terms
    /// whose frequency we want to display) and a slice of timestamps where the
    /// terms appear.
    ///
    /// `size` is the number of time slots in the histogram; a `size` of 0 is
    /// treated as 1 so that there is always a slot to count into. Parameter
    /// `ts` is a slice of tuples of `DateTime` (the timestamp of a term
    /// occurrence) and the index of the term in the `strings` parameter.
    ///
    /// # Panics
    ///
    /// Panics if `ts` is empty, if the span between the earliest and the
    /// latest timestamp cannot be expressed in microseconds, or if an index in
    /// `ts` is not a valid position in `strings`.
    pub fn new(
        size: usize,
        strings: Vec<String>,
        ts: &[(DateTime<FixedOffset>, usize)],
    ) -> SplitTimeHistogram {
        // Zero slots would mean a division by zero below and `size - 1`
        // underflowing, so fall back to a single slot.
        let size = size.max(1);
        let min = ts
            .iter()
            .map(|&(when, _)| when)
            .min()
            .expect("SplitTimeHistogram::new requires at least one timestamp");
        let max = ts
            .iter()
            .map(|&(when, _)| when)
            .max()
            .expect("SplitTimeHistogram::new requires at least one timestamp");
        let step = max - min;
        let inc = step / size as i32;
        let vec: Vec<TimeBucket> = (0..size)
            .map(|i| TimeBucket::new(min + inc * i as i32, strings.len()))
            .collect();
        // `max >= min`, so the span is never negative and fits in a `u64`.
        let nanos = step
            .num_microseconds()
            .expect("time span too large to be expressed in microseconds") as u64;
        let mut sth = SplitTimeHistogram {
            vec,
            strings,
            min,
            max,
            step,
            last: size - 1,
            nanos,
        };
        sth.load(ts);
        sth
    }

    /// Add to the `SplitTimeHistogram` data the values of a slice of tuples of
    /// `DateTime` (the timestamp of a term occurrence) and the index of the
    /// term in the list of common terms.
    ///
    /// # Panics
    ///
    /// Panics if an index is not a valid position in the list of terms.
    pub fn load(&mut self, vec: &[(DateTime<FixedOffset>, usize)]) {
        for &(when, index) in vec {
            self.add(when, index);
        }
    }

    /// Add to the `SplitTimeHistogram` data another data point (a timestamp and
    /// index of the term in the list of common terms).
    ///
    /// Timestamps outside the range covered by the histogram are ignored.
    ///
    /// # Panics
    ///
    /// Panics if `index` is not a valid position in the list of terms.
    pub fn add(&mut self, ts: DateTime<FixedOffset>, index: usize) {
        if let Some(slot) = self.find_slot(ts) {
            self.vec[slot].inc(index);
        }
    }

    /// Returns the index of the slot that `ts` falls into, or `None` when `ts`
    /// lies outside the `[min, max]` range of the histogram.
    fn find_slot(&self, ts: DateTime<FixedOffset>) -> Option<usize> {
        if ts < self.min || ts > self.max {
            return None;
        }
        if self.nanos == 0 {
            // Every sample shares the same timestamp, so the span is empty
            // and the proportional formula below would divide by zero.
            return Some(0);
        }
        // `ts` is within `[min, max]`, so this offset is bounded by the span
        // that was already converted successfully at construction time.
        let offset = (ts - self.min)
            .num_microseconds()
            .expect("offset is bounded by the histogram span") as u128;
        // Widened to u128 so `offset * slots` cannot overflow on long spans.
        let slot = offset * self.vec.len() as u128 / u128::from(self.nanos);
        // `ts == max` maps to `len`, one past the end: clamp to the last slot.
        Some((slot as usize).min(self.last))
    }

    /// Writes one histogram line for `row`: the slot's start timestamp
    /// rendered with `ts_fmt`, the per-term counts separated by `/` and padded
    /// to `widths`, and one run of `∎` per term with `count / divisor` blocks.
    ///
    /// Colors are taken from `COLORS` in term order and wrap around when there
    /// are more terms than colors.
    fn fmt_row(
        &self,
        f: &mut fmt::Formatter,
        row: &TimeBucket,
        divisor: usize,
        widths: &[usize],
        ts_fmt: &str,
    ) -> fmt::Result {
        let palette = || COLORS.iter().copied().cycle();
        write!(
            f,
            "[{}] [",
            Blue.paint(row.start.format(ts_fmt).to_string())
        )?;
        for (i, ((count, w), color)) in row.count.iter().zip(widths).zip(palette()).enumerate() {
            if i > 0 {
                write!(f, "/")?;
            }
            write!(
                f,
                "{}",
                color.paint(format!("{:width$}", *count, width = *w))
            )?;
        }
        write!(f, "] ")?;
        for (count, color) in row.count.iter().zip(palette()) {
            write!(f, "{}", color.paint("∎".repeat(*count / divisor)))?;
        }
        writeln!(f)
    }
}
impl fmt::Display for SplitTimeHistogram {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let width = f.width().unwrap_or(100);
let total = self.vec.iter().map(|r| r.total()).sum::<usize>();
let top = self.vec.iter().map(|r| r.total()).max().unwrap_or(1);
let divisor = 1.max(top / width);
// These are the widths of every count column
let widths: Vec<usize> = (0..self.strings.len())
.map(|i| {
self.vec
.iter()
.map(|r| r.count[i].to_string().len())
.max()
.unwrap()
})
.collect();
writeln!(f, "Matches: {}.", total)?;
for (i, s) in self.strings.iter().enumerate() {
let total = self.vec.iter().map(|r| r.count[i]).sum::<usize>();
writeln!(f, "{}: {}.", COLORS[i].paint(s), total)?;
}
writeln!(
f,
"Each {} represents a count of {}",
Red.paint("∎"),
divisor
)?;
let ts_fmt = date_fmt_string(self.step.num_seconds());
for row in self.vec.iter() {
self.fmt_row(f, row, divisor, &widths, ts_fmt)?;
}
Ok(())
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use yansi::Paint;

    /// Three slots over a two-year span: checks the overall and per-term
    /// totals, the bar scale, and the three rendered rows.
    #[test]
    fn test_big_time_interval() {
        Paint::disable();
        let at = |stamp: &str| DateTime::parse_from_rfc3339(stamp).unwrap();
        let mut samples: Vec<(DateTime<FixedOffset>, usize)> = vec![
            (at("2021-04-15T04:25:00+00:00"), 1),
            (at("2022-04-15T04:25:00+00:00"), 1),
            (at("2022-04-15T04:25:00+00:00"), 0),
            (at("2022-04-15T04:25:00+00:00"), 2),
        ];
        samples.extend(std::iter::repeat((at("2023-04-15T04:25:00+00:00"), 2)).take(11));
        let terms: Vec<String> = ["one", "two", "three"]
            .iter()
            .map(|term| term.to_string())
            .collect();
        let th = SplitTimeHistogram::new(3, terms, &samples);
        println!("{}", th);
        let display = th.to_string();
        assert!(display.contains("Matches: 15"));
        assert!(display.contains("one: 1."));
        assert!(display.contains("two: 2."));
        assert!(display.contains("three: 12."));
        assert!(display.contains("represents a count of 1"));
        assert!(display.contains("[2021-04-15 04:25:00] [0/1/ 0] ∎\n"));
        assert!(display.contains("[2021-12-14 12:25:00] [1/1/ 1] ∎∎∎\n"));
        assert!(display.contains("[2022-08-14 20:25:00] [0/0/11] ∎∎∎∎∎∎∎∎∎∎∎\n"));
    }
}