CS3700-project3/hptp/src/encoding.rs

181 lines
5.9 KiB
Rust

use lazy_static::lazy_static;
use regex::bytes::Regex;
// MeowCoder is a silly way to get around the tests being specifically hex data wrapped at 60 chars
// We detect input data of this form and encode it into raw bytes to achieve basically 50%
// compression on data sent over HPTP
lazy_static! {
static ref ENCODING_DETECTOR: Regex =
Regex::new(r"^\n?([0-9a-fA-F]{60}\n)*[0-9a-fA-F]{0,60}$").unwrap();
static ref HEX_DETECTOR: Regex = Regex::new(r"^[0-9a-fA-F]{0,60}$").unwrap();
}
static WRAP_SIZE: usize = 60;
// A struct that keeps track of where the last wrap position was at so we can print the data
// correctly
#[derive(Clone)]
pub struct MeowCoder {
line_index: usize,
}
impl MeowCoder {
// check if "encoding" by replacing the newline-wrapped hex with raw bytes is possible
// this takes an index representing the amount of encoded data sent so far, which
// determines where to end the next line of hex according to WRAP_SIZE
pub fn can_be_encoded(data: &[u8], index: usize) -> bool {
if index % (WRAP_SIZE / 2) == 0 {
ENCODING_DETECTOR.is_match(data)
} else {
let nl_pos = data.iter().position(|&r| r == '\n' as u8);
match nl_pos {
Some(nl_idx) => {
(nl_idx / 2 + index) % (WRAP_SIZE / 2) == 0
&& ENCODING_DETECTOR.is_match(&data[nl_idx..])
&& (nl_idx == 0 || HEX_DETECTOR.is_match(&data[..nl_idx - 1]))
}
None => false,
}
}
}
pub fn new() -> MeowCoder {
MeowCoder { line_index: 0 }
}
pub fn hex_to_nibble(chr: u8) -> u8 {
const _AL: u8 = 'a' as u8;
const _FL: u8 = 'f' as u8;
const _A: u8 = 'A' as u8;
const _F: u8 = 'F' as u8;
const _0: u8 = '0' as u8;
const _9: u8 = '9' as u8;
match chr {
_AL..=_FL => chr - _AL + 10,
_A..=_F => chr - _A + 10,
_0..=_9 => chr - _0,
_ => panic!("bad hex"),
}
}
pub fn u8_to_hex(val: u8) -> (u8, u8) {
let first = val >> 4;
let second = val & 0xF;
const LOOKUP: &'static [u8; 16] = b"0123456789abcdef";
(LOOKUP[first as usize], LOOKUP[second as usize])
}
pub fn encode(input: &Vec<u8>) -> (Vec<u8>, bool) {
let mut out: Vec<u8> = Vec::new();
let mut prev_char: u8 = 0;
let mut pair_first = false;
for chr in input {
if *chr == '\n' as u8 {
continue;
}
if !pair_first {
prev_char = *chr;
pair_first = true;
} else {
let byte_value: u8 =
MeowCoder::hex_to_nibble(prev_char) * 16 + MeowCoder::hex_to_nibble(*chr);
out.push(byte_value);
pair_first = false;
}
}
// trailing byte specifies whether this was cut or not
if pair_first {
out.push(MeowCoder::hex_to_nibble(prev_char) * 16);
(out, true)
} else {
(out, false)
}
}
pub fn decode(&mut self, input: &Vec<u8>, was_cut: bool) -> Vec<u8> {
let mut out: Vec<u8> = Vec::new();
for (pos, byte) in input.iter().enumerate() {
let (first, second) = MeowCoder::u8_to_hex(*byte);
out.push(first);
if pos < input.len() - 1 || !was_cut {
out.push(second);
}
self.line_index += 2;
if self.line_index == WRAP_SIZE {
self.line_index = 0;
out.push('\n' as u8);
}
}
out
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_match() {
assert_eq!(MeowCoder::can_be_encoded(b"abcd1234", 0), true);
assert_eq!(MeowCoder::can_be_encoded(b"abcXd1234", 0), false);
assert_eq!(
MeowCoder::can_be_encoded(
b"012345678901234567890123456789012345678901234567890123456789\nabcdef",
0
),
true
);
assert_eq!(
MeowCoder::can_be_encoded(
b"01234567890123456789012345678901234567890123456789012345678\nabcdef",
0
),
false
);
assert_eq!(MeowCoder::can_be_encoded(b"\x12\xab\x45\n", 0), false);
assert_eq!(MeowCoder::can_be_encoded(b"abcd1234\n012345678901234567890123456789012345678901234567890123456789\nabcdefabcd", 26), true);
assert_eq!(MeowCoder::can_be_encoded(b"abcd123456\nabcd", 25), true);
}
#[test]
fn test_encode() {
let vec = vec![
'a' as u8, '4' as u8, 'c' as u8, 'd' as u8, '\n' as u8, 'e' as u8, 'f' as u8,
];
assert_eq!(MeowCoder::encode(&vec), (vec![0xa4, 0xcd, 0xef], false));
let vec2 = vec![
'a' as u8, '4' as u8, 'c' as u8, 'd' as u8, '\n' as u8, 'e' as u8, 'f' as u8, '9' as u8,
];
assert_eq!(
MeowCoder::encode(&vec2),
(vec![0xa4, 0xcd, 0xef, 0x90], true)
);
}
#[test]
fn test_encode_decode() {
let hex_str: &[u8] =
b"012345678901234567890123456789012345678901234567890123456789\nabcdef";
let vec: Vec<u8> = Vec::from(hex_str);
let (vec2, was_cut) = MeowCoder::encode(&vec);
assert_eq!(was_cut, false);
let mut coder = MeowCoder::new();
let out = coder.decode(&vec2, was_cut);
assert_eq!(out, vec);
}
#[test]
fn test_encode_decode_cut() {
let hex_str: &[u8] =
b"012345678901234567890123456789012345678901234567890123456789\nabcdef3";
let vec: Vec<u8> = Vec::from(hex_str);
let (vec2, was_cut) = MeowCoder::encode(&vec);
assert_eq!(was_cut, true);
let mut coder = MeowCoder::new();
let out = coder.decode(&vec2, was_cut);
assert_eq!(out, vec);
}
}