// Compact transfer encoding for payloads that are already hex dumps.
//
// Text made of lowercase hex digits, wrapped at `WRAP_SIZE` digits per line,
// is packed two digits per byte by `MeowCoder::encode` and expanded back to
// text (line breaks re-inserted) by `MeowCoder::decode`.
//
// Wiring from the originating change ("Add silly hex encoder"): `lib.rs`
// declares `pub mod encoding;`, and `SegData` carries a
// `pub is_meow_encoded: bool` flag that every constructor in that change
// initialises to `false`.

/// Number of hex digits on each full line of encodable text.
///
/// Must stay even: `encode` pairs digits across line breaks, so an odd width
/// would shift the pairing from one line to the next.
const WRAP_SIZE: usize = 60;

/// Returns `true` when every byte of `line` is a lowercase hex digit.
fn is_lower_hex(line: &[u8]) -> bool {
    line.iter()
        .all(|&b| matches!(b, b'0'..=b'9' | b'a'..=b'f'))
}

/// Reports whether `data` survives `MeowCoder::encode` followed by
/// `MeowCoder::decode` (on a fresh coder) byte-for-byte.
///
/// Accepted input is zero or more lines of exactly `WRAP_SIZE` lowercase hex
/// digits, each terminated by `\n`, followed by a final unterminated run of
/// fewer than `WRAP_SIZE` lowercase hex digits whose length is even.
///
/// Inputs that look like hex but would not round-trip are rejected:
/// * uppercase digits — `decode` always emits lowercase;
/// * an odd number of digits — `encode` drops the unpaired trailing digit;
/// * a final full-width line without `\n` — `decode` would append one.
pub fn can_be_encoded(data: &[u8]) -> bool {
    // Walk the lines back to front so the (possibly partial) tail comes first.
    let mut lines = data.rsplit(|&b| b == b'\n');
    let tail = lines.next().unwrap_or_default();

    tail.len() < WRAP_SIZE
        && tail.len() % 2 == 0
        && is_lower_hex(tail)
        && lines.all(|line| line.len() == WRAP_SIZE && is_lower_hex(line))
}

/// Converter between wrapped hex text and its packed binary form.
///
/// The only state is the number of hex digits already emitted on the current
/// output line, so consecutive `decode` calls on one coder continue the same
/// line.
#[derive(Clone, Debug, Default)]
pub struct MeowCoder {
    line_index: usize,
}

impl MeowCoder {
    /// Creates a coder positioned at the start of a line.
    pub fn new() -> MeowCoder {
        MeowCoder::default()
    }

    /// Maps one ASCII hex digit (either case) to its value in `0..=15`.
    ///
    /// # Panics
    /// Panics with `"bad hex"` if `chr` is not a hex digit.
    fn hex_to_nibble(chr: u8) -> u8 {
        match chr {
            b'0'..=b'9' => chr - b'0',
            b'a'..=b'f' => chr - b'a' + 10,
            b'A'..=b'F' => chr - b'A' + 10,
            _ => panic!("bad hex"),
        }
    }

    /// Splits `val` into its high and low nibble as lowercase ASCII hex digits.
    fn u8_to_hex(val: u8) -> (u8, u8) {
        const LOOKUP: &[u8; 16] = b"0123456789abcdef";
        (
            LOOKUP[usize::from(val >> 4)],
            LOOKUP[usize::from(val & 0xF)],
        )
    }

    /// Packs hex text into bytes, two digits per byte, skipping every `\n`.
    ///
    /// An unpaired trailing digit is dropped; check the input with
    /// [`can_be_encoded`] first if that must not happen.
    ///
    /// # Panics
    /// Panics if a completed digit pair contains a byte that is not a hex
    /// digit.
    pub fn encode(input: &[u8]) -> Vec<u8> {
        let mut digits = input.iter().copied().filter(|&b| b != b'\n');
        let mut out = Vec::with_capacity(input.len() / 2);
        // Consume the digits pairwise; the loop ends as soon as a full pair
        // is no longer available.
        while let (Some(high), Some(low)) = (digits.next(), digits.next()) {
            out.push((Self::hex_to_nibble(high) << 4) | Self::hex_to_nibble(low));
        }
        out
    }

    /// Expands packed bytes into lowercase hex text, emitting `\n` each time
    /// the current line reaches `WRAP_SIZE` digits.
    ///
    /// The line position is kept in `self`, so feeding a stream in several
    /// chunks wraps the same way as feeding it in one call.
    pub fn decode(&mut self, input: &[u8]) -> Vec<u8> {
        // Two digits per byte plus one newline per completed line.
        let mut out = Vec::with_capacity(input.len() * 2 + input.len() / (WRAP_SIZE / 2) + 1);
        for &byte in input {
            let (high, low) = Self::u8_to_hex(byte);
            out.push(high);
            out.push(low);
            self.line_index += 2;
            if self.line_index >= WRAP_SIZE {
                self.line_index = 0;
                out.push(b'\n');
            }
        }
        out
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_match() {
        assert_eq!(can_be_encoded(b"abcd1234"), true);
        assert_eq!(can_be_encoded(b"abcXd1234"), false);
        assert_eq!(can_be_encoded(b"012345678901234567890123456789012345678901234567890123456789\nabcdef"), true);
        assert_eq!(can_be_encoded(b"01234567890123456789012345678901234567890123456789012345678\nabcdef"), false);
        assert_eq!(can_be_encoded(b"\x12\xab\x45\n"), false);
    }

    #[test]
    fn test_match_rejects_lossy_inputs() {
        // Odd digit count: the last nibble would be dropped by `encode`.
        assert_eq!(can_be_encoded(b"abc"), false);
        // Uppercase: `decode` would hand back lowercase.
        assert_eq!(can_be_encoded(b"ABCD"), false);
        // Full line without newline: `decode` would add the newline.
        assert_eq!(can_be_encoded(b"012345678901234567890123456789012345678901234567890123456789"), false);
        assert_eq!(can_be_encoded(b"012345678901234567890123456789012345678901234567890123456789\n"), true);
    }

    #[test]
    fn test_encode() {
        let vec = vec![b'a', b'b', b'c', b'd', b'\n', b'e', b'f'];
        assert_eq!(MeowCoder::encode(&vec), vec![0xab, 0xcd, 0xef]);
    }

    #[test]
    fn test_encode_decode() {
        let hex_str: &[u8] = b"012345678901234567890123456789012345678901234567890123456789\nabcdef";
        let vec: Vec<u8> = Vec::from(hex_str);
        let vec2 = MeowCoder::encode(&vec);
        let mut coder = MeowCoder::new();
        let out = coder.decode(&vec2);
        assert_eq!(out, vec);
    }
}