use std::char;
use std::cmp;
use std::error;
use std::fmt;
use std::u8;
use ast::Span;
use hir::interval::{Interval, IntervalSet, IntervalSetIter};
use unicode;
pub use hir::visitor::{Visitor, visit};
mod interval;
pub mod literal;
pub mod print;
pub mod translate;
mod visitor;
/// An error value produced by this module.
///
/// It bundles the category of the failure with the pattern string and the
/// span the failure refers to. All three parts are private and are read
/// through the accessors on `Error`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Error {
/// The category of this error.
kind: ErrorKind,
/// The pattern string associated with this error.
/// NOTE(review): presumably the pattern exactly as the caller wrote it;
/// the construction site is not visible here -- confirm.
pattern: String,
/// The span associated with this error.
/// NOTE(review): presumably a region inside `pattern` -- confirm.
span: Span,
}
/// Read-only accessors for the three parts of an `Error`.
impl Error {
    /// Returns the category of this error.
    pub fn kind(&self) -> &ErrorKind {
        let kind = &self.kind;
        kind
    }

    /// Returns the pattern string stored in this error.
    pub fn pattern(&self) -> &str {
        self.pattern.as_str()
    }

    /// Returns the span stored in this error.
    pub fn span(&self) -> &Span {
        let span = &self.span;
        span
    }
}
/// The category of an `Error`.
///
/// The one-line summaries below mirror the text returned by
/// `ErrorKind::description`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ErrorKind {
/// Unicode is not allowed at this point of the pattern.
UnicodeNotAllowed,
/// The pattern can match invalid UTF-8.
InvalidUtf8,
/// A Unicode property could not be found.
UnicodePropertyNotFound,
/// A Unicode property value could not be found.
UnicodePropertyValueNotFound,
/// An empty character class was used where it is not allowed.
EmptyClassNotAllowed,
/// Hidden placeholder variant; `description` treats it as unreachable.
/// NOTE(review): presumably present so that downstream matches need a
/// wildcard arm and new variants can be added later -- confirm.
#[doc(hidden)]
__Nonexhaustive,
}
impl ErrorKind {
fn description(&self) -> &str {
use self::ErrorKind::*;
match *self {
UnicodeNotAllowed => "Unicode not allowed here",
InvalidUtf8 => "pattern can match invalid UTF-8",
UnicodePropertyNotFound => "Unicode property not found",
UnicodePropertyValueNotFound => "Unicode property value not found",
EmptyClassNotAllowed => "empty character classes are not allowed",
_ => unreachable!(),
}
}
}
impl error::Error for Error {
    /// Returns the short description of this error's kind.
    fn description(&self) -> &str {
        let kind = &self.kind;
        kind.description()
    }
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
::error::Formatter::from(self).fmt(f)
}
}
impl fmt::Display for ErrorKind {
    /// Writes the kind's description text verbatim.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let text = self.description();
        f.write_str(text)
    }
}
/// A node of the high-level intermediate representation (HIR) of a regular
/// expression.
///
/// Values are built through the constructor functions on `Hir` (`empty`,
/// `literal`, `class`, `concat`, ...), each of which also computes the
/// boolean properties stored in `info`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Hir {
/// What kind of expression this node is, including any sub-expressions.
kind: HirKind,
/// Boolean properties computed when the node was constructed.
info: HirInfo,
}
/// The kind of expression held by a `Hir` node.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum HirKind {
/// The empty expression; it has no payload.
Empty,
/// A single literal, either a Unicode scalar value or a raw byte.
Literal(Literal),
/// A character class, over either Unicode scalar values or bytes.
Class(Class),
/// A start/end anchor for text or line.
Anchor(Anchor),
/// A word boundary assertion (Unicode or ASCII, possibly negated).
WordBoundary(WordBoundary),
/// A repetition operator applied to a sub-expression.
Repetition(Repetition),
/// A group wrapping a sub-expression.
Group(Group),
/// A sequence of sub-expressions. `Hir::concat` only produces this
/// variant when given at least two expressions.
Concat(Vec<Hir>),
/// A choice between sub-expressions. `Hir::alternation` only produces
/// this variant when given at least two expressions.
Alternation(Vec<Hir>),
}
impl Hir {
/// Returns a reference to the kind of this expression.
pub fn kind(&self) -> &HirKind {
    let kind = &self.kind;
    kind
}
/// Consumes this expression and returns its kind.
///
/// `Hir` implements `Drop`, so the kind cannot simply be moved out of
/// `self`; it is swapped with `HirKind::Empty` instead, and the emptied
/// shell is dropped when this function returns.
pub fn into_kind(mut self) -> HirKind {
    use std::mem;

    let mut taken = HirKind::Empty;
    mem::swap(&mut taken, &mut self.kind);
    taken
}
/// Returns the empty regular expression.
///
/// The empty expression matches the empty string, consumes no input
/// (`all_assertions`), is always valid UTF-8 and is not anchored.
///
/// It is deliberately *not* flagged as a literal or as an alternation of
/// literals. `Hir::concat` and `Hir::alternation` combine those flags of
/// their children with `&&`, so flagging the empty expression as a literal
/// would make expressions such as `a|` or `a()`-style concatenations claim
/// to consist solely of literals even though `HirKind::Empty` carries no
/// literal at all.
pub fn empty() -> Hir {
    let mut info = HirInfo::new();
    info.set_always_utf8(true);
    info.set_all_assertions(true);
    info.set_anchored_start(false);
    info.set_anchored_end(false);
    info.set_line_anchored_start(false);
    info.set_line_anchored_end(false);
    info.set_any_anchored_start(false);
    info.set_any_anchored_end(false);
    info.set_match_empty(true);
    // An empty expression contains no literal, so neither literal flag
    // may be set.
    info.set_literal(false);
    info.set_alternation_literal(false);
    Hir {
        kind: HirKind::Empty,
        info: info,
    }
}
/// Creates an expression that matches exactly the given literal.
///
/// The result is flagged as a literal and as an alternation literal, never
/// matches the empty string, and is valid UTF-8 exactly when
/// `lit.is_unicode()` holds.
///
/// # Panics
///
/// Panics if `lit` is a `Literal::Byte` whose value is not greater than
/// `0x7F`.
pub fn literal(lit: Literal) -> Hir {
    match lit {
        Literal::Byte(b) => assert!(b > 0x7F),
        Literal::Unicode(_) => {}
    }
    let utf8 = lit.is_unicode();

    let mut info = HirInfo::new();
    // Properties a literal has.
    info.set_literal(true);
    info.set_alternation_literal(true);
    info.set_always_utf8(utf8);
    // Properties a literal never has.
    info.set_match_empty(false);
    info.set_all_assertions(false);
    info.set_anchored_start(false);
    info.set_line_anchored_start(false);
    info.set_any_anchored_start(false);
    info.set_anchored_end(false);
    info.set_line_anchored_end(false);
    info.set_any_anchored_end(false);

    Hir { kind: HirKind::Literal(lit), info: info }
}
/// Creates an expression that matches the given character class.
///
/// A class is never anchored, never an assertion, never a literal and
/// never matches the empty string. Its UTF-8 property is taken from
/// `Class::is_always_utf8`.
pub fn class(class: Class) -> Hir {
    let utf8 = class.is_always_utf8();

    let mut info = HirInfo::new();
    info.set_always_utf8(utf8);
    // Everything else is unconditionally off for a class.
    info.set_literal(false);
    info.set_alternation_literal(false);
    info.set_match_empty(false);
    info.set_all_assertions(false);
    info.set_anchored_start(false);
    info.set_line_anchored_start(false);
    info.set_any_anchored_start(false);
    info.set_anchored_end(false);
    info.set_line_anchored_end(false);
    info.set_any_anchored_end(false);

    Hir { kind: HirKind::Class(class), info: info }
}
/// Creates an anchor assertion.
///
/// Every anchor is an assertion that can match the empty string and is
/// valid UTF-8. Text anchors set the `anchored`, `line_anchored` and
/// `any_anchored` flag of their side; line anchors only set the
/// `line_anchored` flag of their side.
pub fn anchor(anchor: Anchor) -> Hir {
    let mut info = HirInfo::new();
    // Flags shared by all four anchors.
    info.set_always_utf8(true);
    info.set_all_assertions(true);
    info.set_match_empty(true);
    info.set_literal(false);
    info.set_alternation_literal(false);
    // Start with every anchoring flag cleared ...
    info.set_anchored_start(false);
    info.set_anchored_end(false);
    info.set_line_anchored_start(false);
    info.set_line_anchored_end(false);
    info.set_any_anchored_start(false);
    info.set_any_anchored_end(false);
    // ... then raise the ones implied by the specific anchor.
    match anchor {
        Anchor::StartText => {
            info.set_anchored_start(true);
            info.set_line_anchored_start(true);
            info.set_any_anchored_start(true);
        }
        Anchor::EndText => {
            info.set_anchored_end(true);
            info.set_line_anchored_end(true);
            info.set_any_anchored_end(true);
        }
        Anchor::StartLine => info.set_line_anchored_start(true),
        Anchor::EndLine => info.set_line_anchored_end(true),
    }

    Hir { kind: HirKind::Anchor(anchor), info: info }
}
/// Creates a word boundary assertion.
///
/// A word boundary is an assertion and is never anchored or a literal.
/// Only the negated forms are flagged as matching the empty string, and
/// the negated ASCII form is the only one not flagged as always UTF-8.
pub fn word_boundary(word_boundary: WordBoundary) -> Hir {
    let negated = word_boundary.is_negated();
    let utf8 = match word_boundary {
        WordBoundary::AsciiNegate => false,
        _ => true,
    };

    let mut info = HirInfo::new();
    info.set_always_utf8(utf8);
    info.set_match_empty(negated);
    info.set_all_assertions(true);
    info.set_literal(false);
    info.set_alternation_literal(false);
    info.set_anchored_start(false);
    info.set_line_anchored_start(false);
    info.set_any_anchored_start(false);
    info.set_anchored_end(false);
    info.set_line_anchored_end(false);
    info.set_any_anchored_end(false);

    Hir { kind: HirKind::WordBoundary(word_boundary), info: info }
}
/// Creates a repetition of the sub-expression stored in `rep`.
///
/// The UTF-8, assertion and `any_anchored` properties are inherited from
/// the sub-expression. The result can match the empty string when either
/// the operator allows zero repetitions or the sub-expression itself can
/// match the empty string. A repetition is never a literal.
///
/// Anchoring is only inherited when the operator requires at least one
/// repetition: if zero repetitions are allowed the anchor may never be
/// evaluated. Text anchoring comes from the sub-expression's text-anchor
/// flags and line anchoring from its line-anchor flags, so that a
/// repetition such as `(?m:^)+` keeps the line-anchored property of its
/// sub-expression (as `Hir::group` does).
pub fn repetition(rep: Repetition) -> Hir {
    // True when the operator demands at least one repetition.
    let required = !rep.is_match_empty();

    let mut info = HirInfo::new();
    info.set_always_utf8(rep.hir.is_always_utf8());
    info.set_all_assertions(rep.hir.is_all_assertions());
    info.set_anchored_start(required && rep.hir.is_anchored_start());
    info.set_anchored_end(required && rep.hir.is_anchored_end());
    // Line anchoring must be read from the line-anchor flags; the
    // text-anchor flags would wrongly drop it for line-only anchors.
    info.set_line_anchored_start(
        required && rep.hir.is_line_anchored_start()
    );
    info.set_line_anchored_end(
        required && rep.hir.is_line_anchored_end()
    );
    info.set_any_anchored_start(rep.hir.is_any_anchored_start());
    info.set_any_anchored_end(rep.hir.is_any_anchored_end());
    info.set_match_empty(rep.is_match_empty() || rep.hir.is_match_empty());
    info.set_literal(false);
    info.set_alternation_literal(false);
    Hir {
        kind: HirKind::Repetition(rep),
        info: info,
    }
}
/// Creates a group around the sub-expression stored in `group`.
///
/// Every property is copied from the sub-expression except the two literal
/// flags, which are always cleared for a group.
pub fn group(group: Group) -> Hir {
    let mut info = HirInfo::new();
    {
        // Scope the borrow so `group` can be moved into the result below.
        let inner = &group.hir;
        info.set_always_utf8(inner.is_always_utf8());
        info.set_all_assertions(inner.is_all_assertions());
        info.set_match_empty(inner.is_match_empty());
        info.set_anchored_start(inner.is_anchored_start());
        info.set_line_anchored_start(inner.is_line_anchored_start());
        info.set_any_anchored_start(inner.is_any_anchored_start());
        info.set_anchored_end(inner.is_anchored_end());
        info.set_line_anchored_end(inner.is_line_anchored_end());
        info.set_any_anchored_end(inner.is_any_anchored_end());
    }
    info.set_literal(false);
    info.set_alternation_literal(false);

    Hir { kind: HirKind::Group(group), info: info }
}
/// Creates the concatenation of the given expressions.
///
/// An empty list yields `Hir::empty()` and a single expression is returned
/// unchanged, so `HirKind::Concat` always has at least two children.
///
/// For two or more children:
/// * `always_utf8`, `all_assertions`, `match_empty`, `literal` and
///   `alternation_literal` hold when they hold for every child;
/// * `any_anchored_start`/`any_anchored_end` hold when they hold for some
///   child;
/// * the (line) start anchor flags hold when, scanning from the front
///   through children that are either anchored in that sense or pure
///   assertions, an anchored child is found; the end flags do the same
///   scanning from the back.
pub fn concat(mut exprs: Vec<Hir>) -> Hir {
    if exprs.is_empty() {
        return Hir::empty();
    }
    if exprs.len() == 1 {
        return exprs.pop().unwrap();
    }

    let mut info = HirInfo::new();
    {
        // Scope the borrow so `exprs` can be moved into the result below.
        let subs = &exprs;

        // Properties that need every child to agree.
        info.set_always_utf8(subs.iter().all(|e| e.is_always_utf8()));
        info.set_all_assertions(subs.iter().all(|e| e.is_all_assertions()));
        info.set_match_empty(subs.iter().all(|e| e.is_match_empty()));
        info.set_literal(subs.iter().all(|e| e.is_literal()));
        info.set_alternation_literal(
            subs.iter().all(|e| e.is_alternation_literal()),
        );

        // Properties that a single child is enough to establish.
        info.set_any_anchored_start(
            subs.iter().any(|e| e.is_any_anchored_start()),
        );
        info.set_any_anchored_end(
            subs.iter().any(|e| e.is_any_anchored_end()),
        );

        // Anchoring: only a prefix (or suffix) made of anchored children
        // and pure assertions can anchor the whole concatenation.
        info.set_anchored_start(
            subs.iter()
                .take_while(|e| e.is_anchored_start() || e.is_all_assertions())
                .any(|e| e.is_anchored_start()),
        );
        info.set_anchored_end(
            subs.iter()
                .rev()
                .take_while(|e| e.is_anchored_end() || e.is_all_assertions())
                .any(|e| e.is_anchored_end()),
        );
        info.set_line_anchored_start(
            subs.iter()
                .take_while(|e| {
                    e.is_line_anchored_start() || e.is_all_assertions()
                })
                .any(|e| e.is_line_anchored_start()),
        );
        info.set_line_anchored_end(
            subs.iter()
                .rev()
                .take_while(|e| {
                    e.is_line_anchored_end() || e.is_all_assertions()
                })
                .any(|e| e.is_line_anchored_end()),
        );
    }

    Hir { kind: HirKind::Concat(exprs), info: info }
}
/// Creates the alternation of the given expressions.
///
/// An empty list yields `Hir::empty()` and a single expression is returned
/// unchanged, so `HirKind::Alternation` always has at least two branches.
///
/// For two or more branches:
/// * `always_utf8`, `all_assertions` and the four (line) anchored flags
///   hold when they hold for every branch;
/// * `any_anchored_start`, `any_anchored_end` and `match_empty` hold when
///   they hold for some branch;
/// * `alternation_literal` holds when every branch is a literal, while
///   `literal` itself is always cleared.
pub fn alternation(mut exprs: Vec<Hir>) -> Hir {
    if exprs.is_empty() {
        return Hir::empty();
    }
    if exprs.len() == 1 {
        return exprs.pop().unwrap();
    }

    let mut info = HirInfo::new();
    {
        // Scope the borrow so `exprs` can be moved into the result below.
        let branches = &exprs;

        // Properties that need every branch to agree.
        info.set_always_utf8(branches.iter().all(|e| e.is_always_utf8()));
        info.set_all_assertions(
            branches.iter().all(|e| e.is_all_assertions()),
        );
        info.set_anchored_start(
            branches.iter().all(|e| e.is_anchored_start()),
        );
        info.set_anchored_end(branches.iter().all(|e| e.is_anchored_end()));
        info.set_line_anchored_start(
            branches.iter().all(|e| e.is_line_anchored_start()),
        );
        info.set_line_anchored_end(
            branches.iter().all(|e| e.is_line_anchored_end()),
        );
        info.set_alternation_literal(
            branches.iter().all(|e| e.is_literal()),
        );

        // Properties that a single branch is enough to establish.
        info.set_any_anchored_start(
            branches.iter().any(|e| e.is_any_anchored_start()),
        );
        info.set_any_anchored_end(
            branches.iter().any(|e| e.is_any_anchored_end()),
        );
        info.set_match_empty(branches.iter().any(|e| e.is_match_empty()));
    }
    // An alternation with several branches is never a single literal.
    info.set_literal(false);

    Hir { kind: HirKind::Alternation(exprs), info: info }
}
/// Builds a class expression matching everything except the line feed
/// (`0x0A`).
///
/// With `bytes == true` the class covers the byte ranges `0x00-0x09` and
/// `0x0B-0xFF`; otherwise it covers the Unicode scalar value ranges
/// `U+0000-U+0009` and `U+000B-U+10FFFF`.
pub fn dot(bytes: bool) -> Hir {
    let class = if bytes {
        let mut byte_cls = ClassBytes::empty();
        byte_cls.push(ClassBytesRange::new(b'\0', b'\x09'));
        byte_cls.push(ClassBytesRange::new(b'\x0B', b'\xFF'));
        Class::Bytes(byte_cls)
    } else {
        let mut uni_cls = ClassUnicode::empty();
        uni_cls.push(ClassUnicodeRange::new('\0', '\x09'));
        uni_cls.push(ClassUnicodeRange::new('\x0B', '\u{10FFFF}'));
        Class::Unicode(uni_cls)
    };
    Hir::class(class)
}
/// Builds a class expression matching every possible value.
///
/// With `bytes == true` the class covers all bytes `0x00-0xFF`; otherwise
/// it covers all Unicode scalar values `U+0000-U+10FFFF`.
pub fn any(bytes: bool) -> Hir {
    let class = if bytes {
        let mut byte_cls = ClassBytes::empty();
        byte_cls.push(ClassBytesRange::new(b'\0', b'\xFF'));
        Class::Bytes(byte_cls)
    } else {
        let mut uni_cls = ClassUnicode::empty();
        uni_cls.push(ClassUnicodeRange::new('\0', '\u{10FFFF}'));
        Class::Unicode(uni_cls)
    };
    Hir::class(class)
}
/// Returns the `always_utf8` property computed when this expression was
/// built (for example, it is cleared for non-ASCII byte literals and for
/// negated ASCII word boundaries).
pub fn is_always_utf8(&self) -> bool {
    let flags = &self.info;
    flags.is_always_utf8()
}
/// Returns the `all_assertions` property computed when this expression was
/// built (set for the empty expression, anchors and word boundaries, and
/// for composites made only of those).
pub fn is_all_assertions(&self) -> bool {
    let flags = &self.info;
    flags.is_all_assertions()
}
/// Returns the `anchored_start` property computed when this expression was
/// built; among the leaf expressions only `Anchor::StartText` sets it.
pub fn is_anchored_start(&self) -> bool {
    let flags = &self.info;
    flags.is_anchored_start()
}
/// Returns the `anchored_end` property computed when this expression was
/// built; among the leaf expressions only `Anchor::EndText` sets it.
pub fn is_anchored_end(&self) -> bool {
    let flags = &self.info;
    flags.is_anchored_end()
}
/// Returns the `line_anchored_start` property computed when this
/// expression was built; among the leaf expressions `Anchor::StartText`
/// and `Anchor::StartLine` set it.
pub fn is_line_anchored_start(&self) -> bool {
    let flags = &self.info;
    flags.is_line_anchored_start()
}
/// Returns the `line_anchored_end` property computed when this expression
/// was built; among the leaf expressions `Anchor::EndText` and
/// `Anchor::EndLine` set it.
pub fn is_line_anchored_end(&self) -> bool {
    let flags = &self.info;
    flags.is_line_anchored_end()
}
/// Returns the `any_anchored_start` property computed when this expression
/// was built; composites set it when any sub-expression has it.
pub fn is_any_anchored_start(&self) -> bool {
    let flags = &self.info;
    flags.is_any_anchored_start()
}
/// Returns the `any_anchored_end` property computed when this expression
/// was built; composites set it when any sub-expression has it.
pub fn is_any_anchored_end(&self) -> bool {
    let flags = &self.info;
    flags.is_any_anchored_end()
}
/// Returns the `match_empty` property computed when this expression was
/// built, i.e. whether it was determined to be able to match the empty
/// string.
pub fn is_match_empty(&self) -> bool {
    let flags = &self.info;
    flags.is_match_empty()
}
/// Returns the `literal` property computed when this expression was built.
/// Classes, anchors, word boundaries, repetitions, groups and alternations
/// always clear it.
pub fn is_literal(&self) -> bool {
    let flags = &self.info;
    flags.is_literal()
}
/// Returns the `alternation_literal` property computed when this
/// expression was built; an alternation sets it when each of its branches
/// is flagged as a literal.
pub fn is_alternation_literal(&self) -> bool {
    let flags = &self.info;
    flags.is_alternation_literal()
}
}
impl HirKind {
    /// Returns true if and only if this kind is `HirKind::Empty`.
    pub fn is_empty(&self) -> bool {
        if let HirKind::Empty = *self {
            true
        } else {
            false
        }
    }

    /// Returns true if and only if this kind is one of the variants that
    /// carry sub-expressions (`Group`, `Repetition`, `Concat` or
    /// `Alternation`). The answer depends only on the variant, so an empty
    /// `Concat` still reports true.
    pub fn has_subexprs(&self) -> bool {
        match *self {
            // Composite variants.
            HirKind::Group(_)
            | HirKind::Repetition(_)
            | HirKind::Concat(_)
            | HirKind::Alternation(_) => true,
            // Leaf variants.
            HirKind::Empty
            | HirKind::Literal(_)
            | HirKind::Class(_)
            | HirKind::Anchor(_)
            | HirKind::WordBoundary(_) => false,
        }
    }
}
impl fmt::Display for Hir {
    /// Writes this expression using a freshly created `hir::print::Printer`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        use hir::print::Printer;

        let outcome = Printer::new().print(self, f);
        outcome
    }
}
/// A single literal unit of a pattern: either a Unicode scalar value or a
/// raw byte.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Literal {
/// A single Unicode scalar value.
Unicode(char),
/// A single raw byte. `Hir::literal` asserts that a byte handed to it is
/// greater than `0x7F`.
Byte(u8),
}
impl Literal {
    /// Returns true if this literal is a Unicode scalar value or a byte in
    /// the ASCII range (`<= 0x7F`); returns false for any other byte.
    pub fn is_unicode(&self) -> bool {
        match *self {
            Literal::Byte(b) => b <= 0x7F,
            Literal::Unicode(_) => true,
        }
    }
}
/// A character class, expressed either over Unicode scalar values or over
/// raw bytes.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Class {
/// A set of ranges of Unicode scalar values.
Unicode(ClassUnicode),
/// A set of ranges of bytes.
Bytes(ClassBytes),
}
/// Operations that apply to either flavour of class by dispatching on the
/// variant.
impl Class {
    /// Applies simple case folding in place to the wrapped class.
    pub fn case_fold_simple(&mut self) {
        match *self {
            Class::Bytes(ref mut bytes) => bytes.case_fold_simple(),
            Class::Unicode(ref mut unicode) => unicode.case_fold_simple(),
        }
    }

    /// Negates the wrapped class in place.
    pub fn negate(&mut self) {
        match *self {
            Class::Bytes(ref mut bytes) => bytes.negate(),
            Class::Unicode(ref mut unicode) => unicode.negate(),
        }
    }

    /// Returns true for every Unicode class, and for a byte class exactly
    /// when `ClassBytes::is_all_ascii` holds.
    pub fn is_always_utf8(&self) -> bool {
        match *self {
            Class::Bytes(ref bytes) => bytes.is_all_ascii(),
            Class::Unicode(_) => true,
        }
    }
}
/// A set of Unicode scalar values represented as ranges.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ClassUnicode {
/// The underlying interval set holding the ranges of this class.
set: IntervalSet<ClassUnicodeRange>,
}
/// Construction, inspection and set operations for `ClassUnicode`. Every
/// method is a thin wrapper over the underlying `IntervalSet`.
impl ClassUnicode {
    /// Creates a class from the given ranges.
    pub fn new<I>(ranges: I) -> ClassUnicode
    where
        I: IntoIterator<Item = ClassUnicodeRange>,
    {
        let set = IntervalSet::new(ranges);
        ClassUnicode { set: set }
    }

    /// Creates a class containing no ranges.
    pub fn empty() -> ClassUnicode {
        ClassUnicode::new(Vec::new())
    }

    /// Adds a range to this class.
    pub fn push(&mut self, range: ClassUnicodeRange) {
        let set = &mut self.set;
        set.push(range);
    }

    /// Returns an iterator over the ranges of this class.
    pub fn iter(&self) -> ClassUnicodeIter {
        let inner = self.set.iter();
        ClassUnicodeIter(inner)
    }

    /// Returns the ranges of this class as a slice.
    pub fn ranges(&self) -> &[ClassUnicodeRange] {
        let set = &self.set;
        set.intervals()
    }

    /// Applies simple case folding to this class in place.
    pub fn case_fold_simple(&mut self) {
        let set = &mut self.set;
        set.case_fold_simple();
    }

    /// Negates this class in place.
    pub fn negate(&mut self) {
        let set = &mut self.set;
        set.negate();
    }

    /// Replaces this class with its union with `other`.
    pub fn union(&mut self, other: &ClassUnicode) {
        let rhs = &other.set;
        self.set.union(rhs);
    }

    /// Replaces this class with its intersection with `other`.
    pub fn intersect(&mut self, other: &ClassUnicode) {
        let rhs = &other.set;
        self.set.intersect(rhs);
    }

    /// Replaces this class with its difference from `other`, as computed by
    /// `IntervalSet::difference`.
    pub fn difference(&mut self, other: &ClassUnicode) {
        let rhs = &other.set;
        self.set.difference(rhs);
    }

    /// Replaces this class with its symmetric difference with `other`, as
    /// computed by `IntervalSet::symmetric_difference`.
    pub fn symmetric_difference(&mut self, other: &ClassUnicode) {
        let rhs = &other.set;
        self.set.symmetric_difference(rhs);
    }
}
/// An iterator over the ranges of a `ClassUnicode`, as returned by
/// `ClassUnicode::iter`. It wraps the iterator of the underlying interval
/// set and borrows from the class for `'a`.
#[derive(Debug)]
pub struct ClassUnicodeIter<'a>(IntervalSetIter<'a, ClassUnicodeRange>);
impl<'a> Iterator for ClassUnicodeIter<'a> {
type Item = &'a ClassUnicodeRange;
fn next(&mut self) -> Option<&'a ClassUnicodeRange> {
self.0.next()
}
}
/// A single range of Unicode scalar values with `start` and `end` bounds.
///
/// The derived ordering compares `start` first and then `end`, so the
/// field order below is significant.
#[derive(Clone, Copy, Default, Eq, PartialEq, PartialOrd, Ord)]
pub struct ClassUnicodeRange {
/// The lower bound of the range (returned by `Interval::lower`).
start: char,
/// The upper bound of the range (returned by `Interval::upper`).
end: char,
}
impl fmt::Debug for ClassUnicodeRange {
    /// Formats the range as a struct with string-valued `start` and `end`
    /// fields. A bound that is whitespace or a control character is shown
    /// as its code point in upper-case hexadecimal (`0x..`); any other
    /// bound is shown as the character itself.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Renders one bound according to the rule described above.
        fn render(bound: char) -> String {
            if bound.is_whitespace() || bound.is_control() {
                format!("0x{:X}", bound as u32)
            } else {
                bound.to_string()
            }
        }

        let lower = render(self.start);
        let upper = render(self.end);
        f.debug_struct("ClassUnicodeRange")
            .field("start", &lower)
            .field("end", &upper)
            .finish()
    }
}
impl Interval for ClassUnicodeRange {
    type Bound = char;

    /// Returns the lower bound of this range.
    #[inline]
    fn lower(&self) -> char {
        self.start
    }

    /// Returns the upper bound of this range.
    #[inline]
    fn upper(&self) -> char {
        self.end
    }

    /// Overwrites the lower bound of this range.
    #[inline]
    fn set_lower(&mut self, bound: char) {
        self.start = bound;
    }

    /// Overwrites the upper bound of this range.
    #[inline]
    fn set_upper(&mut self, bound: char) {
        self.end = bound;
    }

    /// Appends to `ranges` one single-character range for every simple case
    /// folding of every character in this range.
    ///
    /// Nothing is appended when `unicode::contains_simple_case_mapping`
    /// reports that the range has no simple case mapping at all.
    fn case_fold_simple(&self, ranges: &mut Vec<ClassUnicodeRange>) {
        if !unicode::contains_simple_case_mapping(self.start, self.end) {
            return;
        }

        let first = self.start as u32;
        // Exclusive upper limit; saturating keeps the addition from
        // overflowing.
        let limit = (self.end as u32).saturating_add(1);
        // When `simple_fold` fails it may report a later character; every
        // character before that one is skipped without another lookup.
        let mut skip_until: Option<char> = None;
        // `filter_map` drops code points that are not valid `char`s.
        for scalar in (first..limit).filter_map(char::from_u32) {
            if let Some(next) = skip_until {
                if scalar < next {
                    continue;
                }
            }
            match unicode::simple_fold(scalar) {
                Err(next) => {
                    skip_until = next;
                }
                Ok(foldings) => {
                    for folded in foldings {
                        ranges.push(ClassUnicodeRange::new(folded, folded));
                    }
                }
            }
        }
    }
}
impl ClassUnicodeRange {
pub fn new(start: char, end: char) -> ClassUnicodeRange {
ClassUnicodeRange::create(start, end)
}
pub fn start(&self) -> char {
self.start
}
pub fn end(&self) -> char {
self.end
}
}
/// A set of bytes represented as ranges.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ClassBytes {
/// The underlying interval set holding the ranges of this class.
set: IntervalSet<ClassBytesRange>,
}
/// Construction, inspection and set operations for `ClassBytes`. Apart from
/// `is_all_ascii`, every method is a thin wrapper over the underlying
/// `IntervalSet`.
impl ClassBytes {
    /// Creates a class from the given ranges.
    pub fn new<I>(ranges: I) -> ClassBytes
    where
        I: IntoIterator<Item = ClassBytesRange>,
    {
        let set = IntervalSet::new(ranges);
        ClassBytes { set: set }
    }

    /// Creates a class containing no ranges.
    pub fn empty() -> ClassBytes {
        ClassBytes::new(Vec::new())
    }

    /// Adds a range to this class.
    pub fn push(&mut self, range: ClassBytesRange) {
        let set = &mut self.set;
        set.push(range);
    }

    /// Returns an iterator over the ranges of this class.
    pub fn iter(&self) -> ClassBytesIter {
        let inner = self.set.iter();
        ClassBytesIter(inner)
    }

    /// Returns the ranges of this class as a slice.
    pub fn ranges(&self) -> &[ClassBytesRange] {
        let set = &self.set;
        set.intervals()
    }

    /// Applies simple (ASCII) case folding to this class in place.
    pub fn case_fold_simple(&mut self) {
        let set = &mut self.set;
        set.case_fold_simple();
    }

    /// Negates this class in place.
    pub fn negate(&mut self) {
        let set = &mut self.set;
        set.negate();
    }

    /// Replaces this class with its union with `other`.
    pub fn union(&mut self, other: &ClassBytes) {
        let rhs = &other.set;
        self.set.union(rhs);
    }

    /// Replaces this class with its intersection with `other`.
    pub fn intersect(&mut self, other: &ClassBytes) {
        let rhs = &other.set;
        self.set.intersect(rhs);
    }

    /// Replaces this class with its difference from `other`, as computed by
    /// `IntervalSet::difference`.
    pub fn difference(&mut self, other: &ClassBytes) {
        let rhs = &other.set;
        self.set.difference(rhs);
    }

    /// Replaces this class with its symmetric difference with `other`, as
    /// computed by `IntervalSet::symmetric_difference`.
    pub fn symmetric_difference(&mut self, other: &ClassBytes) {
        let rhs = &other.set;
        self.set.symmetric_difference(rhs);
    }

    /// Returns true if the class is empty or if the upper bound of its last
    /// range is at most `0x7F`.
    ///
    /// Only the last range is inspected, which covers the whole class as
    /// long as the ranges are kept in ascending order.
    pub fn is_all_ascii(&self) -> bool {
        match self.set.intervals().last() {
            None => true,
            Some(last) => last.end <= 0x7F,
        }
    }
}
/// An iterator over the ranges of a `ClassBytes`, as returned by
/// `ClassBytes::iter`. It wraps the iterator of the underlying interval set
/// and borrows from the class for `'a`.
#[derive(Debug)]
pub struct ClassBytesIter<'a>(IntervalSetIter<'a, ClassBytesRange>);
impl<'a> Iterator for ClassBytesIter<'a> {
type Item = &'a ClassBytesRange;
fn next(&mut self) -> Option<&'a ClassBytesRange> {
self.0.next()
}
}
/// A single range of bytes with `start` and `end` bounds.
///
/// The derived ordering compares `start` first and then `end`, so the
/// field order below is significant.
#[derive(Clone, Copy, Default, Eq, PartialEq, PartialOrd, Ord)]
pub struct ClassBytesRange {
/// The lower bound of the range (returned by `Interval::lower`).
start: u8,
/// The upper bound of the range (returned by `Interval::upper`).
end: u8,
}
impl Interval for ClassBytesRange {
    type Bound = u8;

    /// Returns the lower bound of this range.
    #[inline]
    fn lower(&self) -> u8 {
        self.start
    }

    /// Returns the upper bound of this range.
    #[inline]
    fn upper(&self) -> u8 {
        self.end
    }

    /// Overwrites the lower bound of this range.
    #[inline]
    fn set_lower(&mut self, bound: u8) {
        self.start = bound;
    }

    /// Overwrites the upper bound of this range.
    #[inline]
    fn set_upper(&mut self, bound: u8) {
        self.end = bound;
    }

    /// Appends the ASCII case counterparts of this range to `ranges`.
    ///
    /// The part of the range overlapping `a-z` is shifted down by 32 to its
    /// upper-case equivalent, and the part overlapping `A-Z` is shifted up
    /// by 32 to its lower-case equivalent. Bytes outside both spans add
    /// nothing.
    fn case_fold_simple(&self, ranges: &mut Vec<ClassBytesRange>) {
        let lowercase = ClassBytesRange::new(b'a', b'z');
        if !lowercase.is_intersection_empty(self) {
            // Clamp to a-z, then map onto A-Z.
            let lo = cmp::max(self.start, b'a');
            let hi = cmp::min(self.end, b'z');
            ranges.push(ClassBytesRange::new(lo - 32, hi - 32));
        }

        let uppercase = ClassBytesRange::new(b'A', b'Z');
        if !uppercase.is_intersection_empty(self) {
            // Clamp to A-Z, then map onto a-z.
            let lo = cmp::max(self.start, b'A');
            let hi = cmp::min(self.end, b'Z');
            ranges.push(ClassBytesRange::new(lo + 32, hi + 32));
        }
    }
}
impl ClassBytesRange {
pub fn new(start: u8, end: u8) -> ClassBytesRange {
ClassBytesRange::create(start, end)
}
pub fn start(&self) -> u8 {
self.start
}
pub fn end(&self) -> u8 {
self.end
}
}
impl fmt::Debug for ClassBytesRange {
    /// Formats the range as a struct with `start` and `end` fields. A bound
    /// in the ASCII range (`<= 0x7F`) is shown as a character, any other
    /// bound as its numeric byte value.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Adds one bound to the struct output following the rule above.
        fn put(out: &mut fmt::DebugStruct, label: &str, byte: u8) {
            if byte <= 0x7F {
                out.field(label, &(byte as char));
            } else {
                out.field(label, &byte);
            }
        }

        let mut out = f.debug_struct("ClassBytesRange");
        put(&mut out, "start", self.start);
        put(&mut out, "end", self.end);
        out.finish()
    }
}
/// The anchor assertions an expression can contain.
///
/// `Hir::anchor` treats the text variants as text, line and "any" anchors
/// of their side, and the line variants as line anchors only.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Anchor {
/// Anchors at the start of a line.
StartLine,
/// Anchors at the end of a line.
EndLine,
/// Anchors at the start of the text.
StartText,
/// Anchors at the end of the text.
EndText,
}
/// The word boundary assertions an expression can contain, in Unicode and
/// ASCII flavours, each with a negated counterpart.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum WordBoundary {
/// A Unicode word boundary.
Unicode,
/// A negated Unicode word boundary.
UnicodeNegate,
/// An ASCII word boundary.
Ascii,
/// A negated ASCII word boundary. `Hir::word_boundary` marks this as the
/// only variant that is not always valid UTF-8.
AsciiNegate,
}
impl WordBoundary {
    /// Returns true for the two negated variants (`UnicodeNegate` and
    /// `AsciiNegate`) and false for the plain ones.
    pub fn is_negated(&self) -> bool {
        match *self {
            WordBoundary::UnicodeNegate | WordBoundary::AsciiNegate => true,
            WordBoundary::Unicode | WordBoundary::Ascii => false,
        }
    }
}
/// A group wrapping a single sub-expression.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Group {
/// Whether the group captures, and under which index and name.
pub kind: GroupKind,
/// The expression inside the group.
pub hir: Box<Hir>,
}
/// The kind of a `Group`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum GroupKind {
/// A capturing group identified only by its index.
CaptureIndex(u32),
/// A capturing group with both a name and an index.
CaptureName {
/// The name of the capture group.
name: String,
/// The index of the capture group.
index: u32,
},
/// A group that does not capture.
NonCapturing,
}
/// A repetition operator applied to a single sub-expression.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Repetition {
/// Which repetition operator is applied.
pub kind: RepetitionKind,
/// Whether the operator is greedy.
/// NOTE(review): nothing in this file reads the flag; presumably `true`
/// means "match as much as possible" -- confirm against the consumers.
pub greedy: bool,
/// The expression being repeated.
pub hir: Box<Hir>,
}
impl Repetition {
    /// Returns true if the operator itself permits zero repetitions: `?`,
    /// `*`, or any counted range whose minimum is zero.
    ///
    /// Only the operator is considered; whether the repeated sub-expression
    /// can match the empty string is not taken into account here.
    pub fn is_match_empty(&self) -> bool {
        match self.kind {
            RepetitionKind::ZeroOrOne | RepetitionKind::ZeroOrMore => true,
            RepetitionKind::OneOrMore => false,
            RepetitionKind::Range(ref range) => {
                // Every counted form carries its minimum as first field.
                let min = match *range {
                    RepetitionRange::Exactly(n)
                    | RepetitionRange::AtLeast(n) => n,
                    RepetitionRange::Bounded(n, _) => n,
                };
                min == 0
            }
        }
    }
}
/// The repetition operator of a `Repetition`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum RepetitionKind {
/// Zero or one repetition; permits an empty match.
ZeroOrOne,
/// Zero or more repetitions; permits an empty match.
ZeroOrMore,
/// One or more repetitions.
OneOrMore,
/// A counted repetition described by a `RepetitionRange`.
Range(RepetitionRange),
}
/// The bounds of a counted repetition.
///
/// In every variant the first field is the minimum count;
/// `Repetition::is_match_empty` tests it against zero.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum RepetitionRange {
/// Exactly the given number of repetitions.
Exactly(u32),
/// At least the given number of repetitions.
AtLeast(u32),
/// Between the first and the second number of repetitions.
/// NOTE(review): presumably both bounds are inclusive -- confirm.
Bounded(u32, u32),
}
/// Custom destructor that dismantles an expression tree with an explicit
/// heap-allocated stack instead of relying on nested destructor calls.
/// NOTE(review): presumably this exists so that very deeply nested
/// expressions cannot overflow the call stack while being dropped.
impl Drop for Hir {
fn drop(&mut self) {
use std::mem;
// Fast path: nothing to unwind when this node is a leaf, wraps a
// sub-expression that itself has no sub-expressions, or is an empty
// concatenation/alternation. Returning here leaves the fields to be
// dropped normally.
match *self.kind() {
HirKind::Empty
| HirKind::Literal(_)
| HirKind::Class(_)
| HirKind::Anchor(_)
| HirKind::WordBoundary(_) => return,
HirKind::Group(ref x) if !x.hir.kind.has_subexprs() => return,
HirKind::Repetition(ref x) if !x.hir.kind.has_subexprs() => return,
HirKind::Concat(ref x) if x.is_empty() => return,
HirKind::Alternation(ref x) if x.is_empty() => return,
_ => {}
}
// Move the real node onto the work stack, leaving an empty expression
// behind in `self`.
let mut stack = vec![mem::replace(self, Hir::empty())];
while let Some(mut expr) = stack.pop() {
// Detach every child of `expr` onto the stack. When `expr` goes out
// of scope at the end of this iteration its children are already
// empty or drained, so its own `drop` takes the fast path above.
match expr.kind {
HirKind::Empty
| HirKind::Literal(_)
| HirKind::Class(_)
| HirKind::Anchor(_)
| HirKind::WordBoundary(_) => {}
HirKind::Group(ref mut x) => {
// Swap the boxed child for an empty expression and queue the child.
stack.push(mem::replace(&mut x.hir, Hir::empty()));
}
HirKind::Repetition(ref mut x) => {
stack.push(mem::replace(&mut x.hir, Hir::empty()));
}
HirKind::Concat(ref mut x) => {
// Draining moves all children out and leaves the vector empty.
stack.extend(x.drain(..));
}
HirKind::Alternation(ref mut x) => {
stack.extend(x.drain(..));
}
}
}
}
}
/// The boolean properties of a `Hir`, packed into a single integer.
///
/// Each property occupies one bit; `impl HirInfo` assigns bits 0 to 10 via
/// the `define_bool!` macro.
#[derive(Clone, Debug, Eq, PartialEq)]
struct HirInfo {
/// The packed property bits.
bools: u16,
}
/// Generates a getter/setter pair for one bit of a `bools: u16` field.
///
/// `$bit` is the zero-based bit position, `$is_fn_name` the name of the
/// generated getter and `$set_fn_name` the name of the generated setter.
/// The macro must be invoked inside an `impl` block of a type that has a
/// `bools` field.
macro_rules! define_bool {
    ($bit:expr, $is_fn_name:ident, $set_fn_name:ident) => {
        fn $is_fn_name(&self) -> bool {
            let mask: u16 = 1 << $bit;
            self.bools & mask != 0
        }
        fn $set_fn_name(&mut self, yes: bool) {
            let mask: u16 = 1 << $bit;
            if yes {
                self.bools |= mask;
            } else {
                self.bools &= !mask;
            }
        }
    }
}
impl HirInfo {
    /// Creates a property set with every flag cleared.
    fn new() -> HirInfo {
        let cleared = 0;
        HirInfo { bools: cleared }
    }

    // One getter/setter pair per property; the first argument is the bit
    // position inside `bools`.

    // Properties about the kind of input matched.
    define_bool!(0, is_always_utf8, set_always_utf8);
    define_bool!(1, is_all_assertions, set_all_assertions);

    // Anchoring to the start/end of the text.
    define_bool!(2, is_anchored_start, set_anchored_start);
    define_bool!(3, is_anchored_end, set_anchored_end);

    // Anchoring to the start/end of the text or of a line.
    define_bool!(4, is_line_anchored_start, set_line_anchored_start);
    define_bool!(5, is_line_anchored_end, set_line_anchored_end);

    // Presence of a text anchor anywhere inside the expression.
    define_bool!(6, is_any_anchored_start, set_any_anchored_start);
    define_bool!(7, is_any_anchored_end, set_any_anchored_end);

    // Empty-match and literal properties.
    define_bool!(8, is_match_empty, set_match_empty);
    define_bool!(9, is_literal, set_literal);
    define_bool!(10, is_alternation_literal, set_alternation_literal);
}
#[cfg(test)]
mod tests {
use super::*;
/// Test helper: builds a `ClassUnicode` from `(start, end)` pairs.
fn uclass(ranges: &[(char, char)]) -> ClassUnicode {
    let mut built: Vec<ClassUnicodeRange> = Vec::with_capacity(ranges.len());
    for &(lo, hi) in ranges {
        built.push(ClassUnicodeRange::new(lo, hi));
    }
    ClassUnicode::new(built)
}
/// Test helper: builds a `ClassBytes` from `(start, end)` pairs.
fn bclass(ranges: &[(u8, u8)]) -> ClassBytes {
    let mut built: Vec<ClassBytesRange> = Vec::with_capacity(ranges.len());
    for &(lo, hi) in ranges {
        built.push(ClassBytesRange::new(lo, hi));
    }
    ClassBytes::new(built)
}
/// Test helper: lists the ranges of a Unicode class as `(start, end)` pairs
/// in iteration order.
fn uranges(cls: &ClassUnicode) -> Vec<(char, char)> {
    let mut pairs = Vec::new();
    for range in cls.iter() {
        pairs.push((range.start(), range.end()));
    }
    pairs
}
/// Test helper: returns a case-folded copy of `cls`, leaving `cls` intact.
fn ucasefold(cls: &ClassUnicode) -> ClassUnicode {
    let mut folded = cls.clone();
    folded.case_fold_simple();
    folded
}
/// Test helper: returns the union of two Unicode classes as a new class.
fn uunion(cls1: &ClassUnicode, cls2: &ClassUnicode) -> ClassUnicode {
    let mut merged = cls1.clone();
    merged.union(cls2);
    merged
}
/// Test helper: returns the intersection of two Unicode classes as a new
/// class.
fn uintersect(cls1: &ClassUnicode, cls2: &ClassUnicode) -> ClassUnicode {
    let mut common = cls1.clone();
    common.intersect(cls2);
    common
}
/// Test helper: returns a copy of `cls1` after `difference(cls2)` has been
/// applied to it.
fn udifference(cls1: &ClassUnicode, cls2: &ClassUnicode) -> ClassUnicode {
    let mut remaining = cls1.clone();
    remaining.difference(cls2);
    remaining
}
/// Test helper: returns a copy of `cls1` after `symmetric_difference(cls2)`
/// has been applied to it.
fn usymdifference(cls1: &ClassUnicode, cls2: &ClassUnicode) -> ClassUnicode {
    let mut exclusive = cls1.clone();
    exclusive.symmetric_difference(cls2);
    exclusive
}
/// Test helper: returns a negated copy of `cls`, leaving `cls` intact.
fn unegate(cls: &ClassUnicode) -> ClassUnicode {
    let mut negated = cls.clone();
    negated.negate();
    negated
}
/// Test helper: lists the ranges of a byte class as `(start, end)` pairs in
/// iteration order.
fn branges(cls: &ClassBytes) -> Vec<(u8, u8)> {
    let mut pairs = Vec::new();
    for range in cls.iter() {
        pairs.push((range.start(), range.end()));
    }
    pairs
}
/// Test helper: returns a case-folded copy of `cls`, leaving `cls` intact.
fn bcasefold(cls: &ClassBytes) -> ClassBytes {
    let mut folded = cls.clone();
    folded.case_fold_simple();
    folded
}
/// Test helper: returns the union of two byte classes as a new class.
fn bunion(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes {
    let mut merged = cls1.clone();
    merged.union(cls2);
    merged
}
/// Test helper: returns the intersection of two byte classes as a new class.
fn bintersect(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes {
    let mut common = cls1.clone();
    common.intersect(cls2);
    common
}
/// Test helper: returns a copy of `cls1` after `difference(cls2)` has been
/// applied to it.
fn bdifference(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes {
    let mut remaining = cls1.clone();
    remaining.difference(cls2);
    remaining
}
/// Test helper: returns a copy of `cls1` after `symmetric_difference(cls2)`
/// has been applied to it.
fn bsymdifference(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes {
    let mut exclusive = cls1.clone();
    exclusive.symmetric_difference(cls2);
    exclusive
}
/// Test helper: returns a negated copy of `cls`, leaving `cls` intact.
fn bnegate(cls: &ClassBytes) -> ClassBytes {
    let mut negated = cls.clone();
    negated.negate();
    negated
}
/// A Unicode range created with its bounds in descending order must report
/// them in ascending order.
#[test]
fn class_range_canonical_unicode() {
    let (high, low) = ('\u{00FF}', '\0');
    let range = ClassUnicodeRange::new(high, low);
    assert_eq!('\0', range.start());
    assert_eq!('\u{00FF}', range.end());
}
/// A byte range created with its bounds in descending order must report
/// them in ascending order.
#[test]
fn class_range_canonical_bytes() {
    let (high, low) = (b'\xFF', b'\0');
    let range = ClassBytesRange::new(high, low);
    assert_eq!(b'\0', range.start());
    assert_eq!(b'\xFF', range.end());
}
/// Building a Unicode class must sort its ranges and merge ranges that
/// overlap or are adjacent.
#[test]
fn class_canonicalize_unicode() {
    type Ranges = Vec<(char, char)>;
    // Each entry pairs the ranges given to `uclass` with the ranges the
    // resulting class is expected to report.
    let cases: Vec<(Ranges, Ranges)> = vec![
        // Already canonical.
        (vec![('a', 'c'), ('x', 'z')], vec![('a', 'c'), ('x', 'z')]),
        // Out of order.
        (vec![('x', 'z'), ('a', 'c')], vec![('a', 'c'), ('x', 'z')]),
        // Overlapping.
        (vec![('x', 'z'), ('w', 'y')], vec![('w', 'z')]),
        // Several overlapping ranges forming two groups.
        (
            vec![
                ('c', 'f'), ('a', 'g'), ('d', 'j'), ('a', 'c'),
                ('m', 'p'), ('l', 's'),
            ],
            vec![('a', 'j'), ('l', 's')],
        ),
        // Adjacent ranges.
        (vec![('x', 'z'), ('u', 'w')], vec![('u', 'z')]),
        // Duplicate full ranges.
        (
            vec![('\x00', '\u{10FFFF}'), ('\x00', '\u{10FFFF}')],
            vec![('\x00', '\u{10FFFF}')],
        ),
        // Adjacent single characters.
        (vec![('a', 'a'), ('b', 'b')], vec![('a', 'b')]),
    ];
    for (given, expected) in cases {
        let cls = uclass(&given);
        assert_eq!(expected, uranges(&cls));
    }
}
/// Building a byte class must sort its ranges and merge ranges that overlap
/// or are adjacent.
#[test]
fn class_canonicalize_bytes() {
    type Ranges = Vec<(u8, u8)>;
    // Each entry pairs the ranges given to `bclass` with the ranges the
    // resulting class is expected to report.
    let cases: Vec<(Ranges, Ranges)> = vec![
        // Already canonical.
        (
            vec![(b'a', b'c'), (b'x', b'z')],
            vec![(b'a', b'c'), (b'x', b'z')],
        ),
        // Out of order.
        (
            vec![(b'x', b'z'), (b'a', b'c')],
            vec![(b'a', b'c'), (b'x', b'z')],
        ),
        // Overlapping.
        (vec![(b'x', b'z'), (b'w', b'y')], vec![(b'w', b'z')]),
        // Several overlapping ranges forming two groups.
        (
            vec![
                (b'c', b'f'), (b'a', b'g'), (b'd', b'j'), (b'a', b'c'),
                (b'm', b'p'), (b'l', b's'),
            ],
            vec![(b'a', b'j'), (b'l', b's')],
        ),
        // Adjacent ranges.
        (vec![(b'x', b'z'), (b'u', b'w')], vec![(b'u', b'z')]),
        // Duplicate full ranges.
        (
            vec![(b'\x00', b'\xFF'), (b'\x00', b'\xFF')],
            vec![(b'\x00', b'\xFF')],
        ),
        // Adjacent single bytes.
        (vec![(b'a', b'a'), (b'b', b'b')], vec![(b'a', b'b')]),
    ];
    for (given, expected) in cases {
        let cls = bclass(&given);
        assert_eq!(expected, branges(&cls));
    }
}
/// Simple case folding of a Unicode class must add the case variants of
/// every character, including the non-ASCII foldings of `s` (U+017F) and
/// `k` (U+212A), and must leave classes without cased characters unchanged.
#[test]
fn class_case_fold_unicode() {
    type Ranges = Vec<(char, char)>;
    // Each entry pairs the input class with the class expected after
    // folding.
    let cases: Vec<(Ranges, Ranges)> = vec![
        (
            vec![
                ('C', 'F'), ('A', 'G'), ('D', 'J'), ('A', 'C'),
                ('M', 'P'), ('L', 'S'), ('c', 'f'),
            ],
            vec![
                ('A', 'J'), ('L', 'S'),
                ('a', 'j'), ('l', 's'),
                ('\u{17F}', '\u{17F}'),
            ],
        ),
        (
            vec![('A', 'Z')],
            vec![
                ('A', 'Z'), ('a', 'z'),
                ('\u{17F}', '\u{17F}'),
                ('\u{212A}', '\u{212A}'),
            ],
        ),
        (
            vec![('a', 'z')],
            vec![
                ('A', 'Z'), ('a', 'z'),
                ('\u{17F}', '\u{17F}'),
                ('\u{212A}', '\u{212A}'),
            ],
        ),
        (
            vec![('A', 'A'), ('_', '_')],
            vec![('A', 'A'), ('_', '_'), ('a', 'a')],
        ),
        (
            vec![('A', 'A'), ('=', '=')],
            vec![('=', '='), ('A', 'A'), ('a', 'a')],
        ),
        // No cased characters: folding is the identity.
        (vec![('\x00', '\x10')], vec![('\x00', '\x10')]),
        (
            vec![('k', 'k')],
            vec![('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}')],
        ),
        // No cased characters: folding is the identity.
        (vec![('@', '@')], vec![('@', '@')]),
    ];
    for (given, expected) in cases {
        let cls = uclass(&given);
        assert_eq!(uclass(&expected), ucasefold(&cls));
    }
}
/// Simple case folding of a byte class must add the ASCII case variants of
/// every letter and must leave classes without letters unchanged.
#[test]
fn class_case_fold_bytes() {
    type Ranges = Vec<(u8, u8)>;
    // Each entry pairs the input class with the class expected after
    // folding.
    let cases: Vec<(Ranges, Ranges)> = vec![
        (
            vec![
                (b'C', b'F'), (b'A', b'G'), (b'D', b'J'), (b'A', b'C'),
                (b'M', b'P'), (b'L', b'S'), (b'c', b'f'),
            ],
            vec![
                (b'A', b'J'), (b'L', b'S'),
                (b'a', b'j'), (b'l', b's'),
            ],
        ),
        (vec![(b'A', b'Z')], vec![(b'A', b'Z'), (b'a', b'z')]),
        (vec![(b'a', b'z')], vec![(b'A', b'Z'), (b'a', b'z')]),
        (
            vec![(b'A', b'A'), (b'_', b'_')],
            vec![(b'A', b'A'), (b'_', b'_'), (b'a', b'a')],
        ),
        (
            vec![(b'A', b'A'), (b'=', b'=')],
            vec![(b'=', b'='), (b'A', b'A'), (b'a', b'a')],
        ),
        // No letters: folding is the identity.
        (vec![(b'\x00', b'\x10')], vec![(b'\x00', b'\x10')]),
        (vec![(b'k', b'k')], vec![(b'K', b'K'), (b'k', b'k')]),
        // No letters: folding is the identity.
        (vec![(b'@', b'@')], vec![(b'@', b'@')]),
    ];
    for (given, expected) in cases {
        let cls = bclass(&given);
        assert_eq!(bclass(&expected), bcasefold(&cls));
    }
}
/// Negating a Unicode class must yield exactly the scalar values not in the
/// class, including at the edges of the range and around the surrogate gap
/// (U+D800 to U+DFFF).
#[test]
fn class_negate_unicode() {
    type Ranges = Vec<(char, char)>;
    // Each entry pairs the input class with its expected negation.
    let cases: Vec<(Ranges, Ranges)> = vec![
        (
            vec![('a', 'a')],
            vec![('\x00', '\x60'), ('\x62', '\u{10FFFF}')],
        ),
        (
            vec![('a', 'a'), ('b', 'b')],
            vec![('\x00', '\x60'), ('\x63', '\u{10FFFF}')],
        ),
        (
            vec![('a', 'c'), ('x', 'z')],
            vec![
                ('\x00', '\x60'), ('\x64', '\x77'), ('\x7B', '\u{10FFFF}'),
            ],
        ),
        // Ranges touching either end of the scalar value space.
        (vec![('\x00', 'a')], vec![('\x62', '\u{10FFFF}')]),
        (vec![('a', '\u{10FFFF}')], vec![('\x00', '\x60')]),
        // Full and empty classes are each other's negation.
        (vec![('\x00', '\u{10FFFF}')], vec![]),
        (vec![], vec![('\x00', '\u{10FFFF}')]),
        (
            vec![('\x00', '\u{10FFFD}'), ('\u{10FFFF}', '\u{10FFFF}')],
            vec![('\u{10FFFE}', '\u{10FFFE}')],
        ),
        // Boundaries next to the surrogate gap.
        (vec![('\x00', '\u{D7FF}')], vec![('\u{E000}', '\u{10FFFF}')]),
        (vec![('\x00', '\u{D7FE}')], vec![('\u{D7FF}', '\u{10FFFF}')]),
        (vec![('\u{E000}', '\u{10FFFF}')], vec![('\x00', '\u{D7FF}')]),
        (vec![('\u{E001}', '\u{10FFFF}')], vec![('\x00', '\u{E000}')]),
    ];
    for (given, expected) in cases {
        let cls = uclass(&given);
        assert_eq!(uclass(&expected), unegate(&cls));
    }
}
/// Negating a byte class must yield exactly the bytes not in the class,
/// including at both ends of the byte range.
#[test]
fn class_negate_bytes() {
    type Ranges = Vec<(u8, u8)>;
    // Each entry pairs the input class with its expected negation.
    let cases: Vec<(Ranges, Ranges)> = vec![
        (
            vec![(b'a', b'a')],
            vec![(b'\x00', b'\x60'), (b'\x62', b'\xFF')],
        ),
        (
            vec![(b'a', b'a'), (b'b', b'b')],
            vec![(b'\x00', b'\x60'), (b'\x63', b'\xFF')],
        ),
        (
            vec![(b'a', b'c'), (b'x', b'z')],
            vec![
                (b'\x00', b'\x60'), (b'\x64', b'\x77'), (b'\x7B', b'\xFF'),
            ],
        ),
        // Ranges touching either end of the byte space.
        (vec![(b'\x00', b'a')], vec![(b'\x62', b'\xFF')]),
        (vec![(b'a', b'\xFF')], vec![(b'\x00', b'\x60')]),
        // Full and empty classes are each other's negation.
        (vec![(b'\x00', b'\xFF')], vec![]),
        (vec![], vec![(b'\x00', b'\xFF')]),
        (
            vec![(b'\x00', b'\xFD'), (b'\xFF', b'\xFF')],
            vec![(b'\xFE', b'\xFE')],
        ),
    ];
    for (given, expected) in cases {
        let cls = bclass(&given);
        assert_eq!(bclass(&expected), bnegate(&cls));
    }
}
/// The union of two Unicode classes must contain the ranges of both, with
/// overlapping ranges merged.
#[test]
fn class_union_unicode() {
    let left = uclass(&[('a', 'g'), ('m', 't'), ('A', 'C')]);
    let right = uclass(&[('a', 'z')]);
    let merged = uunion(&left, &right);
    assert_eq!(uclass(&[('a', 'z'), ('A', 'C')]), merged);
}
/// The union of two byte classes must contain the ranges of both, with
/// overlapping ranges merged.
#[test]
fn class_union_bytes() {
    let left = bclass(&[(b'a', b'g'), (b'm', b't'), (b'A', b'C')]);
    let right = bclass(&[(b'a', b'z')]);
    let merged = bunion(&left, &right);
    assert_eq!(bclass(&[(b'a', b'z'), (b'A', b'C')]), merged);
}
/// The intersection of two Unicode classes must contain exactly the scalar
/// values present in both, for empty, identical, disjoint, nested and
/// partially overlapping inputs.
#[test]
fn class_intersect_unicode() {
    type Ranges = Vec<(char, char)>;
    // Each entry is (first class, second class, expected intersection).
    let cases: Vec<(Ranges, Ranges, Ranges)> = vec![
        (vec![], vec![('a', 'a')], vec![]),
        (vec![('a', 'a')], vec![('a', 'a')], vec![('a', 'a')]),
        (vec![('a', 'a')], vec![('b', 'b')], vec![]),
        (vec![('a', 'a')], vec![('a', 'c')], vec![('a', 'a')]),
        (vec![('a', 'b')], vec![('a', 'c')], vec![('a', 'b')]),
        (vec![('a', 'b')], vec![('b', 'c')], vec![('b', 'b')]),
        (vec![('a', 'b')], vec![('c', 'd')], vec![]),
        (vec![('b', 'c')], vec![('a', 'd')], vec![('b', 'c')]),
        (
            vec![('a', 'b'), ('d', 'e'), ('g', 'h')],
            vec![('a', 'h')],
            vec![('a', 'b'), ('d', 'e'), ('g', 'h')],
        ),
        (
            vec![('a', 'b'), ('d', 'e'), ('g', 'h')],
            vec![('a', 'b'), ('d', 'e'), ('g', 'h')],
            vec![('a', 'b'), ('d', 'e'), ('g', 'h')],
        ),
        (
            vec![('a', 'b'), ('g', 'h')],
            vec![('d', 'e'), ('k', 'l')],
            vec![],
        ),
        (
            vec![('a', 'b'), ('d', 'e'), ('g', 'h')],
            vec![('h', 'h')],
            vec![('h', 'h')],
        ),
        (
            vec![('a', 'b'), ('e', 'f'), ('i', 'j')],
            vec![('c', 'd'), ('g', 'h'), ('k', 'l')],
            vec![],
        ),
        (
            vec![('a', 'b'), ('c', 'd'), ('e', 'f')],
            vec![('b', 'c'), ('d', 'e'), ('f', 'g')],
            vec![('b', 'f')],
        ),
    ];
    for (first, second, expected) in cases {
        let cls1 = uclass(&first);
        let cls2 = uclass(&second);
        assert_eq!(uclass(&expected), uintersect(&cls1, &cls2));
    }
}
// Intersection of byte classes, covering empty operands, identical operands,
// partial overlap, containment and interleaved ranges.
#[test]
fn class_intersect_bytes() {
    // `check!(lhs, rhs => want)` builds all three classes from range lists
    // and asserts that `lhs & rhs == want`. A macro is used instead of a
    // helper function so a failure is reported at the offending case's line.
    macro_rules! check {
        ($lhs:expr, $rhs:expr => $want:expr) => {{
            let lhs = bclass(&$lhs);
            let rhs = bclass(&$rhs);
            assert_eq!(bclass(&$want), bintersect(&lhs, &rhs));
        }};
    }

    // Single-range operands.
    check!([], [(b'a', b'a')] => []);
    check!([(b'a', b'a')], [(b'a', b'a')] => [(b'a', b'a')]);
    check!([(b'a', b'a')], [(b'b', b'b')] => []);
    check!([(b'a', b'a')], [(b'a', b'c')] => [(b'a', b'a')]);
    check!([(b'a', b'b')], [(b'a', b'c')] => [(b'a', b'b')]);
    check!([(b'a', b'b')], [(b'b', b'c')] => [(b'b', b'b')]);
    check!([(b'a', b'b')], [(b'c', b'd')] => []);
    check!([(b'b', b'c')], [(b'a', b'd')] => [(b'b', b'c')]);

    // Multi-range operands.
    check!(
        [(b'a', b'b'), (b'd', b'e'), (b'g', b'h')],
        [(b'a', b'h')]
        => [(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]
    );
    check!(
        [(b'a', b'b'), (b'd', b'e'), (b'g', b'h')],
        [(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]
        => [(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]
    );
    check!(
        [(b'a', b'b'), (b'g', b'h')],
        [(b'd', b'e'), (b'k', b'l')]
        => []
    );
    check!(
        [(b'a', b'b'), (b'd', b'e'), (b'g', b'h')],
        [(b'h', b'h')]
        => [(b'h', b'h')]
    );
    check!(
        [(b'a', b'b'), (b'e', b'f'), (b'i', b'j')],
        [(b'c', b'd'), (b'g', b'h'), (b'k', b'l')]
        => []
    );
    check!(
        [(b'a', b'b'), (b'c', b'd'), (b'e', b'f')],
        [(b'b', b'c'), (b'd', b'e'), (b'f', b'g')]
        => [(b'b', b'f')]
    );
}
// Difference (`lhs - rhs`) of Unicode classes, covering empty operands,
// trimming at either end, splitting a range in the middle, and subtracting
// several ranges at once.
#[test]
fn class_difference_unicode() {
    // `check!(lhs, rhs => want)` builds all three classes from range lists
    // and asserts that `lhs - rhs == want`. A macro is used instead of a
    // helper function so a failure is reported at the offending case's line.
    macro_rules! check {
        ($lhs:expr, $rhs:expr => $want:expr) => {{
            let lhs = uclass(&$lhs);
            let rhs = uclass(&$rhs);
            assert_eq!(uclass(&$want), udifference(&lhs, &rhs));
        }};
    }

    // Identical and empty operands.
    check!([('a', 'a')], [('a', 'a')] => []);
    check!([('a', 'a')], [] => [('a', 'a')]);
    check!([], [('a', 'a')] => []);

    // Removing a single code point from the start, end and middle.
    check!([('a', 'z')], [('a', 'a')] => [('b', 'z')]);
    check!([('a', 'z')], [('z', 'z')] => [('a', 'y')]);
    check!([('a', 'z')], [('m', 'm')] => [('a', 'l'), ('n', 'z')]);

    // Multi-range minuend.
    check!(
        [('a', 'c'), ('g', 'i'), ('r', 't')],
        [('a', 'z')]
        => []
    );
    check!(
        [('a', 'c'), ('g', 'i'), ('r', 't')],
        [('d', 'v')]
        => [('a', 'c')]
    );
    check!(
        [('a', 'c'), ('g', 'i'), ('r', 't')],
        [('b', 'g'), ('s', 'u')]
        => [('a', 'a'), ('h', 'i'), ('r', 'r')]
    );
    check!(
        [('a', 'c'), ('g', 'i'), ('r', 't')],
        [('b', 'd'), ('e', 'g'), ('s', 'u')]
        => [('a', 'a'), ('h', 'i'), ('r', 'r')]
    );

    // Multi-range subtrahend.
    check!(
        [('x', 'z')],
        [('a', 'c'), ('e', 'g'), ('s', 'u')]
        => [('x', 'z')]
    );
    check!(
        [('a', 'z')],
        [('a', 'c'), ('e', 'g'), ('s', 'u')]
        => [('d', 'd'), ('h', 'r'), ('v', 'z')]
    );
}
// Difference (`lhs - rhs`) of byte classes, covering empty operands, trimming
// at either end, splitting a range in the middle, and subtracting several
// ranges at once.
#[test]
fn class_difference_bytes() {
    // `check!(lhs, rhs => want)` builds all three classes from range lists
    // and asserts that `lhs - rhs == want`. A macro is used instead of a
    // helper function so a failure is reported at the offending case's line.
    macro_rules! check {
        ($lhs:expr, $rhs:expr => $want:expr) => {{
            let lhs = bclass(&$lhs);
            let rhs = bclass(&$rhs);
            assert_eq!(bclass(&$want), bdifference(&lhs, &rhs));
        }};
    }

    // Identical and empty operands.
    check!([(b'a', b'a')], [(b'a', b'a')] => []);
    check!([(b'a', b'a')], [] => [(b'a', b'a')]);
    check!([], [(b'a', b'a')] => []);

    // Removing a single byte from the start, end and middle.
    check!([(b'a', b'z')], [(b'a', b'a')] => [(b'b', b'z')]);
    check!([(b'a', b'z')], [(b'z', b'z')] => [(b'a', b'y')]);
    check!([(b'a', b'z')], [(b'm', b'm')] => [(b'a', b'l'), (b'n', b'z')]);

    // Multi-range minuend.
    check!(
        [(b'a', b'c'), (b'g', b'i'), (b'r', b't')],
        [(b'a', b'z')]
        => []
    );
    check!(
        [(b'a', b'c'), (b'g', b'i'), (b'r', b't')],
        [(b'd', b'v')]
        => [(b'a', b'c')]
    );
    check!(
        [(b'a', b'c'), (b'g', b'i'), (b'r', b't')],
        [(b'b', b'g'), (b's', b'u')]
        => [(b'a', b'a'), (b'h', b'i'), (b'r', b'r')]
    );
    check!(
        [(b'a', b'c'), (b'g', b'i'), (b'r', b't')],
        [(b'b', b'd'), (b'e', b'g'), (b's', b'u')]
        => [(b'a', b'a'), (b'h', b'i'), (b'r', b'r')]
    );

    // Multi-range subtrahend.
    check!(
        [(b'x', b'z')],
        [(b'a', b'c'), (b'e', b'g'), (b's', b'u')]
        => [(b'x', b'z')]
    );
    check!(
        [(b'a', b'z')],
        [(b'a', b'c'), (b'e', b'g'), (b's', b'u')]
        => [(b'd', b'd'), (b'h', b'r'), (b'v', b'z')]
    );
}
// Symmetric difference of Unicode classes: the shared span `g-m` is removed,
// leaving the parts that belong to exactly one operand.
#[test]
fn class_symmetric_difference_unicode() {
    let lhs = uclass(&[('a', 'm')]);
    let rhs = uclass(&[('g', 't')]);
    let exclusive = usymdifference(&lhs, &rhs);
    assert_eq!(uclass(&[('a', 'f'), ('n', 't')]), exclusive);
}
// Symmetric difference of byte classes: the shared span `g-m` is removed,
// leaving the parts that belong to exactly one operand.
#[test]
fn class_symmetric_difference_bytes() {
    let lhs = bclass(&[(b'a', b'm')]);
    let rhs = bclass(&[(b'g', b't')]);
    let exclusive = bsymdifference(&lhs, &rhs);
    assert_eq!(bclass(&[(b'a', b'f'), (b'n', b't')]), exclusive);
}
// Building an HIR literal from the ASCII byte `b'a'` is expected to panic;
// `#[should_panic]` turns that panic into a passing test.
// NOTE(review): presumably `Hir::literal` accepts `Literal::Byte` only for
// non-ASCII bytes, which is what the test name alludes to. Confirm against
// the definition of `Hir::literal`.
#[test]
#[should_panic]
fn hir_byte_literal_non_ascii() {
    let ascii_byte = Literal::Byte(b'a');
    let _ = Hir::literal(ascii_byte);
}
// Builds a deeply nested `Hir` on a thread with a very small stack and lets
// it go out of scope there, so that a stack overflow while building or
// dropping the value would abort the test.
// NOTE(review): presumably this guards a non-recursive `Drop` for `Hir`; the
// impl is not visible from here, so confirm.
// Only compiled on unix and windows targets.
#[test]
#[cfg(any(unix, windows))]
fn no_stack_overflow_on_drop() {
use std::thread;
// The whole scenario lives in a closure so it can be run on a dedicated
// thread with a custom stack size.
let run = || {
let mut expr = Hir::empty();
// Each iteration adds four levels of nesting (group, repetition, concat,
// alternation), for 400 levels after 100 iterations.
for _ in 0..100 {
expr = Hir::group(Group {
kind: GroupKind::NonCapturing,
hir: Box::new(expr),
});
expr = Hir::repetition(Repetition {
kind: RepetitionKind::ZeroOrOne,
greedy: true,
hir: Box::new(expr),
});
// Concat and Alternation are written as struct literals rather than built
// through constructor functions.
// NOTE(review): presumably so single-element wrappers are not simplified
// away by the constructors; confirm against `Hir::concat` and
// `Hir::alternation`.
expr = Hir {
kind: HirKind::Concat(vec![expr]),
info: HirInfo::new(),
};
expr = Hir {
kind: HirKind::Alternation(vec![expr]),
info: HirInfo::new(),
};
}
assert!(!expr.kind.is_empty());
// `expr` goes out of scope here, so the nested value is dropped on the
// small-stack thread.
};
// Request a 1 KiB stack to make an overflow as likely as possible. The
// platform may round this up to its own minimum stack size.
// A panic inside the thread surfaces through `join().unwrap()` and fails
// the test.
thread::Builder::new()
.stack_size(1<<10)
.spawn(run)
.unwrap()
.join()
.unwrap();
}
}