automaton

An automaton library & basic programs written in Rust
git clone git://git.ethandl.dev/automaton
Log | Files | Refs | README

regex_tests.rs (7129B)


      1 use crate::{regex::Regex, Automaton, Encodable};
      2 use rand::Rng;
      3 
      4 #[test]
      5 fn tokeniser() {
      6     // TODO: placeholder test, it currently asserts nothing. Not really sure how I'm going to test this; the draft assertions below stay disabled.
      7     /*
      8     assert_eq!(
      9         Regex::tokenise("a*").unwrap(),
     10         RegexNonTerminal::KleeneGroup(vec![RegexNonTerminal::Terminal(RegexTerminal::Character(
     11             'a'
     12         ))])
     13     );
     14     assert_eq!(Regex::tokenise("\\*\\\\").unwrap(), vec![]);
     15     assert_eq!(Regex::tokenise("\\*\\\\*").unwrap(), vec![]);
     16     */
     17 }
     18 
     19 const PARSE_FAIL_STR: &str = "Unable to parse regex!"; // Panic message handed to `expect` when a test's regex fails to parse.
     20 
     21 /// Tests if given a regex that is a concatenation of a bunch of random characters, the regex will recognise itself.
     22 /// The direct correlation between regex source string and accepted input breaks down when you consider escape sequences, but the concept still holds.
     23 ///
     24 /// E.g:
     25 ///  - The regex `abdfa` should accept the string `"abdfa"`.
     26 ///  - The regex `a\\cd` should accept the string `"a\cd"`.
     27 #[test]
     28 fn accepts_long_concats() {
     29     let alphabet = [
     30         "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r",
     31         "s", "t", "u", "v", "w", "x", "y", "z", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J",
     32         "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "\\.",
     33         "\\+", "\\*", "\\?", "\\(", "\\)", "\\\\",
     34     ];
     35 
     36     let mut rng = rand::thread_rng();
     37     for len in 1..500 {
     38         let regex_str: String = (0..len)
     39             .map(|_| alphabet[rng.gen_range(0..alphabet.len())])
     40             .collect();
     41         let mut escaped = false;
     42         let input_str = regex_str
     43             .chars()
     44             .filter(|&c| {
     45                 let ret = escaped || c != '\\';
     46                 escaped = !ret;
     47                 ret
     48             })
     49             .collect::<String>();
     50         let regex = Regex::parse(regex_str.as_str()).expect(PARSE_FAIL_STR);
     51         assert!(regex.accepts(input_str.as_str()));
     52     }
     53 }
     54 
     55 /// Tests a bunch of different wildcard scenarios and edge cases.
     56 ///
     57 /// E.g:
     58 ///  - The regex `a.*a` should recognise all strings starting and ending with `'a'`.
     59 #[test]
     60 fn wildcard_tests() {
     61     let alphabet = [
     62         "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r",
     63         "s", "t", "u", "v", "w", "x", "y", "z", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J",
     64         "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", " ",
     65     ];
     66 
     67     let mut rng = rand::thread_rng();
     68 
     69     let regex = Regex::parse("a.*a").expect(PARSE_FAIL_STR);
     70 
     71     for len in 2..1000 {
     72         let input_str: String = (0..len)
     73             .enumerate()
     74             .map(|(i, _)| {
     75                 if i != 0 && i != len - 1 {
     76                     alphabet[rng.gen_range(0..alphabet.len())]
     77                 } else {
     78                     "a"
     79                 }
     80             })
     81             .collect();
     82         assert!(regex.accepts(input_str.as_str()))
     83     }
     84 }
     85 
     86 /// Tests Kleene groups, both the Kleene star and the "Kleene plus" sugar.
     87 ///
     88 /// E.g:
     89 ///  - The regex `a*` should recognise zero or more `'a'`'s.
     90 ///  - The regex `a+` should recognise one or more `'a'`'s.
     91 ///  - The regex `(a*)*` should recognise zero or more groups of zero or more `'a'`'s.
     92 ///  - The regex `(a+)*` should recognise zero or more groups of one or more `'a'`'s.
     93 ///  - The regex `(a+)+` should recognise one or more groups of one or more `'a'`'s.
     94 ///  - The regex `a+b` should recognise one or more `'a'`'s all followed by a `'b'`.
     95 ///  - The regex `(a+b)+` should recognise one or more groups of one or more `'a'`'s followed by a `'b'`.
     96 #[test]
     97 fn kleene_tests() {
     98     let alphabet = [
     99         "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r",
    100         "s", "t", "u", "v", "w", "x", "y", "z", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J",
    101         "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", " ",
    102     ];
    103     let mut rng = rand::thread_rng();
    104 
    105     let regex = Regex::parse("a*").expect(PARSE_FAIL_STR);
    106     for len in 0..1000 {
    107         let s: String = (0..len).map(|_| 'a').collect();
    108         assert!(regex.accepts(s.as_str()));
    109     }
    110 
    111     let regex = Regex::parse("a+").expect(PARSE_FAIL_STR);
    112     assert!(!regex.accepts(""));
    113     for len in 1..1000 {
    114         let s: String = (0..len).map(|_| 'a').collect();
    115         assert!(regex.accepts(s.as_str()));
    116     }
    117 
    118     let regex = Regex::parse("(a*)*").expect(PARSE_FAIL_STR);
    119     for n_substrings in 0..100 {
    120         let s: String = (0..n_substrings)
    121             .map(|_| (0..rng.gen_range(1..100)).map(|_| 'a').collect::<String>())
    122             .collect();
    123         assert!(regex.accepts(s.as_str()));
    124     }
    125 
    126     let regex = Regex::parse("(a+)*").expect(PARSE_FAIL_STR);
    127     for n_substrings in 0..100 {
    128         let s: String = (0..n_substrings)
    129             .map(|_| (1..rng.gen_range(1..100)).map(|_| 'a').collect::<String>())
    130             .collect();
    131         assert!(regex.accepts(s.as_str()));
    132     }
    133 
    134     let regex = Regex::parse("(a+)+").expect(PARSE_FAIL_STR);
    135     assert!(!regex.accepts(""));
    136     for n_substrings in 1..100 {
    137         let s: String = (0..n_substrings)
    138             .map(|_| (1..rng.gen_range(1..100)).map(|_| 'a').collect::<String>())
    139             .collect();
    140         assert!(regex.accepts(s.as_str()));
    141     }
    142 
    143     let regex = Regex::parse("a+b").expect(PARSE_FAIL_STR);
    144     // Should accept any number of `a`'s followed by a `b`
    145     for len in 2..1000 {
    146         let s: String = (0..len)
    147             .map(|i| if i != len - 1 { 'a' } else { 'b' })
    148             .collect();
    149         assert!(regex.accepts(s.as_str()));
    150     }
    151     // Should not accept multiple `b`'s
    152     for len in 3..1000 {
    153         let n_bs = rng.gen_range(2..len);
    154         let s: String = (0..len)
    155             .map(|i| if i < len - n_bs { 'a' } else { 'b' })
    156             .collect();
    157         assert!(!regex.accepts(s.as_str()));
    158     }
    159     // Should not accept just `b`
    160     assert!(!regex.accepts("b"));
    161     // Should not accept just `a`'s
    162     for len in 1..1000 {
    163         let s: String = (0..len).map(|_| 'a').collect();
    164         assert!(!regex.accepts(s.as_str()));
    165     }
    166     // Should not accept any other char in the middle of the `a`'s
    167     for len in 3..1000 {
    168         let rand_index = rng.gen_range(1..len - 1);
    169         let s: String = (0..len)
    170             .map(|i| {
    171                 if i == rand_index {
    172                     alphabet[rng.gen_range(2..alphabet.len())] // Can't be 'a' or 'b'
    173                 } else if i != len - 1 {
    174                     "a"
    175                 } else {
    176                     "b"
    177                 }
    178             })
    179             .collect();
    180         assert!(!regex.accepts(s.as_str()));
    181     }
    182 
    183     let regex = Regex::parse("(a+b)+").expect(PARSE_FAIL_STR);
    184     for n_substrings in 1..100 {
    185         let s: String = (0..n_substrings)
    186             .map(|_| {
    187                 let substr_len = rng.gen_range(2..100);
    188                 (0..substr_len)
    189                     .map(|i| if i != substr_len - 1 { 'a' } else { 'b' })
    190                     .collect::<String>()
    191             })
    192             .collect();
    193         assert!(regex.accepts(s.as_str()));
    194     }
    195 }