regex_tests.rs (7129B)
1 use crate::{regex::Regex, Automaton, Encodable}; 2 use rand::Rng; 3 4 #[test] 5 fn tokeniser() { 6 // Not realy sure how I'm going to test this 7 /* 8 assert_eq!( 9 Regex::tokenise("a*").unwrap(), 10 RegexNonTerminal::KleeneGroup(vec![RegexNonTerminal::Terminal(RegexTerminal::Character( 11 'a' 12 ))]) 13 ); 14 assert_eq!(Regex::tokenise("\\*\\\\").unwrap(), vec![]); 15 assert_eq!(Regex::tokenise("\\*\\\\*").unwrap(), vec![]); 16 */ 17 } 18 19 const PARSE_FAIL_STR: &str = "Unable to parse regex!"; 20 21 /// Tests if given a regex that is a concatenation of a bunch of random characters, the regex will recognise itself. 22 /// The direct correlation between regex source string and accepted input breaks down when you consider escape sequences, but the concept still holds. 23 /// 24 /// E.g: 25 /// - The regex `abdfa` should accept the string `"abdfa"`. 26 /// - The regex `a\\cd` should accept the string `"a\cd"`. 27 #[test] 28 fn accepts_long_concats() { 29 let alphabet = [ 30 "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", 31 "s", "t", "u", "v", "w", "x", "y", "z", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", 32 "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "\\.", 33 "\\+", "\\*", "\\?", "\\(", "\\)", "\\\\", 34 ]; 35 36 let mut rng = rand::thread_rng(); 37 for len in 1..500 { 38 let regex_str: String = (0..len) 39 .map(|_| alphabet[rng.gen_range(0..alphabet.len())]) 40 .collect(); 41 let mut escaped = false; 42 let input_str = regex_str 43 .chars() 44 .filter(|&c| { 45 let ret = escaped || c != '\\'; 46 escaped = !ret; 47 ret 48 }) 49 .collect::<String>(); 50 let regex = Regex::parse(regex_str.as_str()).expect(PARSE_FAIL_STR); 51 assert!(regex.accepts(input_str.as_str())); 52 } 53 } 54 55 /// Tests a bunch of different wildcard scenarios and edge cases. 56 /// 57 /// E.g: 58 /// - The regex `a.*a` should recognise all strings starting and ending with `'a'`. 
59 #[test] 60 fn wildcard_tests() { 61 let alphabet = [ 62 "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", 63 "s", "t", "u", "v", "w", "x", "y", "z", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", 64 "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", " ", 65 ]; 66 67 let mut rng = rand::thread_rng(); 68 69 let regex = Regex::parse("a.*a").expect(PARSE_FAIL_STR); 70 71 for len in 2..1000 { 72 let input_str: String = (0..len) 73 .enumerate() 74 .map(|(i, _)| { 75 if i != 0 && i != len - 1 { 76 alphabet[rng.gen_range(0..alphabet.len())] 77 } else { 78 "a" 79 } 80 }) 81 .collect(); 82 assert!(regex.accepts(input_str.as_str())) 83 } 84 } 85 86 /// Tests Kleene groups, both the Kleene star and the "Kleene plus" sugar. 87 /// 88 /// E.g: 89 /// - The regex `a*` should recognise zero or more `'a'`'s. 90 /// - The regex `a+` should recognise one or more `'a'`'s. 91 /// - The regex `(a*)*` should recognise zero or more groups of zero or more `'a'`'s. 92 /// - The regex `(a+)*` should recognise zero or more groups of one or more `'a'`'s. 93 /// - The regex `(a+)+` should recognise one or more groups of one or more `'a'`'s. 94 /// - The regex `a+b` should recognise one or more `'a'`'s all followed by a `'b'`. 95 /// - The regex `(a+b)+` should recognise one or more groups of one or more `'a'`'s followed by a `'b'`. 
#[test]
fn kleene_tests() {
    // Index 0 is "a" and index 1 is "b"; the "intruder" case below relies on that ordering.
    let alphabet = [
        "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r",
        "s", "t", "u", "v", "w", "x", "y", "z", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J",
        "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", " ",
    ];
    let mut rng = rand::thread_rng();

    // `a*`: zero or more `a`s, including the empty string.
    let regex = Regex::parse("a*").expect(PARSE_FAIL_STR);
    for len in 0..1000 {
        let s = "a".repeat(len);
        assert!(regex.accepts(s.as_str()), "`a*` rejected {:?}", s);
    }

    // `a+`: one or more `a`s, so the empty string must be rejected.
    let regex = Regex::parse("a+").expect(PARSE_FAIL_STR);
    assert!(!regex.accepts(""), "`a+` accepted the empty string");
    for len in 1..1000 {
        let s = "a".repeat(len);
        assert!(regex.accepts(s.as_str()), "`a+` rejected {:?}", s);
    }

    // `(a*)*`: zero or more groups of `a`s.
    let regex = Regex::parse("(a*)*").expect(PARSE_FAIL_STR);
    for n_substrings in 0..100 {
        let s: String = (0..n_substrings)
            .map(|_| "a".repeat(rng.gen_range(1..100)))
            .collect();
        assert!(regex.accepts(s.as_str()), "`(a*)*` rejected {:?}", s);
    }

    // `(a+)*`: zero or more groups, each holding at least one `a`.
    let regex = Regex::parse("(a+)*").expect(PARSE_FAIL_STR);
    for n_substrings in 0..100 {
        let s: String = (0..n_substrings)
            .map(|_| "a".repeat(rng.gen_range(1..100)))
            .collect();
        assert!(regex.accepts(s.as_str()), "`(a+)*` rejected {:?}", s);
    }

    // `(a+)+`: one or more groups, each holding at least one `a`.
    // Every group must be non-empty: an all-empty concatenation would be `""`, which this
    // regex has to reject, and would make the positive assertion below fail spuriously.
    let regex = Regex::parse("(a+)+").expect(PARSE_FAIL_STR);
    assert!(!regex.accepts(""), "`(a+)+` accepted the empty string");
    for n_substrings in 1..100 {
        let s: String = (0..n_substrings)
            .map(|_| "a".repeat(rng.gen_range(1..100)))
            .collect();
        assert!(regex.accepts(s.as_str()), "`(a+)+` rejected {:?}", s);
    }

    let regex = Regex::parse("a+b").expect(PARSE_FAIL_STR);
    // Should accept any number of `a`'s followed by a `b`
    for len in 2..1000 {
        let s = format!("{}b", "a".repeat(len - 1));
        assert!(regex.accepts(s.as_str()), "`a+b` rejected {:?}", s);
    }
    // Should not accept multiple `b`'s
    for len in 3..1000 {
        // Between 2 and `len - 1` trailing `b`s, leaving at least one leading `a`.
        let n_bs = rng.gen_range(2..len);
        let s = format!("{}{}", "a".repeat(len - n_bs), "b".repeat(n_bs));
        assert!(!regex.accepts(s.as_str()), "`a+b` accepted {:?}", s);
    }
    // Should not accept just `b`
    assert!(!regex.accepts("b"), "`a+b` accepted \"b\"");
    // Should not accept just `a`'s
    for len in 1..1000 {
        let s = "a".repeat(len);
        assert!(!regex.accepts(s.as_str()), "`a+b` accepted {:?}", s);
    }
    // Should not accept any other char in the middle of the `a`'s
    for len in 3..1000 {
        // The intruder never lands on the first character or on the trailing `b`.
        let intruder_at = rng.gen_range(1..len - 1);
        let intruder = alphabet[rng.gen_range(2..alphabet.len())]; // Can't be 'a' or 'b'
        let s: String = (0..len)
            .map(|i| {
                if i == intruder_at {
                    intruder
                } else if i != len - 1 {
                    "a"
                } else {
                    "b"
                }
            })
            .collect();
        assert!(!regex.accepts(s.as_str()), "`a+b` accepted {:?}", s);
    }

    // `(a+b)+`: one or more groups of one or more `a`s terminated by a single `b`.
    let regex = Regex::parse("(a+b)+").expect(PARSE_FAIL_STR);
    for n_substrings in 1..100 {
        let s: String = (0..n_substrings)
            .map(|_| {
                let substr_len = rng.gen_range(2..100);
                format!("{}b", "a".repeat(substr_len - 1))
            })
            .collect();
        assert!(regex.accepts(s.as_str()), "`(a+b)+` rejected {:?}", s);
    }
}