nfa.rs (16563B)
1 use crate::{ 2 dfa::{self, DFA}, 3 Automaton, AutomatonError, Dot, Encodable, FiniteStateAutomaton, 4 }; 5 use core::fmt::Display; 6 use petgraph::{Directed, Graph}; 7 use std::{ 8 collections::{BTreeMap, BTreeSet, HashMap, HashSet}, 9 fmt, 10 }; 11 12 use serde_json::{from_str, Value}; 13 14 #[derive(Hash, Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord)] 15 pub enum Transition { 16 Char(char), 17 Wildcard, 18 } 19 20 impl Display for Transition { 21 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 22 match self { 23 Transition::Char(c) => write!(f, "'{}'", c), 24 Transition::Wildcard => write!(f, "•"), 25 } 26 } 27 } 28 29 // TODO: From doesn't seem to be 2-way... Maybe I'm using it wrong? 30 impl From<dfa::Transition> for Transition { 31 fn from(transition: dfa::Transition) -> Self { 32 match transition { 33 dfa::Transition::Char(c) => Transition::Char(c), 34 dfa::Transition::Wildcard => Transition::Wildcard, 35 } 36 } 37 } 38 39 impl From<Transition> for dfa::Transition { 40 fn from(transition: Transition) -> Self { 41 match transition { 42 Transition::Char(c) => dfa::Transition::Char(c), 43 Transition::Wildcard => dfa::Transition::Wildcard, 44 } 45 } 46 } 47 48 #[derive(Clone, Debug, PartialEq)] 49 pub struct NFA { 50 pub alphabet: HashSet<char>, 51 states: HashSet<u64>, 52 initial_state: u64, 53 final_states: HashSet<u64>, 54 transition_table: HashMap<(u64, Transition), Vec<u64>>, 55 } 56 57 impl NFA { 58 pub fn new( 59 alphabet: HashSet<char>, 60 states: HashSet<u64>, 61 initial_state: u64, 62 final_states: HashSet<u64>, 63 transition_table: HashMap<(u64, Transition), Vec<u64>>, 64 ) -> Self { 65 NFA { 66 alphabet, 67 states, 68 initial_state, 69 final_states, 70 transition_table, 71 } 72 } 73 } 74 75 impl Automaton for NFA { 76 fn accepts(&self, input: &str) -> bool { 77 let mut current_states: Vec<u64> = vec![self.initial_state]; 78 let mut new_states: Vec<u64> = Vec::new(); 79 for c in input.chars() { 80 for i in 0..current_states.len() { 81 match current_states.get(i).map_or(None, |&s| { 82 self.transition_table.get(&(s, Transition::Char(c))).map_or( 83 self.transition_table 84 .get(&(s, Transition::Wildcard)) 85 .and_then(|s| Some(s.to_owned())), 86 |v| Some(v.to_owned()), 87 ) 88 }) { 89 Some(states) => { 90 new_states.append(&mut states.clone()); 91 } 92 None => (), 93 } 94 } 95 current_states = new_states; 96 new_states = Vec::new(); 97 if current_states.len() == 0 { 98 return false; 99 } 100 } 101 current_states.iter().any(|s| self.final_states.contains(s)) 102 } 103 } 104 105 impl FiniteStateAutomaton for NFA { 106 fn as_dfa(&self) -> Result<dfa::DFA, AutomatonError> { 107 let mut states = HashSet::new(); 108 let initial_state = 0; 109 let mut current_state = initial_state; 110 let mut final_states = HashSet::new(); 111 let mut dfa_transition_to_nfa_sets: HashMap<(u64, dfa::Transition), BTreeSet<u64>> = 112 HashMap::new(); 113 114 let mut to_be_processed: BTreeSet<BTreeSet<u64>> = BTreeSet::new(); 115 let mut processed: BTreeSet<BTreeSet<u64>> = BTreeSet::new(); 116 let mut nfa_set_to_dfa_state: HashMap<BTreeSet<u64>, u64> = HashMap::new(); 117 let mut dfa_state_to_nfa_set: HashMap<u64, BTreeSet<u64>> = HashMap::new(); 118 119 to_be_processed.insert(BTreeSet::from([self.initial_state])); 120 121 let mut nfa_alphabet: Vec<Transition> = 122 self.alphabet.iter().map(|&c| Transition::Char(c)).collect(); 123 nfa_alphabet.push(Transition::Wildcard); 124 125 while to_be_processed.len() != 0 { 126 let current_nfa_set = match to_be_processed.pop_first() { 127 Some(set) => set, 128 None => continue, 129 }; 130 if processed.contains(¤t_nfa_set) { 131 continue; 132 } 133 134 states.insert(current_state); 135 if current_nfa_set 136 .iter() 137 .any(|state| self.final_states.contains(state)) 138 { 139 final_states.insert(current_state); 140 } 141 142 nfa_set_to_dfa_state.insert(current_nfa_set.clone(), current_state); 143 dfa_state_to_nfa_set.insert(current_state, current_nfa_set.clone()); 144 145 for &transition in nfa_alphabet.iter() { 146 let nfa_dests: BTreeSet<u64> = current_nfa_set 147 .iter() 148 .filter_map(|&s| { 149 // We need to take into account the wildcard transition dests and take the union of them with our transition dests (determinism) 150 let wildcard_transitions = 151 self.transition_table.get(&(s, Transition::Wildcard)); 152 if transition == Transition::Wildcard { 153 wildcard_transitions.map(Vec::to_owned) 154 } else { 155 match wildcard_transitions { 156 Some(wcs) => { 157 let regular_dests = self 158 .transition_table 159 .get(&(s, transition)) 160 .map_or([].iter(), |dests| dests.into_iter()); 161 let mut union = wcs.to_owned(); 162 union.extend(regular_dests); 163 Some(union) 164 } 165 None => self 166 .transition_table 167 .get(&(s, transition)) 168 .map(Vec::to_owned), 169 } 170 } 171 }) 172 .flatten() 173 .collect(); 174 to_be_processed.insert(nfa_dests.clone()); 175 dfa_transition_to_nfa_sets.insert( 176 (current_state, dfa::Transition::from(transition)), 177 nfa_dests.clone(), 178 ); 179 } 180 181 processed.insert(current_nfa_set); 182 current_state += 1; 183 } 184 185 let transition_table: HashMap<(u64, dfa::Transition), u64> = dfa_transition_to_nfa_sets 186 .iter() 187 .map(|((from, transition), to)| { 188 Ok::<((u64, dfa::Transition), u64), AutomatonError>(( 189 (*from, *transition), 190 nfa_set_to_dfa_state.get(to).map(u64::to_owned).ok_or( 191 AutomatonError::conversion_error( 192 "Unable to get the DFA state from a set of NFA states!", 193 ), 194 )?, 195 )) 196 }) 197 .collect::<Result<HashMap<(u64, dfa::Transition), u64>, AutomatonError>>()?; // A monad is just a monoid in the category of endofunctors 198 199 Ok(DFA { 200 alphabet: self.alphabet.clone(), 201 states, 202 initial_state, 203 final_states, 204 transition_table, 205 }) 206 } 207 208 fn from_dfa(dfa: DFA) -> Result<Self, AutomatonError> 209 where 210 Self: Sized, 211 { 212 let transition_table = dfa 213 .transition_table 214 .iter() 215 .map(|(&(from, input), &to)| ((from, Transition::from(input)), vec![to])) 216 .collect(); 217 Ok(NFA { 218 alphabet: dfa.alphabet, 219 states: dfa.states, 220 initial_state: dfa.initial_state, 221 final_states: dfa.final_states, 222 transition_table, 223 }) 224 } 225 } 226 227 impl Display for NFA { 228 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 229 let mut transition_vec: Vec<Transition> = 230 self.alphabet.iter().map(|&c| Transition::Char(c)).collect(); 231 let mut states_vec: Vec<u64> = self.states.iter().copied().collect(); 232 let mut final_states_vec: Vec<u64> = self.final_states.iter().copied().collect(); 233 234 transition_vec.push(Transition::Wildcard); 235 transition_vec.sort(); 236 states_vec.sort(); 237 final_states_vec.sort(); 238 239 let transition_str = format!("{:?}", transition_vec); 240 let states_str = format!("{:?}", states_vec); 241 let initial_state_str = self.initial_state.to_string(); 242 let final_states_str = format!("{:?}", final_states_vec); 243 244 let transition_tbl_hdr = transition_vec 245 .iter() 246 .fold("\t|".to_owned(), |acc, &t| format!("{}\t{}\t|", acc, t)); 247 248 let mut transition_map: HashMap<u64, Vec<Option<Vec<u64>>>> = HashMap::new(); 249 for &state in states_vec.iter() { 250 let input_dests = transition_vec 251 .iter() 252 .map(|&transition| { 253 self.transition_table 254 .get(&(state, transition)) 255 .and_then(|v| Some(v.to_owned())) 256 }) 257 .collect(); 258 transition_map.insert(state, input_dests); 259 } 260 261 let transition_table_str = 262 transition_map 263 .iter() 264 .fold(transition_tbl_hdr, |acc, (from, dest_groups)| { 265 let line_str = dest_groups.into_iter().fold("".to_owned(), |acc, dests| { 266 let dests_str = match dests { 267 None => "∅".to_owned(), 268 Some(v) => { 269 v.iter() 270 .enumerate() 271 .fold("{".to_owned(), |acc, (index, &dest)| { 272 let suffix = if index == v.len() - 1 { "}" } else { ", " }; 273 format!("{}{}{}", acc, dest, suffix) 274 }) 275 } 276 }; 277 format!("{}{}\t|\t", acc, dests_str) 278 }); 279 format!("{}\n{}\t|\t{}", acc, from, line_str) 280 }); 281 writeln!(f, "Alphabet: {}", transition_str)?; 282 writeln!(f, "States: {}", states_str)?; 283 writeln!(f, "Initial State: {}", initial_state_str)?; 284 writeln!(f, "Final States: {}", final_states_str)?; 285 writeln!(f, "Transition Table:\n{}", transition_table_str)?; 286 let graphviz_repr: Result<Dot<Transition>, AutomatonError> = self.try_into(); 287 match graphviz_repr { 288 Ok(s) => writeln!(f, "Graphviz Representation:\n{}", s), 289 Err(e) => writeln!(f, "Error getting Graphviz representation: {}", e), 290 } 291 } 292 } 293 294 impl Encodable<&str> for NFA { 295 fn parse(json_str: &str) -> Result<Self, AutomatonError> { 296 NFA::parse(from_str::<Value>(json_str)?) 297 } 298 } 299 300 impl Encodable<Value> for NFA { 301 fn parse(json: Value) -> Result<Self, AutomatonError> { 302 let alphabet_vec: Vec<char> = json["alphabet"] 303 .as_array() 304 .ok_or(AutomatonError::parse_error( 305 "Could not get the alphabet from the JSON", 306 ))? 307 .iter() 308 .map(|v| v.to_string().chars().next()) 309 .collect::<Option<Vec<char>>>() 310 .ok_or(AutomatonError::parse_error( 311 "Could not convert the alphabet to UTF-8 characters", 312 ))?; 313 314 let alphabet: HashSet<char> = alphabet_vec.iter().map(char::to_owned).collect(); 315 316 let states: HashSet<u64> = json["states"] 317 .as_array() 318 .ok_or(AutomatonError::parse_error( 319 "Could not get the states from the JSON", 320 ))? 321 .iter() 322 .map(|v| v.as_u64()) 323 .collect::<Option<_>>() 324 .ok_or(AutomatonError::parse_error( 325 "Could not convert the states to u64", 326 ))?; 327 328 let initial_state: u64 = 329 json["initial_state"] 330 .as_u64() 331 .ok_or(AutomatonError::parse_error( 332 "Initial state incorrectly formatted", 333 ))?; 334 335 let final_states: HashSet<u64> = json["final_states"] 336 .as_array() 337 .ok_or(AutomatonError::parse_error( 338 "Could not get the final states from the JSON", 339 ))? 340 .iter() 341 .map(|v| v.as_u64()) 342 .collect::<Option<_>>() 343 .ok_or(AutomatonError::parse_error( 344 "Could not convert the final states to u64", 345 ))?; 346 347 let mut transition_table: HashMap<(u64, Transition), Vec<u64>> = HashMap::new(); 348 for row in json["transition_table"] 349 .as_array() 350 .ok_or(AutomatonError::parse_error( 351 "Unable to parse rows in transition table", 352 ))? 353 .iter() 354 { 355 let transitions = row 356 .as_array() 357 .ok_or(AutomatonError::parse_error( 358 "Unable to get a row in the transition table", 359 ))? 360 .to_owned(); 361 362 let from: u64 = transitions 363 .get(0) 364 .ok_or(AutomatonError::parse_error( 365 "Unable to get the \"from\" state of a row", 366 ))? 367 .as_u64() 368 .ok_or(AutomatonError::parse_error( 369 "Unable to parse the \"from\" state of a row as a u64", 370 ))?; 371 372 let dest_sets: Vec<(char, Vec<u64>)> = alphabet_vec 373 .iter() 374 .zip(transitions.iter().skip(1).map(Value::as_array)) 375 .map(|(input, states)| { 376 states.and_then(|vs| { 377 vs.iter() 378 .map(|v| v.as_u64()) 379 .collect::<Option<_>>() 380 .and_then(|sts| Some((*input, sts))) 381 }) 382 }) 383 .collect::<Option<_>>() 384 .ok_or(AutomatonError::parse_error( 385 "Unable to parse the transition destinations in the table", 386 ))?; 387 for (input, dests) in dest_sets.iter() { 388 transition_table.insert((from, Transition::Char(*input)), dests.to_owned()); 389 } 390 } 391 392 Ok(NFA { 393 alphabet, 394 states, 395 initial_state, 396 final_states, 397 transition_table, 398 }) 399 } 400 } 401 402 impl TryFrom<&NFA> for Dot<Transition> { 403 type Error = AutomatonError; 404 fn try_from(nfa: &NFA) -> Result<Self, Self::Error> { 405 let mut graph: Graph<u64, Transition, Directed> = Graph::new(); 406 407 let states: HashMap<u64, _> = nfa.states.iter().map(|&s| (s, graph.add_node(s))).collect(); 408 409 let mut alphabet: HashSet<Transition> = 410 nfa.alphabet.iter().map(|&c| Transition::Char(c)).collect(); 411 alphabet.insert(Transition::Wildcard); 412 413 for transition in alphabet { 414 for (from_id, dest) in states 415 .iter() 416 .filter_map(|(&from, &from_id)| { 417 nfa.transition_table 418 .get(&(from, transition)) 419 .map(move |dests| dests.iter().map(move |dest| (from_id, dest))) 420 }) 421 .flatten() 422 { 423 let dest_id = *states.get(dest).ok_or(AutomatonError::dot_error( 424 "The destination of a transition does not exist in the nfa states!", 425 ))?; 426 graph.add_edge(from_id, dest_id, transition); 427 } 428 } 429 430 let initial_state_id = *states 431 .get(&nfa.initial_state) 432 .ok_or(AutomatonError::dot_error( 433 "Unable to get initial state graph index!", 434 ))?; 435 436 let final_states = nfa 437 .final_states 438 .iter() 439 .map(|s| states.get(s).map(|&ix| (*s, ix))) 440 .collect::<Option<BTreeMap<u64, _>>>() 441 .ok_or(AutomatonError::dot_error( 442 "Unable to get the graph index of a final state!", 443 ))?; 444 445 Ok(Self::new( 446 graph, 447 (nfa.initial_state, initial_state_id), 448 final_states, 449 )) 450 } 451 }