enfa.rs (18337B)
1 use crate::{ 2 dfa::{self, DFA}, 3 nfa::{self, NFA}, 4 Automaton, AutomatonError, Dot, Encodable, FiniteStateAutomaton, 5 }; 6 use core::fmt::Display; 7 use petgraph::{Directed, Graph}; 8 use serde_json::{from_str, Value}; 9 use std::collections::{BTreeMap, HashMap, HashSet}; 10 11 #[derive(Hash, Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord)] 12 pub enum Transition { 13 Char(char), 14 Epsilon, 15 Wildcard, 16 } 17 18 impl Display for Transition { 19 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 20 match self { 21 Transition::Char(c) => write!(f, "'{}'", c), 22 Transition::Epsilon => write!(f, "ε"), 23 Transition::Wildcard => write!(f, "•"), 24 } 25 } 26 } 27 28 impl From<nfa::Transition> for Transition { 29 fn from(transition: nfa::Transition) -> Self { 30 match transition { 31 nfa::Transition::Char(c) => Self::Char(c), 32 nfa::Transition::Wildcard => Self::Wildcard, 33 } 34 } 35 } 36 37 impl From<dfa::Transition> for Transition { 38 fn from(transition: dfa::Transition) -> Self { 39 match transition { 40 dfa::Transition::Char(c) => Self::Char(c), 41 dfa::Transition::Wildcard => Self::Wildcard, 42 } 43 } 44 } 45 46 impl TryFrom<Transition> for nfa::Transition { 47 type Error = AutomatonError; 48 fn try_from(transition: Transition) -> Result<Self, Self::Error> { 49 match transition { 50 Transition::Char(c) => Ok(nfa::Transition::Char(c)), 51 Transition::Wildcard => Ok(nfa::Transition::Wildcard), 52 Transition::Epsilon => Err(AutomatonError::conversion_error("Unable to convert ENFA epsilon transition to NFA transition, NFAs have no notion of an epsilon transition!")) 53 } 54 } 55 } 56 57 // ε-NFAs! 58 #[derive(Clone, Debug, PartialEq)] 59 pub struct ENFA { 60 pub alphabet: HashSet<char>, 61 states: HashSet<u64>, 62 initial_state: u64, 63 final_states: HashSet<u64>, 64 transition_table: HashMap<(u64, Transition), Vec<u64>>, 65 } 66 67 impl ENFA { 68 fn eclose(&self, state: u64, visited: Option<&mut HashSet<u64>>) -> HashSet<u64> { 69 let mut reachable: HashSet<u64> = HashSet::new(); 70 let mut visited_set = HashSet::new(); // Lifetime hack. There has to be a better way 71 let visited = visited.unwrap_or(&mut visited_set); 72 reachable.insert(state); 73 for state in self 74 .transition_table 75 .get(&(state, Transition::Epsilon)) 76 .unwrap_or(&vec![]) 77 { 78 if !visited.contains(state) { 79 visited.insert(*state); 80 for state in self.eclose(*state, Some(visited)).iter() { 81 reachable.insert(*state); 82 visited.insert(*state); 83 } 84 } 85 } 86 reachable 87 } 88 89 pub fn new( 90 alphabet: HashSet<char>, 91 states: HashSet<u64>, 92 initial_state: u64, 93 final_states: HashSet<u64>, 94 transition_table: HashMap<(u64, Transition), Vec<u64>>, 95 ) -> ENFA { 96 ENFA { 97 alphabet, 98 states, 99 initial_state, 100 final_states, 101 transition_table, 102 } 103 } 104 105 pub fn as_nfa(&self) -> NFA { 106 let preemptive_final_states: HashSet<u64> = self 107 .states 108 .iter() 109 .filter(|&&starting_state| { 110 self.eclose(starting_state, None) 111 .into_iter() 112 .any(|epsilon_accessible| self.final_states.contains(&epsilon_accessible)) 113 }) 114 .map(u64::to_owned) 115 .collect(); 116 117 let mut nfa_transition_table: HashMap<(u64, nfa::Transition), Vec<u64>> = HashMap::new(); 118 119 let mut enfa_state_accepted_input_map: HashMap<u64, HashSet<Transition>> = HashMap::new(); 120 121 for (from, transition) in self.transition_table.keys() { 122 match enfa_state_accepted_input_map.get(from) { 123 None => { 124 let mut transitions = HashSet::new(); 125 transitions.insert(*transition); 126 enfa_state_accepted_input_map.insert(*from, transitions); 127 } 128 Some(transitions) => { 129 let mut transitions = transitions.to_owned(); 130 transitions.insert(*transition); 131 enfa_state_accepted_input_map.insert(*from, transitions); 132 } 133 } 134 } 135 136 for &state in self.states.iter() { 137 let eclose = self.eclose(state, None); 138 for (enfa_from, transition) in eclose 139 .into_iter() 140 .flat_map(|s| { 141 enfa_state_accepted_input_map 142 .get(&s) 143 .map_or(HashSet::new(), HashSet::to_owned) 144 .into_iter() 145 .map(move |t| (s, t)) 146 }) 147 .filter_map(|(s, t)| nfa::Transition::try_from(t).map(|t| (s, t)).ok()) 148 { 149 let mut enfa_to = self 150 .transition_table 151 .get(&(enfa_from, Transition::from(transition))) 152 .map_or(vec![], Vec::to_owned); 153 let nfa_to = nfa_transition_table.get_mut(&(state, transition)); 154 match nfa_to { 155 Some(v) => { 156 v.append(&mut enfa_to); 157 } 158 None => { 159 nfa_transition_table.insert((state, transition), enfa_to); 160 } 161 } 162 } 163 } 164 165 let mut nfa_states: HashSet<u64> = nfa_transition_table 166 .values() 167 .flat_map(Vec::to_owned) 168 .collect(); // Prune off the states without an entrypoint 169 nfa_states.insert(self.initial_state); // The only state without an entrypoint 170 171 let nfa_final_states: HashSet<u64> = preemptive_final_states 172 .into_iter() 173 .filter(|s| nfa_states.contains(s)) 174 .collect(); 175 176 // TODO: Maybe try re-numbering the states here before making the NFA so that they're sequential? 177 178 NFA::new( 179 self.alphabet.clone(), 180 nfa_states, 181 self.initial_state, 182 nfa_final_states, 183 nfa_transition_table, 184 ) 185 } 186 } 187 188 impl Automaton for ENFA { 189 // TODO: Check that this is all g 190 fn accepts(&self, input: &str) -> bool { 191 let mut current_states: HashSet<u64> = 192 self.eclose(self.initial_state, None).into_iter().collect(); 193 let mut new_states: HashSet<u64> = HashSet::new(); 194 195 for c in input.chars() { 196 for state in current_states.iter() { 197 for newstate in self 198 .transition_table 199 .get(&(*state, Transition::Char(c))) 200 // FIXME: This is ugly 201 .map_or( 202 self.transition_table 203 .get(&(*state, Transition::Wildcard)) 204 .map_or(vec![], Vec::to_owned), 205 |v| { 206 let mut transitions = v.to_owned(); 207 self.transition_table 208 .get(&(*state, Transition::Wildcard)) 209 .map(|s| transitions.append(&mut s.clone())); 210 transitions 211 }, 212 ) 213 .iter() 214 .flat_map(|s| self.eclose(*s, None)) 215 { 216 new_states.insert(newstate); 217 } 218 } 219 current_states = new_states; 220 new_states = HashSet::new(); 221 if current_states.len() == 0 { 222 return false; 223 } 224 } 225 current_states.iter().any(|s| self.final_states.contains(s)) 226 } 227 } 228 229 impl FiniteStateAutomaton for ENFA { 230 fn as_dfa(&self) -> Result<DFA, AutomatonError> { 231 let nfa = self.as_nfa(); 232 nfa.as_dfa() 233 } 234 235 fn from_dfa(dfa: crate::dfa::DFA) -> Result<Self, AutomatonError> 236 where 237 Self: Sized, 238 { 239 let transition_table = dfa 240 .transition_table 241 .iter() 242 .map(|((from, input), to)| ((*from, Transition::from(*input)), vec![*to])) 243 .collect(); 244 Ok(ENFA { 245 alphabet: dfa.alphabet, 246 states: dfa.states, 247 initial_state: dfa.initial_state, 248 final_states: dfa.final_states, 249 transition_table, 250 }) 251 } 252 } 253 254 impl Display for ENFA { 255 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 256 let mut transition_vec: Vec<Transition> = 257 self.alphabet.iter().map(|&c| Transition::Char(c)).collect(); 258 let mut states_vec: Vec<u64> = self.states.iter().copied().collect(); 259 let mut final_states_vec: Vec<u64> = self.final_states.iter().copied().collect(); 260 261 transition_vec.push(Transition::Epsilon); 262 transition_vec.push(Transition::Wildcard); 263 transition_vec.sort(); 264 states_vec.sort(); 265 final_states_vec.sort(); 266 267 let transition_str = format!("{:?}", transition_vec); 268 let states_str = format!("{:?}", states_vec); 269 let initial_state_str = self.initial_state.to_string(); 270 let final_states_str = format!("{:?}", final_states_vec); 271 272 let transition_tbl_hdr = transition_vec 273 .iter() 274 .fold("\t|".to_owned(), |acc, &t| format!("{}\t{}\t|", acc, t)); 275 276 let mut transition_map: HashMap<u64, Vec<Option<Vec<u64>>>> = HashMap::new(); 277 for &state in states_vec.iter() { 278 let input_dests = transition_vec 279 .iter() 280 .map(|&transition| { 281 self.transition_table 282 .get(&(state, transition)) 283 .and_then(|v| Some(v.to_owned())) 284 }) 285 .collect(); 286 transition_map.insert(state, input_dests); 287 } 288 289 let transition_table_str = 290 transition_map 291 .iter() 292 .fold(transition_tbl_hdr, |acc, (from, dest_groups)| { 293 let line_str = dest_groups.into_iter().fold("".to_owned(), |acc, dests| { 294 let dests_str = match dests { 295 None => "∅".to_owned(), 296 Some(v) => { 297 v.iter() 298 .enumerate() 299 .fold("{".to_owned(), |acc, (index, &dest)| { 300 let suffix = if index == v.len() - 1 { "}" } else { ", " }; 301 format!("{}{}{}", acc, dest, suffix) 302 }) 303 } 304 }; 305 format!("{}{}\t|\t", acc, dests_str) 306 }); 307 format!("{}\n{}\t|\t{}", acc, from, line_str) 308 }); 309 writeln!(f, "Alphabet: {}", transition_str)?; 310 writeln!(f, "States: {}", states_str)?; 311 writeln!(f, "Initial State: {}", initial_state_str)?; 312 writeln!(f, "Final States: {}", final_states_str)?; 313 writeln!(f, "Transition Table:\n{}", transition_table_str)?; 314 let graphviz_repr: Result<Dot<Transition>, AutomatonError> = self.try_into(); 315 match graphviz_repr { 316 Ok(s) => writeln!(f, "Graphviz Representation:\n{}", s), 317 Err(e) => writeln!(f, "Error getting Graphviz representation: {}", e), 318 } 319 } 320 } 321 322 impl Encodable<&str> for ENFA { 323 fn parse(json_str: &str) -> Result<Self, AutomatonError> { 324 ENFA::parse(from_str::<Value>(json_str)?) 325 } 326 } 327 328 impl Encodable<Value> for ENFA { 329 fn parse(json: Value) -> Result<Self, AutomatonError> { 330 let alphabet_vec: Vec<char> = json["alphabet"] 331 .as_array() 332 .ok_or(AutomatonError::conversion_error( 333 "Could not get the alphabet from the JSON", 334 ))? 335 .iter() 336 .map(|v| v.to_string().chars().next()) 337 .collect::<Option<Vec<char>>>() 338 .ok_or(AutomatonError::conversion_error( 339 "Could not convert the alphabet to UTF-8 characters", 340 ))?; 341 342 let alphabet_size = alphabet_vec.len(); 343 344 let alphabet: HashSet<char> = alphabet_vec.iter().map(char::to_owned).collect(); 345 346 let states: HashSet<u64> = json["states"] 347 .as_array() 348 .ok_or(AutomatonError::conversion_error( 349 "Could not get the states from the JSON", 350 ))? 351 .iter() 352 .map(|v| v.as_u64()) 353 .collect::<Option<_>>() 354 .ok_or(AutomatonError::conversion_error( 355 "Could not convert the states to u64", 356 ))?; 357 358 let initial_state: u64 = 359 json["initial_state"] 360 .as_u64() 361 .ok_or(AutomatonError::conversion_error( 362 "Initial state incorrectly formatted", 363 ))?; 364 365 let final_states: HashSet<u64> = json["final_states"] 366 .as_array() 367 .ok_or(AutomatonError::conversion_error( 368 "Could not get the final states from the JSON", 369 ))? 370 .iter() 371 .map(|v| v.as_u64()) 372 .collect::<Option<_>>() 373 .ok_or(AutomatonError::conversion_error( 374 "Could not convert the final states to u64", 375 ))?; 376 377 let mut transition_table: HashMap<(u64, Transition), Vec<u64>> = HashMap::new(); 378 379 for row in json["transition_table"] 380 .as_array() 381 .ok_or(AutomatonError::conversion_error( 382 "Unable to parse rows in transition table", 383 ))? 384 .iter() 385 { 386 let transitions = row 387 .as_array() 388 .ok_or(AutomatonError::conversion_error( 389 "Unable to get a row in the transition table", 390 ))? 391 .to_owned(); 392 393 let from: u64 = transitions 394 .get(0) 395 .ok_or(AutomatonError::conversion_error( 396 "Unable to get the \"from\" state of a row", 397 ))? 398 .as_u64() 399 .ok_or(AutomatonError::conversion_error( 400 "Unable to parse the \"from\" state of a row as a u64", 401 ))?; 402 403 let dest_sets: Vec<(char, Vec<u64>)> = alphabet_vec 404 .iter() 405 .zip( 406 transitions 407 .iter() 408 .skip(1) 409 .take(alphabet_size) 410 .map(Value::as_array), 411 ) 412 .map(|(input, states)| { 413 states.and_then(|vs| { 414 vs.iter() 415 .map(|v| v.as_u64()) 416 .collect::<Option<_>>() 417 .and_then(|sts| Some((*input, sts))) 418 }) 419 }) 420 .collect::<Option<_>>() 421 .ok_or(AutomatonError::conversion_error( 422 "Unable to parse the transition destinations in the table", 423 ))?; 424 let epsilon_transitions: Vec<Vec<u64>> = transitions 425 .iter() 426 .skip(alphabet_size + 1) 427 .map(Value::as_array) 428 .map(|states| { 429 states.and_then(|vs| vs.iter().map(Value::as_u64).collect::<Option<_>>()) 430 }) 431 .collect::<Option<_>>() 432 .ok_or(AutomatonError::conversion_error( 433 "Unable to parse the ε-closure destinations in the table", 434 ))?; 435 for (input, dests) in dest_sets.iter() { 436 transition_table.insert((from, Transition::Char(*input)), dests.to_owned()); 437 } 438 for transition in epsilon_transitions.iter() { 439 transition_table.insert((from, Transition::Epsilon), transition.to_owned()); 440 } 441 } 442 443 //panic!("Not yet implemented!"); 444 Ok(ENFA { 445 alphabet, 446 states, 447 initial_state, 448 final_states, 449 transition_table, 450 }) 451 } 452 } 453 454 impl TryFrom<&ENFA> for Dot<Transition> { 455 type Error = AutomatonError; 456 fn try_from(enfa: &ENFA) -> Result<Self, Self::Error> { 457 let mut graph: Graph<u64, Transition, Directed> = Graph::new(); 458 459 let states: HashMap<u64, _> = enfa 460 .states 461 .iter() 462 .map(|&s| (s, graph.add_node(s))) 463 .collect(); 464 465 let mut alphabet: HashSet<Transition> = 466 enfa.alphabet.iter().map(|&c| Transition::Char(c)).collect(); 467 alphabet.insert(Transition::Epsilon); 468 alphabet.insert(Transition::Wildcard); 469 470 for transition in alphabet { 471 for (from_id, dest) in states 472 .iter() 473 .filter_map(|(&from, &from_id)| { 474 enfa.transition_table 475 .get(&(from, transition)) 476 .map(move |dests| dests.iter().map(move |dest| (from_id, dest))) 477 }) 478 .flatten() 479 { 480 let dest_id = *states.get(dest).ok_or(AutomatonError::dot_error( 481 "The destination of a transition does not exist in the enfa states!", 482 ))?; 483 graph.add_edge(from_id, dest_id, transition); 484 } 485 } 486 487 let initial_state_id = 488 *states 489 .get(&enfa.initial_state) 490 .ok_or(AutomatonError::dot_error( 491 "Unable to get initial state graph index!", 492 ))?; 493 494 let final_states = enfa 495 .final_states 496 .iter() 497 .map(|s| states.get(s).map(|&ix| (*s, ix))) 498 .collect::<Option<BTreeMap<u64, _>>>() 499 .ok_or(AutomatonError::dot_error( 500 "Unable to get the graph index of a final state!", 501 ))?; 502 503 Ok(Self::new( 504 graph, 505 (enfa.initial_state, initial_state_id), 506 final_states, 507 )) 508 } 509 }