automaton

An automaton library & basic programs written in Rust
git clone git://git.ethandl.dev/automaton
Log | Files | Refs | README

enfa.rs (18337B)


      1 use crate::{
      2     dfa::{self, DFA},
      3     nfa::{self, NFA},
      4     Automaton, AutomatonError, Dot, Encodable, FiniteStateAutomaton,
      5 };
      6 use core::fmt::Display;
      7 use petgraph::{Directed, Graph};
      8 use serde_json::{from_str, Value};
      9 use std::collections::{BTreeMap, HashMap, HashSet};
     10 
     11 #[derive(Hash, Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord)]
     12 pub enum Transition {
     13     Char(char),
     14     Epsilon,
     15     Wildcard,
     16 }
     17 
     18 impl Display for Transition {
     19     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
     20         match self {
     21             Transition::Char(c) => write!(f, "'{}'", c),
     22             Transition::Epsilon => write!(f, "ε"),
     23             Transition::Wildcard => write!(f, "•"),
     24         }
     25     }
     26 }
     27 
     28 impl From<nfa::Transition> for Transition {
     29     fn from(transition: nfa::Transition) -> Self {
     30         match transition {
     31             nfa::Transition::Char(c) => Self::Char(c),
     32             nfa::Transition::Wildcard => Self::Wildcard,
     33         }
     34     }
     35 }
     36 
     37 impl From<dfa::Transition> for Transition {
     38     fn from(transition: dfa::Transition) -> Self {
     39         match transition {
     40             dfa::Transition::Char(c) => Self::Char(c),
     41             dfa::Transition::Wildcard => Self::Wildcard,
     42         }
     43     }
     44 }
     45 
     46 impl TryFrom<Transition> for nfa::Transition {
     47     type Error = AutomatonError;
     48     fn try_from(transition: Transition) -> Result<Self, Self::Error> {
     49         match transition {
     50             Transition::Char(c) => Ok(nfa::Transition::Char(c)),
     51             Transition::Wildcard => Ok(nfa::Transition::Wildcard),
     52             Transition::Epsilon => Err(AutomatonError::conversion_error("Unable to convert ENFA epsilon transition to NFA transition, NFAs have no notion of an epsilon transition!"))
     53         }
     54     }
     55 }
     56 
     57 // ε-NFAs!
     58 #[derive(Clone, Debug, PartialEq)]
     59 pub struct ENFA {
     60     pub alphabet: HashSet<char>,
     61     states: HashSet<u64>,
     62     initial_state: u64,
     63     final_states: HashSet<u64>,
     64     transition_table: HashMap<(u64, Transition), Vec<u64>>,
     65 }
     66 
     67 impl ENFA {
     68     fn eclose(&self, state: u64, visited: Option<&mut HashSet<u64>>) -> HashSet<u64> {
     69         let mut reachable: HashSet<u64> = HashSet::new();
     70         let mut visited_set = HashSet::new(); // Lifetime hack. There has to be a better way
     71         let visited = visited.unwrap_or(&mut visited_set);
     72         reachable.insert(state);
     73         for state in self
     74             .transition_table
     75             .get(&(state, Transition::Epsilon))
     76             .unwrap_or(&vec![])
     77         {
     78             if !visited.contains(state) {
     79                 visited.insert(*state);
     80                 for state in self.eclose(*state, Some(visited)).iter() {
     81                     reachable.insert(*state);
     82                     visited.insert(*state);
     83                 }
     84             }
     85         }
     86         reachable
     87     }
     88 
     89     pub fn new(
     90         alphabet: HashSet<char>,
     91         states: HashSet<u64>,
     92         initial_state: u64,
     93         final_states: HashSet<u64>,
     94         transition_table: HashMap<(u64, Transition), Vec<u64>>,
     95     ) -> ENFA {
     96         ENFA {
     97             alphabet,
     98             states,
     99             initial_state,
    100             final_states,
    101             transition_table,
    102         }
    103     }
    104 
    105     pub fn as_nfa(&self) -> NFA {
    106         let preemptive_final_states: HashSet<u64> = self
    107             .states
    108             .iter()
    109             .filter(|&&starting_state| {
    110                 self.eclose(starting_state, None)
    111                     .into_iter()
    112                     .any(|epsilon_accessible| self.final_states.contains(&epsilon_accessible))
    113             })
    114             .map(u64::to_owned)
    115             .collect();
    116 
    117         let mut nfa_transition_table: HashMap<(u64, nfa::Transition), Vec<u64>> = HashMap::new();
    118 
    119         let mut enfa_state_accepted_input_map: HashMap<u64, HashSet<Transition>> = HashMap::new();
    120 
    121         for (from, transition) in self.transition_table.keys() {
    122             match enfa_state_accepted_input_map.get(from) {
    123                 None => {
    124                     let mut transitions = HashSet::new();
    125                     transitions.insert(*transition);
    126                     enfa_state_accepted_input_map.insert(*from, transitions);
    127                 }
    128                 Some(transitions) => {
    129                     let mut transitions = transitions.to_owned();
    130                     transitions.insert(*transition);
    131                     enfa_state_accepted_input_map.insert(*from, transitions);
    132                 }
    133             }
    134         }
    135 
    136         for &state in self.states.iter() {
    137             let eclose = self.eclose(state, None);
    138             for (enfa_from, transition) in eclose
    139                 .into_iter()
    140                 .flat_map(|s| {
    141                     enfa_state_accepted_input_map
    142                         .get(&s)
    143                         .map_or(HashSet::new(), HashSet::to_owned)
    144                         .into_iter()
    145                         .map(move |t| (s, t))
    146                 })
    147                 .filter_map(|(s, t)| nfa::Transition::try_from(t).map(|t| (s, t)).ok())
    148             {
    149                 let mut enfa_to = self
    150                     .transition_table
    151                     .get(&(enfa_from, Transition::from(transition)))
    152                     .map_or(vec![], Vec::to_owned);
    153                 let nfa_to = nfa_transition_table.get_mut(&(state, transition));
    154                 match nfa_to {
    155                     Some(v) => {
    156                         v.append(&mut enfa_to);
    157                     }
    158                     None => {
    159                         nfa_transition_table.insert((state, transition), enfa_to);
    160                     }
    161                 }
    162             }
    163         }
    164 
    165         let mut nfa_states: HashSet<u64> = nfa_transition_table
    166             .values()
    167             .flat_map(Vec::to_owned)
    168             .collect(); // Prune off the states without an entrypoint
    169         nfa_states.insert(self.initial_state); // The only state without an entrypoint
    170 
    171         let nfa_final_states: HashSet<u64> = preemptive_final_states
    172             .into_iter()
    173             .filter(|s| nfa_states.contains(s))
    174             .collect();
    175 
    176         // TODO: Maybe try re-numbering the states here before making the NFA so that they're sequential?
    177 
    178         NFA::new(
    179             self.alphabet.clone(),
    180             nfa_states,
    181             self.initial_state,
    182             nfa_final_states,
    183             nfa_transition_table,
    184         )
    185     }
    186 }
    187 
    188 impl Automaton for ENFA {
    189     // TODO: Check that this is all g
    190     fn accepts(&self, input: &str) -> bool {
    191         let mut current_states: HashSet<u64> =
    192             self.eclose(self.initial_state, None).into_iter().collect();
    193         let mut new_states: HashSet<u64> = HashSet::new();
    194 
    195         for c in input.chars() {
    196             for state in current_states.iter() {
    197                 for newstate in self
    198                     .transition_table
    199                     .get(&(*state, Transition::Char(c)))
    200                     // FIXME: This is ugly
    201                     .map_or(
    202                         self.transition_table
    203                             .get(&(*state, Transition::Wildcard))
    204                             .map_or(vec![], Vec::to_owned),
    205                         |v| {
    206                             let mut transitions = v.to_owned();
    207                             self.transition_table
    208                                 .get(&(*state, Transition::Wildcard))
    209                                 .map(|s| transitions.append(&mut s.clone()));
    210                             transitions
    211                         },
    212                     )
    213                     .iter()
    214                     .flat_map(|s| self.eclose(*s, None))
    215                 {
    216                     new_states.insert(newstate);
    217                 }
    218             }
    219             current_states = new_states;
    220             new_states = HashSet::new();
    221             if current_states.len() == 0 {
    222                 return false;
    223             }
    224         }
    225         current_states.iter().any(|s| self.final_states.contains(s))
    226     }
    227 }
    228 
    229 impl FiniteStateAutomaton for ENFA {
    230     fn as_dfa(&self) -> Result<DFA, AutomatonError> {
    231         let nfa = self.as_nfa();
    232         nfa.as_dfa()
    233     }
    234 
    235     fn from_dfa(dfa: crate::dfa::DFA) -> Result<Self, AutomatonError>
    236     where
    237         Self: Sized,
    238     {
    239         let transition_table = dfa
    240             .transition_table
    241             .iter()
    242             .map(|((from, input), to)| ((*from, Transition::from(*input)), vec![*to]))
    243             .collect();
    244         Ok(ENFA {
    245             alphabet: dfa.alphabet,
    246             states: dfa.states,
    247             initial_state: dfa.initial_state,
    248             final_states: dfa.final_states,
    249             transition_table,
    250         })
    251     }
    252 }
    253 
    254 impl Display for ENFA {
    255     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
    256         let mut transition_vec: Vec<Transition> =
    257             self.alphabet.iter().map(|&c| Transition::Char(c)).collect();
    258         let mut states_vec: Vec<u64> = self.states.iter().copied().collect();
    259         let mut final_states_vec: Vec<u64> = self.final_states.iter().copied().collect();
    260 
    261         transition_vec.push(Transition::Epsilon);
    262         transition_vec.push(Transition::Wildcard);
    263         transition_vec.sort();
    264         states_vec.sort();
    265         final_states_vec.sort();
    266 
    267         let transition_str = format!("{:?}", transition_vec);
    268         let states_str = format!("{:?}", states_vec);
    269         let initial_state_str = self.initial_state.to_string();
    270         let final_states_str = format!("{:?}", final_states_vec);
    271 
    272         let transition_tbl_hdr = transition_vec
    273             .iter()
    274             .fold("\t|".to_owned(), |acc, &t| format!("{}\t{}\t|", acc, t));
    275 
    276         let mut transition_map: HashMap<u64, Vec<Option<Vec<u64>>>> = HashMap::new();
    277         for &state in states_vec.iter() {
    278             let input_dests = transition_vec
    279                 .iter()
    280                 .map(|&transition| {
    281                     self.transition_table
    282                         .get(&(state, transition))
    283                         .and_then(|v| Some(v.to_owned()))
    284                 })
    285                 .collect();
    286             transition_map.insert(state, input_dests);
    287         }
    288 
    289         let transition_table_str =
    290             transition_map
    291                 .iter()
    292                 .fold(transition_tbl_hdr, |acc, (from, dest_groups)| {
    293                     let line_str = dest_groups.into_iter().fold("".to_owned(), |acc, dests| {
    294                         let dests_str = match dests {
    295                             None => "∅".to_owned(),
    296                             Some(v) => {
    297                                 v.iter()
    298                                     .enumerate()
    299                                     .fold("{".to_owned(), |acc, (index, &dest)| {
    300                                         let suffix = if index == v.len() - 1 { "}" } else { ", " };
    301                                         format!("{}{}{}", acc, dest, suffix)
    302                                     })
    303                             }
    304                         };
    305                         format!("{}{}\t|\t", acc, dests_str)
    306                     });
    307                     format!("{}\n{}\t|\t{}", acc, from, line_str)
    308                 });
    309         writeln!(f, "Alphabet: {}", transition_str)?;
    310         writeln!(f, "States: {}", states_str)?;
    311         writeln!(f, "Initial State: {}", initial_state_str)?;
    312         writeln!(f, "Final States: {}", final_states_str)?;
    313         writeln!(f, "Transition Table:\n{}", transition_table_str)?;
    314         let graphviz_repr: Result<Dot<Transition>, AutomatonError> = self.try_into();
    315         match graphviz_repr {
    316             Ok(s) => writeln!(f, "Graphviz Representation:\n{}", s),
    317             Err(e) => writeln!(f, "Error getting Graphviz representation: {}", e),
    318         }
    319     }
    320 }
    321 
    322 impl Encodable<&str> for ENFA {
    323     fn parse(json_str: &str) -> Result<Self, AutomatonError> {
    324         ENFA::parse(from_str::<Value>(json_str)?)
    325     }
    326 }
    327 
    328 impl Encodable<Value> for ENFA {
    329     fn parse(json: Value) -> Result<Self, AutomatonError> {
    330         let alphabet_vec: Vec<char> = json["alphabet"]
    331             .as_array()
    332             .ok_or(AutomatonError::conversion_error(
    333                 "Could not get the alphabet from the JSON",
    334             ))?
    335             .iter()
    336             .map(|v| v.to_string().chars().next())
    337             .collect::<Option<Vec<char>>>()
    338             .ok_or(AutomatonError::conversion_error(
    339                 "Could not convert the alphabet to UTF-8 characters",
    340             ))?;
    341 
    342         let alphabet_size = alphabet_vec.len();
    343 
    344         let alphabet: HashSet<char> = alphabet_vec.iter().map(char::to_owned).collect();
    345 
    346         let states: HashSet<u64> = json["states"]
    347             .as_array()
    348             .ok_or(AutomatonError::conversion_error(
    349                 "Could not get the states from the JSON",
    350             ))?
    351             .iter()
    352             .map(|v| v.as_u64())
    353             .collect::<Option<_>>()
    354             .ok_or(AutomatonError::conversion_error(
    355                 "Could not convert the states to u64",
    356             ))?;
    357 
    358         let initial_state: u64 =
    359             json["initial_state"]
    360                 .as_u64()
    361                 .ok_or(AutomatonError::conversion_error(
    362                     "Initial state incorrectly formatted",
    363                 ))?;
    364 
    365         let final_states: HashSet<u64> = json["final_states"]
    366             .as_array()
    367             .ok_or(AutomatonError::conversion_error(
    368                 "Could not get the final states from the JSON",
    369             ))?
    370             .iter()
    371             .map(|v| v.as_u64())
    372             .collect::<Option<_>>()
    373             .ok_or(AutomatonError::conversion_error(
    374                 "Could not convert the final states to u64",
    375             ))?;
    376 
    377         let mut transition_table: HashMap<(u64, Transition), Vec<u64>> = HashMap::new();
    378 
    379         for row in json["transition_table"]
    380             .as_array()
    381             .ok_or(AutomatonError::conversion_error(
    382                 "Unable to parse rows in transition table",
    383             ))?
    384             .iter()
    385         {
    386             let transitions = row
    387                 .as_array()
    388                 .ok_or(AutomatonError::conversion_error(
    389                     "Unable to get a row in the transition table",
    390                 ))?
    391                 .to_owned();
    392 
    393             let from: u64 = transitions
    394                 .get(0)
    395                 .ok_or(AutomatonError::conversion_error(
    396                     "Unable to get the \"from\" state of a row",
    397                 ))?
    398                 .as_u64()
    399                 .ok_or(AutomatonError::conversion_error(
    400                     "Unable to parse the \"from\" state of a row as a u64",
    401                 ))?;
    402 
    403             let dest_sets: Vec<(char, Vec<u64>)> = alphabet_vec
    404                 .iter()
    405                 .zip(
    406                     transitions
    407                         .iter()
    408                         .skip(1)
    409                         .take(alphabet_size)
    410                         .map(Value::as_array),
    411                 )
    412                 .map(|(input, states)| {
    413                     states.and_then(|vs| {
    414                         vs.iter()
    415                             .map(|v| v.as_u64())
    416                             .collect::<Option<_>>()
    417                             .and_then(|sts| Some((*input, sts)))
    418                     })
    419                 })
    420                 .collect::<Option<_>>()
    421                 .ok_or(AutomatonError::conversion_error(
    422                     "Unable to parse the transition destinations in the table",
    423                 ))?;
    424             let epsilon_transitions: Vec<Vec<u64>> = transitions
    425                 .iter()
    426                 .skip(alphabet_size + 1)
    427                 .map(Value::as_array)
    428                 .map(|states| {
    429                     states.and_then(|vs| vs.iter().map(Value::as_u64).collect::<Option<_>>())
    430                 })
    431                 .collect::<Option<_>>()
    432                 .ok_or(AutomatonError::conversion_error(
    433                     "Unable to parse the ε-closure destinations in the table",
    434                 ))?;
    435             for (input, dests) in dest_sets.iter() {
    436                 transition_table.insert((from, Transition::Char(*input)), dests.to_owned());
    437             }
    438             for transition in epsilon_transitions.iter() {
    439                 transition_table.insert((from, Transition::Epsilon), transition.to_owned());
    440             }
    441         }
    442 
    443         //panic!("Not yet implemented!");
    444         Ok(ENFA {
    445             alphabet,
    446             states,
    447             initial_state,
    448             final_states,
    449             transition_table,
    450         })
    451     }
    452 }
    453 
    454 impl TryFrom<&ENFA> for Dot<Transition> {
    455     type Error = AutomatonError;
    456     fn try_from(enfa: &ENFA) -> Result<Self, Self::Error> {
    457         let mut graph: Graph<u64, Transition, Directed> = Graph::new();
    458 
    459         let states: HashMap<u64, _> = enfa
    460             .states
    461             .iter()
    462             .map(|&s| (s, graph.add_node(s)))
    463             .collect();
    464 
    465         let mut alphabet: HashSet<Transition> =
    466             enfa.alphabet.iter().map(|&c| Transition::Char(c)).collect();
    467         alphabet.insert(Transition::Epsilon);
    468         alphabet.insert(Transition::Wildcard);
    469 
    470         for transition in alphabet {
    471             for (from_id, dest) in states
    472                 .iter()
    473                 .filter_map(|(&from, &from_id)| {
    474                     enfa.transition_table
    475                         .get(&(from, transition))
    476                         .map(move |dests| dests.iter().map(move |dest| (from_id, dest)))
    477                 })
    478                 .flatten()
    479             {
    480                 let dest_id = *states.get(dest).ok_or(AutomatonError::dot_error(
    481                     "The destination of a transition does not exist in the enfa states!",
    482                 ))?;
    483                 graph.add_edge(from_id, dest_id, transition);
    484             }
    485         }
    486 
    487         let initial_state_id =
    488             *states
    489                 .get(&enfa.initial_state)
    490                 .ok_or(AutomatonError::dot_error(
    491                     "Unable to get initial state graph index!",
    492                 ))?;
    493 
    494         let final_states = enfa
    495             .final_states
    496             .iter()
    497             .map(|s| states.get(s).map(|&ix| (*s, ix)))
    498             .collect::<Option<BTreeMap<u64, _>>>()
    499             .ok_or(AutomatonError::dot_error(
    500                 "Unable to get the graph index of a final state!",
    501             ))?;
    502 
    503         Ok(Self::new(
    504             graph,
    505             (enfa.initial_state, initial_state_id),
    506             final_states,
    507         ))
    508     }
    509 }