automaton

An automaton library & basic programs written in Rust
git clone git://git.ethandl.dev/automaton
Log | Files | Refs | README

nfa.rs (16563B)


      1 use crate::{
      2     dfa::{self, DFA},
      3     Automaton, AutomatonError, Dot, Encodable, FiniteStateAutomaton,
      4 };
      5 use core::fmt::Display;
      6 use petgraph::{Directed, Graph};
      7 use std::{
      8     collections::{BTreeMap, BTreeSet, HashMap, HashSet},
      9     fmt,
     10 };
     11 
     12 use serde_json::{from_str, Value};
     13 
     14 #[derive(Hash, Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord)]
     15 pub enum Transition {
     16     Char(char),
     17     Wildcard,
     18 }
     19 
     20 impl Display for Transition {
     21     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
     22         match self {
     23             Transition::Char(c) => write!(f, "'{}'", c),
     24             Transition::Wildcard => write!(f, "•"),
     25         }
     26     }
     27 }
     28 
     29 // TODO: From doesn't seem to be 2-way... Maybe I'm using it wrong?
     30 impl From<dfa::Transition> for Transition {
     31     fn from(transition: dfa::Transition) -> Self {
     32         match transition {
     33             dfa::Transition::Char(c) => Transition::Char(c),
     34             dfa::Transition::Wildcard => Transition::Wildcard,
     35         }
     36     }
     37 }
     38 
     39 impl From<Transition> for dfa::Transition {
     40     fn from(transition: Transition) -> Self {
     41         match transition {
     42             Transition::Char(c) => dfa::Transition::Char(c),
     43             Transition::Wildcard => dfa::Transition::Wildcard,
     44         }
     45     }
     46 }
     47 
     48 #[derive(Clone, Debug, PartialEq)]
     49 pub struct NFA {
     50     pub alphabet: HashSet<char>,
     51     states: HashSet<u64>,
     52     initial_state: u64,
     53     final_states: HashSet<u64>,
     54     transition_table: HashMap<(u64, Transition), Vec<u64>>,
     55 }
     56 
     57 impl NFA {
     58     pub fn new(
     59         alphabet: HashSet<char>,
     60         states: HashSet<u64>,
     61         initial_state: u64,
     62         final_states: HashSet<u64>,
     63         transition_table: HashMap<(u64, Transition), Vec<u64>>,
     64     ) -> Self {
     65         NFA {
     66             alphabet,
     67             states,
     68             initial_state,
     69             final_states,
     70             transition_table,
     71         }
     72     }
     73 }
     74 
     75 impl Automaton for NFA {
     76     fn accepts(&self, input: &str) -> bool {
     77         let mut current_states: Vec<u64> = vec![self.initial_state];
     78         let mut new_states: Vec<u64> = Vec::new();
     79         for c in input.chars() {
     80             for i in 0..current_states.len() {
     81                 match current_states.get(i).map_or(None, |&s| {
     82                     self.transition_table.get(&(s, Transition::Char(c))).map_or(
     83                         self.transition_table
     84                             .get(&(s, Transition::Wildcard))
     85                             .and_then(|s| Some(s.to_owned())),
     86                         |v| Some(v.to_owned()),
     87                     )
     88                 }) {
     89                     Some(states) => {
     90                         new_states.append(&mut states.clone());
     91                     }
     92                     None => (),
     93                 }
     94             }
     95             current_states = new_states;
     96             new_states = Vec::new();
     97             if current_states.len() == 0 {
     98                 return false;
     99             }
    100         }
    101         current_states.iter().any(|s| self.final_states.contains(s))
    102     }
    103 }
    104 
    105 impl FiniteStateAutomaton for NFA {
    106     fn as_dfa(&self) -> Result<dfa::DFA, AutomatonError> {
    107         let mut states = HashSet::new();
    108         let initial_state = 0;
    109         let mut current_state = initial_state;
    110         let mut final_states = HashSet::new();
    111         let mut dfa_transition_to_nfa_sets: HashMap<(u64, dfa::Transition), BTreeSet<u64>> =
    112             HashMap::new();
    113 
    114         let mut to_be_processed: BTreeSet<BTreeSet<u64>> = BTreeSet::new();
    115         let mut processed: BTreeSet<BTreeSet<u64>> = BTreeSet::new();
    116         let mut nfa_set_to_dfa_state: HashMap<BTreeSet<u64>, u64> = HashMap::new();
    117         let mut dfa_state_to_nfa_set: HashMap<u64, BTreeSet<u64>> = HashMap::new();
    118 
    119         to_be_processed.insert(BTreeSet::from([self.initial_state]));
    120 
    121         let mut nfa_alphabet: Vec<Transition> =
    122             self.alphabet.iter().map(|&c| Transition::Char(c)).collect();
    123         nfa_alphabet.push(Transition::Wildcard);
    124 
    125         while to_be_processed.len() != 0 {
    126             let current_nfa_set = match to_be_processed.pop_first() {
    127                 Some(set) => set,
    128                 None => continue,
    129             };
    130             if processed.contains(&current_nfa_set) {
    131                 continue;
    132             }
    133 
    134             states.insert(current_state);
    135             if current_nfa_set
    136                 .iter()
    137                 .any(|state| self.final_states.contains(state))
    138             {
    139                 final_states.insert(current_state);
    140             }
    141 
    142             nfa_set_to_dfa_state.insert(current_nfa_set.clone(), current_state);
    143             dfa_state_to_nfa_set.insert(current_state, current_nfa_set.clone());
    144 
    145             for &transition in nfa_alphabet.iter() {
    146                 let nfa_dests: BTreeSet<u64> = current_nfa_set
    147                     .iter()
    148                     .filter_map(|&s| {
    149                         // We need to take into account the wildcard transition dests and take the union of them with our transition dests (determinism)
    150                         let wildcard_transitions =
    151                             self.transition_table.get(&(s, Transition::Wildcard));
    152                         if transition == Transition::Wildcard {
    153                             wildcard_transitions.map(Vec::to_owned)
    154                         } else {
    155                             match wildcard_transitions {
    156                                 Some(wcs) => {
    157                                     let regular_dests = self
    158                                         .transition_table
    159                                         .get(&(s, transition))
    160                                         .map_or([].iter(), |dests| dests.into_iter());
    161                                     let mut union = wcs.to_owned();
    162                                     union.extend(regular_dests);
    163                                     Some(union)
    164                                 }
    165                                 None => self
    166                                     .transition_table
    167                                     .get(&(s, transition))
    168                                     .map(Vec::to_owned),
    169                             }
    170                         }
    171                     })
    172                     .flatten()
    173                     .collect();
    174                 to_be_processed.insert(nfa_dests.clone());
    175                 dfa_transition_to_nfa_sets.insert(
    176                     (current_state, dfa::Transition::from(transition)),
    177                     nfa_dests.clone(),
    178                 );
    179             }
    180 
    181             processed.insert(current_nfa_set);
    182             current_state += 1;
    183         }
    184 
    185         let transition_table: HashMap<(u64, dfa::Transition), u64> = dfa_transition_to_nfa_sets
    186             .iter()
    187             .map(|((from, transition), to)| {
    188                 Ok::<((u64, dfa::Transition), u64), AutomatonError>((
    189                     (*from, *transition),
    190                     nfa_set_to_dfa_state.get(to).map(u64::to_owned).ok_or(
    191                         AutomatonError::conversion_error(
    192                             "Unable to get the DFA state from a set of NFA states!",
    193                         ),
    194                     )?,
    195                 ))
    196             })
    197             .collect::<Result<HashMap<(u64, dfa::Transition), u64>, AutomatonError>>()?; // A monad is just a monoid in the category of endofunctors
    198 
    199         Ok(DFA {
    200             alphabet: self.alphabet.clone(),
    201             states,
    202             initial_state,
    203             final_states,
    204             transition_table,
    205         })
    206     }
    207 
    208     fn from_dfa(dfa: DFA) -> Result<Self, AutomatonError>
    209     where
    210         Self: Sized,
    211     {
    212         let transition_table = dfa
    213             .transition_table
    214             .iter()
    215             .map(|(&(from, input), &to)| ((from, Transition::from(input)), vec![to]))
    216             .collect();
    217         Ok(NFA {
    218             alphabet: dfa.alphabet,
    219             states: dfa.states,
    220             initial_state: dfa.initial_state,
    221             final_states: dfa.final_states,
    222             transition_table,
    223         })
    224     }
    225 }
    226 
    227 impl Display for NFA {
    228     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
    229         let mut transition_vec: Vec<Transition> =
    230             self.alphabet.iter().map(|&c| Transition::Char(c)).collect();
    231         let mut states_vec: Vec<u64> = self.states.iter().copied().collect();
    232         let mut final_states_vec: Vec<u64> = self.final_states.iter().copied().collect();
    233 
    234         transition_vec.push(Transition::Wildcard);
    235         transition_vec.sort();
    236         states_vec.sort();
    237         final_states_vec.sort();
    238 
    239         let transition_str = format!("{:?}", transition_vec);
    240         let states_str = format!("{:?}", states_vec);
    241         let initial_state_str = self.initial_state.to_string();
    242         let final_states_str = format!("{:?}", final_states_vec);
    243 
    244         let transition_tbl_hdr = transition_vec
    245             .iter()
    246             .fold("\t|".to_owned(), |acc, &t| format!("{}\t{}\t|", acc, t));
    247 
    248         let mut transition_map: HashMap<u64, Vec<Option<Vec<u64>>>> = HashMap::new();
    249         for &state in states_vec.iter() {
    250             let input_dests = transition_vec
    251                 .iter()
    252                 .map(|&transition| {
    253                     self.transition_table
    254                         .get(&(state, transition))
    255                         .and_then(|v| Some(v.to_owned()))
    256                 })
    257                 .collect();
    258             transition_map.insert(state, input_dests);
    259         }
    260 
    261         let transition_table_str =
    262             transition_map
    263                 .iter()
    264                 .fold(transition_tbl_hdr, |acc, (from, dest_groups)| {
    265                     let line_str = dest_groups.into_iter().fold("".to_owned(), |acc, dests| {
    266                         let dests_str = match dests {
    267                             None => "∅".to_owned(),
    268                             Some(v) => {
    269                                 v.iter()
    270                                     .enumerate()
    271                                     .fold("{".to_owned(), |acc, (index, &dest)| {
    272                                         let suffix = if index == v.len() - 1 { "}" } else { ", " };
    273                                         format!("{}{}{}", acc, dest, suffix)
    274                                     })
    275                             }
    276                         };
    277                         format!("{}{}\t|\t", acc, dests_str)
    278                     });
    279                     format!("{}\n{}\t|\t{}", acc, from, line_str)
    280                 });
    281         writeln!(f, "Alphabet: {}", transition_str)?;
    282         writeln!(f, "States: {}", states_str)?;
    283         writeln!(f, "Initial State: {}", initial_state_str)?;
    284         writeln!(f, "Final States: {}", final_states_str)?;
    285         writeln!(f, "Transition Table:\n{}", transition_table_str)?;
    286         let graphviz_repr: Result<Dot<Transition>, AutomatonError> = self.try_into();
    287         match graphviz_repr {
    288             Ok(s) => writeln!(f, "Graphviz Representation:\n{}", s),
    289             Err(e) => writeln!(f, "Error getting Graphviz representation: {}", e),
    290         }
    291     }
    292 }
    293 
    294 impl Encodable<&str> for NFA {
    295     fn parse(json_str: &str) -> Result<Self, AutomatonError> {
    296         NFA::parse(from_str::<Value>(json_str)?)
    297     }
    298 }
    299 
    300 impl Encodable<Value> for NFA {
    301     fn parse(json: Value) -> Result<Self, AutomatonError> {
    302         let alphabet_vec: Vec<char> = json["alphabet"]
    303             .as_array()
    304             .ok_or(AutomatonError::parse_error(
    305                 "Could not get the alphabet from the JSON",
    306             ))?
    307             .iter()
    308             .map(|v| v.to_string().chars().next())
    309             .collect::<Option<Vec<char>>>()
    310             .ok_or(AutomatonError::parse_error(
    311                 "Could not convert the alphabet to UTF-8 characters",
    312             ))?;
    313 
    314         let alphabet: HashSet<char> = alphabet_vec.iter().map(char::to_owned).collect();
    315 
    316         let states: HashSet<u64> = json["states"]
    317             .as_array()
    318             .ok_or(AutomatonError::parse_error(
    319                 "Could not get the states from the JSON",
    320             ))?
    321             .iter()
    322             .map(|v| v.as_u64())
    323             .collect::<Option<_>>()
    324             .ok_or(AutomatonError::parse_error(
    325                 "Could not convert the states to u64",
    326             ))?;
    327 
    328         let initial_state: u64 =
    329             json["initial_state"]
    330                 .as_u64()
    331                 .ok_or(AutomatonError::parse_error(
    332                     "Initial state incorrectly formatted",
    333                 ))?;
    334 
    335         let final_states: HashSet<u64> = json["final_states"]
    336             .as_array()
    337             .ok_or(AutomatonError::parse_error(
    338                 "Could not get the final states from the JSON",
    339             ))?
    340             .iter()
    341             .map(|v| v.as_u64())
    342             .collect::<Option<_>>()
    343             .ok_or(AutomatonError::parse_error(
    344                 "Could not convert the final states to u64",
    345             ))?;
    346 
    347         let mut transition_table: HashMap<(u64, Transition), Vec<u64>> = HashMap::new();
    348         for row in json["transition_table"]
    349             .as_array()
    350             .ok_or(AutomatonError::parse_error(
    351                 "Unable to parse rows in transition table",
    352             ))?
    353             .iter()
    354         {
    355             let transitions = row
    356                 .as_array()
    357                 .ok_or(AutomatonError::parse_error(
    358                     "Unable to get a row in the transition table",
    359                 ))?
    360                 .to_owned();
    361 
    362             let from: u64 = transitions
    363                 .get(0)
    364                 .ok_or(AutomatonError::parse_error(
    365                     "Unable to get the \"from\" state of a row",
    366                 ))?
    367                 .as_u64()
    368                 .ok_or(AutomatonError::parse_error(
    369                     "Unable to parse the \"from\" state of a row as a u64",
    370                 ))?;
    371 
    372             let dest_sets: Vec<(char, Vec<u64>)> = alphabet_vec
    373                 .iter()
    374                 .zip(transitions.iter().skip(1).map(Value::as_array))
    375                 .map(|(input, states)| {
    376                     states.and_then(|vs| {
    377                         vs.iter()
    378                             .map(|v| v.as_u64())
    379                             .collect::<Option<_>>()
    380                             .and_then(|sts| Some((*input, sts)))
    381                     })
    382                 })
    383                 .collect::<Option<_>>()
    384                 .ok_or(AutomatonError::parse_error(
    385                     "Unable to parse the transition destinations in the table",
    386                 ))?;
    387             for (input, dests) in dest_sets.iter() {
    388                 transition_table.insert((from, Transition::Char(*input)), dests.to_owned());
    389             }
    390         }
    391 
    392         Ok(NFA {
    393             alphabet,
    394             states,
    395             initial_state,
    396             final_states,
    397             transition_table,
    398         })
    399     }
    400 }
    401 
    402 impl TryFrom<&NFA> for Dot<Transition> {
    403     type Error = AutomatonError;
    404     fn try_from(nfa: &NFA) -> Result<Self, Self::Error> {
    405         let mut graph: Graph<u64, Transition, Directed> = Graph::new();
    406 
    407         let states: HashMap<u64, _> = nfa.states.iter().map(|&s| (s, graph.add_node(s))).collect();
    408 
    409         let mut alphabet: HashSet<Transition> =
    410             nfa.alphabet.iter().map(|&c| Transition::Char(c)).collect();
    411         alphabet.insert(Transition::Wildcard);
    412 
    413         for transition in alphabet {
    414             for (from_id, dest) in states
    415                 .iter()
    416                 .filter_map(|(&from, &from_id)| {
    417                     nfa.transition_table
    418                         .get(&(from, transition))
    419                         .map(move |dests| dests.iter().map(move |dest| (from_id, dest)))
    420                 })
    421                 .flatten()
    422             {
    423                 let dest_id = *states.get(dest).ok_or(AutomatonError::dot_error(
    424                     "The destination of a transition does not exist in the nfa states!",
    425                 ))?;
    426                 graph.add_edge(from_id, dest_id, transition);
    427             }
    428         }
    429 
    430         let initial_state_id = *states
    431             .get(&nfa.initial_state)
    432             .ok_or(AutomatonError::dot_error(
    433                 "Unable to get initial state graph index!",
    434             ))?;
    435 
    436         let final_states = nfa
    437             .final_states
    438             .iter()
    439             .map(|s| states.get(s).map(|&ix| (*s, ix)))
    440             .collect::<Option<BTreeMap<u64, _>>>()
    441             .ok_or(AutomatonError::dot_error(
    442                 "Unable to get the graph index of a final state!",
    443             ))?;
    444 
    445         Ok(Self::new(
    446             graph,
    447             (nfa.initial_state, initial_state_id),
    448             final_states,
    449         ))
    450     }
    451 }