automaton

An automaton library written in Rust
Log | Files | Refs

commit 707d9a5186561bf10c2ca5b5b4f76494cdd992bd
parent 4d6869cf819d0ba58ae1df4a29dae9958fe2b9f8
Author: Ethan Long <ethandavidlong@gmail.com>
Date:   Fri, 23 Feb 2024 00:49:55 +1100

I think I've basically done it, yay!

Still need to write some more extensive tests and look for bugs and edge
cases. Exciting! I should remove the profanity comments now and also
work on visualisation!

Diffstat:
Msrc/lib/nfa.rs | 116++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 112 insertions(+), 4 deletions(-)

diff --git a/src/lib/nfa.rs b/src/lib/nfa.rs @@ -67,6 +67,16 @@ impl From<dfa::Transition> for Transition { } } +// FIXME: WTF IS WRONG WITH FROM AND INTO??? +impl From<Transition> for dfa::Transition { + fn from(transition: Transition) -> Self { + match transition { + Transition::Char(c) => dfa::Transition::Char(c), + Transition::Wildcard => dfa::Transition::Wildcard, + } + } +} + #[derive(Clone, Debug, PartialEq)] pub struct NFA { pub alphabet: HashSet<char>, @@ -138,6 +148,8 @@ impl Automaton for NFA { impl FiniteStateAutomaton for NFA { // FIXME: This is broken, very broken. + // I think the bug lies in the fact that the "empty set state"/trap state should be overridden by the wildcard transition (which can be the trap state, so basically always set it to the wildcard transition). + /* fn as_dfa(&self) -> Result<DFA, Box<dyn Error>> { let mut states = HashSet::new(); let initial_dfa_state = 0; @@ -158,7 +170,17 @@ impl FiniteStateAutomaton for NFA { .collect(); possible_inputs.push(dfa::Transition::Wildcard); - for &input in possible_inputs.iter() { + let wildcard_transitions: HashMap<u64, Vec<u64>> = self + .states + .iter() + .filter_map(|&s| { + self.transition_table + .get(&(s, Transition::Wildcard)) + .map(|dests| (s, dests.to_owned())) + }) + .collect(); + + for &input in self.alphabet.iter() { let mut nfa_sets_to_be_processed: VecDeque<BTreeSet<u64>> = VecDeque::new(); let mut processing_nfa_states = BTreeSet::from([self.initial_state]); let mut nfa_sets_processed: BTreeSet<BTreeSet<u64>> = BTreeSet::new(); @@ -168,8 +190,15 @@ impl FiniteStateAutomaton for NFA { .iter() .map(|&nfa_state| { self.transition_table - .get(&(nfa_state, Transition::from(input))) - .map_or(vec![], Vec::to_owned) + .get(&(nfa_state, Transition::Char(input))) + .map_or_else( + || { + wildcard_transitions + .get(&nfa_state) + .map_or_else(|| vec![], Vec::to_owned) + }, + Vec::to_owned, + ) }) // This conversion to BTreeSet is shit .map(Vec::into_iter) @@ -206,7 +235,7 @@ impl FiniteStateAutomaton for NFA { nfa_sets_to_be_processed.push_back(nfa_dests); - transition_table.insert((dfa_from, input), dfa_to); + transition_table.insert((dfa_from, dfa::Transition::Char(input)), dfa_to); } } nfa_sets_processed.insert(processing_nfa_states); @@ -224,6 +253,85 @@ impl FiniteStateAutomaton for NFA { transition_table, }) } + */ + fn as_dfa(&self) -> Result<dfa::DFA, Box<dyn Error>> { + let mut states = HashSet::new(); + let initial_state = 0; + let mut current_state = initial_state; + let mut final_states = HashSet::new(); + let mut dfa_transition_to_nfa_sets: HashMap<(u64, dfa::Transition), BTreeSet<u64>> = + HashMap::new(); + + let mut to_be_processed: BTreeSet<BTreeSet<u64>> = BTreeSet::new(); + let mut processed: BTreeSet<BTreeSet<u64>> = BTreeSet::new(); + let mut nfa_set_to_dfa_state: HashMap<BTreeSet<u64>, u64> = HashMap::new(); + let mut dfa_state_to_nfa_set: HashMap<u64, BTreeSet<u64>> = HashMap::new(); + + to_be_processed.insert(BTreeSet::from([self.initial_state])); + + let mut nfa_alphabet: Vec<Transition> = + self.alphabet.iter().map(|&c| Transition::Char(c)).collect(); + nfa_alphabet.push(Transition::Wildcard); + + while to_be_processed.len() != 0 { + let current_nfa_set = match to_be_processed.pop_first() { + Some(set) => set, + None => continue, + }; + if processed.contains(&current_nfa_set) { + continue; + } + + states.insert(current_state); + if current_nfa_set + .iter() + .any(|state| self.final_states.contains(state)) + { + final_states.insert(current_state); + } + + nfa_set_to_dfa_state.insert(current_nfa_set.clone(), current_state); + dfa_state_to_nfa_set.insert(current_state, current_nfa_set.clone()); + + for &transition in nfa_alphabet.iter() { + let nfa_dests: BTreeSet<u64> = current_nfa_set + .iter() + .filter_map(|&s| self.transition_table.get(&(s, transition))) + .flatten() + .map(u64::to_owned) + .collect(); + to_be_processed.insert(nfa_dests.clone()); + dfa_transition_to_nfa_sets.insert( + (current_state, dfa::Transition::from(transition)), + nfa_dests.clone(), + ); + } + + processed.insert(current_nfa_set); + current_state += 1; + } + + let transition_table: HashMap<(u64, dfa::Transition), u64> = dfa_transition_to_nfa_sets + .iter() + .map(|((from, transition), to)| { + Ok::<((u64, dfa::Transition), u64), NFAParseError>(( + (*from, *transition), + nfa_set_to_dfa_state + .get(to) + .map(u64::to_owned) + .ok_or(NFAParseError::error("cuz"))?, + )) + }) + .collect::<Result<HashMap<(u64, dfa::Transition), u64>, NFAParseError>>()?; // A monad is just a monoid in the category of endofunctors + + Ok(DFA { + alphabet: self.alphabet.clone(), + states, + initial_state, + final_states, + transition_table, + }) + } fn from_dfa(dfa: DFA) -> Result<Self, Box<dyn Error>> where