automaton

An automaton library written in Rust
Log | Files | Refs

commit 4d6869cf819d0ba58ae1df4a29dae9958fe2b9f8
parent c069a2bea6e0514c55a12cc86c439f6cd0fa32b0
Author: Ethan Long <ethandavidlong@gmail.com>
Date:   Thu, 22 Feb 2024 20:44:55 +1100

Fixed a lot of bugs, nearly working!

Currently is parsing a regex through a recursive descent parser (I
think it's an RDP at least).
The parsing is then:
Regex->epsilon-NFA (Graph)->epsilon-NFA (transition table)->NFA->DFA

The NFA->DFA step is the part that I'm stuck on now, and then I will
have a deterministic regex engine!

Diffstat:
MCargo.lock | 16++++++++--------
Msrc/lib/automaton.rs | 14++++++++++++++
Msrc/lib/dfa.rs | 154++++++++++++++++++++++++++++++++++++++-----------------------------------------
Msrc/lib/enfa.rs | 215++++++++++++++++++++++++++++++++++++++++++-------------------------------------
Msrc/lib/graph_enfa.rs | 19++++++++++++++++---
Msrc/lib/nfa.rs | 170++++++++++++++++++++++++++++++++++++++++----------------------------------------
Msrc/lib/regex.rs | 6+++---
7 files changed, 314 insertions(+), 280 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock @@ -72,9 +72,9 @@ checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" [[package]] name = "libc" -version = "0.2.150" +version = "0.2.153" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c" +checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" [[package]] name = "petgraph" @@ -94,18 +94,18 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "proc-macro2" -version = "1.0.70" +version = "1.0.78" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b" +checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.33" +version = "1.0.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" dependencies = [ "proc-macro2", ] @@ -179,9 +179,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.39" +version = "2.0.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a" +checksum = "74f1bdc9872430ce9b75da68329d1c1746faf50ffac5f19e02b71e37ff881ffb" dependencies = [ "proc-macro2", "quote", diff --git a/src/lib/automaton.rs b/src/lib/automaton.rs @@ -1,3 +1,4 @@ +use petgraph::{dot::Dot, Directed, Graph}; use std::error::Error; use std::fmt::Display; @@ -31,6 +32,19 @@ where fn parse(encoding: T) -> Result<Self, Box<dyn Error>>; } +pub fn to_graphviz_dot<T, U, V>(automaton: &T) -> Result<String, Box<dyn Error>> +where + T: TryInto<Graph<U, V, Directed>>, + T: Clone, + U: std::fmt::Debug, + V: std::fmt::Debug, + <T as TryInto<Graph<U, V, Directed>>>::Error: std::error::Error, + <T as TryInto<Graph<U, V, Directed>>>::Error: 'static, +{ + let graph: Graph<U, V, Directed> = automaton.clone().try_into()?; + Ok(format!("{:?}", Dot::new(&graph))) +} + #[cfg(test)] mod tests { mod testlib { diff --git a/src/lib/dfa.rs b/src/lib/dfa.rs @@ -1,4 +1,5 @@ use core::fmt::Display; +use petgraph::{Directed, Graph}; use std::{ collections::{HashMap, HashSet}, error::Error, @@ -6,11 +7,11 @@ use std::{ panic::Location, }; -use crate::{Automaton, Encodable, FiniteStateAutomaton}; +use crate::{to_graphviz_dot, Automaton, Encodable, FiniteStateAutomaton}; use serde_json::{from_str, Value}; #[derive(Debug)] -struct DFAParseError { +pub struct DFAParseError { reason: String, } @@ -53,7 +54,7 @@ impl DFAParseError { impl Error for DFAParseError {} -#[derive(Hash, Debug, PartialEq, Eq, Clone, Copy)] +#[derive(Hash, Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord)] pub enum Transition { Char(char), Wildcard, @@ -124,99 +125,64 @@ impl FiniteStateAutomaton for DFA { impl Display for DFA { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let mut alphabet_vec: Vec<char> = self.alphabet.iter().copied().collect(); + let mut transition_vec: Vec<Transition> = + self.alphabet.iter().map(|&c| Transition::Char(c)).collect(); let mut states_vec: Vec<u64> = self.states.iter().copied().collect(); let mut final_states_vec: Vec<u64> = self.final_states.iter().copied().collect(); - alphabet_vec.sort(); + transition_vec.push(Transition::Wildcard); + transition_vec.sort(); states_vec.sort(); final_states_vec.sort(); - let alphabet_str: String = - alphabet_vec - .iter() - .enumerate() - .fold("[".to_owned(), |st, (index, c)| { - let ending = if index == self.alphabet.len() - 1 { - "]" - } else { - ", " - }; - format!("{}{}{}", st, c, ending) - }); - - let states_str: String = - states_vec - .iter() - .enumerate() - .fold("[".to_owned(), |st, (index, n)| { - let ending = if index == self.states.len() - 1 { - "]" - } else { - ", " - }; - format!("{}{}{}", st, n, ending) - }); - - let initial_state_str: String = self.initial_state.to_string(); + let transition_str = format!("{:?}", transition_vec); + let states_str = format!("{:?}", states_vec); + let initial_state_str = self.initial_state.to_string(); + let final_states_str = format!("{:?}", final_states_vec); - let final_states_str: String = - final_states_vec - .iter() - .enumerate() - .fold("[".to_owned(), |st, (index, n)| { - let ending = if index == self.final_states.len() - 1 { - "]" - } else { - ", " - }; - format!("{}{}{}", st, n, ending) - }); - - let index_map: HashMap<Transition, usize> = alphabet_vec + let transition_tbl_hdr = transition_vec .iter() - .enumerate() - .map(|(index, &char)| (Transition::Char(char), index)) - .collect(); - - let mut transition_table_hdr: String = alphabet_vec - .iter() - .map(|c| format!("\t{}\t|", c)) - .fold("\t|".to_owned(), |st, substr| format!("{}{}", st, substr)); - transition_table_hdr.push_str("\tWildcard"); - - // FIXME: I Think this won't work xD - let mut transition_map: HashMap<u64, Vec<u64>> = HashMap::new(); - - for ((from, input), to) in self.transition_table.iter() { - let mut new_vec: Vec<u64> = transition_map - .get(from) - .unwrap_or(&vec![0 as u64; self.alphabet.len()]) - .to_owned(); - let new_index = index_map.get(&input).ok_or(fmt::Error)?; - new_vec[*new_index] = to.to_owned(); - transition_map.insert(from.to_owned(), new_vec); + .fold("\t|".to_owned(), |acc, &t| match t { + Transition::Char(c) => format!("{}\t'{}'\t|", acc, c), + Transition::Wildcard => format!("{}\t.\t|", acc), + }); + + let mut transition_map: HashMap<u64, Vec<Option<u64>>> = HashMap::new(); + for &state in states_vec.iter() { + let input_dests = transition_vec + .iter() + .map(|&transition| { + self.transition_table + .get(&(state, transition)) + .and_then(|v| Some(v.to_owned())) + }) + .collect(); + transition_map.insert(state, input_dests); } - let transition_table_str: String = + let transition_table_str = transition_map .iter() - .fold(transition_table_hdr, |st, (&from, states)| { - let dests = states - .into_iter() - .fold("".to_owned(), |st, dst| format!("{}{}\t|\t", st, dst)); - let wildcard = self - .transition_table - .get(&(from, Transition::Wildcard)) - .map_or("∅".to_owned(), |&dest| dest.to_string()); - format!("{}\n{}\t|\t{}{}", st, from, dests, wildcard) + .fold(transition_tbl_hdr, |acc, (from, dest_groups)| { + let line_str = dest_groups.into_iter().fold("".to_owned(), |acc, dests| { + let dest_str = match dests { + None => "∅".to_owned(), + Some(v) => v.to_string(), + }; + format!("{}{}\t|\t", acc, dest_str) + }); + format!("{}\n{}\t|\t{}", acc, from, line_str) }); - - writeln!(f, "Alphabet: {}", alphabet_str)?; + writeln!(f, "Alphabet: {}", transition_str)?; writeln!(f, "States: {}", states_str)?; writeln!(f, "Initial State: {}", initial_state_str)?; writeln!(f, "Final States: {}", final_states_str)?; - writeln!(f, "Transition Table:\n{}", transition_table_str) + writeln!(f, "Transition Table:\n{}", transition_table_str)?; + let graphviz_repr = to_graphviz_dot(self); + match graphviz_repr { + Ok(s) => writeln!(f, "Graphviz Representation:\n{}", s), + Err(e) => writeln!(f, "Error getting Graphviz representation: {}", e), + } } } @@ -320,3 +286,31 @@ impl Encodable<Value> for DFA { }) } } + +impl TryFrom<DFA> for Graph<u64, Transition, Directed> { + type Error = DFAParseError; + fn try_from(dfa: DFA) -> Result<Self, Self::Error> { + let mut graph: Graph<u64, Transition, Directed> = Graph::new(); + + let states: HashMap<u64, _> = dfa.states.iter().map(|&s| (s, graph.add_node(s))).collect(); + + let mut alphabet: HashSet<Transition> = + dfa.alphabet.iter().map(|&c| Transition::Char(c)).collect(); + alphabet.insert(Transition::Wildcard); + + for transition in alphabet { + for (from_id, dest) in states.iter().filter_map(|(&from, &from_id)| { + dfa.transition_table + .get(&(from, transition)) + .map(|dest| (from_id, dest)) + }) { + let dest_id = *states.get(dest).ok_or(DFAParseError::error( + "The destination of a transition does not exist in the dfa states!", + ))?; + graph.add_edge(from_id, dest_id, transition); + } + } + + Ok(graph) + } +} diff --git a/src/lib/enfa.rs b/src/lib/enfa.rs @@ -1,9 +1,10 @@ use crate::{ dfa::{self, DFA}, nfa::{self, NFA}, - Automaton, Encodable, FiniteStateAutomaton, + to_graphviz_dot, Automaton, Encodable, FiniteStateAutomaton, }; use core::fmt::Display; +use petgraph::{Directed, Graph}; use serde_json::{from_str, Value}; use std::{ collections::{HashMap, HashSet}, @@ -50,11 +51,11 @@ impl ENFAParseError { impl Error for ENFAParseError {} -#[derive(Hash, Debug, PartialEq, Eq, Clone, Copy)] +#[derive(Hash, Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord)] pub enum Transition { Char(char), - Wildcard, Epsilon, + Wildcard, } impl From<nfa::Transition> for Transition { @@ -134,8 +135,8 @@ impl ENFA { } } + // FIXME: There are states with no entrypoint here! Prune time pub fn as_nfa(&self) -> NFA { - // TODO: Implement this properly let nfa_final_states = self .states .iter() @@ -149,13 +150,21 @@ impl ENFA { let mut nfa_transition_table: HashMap<(u64, nfa::Transition), Vec<u64>> = HashMap::new(); - let enfa_state_accepted_input_map: HashMap<u64, HashSet<Transition>> = HashMap::new(); + let mut enfa_state_accepted_input_map: HashMap<u64, HashSet<Transition>> = HashMap::new(); for (from, transition) in self.transition_table.keys() { - let mut transitions = enfa_state_accepted_input_map - .get(from) - .map_or(HashSet::new(), HashSet::to_owned); - transitions.insert(*transition); + match enfa_state_accepted_input_map.get(from) { + None => { + let mut transitions = HashSet::new(); + transitions.insert(*transition); + enfa_state_accepted_input_map.insert(*from, transitions); + } + Some(transitions) => { + let mut transitions = transitions.to_owned(); + transitions.insert(*transition); + enfa_state_accepted_input_map.insert(*from, transitions); + } + } } for &state in self.states.iter() { @@ -177,7 +186,9 @@ impl ENFA { .map_or(vec![], Vec::to_owned); let nfa_to = nfa_transition_table.get_mut(&(state, transition)); match nfa_to { - Some(v) => v.append(&mut enfa_to), + Some(v) => { + v.append(&mut enfa_to); + } None => { nfa_transition_table.insert((state, transition), enfa_to); } @@ -185,9 +196,17 @@ impl ENFA { } } + let mut nfa_states: HashSet<u64> = nfa_transition_table + .values() + .flat_map(Vec::to_owned) + .collect(); // Prune off the states without an entrypoint + nfa_states.insert(self.initial_state); // The only state without an entrypoint + + // TODO: Maybe try re-numbering the states here before making the NFA so that they're sequential? + NFA::new( self.alphabet.clone(), - self.states.clone(), + nfa_states, self.initial_state, nfa_final_states, nfa_transition_table, @@ -250,7 +269,7 @@ impl Automaton for ENFA { impl FiniteStateAutomaton for ENFA { fn as_dfa(&self) -> Result<DFA, Box<dyn std::error::Error>> { let nfa = self.as_nfa(); - eprintln!("HOLY FUCKERONI IS THE NFA RIGHT? {}", nfa); + eprintln!("HOLY FUCKERONI IS THE NFA RIGHT?\n{}", nfa); nfa.as_dfa() } @@ -275,116 +294,73 @@ impl FiniteStateAutomaton for ENFA { impl Display for ENFA { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let mut alphabet_vec: Vec<char> = self.alphabet.iter().copied().collect(); + let mut transition_vec: Vec<Transition> = + self.alphabet.iter().map(|&c| Transition::Char(c)).collect(); let mut states_vec: Vec<u64> = self.states.iter().copied().collect(); let mut final_states_vec: Vec<u64> = self.final_states.iter().copied().collect(); - alphabet_vec.sort(); + transition_vec.push(Transition::Epsilon); + transition_vec.push(Transition::Wildcard); + transition_vec.sort(); states_vec.sort(); final_states_vec.sort(); - let alphabet_str = - alphabet_vec - .iter() - .enumerate() - .fold("[".to_owned(), |str, (index, c)| { - let ending = if index == self.alphabet.len() - 1 { - "]" - } else { - ", " - }; - format!("{str}{c}{ending}") - }); + let transition_str = format!("{:?}", transition_vec); + let states_str = format!("{:?}", states_vec); + let initial_state_str = self.initial_state.to_string(); + let final_states_str = format!("{:?}", final_states_vec); - let states_str = states_vec + let transition_tbl_hdr = transition_vec .iter() - .enumerate() - .fold("[".to_owned(), |str, (index, c)| { - let ending = if index == self.states.len() - 1 { - "]" - } else { - ", " - }; - format!("{str}{c}{ending}") + .fold("\t|".to_owned(), |acc, &t| match t { + Transition::Char(c) => format!("{}\t'{}'\t|", acc, c), + Transition::Epsilon => format!("{}\tε\t|", acc), + Transition::Wildcard => format!("{}\t.\t|", acc), }); - let initial_state_str: String = self.initial_state.to_string(); - - let final_states_str = - final_states_vec + let mut transition_map: HashMap<u64, Vec<Option<Vec<u64>>>> = HashMap::new(); + for &state in states_vec.iter() { + let input_dests = transition_vec .iter() - .enumerate() - .fold("[".to_owned(), |str, (index, c)| { - let ending = if index == self.final_states.len() - 1 { - "]" - } else { - ", " - }; - format!("{str}{c}{ending}") - }); - - let mut transition_table_hdr: String = alphabet_vec - .iter() - .map(|c| format!("\t{c}\t|")) - .fold("\t|".to_owned(), |str, substr| format!("{str}{substr}")); - transition_table_hdr.push_str("\tε\t|"); - transition_table_hdr.push_str("\tWildcard\t|"); - - let mut transition_map: HashMap<u64, Vec<Vec<u64>>> = HashMap::new(); - - let index_map: HashMap<char, usize> = alphabet_vec - .iter() - .enumerate() - .map(|(index, char)| (char.to_owned(), index)) - .collect(); - - for ((from, input), to) in self.transition_table.iter() { - let mut new_vec: Vec<Vec<u64>> = transition_map - .get(from) - .unwrap_or(&vec![Vec::new(); self.alphabet.len() + 1]) - .to_owned(); - match input { - Transition::Char(char) => { - let new_index = index_map.get(char).ok_or(core::fmt::Error)?; - new_vec[*new_index] = to.to_owned(); - } - _ => { - new_vec[self.alphabet.len()] = to.to_owned(); - } - } - transition_map.insert(from.to_owned(), new_vec); + .map(|&transition| { + self.transition_table + .get(&(state, transition)) + .and_then(|v| Some(v.to_owned())) + }) + .collect(); + transition_map.insert(state, input_dests); } let transition_table_str = transition_map .iter() - .fold(transition_table_hdr, |acc, (input, state_sets)| { - let dests = state_sets.into_iter().fold("".to_owned(), |acc, dsts| { - let dest_sets_str: String = if dsts.len() == 0 { - "∅".to_owned() - } else { - dsts.iter() - .enumerate() - .fold("{".to_owned(), |acc, (index, dst)| { - let end = if index == dsts.len() - 1 { "}" } else { ", " }; - format!("{}{}{}", acc, dst, end) - }) + .fold(transition_tbl_hdr, |acc, (from, dest_groups)| { + let line_str = dest_groups.into_iter().fold("".to_owned(), |acc, dests| { + let dests_str = match dests { + None => "∅".to_owned(), + Some(v) => { + v.iter() + .enumerate() + .fold("{".to_owned(), |acc, (index, &dest)| { + let suffix = if index == v.len() - 1 { "}" } else { ", " }; + format!("{}{}{}", acc, dest, suffix) + }) + } }; - format!("{}{}\t|\t", acc, dest_sets_str) + format!("{}{}\t|\t", acc, dests_str) }); - // TODO: Check that this is correct - let wildcard_map: String = self - .transition_table - .get(&(*input, Transition::Wildcard)) - .map_or("∅".to_owned(), |dest| format!("{:?}", dest)); - format!("{}\n{}\t|\t{}\t|\t{}", acc, input, dests, wildcard_map) + format!("{}\n{}\t|\t{}", acc, from, line_str) }); - - writeln!(f, "Alphabet: {}", alphabet_str)?; + writeln!(f, "Alphabet: {}", transition_str)?; writeln!(f, "States: {}", states_str)?; writeln!(f, "Initial State: {}", initial_state_str)?; - writeln!(f, "Final State: {}", final_states_str)?; - writeln!(f, "Transition Table:\n{}", transition_table_str) + writeln!(f, "Final States: {}", final_states_str)?; + writeln!(f, "Transition Table:\n{}", transition_table_str)?; + let graphviz_repr = to_graphviz_dot(self); + match graphviz_repr { + Ok(s) => writeln!(f, "Graphviz Representation:\n{}", s), + Err(e) => writeln!(f, "Error getting Graphviz representation: {}", e), + } } } @@ -514,3 +490,40 @@ impl Encodable<Value> for ENFA { }) } } + +impl TryFrom<ENFA> for Graph<u64, Transition, Directed> { + type Error = ENFAParseError; + fn try_from(enfa: ENFA) -> Result<Self, Self::Error> { + let mut graph: Graph<u64, Transition, Directed> = Graph::new(); + + let states: HashMap<u64, _> = enfa + .states + .iter() + .map(|&s| (s, graph.add_node(s))) + .collect(); + + let mut alphabet: HashSet<Transition> = + enfa.alphabet.iter().map(|&c| Transition::Char(c)).collect(); + alphabet.insert(Transition::Epsilon); + alphabet.insert(Transition::Wildcard); + + for transition in alphabet { + for (from_id, dest) in states + .iter() + .filter_map(|(&from, &from_id)| { + enfa.transition_table + .get(&(from, transition)) + .map(move |dests| dests.iter().map(move |dest| (from_id, dest))) + }) + .flatten() + { + let dest_id = *states.get(dest).ok_or(ENFAParseError::error( + "The destination of a transition does not exist in the enfa states!", + ))?; + graph.add_edge(from_id, dest_id, transition); + } + } + + Ok(graph) + } +} diff --git a/src/lib/graph_enfa.rs b/src/lib/graph_enfa.rs @@ -1,7 +1,7 @@ use crate::{ dfa, enfa::{self, ENFA}, - Automaton, FiniteStateAutomaton, + to_graphviz_dot, Automaton, FiniteStateAutomaton, }; use petgraph::{dot::Dot, graph::NodeIndex, visit::EdgeRef, Directed, Graph}; use std::{ @@ -76,6 +76,7 @@ impl From<dfa::Transition> for Transition { } /// Intended as an easy way of constructing an ε-NFA in bits, adding onto it much like you'd build an ε-NFA on a whiteboard. +#[derive(Clone)] pub struct GraphENFA { alphabet: HashSet<char>, states: HashSet<u64>, @@ -259,7 +260,9 @@ impl GraphENFA { "Error converting from graph to regular NFA!", ))? .to_owned(); - transitions.push(dest); + if !transitions.contains(&dest) { + transitions.push(dest); + } if !visited.contains(&dest) { next_states.push(dest); } @@ -394,6 +397,16 @@ impl Display for GraphENFA { writeln!(f, "States: {}", states_str)?; writeln!(f, "Initial State: {}", initial_state_str)?; writeln!(f, "Final States: {}", final_states_str)?; - writeln!(f, "{:?}", Dot::new(&self.graph)) + let graphviz_repr = to_graphviz_dot(self); + match graphviz_repr { + Ok(s) => writeln!(f, "Graphviz Representation:\n{}", s), + Err(e) => writeln!(f, "Error getting Graphviz representation: {}", e), + } + } +} + +impl From<GraphENFA> for Graph<u64, Transition, Directed> { + fn from(graph_enfa: GraphENFA) -> Self { + graph_enfa.graph } } diff --git a/src/lib/nfa.rs b/src/lib/nfa.rs @@ -1,8 +1,9 @@ use crate::{ dfa::{self, DFA}, - Automaton, Encodable, FiniteStateAutomaton, + to_graphviz_dot, Automaton, Encodable, FiniteStateAutomaton, }; use core::fmt::Display; +use petgraph::{Directed, Graph}; use std::{ collections::{BTreeSet, HashMap, HashSet, VecDeque}, error::Error, @@ -13,7 +14,7 @@ use std::{ use serde_json::{from_str, Value}; #[derive(Debug)] -struct NFAParseError { +pub struct NFAParseError { reason: String, } @@ -51,7 +52,7 @@ impl NFAParseError { impl Error for NFAParseError {} -#[derive(Hash, Debug, PartialEq, Eq, Clone, Copy)] +#[derive(Hash, Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord)] pub enum Transition { Char(char), Wildcard, @@ -136,10 +137,11 @@ impl Automaton for NFA { } impl FiniteStateAutomaton for NFA { + // FIXME: This is broken, very broken. fn as_dfa(&self) -> Result<DFA, Box<dyn Error>> { - // TODO: Implement this function let mut states = HashSet::new(); let initial_dfa_state = 0; + states.insert(initial_dfa_state); let mut newest_dfa_state = initial_dfa_state; let mut final_states = HashSet::new(); let mut transition_table = HashMap::new(); @@ -147,6 +149,8 @@ impl FiniteStateAutomaton for NFA { let mut dfa_state_to_nfa_set: HashMap<u64, BTreeSet<u64>> = HashMap::new(); let mut nfa_set_to_dfa_state: HashMap<BTreeSet<u64>, u64> = HashMap::new(); + nfa_set_to_dfa_state.insert(BTreeSet::from([self.initial_state]), initial_dfa_state); + let mut possible_inputs: Vec<dfa::Transition> = self .alphabet .iter() @@ -167,7 +171,7 @@ impl FiniteStateAutomaton for NFA { .get(&(nfa_state, Transition::from(input))) .map_or(vec![], Vec::to_owned) }) - //This conversion to BTreeSet is shit + // This conversion to BTreeSet is shit .map(Vec::into_iter) .map(BTreeSet::from_iter) { @@ -242,107 +246,71 @@ impl FiniteStateAutomaton for NFA { impl Display for NFA { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let mut alphabet_vec: Vec<char> = self.alphabet.iter().copied().collect(); + let mut transition_vec: Vec<Transition> = + self.alphabet.iter().map(|&c| Transition::Char(c)).collect(); let mut states_vec: Vec<u64> = self.states.iter().copied().collect(); let mut final_states_vec: Vec<u64> = self.final_states.iter().copied().collect(); - alphabet_vec.sort(); + transition_vec.push(Transition::Wildcard); + transition_vec.sort(); states_vec.sort(); final_states_vec.sort(); - let alphabet_str = - alphabet_vec - .iter() - .enumerate() - .fold("[".to_owned(), |str, (index, c)| { - let ending = if index == self.alphabet.len() - 1 { - "]" - } else { - ", " - }; - format!("{str}{c}{ending}") - }); + let transition_str = format!("{:?}", transition_vec); + let states_str = format!("{:?}", states_vec); + let initial_state_str = self.initial_state.to_string(); + let final_states_str = format!("{:?}", final_states_vec); - let states_str = states_vec + let transition_tbl_hdr = transition_vec .iter() - .enumerate() - .fold("[".to_owned(), |str, (index, c)| { - let ending = if index == self.states.len() - 1 { - "]" - } else { - ", " - }; - format!("{str}{c}{ending}") + .fold("\t|".to_owned(), |acc, &t| match t { + Transition::Char(c) => format!("{}\t'{}'\t|", acc, c), + Transition::Wildcard => format!("{}\t.\t|", acc), }); - let initial_state_str: String = self.initial_state.to_string(); - - let final_states_str = - final_states_vec + let mut transition_map: HashMap<u64, Vec<Option<Vec<u64>>>> = HashMap::new(); + for &state in states_vec.iter() { + let input_dests = transition_vec .iter() - .enumerate() - .fold("[".to_owned(), |str, (index, c)| { - let ending = if index == self.final_states.len() - 1 { - "]" - } else { - ", " - }; - format!("{str}{c}{ending}") - }); - - let mut transition_table_hdr: String = alphabet_vec - .iter() - .map(|c| format!("\t{c}\t|")) - .fold("\t|".to_owned(), |str, substr| format!("{str}{substr}")); - transition_table_hdr.push_str("\tWildcard\t|"); - - let mut transition_map: HashMap<u64, Vec<Vec<u64>>> = HashMap::new(); - - let index_map: HashMap<Transition, usize> = alphabet_vec - .iter() - .enumerate() - .map(|(index, char)| (Transition::Char(char.to_owned()), index)) - .collect(); - - for ((from, input), to) in self.transition_table.iter() { - let mut new_vec: Vec<Vec<u64>> = transition_map - .get(from) - .unwrap_or(&vec![Vec::new(); self.alphabet.len()]) - .to_owned(); - let new_index = index_map.get(&input).ok_or(fmt::Error)?; - new_vec[*new_index] = to.to_owned(); - transition_map.insert(from.to_owned(), new_vec); + .map(|&transition| { + self.transition_table + .get(&(state, transition)) + .and_then(|v| Some(v.to_owned())) + }) + .collect(); + transition_map.insert(state, input_dests); } - let transition_table_str: String = + let transition_table_str = transition_map .iter() - .fold(transition_table_hdr, |st, (input, state_sets)| { - let dests = state_sets.into_iter().fold("".to_owned(), |acc, dsts| { - let dest_sets_str: String = if dsts.len() == 0 { - "∅".to_owned() - } else { - dsts.iter() - .enumerate() - .fold("{".to_owned(), |acc, (index, dst)| { - let end = if index == dsts.len() - 1 { "}" } else { ", " }; - format!("{}{}{}", acc, dst, end) - }) + .fold(transition_tbl_hdr, |acc, (from, dest_groups)| { + let line_str = dest_groups.into_iter().fold("".to_owned(), |acc, dests| { + let dests_str = match dests { + None => "∅".to_owned(), + Some(v) => { + v.iter() + .enumerate() + .fold("{".to_owned(), |acc, (index, &dest)| { + let suffix = if index == v.len() - 1 { "}" } else { ", " }; + format!("{}{}{}", acc, dest, suffix) + }) + } }; - format!("{acc}{}\t|\t", dest_sets_str) + format!("{}{}\t|\t", acc, dests_str) }); - let wildcard = self - .transition_table - .get(&(*input, Transition::Wildcard)) - .map_or("∅".to_owned(), |dest| format!("{:?}", dest)); - format!("{}\n{}\t|\t{}\t|\t{}", st, input, dests, wildcard) + format!("{}\n{}\t|\t{}", acc, from, line_str) }); - - writeln!(f, "Alphabet: {}", alphabet_str)?; + writeln!(f, "Alphabet: {}", transition_str)?; writeln!(f, "States: {}", states_str)?; writeln!(f, "Initial State: {}", initial_state_str)?; writeln!(f, "Final States: {}", final_states_str)?; - writeln!(f, "Transition Table:\n{}", transition_table_str) + writeln!(f, "Transition Table:\n{}", transition_table_str)?; + let graphviz_repr = to_graphviz_dot(self); + match graphviz_repr { + Ok(s) => writeln!(f, "Graphviz Representation:\n{}", s), + Err(e) => writeln!(f, "Error getting Graphviz representation: {}", e), + } } } @@ -448,3 +416,35 @@ impl Encodable<Value> for NFA { }) } } + +impl TryFrom<NFA> for Graph<u64, Transition, Directed> { + type Error = NFAParseError; + fn try_from(nfa: NFA) -> Result<Self, Self::Error> { + let mut graph: Graph<u64, Transition, Directed> = Graph::new(); + + let states: HashMap<u64, _> = nfa.states.iter().map(|&s| (s, graph.add_node(s))).collect(); + + let mut alphabet: HashSet<Transition> = + nfa.alphabet.iter().map(|&c| Transition::Char(c)).collect(); + alphabet.insert(Transition::Wildcard); + + for transition in alphabet { + for (from_id, dest) in states + .iter() + .filter_map(|(&from, &from_id)| { + nfa.transition_table + .get(&(from, transition)) + .map(move |dests| dests.iter().map(move |dest| (from_id, dest))) + }) + .flatten() + { + let dest_id = *states.get(dest).ok_or(NFAParseError::error( + "The destination of a transition does not exist in the nfa states!", + ))?; + graph.add_edge(from_id, dest_id, transition); + } + } + + Ok(graph) + } +} diff --git a/src/lib/regex.rs b/src/lib/regex.rs @@ -390,15 +390,15 @@ impl Encodable<RegexNonTerminal> for Regex { let final_state = enfa.add_non_terminal(parse_tree.clone(), current_state)?; enfa.set_state_acceptance(final_state, true)?; - eprintln!("DEBUG: {}", enfa); + eprintln!("Graph ENFA:\n{}", enfa); let test_enfa = enfa.as_enfa()?; - eprintln!("DEBUG2: {}", test_enfa); + eprintln!("Regular ENFA:\n{}", test_enfa); let dfa = enfa.as_dfa()?; - eprintln!("DEBUG3: {}", dfa); + eprintln!("DFA:\n{}", dfa); Ok(Regex { automaton: dfa,