commit 4d6869cf819d0ba58ae1df4a29dae9958fe2b9f8
parent c069a2bea6e0514c55a12cc86c439f6cd0fa32b0
Author: Ethan Long <ethandavidlong@gmail.com>
Date: Thu, 22 Feb 2024 20:44:55 +1100
Fixed a lot of bugs, nearly working!
Currently is parsing a regex through a recursive descent parser (I
think it's an RDP at least).
The parsing is then:
Regex->epsilon-NFA (Graph)->epsilon-NFA (transition table)->NFA->DFA
The NFA->DFA step is the part that I'm stuck on now, and then I will
have a deterministic regex engine!
Diffstat:
7 files changed, 314 insertions(+), 280 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
@@ -72,9 +72,9 @@ checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38"
[[package]]
name = "libc"
-version = "0.2.150"
+version = "0.2.153"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c"
+checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"
[[package]]
name = "petgraph"
@@ -94,18 +94,18 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
[[package]]
name = "proc-macro2"
-version = "1.0.70"
+version = "1.0.78"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b"
+checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
-version = "1.0.33"
+version = "1.0.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
+checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef"
dependencies = [
"proc-macro2",
]
@@ -179,9 +179,9 @@ dependencies = [
[[package]]
name = "syn"
-version = "2.0.39"
+version = "2.0.50"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a"
+checksum = "74f1bdc9872430ce9b75da68329d1c1746faf50ffac5f19e02b71e37ff881ffb"
dependencies = [
"proc-macro2",
"quote",
diff --git a/src/lib/automaton.rs b/src/lib/automaton.rs
@@ -1,3 +1,4 @@
+use petgraph::{dot::Dot, Directed, Graph};
use std::error::Error;
use std::fmt::Display;
@@ -31,6 +32,19 @@ where
fn parse(encoding: T) -> Result<Self, Box<dyn Error>>;
}
+pub fn to_graphviz_dot<T, U, V>(automaton: &T) -> Result<String, Box<dyn Error>>
+where
+ T: TryInto<Graph<U, V, Directed>>,
+ T: Clone,
+ U: std::fmt::Debug,
+ V: std::fmt::Debug,
+ <T as TryInto<Graph<U, V, Directed>>>::Error: std::error::Error,
+ <T as TryInto<Graph<U, V, Directed>>>::Error: 'static,
+{
+ let graph: Graph<U, V, Directed> = automaton.clone().try_into()?;
+ Ok(format!("{:?}", Dot::new(&graph)))
+}
+
#[cfg(test)]
mod tests {
mod testlib {
diff --git a/src/lib/dfa.rs b/src/lib/dfa.rs
@@ -1,4 +1,5 @@
use core::fmt::Display;
+use petgraph::{Directed, Graph};
use std::{
collections::{HashMap, HashSet},
error::Error,
@@ -6,11 +7,11 @@ use std::{
panic::Location,
};
-use crate::{Automaton, Encodable, FiniteStateAutomaton};
+use crate::{to_graphviz_dot, Automaton, Encodable, FiniteStateAutomaton};
use serde_json::{from_str, Value};
#[derive(Debug)]
-struct DFAParseError {
+pub struct DFAParseError {
reason: String,
}
@@ -53,7 +54,7 @@ impl DFAParseError {
impl Error for DFAParseError {}
-#[derive(Hash, Debug, PartialEq, Eq, Clone, Copy)]
+#[derive(Hash, Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord)]
pub enum Transition {
Char(char),
Wildcard,
@@ -124,99 +125,64 @@ impl FiniteStateAutomaton for DFA {
impl Display for DFA {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- let mut alphabet_vec: Vec<char> = self.alphabet.iter().copied().collect();
+ let mut transition_vec: Vec<Transition> =
+ self.alphabet.iter().map(|&c| Transition::Char(c)).collect();
let mut states_vec: Vec<u64> = self.states.iter().copied().collect();
let mut final_states_vec: Vec<u64> = self.final_states.iter().copied().collect();
- alphabet_vec.sort();
+ transition_vec.push(Transition::Wildcard);
+ transition_vec.sort();
states_vec.sort();
final_states_vec.sort();
- let alphabet_str: String =
- alphabet_vec
- .iter()
- .enumerate()
- .fold("[".to_owned(), |st, (index, c)| {
- let ending = if index == self.alphabet.len() - 1 {
- "]"
- } else {
- ", "
- };
- format!("{}{}{}", st, c, ending)
- });
-
- let states_str: String =
- states_vec
- .iter()
- .enumerate()
- .fold("[".to_owned(), |st, (index, n)| {
- let ending = if index == self.states.len() - 1 {
- "]"
- } else {
- ", "
- };
- format!("{}{}{}", st, n, ending)
- });
-
- let initial_state_str: String = self.initial_state.to_string();
+ let transition_str = format!("{:?}", transition_vec);
+ let states_str = format!("{:?}", states_vec);
+ let initial_state_str = self.initial_state.to_string();
+ let final_states_str = format!("{:?}", final_states_vec);
- let final_states_str: String =
- final_states_vec
- .iter()
- .enumerate()
- .fold("[".to_owned(), |st, (index, n)| {
- let ending = if index == self.final_states.len() - 1 {
- "]"
- } else {
- ", "
- };
- format!("{}{}{}", st, n, ending)
- });
-
- let index_map: HashMap<Transition, usize> = alphabet_vec
+ let transition_tbl_hdr = transition_vec
.iter()
- .enumerate()
- .map(|(index, &char)| (Transition::Char(char), index))
- .collect();
-
- let mut transition_table_hdr: String = alphabet_vec
- .iter()
- .map(|c| format!("\t{}\t|", c))
- .fold("\t|".to_owned(), |st, substr| format!("{}{}", st, substr));
- transition_table_hdr.push_str("\tWildcard");
-
- // FIXME: I Think this won't work xD
- let mut transition_map: HashMap<u64, Vec<u64>> = HashMap::new();
-
- for ((from, input), to) in self.transition_table.iter() {
- let mut new_vec: Vec<u64> = transition_map
- .get(from)
- .unwrap_or(&vec![0 as u64; self.alphabet.len()])
- .to_owned();
- let new_index = index_map.get(&input).ok_or(fmt::Error)?;
- new_vec[*new_index] = to.to_owned();
- transition_map.insert(from.to_owned(), new_vec);
+ .fold("\t|".to_owned(), |acc, &t| match t {
+ Transition::Char(c) => format!("{}\t'{}'\t|", acc, c),
+ Transition::Wildcard => format!("{}\t.\t|", acc),
+ });
+
+ let mut transition_map: HashMap<u64, Vec<Option<u64>>> = HashMap::new();
+ for &state in states_vec.iter() {
+ let input_dests = transition_vec
+ .iter()
+ .map(|&transition| {
+ self.transition_table
+ .get(&(state, transition))
+ .and_then(|v| Some(v.to_owned()))
+ })
+ .collect();
+ transition_map.insert(state, input_dests);
}
- let transition_table_str: String =
+ let transition_table_str =
transition_map
.iter()
- .fold(transition_table_hdr, |st, (&from, states)| {
- let dests = states
- .into_iter()
- .fold("".to_owned(), |st, dst| format!("{}{}\t|\t", st, dst));
- let wildcard = self
- .transition_table
- .get(&(from, Transition::Wildcard))
- .map_or("∅".to_owned(), |&dest| dest.to_string());
- format!("{}\n{}\t|\t{}{}", st, from, dests, wildcard)
+ .fold(transition_tbl_hdr, |acc, (from, dest_groups)| {
+ let line_str = dest_groups.into_iter().fold("".to_owned(), |acc, dests| {
+ let dest_str = match dests {
+ None => "∅".to_owned(),
+ Some(v) => v.to_string(),
+ };
+ format!("{}{}\t|\t", acc, dest_str)
+ });
+ format!("{}\n{}\t|\t{}", acc, from, line_str)
});
-
- writeln!(f, "Alphabet: {}", alphabet_str)?;
+ writeln!(f, "Alphabet: {}", transition_str)?;
writeln!(f, "States: {}", states_str)?;
writeln!(f, "Initial State: {}", initial_state_str)?;
writeln!(f, "Final States: {}", final_states_str)?;
- writeln!(f, "Transition Table:\n{}", transition_table_str)
+ writeln!(f, "Transition Table:\n{}", transition_table_str)?;
+ let graphviz_repr = to_graphviz_dot(self);
+ match graphviz_repr {
+ Ok(s) => writeln!(f, "Graphviz Representation:\n{}", s),
+ Err(e) => writeln!(f, "Error getting Graphviz representation: {}", e),
+ }
}
}
@@ -320,3 +286,31 @@ impl Encodable<Value> for DFA {
})
}
}
+
+impl TryFrom<DFA> for Graph<u64, Transition, Directed> {
+ type Error = DFAParseError;
+ fn try_from(dfa: DFA) -> Result<Self, Self::Error> {
+ let mut graph: Graph<u64, Transition, Directed> = Graph::new();
+
+ let states: HashMap<u64, _> = dfa.states.iter().map(|&s| (s, graph.add_node(s))).collect();
+
+ let mut alphabet: HashSet<Transition> =
+ dfa.alphabet.iter().map(|&c| Transition::Char(c)).collect();
+ alphabet.insert(Transition::Wildcard);
+
+ for transition in alphabet {
+ for (from_id, dest) in states.iter().filter_map(|(&from, &from_id)| {
+ dfa.transition_table
+ .get(&(from, transition))
+ .map(|dest| (from_id, dest))
+ }) {
+ let dest_id = *states.get(dest).ok_or(DFAParseError::error(
+ "The destination of a transition does not exist in the dfa states!",
+ ))?;
+ graph.add_edge(from_id, dest_id, transition);
+ }
+ }
+
+ Ok(graph)
+ }
+}
diff --git a/src/lib/enfa.rs b/src/lib/enfa.rs
@@ -1,9 +1,10 @@
use crate::{
dfa::{self, DFA},
nfa::{self, NFA},
- Automaton, Encodable, FiniteStateAutomaton,
+ to_graphviz_dot, Automaton, Encodable, FiniteStateAutomaton,
};
use core::fmt::Display;
+use petgraph::{Directed, Graph};
use serde_json::{from_str, Value};
use std::{
collections::{HashMap, HashSet},
@@ -50,11 +51,11 @@ impl ENFAParseError {
impl Error for ENFAParseError {}
-#[derive(Hash, Debug, PartialEq, Eq, Clone, Copy)]
+#[derive(Hash, Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord)]
pub enum Transition {
Char(char),
- Wildcard,
Epsilon,
+ Wildcard,
}
impl From<nfa::Transition> for Transition {
@@ -134,8 +135,8 @@ impl ENFA {
}
}
+ // FIXME: There are states with no entrypoint here! Prune time
pub fn as_nfa(&self) -> NFA {
- // TODO: Implement this properly
let nfa_final_states = self
.states
.iter()
@@ -149,13 +150,21 @@ impl ENFA {
let mut nfa_transition_table: HashMap<(u64, nfa::Transition), Vec<u64>> = HashMap::new();
- let enfa_state_accepted_input_map: HashMap<u64, HashSet<Transition>> = HashMap::new();
+ let mut enfa_state_accepted_input_map: HashMap<u64, HashSet<Transition>> = HashMap::new();
for (from, transition) in self.transition_table.keys() {
- let mut transitions = enfa_state_accepted_input_map
- .get(from)
- .map_or(HashSet::new(), HashSet::to_owned);
- transitions.insert(*transition);
+ match enfa_state_accepted_input_map.get(from) {
+ None => {
+ let mut transitions = HashSet::new();
+ transitions.insert(*transition);
+ enfa_state_accepted_input_map.insert(*from, transitions);
+ }
+ Some(transitions) => {
+ let mut transitions = transitions.to_owned();
+ transitions.insert(*transition);
+ enfa_state_accepted_input_map.insert(*from, transitions);
+ }
+ }
}
for &state in self.states.iter() {
@@ -177,7 +186,9 @@ impl ENFA {
.map_or(vec![], Vec::to_owned);
let nfa_to = nfa_transition_table.get_mut(&(state, transition));
match nfa_to {
- Some(v) => v.append(&mut enfa_to),
+ Some(v) => {
+ v.append(&mut enfa_to);
+ }
None => {
nfa_transition_table.insert((state, transition), enfa_to);
}
@@ -185,9 +196,17 @@ impl ENFA {
}
}
+ let mut nfa_states: HashSet<u64> = nfa_transition_table
+ .values()
+ .flat_map(Vec::to_owned)
+ .collect(); // Prune off the states without an entrypoint
+ nfa_states.insert(self.initial_state); // The only state without an entrypoint
+
+ // TODO: Maybe try re-numbering the states here before making the NFA so that they're sequential?
+
NFA::new(
self.alphabet.clone(),
- self.states.clone(),
+ nfa_states,
self.initial_state,
nfa_final_states,
nfa_transition_table,
@@ -250,7 +269,7 @@ impl Automaton for ENFA {
impl FiniteStateAutomaton for ENFA {
fn as_dfa(&self) -> Result<DFA, Box<dyn std::error::Error>> {
let nfa = self.as_nfa();
- eprintln!("HOLY FUCKERONI IS THE NFA RIGHT? {}", nfa);
+ eprintln!("HOLY FUCKERONI IS THE NFA RIGHT?\n{}", nfa);
nfa.as_dfa()
}
@@ -275,116 +294,73 @@ impl FiniteStateAutomaton for ENFA {
impl Display for ENFA {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
- let mut alphabet_vec: Vec<char> = self.alphabet.iter().copied().collect();
+ let mut transition_vec: Vec<Transition> =
+ self.alphabet.iter().map(|&c| Transition::Char(c)).collect();
let mut states_vec: Vec<u64> = self.states.iter().copied().collect();
let mut final_states_vec: Vec<u64> = self.final_states.iter().copied().collect();
- alphabet_vec.sort();
+ transition_vec.push(Transition::Epsilon);
+ transition_vec.push(Transition::Wildcard);
+ transition_vec.sort();
states_vec.sort();
final_states_vec.sort();
- let alphabet_str =
- alphabet_vec
- .iter()
- .enumerate()
- .fold("[".to_owned(), |str, (index, c)| {
- let ending = if index == self.alphabet.len() - 1 {
- "]"
- } else {
- ", "
- };
- format!("{str}{c}{ending}")
- });
+ let transition_str = format!("{:?}", transition_vec);
+ let states_str = format!("{:?}", states_vec);
+ let initial_state_str = self.initial_state.to_string();
+ let final_states_str = format!("{:?}", final_states_vec);
- let states_str = states_vec
+ let transition_tbl_hdr = transition_vec
.iter()
- .enumerate()
- .fold("[".to_owned(), |str, (index, c)| {
- let ending = if index == self.states.len() - 1 {
- "]"
- } else {
- ", "
- };
- format!("{str}{c}{ending}")
+ .fold("\t|".to_owned(), |acc, &t| match t {
+ Transition::Char(c) => format!("{}\t'{}'\t|", acc, c),
+ Transition::Epsilon => format!("{}\tε\t|", acc),
+ Transition::Wildcard => format!("{}\t.\t|", acc),
});
- let initial_state_str: String = self.initial_state.to_string();
-
- let final_states_str =
- final_states_vec
+ let mut transition_map: HashMap<u64, Vec<Option<Vec<u64>>>> = HashMap::new();
+ for &state in states_vec.iter() {
+ let input_dests = transition_vec
.iter()
- .enumerate()
- .fold("[".to_owned(), |str, (index, c)| {
- let ending = if index == self.final_states.len() - 1 {
- "]"
- } else {
- ", "
- };
- format!("{str}{c}{ending}")
- });
-
- let mut transition_table_hdr: String = alphabet_vec
- .iter()
- .map(|c| format!("\t{c}\t|"))
- .fold("\t|".to_owned(), |str, substr| format!("{str}{substr}"));
- transition_table_hdr.push_str("\tε\t|");
- transition_table_hdr.push_str("\tWildcard\t|");
-
- let mut transition_map: HashMap<u64, Vec<Vec<u64>>> = HashMap::new();
-
- let index_map: HashMap<char, usize> = alphabet_vec
- .iter()
- .enumerate()
- .map(|(index, char)| (char.to_owned(), index))
- .collect();
-
- for ((from, input), to) in self.transition_table.iter() {
- let mut new_vec: Vec<Vec<u64>> = transition_map
- .get(from)
- .unwrap_or(&vec![Vec::new(); self.alphabet.len() + 1])
- .to_owned();
- match input {
- Transition::Char(char) => {
- let new_index = index_map.get(char).ok_or(core::fmt::Error)?;
- new_vec[*new_index] = to.to_owned();
- }
- _ => {
- new_vec[self.alphabet.len()] = to.to_owned();
- }
- }
- transition_map.insert(from.to_owned(), new_vec);
+ .map(|&transition| {
+ self.transition_table
+ .get(&(state, transition))
+ .and_then(|v| Some(v.to_owned()))
+ })
+ .collect();
+ transition_map.insert(state, input_dests);
}
let transition_table_str =
transition_map
.iter()
- .fold(transition_table_hdr, |acc, (input, state_sets)| {
- let dests = state_sets.into_iter().fold("".to_owned(), |acc, dsts| {
- let dest_sets_str: String = if dsts.len() == 0 {
- "∅".to_owned()
- } else {
- dsts.iter()
- .enumerate()
- .fold("{".to_owned(), |acc, (index, dst)| {
- let end = if index == dsts.len() - 1 { "}" } else { ", " };
- format!("{}{}{}", acc, dst, end)
- })
+ .fold(transition_tbl_hdr, |acc, (from, dest_groups)| {
+ let line_str = dest_groups.into_iter().fold("".to_owned(), |acc, dests| {
+ let dests_str = match dests {
+ None => "∅".to_owned(),
+ Some(v) => {
+ v.iter()
+ .enumerate()
+ .fold("{".to_owned(), |acc, (index, &dest)| {
+ let suffix = if index == v.len() - 1 { "}" } else { ", " };
+ format!("{}{}{}", acc, dest, suffix)
+ })
+ }
};
- format!("{}{}\t|\t", acc, dest_sets_str)
+ format!("{}{}\t|\t", acc, dests_str)
});
- // TODO: Check that this is correct
- let wildcard_map: String = self
- .transition_table
- .get(&(*input, Transition::Wildcard))
- .map_or("∅".to_owned(), |dest| format!("{:?}", dest));
- format!("{}\n{}\t|\t{}\t|\t{}", acc, input, dests, wildcard_map)
+ format!("{}\n{}\t|\t{}", acc, from, line_str)
});
-
- writeln!(f, "Alphabet: {}", alphabet_str)?;
+ writeln!(f, "Alphabet: {}", transition_str)?;
writeln!(f, "States: {}", states_str)?;
writeln!(f, "Initial State: {}", initial_state_str)?;
- writeln!(f, "Final State: {}", final_states_str)?;
- writeln!(f, "Transition Table:\n{}", transition_table_str)
+ writeln!(f, "Final States: {}", final_states_str)?;
+ writeln!(f, "Transition Table:\n{}", transition_table_str)?;
+ let graphviz_repr = to_graphviz_dot(self);
+ match graphviz_repr {
+ Ok(s) => writeln!(f, "Graphviz Representation:\n{}", s),
+ Err(e) => writeln!(f, "Error getting Graphviz representation: {}", e),
+ }
}
}
@@ -514,3 +490,40 @@ impl Encodable<Value> for ENFA {
})
}
}
+
+impl TryFrom<ENFA> for Graph<u64, Transition, Directed> {
+ type Error = ENFAParseError;
+ fn try_from(enfa: ENFA) -> Result<Self, Self::Error> {
+ let mut graph: Graph<u64, Transition, Directed> = Graph::new();
+
+ let states: HashMap<u64, _> = enfa
+ .states
+ .iter()
+ .map(|&s| (s, graph.add_node(s)))
+ .collect();
+
+ let mut alphabet: HashSet<Transition> =
+ enfa.alphabet.iter().map(|&c| Transition::Char(c)).collect();
+ alphabet.insert(Transition::Epsilon);
+ alphabet.insert(Transition::Wildcard);
+
+ for transition in alphabet {
+ for (from_id, dest) in states
+ .iter()
+ .filter_map(|(&from, &from_id)| {
+ enfa.transition_table
+ .get(&(from, transition))
+ .map(move |dests| dests.iter().map(move |dest| (from_id, dest)))
+ })
+ .flatten()
+ {
+ let dest_id = *states.get(dest).ok_or(ENFAParseError::error(
+ "The destination of a transition does not exist in the enfa states!",
+ ))?;
+ graph.add_edge(from_id, dest_id, transition);
+ }
+ }
+
+ Ok(graph)
+ }
+}
diff --git a/src/lib/graph_enfa.rs b/src/lib/graph_enfa.rs
@@ -1,7 +1,7 @@
use crate::{
dfa,
enfa::{self, ENFA},
- Automaton, FiniteStateAutomaton,
+ to_graphviz_dot, Automaton, FiniteStateAutomaton,
};
use petgraph::{dot::Dot, graph::NodeIndex, visit::EdgeRef, Directed, Graph};
use std::{
@@ -76,6 +76,7 @@ impl From<dfa::Transition> for Transition {
}
/// Intended as an easy way of constructing an ε-NFA in bits, adding onto it much like you'd build an ε-NFA on a whiteboard.
+#[derive(Clone)]
pub struct GraphENFA {
alphabet: HashSet<char>,
states: HashSet<u64>,
@@ -259,7 +260,9 @@ impl GraphENFA {
"Error converting from graph to regular NFA!",
))?
.to_owned();
- transitions.push(dest);
+ if !transitions.contains(&dest) {
+ transitions.push(dest);
+ }
if !visited.contains(&dest) {
next_states.push(dest);
}
@@ -394,6 +397,16 @@ impl Display for GraphENFA {
writeln!(f, "States: {}", states_str)?;
writeln!(f, "Initial State: {}", initial_state_str)?;
writeln!(f, "Final States: {}", final_states_str)?;
- writeln!(f, "{:?}", Dot::new(&self.graph))
+ let graphviz_repr = to_graphviz_dot(self);
+ match graphviz_repr {
+ Ok(s) => writeln!(f, "Graphviz Representation:\n{}", s),
+ Err(e) => writeln!(f, "Error getting Graphviz representation: {}", e),
+ }
+ }
+}
+
+impl From<GraphENFA> for Graph<u64, Transition, Directed> {
+ fn from(graph_enfa: GraphENFA) -> Self {
+ graph_enfa.graph
}
}
diff --git a/src/lib/nfa.rs b/src/lib/nfa.rs
@@ -1,8 +1,9 @@
use crate::{
dfa::{self, DFA},
- Automaton, Encodable, FiniteStateAutomaton,
+ to_graphviz_dot, Automaton, Encodable, FiniteStateAutomaton,
};
use core::fmt::Display;
+use petgraph::{Directed, Graph};
use std::{
collections::{BTreeSet, HashMap, HashSet, VecDeque},
error::Error,
@@ -13,7 +14,7 @@ use std::{
use serde_json::{from_str, Value};
#[derive(Debug)]
-struct NFAParseError {
+pub struct NFAParseError {
reason: String,
}
@@ -51,7 +52,7 @@ impl NFAParseError {
impl Error for NFAParseError {}
-#[derive(Hash, Debug, PartialEq, Eq, Clone, Copy)]
+#[derive(Hash, Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord)]
pub enum Transition {
Char(char),
Wildcard,
@@ -136,10 +137,11 @@ impl Automaton for NFA {
}
impl FiniteStateAutomaton for NFA {
+ // FIXME: This is broken, very broken.
fn as_dfa(&self) -> Result<DFA, Box<dyn Error>> {
- // TODO: Implement this function
let mut states = HashSet::new();
let initial_dfa_state = 0;
+ states.insert(initial_dfa_state);
let mut newest_dfa_state = initial_dfa_state;
let mut final_states = HashSet::new();
let mut transition_table = HashMap::new();
@@ -147,6 +149,8 @@ impl FiniteStateAutomaton for NFA {
let mut dfa_state_to_nfa_set: HashMap<u64, BTreeSet<u64>> = HashMap::new();
let mut nfa_set_to_dfa_state: HashMap<BTreeSet<u64>, u64> = HashMap::new();
+ nfa_set_to_dfa_state.insert(BTreeSet::from([self.initial_state]), initial_dfa_state);
+
let mut possible_inputs: Vec<dfa::Transition> = self
.alphabet
.iter()
@@ -167,7 +171,7 @@ impl FiniteStateAutomaton for NFA {
.get(&(nfa_state, Transition::from(input)))
.map_or(vec![], Vec::to_owned)
})
- //This conversion to BTreeSet is shit
+ // This conversion to BTreeSet is shit
.map(Vec::into_iter)
.map(BTreeSet::from_iter)
{
@@ -242,107 +246,71 @@ impl FiniteStateAutomaton for NFA {
impl Display for NFA {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
- let mut alphabet_vec: Vec<char> = self.alphabet.iter().copied().collect();
+ let mut transition_vec: Vec<Transition> =
+ self.alphabet.iter().map(|&c| Transition::Char(c)).collect();
let mut states_vec: Vec<u64> = self.states.iter().copied().collect();
let mut final_states_vec: Vec<u64> = self.final_states.iter().copied().collect();
- alphabet_vec.sort();
+ transition_vec.push(Transition::Wildcard);
+ transition_vec.sort();
states_vec.sort();
final_states_vec.sort();
- let alphabet_str =
- alphabet_vec
- .iter()
- .enumerate()
- .fold("[".to_owned(), |str, (index, c)| {
- let ending = if index == self.alphabet.len() - 1 {
- "]"
- } else {
- ", "
- };
- format!("{str}{c}{ending}")
- });
+ let transition_str = format!("{:?}", transition_vec);
+ let states_str = format!("{:?}", states_vec);
+ let initial_state_str = self.initial_state.to_string();
+ let final_states_str = format!("{:?}", final_states_vec);
- let states_str = states_vec
+ let transition_tbl_hdr = transition_vec
.iter()
- .enumerate()
- .fold("[".to_owned(), |str, (index, c)| {
- let ending = if index == self.states.len() - 1 {
- "]"
- } else {
- ", "
- };
- format!("{str}{c}{ending}")
+ .fold("\t|".to_owned(), |acc, &t| match t {
+ Transition::Char(c) => format!("{}\t'{}'\t|", acc, c),
+ Transition::Wildcard => format!("{}\t.\t|", acc),
});
- let initial_state_str: String = self.initial_state.to_string();
-
- let final_states_str =
- final_states_vec
+ let mut transition_map: HashMap<u64, Vec<Option<Vec<u64>>>> = HashMap::new();
+ for &state in states_vec.iter() {
+ let input_dests = transition_vec
.iter()
- .enumerate()
- .fold("[".to_owned(), |str, (index, c)| {
- let ending = if index == self.final_states.len() - 1 {
- "]"
- } else {
- ", "
- };
- format!("{str}{c}{ending}")
- });
-
- let mut transition_table_hdr: String = alphabet_vec
- .iter()
- .map(|c| format!("\t{c}\t|"))
- .fold("\t|".to_owned(), |str, substr| format!("{str}{substr}"));
- transition_table_hdr.push_str("\tWildcard\t|");
-
- let mut transition_map: HashMap<u64, Vec<Vec<u64>>> = HashMap::new();
-
- let index_map: HashMap<Transition, usize> = alphabet_vec
- .iter()
- .enumerate()
- .map(|(index, char)| (Transition::Char(char.to_owned()), index))
- .collect();
-
- for ((from, input), to) in self.transition_table.iter() {
- let mut new_vec: Vec<Vec<u64>> = transition_map
- .get(from)
- .unwrap_or(&vec![Vec::new(); self.alphabet.len()])
- .to_owned();
- let new_index = index_map.get(&input).ok_or(fmt::Error)?;
- new_vec[*new_index] = to.to_owned();
- transition_map.insert(from.to_owned(), new_vec);
+ .map(|&transition| {
+ self.transition_table
+ .get(&(state, transition))
+ .and_then(|v| Some(v.to_owned()))
+ })
+ .collect();
+ transition_map.insert(state, input_dests);
}
- let transition_table_str: String =
+ let transition_table_str =
transition_map
.iter()
- .fold(transition_table_hdr, |st, (input, state_sets)| {
- let dests = state_sets.into_iter().fold("".to_owned(), |acc, dsts| {
- let dest_sets_str: String = if dsts.len() == 0 {
- "∅".to_owned()
- } else {
- dsts.iter()
- .enumerate()
- .fold("{".to_owned(), |acc, (index, dst)| {
- let end = if index == dsts.len() - 1 { "}" } else { ", " };
- format!("{}{}{}", acc, dst, end)
- })
+ .fold(transition_tbl_hdr, |acc, (from, dest_groups)| {
+ let line_str = dest_groups.into_iter().fold("".to_owned(), |acc, dests| {
+ let dests_str = match dests {
+ None => "∅".to_owned(),
+ Some(v) => {
+ v.iter()
+ .enumerate()
+ .fold("{".to_owned(), |acc, (index, &dest)| {
+ let suffix = if index == v.len() - 1 { "}" } else { ", " };
+ format!("{}{}{}", acc, dest, suffix)
+ })
+ }
};
- format!("{acc}{}\t|\t", dest_sets_str)
+ format!("{}{}\t|\t", acc, dests_str)
});
- let wildcard = self
- .transition_table
- .get(&(*input, Transition::Wildcard))
- .map_or("∅".to_owned(), |dest| format!("{:?}", dest));
- format!("{}\n{}\t|\t{}\t|\t{}", st, input, dests, wildcard)
+ format!("{}\n{}\t|\t{}", acc, from, line_str)
});
-
- writeln!(f, "Alphabet: {}", alphabet_str)?;
+ writeln!(f, "Alphabet: {}", transition_str)?;
writeln!(f, "States: {}", states_str)?;
writeln!(f, "Initial State: {}", initial_state_str)?;
writeln!(f, "Final States: {}", final_states_str)?;
- writeln!(f, "Transition Table:\n{}", transition_table_str)
+ writeln!(f, "Transition Table:\n{}", transition_table_str)?;
+ let graphviz_repr = to_graphviz_dot(self);
+ match graphviz_repr {
+ Ok(s) => writeln!(f, "Graphviz Representation:\n{}", s),
+ Err(e) => writeln!(f, "Error getting Graphviz representation: {}", e),
+ }
}
}
@@ -448,3 +416,35 @@ impl Encodable<Value> for NFA {
})
}
}
+
+impl TryFrom<NFA> for Graph<u64, Transition, Directed> {
+ type Error = NFAParseError;
+ fn try_from(nfa: NFA) -> Result<Self, Self::Error> {
+ let mut graph: Graph<u64, Transition, Directed> = Graph::new();
+
+ let states: HashMap<u64, _> = nfa.states.iter().map(|&s| (s, graph.add_node(s))).collect();
+
+ let mut alphabet: HashSet<Transition> =
+ nfa.alphabet.iter().map(|&c| Transition::Char(c)).collect();
+ alphabet.insert(Transition::Wildcard);
+
+ for transition in alphabet {
+ for (from_id, dest) in states
+ .iter()
+ .filter_map(|(&from, &from_id)| {
+ nfa.transition_table
+ .get(&(from, transition))
+ .map(move |dests| dests.iter().map(move |dest| (from_id, dest)))
+ })
+ .flatten()
+ {
+ let dest_id = *states.get(dest).ok_or(NFAParseError::error(
+ "The destination of a transition does not exist in the nfa states!",
+ ))?;
+ graph.add_edge(from_id, dest_id, transition);
+ }
+ }
+
+ Ok(graph)
+ }
+}
diff --git a/src/lib/regex.rs b/src/lib/regex.rs
@@ -390,15 +390,15 @@ impl Encodable<RegexNonTerminal> for Regex {
let final_state = enfa.add_non_terminal(parse_tree.clone(), current_state)?;
enfa.set_state_acceptance(final_state, true)?;
- eprintln!("DEBUG: {}", enfa);
+ eprintln!("Graph ENFA:\n{}", enfa);
let test_enfa = enfa.as_enfa()?;
- eprintln!("DEBUG2: {}", test_enfa);
+ eprintln!("Regular ENFA:\n{}", test_enfa);
let dfa = enfa.as_dfa()?;
- eprintln!("DEBUG3: {}", dfa);
+ eprintln!("DFA:\n{}", dfa);
Ok(Regex {
automaton: dfa,