commit c069a2bea6e0514c55a12cc86c439f6cd0fa32b0
parent 822ddeb51d020c593aec273138cbbf35842097f0
Author: Ethan Long <ethandavidlong@gmail.com>
Date: Sun, 4 Feb 2024 23:28:47 +1100
Made some more progress. Things are still buggy AF
I scrapped the notion of even bothering to have a separate field for
"loose transitions", that was stupid. I have instead decided to make
everything use just use the `Transition` type that depends on the
automaton structure.
I know the Regex->Graph ENFA parsing is going good, as the graphviz
visualisations look good. So I just need to clean up the implementations
and get rid of bugs etc.
Diffstat:
| M | src/lib/dfa.rs | | | 41 | ++++++++++++++++++++++++++--------------- |
| M | src/lib/enfa.rs | | | 205 | +++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------- |
| M | src/lib/graph_enfa.rs | | | 83 | +++++++++++++++++++++++++++++++++++++++++++++++++------------------------------ |
| M | src/lib/nfa.rs | | | 193 | +++++++++++++++++++++++++++++++++++++++++++++---------------------------------- |
| M | src/lib/regex.rs | | | 81 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------ |
5 files changed, 384 insertions(+), 219 deletions(-)
diff --git a/src/lib/dfa.rs b/src/lib/dfa.rs
@@ -53,21 +53,30 @@ impl DFAParseError {
impl Error for DFAParseError {}
+#[derive(Hash, Debug, PartialEq, Eq, Clone, Copy)]
+pub enum Transition {
+ Char(char),
+ Wildcard,
+}
+
#[derive(Clone, Debug, PartialEq)]
pub struct DFA {
pub alphabet: HashSet<char>,
pub states: HashSet<u64>,
pub initial_state: u64,
pub final_states: HashSet<u64>,
- pub transition_table: HashMap<(u64, char), u64>,
- pub loose_transitions: HashMap<u64, u64>,
+ pub transition_table: HashMap<(u64, Transition), u64>,
}
impl Automaton for DFA {
fn accepts(&self, input: &str) -> bool {
let mut current_state: u64 = self.initial_state;
for c in input.chars() {
- if !self.alphabet.contains(&c) && !self.loose_transitions.contains_key(¤t_state) {
+ if !self.alphabet.contains(&c)
+ && !self
+ .transition_table
+ .contains_key(&(current_state, Transition::Wildcard))
+ {
DFAParseError::print_warn(&format!(
"Input character {} is not in the alphabet of the DFA",
c
@@ -75,8 +84,10 @@ impl Automaton for DFA {
}
match self
.transition_table
- .get(&(current_state, c))
- .or(self.loose_transitions.get(¤t_state))
+ .get(&(current_state, Transition::Char(c)))
+ .or(self
+ .transition_table
+ .get(&(current_state, Transition::Wildcard)))
{
Some(state) => current_state = state.to_owned(),
None => {
@@ -162,18 +173,19 @@ impl Display for DFA {
format!("{}{}{}", st, n, ending)
});
- let index_map: HashMap<char, usize> = alphabet_vec
+ let index_map: HashMap<Transition, usize> = alphabet_vec
.iter()
.enumerate()
- .map(|(index, char)| (char.to_owned(), index))
+ .map(|(index, &char)| (Transition::Char(char), index))
.collect();
let mut transition_table_hdr: String = alphabet_vec
.iter()
.map(|c| format!("\t{}\t|", c))
.fold("\t|".to_owned(), |st, substr| format!("{}{}", st, substr));
- transition_table_hdr.push_str("\tLoose");
+ transition_table_hdr.push_str("\tWildcard");
+ // FIXME: I Think this won't work xD
let mut transition_map: HashMap<u64, Vec<u64>> = HashMap::new();
for ((from, input), to) in self.transition_table.iter() {
@@ -189,15 +201,15 @@ impl Display for DFA {
let transition_table_str: String =
transition_map
.iter()
- .fold(transition_table_hdr, |st, (input, states)| {
+ .fold(transition_table_hdr, |st, (&from, states)| {
let dests = states
.into_iter()
.fold("".to_owned(), |st, dst| format!("{}{}\t|\t", st, dst));
let wildcard = self
- .loose_transitions
- .get(input)
+ .transition_table
+ .get(&(from, Transition::Wildcard))
.map_or("∅".to_owned(), |&dest| dest.to_string());
- format!("{}\n{}\t|\t{}{}", st, input, dests, wildcard)
+ format!("{}\n{}\t|\t{}{}", st, from, dests, wildcard)
});
writeln!(f, "Alphabet: {}", alphabet_str)?;
@@ -260,7 +272,7 @@ impl Encodable<Value> for DFA {
"Could not convert the final states to u64",
))?;
- let mut transition_table: HashMap<(u64, char), u64> = HashMap::new();
+ let mut transition_table: HashMap<(u64, Transition), u64> = HashMap::new();
for row in json["transition_table"]
.as_array()
.ok_or(DFAParseError::error(
@@ -295,7 +307,7 @@ impl Encodable<Value> for DFA {
"Unable to parse the transition destinations in the table",
))?;
for (input, dest) in dests.iter() {
- transition_table.insert((from, *input), *dest);
+ transition_table.insert((from, Transition::Char(*input)), *dest);
}
}
@@ -305,7 +317,6 @@ impl Encodable<Value> for DFA {
final_states,
transition_table,
initial_state,
- loose_transitions: HashMap::new(),
})
}
}
diff --git a/src/lib/enfa.rs b/src/lib/enfa.rs
@@ -1,4 +1,8 @@
-use crate::{dfa::DFA, nfa::NFA, Automaton, Encodable, FiniteStateAutomaton};
+use crate::{
+ dfa::{self, DFA},
+ nfa::{self, NFA},
+ Automaton, Encodable, FiniteStateAutomaton,
+};
use core::fmt::Display;
use serde_json::{from_str, Value};
use std::{
@@ -8,7 +12,7 @@ use std::{
};
#[derive(Debug)]
-struct ENFAParseError {
+pub struct ENFAParseError {
reason: String,
}
@@ -46,6 +50,42 @@ impl ENFAParseError {
impl Error for ENFAParseError {}
+#[derive(Hash, Debug, PartialEq, Eq, Clone, Copy)]
+pub enum Transition {
+ Char(char),
+ Wildcard,
+ Epsilon,
+}
+
+impl From<nfa::Transition> for Transition {
+ fn from(transition: nfa::Transition) -> Self {
+ match transition {
+ nfa::Transition::Char(c) => Self::Char(c),
+ nfa::Transition::Wildcard => Self::Wildcard,
+ }
+ }
+}
+
+impl From<dfa::Transition> for Transition {
+ fn from(transition: dfa::Transition) -> Self {
+ match transition {
+ dfa::Transition::Char(c) => Self::Char(c),
+ dfa::Transition::Wildcard => Self::Wildcard,
+ }
+ }
+}
+
+impl TryFrom<Transition> for nfa::Transition {
+ type Error = ENFAParseError;
+ fn try_from(transition: Transition) -> Result<Self, Self::Error> {
+ match transition {
+ Transition::Char(c) => Ok(nfa::Transition::Char(c)),
+ Transition::Wildcard => Ok(nfa::Transition::Wildcard),
+ Transition::Epsilon => Err(ENFAParseError::error("Unable to convert ENFA epsilon transition to NFA transition, NFAs have no notion of an epsilon transition!"))
+ }
+ }
+}
+
// ε-NFAs!
#[derive(Clone, Debug, PartialEq)]
pub struct ENFA {
@@ -53,17 +93,20 @@ pub struct ENFA {
states: HashSet<u64>,
initial_state: u64,
final_states: HashSet<u64>,
- transition_table: HashMap<(u64, Option<char>), Vec<u64>>,
- loose_transitions: HashMap<u64, u64>,
+ transition_table: HashMap<(u64, Transition), Vec<u64>>,
}
impl ENFA {
fn eclose(&self, state: u64, visited: Option<&mut HashSet<u64>>) -> HashSet<u64> {
let mut reachable: HashSet<u64> = HashSet::new();
- let mut deez = HashSet::new();
- let visited = visited.unwrap_or(&mut deez);
+ let mut visited_set = HashSet::new(); // Lifetime hack. There has to be a better way
+ let visited = visited.unwrap_or(&mut visited_set);
reachable.insert(state);
- for state in self.transition_table.get(&(state, None)).unwrap_or(&vec![]) {
+ for state in self
+ .transition_table
+ .get(&(state, Transition::Epsilon))
+ .unwrap_or(&vec![])
+ {
if !visited.contains(state) {
visited.insert(*state);
for state in self.eclose(*state, Some(visited)).iter() {
@@ -80,8 +123,7 @@ impl ENFA {
states: HashSet<u64>,
initial_state: u64,
final_states: HashSet<u64>,
- transition_table: HashMap<(u64, Option<char>), Vec<u64>>,
- loose_transitions: HashMap<u64, u64>,
+ transition_table: HashMap<(u64, Transition), Vec<u64>>,
) -> ENFA {
ENFA {
alphabet,
@@ -89,9 +131,68 @@ impl ENFA {
initial_state,
final_states,
transition_table,
- loose_transitions,
}
}
+
+ pub fn as_nfa(&self) -> NFA {
+ // TODO: Implement this properly
+ let nfa_final_states = self
+ .states
+ .iter()
+ .filter(|&&starting_state| {
+ self.eclose(starting_state, None)
+ .into_iter()
+ .any(|epsilon_accessible| self.final_states.contains(&epsilon_accessible))
+ })
+ .map(u64::to_owned)
+ .collect();
+
+ let mut nfa_transition_table: HashMap<(u64, nfa::Transition), Vec<u64>> = HashMap::new();
+
+ let enfa_state_accepted_input_map: HashMap<u64, HashSet<Transition>> = HashMap::new();
+
+ for (from, transition) in self.transition_table.keys() {
+ let mut transitions = enfa_state_accepted_input_map
+ .get(from)
+ .map_or(HashSet::new(), HashSet::to_owned);
+ transitions.insert(*transition);
+ }
+
+ for &state in self.states.iter() {
+ let eclose = self.eclose(state, None);
+ for (enfa_from, transition) in eclose
+ .into_iter()
+ .flat_map(|s| {
+ enfa_state_accepted_input_map
+ .get(&s)
+ .map_or(HashSet::new(), HashSet::to_owned)
+ .into_iter()
+ .map(move |t| (s, t))
+ })
+ .filter_map(|(s, t)| nfa::Transition::try_from(t).map(|t| (s, t)).ok())
+ {
+ let mut enfa_to = self
+ .transition_table
+ .get(&(enfa_from, Transition::from(transition)))
+ .map_or(vec![], Vec::to_owned);
+ let nfa_to = nfa_transition_table.get_mut(&(state, transition));
+ match nfa_to {
+ Some(v) => v.append(&mut enfa_to),
+ None => {
+ nfa_transition_table.insert((state, transition), enfa_to);
+ }
+ }
+ }
+ }
+
+ NFA::new(
+ self.alphabet.clone(),
+ self.states.clone(),
+ self.initial_state,
+ nfa_final_states,
+ nfa_transition_table,
+ )
+ }
}
impl Automaton for ENFA {
@@ -103,24 +204,32 @@ impl Automaton for ENFA {
for c in input.chars() {
if !self.alphabet.contains(&c)
- && !current_states
- .iter()
- .any(|s| self.loose_transitions.contains_key(s))
+ && !current_states.iter().any(|&s| {
+ self.transition_table
+ .contains_key(&(s, Transition::Wildcard))
+ })
{
ENFAParseError::print_warn(&format!(
- "Input character {} is not in the alphabet of the DFA",
+ "Input character {} is not in the alphabet of the ENFA",
c
));
}
for state in current_states.iter() {
for newstate in self
.transition_table
- .get(&(*state, Some(c)))
+ .get(&(*state, Transition::Char(c)))
+ // FIXME: This is ugly
.map_or(
- self.loose_transitions
- .get(state)
- .map_or(vec![], |&s| vec![s]),
- |v| v.to_owned(),
+ self.transition_table
+ .get(&(*state, Transition::Wildcard))
+ .map_or(vec![], Vec::to_owned),
+ |v| {
+ let mut transitions = v.to_owned();
+ self.transition_table
+ .get(&(*state, Transition::Wildcard))
+ .map(|s| transitions.append(&mut s.clone()));
+ transitions
+ },
)
.iter()
.flat_map(|s| self.eclose(*s, None))
@@ -139,43 +248,9 @@ impl Automaton for ENFA {
}
impl FiniteStateAutomaton for ENFA {
- // TODO: Check that this is all g
- // FIXED?: BUG: If a final state is within close(state), then state is also a final state.
- // TODO: Check that there are no other bugs
fn as_dfa(&self) -> Result<DFA, Box<dyn std::error::Error>> {
- let mut nfa_transition_table = HashMap::new();
- for (from, input) in self
- .transition_table
- .keys()
- .filter_map(|(f, i)| i.and_then(|c| Some((*f, c))))
- {
- let new_to: Vec<u64> = self
- .eclose(from, None)
- .iter()
- .filter_map(|v| self.transition_table.get(&(*v, Some(input))))
- .flatten()
- .map(u64::to_owned)
- .collect();
- nfa_transition_table.insert((from, input), new_to);
- }
- let final_states = self
- .states
- .iter()
- .filter(|v| {
- self.eclose(**v, None)
- .iter()
- .any(|s| self.final_states.contains(s))
- })
- .map(u64::to_owned)
- .collect();
- let nfa = NFA::new(
- self.alphabet.clone(),
- self.states.clone(),
- self.initial_state,
- final_states,
- nfa_transition_table,
- self.loose_transitions.clone(),
- );
+ let nfa = self.as_nfa();
+ eprintln!("HOLY FUCKERONI IS THE NFA RIGHT? {}", nfa);
nfa.as_dfa()
}
@@ -186,7 +261,7 @@ impl FiniteStateAutomaton for ENFA {
let transition_table = dfa
.transition_table
.iter()
- .map(|((from, input), to)| ((*from, Some(*input)), vec![*to]))
+ .map(|((from, input), to)| ((*from, Transition::from(*input)), vec![*to]))
.collect();
Ok(ENFA {
alphabet: dfa.alphabet,
@@ -194,7 +269,6 @@ impl FiniteStateAutomaton for ENFA {
initial_state: dfa.initial_state,
final_states: dfa.final_states,
transition_table,
- loose_transitions: dfa.loose_transitions,
})
}
}
@@ -270,11 +344,11 @@ impl Display for ENFA {
.unwrap_or(&vec![Vec::new(); self.alphabet.len() + 1])
.to_owned();
match input {
- Some(char) => {
+ Transition::Char(char) => {
let new_index = index_map.get(char).ok_or(core::fmt::Error)?;
new_vec[*new_index] = to.to_owned();
}
- None => {
+ _ => {
new_vec[self.alphabet.len()] = to.to_owned();
}
}
@@ -300,9 +374,9 @@ impl Display for ENFA {
});
// TODO: Check that this is correct
let wildcard_map: String = self
- .loose_transitions
- .get(input)
- .map_or("∅".to_owned(), |&dest| dest.to_string());
+ .transition_table
+ .get(&(*input, Transition::Wildcard))
+ .map_or("∅".to_owned(), |dest| format!("{:?}", dest));
format!("{}\n{}\t|\t{}\t|\t{}", acc, input, dests, wildcard_map)
});
@@ -364,7 +438,7 @@ impl Encodable<Value> for ENFA {
"Could not convert the final states to u64",
))?;
- let mut transition_table: HashMap<(u64, Option<char>), Vec<u64>> = HashMap::new();
+ let mut transition_table: HashMap<(u64, Transition), Vec<u64>> = HashMap::new();
for row in json["transition_table"]
.as_array()
@@ -423,10 +497,10 @@ impl Encodable<Value> for ENFA {
"Unable to parse the ε-closure destinations in the table",
))?;
for (input, dests) in dest_sets.iter() {
- transition_table.insert((from, Some(*input)), dests.to_owned());
+ transition_table.insert((from, Transition::Char(*input)), dests.to_owned());
}
for transition in epsilon_transitions.iter() {
- transition_table.insert((from, None), transition.to_owned());
+ transition_table.insert((from, Transition::Epsilon), transition.to_owned());
}
}
@@ -437,7 +511,6 @@ impl Encodable<Value> for ENFA {
initial_state,
final_states,
transition_table,
- loose_transitions: HashMap::new(),
})
}
}
diff --git a/src/lib/graph_enfa.rs b/src/lib/graph_enfa.rs
@@ -1,4 +1,8 @@
-use crate::{enfa::ENFA, Automaton, FiniteStateAutomaton};
+use crate::{
+ dfa,
+ enfa::{self, ENFA},
+ Automaton, FiniteStateAutomaton,
+};
use petgraph::{dot::Dot, graph::NodeIndex, visit::EdgeRef, Directed, Graph};
use std::{
collections::{HashMap, HashSet},
@@ -35,14 +39,49 @@ impl GraphENFAError {
impl Error for GraphENFAError {}
+#[derive(Hash, Debug, PartialEq, Eq, Copy, Clone)]
+pub enum Transition {
+ Char(char),
+ Wildcard,
+ Epsilon,
+}
+
+impl From<Transition> for enfa::Transition {
+ fn from(transition: Transition) -> Self {
+ match transition {
+ Transition::Char(c) => enfa::Transition::Char(c),
+ Transition::Wildcard => enfa::Transition::Wildcard,
+ Transition::Epsilon => enfa::Transition::Epsilon,
+ }
+ }
+}
+
+impl From<enfa::Transition> for Transition {
+ fn from(transition: enfa::Transition) -> Self {
+ match transition {
+ enfa::Transition::Char(c) => Transition::Char(c),
+ enfa::Transition::Wildcard => Transition::Wildcard,
+ enfa::Transition::Epsilon => Transition::Epsilon,
+ }
+ }
+}
+
+impl From<dfa::Transition> for Transition {
+ fn from(transition: dfa::Transition) -> Self {
+ match transition {
+ dfa::Transition::Char(c) => Transition::Char(c),
+ dfa::Transition::Wildcard => Transition::Wildcard,
+ }
+ }
+}
+
/// Intended as an easy way of constructing an ε-NFA in bits, adding onto it much like you'd build an ε-NFA on a whiteboard.
pub struct GraphENFA {
alphabet: HashSet<char>,
states: HashSet<u64>,
initial_state: u64,
final_states: HashSet<u64>,
- graph: Graph<u64, Option<char>, Directed>,
- loose_transitions: HashMap<u64, u64>,
+ graph: Graph<u64, Transition, Directed>,
state_id_to_index_map: HashMap<u64, NodeIndex>,
current_state_id: u64,
}
@@ -68,7 +107,6 @@ impl GraphENFA {
initial_state: initial_state_id,
final_states: HashSet::new(),
graph,
- loose_transitions: HashMap::new(),
state_id_to_index_map,
current_state_id: initial_state_id,
},
@@ -155,7 +193,7 @@ impl GraphENFA {
&mut self,
from_id: u64,
to_id: u64,
- input: Option<char>,
+ input: Transition,
) -> Result<(), GraphENFAError> {
if !self.states.contains(&from_id) || !self.states.contains(&to_id) {
return Err(GraphENFAError::error(
@@ -164,7 +202,7 @@ impl GraphENFA {
}
match input {
- Some(c) => {
+ Transition::Char(c) => {
if !self.alphabet.contains(&c) {
self.alphabet.insert(c);
}
@@ -193,29 +231,10 @@ impl GraphENFA {
Ok(())
}
- pub fn insert_loose_transition(
- &mut self,
- from_id: u64,
- to_id: u64,
- ) -> Result<(), GraphENFAError> {
- if !self.states.contains(&from_id) || !self.states.contains(&to_id) {
- return Err(GraphENFAError::error(
- "Tried adding a loose transition to/from a state that doesn't exist!",
- ));
- }
- if self.loose_transitions.contains_key(&from_id) {
- return Err(GraphENFAError::error(
- "There is already a loose transition from the specified from location!",
- ));
- }
- self.loose_transitions.insert(from_id, to_id);
- Ok(())
- }
-
/// Convert into a regular ε-NFA backed by a `HashMap` instead of a `Graph`
/// Whilst this can be used, the `as_dfa` method should be used if you're going to evaluate string acceptance.
pub fn as_enfa(&self) -> Result<ENFA, Box<dyn Error>> {
- let mut transition_table = HashMap::new();
+ let mut transition_table: HashMap<(u64, enfa::Transition), Vec<u64>> = HashMap::new();
let mut current_states = vec![self.initial_state];
let mut next_states = vec![];
let mut visited = HashSet::new();
@@ -231,7 +250,7 @@ impl GraphENFA {
.to_owned();
for edge in self.graph.edges(current_state_index) {
let mut transitions = transition_table
- .get(&(current_state, *edge.weight()))
+ .get(&(current_state, Transition::into(*edge.weight())))
.map_or(vec![], |v: &Vec<u64>| v.to_owned());
let dest = self
.graph
@@ -244,8 +263,12 @@ impl GraphENFA {
if !visited.contains(&dest) {
next_states.push(dest);
}
- transition_table.insert((current_state, *edge.weight()), transitions);
+ transition_table.insert(
+ (current_state, Transition::into(*edge.weight())),
+ transitions,
+ );
}
+
visited.insert(current_state);
}
current_states = next_states;
@@ -258,7 +281,6 @@ impl GraphENFA {
self.initial_state,
self.final_states.clone(),
transition_table,
- self.loose_transitions.clone(),
))
}
}
@@ -295,7 +317,7 @@ impl FiniteStateAutomaton for GraphENFA {
.get(&to)
.map_or_else(|| graph.add_node(to), |n| n.to_owned());
state_id_to_index_map.insert(to, to_index);
- graph.add_edge(from_index, to_index, Some(input));
+ graph.add_edge(from_index, to_index, Transition::from(input));
}
let current_state_id = *dfa
@@ -310,7 +332,6 @@ impl FiniteStateAutomaton for GraphENFA {
initial_state: dfa.initial_state,
final_states: dfa.final_states,
graph,
- loose_transitions: dfa.loose_transitions,
state_id_to_index_map,
current_state_id,
})
diff --git a/src/lib/nfa.rs b/src/lib/nfa.rs
@@ -1,7 +1,10 @@
-use crate::{dfa::DFA, Automaton, Encodable, FiniteStateAutomaton};
+use crate::{
+ dfa::{self, DFA},
+ Automaton, Encodable, FiniteStateAutomaton,
+};
use core::fmt::Display;
use std::{
- collections::{BTreeSet, HashMap, HashSet},
+ collections::{BTreeSet, HashMap, HashSet, VecDeque},
error::Error,
fmt,
panic::Location,
@@ -48,14 +51,28 @@ impl NFAParseError {
impl Error for NFAParseError {}
+#[derive(Hash, Debug, PartialEq, Eq, Clone, Copy)]
+pub enum Transition {
+ Char(char),
+ Wildcard,
+}
+
+impl From<dfa::Transition> for Transition {
+ fn from(transition: dfa::Transition) -> Self {
+ match transition {
+ dfa::Transition::Char(c) => Transition::Char(c),
+ dfa::Transition::Wildcard => Transition::Wildcard,
+ }
+ }
+}
+
#[derive(Clone, Debug, PartialEq)]
pub struct NFA {
pub alphabet: HashSet<char>,
states: HashSet<u64>,
initial_state: u64,
final_states: HashSet<u64>,
- transition_table: HashMap<(u64, char), Vec<u64>>,
- loose_transitions: HashMap<u64, u64>,
+ transition_table: HashMap<(u64, Transition), Vec<u64>>,
}
impl NFA {
@@ -64,8 +81,7 @@ impl NFA {
states: HashSet<u64>,
initial_state: u64,
final_states: HashSet<u64>,
- transition_table: HashMap<(u64, char), Vec<u64>>,
- loose_transitions: HashMap<u64, u64>,
+ transition_table: HashMap<(u64, Transition), Vec<u64>>,
) -> Self {
NFA {
alphabet,
@@ -73,7 +89,6 @@ impl NFA {
initial_state,
final_states,
transition_table,
- loose_transitions,
}
}
}
@@ -84,9 +99,10 @@ impl Automaton for NFA {
let mut new_states: Vec<u64> = Vec::new();
for c in input.chars() {
if !self.alphabet.contains(&c)
- && !current_states
- .iter()
- .any(|s| self.loose_transitions.contains_key(s))
+ && !current_states.iter().any(|&s| {
+ self.transition_table
+ .contains_key(&(s, Transition::Wildcard))
+ })
{
NFAParseError::print_warn(&format!(
"Input character {} is not in the alphabet of the DFA",
@@ -95,9 +111,11 @@ impl Automaton for NFA {
return false;
}
for i in 0..current_states.len() {
- match current_states.get(i).map_or(None, |s| {
- self.transition_table.get(&(*s, c)).map_or(
- self.loose_transitions.get(s).and_then(|s| Some(vec![*s])),
+ match current_states.get(i).map_or(None, |&s| {
+ self.transition_table.get(&(s, Transition::Char(c))).map_or(
+ self.transition_table
+ .get(&(s, Transition::Wildcard))
+ .and_then(|s| Some(s.to_owned())),
|v| Some(v.to_owned()),
)
}) {
@@ -119,76 +137,87 @@ impl Automaton for NFA {
impl FiniteStateAutomaton for NFA {
fn as_dfa(&self) -> Result<DFA, Box<dyn Error>> {
- let mut current_newstate = self.initial_state;
- let mut newest_newstate = current_newstate;
- // Subset construction algo
- let mut state_forward_map: HashMap<BTreeSet<u64>, u64> = HashMap::new();
- let mut state_reverse_map: HashMap<u64, BTreeSet<u64>> = HashMap::new();
- let mut transition_table: HashMap<(u64, char), u64> = HashMap::new();
-
- state_reverse_map.insert(self.initial_state, BTreeSet::from([self.initial_state]));
- state_forward_map.insert(BTreeSet::from([self.initial_state]), self.initial_state);
-
- while current_newstate <= newest_newstate {
- for c in self.alphabet.iter() {
- let current_states = state_reverse_map
- .clone()
- .get(¤t_newstate)
- .ok_or(NFAParseError::error(
- "We should have already added this state into the reverse map!",
- ))?
- .to_owned();
- let to: BTreeSet<u64> = current_states
- .iter()
- .flat_map(|st| {
- self.transition_table
- .get(&(*st, *c))
- .unwrap_or(&vec![])
- .to_owned()
- .iter()
- .map(u64::to_owned)
- .collect::<Vec<u64>>()
- })
- .collect();
-
- match state_forward_map.get(&to) {
- Some(state) => {
- transition_table.insert((current_newstate, *c), *state);
- }
- None => {
- newest_newstate += 1;
- state_forward_map.insert(to.clone(), newest_newstate);
- state_reverse_map.insert(newest_newstate, to.clone());
- transition_table.insert((current_newstate, *c), newest_newstate);
- if to.len() == 0 {
- // Trap state!
- for c in self.alphabet.iter() {
- transition_table.insert((newest_newstate, *c), newest_newstate);
- }
- }
+ // TODO: Implement this function
+ let mut states = HashSet::new();
+ let initial_dfa_state = 0;
+ let mut newest_dfa_state = initial_dfa_state;
+ let mut final_states = HashSet::new();
+ let mut transition_table = HashMap::new();
+
+ let mut dfa_state_to_nfa_set: HashMap<u64, BTreeSet<u64>> = HashMap::new();
+ let mut nfa_set_to_dfa_state: HashMap<BTreeSet<u64>, u64> = HashMap::new();
+
+ let mut possible_inputs: Vec<dfa::Transition> = self
+ .alphabet
+ .iter()
+ .map(|&c| dfa::Transition::Char(c))
+ .collect();
+ possible_inputs.push(dfa::Transition::Wildcard);
+
+ for &input in possible_inputs.iter() {
+ let mut nfa_sets_to_be_processed: VecDeque<BTreeSet<u64>> = VecDeque::new();
+ let mut processing_nfa_states = BTreeSet::from([self.initial_state]);
+ let mut nfa_sets_processed: BTreeSet<BTreeSet<u64>> = BTreeSet::new();
+ while processing_nfa_states.len() != 0 {
+ if !nfa_sets_processed.contains(&processing_nfa_states) {
+ for nfa_dests in processing_nfa_states
+ .iter()
+ .map(|&nfa_state| {
+ self.transition_table
+ .get(&(nfa_state, Transition::from(input)))
+ .map_or(vec![], Vec::to_owned)
+ })
+ //This conversion to BTreeSet is shit
+ .map(Vec::into_iter)
+ .map(BTreeSet::from_iter)
+ {
+ let dfa_from = nfa_set_to_dfa_state
+ .get(&processing_nfa_states)
+ .ok_or(NFAParseError::error(
+ "Unable to get something that should have already been processed!",
+ ))?
+ .to_owned();
+ let dfa_to = nfa_set_to_dfa_state.clone().get(&nfa_dests).map_or_else(
+ || {
+ // New subset!
+ newest_dfa_state += 1;
+ states.insert(newest_dfa_state);
+ dfa_state_to_nfa_set.insert(newest_dfa_state, nfa_dests.clone());
+ nfa_set_to_dfa_state.insert(nfa_dests.clone(), newest_dfa_state);
+
+ if nfa_dests.iter().any(|s| self.final_states.contains(s)) {
+ // This is a final state!
+ final_states.insert(newest_dfa_state);
+ } else if nfa_dests.len() == 0 {
+ // This is the trap state!
+ for &trans in possible_inputs.iter() {
+ transition_table
+ .insert((newest_dfa_state, trans), newest_dfa_state);
+ }
+ }
+ newest_dfa_state
+ },
+ u64::to_owned,
+ );
+
+ nfa_sets_to_be_processed.push_back(nfa_dests);
+
+ transition_table.insert((dfa_from, input), dfa_to);
}
}
+ nfa_sets_processed.insert(processing_nfa_states);
+ processing_nfa_states = nfa_sets_to_be_processed
+ .pop_front()
+ .unwrap_or(BTreeSet::new());
}
- current_newstate += 1;
}
- let states = (self.initial_state..newest_newstate).collect();
- let final_states: HashSet<u64> = state_reverse_map
- .iter()
- .filter(|(_, old_states)| {
- old_states
- .iter()
- .any(|old_state| self.final_states.contains(old_state))
- })
- .map(|(new_state, _)| new_state.to_owned())
- .collect();
Ok(DFA {
alphabet: self.alphabet.clone(),
states,
- initial_state: self.initial_state,
+ initial_state: initial_dfa_state,
final_states,
transition_table,
- loose_transitions: self.loose_transitions.clone(),
})
}
@@ -199,7 +228,7 @@ impl FiniteStateAutomaton for NFA {
let transition_table = dfa
.transition_table
.iter()
- .map(|(from_input, to)| (*from_input, vec![*to]))
+ .map(|(&(from, input), &to)| ((from, Transition::from(input)), vec![to]))
.collect();
Ok(NFA {
alphabet: dfa.alphabet,
@@ -207,7 +236,6 @@ impl FiniteStateAutomaton for NFA {
initial_state: dfa.initial_state,
final_states: dfa.final_states,
transition_table,
- loose_transitions: dfa.loose_transitions,
})
}
}
@@ -270,10 +298,10 @@ impl Display for NFA {
let mut transition_map: HashMap<u64, Vec<Vec<u64>>> = HashMap::new();
- let index_map: HashMap<char, usize> = alphabet_vec
+ let index_map: HashMap<Transition, usize> = alphabet_vec
.iter()
.enumerate()
- .map(|(index, char)| (char.to_owned(), index))
+ .map(|(index, char)| (Transition::Char(char.to_owned()), index))
.collect();
for ((from, input), to) in self.transition_table.iter() {
@@ -304,9 +332,9 @@ impl Display for NFA {
format!("{acc}{}\t|\t", dest_sets_str)
});
let wildcard = self
- .loose_transitions
- .get(input)
- .map_or("∅".to_owned(), |&dest| dest.to_string());
+ .transition_table
+ .get(&(*input, Transition::Wildcard))
+ .map_or("∅".to_owned(), |dest| format!("{:?}", dest));
format!("{}\n{}\t|\t{}\t|\t{}", st, input, dests, wildcard)
});
@@ -366,7 +394,7 @@ impl Encodable<Value> for NFA {
"Could not convert the final states to u64",
))?;
- let mut transition_table: HashMap<(u64, char), Vec<u64>> = HashMap::new();
+ let mut transition_table: HashMap<(u64, Transition), Vec<u64>> = HashMap::new();
for row in json["transition_table"]
.as_array()
.ok_or(NFAParseError::error(
@@ -407,7 +435,7 @@ impl Encodable<Value> for NFA {
"Unable to parse the transition destinations in the table",
))?;
for (input, dests) in dest_sets.iter() {
- transition_table.insert((from, *input), dests.to_owned());
+ transition_table.insert((from, Transition::Char(*input)), dests.to_owned());
}
}
@@ -417,7 +445,6 @@ impl Encodable<Value> for NFA {
initial_state,
final_states,
transition_table,
- loose_transitions: HashMap::new(),
})
}
}
diff --git a/src/lib/regex.rs b/src/lib/regex.rs
@@ -1,6 +1,6 @@
use crate::{
dfa::DFA,
- graph_enfa::{GraphENFA, GraphENFAError},
+ graph_enfa::{GraphENFA, GraphENFAError, Transition},
Automaton, Encodable, FiniteStateAutomaton,
};
use either::Either;
@@ -249,14 +249,22 @@ impl Regex {
}
if groups.len() != 1 {
- Err(RegexError::ParseError(
+ return Err(RegexError::ParseError(
"Ended the grammar tree within a group!".to_owned(),
- ))
- } else {
- Ok(RegexNonTerminal::Group(groups.pop().ok_or(
- RegexError::ParseError("Ended the grammar tree without the main group!".to_owned()),
- )?))
+ ));
}
+ if in_a_union {
+ // Catch the outermost union if it exists
+ let last_elem = groups.pop().ok_or(RegexError::ParseError(
+ "Ended the grammar tree without the main group!".to_owned(),
+ ))?;
+ or_group.push(RegexNonTerminal::Group(last_elem));
+ return Ok(RegexNonTerminal::Union(or_group));
+ }
+
+ Ok(RegexNonTerminal::Group(groups.pop().ok_or(
+ RegexError::ParseError("Ended the grammar tree without the main group!".to_owned()),
+ )?))
}
}
@@ -279,8 +287,29 @@ impl Encodable<Vec<Either<char, RegexToken>>> for Regex {
}
}
+struct Stack<T> {
+ stack: Vec<T>,
+}
+
+impl<T> Stack<T> {
+ fn new() -> Self {
+ Stack { stack: vec![] }
+ }
+ fn peek_mut(&mut self) -> Option<&mut T> {
+ self.stack.last_mut()
+ }
+ fn peek(&self) -> Option<&T> {
+ self.stack.last()
+ }
+ fn push(&mut self, item: T) {
+ self.stack.push(item)
+ }
+ fn pop(&mut self) -> Option<T> {
+ self.stack.pop()
+ }
+}
+
impl GraphENFA {
- // TODO: Build up an ENFA using the parse tree
fn add_non_terminal(
&mut self,
nonterminal: RegexNonTerminal,
@@ -299,9 +328,13 @@ impl GraphENFA {
let new_state = self.new_state()?;
match terminal {
- RegexTerminal::Character(c) => self.insert_transition(from, new_state, Some(c)),
- RegexTerminal::Wildcard => self.insert_loose_transition(from, new_state),
- RegexTerminal::Epsilon => self.insert_transition(from, new_state, None),
+ RegexTerminal::Character(c) => {
+ self.insert_transition(from, new_state, Transition::Char(c))
+ }
+ RegexTerminal::Wildcard => {
+ self.insert_transition(from, new_state, Transition::Wildcard)
+ }
+ RegexTerminal::Epsilon => self.insert_transition(from, new_state, Transition::Epsilon),
}?;
Ok(new_state)
@@ -328,15 +361,9 @@ impl GraphENFA {
kleene_group: Vec<RegexNonTerminal>,
from: u64,
) -> Result<u64, GraphENFAError> {
- // FIXME: KLEENE GROUPS ARE BORKED!
- let start_state = self.add_terminal(RegexTerminal::Epsilon, from)?;
- let end_state = self.new_state()?;
- self.insert_transition(start_state, end_state, None)?;
- self.insert_transition(end_state, start_state, None)?;
-
- let last_state_in_group = self.add_group(kleene_group, start_state)?;
-
- self.insert_transition(last_state_in_group, end_state, None)?;
+ let end_state = self.add_group(kleene_group, from)?;
+ self.insert_transition(from, end_state, Transition::Epsilon)?;
+ self.insert_transition(end_state, from, Transition::Epsilon)?;
Ok(end_state)
}
@@ -347,13 +374,11 @@ impl GraphENFA {
union: Vec<RegexNonTerminal>,
from: u64,
) -> Result<u64, GraphENFAError> {
- // FIXME: Unions are buggy, they are looking for the end of scope ')', but won't work as just something like a|b
- let start_state = self.add_terminal(RegexTerminal::Epsilon, from)?;
let end_state = self.new_state()?;
for nonterminal in union {
- let entry_state = self.add_terminal(RegexTerminal::Epsilon, start_state)?;
+ let entry_state = self.add_terminal(RegexTerminal::Epsilon, from)?;
let exit_state = self.add_non_terminal(nonterminal, entry_state)?;
- self.insert_transition(exit_state, end_state, None)?;
+ self.insert_transition(exit_state, end_state, Transition::Epsilon)?;
}
Ok(end_state)
}
@@ -365,8 +390,16 @@ impl Encodable<RegexNonTerminal> for Regex {
let final_state = enfa.add_non_terminal(parse_tree.clone(), current_state)?;
enfa.set_state_acceptance(final_state, true)?;
+ eprintln!("DEBUG: {}", enfa);
+
+ let test_enfa = enfa.as_enfa()?;
+
+ eprintln!("DEBUG2: {}", test_enfa);
+
let dfa = enfa.as_dfa()?;
+ eprintln!("DEBUG3: {}", dfa);
+
Ok(Regex {
automaton: dfa,
parsed: parse_tree,