001package algs54; // section 5.4
002import stdlib.*;
003import algs13.Bag;
004import algs13.Stack;
005import algs42.Digraph;
006import algs42.DirectedDFS;
007/* ***********************************************************************
008 *  Compilation:  javac NFA.java
009 *  Execution:    java NFA regexp text
010 *  Dependencies: Stack.java Bag.java Digraph.java DirectedDFS.java
011 *
012 *  % java NFA "(A*B|AC)D" AAAABD
013 *  true
014 *
015 *  % java NFA "(A*B|AC)D" AAAAC
016 *  false
017 *
018 *  % java NFA "(a|(bc)*d)*" abcbcd
019 *  true
020 *
021 *  % java NFA "(a|(bc)*d)*" abcbcbcdaaaabcbcdaaaddd
022 *  true
023 *
024 *  Remarks
025 *  -----------
026 *    - This version does not currently suport multiway or, wildcard,
027 *      or the + operator.
028 *
029 *************************************************************************/
030
031public class NFA {
032
033        private final Digraph G;         // digraph of epsilon transitions
034        private final String regexp;     // regular expression
035        private final int M;             // number of characters in regular expression
036
037        // Create the NFA for the given RE
038        public NFA(String regexp) {
039                this.regexp = regexp;
040                M = regexp.length();
041                Stack<Integer> ops = new Stack<>();
042                G = new Digraph(M+1);
043                for (int i = 0; i < M; i++) {
044                        int lp = i;
045                        if (regexp.charAt(i) == '(' || regexp.charAt(i) == '|')
046                                ops.push(i);
047                        else if (regexp.charAt(i) == ')') {
048                                int or = ops.pop();
049                                if (regexp.charAt(or) == '|') {
050                                        lp = ops.pop();
051                                        G.addEdge(lp, or+1);
052                                        G.addEdge(or, i);
053                                }
054                                else if (regexp.charAt(or) == '(')
055                                        lp = or;
056                                else assert false;
057                        }
058
059                        // Lookahead;
060                        if (i < M-1 && regexp.charAt(i+1) == '*') {
061                                G.addEdge(lp, i+1);
062                                G.addEdge(i+1, lp);
063                        }
064                        if (regexp.charAt(i) == '(' || regexp.charAt(i) == '*' || regexp.charAt(i) == ')')
065                                G.addEdge(i, i+1);
066                }
067        }
068
069        // Does the NFA recognize txt?
070        public boolean recognizes(String txt) {
071                DirectedDFS dfs = new DirectedDFS(G, 0);
072                Bag<Integer> pc = new Bag<>();
073                for (int v = 0; v < G.V(); v++)
074                        if (dfs.marked(v)) pc.add(v);
075
076                // Compute possible NFA states for txt[i+1].
077                for (int i = 0; i < txt.length(); i++) {
078                        Bag<Integer> match = new Bag<>();
079                        for (int v : pc) {
080                                if (v == M) continue;
081                                if ((regexp.charAt(v) == txt.charAt(i)) || regexp.charAt(v) == '.')
082                                        match.add(v+1);
083                        }
084                        dfs = new DirectedDFS(G, match);
085                        pc = new Bag<>();
086                        for (int v = 0; v < G.V(); v++)
087                                if (dfs.marked(v)) pc.add(v);
088
089                        // optimization if no states reachable
090                        if (pc.size() == 0) return false;
091                }
092
093                // check for accept state
094                for (int v : pc)
095                        if (v == M) return true;
096                return false;
097        }
098
099
100        public static void main(String[] args) {
101                args = new String[] { "(a|(bc)*d)*", "abcbcbcdaaaabcbcdaaaddd" };
102
103                String regexp = "(" + args[0] + ")";
104                String txt = args[1];
105                NFA nfa = new NFA(regexp);
106                StdOut.println(nfa.recognizes(txt));
107        }
108
109}