001package algs54; // section 5.4 002import stdlib.*; 003import algs13.Bag; 004import algs13.Stack; 005import algs42.Digraph; 006import algs42.DirectedDFS; 007/* *********************************************************************** 008 * Compilation: javac NFA.java 009 * Execution: java NFA regexp text 010 * Dependencies: Stack.java Bag.java Digraph.java DirectedDFS.java 011 * 012 * % java NFA "(A*B|AC)D" AAAABD 013 * true 014 * 015 * % java NFA "(A*B|AC)D" AAAAC 016 * false 017 * 018 * % java NFA "(a|(bc)*d)*" abcbcd 019 * true 020 * 021 * % java NFA "(a|(bc)*d)*" abcbcbcdaaaabcbcdaaaddd 022 * true 023 * 024 * Remarks 025 * ----------- 026 * - This version does not currently suport multiway or, wildcard, 027 * or the + operator. 028 * 029 *************************************************************************/ 030 031public class NFA { 032 033 private final Digraph G; // digraph of epsilon transitions 034 private final String regexp; // regular expression 035 private final int M; // number of characters in regular expression 036 037 // Create the NFA for the given RE 038 public NFA(String regexp) { 039 this.regexp = regexp; 040 M = regexp.length(); 041 Stack<Integer> ops = new Stack<>(); 042 G = new Digraph(M+1); 043 for (int i = 0; i < M; i++) { 044 int lp = i; 045 if (regexp.charAt(i) == '(' || regexp.charAt(i) == '|') 046 ops.push(i); 047 else if (regexp.charAt(i) == ')') { 048 int or = ops.pop(); 049 if (regexp.charAt(or) == '|') { 050 lp = ops.pop(); 051 G.addEdge(lp, or+1); 052 G.addEdge(or, i); 053 } 054 else if (regexp.charAt(or) == '(') 055 lp = or; 056 else assert false; 057 } 058 059 // Lookahead; 060 if (i < M-1 && regexp.charAt(i+1) == '*') { 061 G.addEdge(lp, i+1); 062 G.addEdge(i+1, lp); 063 } 064 if (regexp.charAt(i) == '(' || regexp.charAt(i) == '*' || regexp.charAt(i) == ')') 065 G.addEdge(i, i+1); 066 } 067 } 068 069 // Does the NFA recognize txt? 070 public boolean recognizes(String txt) { 071 DirectedDFS dfs = new DirectedDFS(G, 0); 072 Bag<Integer> pc = new Bag<>(); 073 for (int v = 0; v < G.V(); v++) 074 if (dfs.marked(v)) pc.add(v); 075 076 // Compute possible NFA states for txt[i+1]. 077 for (int i = 0; i < txt.length(); i++) { 078 Bag<Integer> match = new Bag<>(); 079 for (int v : pc) { 080 if (v == M) continue; 081 if ((regexp.charAt(v) == txt.charAt(i)) || regexp.charAt(v) == '.') 082 match.add(v+1); 083 } 084 dfs = new DirectedDFS(G, match); 085 pc = new Bag<>(); 086 for (int v = 0; v < G.V(); v++) 087 if (dfs.marked(v)) pc.add(v); 088 089 // optimization if no states reachable 090 if (pc.size() == 0) return false; 091 } 092 093 // check for accept state 094 for (int v : pc) 095 if (v == M) return true; 096 return false; 097 } 098 099 100 public static void main(String[] args) { 101 args = new String[] { "(a|(bc)*d)*", "abcbcbcdaaaabcbcdaaaddd" }; 102 103 String regexp = "(" + args[0] + ")"; 104 String txt = args[1]; 105 NFA nfa = new NFA(regexp); 106 StdOut.println(nfa.recognizes(txt)); 107 } 108 109}