001package algs53; // section 5.3
002import stdlib.*;
003/* *************************************************************
004 *  Compilation:  java KMPplus.java
005 *  Execution:    java KMPplus pattern text
006 *
007 *  Knuth-Morris-Pratt algorithm over UNICODE alphabet.
008 *
009 *  % java KMPplus ABABAC BCBAABACAABABACAA
010 *  text:    BCBAABACAABABACAA
011 *  pattern:          ABABAC
012 *
013 *  % java KMPplus aabaaaba ccaabaabaabaaabaab
014 *  text:    ccaabaabaabaaabaab
015 *  pattern:         aabaaaba
016 *
017 *  % java KMPplus aabaaabb ccaabaabaabaaabaab
018 *  text:    ccaabaabaabaaabaab
019 *  pattern:                   aabaaabb
020 *
021 ***************************************************************/
022
023public class XKMPplus {
024        private final String pattern;
025        private final int[] next;
026
027        // create Knuth-Morris-Pratt NFA from pattern
028        public XKMPplus(String pattern) {
029                this.pattern = pattern;
030                int M = pattern.length();
031                next = new int[M];
032                int j = -1;
033                for (int i = 0; i < M; i++) {
034                        if (i == 0)                                      next[i] = -1;
035                        else if (pattern.charAt(i) != pattern.charAt(j)) next[i] = j;
036                        else                                             next[i] = next[j];
037                        while (j >= 0 && pattern.charAt(i) != pattern.charAt(j)) {
038                                j = next[j];
039                        }
040                        j++;
041                }
042
043                for (int i = 0; i < M; i++)
044                        StdOut.println("next[" + i + "] = " + next[i]);
045        }
046
047        // return offset of first occurrence of text in pattern (or N if no match)
048        // simulate the NFA to find match
049        public int search(String text) {
050                int M = pattern.length();
051                int N = text.length();
052                int i, j;
053                for (i = 0, j = 0; i < N && j < M; i++) {
054                        while (j >= 0 && text.charAt(i) != pattern.charAt(j))
055                                j = next[j];
056                        j++;
057                }
058                if (j == M) return i - M;
059                return N;
060        }
061
062
063        // test client
064        public static void main(String[] args) {
065                //args = new String[] { "abracadabra", "abacadabrabracabracadabrabrabracad" };
066                //args = new String[] { "rab",         "abacadabrabracabracadabrabrabracad" };
067                //args = new String[] { "bcara",       "abacadabrabracabracadabrabrabracad" };
068                //args = new String[] { "rabrabracad", "abacadabrabracabracadabrabrabracad" };
069                args = new String[] { "abacad",      "abacadabrabracabracadabrabrabracad" };
070                String pat = args[0];
071                String txt = args[1];
072
073                // substring search
074                XKMPplus kmp = new XKMPplus(pat);
075                int offset = kmp.search(txt);
076
077                // print results
078                StdOut.println("text:    " + txt);
079                StdOut.print("pattern: ");
080                for (int i = 0; i < offset; i++)
081                        StdOut.print(" ");
082                StdOut.println(pat);
083        }
084
085}