001package algs53;
002import  stdlib.*;
003
004/* *************************************************************
005 *
006 *  Compilation:  javac KMP.java
007 *  Execution:    java KMP pattern text
008 *
009 *  Reads in two strings, the pattern and the input text, and
010 *  searches for the pattern in the input text using the
011 *  KMP algorithm.
012 *
013 *  % java KMP abracadabra abacadabrabracabracadabrabrabracad
014 *  text:    abacadabrabracabracadabrabrabracad
015 *  pattern:               abracadabra
016 *
017 *  % java KMP rab abacadabrabracabracadabrabrabracad
018 *  text:    abacadabrabracabracadabrabrabracad
019 *  pattern:         rab
020 *
021 *  % java KMP bcara abacadabrabracabracadabrabrabracad
022 *  text:    abacadabrabracabracadabrabrabracad
023 *  pattern:                                   bcara
024 *
025 *  % java KMP rabrabracad abacadabrabracabracadabrabrabracad
026 *  text:    abacadabrabracabracadabrabrabracad
027 *  pattern:                        rabrabracad
028 *
029 *  % java KMP abacad abacadabrabracabracadabrabrabracad
030 *  text:    abacadabrabracabracadabrabrabracad
031 *  pattern: abacad
032 *
033 ***************************************************************/
034
035public class KMP {
036        private final int R;       // the radix
037        private int[][] dfa;       // the KMP automoton
038
039        private char[] pattern;    // either the character array for the pattern
040        private String pat;        // or the pattern string
041
042        // create the DFA from a String
043        public KMP(String pat) {
044                this.R = 256;
045                this.pat = pat;
046
047                // build DFA from pattern
048                int M = pat.length();
049                dfa = new int[R][M];
050                dfa[pat.charAt(0)][0] = 1;
051                for (int X = 0, j = 1; j < M; j++) {
052                        for (int c = 0; c < R; c++)
053                                dfa[c][j] = dfa[c][X];     // Copy mismatch cases.
054                        dfa[pat.charAt(j)][j] = j+1;   // Set match case.
055                        X = dfa[pat.charAt(j)][X];     // Update restart state.
056                }
057        }
058
059        // create the DFA from a character array over R-character alphabet
060        public KMP(char[] pattern, int R) {
061                this.R = R;
062                this.pattern = new char[pattern.length];
063                for (int j = 0; j < pattern.length; j++)
064                        this.pattern[j] = pattern[j];
065
066                // build DFA from pattern
067                int M = pattern.length;
068                dfa = new int[R][M];
069                dfa[pattern[0]][0] = 1;
070                for (int X = 0, j = 1; j < M; j++) {
071                        for (int c = 0; c < R; c++)
072                                dfa[c][j] = dfa[c][X];     // Copy mismatch cases.
073                        dfa[pattern[j]][j] = j+1;      // Set match case.
074                        X = dfa[pattern[j]][X];        // Update restart state.
075                }
076        }
077
078        // return offset of first match; N if no match
079        public int search(String txt) {
080
081                // simulate operation of DFA on text
082                int M = pat.length();
083                int N = txt.length();
084                int i, j;
085                for (i = 0, j = 0; i < N && j < M; i++) {
086                        j = dfa[txt.charAt(i)][j];
087                }
088                if (j == M) return i - M;    // found
089                return N;                    // not found
090        }
091
092
093        // return offset of first match; N if no match
094        public int search(char[] text) {
095
096                // simulate operation of DFA on text
097                int M = pattern.length;
098                int N = text.length;
099                int i, j;
100                for (i = 0, j = 0; i < N && j < M; i++) {
101                        j = dfa[text[i]][j];
102                }
103                if (j == M) return i - M;    // found
104                return N;                    // not found
105        }
106
107
108        // test client
109        public static void main(String[] args) {
110                args = new String[] { "abracadabra", "abacadabrabracabracadabrabrabracad" };
111                //args = new String[] { "rab",         "abacadabrabracabracadabrabrabracad" };
112                //args = new String[] { "bcara",       "abacadabrabracabracadabrabrabracad" };
113                //args = new String[] { "rabrabracad", "abacadabrabracabracadabrabrabracad" };
114                //args = new String[] { "abacad",      "abacadabrabracabracadabrabrabracad" };
115                String pat1 = args[0];
116                String txt1 = args[1];
117                char[] pat2 = pat1.toCharArray();
118                char[] txt2 = txt1.toCharArray();
119
120                KMP kmp1 = new KMP(pat1);
121                int offset1 = kmp1.search(txt1);
122
123                KMP kmp2 = new KMP(pat2, 256);
124                int offset2 = kmp2.search(txt2);
125
126                // print results
127                StdOut.println("text:    " + txt1);
128
129                StdOut.print("pattern: ");
130                for (int i = 0; i < offset1; i++)
131                        StdOut.print(" ");
132                StdOut.println(pat1);
133
134                StdOut.print("pattern: ");
135                for (int i = 0; i < offset2; i++)
136                        StdOut.print(" ");
137                StdOut.println(pat2);
138        }
139}