001package algs53; 002import stdlib.*; 003 004/* ************************************************************* 005 * 006 * Compilation: javac KMP.java 007 * Execution: java KMP pattern text 008 * 009 * Reads in two strings, the pattern and the input text, and 010 * searches for the pattern in the input text using the 011 * KMP algorithm. 012 * 013 * % java KMP abracadabra abacadabrabracabracadabrabrabracad 014 * text: abacadabrabracabracadabrabrabracad 015 * pattern: abracadabra 016 * 017 * % java KMP rab abacadabrabracabracadabrabrabracad 018 * text: abacadabrabracabracadabrabrabracad 019 * pattern: rab 020 * 021 * % java KMP bcara abacadabrabracabracadabrabrabracad 022 * text: abacadabrabracabracadabrabrabracad 023 * pattern: bcara 024 * 025 * % java KMP rabrabracad abacadabrabracabracadabrabrabracad 026 * text: abacadabrabracabracadabrabrabracad 027 * pattern: rabrabracad 028 * 029 * % java KMP abacad abacadabrabracabracadabrabrabracad 030 * text: abacadabrabracabracadabrabrabracad 031 * pattern: abacad 032 * 033 ***************************************************************/ 034 035public class KMP { 036 private final int R; // the radix 037 private int[][] dfa; // the KMP automoton 038 039 private char[] pattern; // either the character array for the pattern 040 private String pat; // or the pattern string 041 042 // create the DFA from a String 043 public KMP(String pat) { 044 this.R = 256; 045 this.pat = pat; 046 047 // build DFA from pattern 048 int M = pat.length(); 049 dfa = new int[R][M]; 050 dfa[pat.charAt(0)][0] = 1; 051 for (int X = 0, j = 1; j < M; j++) { 052 for (int c = 0; c < R; c++) 053 dfa[c][j] = dfa[c][X]; // Copy mismatch cases. 054 dfa[pat.charAt(j)][j] = j+1; // Set match case. 055 X = dfa[pat.charAt(j)][X]; // Update restart state. 056 } 057 } 058 059 // create the DFA from a character array over R-character alphabet 060 public KMP(char[] pattern, int R) { 061 this.R = R; 062 this.pattern = new char[pattern.length]; 063 for (int j = 0; j < pattern.length; j++) 064 this.pattern[j] = pattern[j]; 065 066 // build DFA from pattern 067 int M = pattern.length; 068 dfa = new int[R][M]; 069 dfa[pattern[0]][0] = 1; 070 for (int X = 0, j = 1; j < M; j++) { 071 for (int c = 0; c < R; c++) 072 dfa[c][j] = dfa[c][X]; // Copy mismatch cases. 073 dfa[pattern[j]][j] = j+1; // Set match case. 074 X = dfa[pattern[j]][X]; // Update restart state. 075 } 076 } 077 078 // return offset of first match; N if no match 079 public int search(String txt) { 080 081 // simulate operation of DFA on text 082 int M = pat.length(); 083 int N = txt.length(); 084 int i, j; 085 for (i = 0, j = 0; i < N && j < M; i++) { 086 j = dfa[txt.charAt(i)][j]; 087 } 088 if (j == M) return i - M; // found 089 return N; // not found 090 } 091 092 093 // return offset of first match; N if no match 094 public int search(char[] text) { 095 096 // simulate operation of DFA on text 097 int M = pattern.length; 098 int N = text.length; 099 int i, j; 100 for (i = 0, j = 0; i < N && j < M; i++) { 101 j = dfa[text[i]][j]; 102 } 103 if (j == M) return i - M; // found 104 return N; // not found 105 } 106 107 108 // test client 109 public static void main(String[] args) { 110 args = new String[] { "abracadabra", "abacadabrabracabracadabrabrabracad" }; 111 //args = new String[] { "rab", "abacadabrabracabracadabrabrabracad" }; 112 //args = new String[] { "bcara", "abacadabrabracabracadabrabrabracad" }; 113 //args = new String[] { "rabrabracad", "abacadabrabracabracadabrabrabracad" }; 114 //args = new String[] { "abacad", "abacadabrabracabracadabrabrabracad" }; 115 String pat1 = args[0]; 116 String txt1 = args[1]; 117 char[] pat2 = pat1.toCharArray(); 118 char[] txt2 = txt1.toCharArray(); 119 120 KMP kmp1 = new KMP(pat1); 121 int offset1 = kmp1.search(txt1); 122 123 KMP kmp2 = new KMP(pat2, 256); 124 int offset2 = kmp2.search(txt2); 125 126 // print results 127 StdOut.println("text: " + txt1); 128 129 StdOut.print("pattern: "); 130 for (int i = 0; i < offset1; i++) 131 StdOut.print(" "); 132 StdOut.println(pat1); 133 134 StdOut.print("pattern: "); 135 for (int i = 0; i < offset2; i++) 136 StdOut.print(" "); 137 StdOut.println(pat2); 138 } 139}