001package algs53; // section 5.3 002import stdlib.*; 003import java.math.BigInteger; 004import java.util.Random; 005/* ************************************************************* 006 * Compilation: javac RabinKarp.java 007 * Execution: java RabinKarp pat txt 008 * 009 * Reads in two strings, the pattern and the input text, and 010 * searches for the pattern in the input text using the 011 * Las Vegas version of the Rabin-Karp algorithm. 012 * 013 * % java RabinKarp abracadabra abacadabrabracabracadabrabrabracad 014 * pattern: abracadabra 015 * text: abacadabrabracabracadabrabrabracad 016 * match: abracadabra 017 * 018 * % java RabinKarp rab abacadabrabracabracadabrabrabracad 019 * pattern: rab 020 * text: abacadabrabracabracadabrabrabracad 021 * match: rab 022 * 023 * % java RabinKarp bcara abacadabrabracabracadabrabrabracad 024 * pattern: bcara 025 * text: abacadabrabracabracadabrabrabracad 026 * 027 * % java RabinKarp rabrabracad abacadabrabracabracadabrabrabracad 028 * text: abacadabrabracabracadabrabrabracad 029 * pattern: rabrabracad 030 * 031 * % java RabinKarp abacad abacadabrabracabracadabrabrabracad 032 * text: abacadabrabracabracadabrabrabracad 033 * pattern: abacad 034 * 035 ***************************************************************/ 036 037public class RabinKarp { 038 private String pat; // the pattern // needed only for Las Vegas 039 private long patHash; // pattern hash value 040 private int M; // pattern length 041 private long Q; // a large prime, small enough to avoid long overflow 042 private int R; // radix 043 private long RM; // R^(M-1) % Q 044 045 public RabinKarp(int R, char[] pattern) { 046 throw new Error("Operation not supported yet"); 047 } 048 049 public RabinKarp(String pat) { 050 this.pat = pat; // save pattern (needed only for Las Vegas) 051 R = 256; 052 M = pat.length(); 053 Q = longRandomPrime(); 054 055 // precompute R^(M-1) % Q for use in removing leading digit 056 RM = 1; 057 for (int i = 1; i <= M-1; i++) 058 RM = (R * RM) % Q; 059 patHash = hash(pat, M); 060 } 061 062 // Compute hash for key[0..M-1]. 063 private long hash(String key, int M) { 064 long h = 0; 065 for (int j = 0; j < M; j++) 066 h = (R * h + key.charAt(j)) % Q; 067 return h; 068 } 069 070 // Las Vegas version: does pat[] match txt[i..i-M+1] ? 071 private boolean check(String txt, int i) { 072 for (int j = 0; j < M; j++) 073 if (pat.charAt(j) != txt.charAt(i + j)) 074 return false; 075 return true; 076 } 077 078 // Monte Carlo version: always return true 079 private boolean check(int i) { 080 return true; 081 } 082 083 // check for exact match 084 public int search(String txt) { 085 int N = txt.length(); 086 if (N < M) return N; 087 long txtHash = hash(txt, M); 088 089 // check for match at offset 0 090 if ((patHash == txtHash) && check(txt, 0)) 091 return 0; 092 093 // check for hash match; if hash match, check for exact match 094 for (int i = M; i < N; i++) { 095 // Remove leading digit, add trailing digit, check for match. 096 txtHash = (txtHash + Q - RM*txt.charAt(i-M) % Q) % Q; 097 txtHash = (txtHash*R + txt.charAt(i)) % Q; 098 099 // match 100 int offset = i - M + 1; 101 if ((patHash == txtHash) && check(txt, offset)) 102 return offset; 103 } 104 105 // no match 106 return N; 107 } 108 109 110 // a random 31-bit prime 111 private static long longRandomPrime() { 112 BigInteger prime = new BigInteger(31, new Random()); 113 return prime.longValue(); 114 } 115 116 // test client 117 public static void main(String[] args) { 118 //args = new String[] { "abracadabra", "abacadabrabracabracadabrabrabracad" }; 119 //args = new String[] { "rab", "abacadabrabracabracadabrabrabracad" }; 120 //args = new String[] { "bcara", "abacadabrabracabracadabrabrabracad" }; 121 //args = new String[] { "rabrabracad", "abacadabrabracabracadabrabrabracad" }; 122 args = new String[] { "abacad", "abacadabrabracabracadabrabrabracad" }; 123 String pat = args[0]; 124 String txt = args[1]; 125 126 RabinKarp searcher = new RabinKarp(pat); 127 int offset = searcher.search(txt); 128 129 // print results 130 StdOut.println("text: " + txt); 131 132 // from brute force search method 1 133 StdOut.print("pattern: "); 134 for (int i = 0; i < offset; i++) 135 StdOut.print(" "); 136 StdOut.println(pat); 137 } 138}