001package algs53; // section 5.3
002import stdlib.*;
003import java.math.BigInteger;
004import java.util.Random;
005/* *************************************************************
006 *  Compilation:  javac RabinKarp.java
007 *  Execution:    java RabinKarp pat txt
008 *
009 *  Reads in two strings, the pattern and the input text, and
010 *  searches for the pattern in the input text using the
011 *  Las Vegas version of the Rabin-Karp algorithm.
012 *
 *  % java RabinKarp abracadabra abacadabrabracabracadabrabrabracad
 *  text:    abacadabrabracabracadabrabrabracad
 *  pattern:               abracadabra
 *
 *  % java RabinKarp rab abacadabrabracabracadabrabrabracad
 *  text:    abacadabrabracabracadabrabrabracad
 *  pattern:         rab
 *
 *  % java RabinKarp bcara abacadabrabracabracadabrabrabracad
 *  text:    abacadabrabracabracadabrabrabracad
 *  pattern:                                   bcara
026 *
027 *  %  java RabinKarp rabrabracad abacadabrabracabracadabrabrabracad
028 *  text:    abacadabrabracabracadabrabrabracad
029 *  pattern:                        rabrabracad
030 *
031 *  % java RabinKarp abacad abacadabrabracabracadabrabrabracad
032 *  text:    abacadabrabracabracadabrabrabracad
033 *  pattern: abacad
034 *
035 ***************************************************************/
036
037public class RabinKarp {
038        private String pat;      // the pattern  // needed only for Las Vegas
039        private long patHash;    // pattern hash value
040        private int M;           // pattern length
041        private long Q;          // a large prime, small enough to avoid long overflow
042        private int R;           // radix
043        private long RM;         // R^(M-1) % Q
044
045        public RabinKarp(int R, char[] pattern) {
046                throw new Error("Operation not supported yet");
047        }
048
049        public RabinKarp(String pat) {
050                this.pat = pat;      // save pattern (needed only for Las Vegas)
051                R = 256;
052                M = pat.length();
053                Q = longRandomPrime();
054
055                // precompute R^(M-1) % Q for use in removing leading digit
056                RM = 1;
057                for (int i = 1; i <= M-1; i++)
058                        RM = (R * RM) % Q;
059                patHash = hash(pat, M);
060        }
061
062        // Compute hash for key[0..M-1].
063        private long hash(String key, int M) {
064                long h = 0;
065                for (int j = 0; j < M; j++)
066                        h = (R * h + key.charAt(j)) % Q;
067                return h;
068        }
069
070        // Las Vegas version: does pat[] match txt[i..i-M+1] ?
071        private boolean check(String txt, int i) {
072                for (int j = 0; j < M; j++)
073                        if (pat.charAt(j) != txt.charAt(i + j))
074                                return false;
075                return true;
076        }
077
078        // Monte Carlo version: always return true
079        private boolean check(int i) {
080                return true;
081        }
082
083        // check for exact match
084        public int search(String txt) {
085                int N = txt.length();
086                if (N < M) return N;
087                long txtHash = hash(txt, M);
088
089                // check for match at offset 0
090                if ((patHash == txtHash) && check(txt, 0))
091                        return 0;
092
093                // check for hash match; if hash match, check for exact match
094                for (int i = M; i < N; i++) {
095                        // Remove leading digit, add trailing digit, check for match.
096                        txtHash = (txtHash + Q - RM*txt.charAt(i-M) % Q) % Q;
097                        txtHash = (txtHash*R + txt.charAt(i)) % Q;
098
099                        // match
100                        int offset = i - M + 1;
101                        if ((patHash == txtHash) && check(txt, offset))
102                                return offset;
103                }
104
105                // no match
106                return N;
107        }
108
109
110        // a random 31-bit prime
111        private static long longRandomPrime() {
112                BigInteger prime = new BigInteger(31, new Random());
113                return prime.longValue();
114        }
115
116        // test client
117        public static void main(String[] args) {
118                //args = new String[] { "abracadabra", "abacadabrabracabracadabrabrabracad" };
119                //args = new String[] { "rab",         "abacadabrabracabracadabrabrabracad" };
120                //args = new String[] { "bcara",       "abacadabrabracabracadabrabrabracad" };
121                //args = new String[] { "rabrabracad", "abacadabrabracabracadabrabrabracad" };
122                args = new String[] { "abacad",      "abacadabrabracabracadabrabrabracad" };
123                String pat = args[0];
124                String txt = args[1];
125
126                RabinKarp searcher = new RabinKarp(pat);
127                int offset = searcher.search(txt);
128
129                // print results
130                StdOut.println("text:    " + txt);
131
132                // from brute force search method 1
133                StdOut.print("pattern: ");
134                for (int i = 0; i < offset; i++)
135                        StdOut.print(" ");
136                StdOut.println(pat);
137        }
138}