001package algs53; // section 5.3
002import stdlib.*;
003/* *************************************************************
004 *  Compilation:  javac BoyerMoore.java
005 *  Execution:    java BoyerMoore pattern text
006 *
007 *  Reads in two strings, the pattern and the input text, and
008 *  searches for the pattern in the input text using the
009 *  bad-character rule part of the Boyer-Moore algorithm.
010 *  (does not implement the strong good suffix rule)
011 *
012 *  % java BoyerMoore abracadabra abacadabrabracabracadabrabrabracad
013 *  text:    abacadabrabracabracadabrabrabracad
014 *  pattern:               abracadabra
015 *
016 *  % java BoyerMoore rab abacadabrabracabracadabrabrabracad
017 *  text:    abacadabrabracabracadabrabrabracad
018 *  pattern:         rab
019 *
020 *  % java BoyerMoore bcara abacadabrabracabracadabrabrabracad
021 *  text:    abacadabrabracabracadabrabrabracad
022 *  pattern:                                   bcara
023 *
024 *  % java BoyerMoore rabrabracad abacadabrabracabracadabrabrabracad
025 *  text:    abacadabrabracabracadabrabrabracad
026 *  pattern:                        rabrabracad
027 *
028 *  % java BoyerMoore abacad abacadabrabracabracadabrabrabracad
029 *  text:    abacadabrabracabracadabrabrabracad
030 *  pattern: abacad
031 *
032 ***************************************************************/
033
034public class BoyerMoore {
035        private final int R;     // the radix
036        private final int[] right;     // the bad-character skip array
037
038        private char[] pattern;  // store the pattern as a character array
039        private String pat;      // or as a string
040
041        // pattern provided as a string
042        public BoyerMoore(String pat) {
043                this.R = 256;
044                this.pat = pat;
045
046                // position of rightmost occurrence of c in the pattern
047                right = new int[R];
048                for (int c = 0; c < R; c++)
049                        right[c] = -1;
050                for (int j = 0; j < pat.length(); j++)
051                        right[pat.charAt(j)] = j;
052        }
053
054        // pattern provided as a character array
055        public BoyerMoore(char[] pattern, int R) {
056                this.R = R;
057                this.pattern = new char[pattern.length];
058                for (int j = 0; j < pattern.length; j++)
059                        this.pattern[j] = pattern[j];
060
061                // position of rightmost occurrence of c in the pattern
062                right = new int[R];
063                for (int c = 0; c < R; c++)
064                        right[c] = -1;
065                for (int j = 0; j < pattern.length; j++)
066                        right[pattern[j]] = j;
067        }
068
069        // return offset of first match; N if no match
070        public int search(String txt) {
071                int M = pat.length();
072                int N = txt.length();
073                int skip;
074                for (int i = 0; i <= N - M; i += skip) {
075                        skip = 0;
076                        for (int j = M-1; j >= 0; j--) {
077                                if (pat.charAt(j) != txt.charAt(i+j)) {
078                                        skip = Math.max(1, j - right[txt.charAt(i+j)]);
079                                        break;
080                                }
081                        }
082                        if (skip == 0) return i;    // found
083                }
084                return N;                       // not found
085        }
086
087
088        // return offset of first match; N if no match
089        public int search(char[] text) {
090                int M = pattern.length;
091                int N = text.length;
092                int skip;
093                for (int i = 0; i <= N - M; i += skip) {
094                        skip = 0;
095                        for (int j = M-1; j >= 0; j--) {
096                                if (pattern[j] != text[i+j]) {
097                                        skip = Math.max(1, j - right[text[i+j]]);
098                                        break;
099                                }
100                        }
101                        if (skip == 0) return i;    // found
102                }
103                return N;                       // not found
104        }
105
106
107
108        // test client
109        public static void main(String[] args) {
110                //args = new String[] { "abracadabra", "abacadabrabracabracadabrabrabracad" };
111                //args = new String[] { "rab",         "abacadabrabracabracadabrabrabracad" };
112                //args = new String[] { "bcara",       "abacadabrabracabracadabrabrabracad" };
113                //args = new String[] { "rabrabracad", "abacadabrabracabracadabrabrabracad" };
114                args = new String[] { "abacad",      "abacadabrabracabracadabrabrabracad" };
115                String pat1 = args[0];
116                String txt1 = args[1];
117                char[] pat2 = pat1.toCharArray();
118                char[] txt2 = txt1.toCharArray();
119
120                BoyerMoore bm1 = new BoyerMoore(pat1);
121                BoyerMoore bm2 = new BoyerMoore(pat2, 256);
122                int offset1 = bm1.search(txt1);
123                int offset2 = bm2.search(txt2);
124
125                // print results
126                StdOut.println("text:    " + txt1);
127
128                StdOut.print("pattern: ");
129                for (int i = 0; i < offset1; i++)
130                        StdOut.print(" ");
131                StdOut.println(pat1);
132
133                StdOut.print("pattern: ");
134                for (int i = 0; i < offset2; i++)
135                        StdOut.print(" ");
136                StdOut.println(pat2);
137        }
138}