001package algs53; // section 5.3 002import stdlib.*; 003/* ************************************************************* 004 * Compilation: javac BoyerMoore.java 005 * Execution: java BoyerMoore pattern text 006 * 007 * Reads in two strings, the pattern and the input text, and 008 * searches for the pattern in the input text using the 009 * bad-character rule part of the Boyer-Moore algorithm. 010 * (does not implement the strong good suffix rule) 011 * 012 * % java BoyerMoore abracadabra abacadabrabracabracadabrabrabracad 013 * text: abacadabrabracabracadabrabrabracad 014 * pattern: abracadabra 015 * 016 * % java BoyerMoore rab abacadabrabracabracadabrabrabracad 017 * text: abacadabrabracabracadabrabrabracad 018 * pattern: rab 019 * 020 * % java BoyerMoore bcara abacadabrabracabracadabrabrabracad 021 * text: abacadabrabracabracadabrabrabracad 022 * pattern: bcara 023 * 024 * % java BoyerMoore rabrabracad abacadabrabracabracadabrabrabracad 025 * text: abacadabrabracabracadabrabrabracad 026 * pattern: rabrabracad 027 * 028 * % java BoyerMoore abacad abacadabrabracabracadabrabrabracad 029 * text: abacadabrabracabracadabrabrabracad 030 * pattern: abacad 031 * 032 ***************************************************************/ 033 034public class BoyerMoore { 035 private final int R; // the radix 036 private final int[] right; // the bad-character skip array 037 038 private char[] pattern; // store the pattern as a character array 039 private String pat; // or as a string 040 041 // pattern provided as a string 042 public BoyerMoore(String pat) { 043 this.R = 256; 044 this.pat = pat; 045 046 // position of rightmost occurrence of c in the pattern 047 right = new int[R]; 048 for (int c = 0; c < R; c++) 049 right[c] = -1; 050 for (int j = 0; j < pat.length(); j++) 051 right[pat.charAt(j)] = j; 052 } 053 054 // pattern provided as a character array 055 public BoyerMoore(char[] pattern, int R) { 056 this.R = R; 057 this.pattern = new char[pattern.length]; 058 for (int j = 0; j < pattern.length; j++) 059 this.pattern[j] = pattern[j]; 060 061 // position of rightmost occurrence of c in the pattern 062 right = new int[R]; 063 for (int c = 0; c < R; c++) 064 right[c] = -1; 065 for (int j = 0; j < pattern.length; j++) 066 right[pattern[j]] = j; 067 } 068 069 // return offset of first match; N if no match 070 public int search(String txt) { 071 int M = pat.length(); 072 int N = txt.length(); 073 int skip; 074 for (int i = 0; i <= N - M; i += skip) { 075 skip = 0; 076 for (int j = M-1; j >= 0; j--) { 077 if (pat.charAt(j) != txt.charAt(i+j)) { 078 skip = Math.max(1, j - right[txt.charAt(i+j)]); 079 break; 080 } 081 } 082 if (skip == 0) return i; // found 083 } 084 return N; // not found 085 } 086 087 088 // return offset of first match; N if no match 089 public int search(char[] text) { 090 int M = pattern.length; 091 int N = text.length; 092 int skip; 093 for (int i = 0; i <= N - M; i += skip) { 094 skip = 0; 095 for (int j = M-1; j >= 0; j--) { 096 if (pattern[j] != text[i+j]) { 097 skip = Math.max(1, j - right[text[i+j]]); 098 break; 099 } 100 } 101 if (skip == 0) return i; // found 102 } 103 return N; // not found 104 } 105 106 107 108 // test client 109 public static void main(String[] args) { 110 //args = new String[] { "abracadabra", "abacadabrabracabracadabrabrabracad" }; 111 //args = new String[] { "rab", "abacadabrabracabracadabrabrabracad" }; 112 //args = new String[] { "bcara", "abacadabrabracabracadabrabrabracad" }; 113 //args = new String[] { "rabrabracad", "abacadabrabracabracadabrabrabracad" }; 114 args = new String[] { "abacad", "abacadabrabracabracadabrabrabracad" }; 115 String pat1 = args[0]; 116 String txt1 = args[1]; 117 char[] pat2 = pat1.toCharArray(); 118 char[] txt2 = txt1.toCharArray(); 119 120 BoyerMoore bm1 = new BoyerMoore(pat1); 121 BoyerMoore bm2 = new BoyerMoore(pat2, 256); 122 int offset1 = bm1.search(txt1); 123 int offset2 = bm2.search(txt2); 124 125 // print results 126 StdOut.println("text: " + txt1); 127 128 StdOut.print("pattern: "); 129 for (int i = 0; i < offset1; i++) 130 StdOut.print(" "); 131 StdOut.println(pat1); 132 133 StdOut.print("pattern: "); 134 for (int i = 0; i < offset2; i++) 135 StdOut.print(" "); 136 StdOut.println(pat2); 137 } 138}