001package algs63; // section 6.3 002import stdlib.*; 003/* *********************************************************************** 004 * Compilation: javac Manber.java 005 * Execution: java Manber < text.txt 006 * Dependencies: In.java 007 * 008 * Reads a text corpus from stdin and suffix sorts it in subquadratic 009 * time using a variant of XManber's algorithm. 010 * 011 * NOTE: I THINK THIS IS CYCLIC SUFFIX SORTING 012 * 013 *************************************************************************/ 014 015public class XManber { 016 private final int N; // length of input string 017 private final String text; // input text 018 private final int[] index; // offset of ith string in order 019 private final int[] rank; // rank of ith string 020 private final int[] newrank; // rank of ith string (temporary) 021 private int offset; 022 023 public XManber(String s) { 024 N = s.length(); 025 text = s; 026 index = new int[N+1]; 027 rank = new int[N+1]; 028 newrank = new int[N+1]; 029 030 // sentinels 031 index[N] = N; 032 rank[N] = -1; 033 034 msd(); 035 doit(); 036 } 037 038 // do one pass of msd sorting by rank at given offset 039 private void doit() { 040 for (offset = 1; offset < N; offset += offset) { 041 StdOut.println("offset = " + offset); 042 043 int count = 0; 044 for (int i = 1; i <= N; i++) { 045 if (rank[index[i]] == rank[index[i-1]]) count++; 046 else if (count > 0) { 047 // sort 048 int left = i-1-count; 049 int right = i-1; 050 quicksort(left, right); 051 052 // now fix up ranks 053 int r = rank[index[left]]; 054 for (int j = left + 1; j <= right; j++) { 055 if (less(index[j-1], index[j])) { 056 r = rank[index[left]] + j - left; 057 } 058 newrank[index[j]] = r; 059 } 060 061 // copy back - note can't update rank too eagerly 062 for (int j = left + 1; j <= right; j++) { 063 rank[index[j]] = newrank[index[j]]; 064 } 065 066 count = 0; 067 } 068 } 069 } 070 } 071 072 // sort by leading char, assumes extended ASCII (256 values) 073 private void msd() { 074 // calculate frequencies 075 int[] freq = new int[256]; 076 for (int i = 0; i < N; i++) 077 freq[text.charAt(i)]++; 078 079 // calculate cumulative frequencies 080 int[] cumm = new int[256]; 081 for (int i = 1; i < 256; i++) 082 cumm[i] = cumm[i-1] + freq[i-1]; 083 084 // compute ranks 085 for (int i = 0; i < N; i++) 086 rank[i] = cumm[text.charAt(i)]; 087 088 // sort by first char 089 for (int i = 0; i < N; i++) 090 index[cumm[text.charAt(i)]++] = i; 091 } 092 093 094 // for debugging 095 public void show() { 096 String texttext = text + text; // make cyclic 097 StdOut.println("j, rank[index[j]], index[j]"); 098 for (int i = 0; i < N; i++) { 099 String s = texttext.substring(index[i], index[i] + Math.min(40, N)); 100 StdOut.println(s + " " + i + " " + rank[index[i]] + " " + index[i]); 101 } 102 StdOut.println(); 103 } 104 105 106 // test client 107 public static void main(String[] args) { 108 String s = StdIn.readAll(); 109 XManber m = new XManber(s); 110 m.show(); 111 } 112 113 114 /* ******************************************************************** 115 * Helper functions for comparing suffixes. 116 **********************************************************************/ 117 118 /* ******************************************************************** 119 * Is the substring text[v..N] lexicographically less than the 120 * substring text[w..N] ? 121 **********************************************************************/ 122 private boolean less(int v, int w) { 123 if (v + offset >= N) v -= N; 124 if (w + offset >= N) w -= N; 125 return rank[v + offset] < rank[w + offset]; 126 } 127 128 129 130 /* *********************************************************************** 131 * Quicksort code from Sedgewick 7.1, 7.2. 132 *************************************************************************/ 133 134 // swap pointer sort indices 135 private void exch(int i, int j) { 136 int swap = index[i]; 137 index[i] = index[j]; 138 index[j] = swap; 139 } 140 141 142 // SUGGEST REPLACING WITH 3-WAY QUICKSORT SINCE ELEMENTS ARE 143 // RANKS AND THERE MAY BE DUPLICATES 144 void quicksort(int l, int r) { 145 if (r <= l) return; 146 int i = partition(l, r); 147 quicksort(l, i-1); 148 quicksort(i+1, r); 149 } 150 151 int partition(int l, int r) { 152 int i = l-1, j = r; 153 int v = index[r]; 154 155 while (true) { 156 157 // find item on left to swap 158 while (less(index[++i], v)) 159 ; 160 161 // find item on right to swap 162 while (less(v, index[--j])) 163 if (j == l) break; 164 165 // check if pointers cross 166 if (i >= j) break; 167 168 exch(i, j); 169 } 170 171 // swap with partition element 172 exch(i, r); 173 174 return i; 175 } 176}