001package algs63; // section 6.3
002import stdlib.*;
003/* ***********************************************************************
004 *  Compilation:  javac Manber.java
005 *  Execution:    java Manber < text.txt
006 *  Dependencies: In.java
007 *
008 *  Reads a text corpus from stdin and suffix sorts it in subquadratic
009 *  time using a variant of XManber's algorithm.
010 *
011 *  NOTE: I THINK THIS IS CYCLIC SUFFIX SORTING
012 *
013 *************************************************************************/
014
015public class XManber {
016        private final int N;               // length of input string
017        private final String text;         // input text
018        private final int[] index;         // offset of ith string in order
019        private final int[] rank;          // rank of ith string
020        private final int[] newrank;       // rank of ith string (temporary)
021        private int offset;
022
023        public XManber(String s) {
024                N    = s.length();
025                text = s;
026                index   = new int[N+1];
027                rank    = new int[N+1];
028                newrank = new int[N+1];
029
030                // sentinels
031                index[N] = N;
032                rank[N] = -1;
033
034                msd();
035                doit();
036        }
037
038        // do one pass of msd sorting by rank at given offset
039        private void doit() {
040                for (offset = 1; offset < N; offset += offset) {
041                        StdOut.println("offset = " + offset);
042
043                        int count = 0;
044                        for (int i = 1; i <= N; i++) {
045                                if (rank[index[i]] == rank[index[i-1]]) count++;
046                                else if (count > 0) {
047                                        // sort
048                                        int left = i-1-count;
049                                        int right = i-1;
050                                        quicksort(left, right);
051
052                                        // now fix up ranks
053                                        int r = rank[index[left]];
054                                        for (int j = left + 1; j <= right; j++) {
055                                                if (less(index[j-1], index[j]))  {
056                                                        r = rank[index[left]] + j - left;
057                                                }
058                                                newrank[index[j]] = r;
059                                        }
060
061                                        // copy back - note can't update rank too eagerly
062                                        for (int j = left + 1; j <= right; j++) {
063                                                rank[index[j]] = newrank[index[j]];
064                                        }
065
066                                        count = 0;
067                                }
068                        }
069                }
070        }
071
072        // sort by leading char, assumes extended ASCII (256 values)
073        private void msd() {
074                // calculate frequencies
075                int[] freq = new int[256];
076                for (int i = 0; i < N; i++)
077                        freq[text.charAt(i)]++;
078
079                // calculate cumulative frequencies
080                int[] cumm = new int[256];
081                for (int i = 1; i < 256; i++)
082                        cumm[i] = cumm[i-1] + freq[i-1];
083
084                // compute ranks
085                for (int i = 0; i < N; i++)
086                        rank[i] = cumm[text.charAt(i)];
087
088                // sort by first char
089                for (int i = 0; i < N; i++)
090                        index[cumm[text.charAt(i)]++] = i;
091        }
092
093
094        // for debugging
095        public void show() {
096                String texttext = text + text;  // make cyclic
097                StdOut.println("j, rank[index[j]], index[j]");
098                for (int i = 0; i < N; i++) {
099                        String s = texttext.substring(index[i], index[i] +  Math.min(40, N));
100                        StdOut.println(s + " " + i + " " + rank[index[i]] + " " + index[i]);
101                }
102                StdOut.println();
103        }
104
105
106        // test client
107        public static void main(String[] args) {
108                String s = StdIn.readAll();
109                XManber m = new XManber(s);
110                m.show();
111        }
112
113
114        /* ********************************************************************
115         *  Helper functions for comparing suffixes.
116         **********************************************************************/
117
118        /* ********************************************************************
119         * Is the substring text[v..N] lexicographically less than the
120         * substring text[w..N] ?
121         **********************************************************************/
122        private boolean less(int v, int w) {
123                if (v + offset >= N) v -= N;
124                if (w + offset >= N) w -= N;
125                return rank[v + offset] < rank[w + offset];
126        }
127
128
129
130        /* ***********************************************************************
131         *  Quicksort code from Sedgewick 7.1, 7.2.
132         *************************************************************************/
133
134        // swap pointer sort indices
135        private void exch(int i, int j) {
136                int swap = index[i];
137                index[i] = index[j];
138                index[j] = swap;
139        }
140
141
142        // SUGGEST REPLACING WITH 3-WAY QUICKSORT SINCE ELEMENTS ARE
143        // RANKS AND THERE MAY BE DUPLICATES
144        void quicksort(int l, int r) {
145                if (r <= l) return;
146                int i = partition(l, r);
147                quicksort(l, i-1);
148                quicksort(i+1, r);
149        }
150
151        int partition(int l, int r) {
152                int i = l-1, j = r;
153                int v = index[r];
154
155                while (true) {
156
157                        // find item on left to swap
158                        while (less(index[++i], v))
159                                ;
160
161                        // find item on right to swap
162                        while (less(v, index[--j]))
163                                if (j == l) break;
164
165                        // check if pointers cross
166                        if (i >= j) break;
167
168                        exch(i, j);
169                }
170
171                // swap with partition element
172                exch(i, r);
173
174                return i;
175        }
176}