001package algs63;
002import  stdlib.*;
003
004/* ***********************************************************************
005 *  Compilation:  javac SuffixArray.java
006 *  Execution:    java SuffixArray < input.txt
007 *
008 *  A data type that computes the suffix array of a string.
009 *
010 *  % java SuffixArray < abra.txt
011 *    i ind lcp rnk  select
012 *  ---------------------------
013 *    0  11   -   0  !
014 *    1  10   0   1  A!
015 *    2   7   1   2  ABRA!
016 *    3   0   4   3  ABRACADABRA!
017 *    4   3   1   4  ACADABRA!
018 *    5   5   1   5  ADABRA!
019 *    6   8   0   6  BRA!
020 *    7   1   3   7  BRACADABRA!
021 *    8   4   0   8  CADABRA!
022 *    9   6   0   9  DABRA!
023 *   10   9   0  10  RA!
024 *   11   2   2  11  RACADABRA!
025 *
026 *  WARNING: This program assumes that the {@code substring()} method takes
027 *  constant time and space. Beginning with Oracle / OpenJDK Java 7, Update 6,
028 *  the substring method takes linear time and space in the size of the
029 *  extracted substring. Do NOT use this code with such versions.
030 *
031 *  See <a href = "http://java-performance.info/changes-to-string-java-1-7-0_06/">this article</a>
032 *  for more details.
033 *
034 *************************************************************************/
035
036import java.util.Arrays;
037
038public class SuffixArray {
039        private final String[] suffixes;
040        private final int N;
041
042        public SuffixArray(String s) {
043                N = s.length();
044                suffixes = new String[N];
045                for (int i = 0; i < N; i++)
046                        suffixes[i] = s.substring(i);
047                Arrays.sort(suffixes);
048        }
049
050        // size of string
051        public int length() { return N; }
052
053        // index of ith sorted suffix
054        public int index(int i) { return N - suffixes[i].length(); }
055
056        // ith sorted suffix
057        public String select(int i) { return suffixes[i]; }
058
059        // number of suffixes strictly less than query
060        public int rank(String query) {
061                int lo = 0, hi = N - 1;
062                while (lo <= hi) {
063                        int mid = lo + (hi - lo) / 2;
064                        int cmp = query.compareTo(suffixes[mid]);
065                        if      (cmp < 0) hi = mid - 1;
066                        else if (cmp > 0) lo = mid + 1;
067                        else return mid;
068                }
069                return lo;
070        }
071
072        // length of longest common prefix of s and t
073        private static int lcp(String s, String t) {
074                int N = Math.min(s.length(), t.length());
075                for (int i = 0; i < N; i++)
076                        if (s.charAt(i) != t.charAt(i)) return i;
077                return N;
078        }
079
080        // longest common prefix of suffixes(i) and suffixes(i-1)
081        public int lcp(int i) {
082                return lcp(suffixes[i], suffixes[i-1]);
083        }
084
085        // longest common prefix of suffixes(i) and suffixes(j)
086        public int lcp(int i, int j) {
087                return lcp(suffixes[i], suffixes[j]);
088        }
089
090        public static void main(String[] args) {
091                String s = StdIn.readAll().replaceAll("\n", " ").trim();
092                SuffixArray suffix = new SuffixArray(s);
093
094
095                StdOut.println("  i ind lcp rnk  select");
096                StdOut.println("---------------------------");
097
098                for (int i = 0; i < s.length(); i++) {
099                        int index = suffix.index(i);
100                        String ith = "\"" + s.substring(index, Math.min(index + 50, s.length())) + "\"";
101                        // String ith = suffix.select(i);
102                        int rank = suffix.rank(ith);
103                        if (i == 0) {
104                                StdOut.format("%3d %3d %3s %3d  %s\n", i, index, "-", rank, ith);
105                        }
106                        else {
107                                int lcp = suffix.lcp(i, i-1);
108                                StdOut.format("%3d %3d %3d %3d  %s\n", i, index, lcp, rank, ith);
109                        }
110                }
111
112        }
113
114}