001package algs63; 002import stdlib.*; 003 004/* *********************************************************************** 005 * Compilation: javac SuffixArray.java 006 * Execution: java SuffixArray < input.txt 007 * 008 * A data type that computes the suffix array of a string. 009 * 010 * % java SuffixArray < abra.txt 011 * i ind lcp rnk select 012 * --------------------------- 013 * 0 11 - 0 ! 014 * 1 10 0 1 A! 015 * 2 7 1 2 ABRA! 016 * 3 0 4 3 ABRACADABRA! 017 * 4 3 1 4 ACADABRA! 018 * 5 5 1 5 ADABRA! 019 * 6 8 0 6 BRA! 020 * 7 1 3 7 BRACADABRA! 021 * 8 4 0 8 CADABRA! 022 * 9 6 0 9 DABRA! 023 * 10 9 0 10 RA! 024 * 11 2 2 11 RACADABRA! 025 * 026 * WARNING: This program assumes that the {@code substring()} method takes 027 * constant time and space. Beginning with Oracle / OpenJDK Java 7, Update 6, 028 * the substring method takes linear time and space in the size of the 029 * extracted substring. Do NOT use this code with such versions. 030 * 031 * See <a href = "http://java-performance.info/changes-to-string-java-1-7-0_06/">this article</a> 032 * for more details. 033 * 034 *************************************************************************/ 035 036import java.util.Arrays; 037 038public class SuffixArray { 039 private final String[] suffixes; 040 private final int N; 041 042 public SuffixArray(String s) { 043 N = s.length(); 044 suffixes = new String[N]; 045 for (int i = 0; i < N; i++) 046 suffixes[i] = s.substring(i); 047 Arrays.sort(suffixes); 048 } 049 050 // size of string 051 public int length() { return N; } 052 053 // index of ith sorted suffix 054 public int index(int i) { return N - suffixes[i].length(); } 055 056 // ith sorted suffix 057 public String select(int i) { return suffixes[i]; } 058 059 // number of suffixes strictly less than query 060 public int rank(String query) { 061 int lo = 0, hi = N - 1; 062 while (lo <= hi) { 063 int mid = lo + (hi - lo) / 2; 064 int cmp = query.compareTo(suffixes[mid]); 065 if (cmp < 0) hi = mid - 1; 066 else if (cmp > 0) lo = mid + 1; 067 else return mid; 068 } 069 return lo; 070 } 071 072 // length of longest common prefix of s and t 073 private static int lcp(String s, String t) { 074 int N = Math.min(s.length(), t.length()); 075 for (int i = 0; i < N; i++) 076 if (s.charAt(i) != t.charAt(i)) return i; 077 return N; 078 } 079 080 // longest common prefix of suffixes(i) and suffixes(i-1) 081 public int lcp(int i) { 082 return lcp(suffixes[i], suffixes[i-1]); 083 } 084 085 // longest common prefix of suffixes(i) and suffixes(j) 086 public int lcp(int i, int j) { 087 return lcp(suffixes[i], suffixes[j]); 088 } 089 090 public static void main(String[] args) { 091 String s = StdIn.readAll().replaceAll("\n", " ").trim(); 092 SuffixArray suffix = new SuffixArray(s); 093 094 095 StdOut.println(" i ind lcp rnk select"); 096 StdOut.println("---------------------------"); 097 098 for (int i = 0; i < s.length(); i++) { 099 int index = suffix.index(i); 100 String ith = "\"" + s.substring(index, Math.min(index + 50, s.length())) + "\""; 101 // String ith = suffix.select(i); 102 int rank = suffix.rank(ith); 103 if (i == 0) { 104 StdOut.format("%3d %3d %3s %3d %s\n", i, index, "-", rank, ith); 105 } 106 else { 107 int lcp = suffix.lcp(i, i-1); 108 StdOut.format("%3d %3d %3d %3d %s\n", i, index, lcp, rank, ith); 109 } 110 } 111 112 } 113 114}