001package algs51; // section 5.1
002import stdlib.*;
003/* *********************************************************************************
004 *  Compilation: javac MSD.java
005 *  Execution:   java MSD < input.txt
006 *
007 *  Reads extended ASCII string from standard input and MSD radix sorts them.
008 *
009 *  % java MSD < shells.txt
010 *  are
011 *  by
012 *  sea
013 *  seashells
014 *  seashells
015 *  sells
016 *  sells
017 *  she
018 *  she
019 *  shells
020 *  shore
021 *  surely
022 *  the
023 *  the
024 *
025 * MAX_LENGTH = 64
026 * RESULTS WITH Arrays.sort (Mergesort)
027 *
028 *     $ ./runjava algs51.MSD
029 *       256000 [0.243 0.880]
030 *       512000 [0.398 1.638]
031 *      1024000 [0.851 2.138]
032 *     $ ./runjava algs51.MSD
033 *       256000 [0.281 1.011]
034 *       512000 [0.365 1.299]
035 *      1024000 [0.848 2.323]
036 *     $ ./runjava algs51.MSD
037 *       256000 [0.270 1.031]
038 *       512000 [0.370 1.370]
039 *      1024000 [0.857 2.316]
040 *
041 * RESULTS with MSD
042 *
043 *     $ ./runjava algs51.MSD
044 *       256000 [0.299 3.785]
045 *       512000 [0.151 0.505]
046 *      1024000 [0.305 2.020]
047 *     $ ./runjava algs51.MSD
048 *       256000 [0.296 3.795]
049 *       512000 [0.139 0.470]
050 *      1024000 [0.303 2.180]
051 *     $ ./runjava algs51.MSD
052 *       256000 [0.294 3.769]
053 *       512000 [0.141 0.480]
054 *      1024000 [0.282 2.000]
055 *
056 * RESULTS WITH Arrays.sort (Mergesort)
057 *
058 *     $ ./runjava algs51.MSD
059 *     dickens: [4.205]
060 *     $ ./runjava algs51.MSD
061 *     dickens: [4.257]
062 *     $ ./runjava algs51.MSD
063 *     dickens: [4.222]
064 *
065 * RESULTS with MSD
066 *
067 *     $ ./runjava algs51.MSD
068 *     dickens: [7.331]
069 *     $ ./runjava algs51.MSD
070 *     dickens: [7.411]
071 *     $ ./runjava algs51.MSD
072 *     dickens: [7.523]
073 *
074 ***********************************************************************************/
075
076public class MSD {
077        private static final int R      = 256;   // extended ASCII alphabet size
078        private static final int CUTOFF =  15;   // cutoff to insertion sort
079
080        // sort array of strings
081        public static void sort(String[] a) {
082                int N = a.length;
083                String[] aux = new String[N];
084                sort(a, 0, N-1, 0, aux);
085        }
086
087        // return dth character of s, -1 if d = length of string
088        private static int charAt(String s, int d) {
089                assert d >= 0 && d <= s.length();
090                if (d == s.length()) return -1;
091                return s.charAt(d);
092        }
093
094        // sort from a[lo] to a[hi], starting at the dth character
095        private static void sort(String[] a, int lo, int hi, int d, String[] aux) {
096
097                // cutoff to insertion sort for small subarrays
098                if (hi <= lo + CUTOFF) {
099                        insertion(a, lo, hi, d);
100                        return;
101                }
102
103                // compute frequency counts
104                int[] count = new int[R+2];
105                for (int i = lo; i <= hi; i++) {
106                        int c = charAt(a[i], d);
107                        count[c+2]++;
108                }
109
110                // transform counts to indicies
111                for (int r = 0; r < R+1; r++)
112                        count[r+1] += count[r];
113
114                // distribute
115                for (int i = lo; i <= hi; i++) {
116                        int c = charAt(a[i], d);
117                        aux[count[c+1]++] = a[i];
118                }
119
120                // copy back
121                for (int i = lo; i <= hi; i++)
122                        a[i] = aux[i - lo];
123
124
125                // recursively sort for each character
126                for (int r = 0; r < R; r++) {
127                        //sort(a, lo + count[r], lo + count[r+1] - 1, d+1, aux);
128
129                        int newLo = lo + count[r];
130                        int newHi = lo + count[r+1] - 1;
131                        if (newHi > newLo)
132                                sort(a, newLo, newHi, d+1, aux);
133                }
134        }
135
136
137        // return dth character of s, -1 if d = length of string
138        private static void insertion(String[] a, int lo, int hi, int d) {
139                for (int i = lo; i <= hi; i++)
140                        for (int j = i; j > lo && less(a[j], a[j-1], d); j--)
141                                exch(a, j, j-1);
142        }
143
144        // exchange a[i] and a[j]
145        private static void exch(String[] a, int i, int j) {
146                String temp = a[i];
147                a[i] = a[j];
148                a[j] = temp;
149        }
150
151        // is v less than w, starting at character d
152        private static boolean less1(String v, String w, int d) {
153                assert v.substring(0, d).equals(w.substring(0, d));
154                return v.substring(d).compareTo(w.substring(d)) < 0;
155        }
156        // is v less than w, starting at character d
157        private static boolean less(String v, String w, int d) {
158                assert v.substring(0, d).equals(w.substring(0, d));
159                int len1 = v.length ();
160                int len2 = w.length ();
161                int n = Math.min(len1, len2);
162                int k = 0;
163                while (k < n) {
164                        char c1 = v.charAt (k);
165                        char c2 = w.charAt (k);
166                        if (c1 != c2) {
167                                return c1 < c2;
168                        }
169                        k++;
170                }
171                return len1 < len2;
172        }
173
174
175        /* *********************************************************************************
176         *  Test code
177         ***********************************************************************************/
178        private static final int MAX_LENGTH = 64;
179        private static final String CHARACTERS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789`~!@#$%^&*()_-+={[}]\\|:;'\"<>,./?";
180        private static String randomString() {
181                int length = 2 + StdRandom.uniform (MAX_LENGTH);
182                char[] text = new char[length];
183                int NUM_CHARACTERS = CHARACTERS.length ();
184                for (int i = 0; i < length; i++) {
185                        text[i] = CHARACTERS.charAt(StdRandom.uniform (NUM_CHARACTERS));
186                }
187                return new String(text);
188        }
189        private static void exch(int[] a, int i, int j) {
190                int swap = a[i];
191                a[i] = a[j];
192                a[j] = swap;
193        }
194        private static double time;
195        private static void countops (int N) {
196                String[] a = new String[N];
197                //for (int i = 0; i < a.length; i++) a[i] = (N*i)/N;
198                //StdRandom.shuffle (a);
199                for (int i = 0; i < a.length; i++) a[i] = randomString ();
200                //Arrays.sort (a);
201                //Arrays.sort (a); for (int i = 0; i < (N-1)/2; i++) exch(a, i, N-i-1);
202
203                Stopwatch sw = new Stopwatch ();
204                sort (a);
205                //Quick3way.sort (a);
206                //Arrays.sort (a);
207                time = sw.elapsedTime ();
208        }
209        public static void main(String[] args) {
210                int N = 128000;
211                countops (N);
212                //System.exit (0);
213                double prevTime = time;
214                for (int i=0; i<3; i++) {
215                        N *= 2;
216                        countops (N);
217                        StdOut.format("%8d [%5.3f %5.3f]\n", N, time, time/prevTime);
218                        prevTime = time;
219                }
220        }
221        public static void main3(String[] args) {
222                String[] a = new In("data/dickens.txt").readAllStrings ();
223                Stopwatch sw = new Stopwatch ();
224                sort (a);
225                //Arrays.sort (a);
226                StdOut.format("dickens: [%5.3f]\n", sw.elapsedTime ());
227        }
228        public static void main2(String[] args) {
229                String[] a = StdIn.readAllStrings();
230                int N = a.length;
231                sort(a);
232                for (int i = 0; i < N; i++)
233                        StdOut.println(a[i]);
234        }
235}