001package algs51; // section 5.0
002import stdlib.*;
/**
 * An immutable mapping between the characters of a fixed alphabet and the
 * integer indices {@code 0 .. R-1}, where {@code R} is the radix (number of
 * characters) of the alphabet.
 *
 * <p>Several commonly used alphabets are provided as constants.
 */
public class Alphabet {
        // Number of distinct char values (0 .. Character.MAX_VALUE inclusive).
        // Lookup tables indexed by an arbitrary char need exactly this many slots.
        private static final int CHAR_RANGE = Character.MAX_VALUE + 1;

        public static final Alphabet BINARY         = new Alphabet("01");
        public static final Alphabet OCTAL          = new Alphabet("01234567");
        public static final Alphabet DECIMAL        = new Alphabet("0123456789");
        public static final Alphabet HEXADECIMAL    = new Alphabet("0123456789ABCDEF");
        public static final Alphabet DNA            = new Alphabet("ACTG");
        public static final Alphabet LOWERCASE      = new Alphabet("abcdefghijklmnopqrstuvwxyz");
        public static final Alphabet UPPERCASE      = new Alphabet("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
        public static final Alphabet PROTEIN        = new Alphabet("ACDEFGHIKLMNPQRSTVWY");
        public static final Alphabet BASE64         = new Alphabet("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/");
        public static final Alphabet ASCII          = new Alphabet(128);
        public static final Alphabet EXTENDED_ASCII = new Alphabet(256);
        public static final Alphabet UNICODE16      = new Alphabet(65536);

        private final char[] alphabet;     // the characters in the alphabet, by index
        private final int[] inverse;       // char value -> index, or -1 if not in the alphabet
        private final int R;               // the radix of the alphabet

        /**
         * Creates a new alphabet from the characters of the given string; the
         * character at position {@code i} of the string gets index {@code i}.
         *
         * @param alpha the characters of the alphabet, in index order
         * @throws IllegalArgumentException if {@code alpha} contains a duplicate character
         */
        public Alphabet(String alpha) {
                R = alpha.length();
                alphabet = alpha.toCharArray();

                // One slot per possible char value, so every char (including the
                // largest one, U+FFFF) can be looked up without a range check.
                inverse = new int[CHAR_RANGE];
                for (int i = 0; i < inverse.length; i++) {
                        inverse[i] = -1;
                }

                // Fill the inverse table; a slot that is already set means the
                // character occurred earlier in the string, i.e. it is a duplicate.
                for (int i = 0; i < R; i++) {
                        char c = alphabet[i];
                        if (inverse[c] != -1) {
                                throw new IllegalArgumentException("Illegal alphabet: character = '" + c + "'");
                        }
                        inverse[c] = i;
                }
        }

        /**
         * Creates a new alphabet consisting of the chars with values 0 to R-1,
         * each mapped to its own value as index.
         *
         * @param R the radix; the lookup table has exactly R entries
         */
        private Alphabet(int R) {
                alphabet = new char[R];
                inverse = new int[R];
                this.R = R;

                // int loop variable: a char counter could not reach R = 65,536
                for (int i = 0; i < R; i++) {
                        alphabet[i] = (char) i;
                        inverse[i] = i;
                }
        }

        /**
         * Creates a new alphabet of the chars 0 to 255 (extended ASCII).
         */
        public Alphabet() {
                this(256);
        }

        /**
         * Is character {@code c} in the alphabet?
         *
         * @param c the character to look up
         * @return {@code true} if {@code c} belongs to this alphabet, {@code false} otherwise
         */
        public boolean contains(char c) {
                // Range-based alphabets have a table shorter than the char range,
                // so characters beyond the table are simply not members.
                return c < inverse.length && inverse[c] != -1;
        }

        /**
         * Returns the radix of this alphabet.
         *
         * @return the number of characters in the alphabet
         */
        public int R() {
                return R;
        }

        /**
         * Returns the number of bits needed to represent an index.
         *
         * @return the number of times {@code R-1} can be halved (integer
         *         division) before reaching zero
         */
        public int lgR() {
                int lgR = 0;
                for (int t = R - 1; t >= 1; t /= 2) {
                        lgR++;
                }
                return lgR;
        }

        /**
         * Converts character {@code c} to its index between 0 and R-1.
         *
         * @param c the character to convert
         * @return the index of {@code c} in this alphabet
         * @throws IllegalArgumentException if {@code c} is not in this alphabet
         */
        public int toIndex(char c) {
                if (c >= inverse.length || inverse[c] == -1) {
                        throw new IllegalArgumentException("Character " + c + " not in alphabet");
                }
                return inverse[c];
        }

        /**
         * Converts a string over this alphabet into the array of its
         * characters' indices (a base-R integer, one digit per element).
         *
         * @param s the string to convert
         * @return an array of the same length as {@code s} holding the index of each character
         * @throws IllegalArgumentException if some character of {@code s} is not in this alphabet
         */
        public int[] toIndices(String s) {
                char[] source = s.toCharArray();
                int[] target  = new int[source.length];
                for (int i = 0; i < source.length; i++) {
                        target[i] = toIndex(source[i]);
                }
                return target;
        }

        /**
         * Converts an index between 0 and R-1 into the corresponding character.
         *
         * @param index the index to convert
         * @return the character of this alphabet with that index
         * @throws IndexOutOfBoundsException unless {@code 0 <= index < R}
         */
        public char toChar(int index) {
                if (index < 0 || index >= R) {
                        throw new IndexOutOfBoundsException("Alphabet out of bounds");
                }
                return alphabet[index];
        }

        /**
         * Converts an array of indices (a base-R integer) into a string over
         * this alphabet.
         *
         * @param indices the indices to convert
         * @return the string whose i-th character has index {@code indices[i]}
         * @throws IndexOutOfBoundsException if some index is outside {@code 0 .. R-1}
         */
        public String toChars(int[] indices) {
                StringBuilder s = new StringBuilder(indices.length);
                for (int index : indices) {
                        s.append(toChar(index));
                }
                return s.toString();
        }

        /**
         * Demo client: encodes three sample strings to indices, decodes them
         * again, and prints each decoded string on its own line.
         *
         * @param args ignored
         */
        public static void main(String[] args) {
                // Printed with System.out so the demo depends on nothing but the JDK.
                int[] encoded1  = BASE64.toIndices("NowIsTheTimeForAllGoodMen");
                String decoded1 = BASE64.toChars(encoded1);
                System.out.println(decoded1);

                int[] encoded2  = DNA.toIndices("AACGAACGGTTTACCCCG");
                String decoded2 = DNA.toChars(encoded2);
                System.out.println(decoded2);

                int[] encoded3 = DECIMAL.toIndices("01234567890123456789");
                String decoded3 = DECIMAL.toChars(encoded3);
                System.out.println(decoded3);
        }
}