c32d6eb43d35fdfc7de38beb66aa58bab95ccff5
[decoratedstr.git] / DecoratedStr.java
1 import java.util.Hashtable;
2 import java.util.Enumeration;
/**
 * Strips diacritics and ligatures from Latin text and builds regular-expression
 * fragments that match a plain string together with its decorated spellings.
 *
 * <p>All lookups are done per {@code char} against static tables. The tables are
 * filled once when the class is loaded; {@link #init()} remains public and may be
 * called again without changing the result.
 */
public class DecoratedStr {
    /** Lowercase base letter -> its lowercase decorated variants. */
    protected static Hashtable<Character,String> char_to_alternatives_lower = new Hashtable<Character,String>();
    /** Base letter (either case) -> decorated variants of the same case. */
    protected static Hashtable<Character,String> char_to_alternatives = new Hashtable<Character,String>();
    /** Decorated variant -> base letter of the same case. */
    protected static Hashtable<Character,Character> alternative_to_char = new Hashtable<Character,Character>();
    /** Lowercase ligature -> its lowercase two-letter expansion. */
    protected static Hashtable<Character,String> ligatures_expansions_lower = new Hashtable<Character,String>();
    /** Ligature (either case) -> two-letter expansion ("ae"/"Ae", "oe"/"Oe"). */
    protected static Hashtable<Character,String> ligatures_expansions = new Hashtable<Character,String>();
    /** Two-letter expansion -> ligature (reverse of {@link #ligatures_expansions}). */
    protected static Hashtable<String,Character> ligatures_contractions = new Hashtable<String,Character>();

    // Populate the tables at class-load time so the public methods never run
    // against empty tables when a caller forgets to call init().
    static {
        init();
    }

    /**
     * (Re)builds every lookup table. Idempotent: calling it again stores the
     * same entries.
     */
    public static void init() {
        char_to_alternatives_lower.put('a', "àáâãäåāăą");
        char_to_alternatives_lower.put('c', "çćĉċč");
        char_to_alternatives_lower.put('d', "ďđ");
        char_to_alternatives_lower.put('e', "èéêëēĕėęě");
        char_to_alternatives_lower.put('g', "ĝğġģ");
        char_to_alternatives_lower.put('h', "ĥħ");
        char_to_alternatives_lower.put('i', "ìíîïĩīĭįı");
        char_to_alternatives_lower.put('j', "ĵ");
        char_to_alternatives_lower.put('k', "ķ");
        char_to_alternatives_lower.put('l', "ĺļľŀł");
        // U+0149 (n preceded by apostrophe) is written as an escape so that it
        // stays ONE char. The two-char spelling U+02BC + 'n' would register the
        // modifier apostrophe and plain 'n' as decorations of 'n'.
        char_to_alternatives_lower.put('n', "ñńņň\u0149ŋ");
        char_to_alternatives_lower.put('o', "òóôõöøōŏő");
        char_to_alternatives_lower.put('r', "ŕŗř");
        char_to_alternatives_lower.put('s', "śŝşš");
        char_to_alternatives_lower.put('t', "ţťŧ");
        char_to_alternatives_lower.put('u', "ùúûüũūŭůűų");
        char_to_alternatives_lower.put('w', "ŵ");
        char_to_alternatives_lower.put('y', "ýÿŷ");
        char_to_alternatives_lower.put('z', "źżž");

        for (Character key : char_to_alternatives_lower.keySet()) {
            char base = key;
            char upperBase = Character.toUpperCase(base);
            String lowerAlts = char_to_alternatives_lower.get(key);

            // Uppercase one char at a time: Character.toUpperCase is locale
            // independent and never expands to several chars, unlike
            // String.toUpperCase().
            StringBuilder upperAlts = new StringBuilder(lowerAlts.length() + 1);
            for (int i = 0; i < lowerAlts.length(); ++i) {
                char lower = lowerAlts.charAt(i);
                char upper = Character.toUpperCase(lower);
                // Skip variants without a one-char uppercase form (U+0149) and
                // variants whose uppercase is the undecorated letter
                // (dotless i -> 'I').
                if (upper != lower && upper != upperBase)
                    upperAlts.append(upper);
            }
            // Dotted capital I has no lowercase counterpart in the table, so it
            // is added to (not substituted for) the generated uppercase list.
            if (upperBase == 'I')
                upperAlts.append('İ');

            char_to_alternatives.put(base, lowerAlts);
            char_to_alternatives.put(upperBase, upperAlts.toString());
        }

        for (Character base : char_to_alternatives.keySet()) {
            String alts = char_to_alternatives.get(base);
            for (int i = 0; i < alts.length(); ++i)
                alternative_to_char.put(alts.charAt(i), base);
        }

        ligatures_expansions_lower.put('æ', "ae");
        // The ij ligature (U+0133) is deliberately left out; the original author
        // flagged it as buggy, see http://en.wikipedia.org/wiki/Typographic_ligature
        ligatures_expansions_lower.put('œ', "oe");

        for (Character key : ligatures_expansions_lower.keySet()) {
            char ligature = key;
            char upperLigature = Character.toUpperCase(ligature);
            String expansion = ligatures_expansions_lower.get(key);
            // Only the first letter is capitalised: "Ae", "Oe".
            String capitalised = Character.toUpperCase(expansion.charAt(0)) + expansion.substring(1);

            ligatures_expansions.put(ligature, expansion);
            ligatures_contractions.put(expansion, ligature);
            ligatures_expansions.put(upperLigature, capitalised);
            ligatures_contractions.put(capitalised, upperLigature);
        }
    }

    /**
     * Replaces every decorated letter by its base letter and every known
     * ligature by its two-letter expansion. Other chars are copied unchanged.
     *
     * @param txt text to simplify; must not be {@code null}
     * @return the undecorated text (may be longer than {@code txt} because
     *         ligatures expand to two letters)
     * @throws NullPointerException if {@code txt} is {@code null}
     */
    public static String remove_decoration(String txt) {
        int len = txt.length();
        StringBuilder plain = new StringBuilder(len);
        for (int i = 0; i < len; ++i) {
            char c = txt.charAt(i);
            Character base = alternative_to_char.get(c);
            if (base != null)
                c = base;
            String expansion = ligatures_expansions.get(c);
            if (expansion != null)
                plain.append(expansion);
            else
                plain.append(c);
        }
        return plain.toString();
    }

    /**
     * Builds a regex fragment matching {@code c} or any of its decorated variants.
     *
     * <p>The chars are emitted verbatim: no regex escaping is applied to
     * {@code c}.
     *
     * @param c              the (normally undecorated) char to match
     * @param case_sensitive when {@code false}, both the lowercase and the
     *                       uppercase letter and their variants are included
     * @return the char itself when there is a single candidate, otherwise a
     *         bracketed character class such as {@code [hĥħ]}
     */
    public static String decorated_match_single_char(char c, boolean case_sensitive) {
        StringBuilder candidates = new StringBuilder();
        if (case_sensitive) {
            append_with_alternatives(candidates, c);
        } else {
            char lower = Character.toLowerCase(c);
            char upper = Character.toUpperCase(lower);
            append_with_alternatives(candidates, lower);
            // The uppercase half comes from the table instead of upper-casing
            // the lowercase string, so multi-char case mappings cannot leak
            // unrelated chars into the class.
            if (upper != lower)
                append_with_alternatives(candidates, upper);
            // Keep the input itself when it is neither of the two forms above
            // (e.g. a titlecase letter).
            if (c != lower && c != upper)
                append_with_alternatives(candidates, c);
        }
        if (candidates.length() > 1)
            return "[" + candidates + "]";
        return candidates.toString();
    }

    /** Appends {@code c} followed by its same-case decorated variants, if any. */
    private static void append_with_alternatives(StringBuilder out, char c) {
        out.append(c);
        String alternatives = char_to_alternatives.get(c);
        if (alternatives != null)
            out.append(alternatives);
    }

    /**
     * Builds a regex matching {@code txt} regardless of decoration: each letter
     * matches its decorated variants, and a letter pair that forms a known
     * ligature matches either the ligature or the two (possibly decorated)
     * letters.
     *
     * <p>In case-sensitive mode only the pairs "ae", "Ae", "oe" and "Oe" are
     * recognised as ligature expansions; an all-caps pair is matched letter by
     * letter.
     *
     * @param txt            text to match; decorations in it are removed first
     * @param case_sensitive when {@code false}, the pattern matches either case
     * @return the regex source
     * @throws NullPointerException if {@code txt} is {@code null}
     */
    public static String decorated_match(String txt, boolean case_sensitive) {
        String plain = remove_decoration(txt);
        if (!case_sensitive) {
            // Locale.ROOT keeps the result independent of the JVM default
            // locale (a Turkish locale would lowercase 'I' to dotless i).
            plain = plain.toLowerCase(java.util.Locale.ROOT);
        }
        int len = plain.length();
        StringBuilder pattern = new StringBuilder();
        for (int i = 0; i < len; ++i) {
            char first = plain.charAt(i);
            Character ligature = (i + 1 < len)
                    ? ligatures_contractions.get(plain.substring(i, i + 2))
                    : null;
            if (ligature == null) {
                pattern.append(decorated_match_single_char(first, case_sensitive));
                continue;
            }
            char second = plain.charAt(i + 1);
            pattern.append('(').append(ligature);
            if (!case_sensitive)
                pattern.append('|').append(Character.toUpperCase(ligature));
            pattern.append('|')
                   .append(decorated_match_single_char(first, case_sensitive))
                   .append(decorated_match_single_char(second, case_sensitive))
                   .append(')');
            ++i; // the second letter of the pair has been consumed
        }
        return pattern.toString();
    }

    /** Small manual demo printing the results for a sample word. */
    public static void main(String[] argv) {
        init();
        String in = "Œuf";
        System.out.println(in);
        System.out.println(remove_decoration(in));
        System.out.println(decorated_match_single_char('m', true));
        System.out.println(decorated_match_single_char('m', false));
        System.out.println(decorated_match_single_char('h', true));
        System.out.println(decorated_match_single_char('h', false));
        System.out.println(decorated_match(in, true));
        System.out.println(decorated_match(in, false));
    }
}