Imported version 0.5
[decoratedstr.git] / DecoratedStr.java
1 import java.util.Hashtable;
2 import java.util.Enumeration;
/**
 * Helpers for text carrying Latin "decorations": diacritics (accents, cedillas,
 * strokes, ...) and the ligatures ae/oe.
 *
 * <p>Two services are offered:
 * <ul>
 *   <li>{@link #remove_decoration(String)} maps every known decorated character
 *       back to its plain base letter and expands ligatures;</li>
 *   <li>{@link #decorated_match(String, boolean)} builds a regular-expression
 *       string that matches a text whatever decorations it carries.</li>
 * </ul>
 *
 * <p>All lookup tables are static and filled lazily by {@link #init()}. The lazy
 * initialisation is not synchronized.
 */
public class DecoratedStr {
    /** Lower-case base letter to its decorated lower-case variants. */
    protected static Hashtable<Character,String> char_to_alternatives_lower = new Hashtable<Character,String>();
    /** Base letter (either case) to its decorated variants of the same case. */
    protected static Hashtable<Character,String> char_to_alternatives = new Hashtable<Character,String>();
    /** Decorated character to its plain base letter (reverse of {@link #char_to_alternatives}). */
    protected static Hashtable<Character,Character> alternative_to_char = new Hashtable<Character,Character>();
    /** Lower-case ligature to its two-letter lower-case expansion. */
    protected static Hashtable<Character,String> ligatures_expansions_lower = new Hashtable<Character,String>();
    /** Ligature (either case) to its expansion; upper-case ligatures expand to a capitalised pair such as "Oe". */
    protected static Hashtable<Character,String> ligatures_expansions = new Hashtable<Character,String>();
    /** Two-letter expansion back to its ligature (reverse of {@link #ligatures_expansions}). */
    protected static Hashtable<String,Character> ligatures_contractions = new Hashtable<String,Character>();
    private static boolean initialized = false;

    /**
     * Fills the lookup tables. Only the first call does any work; the public
     * methods of this class call it themselves, so an explicit call is optional.
     */
    public static void init() {
        if (initialized)
            return;
        init_alternatives();
        init_ligatures();
        initialized = true;
    }

    /** Populates the base-letter/variant tables for both cases. */
    private static void init_alternatives() {
        char_to_alternatives_lower.put('a', "àáâãäåāăą");
        char_to_alternatives_lower.put('c', "çćĉċč");
        char_to_alternatives_lower.put('d', "ďđ");
        char_to_alternatives_lower.put('e', "èéêëēĕėęě");
        char_to_alternatives_lower.put('g', "ĝğġģ");
        char_to_alternatives_lower.put('h', "ĥħ");
        char_to_alternatives_lower.put('i', "ìíîïĩīĭįı");
        char_to_alternatives_lower.put('j', "ĵ");
        char_to_alternatives_lower.put('k', "ķ");
        // U+0140 (l with middle dot) is spelled as an escape so that it cannot be
        // silently normalised into the two characters "l" + middle dot.
        char_to_alternatives_lower.put('l', "ĺļľ\u0140ł");
        // U+0149 (n preceded by apostrophe) is spelled as an escape for the same reason.
        char_to_alternatives_lower.put('n', "ñńņň\u0149ŋ");
        char_to_alternatives_lower.put('o', "òóôõöøōŏő");
        char_to_alternatives_lower.put('r', "ŕŗř");
        char_to_alternatives_lower.put('s', "śŝşš");
        char_to_alternatives_lower.put('t', "ţťŧ");
        char_to_alternatives_lower.put('u', "ùúûüũūŭůűų");
        char_to_alternatives_lower.put('w', "ŵ");
        char_to_alternatives_lower.put('y', "ýÿŷ");
        char_to_alternatives_lower.put('z', "źżž");

        for (Character key : char_to_alternatives_lower.keySet()) {
            char lower = key.charValue();
            char upper = Character.toUpperCase(lower);
            String lower_alternatives = char_to_alternatives_lower.get(key);
            char_to_alternatives.put(lower, lower_alternatives);
            char_to_alternatives.put(upper, upper_alternatives(lower_alternatives, upper));
        }
        // Capital I with dot above has no lower-case counterpart in the table
        // above, so it is appended to (not substituted for) the variants of 'I'.
        char_to_alternatives.put('I', char_to_alternatives.get('I') + "İ");

        for (Character base : char_to_alternatives.keySet()) {
            String alternatives = char_to_alternatives.get(base);
            for (int i = 0; i < alternatives.length(); ++i)
                alternative_to_char.put(alternatives.charAt(i), base);
        }
    }

    /**
     * Upper-cases a string of decorated lower-case letters one character at a
     * time.
     *
     * <p>{@code String.toUpperCase()} is deliberately avoided: it depends on the
     * default locale and may expand one character into several (U+0149 becomes
     * U+02BC followed by 'N'), which would register unrelated characters as
     * variants. A character is skipped when it has no single-character upper
     * case, or when its upper case is the plain base letter itself (dotless i).
     *
     * @param lower_alternatives decorated lower-case variants of one base letter
     * @param upper_base the plain upper-case base letter
     * @return the decorated upper-case variants, in the same order
     */
    private static String upper_alternatives(String lower_alternatives, char upper_base) {
        StringBuilder upper_alternatives = new StringBuilder(lower_alternatives.length());
        for (int i = 0; i < lower_alternatives.length(); ++i) {
            char lower = lower_alternatives.charAt(i);
            char upper = Character.toUpperCase(lower);
            if (upper != lower && upper != upper_base)
                upper_alternatives.append(upper);
        }
        return upper_alternatives.toString();
    }

    /** Populates the ligature expansion and contraction tables for both cases. */
    private static void init_ligatures() {
        ligatures_expansions_lower.put('æ', "ae");
        // The ij ligature is deliberately not registered: the original author
        // marked it as buggy, see http://en.wikipedia.org/wiki/Typographic_ligature
        ligatures_expansions_lower.put('œ', "oe");

        for (Character key : ligatures_expansions_lower.keySet()) {
            char lower = key.charValue();
            char upper = Character.toUpperCase(lower);
            String expansion = ligatures_expansions_lower.get(key);
            // An upper-case ligature expands to a capitalised pair ("Oe"), not "OE".
            String capitalised = Character.toUpperCase(expansion.charAt(0)) + expansion.substring(1);
            ligatures_expansions.put(lower, expansion);
            ligatures_contractions.put(expansion, lower);
            ligatures_expansions.put(upper, capitalised);
            ligatures_contractions.put(capitalised, upper);
        }
    }

    /**
     * Returns {@code txt} with every known decorated character replaced by its
     * plain base letter and every known ligature replaced by its two-letter
     * expansion. Characters that are not in the tables are copied unchanged.
     *
     * @param txt text to simplify; must not be {@code null}
     * @return the undecorated text
     */
    public static String remove_decoration(String txt) {
        init();
        int len = txt.length();
        StringBuilder result = new StringBuilder(len);
        for (int i = 0; i < len; ++i) {
            char c = txt.charAt(i);
            Character base = alternative_to_char.get(c);
            if (base != null)
                c = base;
            String expansion = ligatures_expansions.get(c);
            if (expansion != null)
                result.append(expansion);
            else
                result.append(c);
        }
        return result.toString();
    }

    /**
     * Builds a regular-expression fragment matching {@code c} or any of its
     * decorated variants.
     *
     * <p>The characters are emitted verbatim: a regular-expression
     * metacharacter passed as {@code c} is not quoted.
     *
     * @param c the plain character to match
     * @param case_sensitive when {@code false}, both the lower-case and the
     *        upper-case forms (with their variants) are accepted
     * @return the character itself when there is a single candidate, otherwise
     *         a bracketed character class listing all candidates
     */
    public static String decorated_match_single_char(char c, boolean case_sensitive) {
        init();
        StringBuilder candidates = new StringBuilder();
        if (case_sensitive) {
            append_with_alternatives(candidates, c);
        } else {
            // Per-character case mapping: independent of the default locale and
            // always one character in, one character out.
            char lower = Character.toLowerCase(c);
            char upper = Character.toUpperCase(lower);
            append_with_alternatives(candidates, lower);
            if (upper != lower)
                append_with_alternatives(candidates, upper);
        }
        if (candidates.length() > 1)
            return "[" + candidates + "]";
        return candidates.toString();
    }

    /** Appends {@code c} followed by its registered decorated variants, if any. */
    private static void append_with_alternatives(StringBuilder out, char c) {
        out.append(c);
        String alternatives = char_to_alternatives.get(c);
        if (alternatives != null)
            out.append(alternatives);
    }

    /**
     * Builds a regular-expression string that matches {@code txt} whatever
     * decorations the matched text carries: each letter accepts its decorated
     * variants and each two-letter ligature expansion also accepts the ligature.
     *
     * <p>{@code txt} is first passed through {@link #remove_decoration(String)}.
     * Characters are emitted verbatim: regular-expression metacharacters in
     * {@code txt} are not quoted.
     *
     * @param txt the text to match; must not be {@code null}
     * @param case_sensitive when {@code false}, letter case is ignored as well
     * @return the regular-expression string
     */
    public static String decorated_match(String txt, boolean case_sensitive) {
        init();
        txt = remove_decoration(txt);
        if (!case_sensitive)
            txt = txt.toLowerCase(java.util.Locale.ROOT); // not the default locale: Turkish would turn 'I' into dotless i
        int len = txt.length();
        StringBuilder result = new StringBuilder();
        for (int i = 0; i < len; ++i) {
            char first = txt.charAt(i);
            Character ligature = null;
            if (i + 1 < len)
                ligature = ligatures_contractions.get(txt.substring(i, i + 2));
            if (ligature == null) {
                result.append(decorated_match_single_char(first, case_sensitive));
                continue;
            }
            char lig = ligature.charValue();
            result.append('(').append(lig);
            if (!case_sensitive)
                result.append('|').append(Character.toUpperCase(lig));
            result.append('|')
                  .append(decorated_match_single_char(first, case_sensitive))
                  .append(decorated_match_single_char(txt.charAt(i + 1), case_sensitive))
                  .append(')');
            i += 1; // the second letter of the pair has been consumed too
        }
        return result.toString();
    }

    /** Small demonstration: prints a sample word, its undecorated form and both match expressions. */
    public static void main(String argv[]) {
        String in = "Œuf";
        System.out.println(in);
        System.out.println(remove_decoration(in));
        System.out.println(decorated_match(in, true));
        System.out.println(decorated_match(in, false));
    }
}