1 import java.util.Hashtable;
2 import java.util.Enumeration;
// Static lookup tables and helper methods for handling "decorated" (accented) letters
// and typographic ligatures. All tables below are filled by init().
3 public class DecoratedStr {
// Lower-case base letter -> string holding its decorated lower-case variants.
4 protected static Hashtable<Character,String> char_to_alternatives_lower = new Hashtable<Character,String>();
// Base letter (lower- and upper-case keys) -> string holding its decorated variants;
// derived in init() from char_to_alternatives_lower.
5 protected static Hashtable<Character,String> char_to_alternatives = new Hashtable<Character,String>();
// Reverse lookup: decorated character -> the base letter it is registered under.
6 protected static Hashtable<Character,Character> alternative_to_char = new Hashtable<Character,Character>();
// Lower-case ligature character -> its letter-sequence expansion (e.g. 'æ' -> "ae").
7 protected static Hashtable<Character,String> ligatures_expansions_lower = new Hashtable<Character,String>();
// Ligature character (lower- and upper-case) -> expansion; upper-case ligatures expand
// to a string whose first letter is capitalised.
8 protected static Hashtable<Character,String> ligatures_expansions = new Hashtable<Character,String>();
// Inverse of ligatures_expansions: expansion string -> ligature character.
9 protected static Hashtable<String,Character> ligatures_contractions = new Hashtable<String,Character>();
// NOTE(review): no line visible in this view reads or writes this flag — presumably it
// records whether init() has already run; confirm against the full file.
10 private static boolean initialized = false;
12 public static void init() {
15 char_to_alternatives_lower.put('a', "àáâãäåāăą");
16 char_to_alternatives_lower.put('c', "çćĉċč");
17 char_to_alternatives_lower.put('d', "ďđ");
18 char_to_alternatives_lower.put('e', "èéêëēĕėęě");
19 char_to_alternatives_lower.put('g', "ĝğġģ");
20 char_to_alternatives_lower.put('h', "ĥħ");
21 char_to_alternatives_lower.put('i', "ìíîïĩīĭįı");
22 char_to_alternatives_lower.put('j', "ĵ");
23 char_to_alternatives_lower.put('k', "ķ");
24 char_to_alternatives_lower.put('l', "ĺļľŀł");
25 char_to_alternatives_lower.put('n', "ñńņňʼnŋ");
26 char_to_alternatives_lower.put('o', "òóôöøōŏő");
27 char_to_alternatives_lower.put('r', "ŕŗř");
28 char_to_alternatives_lower.put('s', "śŝşš");
29 char_to_alternatives_lower.put('t', "ţťŧ");
30 char_to_alternatives_lower.put('u', "ùúûüũūŭůűų");
31 char_to_alternatives_lower.put('w', "ŵ");
32 char_to_alternatives_lower.put('y', "ýÿŷ");
33 char_to_alternatives_lower.put('z', "źżž");
35 Enumeration<Character> e = char_to_alternatives_lower.keys();
36 while (e.hasMoreElements()) {
37 Character k = e.nextElement();
38 String v = char_to_alternatives_lower.get(k);
39 char_to_alternatives.put(k, v);
40 char_to_alternatives.put(Character.toUpperCase(k), v.toUpperCase());
41 //System.out.println(Character.toUpperCase(k));
42 //System.out.println(v.toUpperCase());
44 char_to_alternatives.put('I', "İ");
46 e = char_to_alternatives.keys();
47 while (e.hasMoreElements()) {
48 Character k = e.nextElement();
49 String v = char_to_alternatives.get(k);
50 for (int i=v.length()-1; i>=0; --i) {
52 //System.out.println(k);
53 //System.out.println(a);
54 alternative_to_char.put(a, k);
58 ligatures_expansions_lower.put('æ', "ae");
59 //ligatures_expansions_lower.put('ij', "ij"); buggy: see http://en.wikipedia.org/wiki/Typographic_ligature
60 ligatures_expansions_lower.put('œ', "oe");
62 e = ligatures_expansions_lower.keys();
63 while (e.hasMoreElements()) {
64 Character k = e.nextElement();
65 String v = ligatures_expansions_lower.get(k);
66 ligatures_expansions.put(k, v);
67 ligatures_contractions.put(v, k);
68 String uv = Character.toUpperCase(v.charAt(0)) + v.substring(1);
69 ligatures_expansions.put(Character.toUpperCase(k), uv);
70 ligatures_contractions.put(uv, Character.toUpperCase(k));
/**
 * Processes {@code txt} character by character, consulting {@code alternative_to_char}
 * and {@code ligatures_expansions} for each character, and returns a String.
 *
 * NOTE(review): the lines that declare {@code l}, {@code al} and {@code le}, build the
 * result and return it are not visible in this view. Presumably decorated characters
 * are replaced by their base letter and ligatures by their expansion — confirm against
 * the full file.
 *
 * @param txt the text to process
 */
76 public static String remove_decoration(String txt) {
79 int len = txt.length();
// Visit every character position of the input.
83 for (int i=0; i<len; ++i) {
// Base letter registered for l, or null when l is not a known decorated character.
85 al = alternative_to_char.get(l);
// Expansion registered for l, or null when l is not a known ligature.
88 le = ligatures_expansions.get(l);
/**
 * Builds a String for the single character {@code c} from the character itself and the
 * variants registered for it in {@code char_to_alternatives}; when more than one
 * character is collected, the visible return wraps them in square brackets.
 *
 * NOTE(review): several lines of this method are not visible in this view (what happens
 * with {@code sa}, the statement under the inner {@code if}, the return for the
 * single-character case, and any condition guarding the lower-casing) — confirm against
 * the full file.
 *
 * @param c              the character to build the match expression for
 * @param case_sensitive when false, the upper-cased form of the collected characters is
 *                       also computed
 */
97 public static String decorated_match_single_char(char c, boolean case_sensitive) {
100 c = Character.toLowerCase(c);
// Start with the character itself.
101 String result = "" + c;
// Decorated variants of c, or null when c has no entry in the table.
102 String sa = char_to_alternatives.get(c);
105 if (!case_sensitive) {
106 String ur = result.toUpperCase();
// Only relevant when upper-casing actually changes something.
107 if (result.compareTo(ur) != 0)
// More than one candidate character: wrap them in square brackets.
110 if (result.length() > 1)
111 return "[" + result + "]";
/**
 * Builds a String from {@code txt} by first passing it through
 * {@link #remove_decoration(String)} and then combining, per character, the output of
 * {@link #decorated_match_single_char(char, boolean)}; two-character sequences are
 * additionally looked up in {@code ligatures_contractions}.
 *
 * NOTE(review): the initialisation of {@code result}, the conditions around the
 * ligature handling, the start of the expression continued on the lines beginning
 * with {@code +}, and the return statement are not visible in this view — confirm
 * against the full file.
 *
 * @param txt            the text to build the match expression for
 * @param case_sensitive forwarded to {@code decorated_match_single_char}
 */
117 public static String decorated_match(String txt, boolean case_sensitive) {
// Normalise the input to its undecorated form first.
120 txt = remove_decoration(txt);
122 txt = txt.toLowerCase();
123 int len = txt.length();
124 for (int i=0; i<len; ++i) {
125 char c1 = txt.charAt(i);
// c12 becomes the current character plus its successor, i.e. a candidate ligature expansion.
126 String c12 = "" + c1;
// NOTE(review): reads position i+1 — presumably guarded by a bounds check on a line not visible here.
128 c12 += txt.charAt(i+1);
// Ligature character whose expansion equals c12, or null when there is none.
129 Character lc = ligatures_contractions.get(c12);
// Adds the upper-case ligature as a further alternative.
133 result += "|" + Character.toUpperCase(lc);
// The two letters of the expansion, each matched with its own decorated variants.
135 + decorated_match_single_char(c12.charAt(0), case_sensitive)
136 + decorated_match_single_char(c12.charAt(1), case_sensitive)
// Single-character path (no ligature involved).
142 result += decorated_match_single_char(c1, case_sensitive);
/**
 * Small manual demo: prints the sample text {@code in}, its undecorated form, and the
 * results of {@code decorated_match} in case-sensitive and case-insensitive mode.
 *
 * NOTE(review): the declaration of {@code in} is not visible in this view.
 *
 * @param argv command-line arguments; not used in the visible lines
 */
147 public static void main(String argv[]) {
149 System.out.println(in);
150 System.out.println(remove_decoration(in));
151 //System.out.println(decorated_match_single_char('m', true));
152 //System.out.println(decorated_match_single_char('m', false));
153 //System.out.println(decorated_match_single_char('h', true));
154 //System.out.println(decorated_match_single_char('h', false));
155 System.out.println(decorated_match(in, true));
156 System.out.println(decorated_match(in, false));