Java version
authorJean-Michel Nirgal Vourgère <jmv@nirgal.com>
Sun, 27 Sep 2009 17:53:17 +0000 (17:53 +0000)
committerJean-Michel Nirgal Vourgère <jmv@nirgal.com>
Sun, 27 Sep 2009 17:53:17 +0000 (17:53 +0000)
DecoratedStr.java [new file with mode: 0644]

diff --git a/DecoratedStr.java b/DecoratedStr.java
new file mode 100644 (file)
index 0000000..c32d6eb
--- /dev/null
@@ -0,0 +1,150 @@
+import java.util.Hashtable;
+import java.util.Enumeration;
+public class DecoratedStr {
+    protected static Hashtable<Character,String> char_to_alternatives_lower = new Hashtable<Character,String>();
+    protected static Hashtable<Character,String> char_to_alternatives = new Hashtable<Character,String>();
+    protected static Hashtable<Character,Character> alternative_to_char = new Hashtable<Character,Character>();
+    protected static Hashtable<Character,String> ligatures_expansions_lower = new Hashtable<Character,String>();
+    protected static Hashtable<Character,String> ligatures_expansions = new Hashtable<Character,String>();
+    protected static Hashtable<String,Character> ligatures_contractions = new Hashtable<String,Character>();
+
+    public static void init() {
+        char_to_alternatives_lower.put('a', "àáâãäåāăą");
+        char_to_alternatives_lower.put('c', "çćĉċč");
+        char_to_alternatives_lower.put('d', "ďđ");
+        char_to_alternatives_lower.put('e', "èéêëēĕėęě");
+        char_to_alternatives_lower.put('g', "ĝğġģ");
+        char_to_alternatives_lower.put('h', "ĥħ");
+        char_to_alternatives_lower.put('i', "ìíîïĩīĭįı");
+        char_to_alternatives_lower.put('j', "ĵ");
+        char_to_alternatives_lower.put('k', "ķ");
+        char_to_alternatives_lower.put('l', "ĺļľŀł");
+        char_to_alternatives_lower.put('n', "ñńņňʼnŋ");
+        char_to_alternatives_lower.put('o', "òóôöøōŏő");
+        char_to_alternatives_lower.put('r', "ŕŗř");
+        char_to_alternatives_lower.put('s', "śŝşš");
+        char_to_alternatives_lower.put('t', "ţťŧ");
+        char_to_alternatives_lower.put('u', "ùúûüũūŭůűų");
+        char_to_alternatives_lower.put('w', "ŵ");
+        char_to_alternatives_lower.put('y', "ýÿŷ");
+        char_to_alternatives_lower.put('z', "źżž");
+
+        Enumeration<Character> e = char_to_alternatives_lower.keys();
+        while (e.hasMoreElements()) {
+            Character k = e.nextElement();
+            String v = char_to_alternatives_lower.get(k);
+            char_to_alternatives.put(k, v);
+            char_to_alternatives.put(Character.toUpperCase(k), v.toUpperCase());
+            //System.out.println(Character.toUpperCase(k));
+            //System.out.println(v.toUpperCase());
+        }
+        char_to_alternatives.put('I', "İ");
+
+        e = char_to_alternatives.keys();
+        while (e.hasMoreElements()) {
+            Character k = e.nextElement();
+            String v = char_to_alternatives.get(k);
+            for (int i=v.length()-1; i>=0; --i) {
+                char a = v.charAt(i);
+                //System.out.println(k);
+                //System.out.println(a);
+                alternative_to_char.put(a, k);
+            }
+        }
+        
+        ligatures_expansions_lower.put('æ', "ae");
+        //ligatures_expansions_lower.put('ij', "ij"); buggy: see http://en.wikipedia.org/wiki/Typographic_ligature
+        ligatures_expansions_lower.put('œ', "oe");
+        e = ligatures_expansions_lower.keys();
+        while (e.hasMoreElements()) {
+            Character k = e.nextElement();
+            String v = ligatures_expansions_lower.get(k);
+            ligatures_expansions.put(k, v);
+            ligatures_contractions.put(v, k);
+            String uv = Character.toUpperCase(v.charAt(0)) + v.substring(1);
+            ligatures_expansions.put(Character.toUpperCase(k), uv);
+            ligatures_contractions.put(uv, Character.toUpperCase(k));
+        }
+    }
+
+    public static String remove_decoration(String txt) {
+        String result = "";
+        int len = txt.length();
+        char l;
+        Character al;
+        String le;
+        for (int i=0; i<len; ++i) {
+            l = txt.charAt(i);
+            al = alternative_to_char.get(l);
+            if (al != null)
+                l = al;
+            le = ligatures_expansions.get(l);
+            if (le != null)
+                result += le;
+            else
+                result += l;
+        }
+        return result;
+    }
+
+    public static String decorated_match_single_char(char c, boolean case_sensitive) {
+        if (!case_sensitive)
+            c = Character.toLowerCase(c);
+        String result = "" + c;
+        String sa = char_to_alternatives.get(c);
+        if (sa != null)
+            result += sa;
+        if (!case_sensitive) {
+            String ur = result.toUpperCase();
+            if (result.compareTo(ur) != 0)
+                result += ur;
+        }
+        if (result.length() > 1)
+            return "[" + result + "]";
+        else
+            return result;
+
+    }
+
+    public static String decorated_match(String txt, boolean case_sensitive) {
+        String result = "";
+        txt = remove_decoration(txt);
+        if (!case_sensitive)
+            txt = txt.toLowerCase();
+        int len = txt.length();
+        for (int i=0; i<len; ++i) {
+            char c1 = txt.charAt(i);
+            String c12 = "" + c1;
+            if (i < len-1)
+                c12 += txt.charAt(i+1);
+            Character lc = ligatures_contractions.get(c12);
+            if (lc != null) {
+                result += "(" + lc;
+                if (!case_sensitive)
+                    result += "|" + Character.toUpperCase(lc);
+                result += '|'
+                       + decorated_match_single_char(c12.charAt(0), case_sensitive)
+                       + decorated_match_single_char(c12.charAt(1), case_sensitive)
+                       + ')';
+                i += 1;
+
+            }
+            else
+                result += decorated_match_single_char(c1, case_sensitive);
+        }
+        return result;
+    }
+
+       public static void main(String argv[]) {
+        init();
+        String in = "Œuf";
+               System.out.println(in);
+               System.out.println(remove_decoration(in));
+               System.out.println(decorated_match_single_char('m', true));
+               System.out.println(decorated_match_single_char('m', false));
+               System.out.println(decorated_match_single_char('h', true));
+               System.out.println(decorated_match_single_char('h', false));
+               System.out.println(decorated_match(in, true));
+               System.out.println(decorated_match(in, false));
+       }
+}