From a5a1230aee9bbad52a17cdba2d3d25e85eeb336b Mon Sep 17 00:00:00 2001 From: =?utf8?q?Jean-Michel=20Nirgal=20Vourg=C3=A8re?= Date: Sun, 27 Sep 2009 17:53:17 +0000 Subject: [PATCH] Java version --- DecoratedStr.java | 150 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 150 insertions(+) create mode 100644 DecoratedStr.java diff --git a/DecoratedStr.java b/DecoratedStr.java new file mode 100644 index 0000000..c32d6eb --- /dev/null +++ b/DecoratedStr.java @@ -0,0 +1,150 @@ +import java.util.Hashtable; +import java.util.Enumeration; +public class DecoratedStr { + protected static Hashtable char_to_alternatives_lower = new Hashtable(); + protected static Hashtable char_to_alternatives = new Hashtable(); + protected static Hashtable alternative_to_char = new Hashtable(); + protected static Hashtable ligatures_expansions_lower = new Hashtable(); + protected static Hashtable ligatures_expansions = new Hashtable(); + protected static Hashtable ligatures_contractions = new Hashtable(); + + public static void init() { + char_to_alternatives_lower.put('a', "àáâãäåāăą"); + char_to_alternatives_lower.put('c', "çćĉċč"); + char_to_alternatives_lower.put('d', "ďđ"); + char_to_alternatives_lower.put('e', "èéêëēĕėęě"); + char_to_alternatives_lower.put('g', "ĝğġģ"); + char_to_alternatives_lower.put('h', "ĥħ"); + char_to_alternatives_lower.put('i', "ìíîïĩīĭįı"); + char_to_alternatives_lower.put('j', "ĵ"); + char_to_alternatives_lower.put('k', "ķ"); + char_to_alternatives_lower.put('l', "ĺļľŀł"); + char_to_alternatives_lower.put('n', "ñńņňʼnŋ"); + char_to_alternatives_lower.put('o', "òóôöøōŏő"); + char_to_alternatives_lower.put('r', "ŕŗř"); + char_to_alternatives_lower.put('s', "śŝşš"); + char_to_alternatives_lower.put('t', "ţťŧ"); + char_to_alternatives_lower.put('u', "ùúûüũūŭůűų"); + char_to_alternatives_lower.put('w', "ŵ"); + char_to_alternatives_lower.put('y', "ýÿŷ"); + char_to_alternatives_lower.put('z', "źżž"); + + Enumeration e = char_to_alternatives_lower.keys(); + while (e.hasMoreElements()) { + Character k = e.nextElement(); + String v = char_to_alternatives_lower.get(k); + char_to_alternatives.put(k, v); + char_to_alternatives.put(Character.toUpperCase(k), v.toUpperCase()); + //System.out.println(Character.toUpperCase(k)); + //System.out.println(v.toUpperCase()); + } + char_to_alternatives.put('I', "İ"); + + e = char_to_alternatives.keys(); + while (e.hasMoreElements()) { + Character k = e.nextElement(); + String v = char_to_alternatives.get(k); + for (int i=v.length()-1; i>=0; --i) { + char a = v.charAt(i); + //System.out.println(k); + //System.out.println(a); + alternative_to_char.put(a, k); + } + } + + ligatures_expansions_lower.put('æ', "ae"); + //ligatures_expansions_lower.put('ij', "ij"); buggy: see http://en.wikipedia.org/wiki/Typographic_ligature + ligatures_expansions_lower.put('œ', "oe"); + e = ligatures_expansions_lower.keys(); + while (e.hasMoreElements()) { + Character k = e.nextElement(); + String v = ligatures_expansions_lower.get(k); + ligatures_expansions.put(k, v); + ligatures_contractions.put(v, k); + String uv = Character.toUpperCase(v.charAt(0)) + v.substring(1); + ligatures_expansions.put(Character.toUpperCase(k), uv); + ligatures_contractions.put(uv, Character.toUpperCase(k)); + } + } + + public static String remove_decoration(String txt) { + String result = ""; + int len = txt.length(); + char l; + Character al; + String le; + for (int i=0; i 1) + return "[" + result + "]"; + else + return result; + + } + + public static String decorated_match(String txt, boolean case_sensitive) { + String result = ""; + txt = remove_decoration(txt); + if (!case_sensitive) + txt = txt.toLowerCase(); + int len = txt.length(); + for (int i=0; i