Added command line interface with charset option, optional regexp, help.
[decoratedstr.git] / decoratedstr.py
index 866497ccab144e4888611716f65e4247753b37a3..1dc8737028190caf7a99f3a9ac66320ddc7e78fc 100755 (executable)
@@ -26,17 +26,25 @@ char_to_alternatives_lower={
     u'z': u'źżž',
 }
 
     u'z': u'źżž',
 }
 
+# The lower() function doesn't work for these chars
+char_to_alternatives_upper={
+    u'I': u'İ',
+}
 char_to_alternatives={} # idem, but with upper case too
 for char,alternatives in char_to_alternatives_lower.iteritems():
     char_to_alternatives[char] = alternatives
     char_to_alternatives[char.upper()] = alternatives.upper()
 char_to_alternatives={} # idem, but with upper case too
 for char,alternatives in char_to_alternatives_lower.iteritems():
     char_to_alternatives[char] = alternatives
     char_to_alternatives[char.upper()] = alternatives.upper()
-
+for char,alternatives in char_to_alternatives_upper.iteritems():
+    char_to_alternatives[char] = alternatives
 
 alternative_to_char = {} # reverse
 for char,alternatives in char_to_alternatives_lower.iteritems():
     for alternative in alternatives:
         alternative_to_char[alternative] = char
         alternative_to_char[alternative.upper()] = char.upper()
 
 alternative_to_char = {} # reverse
 for char,alternatives in char_to_alternatives_lower.iteritems():
     for alternative in alternatives:
         alternative_to_char[alternative] = char
         alternative_to_char[alternative.upper()] = char.upper()
+for char,alternatives in char_to_alternatives_upper.iteritems():
+    for alternative in alternatives:
+        alternative_to_char[alternative] = char
 
 # ligatures (only two chars supported)
 ligatures_expansions_lower = {
 
 # ligatures (only two chars supported)
 ligatures_expansions_lower = {
@@ -104,10 +112,20 @@ def decorated_match(txt, casesensitive=False):
 
 if __name__ == '__main__':
     import sys
 
 if __name__ == '__main__':
     import sys
-    if len(sys.argv)<2:
+    from optparse import OptionParser
+    parser = OptionParser(usage='%prog [options] string')
+    parser.add_option('--charset', help="set charset. default=%default", action='store', dest='charset', default='utf-8')
+    parser.add_option('-r', '--regexp', help="generate regular expression.", action='store_true', dest='regexp')
+    (options, args) = parser.parse_args()
+
+    if not args:
         print >> sys.stderr, u'Missing required parameter. Try "Œuf"'
         sys.exit(1)
         print >> sys.stderr, u'Missing required parameter. Try "Œuf"'
         sys.exit(1)
-    input = unicode(' '.join(sys.argv[1:]), 'utf-8')
-    print "input:", input                            # Œuf
-    print "undecorated:", remove_decoration(input)   # Oeuf
-    print "regex:", decorated_match(input)           # (œ|Œ|[oòóôöøōŏőOÒÓÔÖØŌŎŐ][eèéêëēĕėęěEÈÉÊËĒĔĖĘĚ])[uùúûüũūŭůűųUÙÚÛÜŨŪŬŮŰŲ][fF]
+    input = unicode(' '.join(args), options.charset)
+    #print "input:", input                            # Œuf
+    #print "undecorated:", remove_decoration(input)   # Oeuf
+    #print "regex:", decorated_match(input)           # (œ|Œ|[oòóôöøōŏőOÒÓÔÖØŌŎŐ][eèéêëēĕėęěEÈÉÊËĒĔĖĘĚ])[uùúûüũūŭůűųUÙÚÛÜŨŪŬŮŰŲ][fF]
+    if options.regexp:
+        print decorated_match(input).encode(options.charset)
+    else:
+        print remove_decoration(input).encode(options.charset)