Added command line interface with charset option, optional regexp, help.
[decoratedstr.git] / decoratedstr.py
index 50aff673004e6642a9ad68eec898c80cb9463411..1dc8737028190caf7a99f3a9ac66320ddc7e78fc 100755 (executable)
@@ -112,10 +112,20 @@ def decorated_match(txt, casesensitive=False):
 
 if __name__ == '__main__':
     import sys
-    if len(sys.argv)<2:
+    from optparse import OptionParser
+    parser = OptionParser(usage='%prog [options] string')
+    parser.add_option('--charset', help="set charset. default=%default", action='store', dest='charset', default='utf-8')
+    parser.add_option('-r', '--regexp', help="generate regular expression.", action='store_true', dest='regexp')
+    (options, args) = parser.parse_args()
+
+    if not args:
         print >> sys.stderr, u'Missing required parameter. Try "Œuf"'
         sys.exit(1)
-    input = unicode(' '.join(sys.argv[1:]), 'utf-8')
-    print "input:", input                            # Œuf
-    print "undecorated:", remove_decoration(input)   # Oeuf
-    print "regex:", decorated_match(input)           # (œ|Œ|[oòóôöøōŏőOÒÓÔÖØŌŎŐ][eèéêëēĕėęěEÈÉÊËĒĔĖĘĚ])[uùúûüũūŭůűųUÙÚÛÜŨŪŬŮŰŲ][fF]
+    input = unicode(' '.join(args), options.charset)
+    #print "input:", input                            # Œuf
+    #print "undecorated:", remove_decoration(input)   # Oeuf
+    #print "regex:", decorated_match(input)           # (œ|Œ|[oòóôöøōŏőOÒÓÔÖØŌŎŐ][eèéêëēĕėęěEÈÉÊËĒĔĖĘĚ])[uùúûüũūŭůűųUÙÚÛÜŨŪŬŮŰŲ][fF]
+    if options.regexp:
+        print decorated_match(input).encode(options.charset)
+    else:
+        print remove_decoration(input).encode(options.charset)