--- /dev/null
+.TH decoratedstr 1 "May 27, 2009" "" "User Commands"
+.SH NAME
+decoratedstr \- decorated characters tools
+.SH SYNOPSIS
+.B decoratedstr [\-h|\-\-help] [\-\-charset=CHARSET] [\-r|\-\-regexp]
+.I string
+.SH DESCRIPTION
+.PP
+Remove decorations - such as accents and rings - from characters, and expand
+ligatures.
+Optionally print a matching regular expression.
+.SH EXAMPLE
+.BI "decoratedstr \(OEuf à la poëlle"
+.PP
+Oeuf a la poelle
+.PP
+.BI "decoratedstr \-r oeuf"
+.PP
+(\(oe|\(OE|[oòóôöøōŏőOÒÓÔÖØŌŎŐ][eèéêëēĕėęěEÈÉÊËĒĔĖĘĚ])[uùúûüũūŭůűųUÙÚÛÜŨŪŬŮŰŲ][fF]
+.SH NOTES
+.PP
+No note available.
+.SH BUGS
+Python doesn't detect the LANG charset in the environment. You will need to use the \-\-charset option
+if you don't use UTF\-8.
+.SH "AUTHOR"
+Jean-Michel Vourg\(`ere
+.mso www.tmac
+.URL "http://www.nirgal.com/"
if __name__ == '__main__':
import sys
- if len(sys.argv)<2:
+ from optparse import OptionParser
+ parser = OptionParser(usage='%prog [options] string')
+ parser.add_option('--charset', help="set charset. default=%default", action='store', dest='charset', default='utf-8')
+ parser.add_option('-r', '--regexp', help="generate regular expression.", action='store_true', dest='regexp')
+ (options, args) = parser.parse_args()  # args: positional arguments left after option parsing
+
+ if not args:  # no input string was given on the command line
print >> sys.stderr, u'Missing required parameter. Try "Œuf"'
sys.exit(1)
- input = unicode(' '.join(sys.argv[1:]), 'utf-8')
- print "input:", input # Œuf
- print "undecorated:", remove_decoration(input) # Oeuf
- print "regex:", decorated_match(input) # (œ|Œ|[oòóôöøōŏőOÒÓÔÖØŌŎŐ][eèéêëēĕėęěEÈÉÊËĒĔĖĘĚ])[uùúûüũūŭůűųUÙÚÛÜŨŪŬŮŰŲ][fF]
+ input = unicode(' '.join(args), options.charset)  # rejoin the words with spaces, then decode using --charset; NOTE(review): shadows the builtin input()
+ #print "input:", input # Œuf
+ #print "undecorated:", remove_decoration(input) # Oeuf
+ #print "regex:", decorated_match(input) # (œ|Œ|[oòóôöøōŏőOÒÓÔÖØŌŎŐ][eèéêëēĕėęěEÈÉÊËĒĔĖĘĚ])[uùúûüũūŭůűųUÙÚÛÜŨŪŬŮŰŲ][fF]
+ if options.regexp:  # -r/--regexp: print the result of decorated_match()
+ print decorated_match(input).encode(options.charset)
+ else:  # default: print the result of remove_decoration()
+ print remove_decoration(input).encode(options.charset)