From 6ef7d1c732fe4046d0f56233c7e6682f82da139d Mon Sep 17 00:00:00 2001 From: =?utf8?q?Jean-Michel=20Nirgal=20Vourg=C3=A8re?= Date: Wed, 27 May 2009 17:57:03 +0000 Subject: [PATCH] Added command line interface with charset option, regexp optional, help. Added a link in /usr/bin Added a man --- debian/links | 2 +- debian/rules | 2 +- decoratedstr.1 | 29 +++++++++++++++++++++++++++++ decoratedstr.py | 20 +++++++++++++++----- 4 files changed, 46 insertions(+), 7 deletions(-) create mode 100644 decoratedstr.1 diff --git a/debian/links b/debian/links index 871fcc3..e43a9e9 100644 --- a/debian/links +++ b/debian/links @@ -1 +1 @@ -#/decoratedstr.py /usr/bin/decoratedstr +/usr/share/python-support/python-decoratedstr/decoratedstr.py /usr/bin/decoratedstr diff --git a/debian/rules b/debian/rules index b5566a3..0a216e9 100755 --- a/debian/rules +++ b/debian/rules @@ -56,7 +56,7 @@ binary-indep: build install # dh_installmime # dh_installinit # dh_installcron -# dh_installman man/* + dh_installman *.1 dh_installinfo dh_installchangelogs dh_link diff --git a/decoratedstr.1 b/decoratedstr.1 new file mode 100644 index 0000000..c46cd7f --- /dev/null +++ b/decoratedstr.1 @@ -0,0 +1,29 @@ +.TH decoratedstr 1 "May 27, 2009" "" "User Commands" +.SH NAME +decoratedstr \- decorated characters tools +.SH SYNOPSIS +.B decoratedstr [\-h|\-\-help] [\-\-charset charset] [\-r|\-\-regexp] +.I string +.SH DESCRIPTION +.PP +Remove decorations - such as accents and rings - from characters, expand +ligatures. +Optionally prints a matching regular expression. +.SH EXAMPLE +.BI "decoratedstr \(OEuf à la poëlle" +.PP +Oeuf a la poelle +.PP +.BI "decoratedstr \-r oeuf" +.PP +(\(oe|\(OE|[oòóôöøōŏőOÒÓÔÖØŌŎŐ][eèéêëēĕėęěEÈÉÊËĒĔĖĘĚ])[uùúûüũūŭůűųUÙÚÛÜŨŪŬŮŰŲ][fF] +.SH NOTES +.PP +No note available. +.SH BUGS +Python doesn't detect the LANG charset in the environment. You will need to use the \-\-charset option +if you don't use UTF\-8. 
+.SH "AUTHOR" +Jean-Michel Vourg\(`ere +.mso www.tmac +.URL "http://www.nirgal.com/" diff --git a/decoratedstr.py b/decoratedstr.py index 50aff67..1dc8737 100755 --- a/decoratedstr.py +++ b/decoratedstr.py @@ -112,10 +112,20 @@ def decorated_match(txt, casesensitive=False): if __name__ == '__main__': import sys - if len(sys.argv)<2: + from optparse import OptionParser + parser = OptionParser(usage='%prog [options] string') + parser.add_option('--charset', help="set charset. default=%default", action='store', dest='charset', default='utf-8') + parser.add_option('-r', '--regexp', help="generate regular expression.", action='store_true', dest='regexp') + (options, args) = parser.parse_args() + + if not args: print >> sys.stderr, u'Missing required parameter. Try "Œuf"' sys.exit(1) - input = unicode(' '.join(sys.argv[1:]), 'utf-8') - print "input:", input # Œuf - print "undecorated:", remove_decoration(input) # Oeuf - print "regex:", decorated_match(input) # (œ|Œ|[oòóôöøōŏőOÒÓÔÖØŌŎŐ][eèéêëēĕėęěEÈÉÊËĒĔĖĘĚ])[uùúûüũūŭůűųUÙÚÛÜŨŪŬŮŰŲ][fF] + input = unicode(' '.join(args), options.charset) + #print "input:", input # Œuf + #print "undecorated:", remove_decoration(input) # Oeuf + #print "regex:", decorated_match(input) # (œ|Œ|[oòóôöøōŏőOÒÓÔÖØŌŎŐ][eèéêëēĕėęěEÈÉÊËĒĔĖĘĚ])[uùúûüũūŭůűųUÙÚÛÜŨŪŬŮŰŲ][fF] + if options.regexp: + print decorated_match(input).encode(options.charset) + else: + print remove_decoration(input).encode(options.charset) -- 2.30.2