Updated script for 2013 web changes
author Jean-Michel Nirgal Vourgère <jmv@nirgal.com>
Fri, 16 Sep 2016 20:42:54 +0000 (20:42 +0000)
committer Jean-Michel Nirgal Vourgère <jmv@nirgal.com>
Fri, 16 Sep 2016 20:42:54 +0000 (20:42 +0000)
go.py

diff --git a/go.py b/go.py
index 8a406e3f102bada2015bfea2c4e3441237593d9c..83fb47b9c249b8495634c857ec25133d131aae47 100755 (executable)
--- a/go.py
+++ b/go.py
@@ -29,7 +29,7 @@ def get_login_password():
     return login, password
 
 __opener__ = None
-def httpopen(url, post_data=None):
+def httpopen(url, post_data=None, headers={}):
     if post_data:
         logging.debug('HTTP POST %s %s', url, post_data)
     else:
@@ -39,7 +39,11 @@ def httpopen(url, post_data=None):
         cookiejar = CookieJar()
         __opener__ = urllib.request.build_opener()
         __opener__.add_handler(urllib.request.HTTPCookieProcessor(cookiejar))
-    http_response = __opener__.open(url, post_data)
+        __opener__.addheaders = [('User-Agent', 'Mozilla/5.0')]
+    if isinstance(post_data, str):
+        post_data = post_data.encode('utf-8')
+    req = urllib.request.Request(url, post_data, headers)
+    http_response = __opener__.open(req)
     return http_response
 
 def sleep(seconds):
@@ -53,56 +57,79 @@ def download():
     '''
     result = []
 
+    logging.info('Downloading initial request')
+    httpresponse = httpopen(BASE_URL + '/voscomptes/canalXHTML/identif.ea?origin=particuliers')
+    #logging.debug(httpresponse.info())
+    html = httpresponse.read().decode('utf-8')
+
+
     logging.info('Downloading password form')
-    httpresponse = httpopen(BASE_URL + '/voscomptes/canalXHTML/securite/authentification/recupererPointEntree-identif.ea')
-    html = httpresponse.read().decode('iso8859-1')
+    urllogin = BASE_URL + '/wsost/OstBrokerWeb/loginform?TAM_OP=login&ERROR_CODE=0x00000000&URL=/voscomptes/canalXHTML/identif.ea?origin=particuliers'
+    httpresponse = httpopen(urllogin)
+    html = httpresponse.read().decode('utf-8')
     #logging.debug(httpresponse.info())
-    open('login.html', 'w', encoding='iso8859-1').write(html)
+    open('login.html', 'w', encoding='utf-8').write(html)
 
-    root = html_parser.html_parse(html)
-    #html_parser.print_idented_tree(root)
-    for img in html_parser.get_elem(root, 'img'):
-        src = img.attributes.get('src', '')
-        if src.startswith(WSOST_PREFIX):
-            #print(img)
-            img_id = src[len(WSOST_PREFIX)]
-            #print(img_id)
-            httpresponse = httpopen(BASE_URL + src)
-            img = httpresponse.read()
-            open(img_id+'.gif', 'wb').write(img)
+    #html = open('login.html', encoding='utf-8').read()
+    
+    match = re.search('(loginform\?imgid=allunifie2&[^)"]*)', html, re.MULTILINE)
+    if match is None:
+        logging.critical('Login form image not found!')
+        return []
+
+    url = BASE_URL + '/wsost/OstBrokerWeb/' + match.group(1)
+    httpresponse = httpopen(url)
+    logging.debug(httpresponse.info())
+    img = httpresponse.read()
+    open('loginform.gif', 'wb').write(img)
+
+    #root = html_parser.html_parse(html)
+    ##html_parser.print_idented_tree(root)
+    #for img in html_parser.get_elem(root, 'img'):
+    #    src = img.attributes.get('src', '')
+    #    print(img)
+    #    if src.startswith(WSOST_PREFIX):
+    #        print(img)
+    #        img_id = src[len(WSOST_PREFIX)]
+    #        #print(img_id)
+    #        httpresponse = httpopen(BASE_URL + src)
+    #        img = httpresponse.read()
+    #        open(img_id+'.gif', 'wb').write(img)
 
     xlt_password = {}
-    for img_id in "0123456789":
-        proc_convert=Popen('convert %s.gif -crop 20x20+5+5 pnm:-' % img_id,
+    for choice in range(16):
+        column = choice % 4
+        row = choice // 4
+        proc_convert=Popen('convert loginform.gif -crop 60x60+%s+%s pnm:-' % (column*64, row*64),
             shell=True, stdout=PIPE)
         proc_gocr=Popen('gocr -C 0-9 -i -',
             shell=True, stdin=proc_convert.stdout, stdout=PIPE)
         output = proc_gocr.communicate()[0]
         output = output.decode('utf-8')
         output = output.strip()
-        #print("image #%s is %s" % (img_id, output))
-        xlt_password[output] = img_id
-    
+        #print("choice #%s is %s" % (choice, output))
+        xlt_password[output] = choice
+    #for i in 0 1 2 3 4 5 6 7 8 9; do convert $i.gif -crop 20x20+5+5 pnm:- | gocr -C 0-9 -i -; done
+
     LOGIN, PASSWORD = get_login_password()
 
     shuffled_password = ''
     for c in PASSWORD:
-        shuffled_password += xlt_password[c]
+        shuffled_password += '%02d' % xlt_password[c]
     logging.info("shuffled_password: %s", shuffled_password)
-    #for i in 0 1 2 3 4 5 6 7 8 9; do convert $i.gif -crop 20x20+5+5 pnm:- | gocr -C 0-9 -i -; done
 
     sleep(10) # We are not supermen
 
-    post_data='urlbackend=%2Fvoscomptes%2FcanalXHTML%2Fsecurite%2Fauthentification%2FrecupererPointEntree-identif.ea%3Forigin%3Dparticuliers&origin=particuliers&password=' + shuffled_password + '&cv=true&cvvs=&username=' + LOGIN
+    post_data='urlbackend=%2Fvoscomptes%2FcanalXHTML%2Fidentif.ea%3Forigin%3Dparticuliers&origin=particuliers&password=' + shuffled_password + '&cv=true&cvvs=&username=' + LOGIN
     httpresponse = httpopen(BASE_URL + '/wsost/OstBrokerWeb/auth', post_data)
     html = httpresponse.read().decode('iso8859-1')
-    #print(httpresponse.info())
+    print(httpresponse.info())
     open('welcome.html', 'w', encoding='iso8859-1').write(html)
 
     assert 'initialiser-identif.ea' in html
     httpresponse = httpopen(BASE_URL + '/voscomptes/canalXHTML/securite/authentification/initialiser-identif.ea')
     html = httpresponse.read().decode('iso8859-1')
-    #print(httpresponse.info())
+    print(httpresponse.info())
     open('welcome2.html', 'w', encoding='iso8859-1').write(html)
 
     assert 'verifierMotDePasse-identif.ea' in html
@@ -118,7 +145,8 @@ def download():
     open('welcome4.html', 'w', encoding='iso8859-1').write(html)
 
     assert 'init-synthese.ea' in html
-    httpresponse = httpopen(BASE_URL + '/voscomptes/canalXHTML/comptesCommun/synthese_assurancesEtComptes/init-synthese.ea')
+    httpresponse = httpopen(BASE_URL + '/voscomptes/canalXHTML/comptesCommun/synthese_assurancesEtComptes/afficheSyntheseComptes-synthese.ea')
+    #httpresponse = httpopen(BASE_URL + '/voscomptes/canalXHTML/comptesCommun/synthese_assurancesEtComptes/init-synthese.ea')
     html = httpresponse.read().decode('iso8859-1')
     #print(httpresponse.info())
     open('welcome5.html', 'w', encoding='iso8859-1').write(html)