First version of the Banque Postale scripts
[banquepostale.git] / go.py
1 #!/usr/bin/env python3
2
3 import os
4 import time
5 import re
6 import logging
7 from datetime import datetime
8 import urllib.request
9 from http.cookiejar import CookieJar
10 from subprocess import Popen, PIPE
11
12 import html_parser
13 import htmlentities
14
# Scheme and host of the online banking site; request paths are appended to it.
BASE_URL = "https://voscomptesenligne.labanquepostale.fr"
# Path prefix of the keypad images found on the login page; the character
# that follows this prefix in an image's src is used as the image id.
WSOST_PREFIX = "/wsost/OstBrokerWeb/loginform?imgid="

# Directory holding the 'config' credentials file, the downloaded csv files
# and the 'tmp/' working directory.
LOCAL_DIR = "/home/nirgal/banquepostale/"
19
def get_login_password():
    '''
    Read the credentials from the 'config' file located in LOCAL_DIR.

    A line starting with 'login' (resp. 'password') provides the login
    (resp. password): the value is whatever follows the keyword, with the
    surrounding whitespace stripped. Other lines are ignored and, when a
    keyword appears several times, the last occurrence wins.

    Return a (login, password) tuple; an item is None when the file has no
    line for it.
    '''
    login = None
    password = None
    # The context manager closes the file even when reading it fails.
    with open(LOCAL_DIR + 'config') as config_file:
        config = config_file.read()
    for line in config.splitlines():
        if line.startswith('login'):
            login = line[len('login'):].strip()
        elif line.startswith('password'):
            password = line[len('password'):].strip()
    return login, password
30
# Shared opener, built by the first httpopen() call and reused afterwards so
# that the cookies received in one response are sent with the next requests.
__opener__ = None
def httpopen(url, post_data=None):
    '''
    Open url with the shared cookie-aware opener and return the response.

    url: the address to request.
    post_data: optional request body, handed to the opener as the request
        data. It may be bytes or str: urllib.request refuses str bodies
        for HTTP(S) requests (TypeError), so a str is encoded as UTF-8
        first.

    Return the response object produced by the opener's open().
    Errors raised by the opener are not caught here.
    '''
    global __opener__
    # NOTE(review): the body is logged verbatim at debug level; for the
    # authentication request it contains the login and the keypad password.
    if post_data:
        logging.debug('HTTP POST %s %s', url, post_data)
    else:
        logging.debug('HTTP GET %s', url)
    if isinstance(post_data, str):
        post_data = post_data.encode('utf-8')
    if __opener__ is None:
        cookiejar = CookieJar()
        __opener__ = urllib.request.build_opener()
        __opener__.add_handler(urllib.request.HTTPCookieProcessor(cookiejar))
    return __opener__.open(url, post_data)
44
def sleep(seconds):
    '''
    Pause the script for the given number of seconds.

    The pause is announced at debug level before blocking in time.sleep().
    '''
    logging.debug('Waiting %s seconds', seconds)
    time.sleep(seconds)
48
def _save_page(filename, text):
    '''Keep a copy of a downloaded page in filename (iso8859-1 encoded).'''
    with open(filename, 'w', encoding='iso8859-1') as dump:
        dump.write(text)


def _fetch_page(path, dump_name, post_data=None):
    '''
    Request BASE_URL + path, save the decoded body in dump_name and
    return it.
    '''
    html = httpopen(BASE_URL + path, post_data).read().decode('iso8859-1')
    _save_page(dump_name, html)
    return html


def _expect_link(html, marker):
    '''
    Raise AssertionError unless marker appears in html.

    An explicit raise is used rather than an assert statement so that the
    check is still performed when python runs with -O.
    '''
    if marker not in html:
        raise AssertionError('%r not found in the server response' % marker)


def _ocr_keypad_image(img_id):
    '''
    Return the text that gocr reads in the saved keypad image <img_id>.gif.

    Runs the pipeline: convert <img_id>.gif -crop 20x20+5+5 pnm:- | gocr -C 0-9 -i -
    '''
    proc_convert = Popen(
        ['convert', '%s.gif' % img_id, '-crop', '20x20+5+5', 'pnm:-'],
        stdout=PIPE)
    proc_gocr = Popen(['gocr', '-C', '0-9', '-i', '-'],
        stdin=proc_convert.stdout, stdout=PIPE)
    # Only gocr must hold the read end of the pipe, so that convert gets
    # SIGPIPE if gocr exits early.
    proc_convert.stdout.close()
    output = proc_gocr.communicate()[0]
    proc_convert.wait() # reap the child, no zombie left behind
    return output.decode('utf-8').strip()


def main():
    '''
    Download all the accounts csv data and store them in LOCAL_DIR
    Return a list of filenames

    The html pages and keypad images fetched on the way are saved in the
    current directory.
    Raise ValueError when the config file lacks the login or the password,
    KeyError when a password character cannot be mapped to a keypad image,
    and AssertionError when a page of the login sequence does not contain
    the expected link.
    '''
    canal = '/voscomptes/canalXHTML'
    auth = canal + '/securite/authentification/'
    download = canal + '/comptesCommun/telechargementMouvement/'
    result = []

    logging.info('Downloading password form')
    html = _fetch_page(auth + 'recupererPointEntree-identif.ea', 'login.html')

    # Save every keypad image as <id>.gif, where <id> is the character that
    # follows WSOST_PREFIX in the image address.
    root = html_parser.html_parse(html)
    for img in html_parser.get_elem(root, 'img'):
        src = img.attributes.get('src', '')
        if src.startswith(WSOST_PREFIX):
            img_id = src[len(WSOST_PREFIX)]
            with open(img_id + '.gif', 'wb') as gif:
                gif.write(httpopen(BASE_URL + src).read())

    # Map the digit recognised in each image to the id of that image.
    xlt_password = {}
    for img_id in "0123456789":
        xlt_password[_ocr_keypad_image(img_id)] = img_id

    login, password = get_login_password()
    if login is None or password is None:
        raise ValueError('login or password missing in %sconfig' % LOCAL_DIR)
    if any(c not in xlt_password for c in password):
        # The password itself is deliberately kept out of the message.
        raise KeyError('a password character matches no keypad image '
            '(recognised: %s)' % sorted(xlt_password))

    shuffled_password = ''.join(xlt_password[c] for c in password)
    # NOTE(review): this writes the keypad-translated password to the log.
    logging.info("shuffled_password: %s", shuffled_password)

    sleep(10) # We are not supermen

    post_data = 'urlbackend=%2Fvoscomptes%2FcanalXHTML%2Fsecurite%2Fauthentification%2FrecupererPointEntree-identif.ea%3Forigin%3Dparticuliers&origin=particuliers&password=' + shuffled_password + '&cv=true&cvvs=&username=' + login
    # urllib.request only accepts bytes as the body of an HTTP request.
    html = _fetch_page('/wsost/OstBrokerWeb/auth', 'welcome.html',
        post_data.encode('utf-8'))

    # Follow the chain of pages: each one must link to the next one.
    _expect_link(html, 'initialiser-identif.ea')
    html = _fetch_page(auth + 'initialiser-identif.ea', 'welcome2.html')

    _expect_link(html, 'verifierMotDePasse-identif.ea')
    html = _fetch_page(auth + 'verifierMotDePasse-identif.ea', 'welcome3.html')

    _expect_link(html, 'init-synthese.ea')
    html = _fetch_page(
        canal + '/comptesCommun/synthese_assurancesEtComptes/init-synthese.ea',
        'welcome4.html')
    sleep(3)

    # Account links look like:
    #../../CCP/releves_ccp/menuReleve-releve_ccp.ea?compte.numero=*******&typeRecherche=1
    account_link = re.compile(
        r'\.\./\.\./(...)/.*compte.numero=(.*)&typeRecherche=(.*)')
    root = html_parser.html_parse(html)
    for a in html_parser.get_elem(root, 'a'):
        href = htmlentities.resolve(a.attributes.get('href', ''))
        match = account_link.match(href)
        if not match:
            continue
        logging.debug(href)
        cpttype, cptnum, searchtype = match.groups()

        logging.info('Found account type %s: %s', cpttype, cptnum)

        # href is relative to canalXHTML/<x>/<y>/, hence the '../../' to drop
        _fetch_page(canal + '/' + href[len('../../'):], cptnum + '-init.html')
        sleep(4)

        _fetch_page(
            download + 'init-telechargementMouvements.ea?compte.numero=' + cptnum + '&typeRecherche=' + searchtype + '&typeMouvements=' + cpttype,
            cptnum + '-init2.html')
        sleep(4)

        confirm_html = _fetch_page(
            download + 'detailCompte2-telechargementMouvements.ea',
            cptnum + '-confirm.html')
        sleep(9)

        # Reset for each account so that a link found for a previous
        # account is never reused.
        url = None
        confirm_root = html_parser.html_parse(confirm_html)
        for form in html_parser.get_elem(confirm_root, 'form'):
            if form.attributes.get('id', None) == 'formConfirmAgain':
                url = form.attributes.get('action')
        if not url:
            logging.critical("Can't find link to download csv")
            continue # skip this account, try the next one

        # e.g. preparerRecherche-telechargementMouvements.ea?ts=1304816124318 POST 'format=CSV&duree='
        httpresponse = httpopen(BASE_URL + download + url, b'format=CSV&duree=')
        filename = LOCAL_DIR + cptnum + '.' + datetime.now().strftime('%Y%m%dT%H%M%S') + '.csv'
        csvdata = httpresponse.read().decode('iso8859-1')
        logging.info('Save CSV data to %s', filename)
        with open(filename, 'w', encoding='utf-8') as csvfile:
            csvfile.write(csvdata)
        result.append(filename)
        sleep(9)

    logging.info('Disconnecting')
    _fetch_page(canal + '/securite/deconnexion/init-deconnexion.ea', 'bye.html')

    logging.info('Disconnected')
    return result
176
# Script entry point: parse the command line, set up logging, then run
# main() from a private working directory.
if __name__ == '__main__':
    from optparse import OptionParser
    parser = OptionParser()
    parser.add_option('-d', '--debug',
        action='store_true', dest='debug', default=False,
        help="debug mode")
    (options, args) = parser.parse_args()

    loglevel = logging.DEBUG if options.debug else logging.INFO
    logging.basicConfig(level=loglevel, format='%(asctime)s %(levelname)s %(message)s')

    # From here on, new files and directories get no group/other permission.
    os.umask(0o077)
    TMP_DIR = LOCAL_DIR + 'tmp/'
    try:
        os.mkdir(TMP_DIR)
    except FileExistsError:
        # Left over from a previous run (errno EEXIST): reuse it.
        pass
    # main() saves the pages and images it fetches in the current directory.
    os.chdir(TMP_DIR)

    main()