Removed some trace messages
[banquepostale.git] / go.py
1 #!/usr/bin/env python3
2
3 import os
4 import time
5 import re
6 import logging
7 from datetime import datetime
8 import urllib.request
9 from http.cookiejar import CookieJar
10 from subprocess import Popen, PIPE, call
11
12 import html_parser
13 import htmlentities
14
# Root URL of the bank's online account site; request paths are appended to it.
BASE_URL = 'https://voscomptesenligne.labanquepostale.fr'
# Path prefix of login-form image URLs. Not referenced by any active code in this file.
WSOST_PREFIX = '/wsost/OstBrokerWeb/loginform?imgid='

# Directory holding the 'config' credentials file and the downloaded csv files.
# File names are appended by plain concatenation, so it must end with '/'.
LOCAL_DIR = '/home/nirgal/banquepostale/'
19
def get_login_password():
    '''
    Read the credentials from the 'config' file located in LOCAL_DIR.

    A line starting with 'login' (resp. 'password') provides the login
    (resp. password): the value is whatever follows the keyword, stripped
    of surrounding whitespace. When a keyword appears on several lines,
    the last one wins.

    Return a (login, password) tuple. An item is None when no line
    starts with the matching keyword.
    '''
    login = None
    password = None
    # The context manager closes the config file as soon as it has been read.
    with open(LOCAL_DIR + 'config') as config_file:
        config = config_file.read()
    for line in config.splitlines():
        if line.startswith('login'):
            login = line[len('login'):].strip()
        elif line.startswith('password'):
            password = line[len('password'):].strip()
    return login, password
30
__opener__ = None
def httpopen(url, post_data=None, headers=None):
    '''
    Open url through a shared, cookie-aware opener and return the response.

    url: address to request.
    post_data: optional request body. A str is encoded as UTF-8 before
        being sent. urllib issues a POST whenever the body is not None.
    headers: optional dict of extra request headers.

    The opener is built on first use and kept in the module-level
    __opener__, so the same cookie jar is reused by every call.
    Exceptions raised by urllib while opening the url are not caught.
    '''
    global __opener__
    # Test against None: an empty body is still sent as a POST.
    if post_data is not None:
        logging.debug('HTTP POST %s %s', url, post_data)
    else:
        logging.debug('HTTP GET %s', url)
    if __opener__ is None:
        cookiejar = CookieJar()
        __opener__ = urllib.request.build_opener(
            urllib.request.HTTPCookieProcessor(cookiejar))
        __opener__.addheaders = [('User-Agent', 'Mozilla/5.0')]
    if isinstance(post_data, str):
        post_data = post_data.encode('utf-8')
    if headers is None:
        # A fresh dict per call instead of a shared mutable default.
        headers = {}
    req = urllib.request.Request(url, post_data, headers)
    return __opener__.open(req)
48
def sleep(seconds):
    '''
    Pause the program for the given number of seconds.

    The wait is traced at debug level before it starts.
    '''
    logging.log(logging.DEBUG, 'Waiting %s seconds', seconds)
    time.sleep(seconds)
52
def _save_text(filename, text, encoding):
    '''Write text to filename using encoding, closing the file afterwards.'''
    with open(filename, 'w', encoding=encoding) as output:
        output.write(text)


def _fetch_html(url, dump_filename, post_data=None, encoding='iso8859-1'):
    '''Fetch url, decode its body, save a copy in dump_filename and return the text.'''
    html = httpopen(url, post_data).read().decode(encoding)
    _save_text(dump_filename, html, encoding)
    return html


def _require_marker(html, marker):
    '''Raise RuntimeError when marker does not appear in html.'''
    if marker not in html:
        raise RuntimeError('Unexpected page content: %r not found' % marker)


def _ocr_grid_choices():
    '''
    Run OCR on the 16 cells of the 4x4 grid stored in loginform.gif.

    Return a dict mapping the text recognised in a cell to the index of
    that cell (0 to 15, counted row by row).
    '''
    xlt_password = {}
    for choice in range(16):
        row, column = divmod(choice, 4)
        proc_convert = Popen('convert loginform.gif -crop 60x60+%s+%s pnm:-' % (column*64, row*64),
            shell=True, stdout=PIPE)
        proc_gocr = Popen('gocr -C 0-9 -i -',
            shell=True, stdin=proc_convert.stdout, stdout=PIPE)
        # Drop our copy of the pipe so that convert gets a SIGPIPE if gocr exits early.
        proc_convert.stdout.close()
        output = proc_gocr.communicate()[0].decode('utf-8').strip()
        proc_convert.wait() # reap the child process
        xlt_password[output] = choice
    return xlt_password


def download():
    '''
    Download all the accounts csv data and store them in LOCAL_DIR.

    Each csv is saved as '<account>.<timestamp>.csv' and the symbolic link
    '<account>.last.csv' is updated to point to it. Copies of the
    intermediate pages are written to the current directory.

    Return the list of account numbers found. The list is empty when the
    login could not be attempted (login image not found, credentials
    missing, or password digits not recognised in the image).
    Raise RuntimeError when a page of the login sequence does not contain
    the expected link.
    '''
    result = []

    logging.info('Downloading initial request')
    # The body is not used. Presumably this request is needed to start the session.
    httpopen(BASE_URL + '/voscomptes/canalXHTML/identif.ea?origin=particuliers').read()

    logging.info('Downloading password form')
    urllogin = BASE_URL + '/wsost/OstBrokerWeb/loginform?TAM_OP=login&ERROR_CODE=0x00000000&URL=/voscomptes/canalXHTML/identif.ea?origin=particuliers'
    html = _fetch_html(urllogin, 'login.html', encoding='utf-8')

    match = re.search(r'(loginform\?imgid=allunifie2&[^)"]*)', html, re.MULTILINE)
    if match is None:
        logging.critical('Login form image not found!')
        return []

    img = httpopen(BASE_URL + '/wsost/OstBrokerWeb/' + match.group(1)).read()
    with open('loginform.gif', 'wb') as imgfile:
        imgfile.write(img)

    xlt_password = _ocr_grid_choices()

    LOGIN, PASSWORD = get_login_password()
    if LOGIN is None or PASSWORD is None:
        logging.critical('Login or password missing from config')
        return []

    # Every character of the password must have been located in the image.
    unrecognised = [c for c in PASSWORD if c not in xlt_password]
    if unrecognised:
        logging.critical('%d password character(s) not recognised in login form image', len(unrecognised))
        return []

    # The password is sent as the two-digit grid position of each character.
    shuffled_password = ''.join('%02d' % xlt_password[c] for c in PASSWORD)
    logging.info("shuffled_password: %s", shuffled_password)

    sleep(10) # We are not supermen

    post_data = 'urlbackend=%2Fvoscomptes%2FcanalXHTML%2Fidentif.ea%3Forigin%3Dparticuliers&origin=particuliers&password=' + shuffled_password + '&cv=true&cvvs=&username=' + LOGIN
    html = _fetch_html(BASE_URL + '/wsost/OstBrokerWeb/auth', 'welcome.html', post_data)

    _require_marker(html, 'initialiser-identif.ea')
    html = _fetch_html(BASE_URL + '/voscomptes/canalXHTML/securite/authentification/initialiser-identif.ea', 'welcome2.html')

    _require_marker(html, 'verifierMotDePasse-identif.ea')
    html = _fetch_html(BASE_URL + '/voscomptes/canalXHTML/securite/authentification/verifierMotDePasse-identif.ea', 'welcome3.html')

    _require_marker(html, 'init-aiguillagePersonnalisation.ea')
    html = _fetch_html(BASE_URL + '/voscomptes/canalXHTML/donneesPersonnelles/aiguillage_personnalisation/init-aiguillagePersonnalisation.ea', 'welcome4.html')

    # The page must mention init-synthese.ea, but the accounts summary is fetched directly.
    _require_marker(html, 'init-synthese.ea')
    html = _fetch_html(BASE_URL + '/voscomptes/canalXHTML/comptesCommun/synthese_assurancesEtComptes/afficheSyntheseComptes-synthese.ea', 'welcome5.html')
    sleep(3)

    root = html_parser.html_parse(html)
    for a in html_parser.get_elem(root, 'a'):
        href = a.attributes.get('href', '')
        href = htmlentities.resolve(href)
        match = re.match(r'\.\./\.\./(...)/.*compte.numero=(.*)&typeRecherche=(.*)', href)
        if not match:
            continue

        logging.debug(href)
        #../../CCP/releves_ccp/menuReleve-releve_ccp.ea?compte.numero=*******&typeRecherche=1
        # https://voscomptesenligne.labanquepostale.fr/voscomptes/canalXHTML/CCP/releves_ccp/menuReleve-releve_ccp.ea?compte.numero=*******&typeRecherche=1
        cpttype, cptnum, searchtype = match.groups()

        logging.info('Found account type %s: %s', cpttype, cptnum)
        result.append(cptnum)

        _fetch_html(BASE_URL + '/voscomptes/canalXHTML/' + href[len('../../'):], cptnum+'-init.html')
        sleep(4)

        # https://voscomptesenligne.labanquepostale.fr/voscomptes/canalXHTML/comptesCommun/telechargementMouvement/init-telechargementMouvements.ea?compte.numero=*********&typeRecherche=1&typeMouvements=CCP
        _fetch_html(BASE_URL + '/voscomptes/canalXHTML/comptesCommun/telechargementMouvement/init-telechargementMouvements.ea?compte.numero=' + cptnum + '&typeRecherche='+ searchtype +'&typeMouvements=' + cpttype, cptnum+'-init2.html')
        sleep(4)

        confirm_html = _fetch_html(BASE_URL + '/voscomptes/canalXHTML/comptesCommun/telechargementMouvement/detailCompte2-telechargementMouvements.ea', cptnum+'-confirm.html')
        sleep(9)

        # Start from None for each account so that a missing form is really
        # detected instead of silently reusing a previous url.
        csv_action = None
        for form in html_parser.get_elem(html_parser.html_parse(confirm_html), 'form'):
            if form.attributes.get('id', None) == 'formConfirmAgain':
                csv_action = form.attributes['action']
        if not csv_action:
            logging.critical("Can't find link to download csv")
            continue # skip this account, try the next one

        # /voscomptes/canalXHTML/comptesCommun/telechargementMouvement/preparerRecherche-telechargementMouvements.ea?ts=1304816124318 POST 'format=CSV&duree='
        httpresponse = httpopen(BASE_URL + '/voscomptes/canalXHTML/comptesCommun/telechargementMouvement/' + csv_action, 'format=CSV&duree=')
        filename = LOCAL_DIR + cptnum + '.' + datetime.now().strftime('%Y%m%dT%H%M%S') + '.csv'
        csvdata = httpresponse.read().decode('iso8859-1')
        logging.info('Save CSV data to %s', filename)
        _save_text(filename, csvdata, 'utf-8')
        sleep(9)

        # Make '<account>.last.csv' point to the file just downloaded.
        lastfilename = LOCAL_DIR + cptnum + '.last.csv'
        try:
            os.unlink(lastfilename)
        except OSError as err:
            if err.errno == 2: #No such file or directory
                logging.warning('Could not find last csv link. Running for the first time?')
            else:
                raise
        os.symlink(filename, lastfilename)

    logging.info('Disconnecting')
    _fetch_html(BASE_URL + '/voscomptes/canalXHTML/securite/deconnexion/init-deconnexion.ea', 'bye.html')

    logging.info('Disconnected')
    return result
221
222
def agregate():
    '''
    Create the master csv file of each account from its latest download.

    For every '<account>.last.csv' entry of LOCAL_DIR, '<account>.csv' is
    created with the lines that follow the csv header line; empty lines
    are dropped. When the master file already exists it is left
    untouched: merging new data into it is not implemented.
    '''
    # Header line that precedes the data lines in the latest csv file.
    CSV_HEADER = 'Date;Libellé;Montant(EUROS);Montant(FRANCS)'

    for account_file in os.listdir(LOCAL_DIR):
        if not account_file.endswith('.last.csv'):
            continue

        account = account_file[:-len('.last.csv')]
        logging.debug('Agregating %s', account)

        masterfilename = LOCAL_DIR + account + '.csv'
        if os.access(masterfilename, os.F_OK):
            logging.debug('Not implemented')
            continue

        logging.warning('Master csv file not found for %s: creating', account)
        # The source is opened first so that an unreadable file does not leave
        # an empty master file behind. It is read as utf-8 and the master file
        # is written as utf-8. Both files are closed when the block exits.
        with open(LOCAL_DIR + account_file, encoding='utf-8') as lastfile, \
                open(masterfilename, 'w', encoding='utf-8') as masterfile:
            past_headers = False
            for line in lastfile:
                line = line.rstrip('\n')
                logging.debug('line: %s', line)
                if not line:
                    continue
                if past_headers:
                    masterfile.write(line + '\n')
                elif line == CSV_HEADER:
                    past_headers = True
250
251
if __name__ == '__main__':
    from optparse import OptionParser

    # Command line: -d/--debug turns on debug logging, --no-download skips
    # the download step and only runs the aggregation.
    parser = OptionParser()
    parser.add_option(
        '-d', '--debug',
        dest='debug', action='store_true', default=False,
        help="debug mode")
    parser.add_option(
        '--no-download',
        dest='no_download', action='store_true', default=False,
        help="don't download. Only agregate.")
    (options, args) = parser.parse_args()

    logging.basicConfig(
        level=logging.DEBUG if options.debug else logging.INFO,
        format='%(asctime)s %(levelname)s %(message)s')

    os.umask(0o077) # this is really private

    # Work from LOCAL_DIR/tmp/, creating it when it does not exist yet.
    TMP_DIR = LOCAL_DIR + 'tmp/'
    try:
        os.mkdir(TMP_DIR)
    except OSError as err:
        if err.errno != 17: # File exists
            raise
    os.chdir(TMP_DIR)

    if not options.no_download:
        download()
    agregate()