7 from datetime import datetime
9 from http.cookiejar import CookieJar
10 from subprocess import Popen, PIPE, call
# Scheme and host of the online banking site. Request URLs in this file are
# built by appending an absolute path (starting with '/') to this value.
15 BASE_URL = 'https://voscomptesenligne.labanquepostale.fr'
# Path prefix, up to and including the 'imgid=' query key, of the login-form
# image URLs served by the site.
16 WSOST_PREFIX = '/wsost/OstBrokerWeb/loginform?imgid='
# Local working directory: holds the 'config' credentials file and receives
# the downloaded CSV files. The trailing slash matters because paths are
# built by plain string concatenation (LOCAL_DIR + name).
18 LOCAL_DIR = '/home/nirgal/banquepostale/'
def get_login_password():
    """Read the bank credentials from the ``config`` file in LOCAL_DIR.

    The file is scanned line by line. A line starting with ``login`` supplies
    the login and a line starting with ``password`` supplies the password; in
    both cases the value is the remainder of the line with surrounding
    whitespace stripped. If a key appears several times, the last one wins.

    Returns:
        A ``(login, password)`` tuple. An entry is ``None`` when the
        corresponding line is absent from the file.

    Raises:
        OSError: if the config file cannot be opened or read.
    """
    # Pre-bind both names so a config file missing one of the keys yields
    # None instead of failing with UnboundLocalError at the return statement.
    login = None
    password = None
    # The context manager guarantees the file handle is closed, even when
    # reading fails part-way through.
    with open(LOCAL_DIR + 'config') as config_file:
        for line in config_file.read().splitlines():
            if line.startswith('login'):
                login = line[len('login'):].strip()
            elif line.startswith('password'):
                password = line[len('password'):].strip()
    return login, password
def httpopen(url, post_data=None, headers=None):
    """Open *url* through the shared, cookie-aware opener and return the response.

    The opener is created on first use and stored in the module-level
    ``__opener__`` so that every later call reuses the same cookie jar (and
    therefore the same session) and the same ``User-Agent`` header.

    Args:
        url: URL to request.
        post_data: optional request body. A ``str`` is encoded as UTF-8
            before being sent; ``None`` means no body is sent.
        headers: optional mapping of extra request headers. Defaults to no
            extra headers.

    Returns:
        The response object returned by the opener's ``open()``.

    Raises:
        urllib.error.URLError: propagated from the opener when the request
            fails.
    """
    global __opener__

    # NOTE: the request body is logged verbatim at DEBUG level.
    if post_data:
        logging.debug('HTTP POST %s %s', url, post_data)
    else:
        logging.debug('HTTP GET %s', url)

    if __opener__ is None:
        # Built lazily, exactly once: the cookie jar attached here carries
        # the session cookies across all subsequent requests.
        cookiejar = CookieJar()
        __opener__ = urllib.request.build_opener()
        __opener__.add_handler(urllib.request.HTTPCookieProcessor(cookiejar))
        __opener__.addheaders = [('User-Agent', 'Mozilla/5.0')]

    if isinstance(post_data, str):
        post_data = post_data.encode('utf-8')

    # A fresh dict per call replaces the former mutable ``headers={}``
    # default, which was one object shared by every call.
    if headers is None:
        headers = {}
    req = urllib.request.Request(url, post_data, headers)
    return __opener__.open(req)
50 logging.debug('Waiting %s seconds', seconds)
55 Download all the accounts csv data and store them in LOCAL_DIR
56 Return a list of filenames
60 logging.info('Downloading initial request')
61 httpresponse = httpopen(BASE_URL + '/voscomptes/canalXHTML/identif.ea?origin=particuliers')
62 #logging.debug(httpresponse.info())
63 html = httpresponse.read().decode('utf-8')
66 logging.info('Downloading password form')
67 urllogin = BASE_URL + '/wsost/OstBrokerWeb/loginform?TAM_OP=login&ERROR_CODE=0x00000000&URL=/voscomptes/canalXHTML/identif.ea?origin=particuliers'
68 httpresponse = httpopen(urllogin)
69 html = httpresponse.read().decode('utf-8')
70 #logging.debug(httpresponse.info())
71 open('login.html', 'w', encoding='utf-8').write(html)
73 #html = open('login.html', encoding='utf-8').read()
75 match = re.search('(loginform\?imgid=allunifie2&[^)"]*)', html, re.MULTILINE)
77 logging.critical('Login form image not found!')
80 url = BASE_URL + '/wsost/OstBrokerWeb/' + match.group(1)
81 httpresponse = httpopen(url)
82 logging.debug(httpresponse.info())
83 img = httpresponse.read()
84 open('loginform.gif', 'wb').write(img)
86 #root = html_parser.html_parse(html)
87 ##html_parser.print_idented_tree(root)
88 #for img in html_parser.get_elem(root, 'img'):
89 # src = img.attributes.get('src', '')
91 # if src.startswith(WSOST_PREFIX):
93 # img_id = src[len(WSOST_PREFIX)]
95 # httpresponse = httpopen(BASE_URL + src)
96 # img = httpresponse.read()
97 # open(img_id+'.gif', 'wb').write(img)
100 for choice in range(16):
103 proc_convert=Popen('convert loginform.gif -crop 60x60+%s+%s pnm:-' % (column*64, row*64),
104 shell=True, stdout=PIPE)
105 proc_gocr=Popen('gocr -C 0-9 -i -',
106 shell=True, stdin=proc_convert.stdout, stdout=PIPE)
107 output = proc_gocr.communicate()[0]
108 output = output.decode('utf-8')
109 output = output.strip()
110 #print("choice #%s is %s" % (choice, output))
111 xlt_password[output] = choice
112 #for i in 0 1 2 3 4 5 6 7 8 9; do convert $i.gif -crop 20x20+5+5 pnm:- | gocr -C 0-9 -i -; done
114 LOGIN, PASSWORD = get_login_password()
116 shuffled_password = ''
118 shuffled_password += '%02d' % xlt_password[c]
119 logging.info("shuffled_password: %s", shuffled_password)
121 sleep(10) # We are not supermen
123 post_data='urlbackend=%2Fvoscomptes%2FcanalXHTML%2Fidentif.ea%3Forigin%3Dparticuliers&origin=particuliers&password=' + shuffled_password + '&cv=true&cvvs=&username=' + LOGIN
124 httpresponse = httpopen(BASE_URL + '/wsost/OstBrokerWeb/auth', post_data)
125 html = httpresponse.read().decode('iso8859-1')
126 print(httpresponse.info())
127 open('welcome.html', 'w', encoding='iso8859-1').write(html)
129 assert 'initialiser-identif.ea' in html
130 httpresponse = httpopen(BASE_URL + '/voscomptes/canalXHTML/securite/authentification/initialiser-identif.ea')
131 html = httpresponse.read().decode('iso8859-1')
132 print(httpresponse.info())
133 open('welcome2.html', 'w', encoding='iso8859-1').write(html)
135 assert 'verifierMotDePasse-identif.ea' in html
136 httpresponse = httpopen(BASE_URL + '/voscomptes/canalXHTML/securite/authentification/verifierMotDePasse-identif.ea')
137 html = httpresponse.read().decode('iso8859-1')
138 #print(httpresponse.info())
139 open('welcome3.html', 'w', encoding='iso8859-1').write(html)
141 assert 'init-aiguillagePersonnalisation.ea' in html
142 httpresponse = httpopen(BASE_URL + '/voscomptes/canalXHTML/donneesPersonnelles/aiguillage_personnalisation/init-aiguillagePersonnalisation.ea')
143 html = httpresponse.read().decode('iso8859-1')
144 #print(httpresponse.info())
145 open('welcome4.html', 'w', encoding='iso8859-1').write(html)
147 assert 'init-synthese.ea' in html
148 httpresponse = httpopen(BASE_URL + '/voscomptes/canalXHTML/comptesCommun/synthese_assurancesEtComptes/afficheSyntheseComptes-synthese.ea')
149 #httpresponse = httpopen(BASE_URL + '/voscomptes/canalXHTML/comptesCommun/synthese_assurancesEtComptes/init-synthese.ea')
150 html = httpresponse.read().decode('iso8859-1')
151 #print(httpresponse.info())
152 open('welcome5.html', 'w', encoding='iso8859-1').write(html)
155 root = html_parser.html_parse(html)
156 for a in html_parser.get_elem(root, 'a'):
157 href = a.attributes.get('href', '')
158 href = htmlentities.resolve(href)
159 match = re.match('\.\./\.\./(...)/.*compte.numero=(.*)&typeRecherche=(.*)', href)
162 #../../CCP/releves_ccp/menuReleve-releve_ccp.ea?compte.numero=*******&typeRecherche=1
163 # https://voscomptesenligne.labanquepostale.fr/voscomptes/canalXHTML/CCP/releves_ccp/menuReleve-releve_ccp.ea?compte.numero=*******&typeRecherche=1
164 cpttype, cptnum, searchtype = match.group(1), match.group(2), match.group(3)
166 logging.info('Found account type %s: %s' % (cpttype, cptnum))
167 result.append(cptnum)
169 httpresponse = httpopen(BASE_URL + '/voscomptes/canalXHTML/' + href[len('../../'):])
170 html = httpresponse.read().decode('iso8859-1')
171 open(cptnum+'-init.html', 'w', encoding='iso8859-1').write(html)
174 # https://voscomptesenligne.labanquepostale.fr/voscomptes/canalXHTML/comptesCommun/telechargementMouvement/init-telechargementMouvements.ea?compte.numero=*********&typeRecherche=1&typeMouvements=CCP
175 httpresponse = httpopen(BASE_URL + '/voscomptes/canalXHTML/comptesCommun/telechargementMouvement/init-telechargementMouvements.ea?compte.numero=' + cptnum + '&typeRecherche='+ searchtype +'&typeMouvements=' + cpttype)
176 html = httpresponse.read().decode('iso8859-1')
177 #print(httpresponse.info())
178 open(cptnum+'-init2.html', 'w', encoding='iso8859-1').write(html)
181 httpresponse = httpopen(BASE_URL + '/voscomptes/canalXHTML/comptesCommun/telechargementMouvement/detailCompte2-telechargementMouvements.ea')
182 html = httpresponse.read().decode('iso8859-1')
183 #print(httpresponse.info())
184 open(cptnum+'-confirm.html', 'w', encoding='iso8859-1').write(html)
187 root = html_parser.html_parse(html)
188 #html_parser.print_idented_tree(root)
189 for form in html_parser.get_elem(root, 'form'):
190 if form.attributes.get('id', None) == 'formConfirmAgain':
191 url = form.attributes['action']
193 logging.critical("Can't find link to download csv")
196 # /voscomptes/canalXHTML/comptesCommun/telechargementMouvement/preparerRecherche-telechargementMouvements.ea?ts=1304816124318 POST 'format=CSV&duree='
197 httpresponse = httpopen(BASE_URL + '/voscomptes/canalXHTML/comptesCommun/telechargementMouvement/' + url, 'format=CSV&duree=')
198 filename= LOCAL_DIR + cptnum + '.' + datetime.now().strftime('%Y%m%dT%H%M%S') + '.csv'
199 csvdata = httpresponse.read().decode('iso8859-1')
200 logging.info('Save CSV data to %s', filename)
201 open(filename, 'w', encoding='utf-8').write(csvdata)
204 lastfilename = LOCAL_DIR + cptnum + '.last.csv'
206 os.unlink(lastfilename)
207 except OSError as err:
208 if err.errno == 2: #No such file or directory
209 logging.warning('Could not find last csv link. Running for the first time?')
212 os.symlink(filename, lastfilename)
214 logging.info('Disconnecting')
215 httpresponse = httpopen(BASE_URL + '/voscomptes/canalXHTML/securite/deconnexion/init-deconnexion.ea')
216 html = httpresponse.read().decode('iso8859-1')
217 open('bye.html', 'w', encoding='iso8859-1').write(html)
219 logging.info('Disconnected')
224 CSV_HEADER = 'Date;Libellé;Montant(EUROS);Montant(FRANCS)'
226 account_files = os.listdir(LOCAL_DIR)
227 for account_file in account_files:
228 if not account_file.endswith('.last.csv'):
231 account = account_file[:-len('.last.csv')]
232 logging.debug('Agregating %s', account)
234 if os.access(LOCAL_DIR + account + '.csv', os.F_OK):
235 logging.debug('Not implemented')
238 logging.warning('Master csv file not found for %s: creating', account)
239 masterfile = open(LOCAL_DIR + account + '.csv', 'w', encoding='utf-8')
242 for line in open(LOCAL_DIR + account_file).read().split('\n'):
243 logging.debug('line: %s', line)
247 masterfile.write(line + '\n')
248 elif line == CSV_HEADER:
252 if __name__ == '__main__':
253 from optparse import OptionParser
254 parser = OptionParser()
255 parser.add_option('-d', '--debug',
256 action='store_true', dest='debug', default=False,
258 parser.add_option('--no-download',
259 action='store_true', dest='no_download', default=False,
260 help="don't download. Only agregate.")
261 (options, args) = parser.parse_args()
264 loglevel = logging.DEBUG
266 loglevel = logging.INFO
267 logging.basicConfig(level=loglevel, format='%(asctime)s %(levelname)s %(message)s')
269 os.umask(0o077) # this is really private
271 TMP_DIR = LOCAL_DIR + 'tmp/'
274 except OSError as err:
275 if err.errno != 17: # File exists
279 if not options.no_download: