7 from datetime import datetime
9 from http.cookiejar import CookieJar
10 from subprocess import Popen, PIPE, call
# Scheme and host of the online banking site; every request path used in
# this file is appended to it.
15 BASE_URL = 'https://voscomptesenligne.labanquepostale.fr'
# Leading part of the `src` attribute of the images looked up in the login
# form; the single character that follows it is used as the image id.
16 WSOST_PREFIX = '/wsost/OstBrokerWeb/loginform?imgid='
# Directory holding the `config` file and receiving the downloaded CSV files.
18 LOCAL_DIR = '/home/nirgal/banquepostale/'
def get_login_password(config_path=None):
    """Read the bank login and password from the configuration file.

    The file is scanned line by line. A line starting with ``login`` or
    ``password`` supplies the corresponding value: everything after the
    keyword, with surrounding whitespace stripped. When a keyword occurs
    on several lines, the last occurrence wins.

    Args:
        config_path: Path of the file to read. Defaults to the file named
            ``config`` inside ``LOCAL_DIR``.

    Returns:
        A ``(login, password)`` tuple. An entry is ``None`` when the file
        has no line for it.

    Raises:
        OSError: If the configuration file cannot be opened.
    """
    if config_path is None:
        config_path = LOCAL_DIR + 'config'

    # Bind both names up front so that a config file lacking one of the
    # keys yields None instead of an unbound local at the return statement.
    login = None
    password = None

    # The context manager closes the file even if reading fails.
    with open(config_path) as config_file:
        config = config_file.read()

    for line in config.splitlines():
        if line.startswith('login'):
            login = line[len('login'):].strip()
        elif line.startswith('password'):
            password = line[len('password'):].strip()
    return login, password
def httpopen(url, post_data=None):
    """Open *url* through the shared, cookie-aware opener.

    The opener is built on first use and kept in the module-level
    ``__opener__`` variable, so every call goes through the same
    ``CookieJar``.

    Args:
        url: Absolute URL to fetch.
        post_data: Optional request body, handed to the opener's
            ``open()``. A ``str`` body is encoded to ASCII bytes first,
            because ``urllib.request`` refuses ``str`` data for HTTP
            requests; it is expected to be percent-encoded already.

    Returns:
        The response object returned by the opener's ``open()``.

    Raises:
        urllib.error.URLError: If the request fails.
        UnicodeEncodeError: If a ``str`` *post_data* contains non-ASCII
            characters.
    """
    global __opener__

    # NOTE: the request body is logged verbatim at DEBUG level, including
    # any credentials it carries.
    if post_data:
        logging.debug('HTTP POST %s %s', url, post_data)
    else:
        logging.debug('HTTP GET %s', url)

    if __opener__ is None:
        cookiejar = CookieJar()
        __opener__ = urllib.request.build_opener(
            urllib.request.HTTPCookieProcessor(cookiejar))

    # urllib raises TypeError when an HTTP request body is a str.
    if isinstance(post_data, str):
        post_data = post_data.encode('ascii')

    return __opener__.open(url, post_data)
46 logging.debug('Waiting %s seconds', seconds)
51 Download all the accounts csv data and store them in LOCAL_DIR
52 Return a list of filenames
56 logging.info('Downloading password form')
57 httpresponse = httpopen(BASE_URL + '/voscomptes/canalXHTML/securite/authentification/recupererPointEntree-identif.ea')
58 html = httpresponse.read().decode('iso8859-1')
59 #logging.debug(httpresponse.info())
60 open('login.html', 'w', encoding='iso8859-1').write(html)
62 root = html_parser.html_parse(html)
63 #html_parser.print_idented_tree(root)
64 for img in html_parser.get_elem(root, 'img'):
65 src = img.attributes.get('src', '')
66 if src.startswith(WSOST_PREFIX):
68 img_id = src[len(WSOST_PREFIX)]
70 httpresponse = httpopen(BASE_URL + src)
71 img = httpresponse.read()
72 open(img_id+'.gif', 'wb').write(img)
75 for img_id in "0123456789":
76 proc_convert=Popen('convert %s.gif -crop 20x20+5+5 pnm:-' % img_id,
77 shell=True, stdout=PIPE)
78 proc_gocr=Popen('gocr -C 0-9 -i -',
79 shell=True, stdin=proc_convert.stdout, stdout=PIPE)
80 output = proc_gocr.communicate()[0]
81 output = output.decode('utf-8')
82 output = output.strip()
83 #print("image #%s is %s" % (img_id, output))
84 xlt_password[output] = img_id
86 LOGIN, PASSWORD = get_login_password()
88 shuffled_password = ''
90 shuffled_password += xlt_password[c]
91 logging.info("shuffled_password: %s", shuffled_password)
92 #for i in 0 1 2 3 4 5 6 7 8 9; do convert $i.gif -crop 20x20+5+5 pnm:- | gocr -C 0-9 -i -; done
94 sleep(10) # We are not supermen
96 post_data='urlbackend=%2Fvoscomptes%2FcanalXHTML%2Fsecurite%2Fauthentification%2FrecupererPointEntree-identif.ea%3Forigin%3Dparticuliers&origin=particuliers&password=' + shuffled_password + '&cv=true&cvvs=&username=' + LOGIN
97 httpresponse = httpopen(BASE_URL + '/wsost/OstBrokerWeb/auth', post_data)
98 html = httpresponse.read().decode('iso8859-1')
99 #print(httpresponse.info())
100 open('welcome.html', 'w', encoding='iso8859-1').write(html)
102 assert 'initialiser-identif.ea' in html
103 httpresponse = httpopen(BASE_URL + '/voscomptes/canalXHTML/securite/authentification/initialiser-identif.ea')
104 html = httpresponse.read().decode('iso8859-1')
105 #print(httpresponse.info())
106 open('welcome2.html', 'w', encoding='iso8859-1').write(html)
108 assert 'verifierMotDePasse-identif.ea' in html
109 httpresponse = httpopen(BASE_URL + '/voscomptes/canalXHTML/securite/authentification/verifierMotDePasse-identif.ea')
110 html = httpresponse.read().decode('iso8859-1')
111 #print(httpresponse.info())
112 open('welcome3.html', 'w', encoding='iso8859-1').write(html)
114 assert 'init-synthese.ea' in html
115 httpresponse = httpopen(BASE_URL + '/voscomptes/canalXHTML/comptesCommun/synthese_assurancesEtComptes/init-synthese.ea')
116 html = httpresponse.read().decode('iso8859-1')
117 #print(httpresponse.info())
118 open('welcome4.html', 'w', encoding='iso8859-1').write(html)
121 root = html_parser.html_parse(html)
122 for a in html_parser.get_elem(root, 'a'):
123 href = a.attributes.get('href', '')
124 href = htmlentities.resolve(href)
125 match = re.match('\.\./\.\./(...)/.*compte.numero=(.*)&typeRecherche=(.*)', href)
128 #../../CCP/releves_ccp/menuReleve-releve_ccp.ea?compte.numero=*******&typeRecherche=1
129 # https://voscomptesenligne.labanquepostale.fr/voscomptes/canalXHTML/CCP/releves_ccp/menuReleve-releve_ccp.ea?compte.numero=*******&typeRecherche=1
130 cpttype, cptnum, searchtype = match.group(1), match.group(2), match.group(3)
132 logging.info('Found account type %s: %s' % (cpttype, cptnum))
133 result.append(cptnum)
135 httpresponse = httpopen(BASE_URL + '/voscomptes/canalXHTML/' + href[len('../../'):])
136 html = httpresponse.read().decode('iso8859-1')
137 open(cptnum+'-init.html', 'w', encoding='iso8859-1').write(html)
140 # https://voscomptesenligne.labanquepostale.fr/voscomptes/canalXHTML/comptesCommun/telechargementMouvement/init-telechargementMouvements.ea?compte.numero=*********&typeRecherche=1&typeMouvements=CCP
141 httpresponse = httpopen(BASE_URL + '/voscomptes/canalXHTML/comptesCommun/telechargementMouvement/init-telechargementMouvements.ea?compte.numero=' + cptnum + '&typeRecherche='+ searchtype +'&typeMouvements=' + cpttype)
142 html = httpresponse.read().decode('iso8859-1')
143 #print(httpresponse.info())
144 open(cptnum+'-init2.html', 'w', encoding='iso8859-1').write(html)
147 httpresponse = httpopen(BASE_URL + '/voscomptes/canalXHTML/comptesCommun/telechargementMouvement/detailCompte2-telechargementMouvements.ea')
148 html = httpresponse.read().decode('iso8859-1')
149 #print(httpresponse.info())
150 open(cptnum+'-confirm.html', 'w', encoding='iso8859-1').write(html)
153 root = html_parser.html_parse(html)
154 #html_parser.print_idented_tree(root)
155 for form in html_parser.get_elem(root, 'form'):
156 if form.attributes.get('id', None) == 'formConfirmAgain':
157 url = form.attributes['action']
159 logging.critical("Can't find link to download csv")
162 # /voscomptes/canalXHTML/comptesCommun/telechargementMouvement/preparerRecherche-telechargementMouvements.ea?ts=1304816124318 POST 'format=CSV&duree='
163 httpresponse = httpopen(BASE_URL + '/voscomptes/canalXHTML/comptesCommun/telechargementMouvement/' + url, 'format=CSV&duree=')
164 filename= LOCAL_DIR + cptnum + '.' + datetime.now().strftime('%Y%m%dT%H%M%S') + '.csv'
165 csvdata = httpresponse.read().decode('iso8859-1')
166 logging.info('Save CSV data to %s', filename)
167 open(filename, 'w', encoding='utf-8').write(csvdata)
170 lastfilename = LOCAL_DIR + cptnum + '.last.csv'
172 os.unlink(lastfilename)
173 except OSError as err:
174 if err.errno == 2: #No such file or directory
175 logging.warning('Could not find last csv link. Running for the first time?')
178 os.symlink(filename, lastfilename)
180 logging.info('Disconnecting')
181 httpresponse = httpopen(BASE_URL + '/voscomptes/canalXHTML/securite/deconnexion/init-deconnexion.ea')
182 html = httpresponse.read().decode('iso8859-1')
183 open('bye.html', 'w', encoding='iso8859-1').write(html)
185 logging.info('Disconnected')
190 CSV_HEADER = 'Date;Libellé;Montant(EUROS);Montant(FRANCS)'
192 account_files = os.listdir(LOCAL_DIR)
193 for account_file in account_files:
194 if not account_file.endswith('.last.csv'):
197 account = account_file[:-len('.last.csv')]
198 logging.debug('Agregating %s', account)
200 if os.access(LOCAL_DIR + account + '.csv', os.F_OK):
203 logging.warning('Master csv file not found for %s: creating', account)
204 masterfile = open(LOCAL_DIR + account + '.csv', 'w', encoding='utf-8')
207 for line in open(LOCAL_DIR + account_file).read().split('\n'):
211 masterfile.write(line + '\n')
212 elif line == CSV_HEADER:
216 if __name__ == '__main__':
217 from optparse import OptionParser
218 parser = OptionParser()
219 parser.add_option('-d', '--debug',
220 action='store_true', dest='debug', default=False,
222 (options, args) = parser.parse_args()
225 loglevel = logging.DEBUG
227 loglevel = logging.INFO
228 logging.basicConfig(level=loglevel, format='%(asctime)s %(levelname)s %(message)s')
230 os.umask(0o077) # this is really private
232 TMP_DIR = LOCAL_DIR + 'tmp/'
235 except OSError as err:
236 if err.errno != 17: # File exists