Fixed csv charset
[nef.git] / go.py
1 #!/usr/bin/env python3
2
3 import argparse
4 import datetime
5 import logging
6 import os
7 import re
8 import sys
9 import time
10 import urllib.request
11 from http.cookiejar import CookieJar
12 from lxml import etree
13 from io import StringIO
14 from subprocess import Popen, PIPE, call, DEVNULL
15
# Root URL of the online banking site; every request path is appended to it.
BASE_URL = 'https://espace-client.lanef.com'
# Working directory: holds the 'config' credentials file, the downloaded and
# merged csv files, and the 'tmp/' scratch directory.
LOCAL_DIR = '/home/nirgal/nef/'
# Column header line of the csv files. It is used as the delimiter between
# the preamble lines and the actual records.
CSV_HEADER = '"Date opération","Date Valeur","Référence","Montant","Solde","Libellé"'
19
20
def get_login_password():
    """
    Read the credentials from the 'config' file located in LOCAL_DIR.

    A line starting with 'login' (resp. 'password') provides the login
    (resp. password): the keyword is removed and the remainder is stripped
    of surrounding whitespace. When a keyword appears on several lines, the
    last one wins. Other lines are ignored.

    Returns a (login, password) tuple. An item is None when no matching
    line was found.
    """
    login = None
    password = None
    # The context manager guarantees the file is closed, even on error.
    with open(LOCAL_DIR + 'config') as config_file:
        config = config_file.read()
    for line in config.splitlines():
        if line.startswith('login'):
            login = line[len('login'):].strip()
        elif line.startswith('password'):
            password = line[len('password'):].strip()
    return login, password
31
32
# Shared urllib opener, created on the first call to httpopen(). It carries
# the cookie jar, so all the requests belong to the same session.
__opener__ = None


def httpopen(url, post_data=None, headers=None):
    """
    Open `url` through the shared, cookie-aware opener.

    url: the address to request.
    post_data: optional request body. A str is encoded as UTF-8. When it
        is given the request is logged as a POST, otherwise as a GET.
    headers: optional dict of extra request headers.

    Returns the response object returned by the opener.

    NOTE: in debug mode the POST body is logged verbatim, so anything
    sensitive it contains ends up in the log.
    """
    global __opener__
    if post_data:
        logging.debug('HTTP POST %s %s', url, post_data)
    else:
        logging.debug('HTTP GET %s', url)
    if __opener__ is None:
        cookiejar = CookieJar()
        __opener__ = urllib.request.build_opener()
        __opener__.add_handler(urllib.request.HTTPCookieProcessor(cookiejar))
        __opener__.addheaders = [('User-Agent', 'Mozilla/5.0')]
    if isinstance(post_data, str):
        post_data = post_data.encode('utf-8')
    # headers defaults to None rather than {} to avoid a shared mutable
    # default argument.
    req = urllib.request.Request(url, post_data, headers or {})
    return __opener__.open(req)
50
51
def sleep(seconds):
    """Announce the pause in the debug log, then block for `seconds` seconds."""
    logging.debug('Waiting %s seconds', seconds)
    time.sleep(seconds)
55
56
def _fetch_page(path, dump_name, post_data=None):
    """
    Request BASE_URL + path, decode the reply as UTF-8, keep a copy of it in
    the file `dump_name` (relative to the current directory) and return the
    decoded text.
    """
    httpresponse = httpopen(BASE_URL + path, post_data)
    html = httpresponse.read().decode('utf-8')
    with open(dump_name, 'w', encoding='utf-8') as f:
        f.write(html)
    return html


def download():
    '''
    Download all the accounts csv data and store them in LOCAL_DIR.

    Steps: fetch the login form to get its token, log in, list the
    accounts, download the last 400 days of operations of each account as
    <account>.<timestamp>.csv, make <account>.last.csv a symbolic link to
    that file, then log off. A copy of each html page received is written
    in the current directory.

    Return the list of the csv filenames written.
    Raise RuntimeError if the credentials or the login token are missing,
    or if the login is refused.
    '''
    # Local import: keeps this function self-contained.
    from urllib.parse import urlencode

    parser = etree.HTMLParser()

    logging.info('Downloading login form')
    html = _fetch_page('/templates/logon/logon.cfm', 'logon.html')
    root = etree.parse(StringIO(html), parser).getroot()
    logontoken = root.xpath('//input[@id="logonToken"]')[0].get('value')
    logging.debug('logontoken: %s', logontoken)
    if logontoken is None:
        raise RuntimeError('Login form has no logonToken value')

    sleep(10)  # We are not supermen

    logging.info('Login')
    login, password = get_login_password()
    if login is None or password is None:
        raise RuntimeError('login or password missing from config file')
    # urlencode() escapes characters such as '&', '+', '%' or '=' that
    # would otherwise corrupt the form body.
    post_data = urlencode([
        ('FACTOR', 'LOGPAS'),
        ('logonToken', logontoken),
        ('USERID', login),
        ('SUBUSERID', ''),
        ('STATIC', password),
    ])
    html = _fetch_page('/templates/logon/checkPasswordMatrixToken.cfm',
                       'checkPasswordMatrixToken.html', post_data)

    # Explicit raise rather than assert: asserts are removed by python -O.
    if "Bienvenue" not in html:
        raise RuntimeError("Login failed")

    sleep(5)
    html = _fetch_page('/templates/landingPage/accountListWidget.cfm',
                       'accountListWidget.html', 'isLanding=True&AccNum=')
    root = etree.parse(StringIO(html), parser).getroot()

    filenames = []
    for li in root.xpath('//ul[@id="accountList"]/li'):
        title = ''.join(li[0].xpath('.//text()')).strip()
        logging.debug('title: %s', title)

        cptnum_currency = ''.join(li[1].xpath('.//text()')).strip()
        logging.debug('cptnum_currency: %s', cptnum_currency)
        match = re.match(r"([0-9]+) \((EUR)\)", cptnum_currency)
        if match is None:
            # Skip rather than crash, so that the other accounts are still
            # processed and the session is still closed.
            logging.warning('Skipping %s: unexpected account number/currency %r',
                            title, cptnum_currency)
            continue
        cptnum = match.group(1)
        currency = match.group(2)

        balance = ''.join(li[2].xpath('.//text()')).strip()
        logging.debug('balance: %s', balance)
        # Remove the non-breaking spaces and use a dot as decimal separator
        balance = float(balance.replace('\xa0', '').replace(',', '.'))

        logging.info('Found %s #%s (%s %s)', title, cptnum, balance, currency)

        now = datetime.datetime.now()
        startdate = (now - datetime.timedelta(400)).strftime('%Y-%m-%d')
        enddate = now.strftime('%Y-%m-%d')

        httpresponse = httpopen(
            BASE_URL + '/templates/account/accountActivityListDownload.cfm'
            + '?AccNum=' + cptnum
            + '&viewMode=CSV&orderBy=TRANSACTION_DATE_DESCENDING&page=1'
            + '&startDate=' + startdate
            + '&endDate=' + enddate
            + '&startAmount=0&endAmount=999999999999'
            + '&initialPaginationLoad=true&screenSize=big&showBalance=true')
        # The server reports Content-Type: application/csv;charset=windows-1252
        csvdata = httpresponse.read().decode('windows-1252')
        if not csvdata:
            logging.info('No mouvement for %s', title)
            continue
        logging.debug('size: %s', len(csvdata))
        filename = (LOCAL_DIR + cptnum + '.'
                    + datetime.datetime.now().strftime('%Y%m%dT%H%M%S') + '.csv')
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(csvdata)
        sleep(9)

        # Make <account>.last.csv point to the file just written
        lastfilename = LOCAL_DIR + cptnum + '.last.csv'
        try:
            os.unlink(lastfilename)
        except FileNotFoundError:
            logging.warning('Could not find last csv link. Running for the first time?')
        os.symlink(filename, lastfilename)
        filenames.append(filename)

    logging.info('Disconnecting')
    _fetch_page('/templates/logon/logOff.cfm?logOffReason=LoggedOut', 'logoff.html')
    sleep(1)
    _fetch_page('/templates/logon/logon.cfm?logOffReason=', 'logoff2.html')

    return filenames
169
170
def agregate(csv_last_names):
    """
    Merge freshly downloaded csv files into their master files.

    csv_last_names: the file names to process. When it is empty or None,
    every entry of LOCAL_DIR whose name ends with '.last.csv' is processed
    instead.
    """
    targets = csv_last_names or [
        entry for entry in os.listdir(LOCAL_DIR)
        if entry.endswith('.last.csv')
    ]
    for target in targets:
        merge_csv(target)
185
186
def myexec(cmd):
    """
    Run the command `cmd` (an argument list) and wait for its completion.

    Only the standard error of the command is captured. Returns True when
    the exit status is 0. Otherwise the captured error output is logged and
    False is returned.
    """
    child = Popen(cmd, stderr=PIPE)
    _, raw_stderr = child.communicate()
    stderr_text = raw_stderr.decode('utf-8')
    if not child.returncode:
        return True
    logging.error("Can't run %s: %s", cmd, stderr_text)
    return False
195
196
def remove_headers(filein, fileout, delimline, keepdelim=False):
    """
    Copies filein in fileout, without the headers.
    Look for the first line equal to "delimline" in filein.
    Everything before is not copied.
    The delimline itself is not copied unless keepdelim is True.
    Empty lines are dropped, both from the headers and from the copy.
    Both files are read/written as UTF-8.
    returns the headers (list of lines), excluding delimline
    """
    headers = []
    past_headers = False
    # Explicit UTF-8: do not depend on the locale to decode the input.
    with open(filein, encoding='utf-8') as fin, \
            open(fileout, 'w', encoding='utf-8') as fout:
        for rawline in fin:
            line = rawline.rstrip('\n')
            if not line:
                continue
            if past_headers:
                fout.write(line + '\n')
            elif line == delimline:
                past_headers = True
                if keepdelim:
                    fout.write(line + '\n')
            else:
                headers.append(line)
    return headers
222
223
def merge_csv(filename_last):
    """
    Merge the csv file LOCAL_DIR/<filename_last> into the master csv file
    of its account.

    The account is the part of filename_last located before its first dot,
    and the master file is LOCAL_DIR/<account>.csv. If there is no master
    file yet, it is created from filename_last, stripped from the lines
    preceding CSV_HEADER. Otherwise both files are stripped from their
    headers and compared with diff(1): the lines diff reports as added
    are logged and inserted right after the CSV_HEADER line of the master
    file, before its previous content.

    filename_last must contain a dot and no '/'. Errors are logged;
    nothing is returned.
    """
    dotpos = filename_last.find('.')
    if dotpos == -1:
        logging.critical('File name %s must contain a dot.', filename_last)
        return
    if '/' in filename_last:
        logging.critical("File name %s must not contain '/'.", filename_last)
        return

    account = filename_last[:dotpos]

    logging.debug('Agregating %s', account)

    oldmastername = LOCAL_DIR + account + '.csv'
    newmastername = LOCAL_DIR + account + '.csv.new'
    tmplastname = LOCAL_DIR + 'tmp/last.csv'
    tmpmastername = LOCAL_DIR + 'tmp/master.csv'

    if not os.access(oldmastername, os.F_OK):
        logging.warning('Master csv file not found for %s: creating', account)
        remove_headers(LOCAL_DIR + filename_last, oldmastername, CSV_HEADER, keepdelim=True)
        return

    remove_headers(LOCAL_DIR + filename_last, tmplastname, CSV_HEADER)
    remove_headers(oldmastername, tmpmastername, CSV_HEADER)
    cmd = ['diff', '-Nau', tmpmastername, tmplastname]
    proc = Popen(cmd, stdout=PIPE, stderr=PIPE)
    out, errormsg = proc.communicate()  # also waits for the termination
    out = str(out, encoding='utf-8')
    errormsg = str(errormsg, encoding='utf-8')
    # diff exits with a non zero status when the files differ, so failure
    # is detected from its error output rather than from its exit status.
    if errormsg:
        logging.critical("Can't run %s: %s", cmd, errormsg)
        return

    with open(newmastername, 'w', encoding='utf-8') as newmasterfile:
        newmasterfile.write(CSV_HEADER + '\n')

        for diffline in out.split('\n'):
            # Keep the added lines, but not the '+++' file header of diff
            if diffline.startswith('+') and not diffline.startswith('+++'):
                diffline = diffline[1:]  # Remove starting '+'
                logging.info('%s', diffline)
                newmasterfile.write(diffline + '\n')

        with open(tmpmastername) as oldmasterfile:
            newmasterfile.write(oldmasterfile.read())

    # Replace the master file by the new one. A failure is logged rather
    # than raised.
    try:
        os.replace(newmastername, oldmastername)
    except OSError as err:
        logging.error("Can't rename %s to %s: %s", newmastername, oldmastername, err)
274     #os.rename(newmastername, oldmastername) # TODO
275
276
if __name__ == '__main__':
    # Command line: optional debug logging, optional skipping of the
    # download step, optional explicit list of files to merge.
    cli = argparse.ArgumentParser(description='Track NEF accounts')
    cli.add_argument('-d', '--debug', action='store_true', help="debug mode")
    cli.add_argument('--no-download', action='store_true',
                     help="don't download. Only agregate.")
    cli.add_argument('--csvlast', nargs='+', metavar='file.csv',
                     help="Process this file rather than *.last.csv.")
    args = cli.parse_args()

    logging.basicConfig(
        level=logging.DEBUG if args.debug else logging.INFO,
        format='%(asctime)s %(levelname)s %(message)s')

    logging.debug(args)

    os.umask(0o077)  # This is really private

    # Work from the tmp directory, creating it on first run.
    TMP_DIR = LOCAL_DIR + 'tmp/'
    try:
        os.mkdir(TMP_DIR)
    except FileExistsError:
        pass
    os.chdir(TMP_DIR)

    if not args.no_download:
        download()
    agregate(args.csvlast)