First version
author Jean-Michel Nirgal Vourgère <jmv_deb@nirgal.com>
Tue, 18 Apr 2017 09:20:11 +0000 (11:20 +0200)
committer Jean-Michel Nirgal Vourgère <jmv_deb@nirgal.com>
Tue, 18 Apr 2017 09:20:11 +0000 (11:20 +0200)
go.py [new file with mode: 0755]

diff --git a/go.py b/go.py
new file mode 100755 (executable)
index 0000000..7efcdcd
--- /dev/null
+++ b/go.py
@@ -0,0 +1,314 @@
+#!/usr/bin/env python3
+
+import argparse
+import datetime
+import logging
+import os
+import re
+import sys
+import time
+import urllib.request
+from http.cookiejar import CookieJar
+from lxml import etree
+from io import StringIO
+from subprocess import Popen, PIPE, call, DEVNULL
+
+# Root URL of the banking web site; every request path is appended to it.
+BASE_URL = 'https://espace-client.lanef.com'
+# Working directory: holds the 'config' credentials file, the downloaded and
+# master CSV files, and the 'tmp/' scratch directory. It must end with a
+# slash because paths are built by plain string concatenation.
+LOCAL_DIR = '/home/nirgal/nef/'
+# Column header line of the CSV files; used to find where the data rows start.
+CSV_HEADER = '"Date opération","Date Valeur","Référence","Montant","Solde","Libellé"'
+
+
+def get_login_password(config_path=None):
+    """
+    Read the credentials from the configuration file.
+
+    The file is scanned line by line: a line starting with 'login' provides
+    the login and a line starting with 'password' provides the password, the
+    value being the rest of the line with surrounding whitespace removed.
+    When a key appears several times, the last occurrence wins.
+
+    config_path: file to read; defaults to LOCAL_DIR + 'config'.
+    Returns a (login, password) tuple; an item is None when its key is absent.
+    """
+    if config_path is None:
+        config_path = LOCAL_DIR + 'config'
+    # The context manager closes the file deterministically; UTF-8 matches
+    # the encoding used for the files this script writes.
+    with open(config_path, encoding='utf-8') as config_file:
+        config = config_file.read()
+    login = None
+    password = None
+    for line in config.splitlines():
+        if line.startswith('login'):
+            login = line[len('login'):].strip()
+        elif line.startswith('password'):
+            password = line[len('password'):].strip()
+    return login, password
+
+
+# Shared URL opener, created on the first httpopen() call so that cookies
+# (and therefore the login session) persist across requests.
+__opener__ = None
+
+
+def httpopen(url, post_data=None, headers=None):
+    """
+    Open url through the shared cookie-aware opener and return the response.
+
+    url: absolute URL to request.
+    post_data: request body, or None for a plain GET. A str is encoded as
+        UTF-8 before being sent.
+    headers: optional dict of extra request headers.
+    Returns the response object; errors raised by urllib propagate.
+    """
+    global __opener__
+    # A fresh dict per call instead of a shared mutable default argument.
+    if headers is None:
+        headers = {}
+    # NOTE(review): the debug log below includes the POST body, which
+    # contains the password for the login request.
+    if post_data:
+        logging.debug('HTTP POST %s %s', url, post_data)
+    else:
+        logging.debug('HTTP GET %s', url)
+    if __opener__ is None:
+        cookiejar = CookieJar()
+        __opener__ = urllib.request.build_opener()
+        __opener__.add_handler(urllib.request.HTTPCookieProcessor(cookiejar))
+        __opener__.addheaders = [('User-Agent', 'Mozilla/5.0')]
+    if isinstance(post_data, str):
+        post_data = post_data.encode('utf-8')
+    req = urllib.request.Request(url, post_data, headers)
+    return __opener__.open(req)
+
+
+def sleep(seconds):
+    """Pause for the given number of seconds, logging the wait at debug level."""
+    logging.debug('Waiting %s seconds', seconds)
+    time.sleep(seconds)
+
+
+def _fetch_html(url, dump_name, post_data=None):
+    """Request url, save the decoded body in dump_name for inspection, return it."""
+    html = httpopen(url, post_data).read().decode('utf-8')
+    with open(dump_name, 'w', encoding='utf-8') as f:
+        f.write(html)
+    return html
+
+
+def download():
+    '''
+    Download all the accounts csv data and store them in LOCAL_DIR
+
+    Steps: fetch the login form to get its token, log in with the
+    credentials from get_login_password(), list the accounts, download the
+    last 400 days of each account as CSV, then log off. Pauses are inserted
+    between requests. Each page is also dumped as an .html file in the
+    current directory.
+
+    For every account with data, the CSV is stored as
+    LOCAL_DIR/<account>.<timestamp>.csv and LOCAL_DIR/<account>.last.csv is
+    made a symlink to it.
+
+    Return the list of the timestamped csv filenames that were written.
+    Raise RuntimeError when the credentials or the login token are missing,
+    or when the login is refused.
+    '''
+    # Local import: only this function needs it.
+    from urllib.parse import urlencode
+
+    parser = etree.HTMLParser()
+
+    logging.info('Downloading login form')
+    html = _fetch_html(BASE_URL + '/templates/logon/logon.cfm', 'logon.html')
+    root = etree.parse(StringIO(html), parser).getroot()
+    logontoken = root.xpath('//input[@id="logonToken"]')[0].get('value')
+    logging.debug('logontoken: %s', logontoken)
+    if logontoken is None:
+        raise RuntimeError('No logon token found in the login form')
+
+    sleep(10)  # We are not supermen
+
+    logging.info('Login')
+    login, password = get_login_password()
+    if login is None or password is None:
+        raise RuntimeError('Both login and password must be set in the config file')
+    # urlencode() escapes special characters ('&', '=', '+', '%'...) that
+    # would otherwise corrupt the form data.
+    post_data = urlencode([
+        ('FACTOR', 'LOGPAS'),
+        ('logonToken', logontoken),
+        ('USERID', login),
+        ('SUBUSERID', ''),
+        ('STATIC', password),
+    ])
+    html = _fetch_html(BASE_URL + '/templates/logon/checkPasswordMatrixToken.cfm',
+                       'checkPasswordMatrixToken.html', post_data)
+    # Explicit check rather than assert, which is stripped by "python -O".
+    if 'Bienvenue' not in html:
+        raise RuntimeError('Login failed')
+
+    sleep(5)
+    html = _fetch_html(BASE_URL + '/templates/landingPage/accountListWidget.cfm',
+                       'accountListWidget.html', 'isLanding=True&AccNum=')
+    root = etree.parse(StringIO(html), parser).getroot()
+
+    filenames = []
+    for li in root.xpath('//ul[@id="accountList"]/li'):
+        # The first three children of each item hold, in order: the account
+        # title, "<number> (<currency>)" and the balance.
+        title = ''.join(li[0].xpath('.//text()')).strip()
+        logging.debug('title: %s', title)
+
+        cptnum_currency = ''.join(li[1].xpath('.//text()')).strip()
+        logging.debug('cptnum_currency: %s', cptnum_currency)
+        match = re.match(r"([0-9]+) \((EUR)\)", cptnum_currency)
+        if match is None:
+            logging.warning('Skipping %s: can not parse account number/currency %r',
+                            title, cptnum_currency)
+            continue
+        cptnum = match.group(1)
+        currency = match.group(2)
+
+        balance = ''.join(li[2].xpath('.//text()')).strip()
+        logging.debug('balance: %s', balance)
+        # Non-breaking spaces are removed and the decimal comma replaced.
+        balance = float(balance.replace('\xa0', '').replace(',', '.'))
+
+        logging.info('Found %s #%s (%s %s)', title, cptnum, balance, currency)
+
+        now = datetime.datetime.now()
+        startdate = (now - datetime.timedelta(400)).strftime('%Y-%m-%d')
+        enddate = now.strftime('%Y-%m-%d')
+
+        # hiddenFormId (a random UUID created by the site's javascript) is
+        # not sent.
+        query = urlencode([
+            ('AccNum', cptnum),
+            ('viewMode', 'CSV'),
+            ('orderBy', 'TRANSACTION_DATE_DESCENDING'),
+            ('page', '1'),
+            ('startDate', startdate),
+            ('endDate', enddate),
+            ('startAmount', '0'),
+            ('endAmount', '999999999999'),
+            ('initialPaginationLoad', 'true'),
+            ('screenSize', 'big'),
+            ('showBalance', 'true'),
+        ])
+        httpresponse = httpopen(BASE_URL + '/templates/account/accountActivityListDownload.cfm?' + query)
+        csvdata = httpresponse.read().decode('utf-8')
+        if not csvdata:
+            logging.info('No mouvement for %s', title)
+            continue
+        logging.debug('size: %s', len(csvdata))
+        filename = LOCAL_DIR + cptnum + '.' + datetime.datetime.now().strftime('%Y%m%dT%H%M%S') + '.csv'
+        with open(filename, 'w', encoding='utf-8') as f:
+            f.write(csvdata)
+        filenames.append(filename)
+        sleep(9)
+
+        # Make <account>.last.csv point to the file just written.
+        lastfilename = LOCAL_DIR + cptnum + '.last.csv'
+        try:
+            os.unlink(lastfilename)
+        except FileNotFoundError:
+            logging.warning('Could not find last csv link. Running for the first time?')
+        os.symlink(filename, lastfilename)
+
+    logging.info('Disconnecting')
+    _fetch_html(BASE_URL + '/templates/logon/logOff.cfm?logOffReason=LoggedOut', 'logoff.html')
+
+    sleep(1)
+    _fetch_html(BASE_URL + '/templates/logon/logon.cfm?logOffReason=', 'logoff2.html')
+
+    return filenames
+
+
+def agregate(csv_last_names):
+    """
+    Merge downloaded CSV files into their master files using merge_csv().
+
+    csv_last_names: file names to merge. When it is empty or None, every
+    entry of LOCAL_DIR whose name ends with '.last.csv' is merged instead.
+    """
+    if not csv_last_names:
+        # No explicit list was given: fall back to the *.last.csv files.
+        csv_last_names = [entry for entry in os.listdir(LOCAL_DIR)
+                          if entry.endswith('.last.csv')]
+    for name in csv_last_names:
+        merge_csv(name)
+
+
+def myexec(cmd):
+    """
+    Run cmd with Popen, capturing its standard error, and wait for it to end.
+
+    Returns True when the exit status is zero. Otherwise the command and its
+    error output are logged as an error and False is returned.
+    """
+    process = Popen(cmd, stderr=PIPE)
+    _, raw_stderr = process.communicate()
+    stderr_text = raw_stderr.decode('utf-8')
+    if process.returncode == 0:
+        return True
+    logging.error("Can't run %s: %s", cmd, stderr_text)
+    return False
+
+
+def remove_headers(filein, fileout, delimline, keepdelim=False):
+    """
+    Copies filein in fileout, without the headers.
+
+    Look for the first line equal to "delimline" in filein.
+    Everything before is not copied.
+    The delimline itself is not copied unless keepdelim is True.
+    Empty lines are dropped, both from the headers and from the copy.
+    If delimline is never found, fileout is left empty and every non-empty
+    line is reported as a header.
+
+    returns the list of header lines, excluding delimline
+    """
+    headers = []
+    past_headers = False
+    # Both files are UTF-8: delimline may contain accented characters, so
+    # reading with the locale default could make the comparison fail.
+    with open(filein, encoding='utf-8') as fin, \
+            open(fileout, 'w', encoding='utf-8') as fout:
+        for raw_line in fin:
+            line = raw_line.rstrip('\n')
+            if not line:
+                continue
+            if past_headers:
+                fout.write(line + '\n')
+            elif line == delimline:
+                past_headers = True
+                if keepdelim:
+                    fout.write(line + '\n')
+            else:
+                headers.append(line)
+    return headers
+
+
+def merge_csv(filename_last):
+    """
+    Merge a downloaded CSV file into the master CSV file of its account.
+
+    filename_last: name of a file located in LOCAL_DIR, without any
+    directory part. The account is the part of the name before the first
+    dot, and the master file is LOCAL_DIR + account + '.csv'.
+
+    When the master file does not exist yet, it is created from
+    filename_last, keeping the CSV header line. Otherwise both files are
+    stripped of their headers into LOCAL_DIR/tmp/ and compared with the
+    external "diff" command; the lines that diff reports as added are
+    written right after the CSV header, followed by the previous master
+    content, and the result replaces the master file.
+
+    Returns None. Invalid names and command failures are logged and abort
+    the merge.
+    """
+    dotpos = filename_last.find('.')
+    if dotpos == -1:
+        logging.critical('File name %s must contain a dot.', filename_last)
+        return
+    if '/' in filename_last:
+        # The message is double-quoted so that the inner '/' stays inside
+        # the string instead of being parsed as a division operator.
+        logging.critical("File name %s must not contain '/'.", filename_last)
+        return
+
+    account = filename_last[:dotpos]
+
+    logging.debug('Agregating %s', account)
+
+    oldmastername = LOCAL_DIR + account + '.csv'
+    newmastername = LOCAL_DIR + account + '.csv.new'
+    tmp_last = LOCAL_DIR + 'tmp/last.csv'
+    tmp_master = LOCAL_DIR + 'tmp/master.csv'
+
+    if not os.access(oldmastername, os.F_OK):
+        logging.warning('Master csv file not found for %s: creating', account)
+        remove_headers(LOCAL_DIR + filename_last, oldmastername, CSV_HEADER, keepdelim=True)
+        return
+
+    remove_headers(LOCAL_DIR + filename_last, tmp_last, CSV_HEADER)
+    remove_headers(oldmastername, tmp_master, CSV_HEADER)
+    cmd = ['diff', '-Nau', tmp_master, tmp_last]
+    proc = Popen(cmd, stdout=PIPE, stderr=PIPE)
+    out, errormsg = proc.communicate()
+    out = str(out, encoding='utf-8')
+    errormsg = str(errormsg, encoding='utf-8')
+    # The exit status is not used: diff exits with 1 when the files differ.
+    # Anything on stderr is treated as a failure.
+    if errormsg:
+        logging.critical("Can't run %s: %s", cmd, errormsg)
+        return
+
+    with open(newmastername, 'w', encoding='utf-8') as newmasterfile:
+        newmasterfile.write(CSV_HEADER + '\n')
+
+        # New rows go first, ahead of the previous master content.
+        for diffline in out.split('\n'):
+            # '+' marks an added line; '+++' is the diff file header.
+            if diffline.startswith('+') and not diffline.startswith('+++'):
+                diffline = diffline[1:]  # Remove starting '+'
+                logging.info(diffline)
+                newmasterfile.write(diffline + '\n')
+
+        with open(tmp_master, encoding='utf-8') as oldmasterfile:
+            newmasterfile.write(oldmasterfile.read())
+
+    # Rename within the same directory; no external "mv" process is needed.
+    try:
+        os.replace(newmastername, oldmastername)
+    except OSError as err:
+        logging.error("Can't rename %s to %s: %s", newmastername, oldmastername, err)
+
+
+if __name__ == '__main__':
+    # Command line interface.
+    argparser = argparse.ArgumentParser(description='Track NEF accounts')
+    argparser.add_argument(
+        '-d', '--debug', dest='debug',
+        action='store_true', default=False,
+        help="debug mode")
+    argparser.add_argument(
+        '--no-download', dest='no_download',
+        action='store_true', default=False,
+        help="don't download. Only agregate.")
+    argparser.add_argument(
+        '--csvlast', nargs='+', metavar='file.csv',
+        help="Process this file rather than *.last.csv.")
+    args = argparser.parse_args(sys.argv[1:])
+
+    logging.basicConfig(
+        level=logging.DEBUG if args.debug else logging.INFO,
+        format='%(asctime)s %(levelname)s %(message)s')
+
+    logging.debug(args)
+
+    os.umask(0o077)  # This is really private
+
+    # Work from the scratch directory, creating it on the first run.
+    TMP_DIR = LOCAL_DIR + 'tmp/'
+    try:
+        os.mkdir(TMP_DIR)
+    except FileExistsError:
+        pass
+    os.chdir(TMP_DIR)
+
+    if not args.no_download:
+        download()
+    agregate(args.csvlast)