Implement the csv file merge
author Jean-Michel Nirgal Vourgère <jmv@nirgal.com>
Mon, 26 Sep 2016 10:08:37 +0000 (10:08 +0000)
committer Jean-Michel Nirgal Vourgère <jmv@nirgal.com>
Mon, 26 Sep 2016 10:08:37 +0000 (10:08 +0000)
go.py

diff --git a/go.py b/go.py
index 940d5eeb261400226123c5aa55f7be393899a781..426508677adf09c3a9276aa5d99767b8c9796e4b 100755 (executable)
--- a/go.py
+++ b/go.py
@@ -7,7 +7,7 @@ import logging
 from datetime import datetime
 import urllib.request
 from http.cookiejar import CookieJar
-from subprocess import Popen, PIPE, call
+from subprocess import Popen, PIPE, call, DEVNULL
 
 import html_parser
 import htmlentities
@@ -16,6 +16,7 @@ BASE_URL = 'https://voscomptesenligne.labanquepostale.fr'
 WSOST_PREFIX = '/wsost/OstBrokerWeb/loginform?imgid='
 
 LOCAL_DIR = '/home/nirgal/banquepostale/'
+CSV_HEADER = 'Date;Libellé;Montant(EUROS);Montant(FRANCS)'
 
 def get_login_password():
     config=open(LOCAL_DIR + 'config').read()
@@ -83,19 +84,6 @@ def download():
     img = httpresponse.read()
     open('loginform.gif', 'wb').write(img)
 
-    #root = html_parser.html_parse(html)
-    ##html_parser.print_idented_tree(root)
-    #for img in html_parser.get_elem(root, 'img'):
-    #    src = img.attributes.get('src', '')
-    #    print(img)
-    #    if src.startswith(WSOST_PREFIX):
-    #        print(img)
-    #        img_id = src[len(WSOST_PREFIX)]
-    #        #print(img_id)
-    #        httpresponse = httpopen(BASE_URL + src)
-    #        img = httpresponse.read()
-    #        open(img_id+'.gif', 'wb').write(img)
-
     xlt_password = {}
     for choice in range(16):
         column = choice % 4
@@ -220,33 +208,110 @@ def download():
     return result
 
 
-def agregate():
-    CSV_HEADER = 'Date;Libellé;Montant(EUROS);Montant(FRANCS)'
+def agregate(csv_last_names):
+    # If a specific list of file was given, process these
+    if csv_last_names:
+        for name in csv_last_names:
+            merge_csv(name)
+        return
 
+    # Else process *.last.csv files
     account_files = os.listdir(LOCAL_DIR)
     for account_file in account_files:
         if not account_file.endswith('.last.csv'):
             continue
-        
-        account = account_file[:-len('.last.csv')]
-        logging.debug('Agregating %s', account)
-
-        if os.access(LOCAL_DIR + account + '.csv', os.F_OK):
-            logging.debug('Not implemented')
-            pass
-        else:
-            logging.warning('Master csv file not found for %s: creating', account)
-            masterfile = open(LOCAL_DIR + account + '.csv', 'w', encoding='utf-8')
-
-            past_headers = False
-            for line in open(LOCAL_DIR + account_file).read().split('\n'):
-                logging.debug('line: %s', line)
+     
+        merge_csv(account_file)   
+
+
+def myexec(cmd):
+    proc = Popen(cmd, stderr=PIPE)
+    errmsg = str(proc.communicate()[1], encoding='utf-8')
+    errcode = proc.wait()
+    if errcode:
+        logging.error("Can't run %s: %s", cmd, errmsg)
+        return False
+    return True
+
+
+def remove_headers(filein, fileout, delimline, keepdelim=False):
+    """
+    Copies filein in fileout, without the headers.
+    Look for a line containing "delimline" in filein.
+    Everything before is not copied.
+    The delimline itself is not copied unless keepdelim is True.
+    returns the headers, excluding delimline
+    """
+    headers = []
+    past_headers = False
+    with open(filein) as fin:
+        with open(fileout, 'w', encoding='utf-8') as fout:
+            for line in fin.read().split('\n'):
                 if not line:
                     continue
                 if past_headers:
-                    masterfile.write(line + '\n')
-                elif line == CSV_HEADER:
-                    past_headers = True
+                    fout.write(line + '\n')
+                else:
+                    if line == delimline:
+                        past_headers = True
+                        if keepdelim:
+                            fout.write(line + '\n')
+                    else:
+                        headers.append(line)
+    return headers
+                        
+    
+def merge_csv(filename_last):
+    dotpos = filename_last.find('.')
+    if dotpos == -1:
+        logging.critical('File name %s must contain a dot.', filename_last)
+        return
+    if filename_last.find('/') >= 0:
+        logging.critical('File name %s must not contain '/'.' % filename_last)
+        return
+
+    account = filename_last[:dotpos]
+
+    logging.debug('Agregating %s', account)
+    oldmastername = LOCAL_DIR + account + '.csv'
+    newmastername = LOCAL_DIR + account + '.csv.new'
+
+    if not os.access(oldmastername, os.F_OK):
+        logging.warning('Master csv file not found for %s: creating', account)
+        remove_headers(LOCAL_DIR + filename_last, oldmastername, CSV_HEADER, keepdelim=True)
+        return
+        
+    remove_headers(LOCAL_DIR + filename_last,
+                   LOCAL_DIR + 'tmp/last.csv',
+                   CSV_HEADER)
+    remove_headers(oldmastername,
+                   LOCAL_DIR + 'tmp/master.csv',
+                   CSV_HEADER)
+    cmd = ['diff', '-Nau', LOCAL_DIR + 'tmp/master.csv', LOCAL_DIR + 'tmp/last.csv']
+    proc = Popen(cmd, stdout=PIPE, stderr=PIPE)
+    out, errormsg = proc.communicate()
+    proc.wait()
+    out = str(out, encoding='utf-8')
+    errormsg = str(errormsg, encoding='utf-8')
+    if errormsg:
+        logging.critical("Can't run %s: %s", cmd, errormsg)
+        return
+
+    with open(newmastername, 'w', encoding='utf-8') as newmasterfile:
+        newmasterfile.write(CSV_HEADER + '\n')
+
+        for diffline in out.split('\n'):
+            if diffline.startswith('+') and not diffline.startswith('+++'):
+                diffline = diffline[1:]  # Remove staring '+'
+                logging.info(diffline)
+                newmasterfile.write(diffline + '\n')
+
+        with open(LOCAL_DIR + 'tmp/master.csv') as oldmasterfile:
+            newmasterfile.write(oldmasterfile.read())
+
+    myexec(['mv', newmastername, oldmastername])
+
 
 
 if __name__ == '__main__':
@@ -258,6 +323,11 @@ if __name__ == '__main__':
     parser.add_option('--no-download',
         action='store_true', dest='no_download', default=False,
         help="don't download. Only agregate.")
+    parser.add_option('--csvlast',
+        action='append', dest='csv_last_names', default=[],
+        metavar='file.csv',
+        help="Process this file rather than *.last.csv. "
+             "That option can be used multiple times.")
     (options, args) = parser.parse_args()
 
     if options.debug:
@@ -278,4 +348,4 @@ if __name__ == '__main__':
 
     if not options.no_download:
         download()
-    agregate()
+    agregate(options.csv_last_names)