First aggregation code
author Jean-Michel Nirgal Vourgère <jmv@nirgal.com>
Sun, 29 May 2011 23:00:02 +0000 (23:00 +0000)
committer Jean-Michel Nirgal Vourgère <jmv@nirgal.com>
Sun, 29 May 2011 23:00:02 +0000 (23:00 +0000)
go.py

diff --git a/go.py b/go.py
index cfcf88abaaf0fd896e0aaf09d31c309bc0dc4e0e..75d37231c2374f3dbae821388f9f09c7ff7f992f 100755 (executable)
--- a/go.py
+++ b/go.py
@@ -7,7 +7,7 @@ import logging
 from datetime import datetime
 import urllib.request
 from http.cookiejar import CookieJar
-from subprocess import Popen, PIPE
+from subprocess import Popen, PIPE, call
 
 import html_parser
 import htmlentities
@@ -46,7 +46,7 @@ def sleep(seconds):
     logging.debug('Waiting %s seconds', seconds)
     time.sleep(seconds)
 
-def main():
+def download():
     '''
     Download all the accounts csv data and store them in LOCAL_DIR
     Return a list of filenames
@@ -130,6 +130,7 @@ def main():
             cpttype, cptnum, searchtype = match.group(1), match.group(2), match.group(3)
     
             logging.info('Found account type %s: %s' % (cpttype, cptnum))
+            result.append(cptnum)
 
             httpresponse = httpopen(BASE_URL + '/voscomptes/canalXHTML/' + href[len('../../'):])
             html = httpresponse.read().decode('iso8859-1')
@@ -166,6 +167,16 @@ def main():
             open(filename, 'w', encoding='utf-8').write(csvdata)
             sleep(9)
 
+            lastfilename = LOCAL_DIR + cptnum + '.last.csv'
+            try:
+                os.unlink(lastfilename)
+            except OSError as err:
+                if err.errno == 2: #No such file or directory
+                    logging.warning('Could not find last csv link. Running for the first time?')
+                else:
+                    raise
+            os.symlink(filename, lastfilename)
+
     logging.info('Disconnecting')
     httpresponse = httpopen(BASE_URL + '/voscomptes/canalXHTML/securite/deconnexion/init-deconnexion.ea')
     html = httpresponse.read().decode('iso8859-1')
@@ -174,6 +185,34 @@ def main():
     logging.info('Disconnected')
     return result
 
+
+def agregate():
+    CSV_HEADER = 'Date;Libellé;Montant(EUROS);Montant(FRANCS)'
+
+    account_files = os.listdir(LOCAL_DIR)
+    for account_file in account_files:
+        if not account_file.endswith('.last.csv'):
+            continue
+        
+        account = account_file[:-len('.last.csv')]
+        logging.debug('Agregating %s', account)
+
+        if os.access(LOCAL_DIR + account + '.csv', os.F_OK):
+            pass
+        else:
+            logging.warning('Master csv file not found for %s: creating', account)
+            masterfile = open(LOCAL_DIR + account + '.csv', 'w', encoding='utf-8')
+
+            past_headers = False
+            for line in open(LOCAL_DIR + account_file).read().split('\n'):
+                if not line:
+                    continue
+                if past_headers:
+                    masterfile.write(line + '\n')
+                elif line == CSV_HEADER:
+                    past_headers = True
+
+
 if __name__ == '__main__':
     from optparse import OptionParser
     parser = OptionParser()
@@ -188,7 +227,8 @@ if __name__ == '__main__':
         loglevel = logging.INFO
     logging.basicConfig(level=loglevel, format='%(asctime)s %(levelname)s %(message)s')
 
-    os.umask(0o077)
+    os.umask(0o077) # this is really private
+
     TMP_DIR = LOCAL_DIR + 'tmp/'
     try:
         os.mkdir(TMP_DIR)
@@ -197,4 +237,5 @@ if __name__ == '__main__':
             raise
     os.chdir(TMP_DIR)
 
-    main()
+    download()
+    agregate()