+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from __future__ import division
+import sys
+import os
+import logging
+import urllib2
+from time import sleep
+from ais.html_parser import *
+
+DOWNLOAD_SLEEP_TIME = 10  # seconds passed to sleep() after each download; the --download-sleep option overrides it
+EUROPA_DIR = '/var/lib/ais/europa/'  # prefix of the cached <callsign>-list.html / <callsign>-detail.html files
+
+# POST http://ec.europa.eu/fisheries/fleet/index.cfm?method=Search.ListSearchSimple
+# pays=index.cfm%3Fmethod%3DSearch.SearchSimple%26country%3D&ss_char_main_gear=&ss_Period=A&ss_P_date_from=&ss_indent_CFR=&ss_indent_name=&ss_indent_ext_mark=&ss_indent_reg_nr=&ss_ident_IRCS=MHXK6&search_type=simple&ss_indent_search_id=8982&nbr_event_disp=50
+
def _europa_fetch(url, data=None):
    """Download *url* and return the raw, undecoded response body.

    The request is a POST when *data* is given and a GET otherwise.  Every
    request carries the same browser-like User-Agent header.
    """
    request = urllib2.Request(url)
    request.add_header('User-Agent', 'Mozilla/5.0 (X11; U; Linux i686; fr; rv:1.9.0.7) Gecko/2009032018 Firefox/3.0.6 (Debian-3.0.6-1)')
    response = urllib2.urlopen(request, data)
    try:
        return response.read()
    finally:
        # Release the connection even when reading the body fails.
        response.close()


def _europa_save(path, content):
    """Write the byte string *content* to *path* and close the file."""
    out = open(path, 'w')
    try:
        out.write(content)
    finally:
        out.close()


def europa_get_html(callsign):
    """Return the fleet register detail page for *callsign* as unicode.

    Two files under EUROPA_DIR act as a cache: ``<callsign>-list.html``
    holds the search result page and ``<callsign>-detail.html`` the detail
    page.  A cached detail page is returned without any network access.  A
    cached list page without a detail page means an earlier call did not
    get as far as storing a detail page; the register is not queried again
    and None is returned.

    Otherwise three requests are made, each followed by a pause of
    DOWNLOAD_SLEEP_TIME seconds:

    1. GET the search form to read its ``ss_indent_search_id`` value;
    2. POST the search for the callsign (field ``ss_ident_IRCS``);
    3. GET the detail page behind the last matching link of the result list.

    Returns:
        The detail page decoded from UTF-8, or None when the register
        answers 'No data found' or only the list page is cached.

    Raises:
        AssertionError: when the search form has no usable
            ``ss_indent_search_id`` or the result list has no detail link.
    """
    # Local import: only this function needs it.
    from urllib import urlencode

    base_url = 'http://ec.europa.eu/fisheries/fleet/'
    list_file = EUROPA_DIR + callsign + '-list.html'
    detail_file = EUROPA_DIR + callsign + '-detail.html'

    if os.path.exists(detail_file):
        cached = open(detail_file)
        try:
            return unicode(cached.read(), 'utf-8')
        finally:
            cached.close()

    if os.path.exists(list_file):
        # The list page is saved before the detail page is fetched, so a
        # list file on its own marks a lookup that produced no detail page.
        return None

    # STEP 1: get a search ID

    html = unicode(
        _europa_fetch(base_url + 'index.cfm?method=Search.SearchSimple&country='),
        'utf-8')

    ss_indent_search_id = None
    for tag in get_elem(html_parse(html), 'input'):
        if tag.attributes.get('name', None) == 'ss_indent_search_id':
            ss_indent_search_id = tag.attributes.get('value', None)
            break
    if ss_indent_search_id is None:
        # Raised explicitly instead of through `assert` so the check
        # survives `python -O`; the exception type is unchanged.
        raise AssertionError('Can\'t get a search_id')
    logging.info('ss_indent_search_id=%s', ss_indent_search_id)

    sleep(DOWNLOAD_SLEEP_TIME)

    # STEP 2: get the list of results

    # urlencode() escapes the callsign and the search id, so characters
    # such as '&' or ' ' cannot corrupt the form body.  Field order matches
    # the request documented at the top of the file.
    post_data = urlencode([
        ('pays', 'index.cfm?method=Search.SearchSimple&country='),
        ('ss_char_main_gear', ''),
        ('ss_Period', 'A'),
        ('ss_P_date_from', ''),
        ('ss_indent_CFR', ''),
        ('ss_indent_name', ''),
        ('ss_indent_ext_mark', ''),
        ('ss_indent_reg_nr', ''),
        ('ss_ident_IRCS', callsign),
        ('search_type', 'simple'),
        ('ss_indent_search_id', ss_indent_search_id),
        ('nbr_event_disp', '50'),
    ])
    raw = _europa_fetch(
        base_url + 'index.cfm?method=Search.ListSearchSimple', post_data)
    # Saved before decoding, exactly as received.
    _europa_save(list_file, raw)
    html = unicode(raw, 'utf-8')

    if u'No data found' in html:
        logging.error('No data found for %s', callsign)
        return None

    lastevent_url = None
    for tag in get_elem(html_parse(html), 'a'):
        tagurl = tag.attributes.get('href', '')
        if tagurl.startswith(u'index.cfm?method=Search.DetailSearchSimple&event_key='):
            # No break: the last matching link in the page wins.
            lastevent_url = tagurl
    if not lastevent_url:
        raise AssertionError('Internal error: Not appropriate URI found.')
    logging.info('Last event is on %s', lastevent_url)

    sleep(DOWNLOAD_SLEEP_TIME)

    # STEP 3: get the last results in html

    raw = _europa_fetch(base_url + lastevent_url)
    _europa_save(detail_file, raw)
    html = unicode(raw, 'utf-8')

    # Pause after the final download as well, so that consecutive calls
    # stay spaced out.
    sleep(DOWNLOAD_SLEEP_TIME)

    return html
+
+
+
def _europa_clean_text(node):
    """Return the merged leaf text of *node*, tidied up.

    Non-breaking spaces become plain spaces, then leading and trailing
    tabs, newlines, spaces and colons are stripped.
    """
    text = get_merged_leaf_content(node)
    return text.replace(u'\xa0', ' ').strip(u'\t\n :')


def europa_get(callsign):
    """Return the fleet register record for *callsign* as a dictionary.

    The detail page obtained from europa_get_html() is parsed and every
    ``<li>`` element is read as a pair: the text of its first child is the
    key and the text of its second child the value.  ``<li>`` elements with
    fewer than two children are skipped.

    Returns:
        A dict mapping field labels to values, or None when
        europa_get_html() returned no page.
    """
    html = europa_get_html(callsign)
    if not html:
        return None

    result = {}
    for li in get_elem(html_parse(html), 'li'):
        try:
            label_node = li.children[0]
            value_node = li.children[1]
        except IndexError:
            # Not a label/value pair; one odd list item must not abort the
            # whole record.
            continue
        label = _europa_clean_text(label_node)
        value = _europa_clean_text(value_node)
        result[label] = value
    return result
+
+
+if __name__ == '__main__':
+ from optparse import OptionParser
+ parser = OptionParser(usage='%prog [options] callsign [callsign] ...')
+ parser.add_option('-d', '--debug',
+ action='store_true', dest='debug', default=False,
+ help="debug mode")
+ parser.add_option('--download-sleep', help="how many seconds do we sleep after each download. default=%default", action='store', type='int', dest='sleep', default=DOWNLOAD_SLEEP_TIME)
+ (options, args) = parser.parse_args()
+
+ if len(args) == 0:
+ print >> sys.stderr, "Need at least a parameter"
+ sys.exit(1)
+
+ DOWNLOAD_SLEEP_TIME = options.sleep
+
+ if options.debug:
+ loglevel = logging.DEBUG
+ else:
+ loglevel = logging.INFO
+
+ logging.basicConfig(level=loglevel, format='%(asctime)s %(levelname)s %(message)s')
+
+ keys = [
+ u'IRCS',
+ u'Vessel Name',
+ u'Country Code',
+ u'Port Code',
+ u'LOA',
+ u'Tonnage GT',
+ u'Year of Construction',
+ u'Entry Service Year',
+ u'Main Gear type',
+ u'Secondary Gear type',
+ u'External Marking',
+ ]
+ for callsign in args:
+ info = europa_get(callsign)
+ if not info:
+ continue
+ for key in keys:
+ print key, ':', info[key]
+ print