2 # -*- coding: utf-8 -*-
4 from __future__ import division
10 from ais.html_parser import *
# Number of seconds passed to sleep() after each request in europa_get_html().
# The script section rebinds this name from the --download-sleep option.
12 DOWNLOAD_SLEEP_TIME = 10
# Path prefix for the downloaded pages: europa_get_html() reads and writes
# EUROPA_DIR + callsign + '-list.html' and EUROPA_DIR + callsign + '-detail.html'.
13 EUROPA_DIR = '/var/lib/ais/europa/'
15 # POST http://ec.europa.eu/fisheries/fleet/index.cfm?method=Search.ListSearchSimple
16 # pays=index.cfm%3Fmethod%3DSearch.SearchSimple%26country%3D&ss_char_main_gear=&ss_Period=A&ss_P_date_from=&ss_indent_CFR=&ss_indent_name=&ss_indent_ext_mark=&ss_indent_reg_nr=&ss_ident_IRCS=MHXK6&search_type=simple&ss_indent_search_id=8982&nbr_event_disp=50
_EUROPA_BASE_URL = 'http://ec.europa.eu/fisheries/fleet/'
_EUROPA_USER_AGENT = 'Mozilla/5.0 (X11; U; Linux i686; fr; rv:1.9.0.7) Gecko/2009032018 Firefox/3.0.6 (Debian-3.0.6-1)'


def _europa_fetch(url, data=None):
    """Return the raw body of *url*, requested with the browser User-Agent.

    *data*, when given, is handed to urllib2.urlopen as the request body.
    The response object is always closed, even if reading fails.
    """
    request = urllib2.Request(url)
    request.add_header('User-Agent', _EUROPA_USER_AGENT)
    response = urllib2.urlopen(request, data)
    try:
        return response.read()
    finally:
        response.close()


def _europa_read_file(path):
    """Return the whole content of *path*, closing the file afterwards."""
    handle = open(path)
    try:
        return handle.read()
    finally:
        handle.close()


def _europa_write_file(path, content):
    """Write *content* to *path*, closing (and so flushing) the file."""
    handle = open(path, 'w')
    try:
        handle.write(content)
    finally:
        handle.close()


def europa_get_html(callsign):
    """Return the detail page of the last listed event for *callsign*.

    Downloaded pages are kept on disk as EUROPA_DIR + callsign +
    '-list.html' (search result listing) and '-detail.html' (detail page);
    an existing file is used instead of querying the site again.

    Returns the page as a unicode string, or None when the listing contains
    u'No data found'.

    Raises AssertionError when no search id can be read from the search
    form, or when the listing holds no detail link.

    NOTE(review): the reads of each response, the branch that loads an
    existing listing file and the return statements were not present in the
    reviewed text; they are reconstructed from the surrounding statements --
    confirm against the original file.
    """
    list_file = EUROPA_DIR + callsign + '-list.html'
    detail_file = EUROPA_DIR + callsign + '-detail.html'

    if os.path.exists(detail_file):
        return unicode(_europa_read_file(detail_file), 'utf-8')

    if os.path.exists(list_file):
        html = _europa_read_file(list_file)
    else:
        # STEP 1: get a search ID

        html = _europa_fetch(_EUROPA_BASE_URL + 'index.cfm?method=Search.SearchSimple&country=')
        html = unicode(html, 'utf-8')

        root = html_parse(html)
        ss_indent_search_id = None
        for tag in get_elem(root, 'input'):
            if tag.attributes.get('name', None) == 'ss_indent_search_id':
                ss_indent_search_id = tag.attributes.get('value', None)
        # Raised explicitly (same type and message as the former assert) so
        # the check is not stripped when Python runs with -O.
        if ss_indent_search_id is None:
            raise AssertionError('Can\'t get a search_id')
        logging.info('ss_indent_search_id=%s', ss_indent_search_id)

        sleep(DOWNLOAD_SLEEP_TIME)

        # STEP 2: get the list of results

        post_data = (
            u'pays=index.cfm%3Fmethod%3DSearch.SearchSimple%26country%3D'
            u'&ss_char_main_gear=&ss_Period=A&ss_P_date_from=&ss_indent_CFR='
            u'&ss_indent_name=&ss_indent_ext_mark=&ss_indent_reg_nr='
            u'&ss_ident_IRCS=' + callsign +
            u'&search_type=simple&ss_indent_search_id=' + ss_indent_search_id +
            u'&nbr_event_disp=50')
        html = _europa_fetch(_EUROPA_BASE_URL + 'index.cfm?method=Search.ListSearchSimple', post_data)
        # The raw bytes are stored before decoding.
        _europa_write_file(list_file, html)
    html = unicode(html, 'utf-8')

    if u'No data found' in html:
        logging.error('No data found for ' + callsign)
        return None

    root = html_parse(html)
    lastevent_url = None
    # No break: the last matching link of the listing wins.
    for tag in get_elem(root, 'a'):
        tagurl = tag.attributes.get('href', '')
        if tagurl.startswith(u'index.cfm?method=Search.DetailSearchSimple&event_key='):
            lastevent_url = tagurl
    if not lastevent_url:
        raise AssertionError('Internal error: Not appropriate URI found.')
    logging.info('Last event is on %s', lastevent_url)

    sleep(DOWNLOAD_SLEEP_TIME)

    # STEP 3: get the last results in html

    html = _europa_fetch(_EUROPA_BASE_URL + lastevent_url)
    _europa_write_file(detail_file, html)
    html = unicode(html, 'utf-8')

    sleep(DOWNLOAD_SLEEP_TIME)

    return html
def europa_get(callsign):
    """Return the fields found on the detail page of *callsign* as a dict.

    For every <li> element of the page, the merged leaf content of its first
    and second children is taken, u'\\xa0' is replaced by a space, and
    leading/trailing tabs, newlines, spaces and colons are stripped.  The
    first text is used as key, the second as value.

    Returns None when europa_get_html() returns None.

    NOTE(review): the None guard, the dict that collects the pairs and the
    return statement were not present in the reviewed text; they are
    reconstructed from how the script section and normalize_info() use the
    result -- confirm against the original file.
    """
    html = europa_get_html(callsign)
    if html is None:
        return None

    root = html_parse(html)
    info = {}
    for li in get_elem(root, 'li'):
        try:
            elem0 = li.children[0]
            elem1 = li.children[1]
        except IndexError:
            # An <li> without two children cannot hold a key/value pair;
            # skip it instead of aborting the whole lookup.
            logging.debug('Skipping <li> with fewer than two children')
            continue
        txt0 = get_merged_leaf_content(elem0).replace(u'\xa0', ' ').strip(u'\t\n :')
        txt1 = get_merged_leaf_content(elem1).replace(u'\xa0', ' ').strip(u'\t\n :')
        info[txt0] = txt1
    return info
def normalize_info(info, callsign):
    """Clean up, in place, the field values collected for one vessel.

    - An empty u'IRCS' is replaced by *callsign*.
    - u'Country Code', u'Port Code', u'Main Gear type',
      u'Secondary Gear type' and u'Public aid' are reduced to the text that
      follows their first u' - ' separator.  Empty values and values without
      a separator are left unchanged instead of raising IndexError.
    - u'LOA' and u'Tonnage GT' get their commas replaced by dots; trailing
      spaces and 'T' characters are also removed from u'Tonnage GT'.

    Raises KeyError when one of these fields is missing from *info*.

    Returns the same *info* dict; the script's main loop rebinds its
    variable to this return value.
    """
    def _after_separator(value):
        # Keep only what follows the first u' - '.
        if not value:
            return value
        parts = value.split(u' - ', 1)
        if len(parts) == 2:
            return parts[1]
        return value

    if not info[u'IRCS']:
        info[u'IRCS'] = callsign
    info[u'Country Code'] = _after_separator(info[u'Country Code'])
    info[u'Port Code'] = _after_separator(info[u'Port Code'])
    info[u'LOA'] = info[u'LOA'].replace(u',', u'.')
    info[u'Tonnage GT'] = info[u'Tonnage GT'].replace(u',', u'.').rstrip(' T')
    info[u'Main Gear type'] = _after_separator(info[u'Main Gear type'])
    info[u'Secondary Gear type'] = _after_separator(info[u'Secondary Gear type'])
    info[u'Public aid'] = _after_separator(info[u'Public aid'])
    return info
# Writes `string`, encoded as UTF-8, to standard output.
# NOTE(review): presumably the body of the myprint() helper called at the end
# of the script section; its `def` line is not part of the reviewed text --
# confirm.
119 sys.stdout.write(string.encode('utf-8'))
# Script entry point: parse the command line, then look up every callsign
# given as a positional argument and print its fields.
121 if __name__ == '__main__':
122 from optparse import OptionParser
123 parser = OptionParser(usage='%prog [options] callsign [callsign] ...')
# -d/--debug: boolean flag stored in options.debug (off by default).
124 parser.add_option('-d', '--debug',
125 action='store_true', dest='debug', default=False,
# --download-sleep: integer stored in options.sleep; defaults to the
# module-level DOWNLOAD_SLEEP_TIME.
127 parser.add_option('--download-sleep',
128 action='store', type='int', dest='sleep', default=DOWNLOAD_SLEEP_TIME,
129 help="how many seconds do we sleep after each download. default=%default")
# --table: boolean flag stored in options.table_format (off by default).
130 parser.add_option('--table',
131 action='store_true', dest='table_format', default=False,
132 help='Format in table')
134 (options, args) = parser.parse_args()
# Error message sent to stderr.
# NOTE(review): the condition guarding it is not visible here; presumably it
# is reached when no callsign was given -- confirm.
137 print >> sys.stderr, "Need at least a parameter"
# Rebind DOWNLOAD_SLEEP_TIME, the name europa_get_html() passes to sleep(),
# to the value of --download-sleep.
140 DOWNLOAD_SLEEP_TIME = options.sleep
# Logging threshold: DEBUG or INFO.
# NOTE(review): the test choosing between the two is not visible here;
# presumably it is options.debug -- confirm.
143 loglevel = logging.DEBUG
145 loglevel = logging.INFO
147 logging.basicConfig(level=loglevel, format='%(asctime)s %(levelname)s %(message)s')
# NOTE(review): the three strings below look like entries of the `keys`
# sequence iterated further down; the start of that assignment and its other
# entries are not visible here -- confirm.
156 u'Year of Construction',
157 u'Entry Service Year',
159 u'Secondary Gear type',
# In table mode, walk the keys with their index before any lookup is done
# (the loop body is not visible here).
163 if options.table_format:
164 for i, key in enumerate(keys):
# For each callsign: fetch its fields with europa_get(), clean them with
# normalize_info(), then print them.
169 for callsign in args:
170 info = europa_get(callsign)
173 info = normalize_info(info, callsign)
174 if options.table_format:
175 for i, key in enumerate(keys):
# One "key: value" line per field, written through myprint().
182 myprint(key+u': '+info[key]+u'\n')