2 # -*- coding: utf-8 -*-
4 from __future__ import division
5 import sys, os, urllib2, time
6 from pprint import pprint
7 from datetime import datetime, date
9 from optparse import OptionParser
12 from ais.ntools import clean_alnum, clean_ais_charset, open_with_mkdirs, datetime_to_timestamp
13 from ais.common import *
14 from ais.html_parser import *
# Number of seconds passed to sleep() after each download; the __main__ section
# overwrites it with the value of --download-sleep.
16 DOWNLOAD_SLEEP_TIME = 10
# Overwritten in the __main__ section with the value of --no-download. The
# functions below print "<file> not found and downloads disabled." for files
# missing from the cache -- presumably when this flag is true (the tests
# themselves are not visible here).
17 DISABLE_DOWNLOAD = False
# Root of the on-disk page cache. File names are built by plain string
# concatenation as <dir><YYYYMMDD>/<mmsi>-<kind>, so the trailing slash matters.
18 MARINETRAFFIC_DIR = '/var/lib/ais/marinetraffic/'
# Import the static and voyage data of one vessel from its MarineTraffic
# "ship details" page.
#
# The page is read from MARINETRAFFIC_DIR/<reference_date>/<mmsi>-sum.html; the
# download code below requests shipdetails.aspx?MMSI=<mmsi> and writes that same
# file. The parsed fields are handed to add_nmea5_partial() with source 'MTWW'.
#
# reference_date: string; used as the cache sub-directory name and, parsed with
#   '%Y%m%d', as a timestamp source.
# mmsi: string; concatenated into the file name and URL and compared with the
#   MMSI found in the page.
# NOTE(review): the caller stores the result in `found`, but no return statement
# is visible in this block -- confirm the return contract.
21 def go_summary(reference_date, mmsi):
# Nested helper: turn the page HTML into a raw dict. u'name' comes from the
# <h1>; each <h2> opens a section (a dict stored under the <h2> text) whose
# entries map the text of a <b> element to the text gathered after it.
22 def get_raw_summary(html):
23 root = html_parse(html)
24 divs = get_elem(root, 'div')
# NOTE(review): `divdetail` is not assigned in the visible lines; presumably it
# is picked out of `divs` -- confirm.
31 for node in divdetail.children:
32 if isinstance(node, Tag) and node.name == 'h1':
33 info_raw[u'name'] = get_inner_text(node)
# A new <h2> starts a new section. The pending key/value pair is stored first,
# presumably only when one exists (the guard is not visible).
35 if isinstance(node, Tag) and node.name == 'h2':
37 info_raw[section][boldtext] = text
39 section = get_inner_text(node)
40 info_raw[section] = {}
# <br> and <b> both store the pending pair; <b> additionally supplies the next
# key. Text of other nodes is appended to the current value.
43 if isinstance(node, Tag) and node.name == 'br':
45 info_raw[section][boldtext] = text
47 elif isinstance(node, Tag) and node.name == 'b':
49 info_raw[section][boldtext] = text
51 boldtext = get_inner_text(node)
53 text += get_inner_text(node)
# Store the pair that is still pending after the last node.
55 info_raw[section][boldtext] = text
# Nested helper: convert the raw unicode strings of info_raw into the entries of
# `info` ('name', 'callsign', 'imo', 'length', 'breadth', 'mmsi', 'destination',
# 'draught', 'eta', 'voyage_updated'). Optional fields are fetched with
# .get(..., None).
59 def qualify_summary(info_raw):
62 info['name'] = info_raw['name']
# The section title is looked up with and without the trailing colon --
# presumably a fallback for two page layouts (the try/except is not visible).
65 details = info_raw[u"Vessel's Details:"]
67 details = info_raw[u"Vessel's Details"]
68 info['callsign'] = clean_alnum(details[u'Call Sign:'].encode('ascii', 'replace'))
69 tmp = details.get(u'IMO:', None)
71 tmp = tmp.replace(u',', u'').strip()
# The value u'999999999' is not stored as an IMO number.
72 if tmp != u'999999999':
# NOTE(review): commas were already removed two statements above, so this
# second replace() cannot match anything.
73 info['imo'] = tmp.replace(u', ', u'').strip()
74 tmp = details.get(u'Length x Breadth:', None)
# The u'm' characters are dropped and the rest is split on an upper-case 'X';
# both parts must then be accepted by int().
76 length, breadth = tmp.replace(u'm', u'').split('X')
77 info['length'], info['breadth'] = int(length), int(breadth)
78 info['mmsi'] = details[u'MMSI:'].strip()
79 tmp = details.get(u'Ship Type:', None)
# Entries of the `reverse_types` mapping (ship type label -> numeric type code)
# that is queried below; the start of the literal is not visible here.
89 u'Sailing Vessel': 36,
90 u'Pleasure Craft': 37,
95 u'Anti-Pollution': 54,
100 u'Special Craft': 59,
101 # Cargo is repported for types 70, 75, 76 .. 79
102 u'Cargo - Hazard A (Major)': 71,
103 u'Cargo - Hazard B': 72,
104 u'Cargo - Hazard C (Minor)': 73,
105 u'Cargo - Hazard D (Recognizable)': 74,
106 u'Tanker - Hazard A (Major)': 81,
107 u'Tanker - Hazard B': 82,
108 u'Tanker - Hazard C (Minor)': 83,
109 u'Tanker - Hazard D (Recognizable)': 84,
# Labels missing from the mapping yield None and are reported on stderr.
111 _type = reverse_types.get(tmp, None)
112 if _type is not None:
115 print >> sys.stderr , "NOTICE: can't properly qualify ship of type", tmp
116 # TODO year built .... ?
# Same with/without-colon lookup as for the details section above.
119 voyage = info_raw[u'Voyage Related Info (Last Received):']
121 voyage = info_raw[u'Voyage Related Info (Last Received)']
122 tmp = voyage.get(u'Destination:', None)
126 info['destination'] = tmp
127 tmp = voyage.get(u'Draught:', None)
# Draught is kept as a float after removing the u'm' unit suffix.
129 info['draught'] = float(tmp.replace(u'm', u''))
130 tmp = voyage.get(u'ETA:', None)
# Two date layouts are attempted: 'YYYY-MM-DD HH:MM' first, then the older
# 'DD/MM/YYYY HH:MM:SS'.
134 tmp = datetime.strptime(tmp, '%Y-%m-%d %H:%M')
136 print "Failed to parse ETA date. Trying old format ...",
137 tmp = datetime.strptime(tmp, '%d/%m/%Y %H:%M:%S')
# A parsed value equal to 1900-01-01 00:00 is not stored; anything else is kept
# as an 8-character 'MMDDhhmm' string.
140 if tmp != datetime(1900, 1, 1):
141 info['eta'] = tmp.strftime('%m%d%H%M')
142 tmp = voyage.get(u'Info Received:', None)
# Only the text before the first '(' is parsed as a date.
144 voyage_updated = tmp.split(u'(')[0].strip()
146 info['voyage_updated'] = datetime.strptime(voyage_updated, '%Y-%m-%d %H:%M')
148 print "Failed to parse voyage updated date. Trying old format ...",
149 info['voyage_updated'] = datetime.strptime(voyage_updated, '%d/%m/%Y %H:%M:%S')
# Body of go_summary proper: obtain the page, from the cache or by download.
155 filename = MARINETRAFFIC_DIR+reference_date+'/'+mmsi+'-sum.html'
156 if not os.path.exists(filename):
158 print >> sys.stderr, filename, 'not found and downloads disabled.'
# The request carries a browser User-Agent header.
160 request = urllib2.Request('http://www.marinetraffic.com/ais/shipdetails.aspx?MMSI='+mmsi)
161 request.add_header('User-Agent', 'Mozilla/5.0 (X11; U; Linux i686; fr; rv:1.9.0.7) Gecko/2009032018 Firefox/3.0.6 (Debian-3.0.6-1)')
162 uo = urllib2.urlopen(request)
# NOTE(review): `sleep` is called unqualified while the visible imports only
# bring in the `time` module; presumably a star import provides it -- confirm.
165 sleep(DOWNLOAD_SLEEP_TIME)
167 f = open_with_mkdirs(filename, 'w')
# The page is read back from disk and decoded as UTF-8.
171 html = file(filename).read()
172 html = unicode(html, 'utf8')
# Known error pages are detected by their text: the English "service
# unavailable" message or what appears to be its Greek counterpart.
174 if u'The requested service is unavailable.' in html or u'Η λειτουργία που ζητήσατε δεν είναι διαθέσιμη.' in html:
175 print >> sys.stderr, 'WARNING: The requested service is unavailable.'
178 if u'Non-existent Vessel' in html:
179 print >> sys.stderr, 'WARNING: Vessel unknown'
182 info_raw = get_raw_summary(html)
183 info = qualify_summary(info_raw)
# NOTE(review): this consistency check is an assert, so it vanishes when Python
# runs with -O.
186 assert info['mmsi'] == mmsi
# Statements of the local warning() helper used below (its def line is not
# visible here): they print "WARNING:" followed by the arguments to stderr.
189 print >> sys.stderr, "WARNING:"
191 print >> sys.stderr, args,
# Timestamp of the record: two candidates are computed, one from the page's
# "Info Received" date and one from reference_date; presumably the first is
# used when available (the if/else is not visible).
194 voyage_updated = info.get('voyage_updated', None)
196 timestamp = datetime_to_timestamp(voyage_updated)
198 timestamp = datetime_to_timestamp(datetime.strptime(reference_date, '%Y%m%d'))
# A missing IMO number becomes 0.
199 imo = int(info.get('imo', 0))
201 warning('imo', imo, 'is too big')
203 name = info.get('name', u'').encode('utf8')
205 warning('name', name, 'is too big, truncating')
207 name = clean_ais_charset(name)
208 callsign = clean_alnum(info.get('callsign', u'').encode('utf8'))
# NOTE(review): `type` shadows the builtin for the rest of this function.
209 type = info.get('type', 0)
210 if type < 0 or type > 100:
# The default u'00002460' decodes below to month 0, day 0, hour 24, minute 60
# -- presumably the "not available" encoding expected downstream; confirm.
212 eta = info.get('eta', u'00002460')
214 eta_M = int(eta[0:2])
215 eta_D = int(eta[2:4])
216 eta_h = int(eta[4:6])
217 eta_m = int(eta[6:8])
# Draught is multiplied by 10 and truncated to an integer.
222 draught = int(info.get('draught', 0)*10)
223 destination = clean_ais_charset(info.get('destination', u'').encode('utf8'))
# NOTE(review): four literal zeros are passed between `type` and the ETA
# fields, and the parsed 'length'/'breadth' are not used here; presumably the
# zeros are the dimension slots -- confirm against add_nmea5_partial.
225 add_nmea5_partial(mmsi, timestamp, imo, name, callsign, type, 0, 0, 0, 0, eta_M, eta_D, eta_h, eta_m, draught, destination, 'MTWW')
# Import the positions listed on a vessel's MarineTraffic ITINERARIES datasheet
# page: each table row is turned into an add_nmea1() record and also triggers
# import_track() for the date found in that row.
#
# reference_date: string; cache sub-directory name under MARINETRAFFIC_DIR.
# mmsi: string; concatenated into file names and URLs.
# page: None or 1 selects the first page (<mmsi>-mov.html); any other value is
#   appended to the file name (<mmsi>-mov<page>.html) and sent as var_page.
# NOTE(review): the caller tests the result in an `if`, but no return statement
# is visible in this block -- confirm the return contract.
230 def import_last_pos(reference_date, mmsi, page=None):
231 if page is None or page == 1:
232 filename = MARINETRAFFIC_DIR+reference_date+'/'+mmsi+'-mov.html'
234 filename = MARINETRAFFIC_DIR+reference_date+'/'+mmsi+'-mov'+str(page)+'.html'
236 if not os.path.exists(filename):
238 print >> sys.stderr, filename, 'not found and downloads disabled.'
# The first page uses the plain URL; other pages add an explicit descending
# MINTIME ordering and the page number.
241 if page is None or page == 1:
242 request = urllib2.Request('http://www.marinetraffic.com/ais/datasheet.aspx?datasource=ITINERARIES&MMSI='+mmsi)
244 request = urllib2.Request('http://www.marinetraffic.com/ais/datasheet.aspx?datasource=ITINERARIES&MMSI='+mmsi+'&orderby=MINTIME&sort_order=DESC&var_page='+str(page))
245 request.add_header('User-Agent', 'Mozilla/5.0 (X11; U; Linux i686; fr; rv:1.9.0.7) Gecko/2009032018 Firefox/3.0.6 (Debian-3.0.6-1)')
246 uo = urllib2.urlopen(request)
# NOTE(review): `sleep` is called unqualified while the visible imports only
# bring in the `time` module; presumably a star import provides it -- confirm.
249 sleep(DOWNLOAD_SLEEP_TIME)
251 f = open_with_mkdirs(filename, 'w')
# The page is read back from disk and decoded as UTF-8.
255 html = file(filename).read()
256 html = unicode(html, 'utf8')
# Pages without data and error pages are recognised by their text.
258 if u'No Records Found' in html:
259 print >> sys.stderr, 'NOTICE: No Records found.'
261 if u'The requested service is unavailable.' in html or u'Η λειτουργία που ζητήσατε δεν είναι διαθέσιμη.' in html:
262 print >> sys.stderr, 'WARNING: The requested service is unavailable.'
# Only the first <table> of the document is examined.
266 root = html_parse(html)
267 table = get_elem(root, u'table')[0]
271 # Now, import each track
272 for row in table.children[1:]: # ignore first line with headers
# Cell 5 holds the coordinates; non-breaking spaces (U+00A0) are removed first.
275 latlong = get_merged_leaf_content(line[5])
276 latlong = latlong.replace(u'\xa0', u'').strip()
# NOTE(review): the variable is called `latlong` but is unpacked as (lon, lat);
# verify the order of the two numbers in the page.
278 lon, lat = latlong.split(' ')
# Cell 8 must be the "Show on Map" link; its href is expected to look like
# default.aspx?zoom=9&oldmmsi=<mmsi>&olddate=<date>, and the asserts below
# check each part before the date is extracted.
280 show_on_map = line[8]
281 assert get_merged_leaf_content(show_on_map).strip() == 'Show on Map'
282 link = show_on_map.children[0].children[0].attributes['href']
# NOTE(review): maxsplit=2 allows up to three parts; 1 would be enough to
# separate path and query.
283 tmp = link.split(u'?', 2)
284 assert tmp[0] == u'default.aspx'
286 tmp = tmp.split(u'&')
288 assert tmp[0] == u'zoom=9'
289 assert tmp[1] == u'oldmmsi='+mmsi
291 assert tmp.startswith(u'olddate=')
292 dt = tmp[len(u'olddate='):]
# The date in the link uses month/day/year with a 12-hour clock and AM/PM.
294 isodt = datetime.strptime(dt, '%m/%d/%Y %I:%M:%S %p')
# Cells 6 and 7 hold speed and course as decimal text.
298 speed = float(get_merged_leaf_content(line[6]))
299 course = float(get_merged_leaf_content(line[7]))
300 #print dt, isodt, lat, long, speed, course
# Build the position record: values are multiplied by the AIS_*_SCALE constants
# and truncated to int; status, rate of turn and heading are set to the
# "not available" / "no heading" constants.
303 timestamp = datetime_to_timestamp(isodt)
304 status = AIS_STATUS_NOT_AVAILABLE
305 rot = AIS_ROT_NOT_AVAILABLE
306 sog = int(speed*AIS_SOG_SCALE)
307 latitude = int(float(lat)*AIS_LATLON_SCALE)
308 longitude = int(float(lon)*AIS_LATLON_SCALE)
309 cog = int(course*AIS_COG_SCALE)
310 heading = AIS_NO_HEADING
# NOTE(review): `strmmsi` and `source` are not assigned in the visible lines;
# presumably they are set earlier in this function -- confirm.
312 add_nmea1(strmmsi, timestamp, status, rot, sog, latitude, longitude, cog, heading, source)
# Also fetch the detailed track for the date of this row.
314 import_track(mmsi, dt, isodt)
# NOTE(review): `page is not 2` compares object identity with an int literal;
# a value comparison (`page != 2`) is presumably what is meant.
316 if 'Next page' in html and page is not 2:
317 print 'There is another page!'
# Import the track of a vessel around a given date from MarineTraffic's
# gettrackxml.aspx: every <pos> element of the XML becomes one add_nmea1()
# record.
#
# mmsi: string; concatenated into the file name and URL.
# dt: date string as it appears in the "Show on Map" link; spaces are replaced
#   by %20 before it is put in the URL and Referer.
# isodt: datetime; only its date ('%Y%m%d') is used, as cache sub-directory.
323 def import_track(mmsi, dt, isodt):
324 filename = MARINETRAFFIC_DIR+isodt.strftime('%Y%m%d')+'/'+mmsi+'-trk.xml'
325 if not os.path.exists(filename):
327 print >> sys.stderr, filename, 'not found and downloads disabled.'
# The request carries browser-like User-Agent, Accept and Accept-Charset
# headers, plus a Referer pointing at the default.aspx map URL for the same
# MMSI and date.
330 url = 'http://www.marinetraffic.com/ais/gettrackxml.aspx?mmsi=%s&date=%s' % (mmsi, dt.replace(' ','%20'))
331 request = urllib2.Request(url)
332 request.add_header('User-Agent', 'Mozilla/5.0 (X11; U; Linux i686; fr; rv:1.9.0.7) Gecko/2009032018 Firefox/3.0.6 (Debian-3.0.6-1)')
333 request.add_header('Accept' , 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8')
334 request.add_header('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.7')
# NOTE(review): the Referer contains a literal '+' right after the MMSI --
# confirm that it is intended.
335 request.add_header('Referer', 'http://www.marinetraffic.com/ais/default.aspx?zoom=9&oldmmsi=%(mmsi)s+&olddate=%(date)s' % { 'mmsi': mmsi, 'date': dt.replace(' ', '%20') })
336 uo = urllib2.urlopen(request)
# NOTE(review): `sleep` is called unqualified while the visible imports only
# bring in the `time` module; presumably a star import provides it -- confirm.
339 sleep(DOWNLOAD_SLEEP_TIME)
341 f = open_with_mkdirs(filename, 'w')
# The file is read back from disk and decoded as UTF-8.
345 html = file(filename).read()
348 xml = unicode(html, 'utf8')
# `info` is reused for every <pos> element; only the per-position keys are
# overwritten on each iteration.
349 info = { 'mmsi': mmsi, 'infosrc': u'MT' }
350 for node in html_lexer(xml):
351 if isinstance(node, Tag) and node.name==u'pos':
352 info['updated'] = node.attributes['timestamp']
353 info['lat'] = float(node.attributes['lat'])
354 info['lon'] = float(node.attributes['lon'])
355 info['course'] = float(node.attributes['course'])
# The speed attribute is divided by 10 before use.
356 info['speed'] = float(node.attributes['speed'])/10.
# The timestamp attribute is parsed as 'YYYY-MM-DDTHH:MM:SS'.
358 timestamp = datetime_to_timestamp(datetime.strptime(info['updated'], '%Y-%m-%dT%H:%M:%S'))
359 status = AIS_STATUS_NOT_AVAILABLE
360 rot = AIS_ROT_NOT_AVAILABLE
361 sog = int(info['speed']*AIS_SOG_SCALE)
362 latitude = int(float(info['lat'])*AIS_LATLON_SCALE)
363 longitude = int(float(info['lon'])*AIS_LATLON_SCALE)
# NOTE(review): here the course is truncated to int before scaling, whereas
# import_last_pos computes int(course*AIS_COG_SCALE); the fractional part of
# the course is therefore dropped here -- confirm which one is intended.
364 cog = int(info['course'])*AIS_COG_SCALE
365 heading = AIS_NO_HEADING
367 #print datetime.utcfromtimestamp(timestamp),
368 #for i in strmmsi, timestamp, status, rot, sog, latitude, longitude, cog, heading, source:
# NOTE(review): `strmmsi` and `source` are not assigned in the visible lines;
# presumably they are set earlier in this function -- confirm.
371 add_nmea1(strmmsi, timestamp, status, rot, sog, latitude, longitude, cog, heading, source)
# Command-line entry point: parse the options, override the module-level
# download settings, then call go_summary() and import_last_pos() for each MMSI.
376 if __name__ == '__main__':
377 parser = OptionParser(usage='%prog [options] mmsi [mmsi]...')
378 parser.add_option('--no-download', help="don't download any file", action='store_true', dest='no_download', default=False)
379 parser.add_option('--download-sleep', help="how many seconds do we sleep after each download. default=%default", action='store', type='int', dest='sleep', default=DOWNLOAD_SLEEP_TIME)
380 parser.add_option('--debug-sql', help="print all sql queries to stdout before running them", action='store_true', dest='debug_sql', default=False)
# The default reference date is today's UTC date, formatted as YYYYMMDD.
# NOTE(review): the help text says "Do NOT use without --date" on the --date
# option itself; presumably another option was meant -- confirm the wording.
381 parser.add_option('--date', help="force reference date. default=%default\nDo NOT use without --date", action='store', dest='reference_date', default=datetime.utcnow().date().strftime('%Y%m%d'))
382 parser.add_option('--print-mmsi', help="prints each mmsi before it's processed", action='store_true', dest='print_mmsi', default=False)
383 (options, args) = parser.parse_args()
# Message printed on stderr, presumably when no MMSI was given (the test is not
# visible here).
386 print >> sys.stderr, "Need parameters"
# The module-level settings read by the download code are replaced with the
# command-line values.
389 DISABLE_DOWNLOAD = options.no_download
390 DOWNLOAD_SLEEP_TIME = options.sleep
391 if options.debug_sql:
393 reference_date = options.reference_date
# Trailing CR/LF characters are removed from each MMSI, and values that are not
# purely numeric are skipped with a message.
# NOTE(review): the loop that binds `mmsi` is not visible here; presumably it
# iterates over `args` -- confirm.
396 while len(mmsi) and mmsi[-1] in '\r\n':
398 if not mmsi.isdigit():
399 print 'MMSI', mmsi, 'is not numeric. Ignoring.'
401 if options.print_mmsi:
# The summary page is imported first, then the position pages.
# NOTE(review): `page` is not assigned in the visible lines -- confirm where it
# is set.
403 found = go_summary(reference_date, mmsi)
407 if import_last_pos(reference_date, mmsi, page):