2 # -*- coding: utf-8 -*-
4 from __future__ import division
5 import sys, os, urllib2, time
6 from pprint import pprint
7 from datetime import datetime, date
9 from optparse import OptionParser
12 from ais.ntools import clean_alnum, clean_ais_charset, open_with_mkdirs, datetime_to_timestamp
13 from ais.common import *
14 from ais.html_parser import *
# Module-level defaults. The __main__ section of this script reassigns
# DOWNLOAD_SLEEP_TIME and DISABLE_DOWNLOAD from the command-line options.
# Value passed to sleep() after each download (the --download-sleep help text
# describes it as seconds).
16 DOWNLOAD_SLEEP_TIME = 10
# Set from --no-download. The lines that test this flag are not part of this
# listing; the "not found and downloads disabled." messages suggest it makes
# the import functions give up instead of fetching -- TODO confirm.
17 DISABLE_DOWNLOAD = False
# Root of the on-disk cache; callers append '<date>/<mmsi>-<kind>' to it.
18 MARINETRAFFIC_DIR = '/var/lib/ais/marinetraffic/'
# NOTE(review): this file is a numbered listing of a Python 2 script. The number
# that starts each line is the original line number, indentation has been lost,
# and gaps in the numbering (e.g. 24 -> 26) mark lines absent from this listing.
# The comments below describe only what the visible lines establish.
#
# go_summary(reference_date, mmsi)
#   Reads the cached MarineTraffic "shipdetails" page of one vessel (requesting
#   it first when the cache file does not exist), extracts its static and
#   voyage fields and passes them to add_nmea5_partial().
#   reference_date: 'YYYYMMDD' string; used as the cache sub-directory and
#                   parsed with '%Y%m%d' to build a timestamp.
#   mmsi:           vessel MMSI as a string; concatenated into path and URL.
#   Return value:   no return statement is visible here; the __main__ section
#                   stores the result in `found`.
21 def go_summary(reference_date, mmsi):
# Helper (presumably nested in go_summary): parses the page and fills info_raw
# with u'name' plus one dict per <h2> section, mapping each bold label to the
# text that follows it.
22 def get_raw_summary(html):
23 root = html_parse(html)
24 divs = get_elem(root, 'div')
# Line 25 is missing; `divdetail` used below is presumably picked from `divs`.
26 #print_idented_tree(divdetail, 0)
# Lines 27-30 are missing; info_raw, section, boldtext and text are presumably
# initialised there.
31 for node in divdetail.children:
# <h1>: its inner text is stored as the vessel name.
32 if isinstance(node, Tag) and node.name == 'h1':
33 info_raw[u'name'] = get_inner_text(node)
# <h2>: a new section starts. The pending label/text pair is stored first, then
# an empty dict is created under the section title.
35 if isinstance(node, Tag) and node.name == 'h2':
# The guard around this warning is not shown (lines 36-37 are missing).
38 print >> sys.stderr, "WARNING: section is empty for setting", boldtext, "=", text
40 info_raw[section][boldtext] = text
42 section = get_inner_text(node)
43 info_raw[section] = {}
# <br>: the current label/text pair is stored.
46 if isinstance(node, Tag) and node.name == 'br':
48 info_raw[section][boldtext] = text
# <b>: a new label starts; the previous pair is stored before boldtext changes.
50 elif isinstance(node, Tag) and node.name == 'b':
53 print >> sys.stderr, "WARNING: section is empty for setting", boldtext, "=", text
55 info_raw[section][boldtext] = text
57 boldtext = get_inner_text(node)
# Inner text is appended to the current value; the branch header (line 58) is
# missing, presumably an `else:` covering all other nodes.
59 text += get_inner_text(node)
# Presumably the final flush after the loop -- TODO confirm against the
# original indentation.
61 info_raw[section][boldtext] = text
# Helper: turns the raw unicode strings of info_raw into the `info` dict.
# Keys visibly written: name, callsign, imo, length, breadth, mmsi,
# destination, draught, eta, voyage_updated. 'type' is read later by
# go_summary, so it is presumably set on the missing line 119.
65 def qualify_summary(info_raw):
68 info['name'] = info_raw['name']
# Two spellings of the section key (with and without trailing colon) are
# looked up; the surrounding lines 70 and 72 are missing, presumably a
# try/except KeyError fallback.
71 details = info_raw[u"Vessel's Details:"]
73 details = info_raw[u"Vessel's Details"]
# Non-ASCII characters become '?' ('replace') before clean_alnum is applied.
74 info['callsign'] = clean_alnum(details[u'Call Sign:'].encode('ascii', 'replace'))
75 tmp = details.get(u'IMO:', None)
# Commas are removed from the IMO text.
77 tmp = tmp.replace(u',', u'').strip()
# The value u'999999999' is not stored as an IMO number.
78 if tmp != u'999999999':
# NOTE(review): if line 77 always runs first, no comma is left and this second
# replace(u', ', u'') has nothing to remove.
79 info['imo'] = tmp.replace(u', ', u'').strip()
80 tmp = details.get(u'Length x Breadth:', None)
# Every u'm' is removed, then the text is split on 'X' and both halves are
# converted with int().
82 length, breadth = tmp.replace(u'm', u'').split('X')
83 info['length'], info['breadth'] = int(length), int(breadth)
84 info['mmsi'] = details[u'MMSI:'].strip()
85 tmp = details.get(u'Ship Type:', None)
# Entries of the reverse_types dict (label shown on the page -> numeric type
# code, presumably AIS ship-type numbers). The opening of the dict and several
# entries (lines 86-94, 97-100, 102) are missing from this listing.
95 u'Sailing Vessel': 36,
96 u'Pleasure Craft': 37,
101 u'Anti-Pollution': 54,
103 #u'Local Vessel': 56,
104 #u'Local Vessel': 57,
105 u'Medical Trans': 58,
106 u'Special Craft': 59,
107 # Cargo is reported for types 70, 75, 76 .. 79
108 u'Cargo - Hazard A (Major)': 71,
109 u'Cargo - Hazard B': 72,
110 u'Cargo - Hazard C (Minor)': 73,
111 u'Cargo - Hazard D (Recognizable)': 74,
112 u'Tanker - Hazard A (Major)': 81,
113 u'Tanker - Hazard B': 82,
114 u'Tanker - Hazard C (Minor)': 83,
115 u'Tanker - Hazard D (Recognizable)': 84,
# Exact-match lookup of the page label; None when the label is unknown.
117 _type = reverse_types.get(tmp, None)
118 if _type is not None:
# Printed for labels without a mapping (the branch header, line 120, is missing).
121 print >> sys.stderr , "NOTICE: can't properly qualify ship of type", tmp
122 # TODO year built .... ?
# Same two-spelling lookup as for the details section (lines 124 and 126 are
# missing).
125 voyage = info_raw[u'Voyage Related Info (Last Received):']
127 voyage = info_raw[u'Voyage Related Info (Last Received)']
128 tmp = voyage.get(u'Destination:', None)
# Lines 129-131 are missing; any clean-up of the destination is not shown.
132 info['destination'] = tmp
133 tmp = voyage.get(u'Draught:', None)
# Every u'm' is removed before float() conversion.
135 info['draught'] = float(tmp.replace(u'm', u''))
136 tmp = voyage.get(u'ETA:', None)
# Two date layouts are attempted: '%Y-%m-%d %H:%M' first, then the "old"
# '%d/%m/%Y %H:%M:%S'. The try/except lines themselves (139, 141) are missing.
140 tmp = datetime.strptime(tmp, '%Y-%m-%d %H:%M')
142 print "Failed to parse ETA date. Trying old format ...",
143 tmp = datetime.strptime(tmp, '%d/%m/%Y %H:%M:%S')
# 1900-01-01 00:00 is not stored as an ETA; other values are stored as an
# 8-character 'MMDDHHMM' string.
146 if tmp != datetime(1900, 1, 1):
147 info['eta'] = tmp.strftime('%m%d%H%M')
148 tmp = voyage.get(u'Info Received:', None)
# Only the text before the first u'(' is parsed.
150 voyage_updated = tmp.split(u'(')[0].strip()
# Same two layouts as for the ETA; the result is kept as a datetime object.
152 info['voyage_updated'] = datetime.strptime(voyage_updated, '%Y-%m-%d %H:%M')
154 print "Failed to parse voyage updated date. Trying old format ...",
155 info['voyage_updated'] = datetime.strptime(voyage_updated, '%d/%m/%Y %H:%M:%S')
# ---- main part of go_summary ----
# Cache file of the summary page: <MARINETRAFFIC_DIR><reference_date>/<mmsi>-sum.html
161 filename = MARINETRAFFIC_DIR+reference_date+'/'+mmsi+'-sum.html'
162 if not os.path.exists(filename):
# The guard for this message (line 163) is missing, presumably
# `if DISABLE_DOWNLOAD:`.
164 print >> sys.stderr, filename, 'not found and downloads disabled.'
# The request is sent with a browser-like User-Agent header.
166 request = urllib2.Request('http://www.marinetraffic.com/ais/shipdetails.aspx?MMSI='+mmsi)
167 request.add_header('User-Agent', 'Mozilla/5.0 (X11; U; Linux i686; fr; rv:1.9.0.7) Gecko/2009032018 Firefox/3.0.6 (Debian-3.0.6-1)')
168 uo = urllib2.urlopen(request)
# NOTE(review): `sleep` is called as a bare name while the visible imports only
# bring in the `time` module; this works only if one of the star imports
# (ais.common / ais.html_parser) exports `sleep` -- verify.
171 sleep(DOWNLOAD_SLEEP_TIME)
# The cache file is opened for writing; the write/close lines (174-176) are
# not shown.
173 f = open_with_mkdirs(filename, 'w')
# The page is read from the cache file and decoded as UTF-8. The file object
# created by file() is never explicitly closed here.
177 html = file(filename).read()
178 html = unicode(html, 'utf8')
# Error pages are detected by their text (English or Greek wording). What is
# returned afterwards is on missing lines.
180 if u'The requested service is unavailable.' in html or u'Η λειτουργία που ζητήσατε δεν είναι διαθέσιμη.' in html:
181 print >> sys.stderr, 'WARNING: The requested service is unavailable.'
184 if u'Non-existent Vessel' in html:
185 print >> sys.stderr, 'WARNING: Vessel unknown'
188 info_raw = get_raw_summary(html)
189 info = qualify_summary(info_raw)
# Sanity check that the page describes the requested vessel.
# NOTE(review): assert statements are removed when Python runs with -O.
192 assert info['mmsi'] == mmsi
# Body of a local `warning` helper whose def line (194) is missing; it prints
# to stderr and is called as warning(...) further down.
195 print >> sys.stderr, "WARNING:"
197 print >> sys.stderr, args,
# Timestamp of the record: built from voyage_updated, or from reference_date
# (the if/else lines 201 and 203 are missing, presumably testing for None).
200 voyage_updated = info.get('voyage_updated', None)
202 timestamp = datetime_to_timestamp(voyage_updated)
204 timestamp = datetime_to_timestamp(datetime.strptime(reference_date, '%Y%m%d'))
# IMO defaults to 0 when absent; the size limit tested on line 206 is not shown.
205 imo = int(info.get('imo', 0))
207 warning('imo', imo, 'is too big')
209 name = info.get('name', u'').encode('utf8')
# The length test (line 210) and the truncation (line 212) are not shown.
211 warning('name', name, 'is too big, truncating')
213 name = clean_ais_charset(name)
214 callsign = clean_alnum(info.get('callsign', u'').encode('utf8'))
# NOTE(review): the local name `type` shadows the builtin within this function.
215 type = info.get('type', 0)
# Values outside 0..100 are caught here; the action taken (line 217) is missing.
216 if type < 0 or type > 100:
# Default ETA string decodes to month 0, day 0, hour 24, minute 60 --
# presumably the AIS "not available" values; TODO confirm.
218 eta = info.get('eta', u'00002460')
220 eta_M = int(eta[0:2])
221 eta_D = int(eta[2:4])
222 eta_h = int(eta[4:6])
223 eta_m = int(eta[6:8])
# Draught is multiplied by 10 and truncated to an integer; 0 when absent.
228 draught = int(info.get('draught', 0)*10)
229 destination = clean_ais_charset(info.get('destination', u'').encode('utf8'))
# The four literal zeros sit between `type` and the ETA fields; their meaning
# is defined by add_nmea5_partial (presumably ship dimensions -- TODO confirm).
# 'MTWW' is the last argument, presumably the source tag of this record.
231 add_nmea5_partial(mmsi, timestamp, imo, name, callsign, type, 0, 0, 0, 0, eta_M, eta_D, eta_h, eta_m, draught, destination, 'MTWW')
# NOTE(review): numbered listing of a Python 2 script -- the leading number on
# each line is the original line number, indentation is lost, and gaps in the
# numbering are lines missing from this listing.
#
# import_last_pos(reference_date, mmsi, page=None)
#   Reads the cached MarineTraffic "ITINERARIES" datasheet page of one vessel
#   (requesting it first when the cache file does not exist), then for each
#   data row of the first table records a position with add_nmea1() and calls
#   import_track() for that row's date.
#   reference_date: string used as cache sub-directory.
#   mmsi:           vessel MMSI as a string.
#   page:           result page number; None and 1 both mean the first page.
#   Return value:   not visible here; the __main__ section tests its truthiness.
236 def import_last_pos(reference_date, mmsi, page=None):
# First page is cached as '<mmsi>-mov.html', later pages as
# '<mmsi>-mov<page>.html'.
237 if page is None or page == 1:
238 filename = MARINETRAFFIC_DIR+reference_date+'/'+mmsi+'-mov.html'
240 filename = MARINETRAFFIC_DIR+reference_date+'/'+mmsi+'-mov'+str(page)+'.html'
242 if not os.path.exists(filename):
# Guard for this message (line 243) is missing, presumably `if DISABLE_DOWNLOAD:`.
244 print >> sys.stderr, filename, 'not found and downloads disabled.'
# Later pages add explicit ordering and var_page parameters to the URL.
247 if page is None or page == 1:
248 request = urllib2.Request('http://www.marinetraffic.com/ais/datasheet.aspx?datasource=ITINERARIES&MMSI='+mmsi)
250 request = urllib2.Request('http://www.marinetraffic.com/ais/datasheet.aspx?datasource=ITINERARIES&MMSI='+mmsi+'&orderby=MINTIME&sort_order=DESC&var_page='+str(page))
251 request.add_header('User-Agent', 'Mozilla/5.0 (X11; U; Linux i686; fr; rv:1.9.0.7) Gecko/2009032018 Firefox/3.0.6 (Debian-3.0.6-1)')
252 uo = urllib2.urlopen(request)
# NOTE(review): bare `sleep` is not provided by the visible `import ... time`;
# it must come from one of the star imports -- verify.
255 sleep(DOWNLOAD_SLEEP_TIME)
# Cache file opened for writing; the write/close lines (258-260) are not shown.
257 f = open_with_mkdirs(filename, 'w')
# Page is read from the cache file and decoded as UTF-8.
261 html = file(filename).read()
262 html = unicode(html, 'utf8')
# Special pages are recognised by their text; what is returned afterwards is
# on missing lines.
264 if u'No Records Found' in html:
265 print >> sys.stderr, 'NOTICE: No Records found.'
267 if u'The requested service is unavailable.' in html or u'Η λειτουργία που ζητήσατε δεν είναι διαθέσιμη.' in html:
268 print >> sys.stderr, 'WARNING: The requested service is unavailable.'
# Only the first <table> of the page is used.
272 root = html_parse(html)
273 table = get_elem(root, u'table')[0]
277 # Now, import each track
278 for row in table.children[1:]: # ignore first line with headers
# `line` (the cells of the row) is built on missing lines 279-280.
# Cell 5 holds latitude and longitude; non-breaking spaces are removed and the
# remainder must split into exactly two parts on a single space.
281 latlong = get_merged_leaf_content(line[5])
282 latlong = latlong.replace(u'\xa0', u'').strip()
284 lat, lon = latlong.split(' ')
# Cell 8 holds the 'Show on Map' link; its href is dissected below to recover
# the position's date string.
# NOTE(review): the page layout is validated with assert statements, which are
# removed when Python runs with -O.
286 show_on_map = line[8]
287 assert get_merged_leaf_content(show_on_map).strip() == 'Show on Map'
288 link = show_on_map.children[0].children[0].attributes['href']
289 tmp = link.split(u'?', 2)
290 assert tmp[0] == u'default.aspx'
# Lines 291, 293 and 296 are missing; `tmp` is presumably rebound to the query
# string, then to its third '&'-separated parameter.
292 tmp = tmp.split(u'&')
294 assert tmp[0] == u'zoom=9'
295 assert tmp[1] == u'oldmmsi='+mmsi
297 assert tmp.startswith(u'olddate=')
298 dt = tmp[len(u'olddate='):]
# Date in the link uses month/day/year with a 12-hour clock and AM/PM marker.
300 isodt = datetime.strptime(dt, '%m/%d/%Y %I:%M:%S %p')
# Cells 6 and 7 hold speed and course as decimal text.
304 speed = float(get_merged_leaf_content(line[6]))
305 course = float(get_merged_leaf_content(line[7]))
306 #print dt, isodt, lat, long, speed, course
# Values are scaled with the AIS_* constants and truncated to integers; status,
# rate of turn and heading are set to the "not available" constants.
309 timestamp = datetime_to_timestamp(isodt)
310 status = AIS_STATUS_NOT_AVAILABLE
311 rot = AIS_ROT_NOT_AVAILABLE
312 sog = int(speed*AIS_SOG_SCALE)
313 latitude = int(float(lat)*AIS_LATLON_SCALE)
314 longitude = int(float(lon)*AIS_LATLON_SCALE)
315 cog = int(course*AIS_COG_SCALE)
316 heading = AIS_NO_HEADING
# `strmmsi` and `source` are bound on lines missing from this listing.
318 print strmmsi, timestamp, status, rot, sog, latitude, longitude, cog, heading, source
319 add_nmea1(strmmsi, timestamp, status, rot, sog, latitude, longitude, cog, heading, source)
# The detailed track for the same date string is imported as well.
321 import_track(mmsi, dt, isodt)
# NOTE(review): `page is not 2` compares object identity with an int literal;
# it only behaves like `page != 2` because CPython caches small integers.
323 if 'Next page' in html and page is not 2:
324 print 'There is another page!'
# NOTE(review): numbered listing of a Python 2 script -- the leading number on
# each line is the original line number, indentation is lost, and gaps in the
# numbering are lines missing from this listing.
#
# import_track(mmsi, dt, isodt)
#   Reads the cached MarineTraffic track XML of one vessel (requesting it
#   first when the cache file does not exist) and records every <pos> element
#   with add_nmea1().
#   mmsi:  vessel MMSI as a string.
#   dt:    date string in the form used by the site's links; inserted into the
#          URL with spaces replaced by '%20'.
#   isodt: datetime for the same instant; only its date selects the cache
#          sub-directory.
#   Return value: no return statement is visible in this listing.
330 def import_track(mmsi, dt, isodt):
# Cache file: <MARINETRAFFIC_DIR><YYYYMMDD of isodt>/<mmsi>-trk.xml
331 filename = MARINETRAFFIC_DIR+isodt.strftime('%Y%m%d')+'/'+mmsi+'-trk.xml'
332 if not os.path.exists(filename):
# Guard for this message (line 333) is missing, presumably `if DISABLE_DOWNLOAD:`.
334 print >> sys.stderr, filename, 'not found and downloads disabled.'
337 url = 'http://www.marinetraffic.com/ais/gettrackxml.aspx?mmsi=%s&date=%s' % (mmsi, dt.replace(' ','%20'))
338 request = urllib2.Request(url)
# Browser-like headers, including a Referer that points at the map page for
# the same vessel and date.
339 request.add_header('User-Agent', 'Mozilla/5.0 (X11; U; Linux i686; fr; rv:1.9.0.7) Gecko/2009032018 Firefox/3.0.6 (Debian-3.0.6-1)')
340 request.add_header('Accept' , 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8')
341 request.add_header('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.7')
# NOTE(review): the format string puts a literal '+' right after the MMSI
# ('oldmmsi=%(mmsi)s+&olddate=...'); confirm this is intended.
342 request.add_header('Referer', 'http://www.marinetraffic.com/ais/default.aspx?zoom=9&oldmmsi=%(mmsi)s+&olddate=%(date)s' % { 'mmsi': mmsi, 'date': dt.replace(' ', '%20') })
343 uo = urllib2.urlopen(request)
# NOTE(review): bare `sleep` is not provided by the visible `import ... time`;
# it must come from one of the star imports -- verify.
346 sleep(DOWNLOAD_SLEEP_TIME)
# Cache file opened for writing; the write/close lines (349-351) are not shown.
348 f = open_with_mkdirs(filename, 'w')
# The XML is read from the cache file and decoded as UTF-8.
352 html = file(filename).read()
355 xml = unicode(html, 'utf8')
# `info` is reused for every position; mmsi and infosrc stay constant.
356 info = { 'mmsi': mmsi, 'infosrc': u'MT' }
# The document is scanned with the project's html_lexer; only <pos> tags are used.
357 for node in html_lexer(xml):
358 if isinstance(node, Tag) and node.name==u'pos':
359 info['updated'] = node.attributes['timestamp']
360 info['lat'] = float(node.attributes['lat'])
361 info['lon'] = float(node.attributes['lon'])
362 info['course'] = float(node.attributes['course'])
# The speed attribute is divided by 10 before use.
363 info['speed'] = float(node.attributes['speed'])/10.
# The timestamp attribute is parsed as 'YYYY-MM-DDTHH:MM:SS'.
365 timestamp = datetime_to_timestamp(datetime.strptime(info['updated'], '%Y-%m-%dT%H:%M:%S'))
366 status = AIS_STATUS_NOT_AVAILABLE
367 rot = AIS_ROT_NOT_AVAILABLE
368 sog = int(info['speed']*AIS_SOG_SCALE)
# info['lat'] / info['lon'] are already floats, so float() is a no-op here.
369 latitude = int(float(info['lat'])*AIS_LATLON_SCALE)
370 longitude = int(float(info['lon'])*AIS_LATLON_SCALE)
# NOTE(review): the course is truncated to an integer BEFORE scaling, whereas
# import_last_pos computes int(course*AIS_COG_SCALE). Fractional degrees are
# dropped here and the result is not passed through int() afterwards --
# confirm which form is intended.
371 cog = int(info['course'])*AIS_COG_SCALE
372 heading = AIS_NO_HEADING
# Debug output; `strmmsi` and `source` are bound on lines missing from this
# listing, as is the body of the loop below (lines 376-377).
374 print datetime.utcfromtimestamp(timestamp),
375 for i in strmmsi, timestamp, status, rot, sog, latitude, longitude, cog, heading, source:
378 add_nmea1(strmmsi, timestamp, status, rot, sog, latitude, longitude, cog, heading, source)
# NOTE(review): numbered listing of a Python 2 script -- the leading number on
# each line is the original line number, indentation is lost, and gaps in the
# numbering are lines missing from this listing.
#
# Command-line entry point: parses options, overrides the download settings
# and processes MMSI values one by one with go_summary() and import_last_pos().
383 if __name__ == '__main__':
384 parser = OptionParser(usage='%prog [options] mmsi [mmsi]...')
385 parser.add_option('--no-download', help="don't download any file", action='store_true', dest='no_download', default=False)
386 parser.add_option('--download-sleep', help="how many seconds do we sleep after each download. default=%default", action='store', type='int', dest='sleep', default=DOWNLOAD_SLEEP_TIME)
387 parser.add_option('--debug-sql', help="print all sql queries to stdout before running them", action='store_true', dest='debug_sql', default=False)
# The default reference date is today's UTC date as 'YYYYMMDD'.
# NOTE(review): the help text says "Do NOT use without --date" inside the
# --date option itself, which looks like a slip (perhaps --no-download was
# meant). The string is runtime text and is left untouched here.
388 parser.add_option('--date', help="force reference date. default=%default\nDo NOT use without --date", action='store', dest='reference_date', default=datetime.utcnow().date().strftime('%Y%m%d'))
389 parser.add_option('--print-mmsi', help="prints each mmsi before it's processed", action='store_true', dest='print_mmsi', default=False)
390 (options, args) = parser.parse_args()
# Printed by a guard on missing lines 391-392, presumably when no MMSI
# argument was given.
393 print >> sys.stderr, "Need parameters"
# The option values are assigned to DISABLE_DOWNLOAD and DOWNLOAD_SLEEP_TIME,
# presumably rebinding the module-level names of the same spelling (this
# section presumably runs at module scope).
396 DISABLE_DOWNLOAD = options.no_download
397 DOWNLOAD_SLEEP_TIME = options.sleep
# The action taken for --debug-sql (line 399) is not shown.
398 if options.debug_sql:
400 reference_date = options.reference_date
# Lines 401-402 are missing; `mmsi` is presumably the loop variable over the
# input values. The loop below runs while the last character is CR or LF; its
# body (line 404) is missing, presumably dropping that character.
403 while len(mmsi) and mmsi[-1] in '\r\n':
# Values that are not purely digits are reported.
405 if not mmsi.isdigit():
406 print 'MMSI', mmsi, 'is not numeric. Ignoring.'
# The action taken for --print-mmsi (line 409) is not shown.
408 if options.print_mmsi:
410 found = go_summary(reference_date, mmsi)
# `page` is bound on lines missing from this listing (411-413); the truthiness
# of import_last_pos()'s result is tested here.
414 if import_last_pos(reference_date, mmsi, page):