2 # -*- coding: utf-8 -*-
# Module-level settings. The __main__ section at the bottom of the file
# reassigns DOWNLOAD_SLEEP_TIME and DISABLE_DOWNLOAD from command-line options.
#
# Value handed to sleep() after each urlopen() call in this file; the
# --download-sleep option help describes it as a number of seconds.
4 DOWNLOAD_SLEEP_TIME = 10
# Set from --no-download. Presumably makes the loaders give up instead of
# fetching when a cached file is missing (the test itself is not visible in
# this listing, only the "not found and downloads disabled." messages).
5 DISABLE_DOWNLOAD = False
# Root of the on-disk cache. Files are addressed as
# <MARINETRAFFIC_DIR><YYYYMMDD>/<mmsi>-sum.html, -mov.html, -mov<page>.html
# and -trk.xml by the functions below.
6 MARINETRAFFIC_DIR = '/var/lib/ais/marinetraffic/'
8 import sys, os, urllib2, time
9 from pprint import pprint
10 from datetime import datetime, date
11 from time import sleep
12 from optparse import OptionParser
15 from ais.ntools import clean_alnum, clean_ais_charset, open_with_mkdirs, datetime_to_timestamp
16 from ais.common import *
17 from ais.html_parser import *
# NOTE(review): this listing carries embedded line numbers, has lost its
# indentation, and skips some numbered lines, so statements such as
# try/except/else/return are not visible. The comments below describe only
# what the visible statements do.
#
# go_summary(reference_date, mmsi)
#   reference_date: 'YYYYMMDD' string (it is concatenated into the cache path
#                   and parsed with '%Y%m%d' further down).
#   mmsi:           MMSI as a string (concatenated into paths and URLs).
# Loads the MarineTraffic "ship details" page for one vessel from the cache
# file <MARINETRAFFIC_DIR><reference_date>/<mmsi>-sum.html, requesting it from
# the web site when that file does not exist, extracts the vessel and voyage
# fields, and hands them to add_nmea5_partial().
# The caller stores the result in a variable named `found`; the return
# statements themselves are not visible here.
20 def go_summary(reference_date, mmsi):
# Nested helper: parse the page and collect its text into a dict, info_raw.
#   info_raw[u'name']            <- inner text of the <h1>
#   info_raw[<h2 text>]          <- one dict per <h2> section
#   info_raw[<h2 text>][<b text>] <- text accumulated after each <b> label
# How `divdetail` is picked out of `divs` is not visible in this listing.
21 def get_raw_summary(html):
22 root = html_parse(html)
23 divs = get_elem(root, 'div')
30 for node in divdetail.children:
31 if isinstance(node, Tag) and node.name == 'h1':
32 info_raw[u'name'] = get_inner_text(node)
# An <h2> starts a new section: the pending label/text pair is stored (the
# guarding condition is not visible), then a fresh dict is opened under the
# heading text.
34 if isinstance(node, Tag) and node.name == 'h2':
36 info_raw[section][boldtext] = text
38 section = get_inner_text(node)
39 info_raw[section] = {}
# A <br> ends the current value; a <b> ends it too and supplies the next label.
42 if isinstance(node, Tag) and node.name == 'br':
44 info_raw[section][boldtext] = text
46 elif isinstance(node, Tag) and node.name == 'b':
48 info_raw[section][boldtext] = text
50 boldtext = get_inner_text(node)
# Any other node contributes its text to the value being accumulated.
52 text += get_inner_text(node)
# Store the last pending label/text pair.
54 info_raw[section][boldtext] = text
# Nested helper: turn the raw label/text dict into typed fields in `info`
# ('name', 'callsign', 'imo', 'length', 'breadth', 'mmsi', 'destination',
# 'draught', 'eta', 'voyage_updated' are assigned in the visible lines).
58 def qualify_summary(info_raw):
61 info['name'] = info_raw['name']
# The section heading is looked up with and without a trailing colon;
# presumably the second spelling is a fallback for a different page layout
# (the try/except around these two lines is not visible).
64 details = info_raw[u"Vessel's Details:"]
66 details = info_raw[u"Vessel's Details"]
# Call sign: non-ASCII characters become '?' before clean_alnum() is applied.
67 info['callsign'] = clean_alnum(details[u'Call Sign:'].encode('ascii', 'replace'))
68 tmp = details.get(u'IMO:', None)
# Commas are removed from the IMO text; the value u'999999999' is skipped.
70 tmp = tmp.replace(u',', u'').strip()
71 if tmp != u'999999999':
# NOTE(review): every comma was already removed two lines up, so this second
# replace(u', ', u'') cannot match anything.
72 info['imo'] = tmp.replace(u', ', u'').strip()
73 tmp = details.get(u'Length x Breadth:', None)
# Every u'm' is dropped, then the text is split on 'X' into two integers.
75 length, breadth = tmp.replace(u'm', u'').split('X')
76 info['length'], info['breadth'] = int(length), int(breadth)
77 info['mmsi'] = details[u'MMSI:'].strip()
78 tmp = details.get(u'Ship Type:', None)
# Entries of a label -> numeric type code mapping, read below as
# `reverse_types`; the opening of the literal and its other entries are not
# visible in this listing.
88 u'Sailing Vessel': 36,
89 u'Pleasure Craft': 37,
94 u'Anti-Pollution': 54,
100 # Cargo is reported for types 70, 75, 76 .. 79
101 u'Cargo - Hazard A (Major)': 71,
102 u'Cargo - Hazard B': 72,
103 u'Cargo - Hazard C (Minor)': 73,
104 u'Cargo - Hazard D (Recognizable)': 74,
105 u'Tanker - Hazard A (Major)': 81,
106 u'Tanker - Hazard B': 82,
107 u'Tanker - Hazard C (Minor)': 83,
108 u'Tanker - Hazard D (Recognizable)': 84,
# Labels missing from the mapping yield None and are reported on stderr.
110 _type = reverse_types.get(tmp, None)
111 if _type is not None:
114 print >> sys.stderr , "NOTICE: can't properly qualify ship of type", tmp
115 # TODO year built .... ?
# Voyage section: same two heading spellings as for the vessel details.
118 voyage = info_raw[u'Voyage Related Info (Last Received):']
120 voyage = info_raw[u'Voyage Related Info (Last Received)']
121 tmp = voyage.get(u'Destination:', None)
125 info['destination'] = tmp
126 tmp = voyage.get(u'Draught:', None)
# Draught text has every u'm' removed and is parsed as a float.
128 info['draught'] = float(tmp.replace(u'm', u''))
129 tmp = voyage.get(u'ETA:', None)
# ETA is parsed as 'YYYY-MM-DD HH:MM' first, then as 'DD/MM/YYYY HH:MM:SS'.
133 tmp = datetime.strptime(tmp, '%Y-%m-%d %H:%M')
135 print "Failed to parse ETA date. Trying old format ...",
136 tmp = datetime.strptime(tmp, '%d/%m/%Y %H:%M:%S')
# 1900-01-01 00:00 is treated as "no ETA"; otherwise the ETA is kept as an
# 8-character 'MMDDHHMM' string.
139 if tmp != datetime(1900, 1, 1):
140 info['eta'] = tmp.strftime('%m%d%H%M')
141 tmp = voyage.get(u'Info Received:', None)
# Only the text before the first u'(' is parsed, with the same two formats.
143 voyage_updated = tmp.split(u'(')[0].strip()
145 info['voyage_updated'] = datetime.strptime(voyage_updated, '%Y-%m-%d %H:%M')
147 print "Failed to parse voyage updated date. Trying old format ...",
148 info['voyage_updated'] = datetime.strptime(voyage_updated, '%d/%m/%Y %H:%M:%S')
# ---- body of go_summary proper ----
# Cache lookup; when the file is absent the page is requested with a browser
# User-Agent, sleep(DOWNLOAD_SLEEP_TIME) is called, and the cache file is
# opened for writing (the read/write/close statements are not visible).
154 filename = MARINETRAFFIC_DIR+reference_date+'/'+mmsi+'-sum.html'
155 if not os.path.exists(filename):
157 print >> sys.stderr, filename, 'not found and downloads disabled.'
159 request = urllib2.Request('http://www.marinetraffic.com/ais/shipdetails.aspx?MMSI='+mmsi)
160 request.add_header('User-Agent', 'Mozilla/5.0 (X11; U; Linux i686; fr; rv:1.9.0.7) Gecko/2009032018 Firefox/3.0.6 (Debian-3.0.6-1)')
161 uo = urllib2.urlopen(request)
164 sleep(DOWNLOAD_SLEEP_TIME)
166 f = open_with_mkdirs(filename, 'w')
# The page is always re-read from the cache file and decoded as UTF-8.
170 html = file(filename).read()
171 html = unicode(html, 'utf8')
# Known error pages (English or Greek "service unavailable", unknown vessel)
# are reported on stderr; what happens next is not visible here.
173 if u'The requested service is unavailable.' in html or u'Η λειτουργία που ζητήσατε δεν είναι διαθέσιμη.' in html:
174 print >> sys.stderr, 'WARNING: The requested service is unavailable.'
177 if u'Non-existent Vessel' in html:
178 print >> sys.stderr, 'WARNING: Vessel unknown'
181 info_raw = get_raw_summary(html)
182 info = qualify_summary(info_raw)
# Sanity check that the page is for the requested vessel.
# NOTE(review): assert statements are removed when Python runs with -O.
185 assert info['mmsi'] == mmsi
# Visible part of what looks like a local warning(...) helper (its def line is
# not shown; warning() is called below): writes "WARNING:" and then the
# arguments to stderr.
188 print >> sys.stderr, "WARNING:"
190 print >> sys.stderr, args,
# Timestamp comes from the parsed 'Info Received' date when there is one,
# otherwise from reference_date at midnight (the if/else is not visible).
193 voyage_updated = info.get('voyage_updated', None)
195 timestamp = datetime_to_timestamp(voyage_updated)
197 timestamp = datetime_to_timestamp(datetime.strptime(reference_date, '%Y%m%d'))
# Missing fields default to 0 or an empty string. The size checks guarding the
# two warning() calls are not visible.
198 imo = int(info.get('imo', 0))
200 warning('imo', imo, 'is too big')
202 name = info.get('name', u'').encode('utf8')
204 warning('name', name, 'is too big, truncating')
206 name = clean_ais_charset(name)
207 callsign = clean_alnum(info.get('callsign', u'').encode('utf8'))
# NOTE(review): the local name `type` shadows the builtin for the rest of the
# function.
208 type = info.get('type', 0)
209 if type < 0 or type > 100:
# The default ETA u'00002460' decodes to month 0, day 0, hour 24, minute 60 --
# presumably the AIS "not available" encoding; confirm.
211 eta = info.get('eta', u'00002460')
213 eta_M = int(eta[0:2])
214 eta_D = int(eta[2:4])
215 eta_h = int(eta[4:6])
216 eta_m = int(eta[6:8])
# Draught is multiplied by 10 and truncated to an int.
221 draught = int(info.get('draught', 0)*10)
222 destination = clean_ais_charset(info.get('destination', u'').encode('utf8'))
# Four literal zeros are passed between `type` and the ETA fields, and 'MTWW'
# is the last argument (presumably a source tag).
# NOTE(review): info['length'] / info['breadth'] parsed in qualify_summary are
# not passed on in the visible lines -- confirm whether that is intended.
224 add_nmea5_partial(mmsi, timestamp, imo, name, callsign, type, 0, 0, 0, 0, eta_M, eta_D, eta_h, eta_m, draught, destination, 'MTWW')
# NOTE(review): this listing carries embedded line numbers, has lost its
# indentation, and skips some numbered lines; comments describe only the
# visible statements.
#
# import_last_pos(reference_date, mmsi, page=None)
#   reference_date: 'YYYYMMDD' string used in the cache path.
#   mmsi:           MMSI as a string.
#   page:           itinerary page number; None and 1 both mean the first page.
# Loads one page of the MarineTraffic "ITINERARIES" datasheet for a vessel
# (from the cache, fetching it when the cached file is absent), feeds each
# table row to add_nmea1() and calls import_track() with the row's date.
# The caller tests the result for truth; the return statements are not visible.
229 def import_last_pos(reference_date, mmsi, page=None):
# First page is cached as <mmsi>-mov.html, later pages as <mmsi>-mov<page>.html.
230 if page is None or page == 1:
231 filename = MARINETRAFFIC_DIR+reference_date+'/'+mmsi+'-mov.html'
233 filename = MARINETRAFFIC_DIR+reference_date+'/'+mmsi+'-mov'+str(page)+'.html'
235 if not os.path.exists(filename):
237 print >> sys.stderr, filename, 'not found and downloads disabled.'
# Later pages add explicit ordering (MINTIME, DESC) and var_page to the URL.
240 if page is None or page == 1:
241 request = urllib2.Request('http://www.marinetraffic.com/ais/datasheet.aspx?datasource=ITINERARIES&MMSI='+mmsi)
243 request = urllib2.Request('http://www.marinetraffic.com/ais/datasheet.aspx?datasource=ITINERARIES&MMSI='+mmsi+'&orderby=MINTIME&sort_order=DESC&var_page='+str(page))
244 request.add_header('User-Agent', 'Mozilla/5.0 (X11; U; Linux i686; fr; rv:1.9.0.7) Gecko/2009032018 Firefox/3.0.6 (Debian-3.0.6-1)')
245 uo = urllib2.urlopen(request)
248 sleep(DOWNLOAD_SLEEP_TIME)
250 f = open_with_mkdirs(filename, 'w')
# The page is always re-read from the cache file and decoded as UTF-8.
254 html = file(filename).read()
255 html = unicode(html, 'utf8')
# Known "nothing to import" pages are reported on stderr.
257 if u'No Records Found' in html:
258 print >> sys.stderr, 'NOTICE: No Records found.'
260 if u'The requested service is unavailable.' in html or u'Η λειτουργία που ζητήσατε δεν είναι διαθέσιμη.' in html:
261 print >> sys.stderr, 'WARNING: The requested service is unavailable.'
# Only the first <table> of the page is used.
265 root = html_parse(html)
266 table = get_elem(root, u'table')[0]
270 # Now, import each track
271 for row in table.children[1:]: # ignore first line with headers
# `line` is not assigned in the visible lines; presumably it holds the cells
# of `row`. Cell 5 holds the position text; non-breaking spaces are removed.
274 latlong = get_merged_leaf_content(line[5])
275 latlong = latlong.replace(u'\xa0', u'').strip()
# NOTE(review): the two space-separated values are unpacked as (lon, lat)
# although the variable is called latlong -- confirm against the page layout.
277 lon, lat = latlong.split(' ')
# Cell 8 is the "Show on Map" link. Its href is checked piece by piece
# (default.aspx, zoom=9, oldmmsi=<mmsi>, olddate=...) and the olddate value
# becomes `dt`.
# NOTE(review): these checks use assert, which is removed under python -O.
279 show_on_map = line[8]
280 assert get_merged_leaf_content(show_on_map).strip() == 'Show on Map'
281 link = show_on_map.children[0].children[0].attributes['href']
282 tmp = link.split(u'?', 2)
283 assert tmp[0] == u'default.aspx'
285 tmp = tmp.split(u'&')
287 assert tmp[0] == u'zoom=9'
288 assert tmp[1] == u'oldmmsi='+mmsi
290 assert tmp.startswith(u'olddate=')
291 dt = tmp[len(u'olddate='):]
# olddate is in month/day/year, 12-hour clock with AM/PM.
293 isodt = datetime.strptime(dt, '%m/%d/%Y %I:%M:%S %p')
# Cells 6 and 7 are parsed as speed and course.
297 speed = float(get_merged_leaf_content(line[6]))
298 course = float(get_merged_leaf_content(line[7]))
299 #print dt, isodt, lat, long, speed, course
# Convert to the integer fields passed to add_nmea1(), scaling with the AIS_*
# constants; status, rate of turn and heading use the "not available" constants.
302 timestamp = datetime_to_timestamp(isodt)
303 status = AIS_STATUS_NOT_AVAILABLE
304 rot = AIS_ROT_NOT_AVAILABLE
305 sog = int(speed*AIS_SOG_SCALE)
306 latitude = int(float(lat)*AIS_LATLON_SCALE)
307 longitude = int(float(lon)*AIS_LATLON_SCALE)
308 cog = int(course*AIS_COG_SCALE)
309 heading = AIS_NO_HEADING
# `strmmsi` and `source` are not assigned in the visible lines.
311 add_nmea1(strmmsi, timestamp, status, rot, sog, latitude, longitude, cog, heading, source)
# Also load the track file for this row's date.
313 import_track(mmsi, dt, isodt)
# NOTE(review): `page is not 2` compares identity, not value; `page != 2` is
# the reliable spelling for an int comparison.
315 if 'Next page' in html and page is not 2:
316 print 'There is another page!'
# NOTE(review): this listing carries embedded line numbers, has lost its
# indentation, and skips some numbered lines; comments describe only the
# visible statements.
#
# import_track(mmsi, dt, isodt)
#   mmsi:  MMSI as a string.
#   dt:    date string as found in the "Show on Map" link; spaces are replaced
#          with %20 when it is put into URLs.
#   isodt: datetime whose date names the cache directory.
# Loads the track XML for one vessel and date (from the cache file
# <MARINETRAFFIC_DIR><YYYYMMDD>/<mmsi>-trk.xml, fetching it when the file is
# absent) and calls add_nmea1() with values taken from its <pos> elements.
322 def import_track(mmsi, dt, isodt):
323 filename = MARINETRAFFIC_DIR+isodt.strftime('%Y%m%d')+'/'+mmsi+'-trk.xml'
324 if not os.path.exists(filename):
326 print >> sys.stderr, filename, 'not found and downloads disabled.'
# The request sets browser-like User-Agent/Accept headers and a Referer built
# from the same mmsi and date.
329 url = 'http://www.marinetraffic.com/ais/gettrackxml.aspx?mmsi=%s&date=%s' % (mmsi, dt.replace(' ','%20'))
330 request = urllib2.Request(url)
331 request.add_header('User-Agent', 'Mozilla/5.0 (X11; U; Linux i686; fr; rv:1.9.0.7) Gecko/2009032018 Firefox/3.0.6 (Debian-3.0.6-1)')
332 request.add_header('Accept' , 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8')
333 request.add_header('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.7')
# NOTE(review): the Referer has a literal '+' right after the mmsi value --
# confirm this is intended.
334 request.add_header('Referer', 'http://www.marinetraffic.com/ais/default.aspx?zoom=9&oldmmsi=%(mmsi)s+&olddate=%(date)s' % { 'mmsi': mmsi, 'date': dt.replace(' ', '%20') })
335 uo = urllib2.urlopen(request)
338 sleep(DOWNLOAD_SLEEP_TIME)
340 f = open_with_mkdirs(filename, 'w')
# The document is always re-read from the cache file and decoded as UTF-8.
344 html = file(filename).read()
347 xml = unicode(html, 'utf8')
348 info = { 'mmsi': mmsi, 'infosrc': u'MT' }
# Scan the token stream for <pos> tags and read their attributes; the speed
# attribute is divided by 10.
349 for node in html_lexer(xml):
350 if isinstance(node, Tag) and node.name==u'pos':
351 info['updated'] = node.attributes['timestamp']
352 info['lat'] = float(node.attributes['lat'])
353 info['lon'] = float(node.attributes['lon'])
354 info['course'] = float(node.attributes['course'])
355 info['speed'] = float(node.attributes['speed'])/10.
# The timestamp attribute is parsed as 'YYYY-MM-DDTHH:MM:SS'.
357 timestamp = datetime_to_timestamp(datetime.strptime(info['updated'], '%Y-%m-%dT%H:%M:%S'))
358 status = AIS_STATUS_NOT_AVAILABLE
359 rot = AIS_ROT_NOT_AVAILABLE
360 sog = int(info['speed']*AIS_SOG_SCALE)
361 latitude = int(float(info['lat'])*AIS_LATLON_SCALE)
362 longitude = int(float(info['lon'])*AIS_LATLON_SCALE)
# NOTE(review): here the course is truncated to an int BEFORE scaling, whereas
# import_last_pos() uses int(course*AIS_COG_SCALE). The fractional part of the
# course is lost, and the result is only an int if AIS_COG_SCALE is one.
363 cog = int(info['course'])*AIS_COG_SCALE
364 heading = AIS_NO_HEADING
366 #print datetime.utcfromtimestamp(timestamp),
367 #for i in strmmsi, timestamp, status, rot, sog, latitude, longitude, cog, heading, source:
# `strmmsi` and `source` are not assigned in the visible lines.
370 add_nmea1(strmmsi, timestamp, status, rot, sog, latitude, longitude, cog, heading, source)
# Script entry point: parse the command line, override the module settings,
# then run go_summary() and import_last_pos() for an MMSI.
# NOTE(review): this listing skips some numbered lines and ends on an `if`
# with no visible body, so the loop that yields `mmsi`, the assignment of
# `page`, and the tail of the section are not shown.
375 if __name__ == '__main__':
376 parser = OptionParser(usage='%prog [options] mmsi [mmsi]...')
377 parser.add_option('--no-download', help="don't download any file", action='store_true', dest='no_download', default=False)
378 parser.add_option('--download-sleep', help="how many seconds do we sleep after each download. default=%default", action='store', type='int', dest='sleep', default=DOWNLOAD_SLEEP_TIME)
379 parser.add_option('--debug-sql', help="print all sql queries to stdout before running them", action='store_true', dest='debug_sql', default=False)
# The default reference date is today's UTC date as YYYYMMDD.
# NOTE(review): the help text says "Do NOT use without --date" on the --date
# option itself; presumably another option (e.g. --no-download) was meant.
380 parser.add_option('--date', help="force reference date. default=%default\nDo NOT use without --date", action='store', dest='reference_date', default=datetime.utcnow().date().strftime('%Y%m%d'))
381 parser.add_option('--print-mmsi', help="prints each mmsi before it's processed", action='store_true', dest='print_mmsi', default=False)
382 (options, args) = parser.parse_args()
385 print >> sys.stderr, "Need parameters"
# Command-line values replace the module-level defaults.
388 DISABLE_DOWNLOAD = options.no_download
389 DOWNLOAD_SLEEP_TIME = options.sleep
390 if options.debug_sql:
392 reference_date = options.reference_date
# Loop over trailing CR/LF characters of the MMSI (the loop body is not
# visible; presumably it strips them).
395 while len(mmsi) and mmsi[-1] in '\r\n':
# Non-numeric MMSIs are reported and ignored.
397 if not mmsi.isdigit():
398 print 'MMSI', mmsi, 'is not numeric. Ignoring.'
400 if options.print_mmsi:
402 found = go_summary(reference_date, mmsi)
406 if import_last_pos(reference_date, mmsi, page):