--- /dev/null
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
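+"""Import vessel data scraped from www.marinetraffic.com.
+
+For every MMSI given on the command line, fetch (or read from the local
+cache under MARINETRAFFIC_DIR) the vessel details page, the list of last
+reported positions and the per-day track XML, and store the results
+through the ais.db helpers (add_nmea5_partial / add_nmea1).
+"""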
+
+DOWNLOAD_SLEEP_TIME = 10
+DISABLE_DOWNLOAD = False
+MARINETRAFFIC_DIR = '/var/lib/ais/marinetraffic/'
+
+import sys, os, urllib2, time
+from pprint import pprint
+from datetime import datetime, date
+from time import sleep
+from optparse import OptionParser
+
+from ais.db import *
+from ais.ntools import clean_alnum, clean_ais_charset, open_with_mkdirs, datetime_to_timestamp
+from ais.common import *
+from ais.html_parser import *
+
+
+def go_summary(reference_date, mmsi):
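+    """Import the vessel details page of `mmsi`.
+
+    The page is read from the cache, or downloaded unless DISABLE_DOWNLOAD
+    is set. Its content is parsed, qualified into typed fields and stored
+    with add_nmea5_partial. Returns True if the vessel was found, False
+    otherwise.
+    """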
+ def get_raw_summary(html):
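+        """Parse the details page into a {section: {label: value}} dict.
+
+        The vessel name comes from the <h1> element, section names from
+        <h2> elements, labels from <b> elements and values from the text
+        nodes up to the next <br>.
+        """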
+ root = html_parse(html)
+ divs = get_elem(root, 'div')
+ divdetail = divs[3]
+
+ info_raw = {}
+ section = u''
+ boldtext = u''
+ text = u''
+ for node in divdetail.children:
+ if isinstance(node, Tag) and node.name == 'h1':
+ info_raw[u'name'] = get_inner_text(node)
+ continue
+ if isinstance(node, Tag) and node.name == 'h2':
+ if boldtext or text:
+ info_raw[section][boldtext] = text
+ boldtext = text = u''
+ section = get_inner_text(node)
+ info_raw[section] = {}
+ continue
+
+ if isinstance(node, Tag) and node.name == 'br':
+ if boldtext or text:
+ info_raw[section][boldtext] = text
+ boldtext = text = u''
+ elif isinstance(node, Tag) and node.name == 'b':
+ if boldtext or text:
+ info_raw[section][boldtext] = text
+ boldtext = text = u''
+ boldtext = get_inner_text(node)
+ else:
+ text += get_inner_text(node)
+ if boldtext or text:
+ info_raw[section][boldtext] = text
+ boldtext = text = u''
+ return info_raw
+
+ def qualify_summary(info_raw):
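+        """Turn the raw scraped strings into typed fields.
+
+        Extracts callsign, IMO number, length and breadth, MMSI, ship
+        type code, destination, draught, ETA and the date the voyage
+        data was received, skipping known placeholder values.
+        """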
+ #pprint(info_raw)
+ info = {}
+ info['name'] = info_raw['name']
+
+ try:
+ details = info_raw[u"Vessel's Details:"]
+ except KeyError:
+ details = info_raw[u"Vessel's Details"]
+ info['callsign'] = clean_alnum(details[u'Call Sign:'].encode('ascii', 'replace'))
+ tmp = details.get(u'IMO:', None)
+ if tmp:
+ tmp = tmp.replace(u',', u'').strip()
+ if tmp != u'999999999':
+                info['imo'] = tmp
+ tmp = details.get(u'Length x Breadth:', None)
+ if tmp:
+ length, breadth = tmp.replace(u'm', u'').split('X')
+ info['length'], info['breadth'] = int(length), int(breadth)
+ info['mmsi'] = details[u'MMSI:'].strip()
+ tmp = details.get(u'Ship Type:', None)
+ if tmp:
+ tmp = tmp.strip()
+ reverse_types = {
+ u'Fishing': 30,
+ #u'Towing': 31,
+ #u'Towing': 32,
+ u'Dredger': 33,
+ u'Dive Vessel': 34,
+ u'Military Ops': 35,
+ u'Sailing Vessel': 36,
+ u'Pleasure Craft': 37,
+ u'Pilot Vessel': 50,
+ u'SAR': 51,
+ u'Tug': 52,
+ u'Port Tender': 53,
+ u'Anti-Pollution': 54,
+ u'Law Enforce': 55,
+ #u'Local Vessel': 56,
+ #u'Local Vessel': 57,
+ u'Medical Trans': 58,
+ u'Special Craft': 59,
+            # Cargo is reported for types 70, 75, 76 .. 79
+ u'Cargo - Hazard A (Major)': 71,
+ u'Cargo - Hazard B': 72,
+ u'Cargo - Hazard C (Minor)': 73,
+ u'Cargo - Hazard D (Recognizable)': 74,
+ u'Tanker - Hazard A (Major)': 81,
+ u'Tanker - Hazard B': 82,
+ u'Tanker - Hazard C (Minor)': 83,
+ u'Tanker - Hazard D (Recognizable)': 84,
+ }
+ _type = reverse_types.get(tmp, None)
+ if _type is not None:
+ info['type'] = _type
+ else:
+                print >> sys.stderr, "NOTICE: can't properly qualify ship of type", tmp
+ # TODO year built .... ?
+
+ try:
+ voyage = info_raw[u'Voyage Related Info (Last Received):']
+ except KeyError:
+ voyage = info_raw[u'Voyage Related Info (Last Received)']
+ tmp = voyage.get(u'Destination:', None)
+ if tmp:
+ tmp = tmp.strip()
+ if tmp != 'CLASS B':
+ info['destination'] = tmp
+ tmp = voyage.get(u'Draught:', None)
+ if tmp:
+ info['draught'] = float(tmp.replace(u'm', u''))
+ tmp = voyage.get(u'ETA:', None)
+ if tmp:
+ tmp = tmp.strip()
+ try:
+ tmp = datetime.strptime(tmp, '%Y-%m-%d %H:%M')
+ except ValueError:
+ print "Failed to parse ETA date. Trying old format ...",
+ tmp = datetime.strptime(tmp, '%d/%m/%Y %H:%M:%S')
+ print "Success"
+
+ if tmp != datetime(1900, 1, 1):
+ info['eta'] = tmp.strftime('%m%d%H%M')
+ tmp = voyage.get(u'Info Received:', None)
+ if tmp:
+ voyage_updated = tmp.split(u'(')[0].strip()
+ try:
+ info['voyage_updated'] = datetime.strptime(voyage_updated, '%Y-%m-%d %H:%M')
+ except ValueError:
+ print "Failed to parse voyage updated date. Trying old format ...",
+ info['voyage_updated'] = datetime.strptime(voyage_updated, '%d/%m/%Y %H:%M:%S')
+ print "Success"
+
+
+ return info
+
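+    # Use the cached copy under MARINETRAFFIC_DIR when present; otherwise
+    # download the details page and sleep a little to stay polite.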
+ filename = MARINETRAFFIC_DIR+reference_date+'/'+mmsi+'-sum.html'
+ if not os.path.exists(filename):
+ if DISABLE_DOWNLOAD:
+ print >> sys.stderr, filename, 'not found and downloads disabled.'
+ return False
+ request = urllib2.Request('http://www.marinetraffic.com/ais/shipdetails.aspx?MMSI='+mmsi)
+ request.add_header('User-Agent', 'Mozilla/5.0 (X11; U; Linux i686; fr; rv:1.9.0.7) Gecko/2009032018 Firefox/3.0.6 (Debian-3.0.6-1)')
+ uo = urllib2.urlopen(request)
+ html = uo.read()
+ uo.close()
+ sleep(DOWNLOAD_SLEEP_TIME)
+
+ f = open_with_mkdirs(filename, 'w')
+ f.write(html)
+ f.close()
+    else:
+        html = file(filename).read()
+    # decode here so both the freshly downloaded and the cached page are unicode
+    html = unicode(html, 'utf8')
+
+ if u'The requested service is unavailable.' in html or u'Η λειτουργία που ζητήσατε δεν είναι διαθέσιμη.' in html:
+ print >> sys.stderr, 'WARNING: The requested service is unavailable.'
+ os.unlink(filename)
+ return False
+ if u'Non-existent Vessel' in html:
+ print >> sys.stderr, 'WARNING: Vessel unknown'
+ return False
+
+ info_raw = get_raw_summary(html)
+ info = qualify_summary(info_raw)
+ #pprint(info)
+
+ assert info['mmsi'] == mmsi
+
+    def warning(*args):
+        print >> sys.stderr, "WARNING:",
+        for arg in args:
+            print >> sys.stderr, arg,
+        print >> sys.stderr
+
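+    # Map the qualified info onto the add_nmea5_partial field layout;
+    # missing values fall back to the AIS "not available" defaults.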
+ voyage_updated = info.get('voyage_updated', None)
+ if voyage_updated:
+ timestamp = datetime_to_timestamp(voyage_updated)
+ else:
+ timestamp = datetime_to_timestamp(datetime.strptime(reference_date, '%Y%m%d'))
+ imo = int(info.get('imo', 0))
+ if imo >= 1 << 31:
+ warning('imo', imo, 'is too big')
+ imo = 0
+ name = info.get('name', u'').encode('utf8')
+ if len(name) > 20:
+ warning('name', name, 'is too big, truncating')
+ name = name[:20]
+ name = clean_ais_charset(name)
+ callsign = clean_alnum(info.get('callsign', u'').encode('utf8'))
+ type = info.get('type', 0)
+ if type < 0 or type > 100:
+ type = 0 #TODO check
+ eta = info.get('eta', u'00002460')
+ if len(eta)==8:
+ eta_M = int(eta[0:2])
+ eta_D = int(eta[2:4])
+ eta_h = int(eta[4:6])
+ eta_m = int(eta[6:8])
+ else:
+ eta_M = eta_D = 0
+ eta_h = 24
+ eta_m = 60
+ draught = int(info.get('draught', 0)*10)
+ destination = clean_ais_charset(info.get('destination', u'').encode('utf8'))
+
+ add_nmea5_partial(mmsi, timestamp, imo, name, callsign, type, 0, 0, 0, 0, eta_M, eta_D, eta_h, eta_m, draught, destination, 'MTWW')
+
+ return True
+
+
+def import_last_pos(reference_date, mmsi, page=None):
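+    """Import the last reported positions of `mmsi`.
+
+    Reads (or downloads) one page of the ITINERARIES datasheet, stores a
+    position report for every row that carries coordinates and fetches
+    the matching per-day track with import_track(). Returns True when a
+    further page should be fetched, a false value otherwise.
+    """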
+ if page is None or page == 1:
+ filename = MARINETRAFFIC_DIR+reference_date+'/'+mmsi+'-mov.html'
+ else:
+ filename = MARINETRAFFIC_DIR+reference_date+'/'+mmsi+'-mov'+str(page)+'.html'
+
+ if not os.path.exists(filename):
+ if DISABLE_DOWNLOAD:
+ print >> sys.stderr, filename, 'not found and downloads disabled.'
+ return False
+
+ if page is None or page == 1:
+ request = urllib2.Request('http://www.marinetraffic.com/ais/datasheet.aspx?datasource=ITINERARIES&MMSI='+mmsi)
+ else:
+ request = urllib2.Request('http://www.marinetraffic.com/ais/datasheet.aspx?datasource=ITINERARIES&MMSI='+mmsi+'&orderby=MINTIME&sort_order=DESC&var_page='+str(page))
+ request.add_header('User-Agent', 'Mozilla/5.0 (X11; U; Linux i686; fr; rv:1.9.0.7) Gecko/2009032018 Firefox/3.0.6 (Debian-3.0.6-1)')
+ uo = urllib2.urlopen(request)
+ html = uo.read()
+ uo.close()
+ sleep(DOWNLOAD_SLEEP_TIME)
+
+ f = open_with_mkdirs(filename, 'w')
+ f.write(html)
+ f.close()
+    else:
+        html = file(filename).read()
+    # decode here so both the freshly downloaded and the cached page are unicode
+    html = unicode(html, 'utf8')
+
+ if u'No Records Found' in html:
+ print >> sys.stderr, 'NOTICE: No Records found.'
+ return
+ if u'The requested service is unavailable.' in html or u'Η λειτουργία που ζητήσατε δεν είναι διαθέσιμη.' in html:
+ print >> sys.stderr, 'WARNING: The requested service is unavailable.'
+ os.unlink(filename)
+ return
+
+ root = html_parse(html)
+ table = get_elem(root, u'table')[0]
+
+ infosrc = u'MT'
+
+ # Now, import each track
+ for row in table.children[1:]: # ignore first line with headers
+ line = row.children
+
+ latlong = get_merged_leaf_content(line[5])
+ latlong = latlong.replace(u'\xa0', u'').strip()
+ if latlong:
+ lon, lat = latlong.split(' ')
+
+ show_on_map = line[8]
+ assert get_merged_leaf_content(show_on_map).strip() == 'Show on Map'
+ link = show_on_map.children[0].children[0].attributes['href']
+ tmp = link.split(u'?', 2)
+ assert tmp[0] == u'default.aspx'
+ tmp = tmp[1]
+ tmp = tmp.split(u'&')
+ assert len(tmp)==3
+ assert tmp[0] == u'zoom=9'
+ assert tmp[1] == u'oldmmsi='+mmsi
+ tmp = tmp[2]
+ assert tmp.startswith(u'olddate=')
+ dt = tmp[len(u'olddate='):]
+
+ isodt = datetime.strptime(dt, '%m/%d/%Y %I:%M:%S %p')
+
+
+ if latlong:
+ speed = float(get_merged_leaf_content(line[6]))
+ course = float(get_merged_leaf_content(line[7]))
+ #print dt, isodt, lat, long, speed, course
+
+ strmmsi = mmsi
+ timestamp = datetime_to_timestamp(isodt)
+ status = AIS_STATUS_NOT_AVAILABLE
+ rot = AIS_ROT_NOT_AVAILABLE
+ sog = int(speed*AIS_SOG_SCALE)
+ latitude = int(float(lat)*AIS_LATLON_SCALE)
+ longitude = int(float(lon)*AIS_LATLON_SCALE)
+ cog = int(course*AIS_COG_SCALE)
+ heading = AIS_NO_HEADING
+ source = 'MTWW'
+ add_nmea1(strmmsi, timestamp, status, rot, sog, latitude, longitude, cog, heading, source)
+
+ import_track(mmsi, dt, isodt)
+
+    if 'Next page' in html and page != 2:
+ print 'There is another page!'
+ return True
+ else:
+ return False
+
+
+def import_track(mmsi, dt, isodt):
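+    """Import the track XML of `mmsi` for the day given by `isodt`.
+
+    Reads (or downloads) the gettrackxml datasheet and stores one
+    position report per <pos> element it contains.
+    """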
+ filename = MARINETRAFFIC_DIR+isodt.strftime('%Y%m%d')+'/'+mmsi+'-trk.xml'
+ if not os.path.exists(filename):
+ if DISABLE_DOWNLOAD:
+ print >> sys.stderr, filename, 'not found and downloads disabled.'
+ return
+
+ url = 'http://www.marinetraffic.com/ais/gettrackxml.aspx?mmsi=%s&date=%s' % (mmsi, dt.replace(' ','%20'))
+ request = urllib2.Request(url)
+ request.add_header('User-Agent', 'Mozilla/5.0 (X11; U; Linux i686; fr; rv:1.9.0.7) Gecko/2009032018 Firefox/3.0.6 (Debian-3.0.6-1)')
+ request.add_header('Accept' , 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8')
+ request.add_header('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.7')
+ request.add_header('Referer', 'http://www.marinetraffic.com/ais/default.aspx?zoom=9&oldmmsi=%(mmsi)s+&olddate=%(date)s' % { 'mmsi': mmsi, 'date': dt.replace(' ', '%20') })
+ uo = urllib2.urlopen(request)
+ html = uo.read()
+ uo.close()
+ sleep(DOWNLOAD_SLEEP_TIME)
+
+ f = open_with_mkdirs(filename, 'w')
+ f.write(html)
+ f.close()
+ else:
+ html = file(filename).read()
+
+ #print filename
+ xml = unicode(html, 'utf8')
+ info = { 'mmsi': mmsi, 'infosrc': u'MT' }
+ for node in html_lexer(xml):
+ if isinstance(node, Tag) and node.name==u'pos':
+ info['updated'] = node.attributes['timestamp']
+ info['lat'] = float(node.attributes['lat'])
+ info['lon'] = float(node.attributes['lon'])
+ info['course'] = float(node.attributes['course'])
+ info['speed'] = float(node.attributes['speed'])/10.
+ strmmsi = mmsi
+ timestamp = datetime_to_timestamp(datetime.strptime(info['updated'], '%Y-%m-%dT%H:%M:%S'))
+ status = AIS_STATUS_NOT_AVAILABLE
+ rot = AIS_ROT_NOT_AVAILABLE
+ sog = int(info['speed']*AIS_SOG_SCALE)
+ latitude = int(float(info['lat'])*AIS_LATLON_SCALE)
+ longitude = int(float(info['lon'])*AIS_LATLON_SCALE)
+            cog = int(info['course']*AIS_COG_SCALE)
+ heading = AIS_NO_HEADING
+ source = 'MTTR'
+ #print datetime.utcfromtimestamp(timestamp),
+ #for i in strmmsi, timestamp, status, rot, sog, latitude, longitude, cog, heading, source:
+ # print repr(i),
+ #print
+ add_nmea1(strmmsi, timestamp, status, rot, sog, latitude, longitude, cog, heading, source)
+ #dbcommit()
+
+
+
+if __name__ == '__main__':
+ parser = OptionParser(usage='%prog [options] mmsi [mmsi]...')
+ parser.add_option('--no-download', help="don't download any file", action='store_true', dest='no_download', default=False)
+ parser.add_option('--download-sleep', help="how many seconds do we sleep after each download. default=%default", action='store', type='int', dest='sleep', default=DOWNLOAD_SLEEP_TIME)
+ parser.add_option('--debug-sql', help="print all sql queries to stdout before running them", action='store_true', dest='debug_sql', default=False)
+    parser.add_option('--date', help="force reference date. default=%default\nDo NOT use without --no-download", action='store', dest='reference_date', default=datetime.utcnow().date().strftime('%Y%m%d'))
+ parser.add_option('--print-mmsi', help="prints each mmsi before it's processed", action='store_true', dest='print_mmsi', default=False)
+ (options, args) = parser.parse_args()
+
+ if len(args)==0:
+ print >> sys.stderr, "Need parameters"
+ sys.exit(1)
+
+ DISABLE_DOWNLOAD = options.no_download
+ DOWNLOAD_SLEEP_TIME = options.sleep
+ if options.debug_sql:
+ sql_setdebug(True)
+ reference_date = options.reference_date
+
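+    # For each MMSI, import the vessel summary first, then walk the
+    # movement pages until no further page is advertised.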
+ for mmsi in args:
+        mmsi = mmsi.rstrip('\r\n')
+ if not mmsi.isdigit():
+ print 'MMSI', mmsi, 'is not numeric. Ignoring.'
+ continue
+ if options.print_mmsi:
+ print 'MMSI', mmsi
+ found = go_summary(reference_date, mmsi)
+ if found:
+ page = 1
+ while True:
+ if import_last_pos(reference_date, mmsi, page):
+ page += 1
+ else:
+ break