2 # -*- coding: utf-8 -*-
11 from gzip import GzipFile
12 from StringIO import StringIO
13 from OpenSSL import SSL
18 IPV4_IN_IPV6_PREFIX = '::ffff:'
def format_ip_port(ip, port, *spam):
    "Build a nice printable string for a given sockaddr"
    # sockaddr tuples for AF_INET6 carry extra members (flowinfo, scope_id);
    # *spam swallows them so this can be called as format_ip_port(*sockaddr).
    # An IPv4 address seen through an IPv6 socket appears as '::ffff:a.b.c.d';
    # strip the prefix so logs show the plain IPv4 form.
    if ip.startswith(IPV4_IN_IPV6_PREFIX):
        ip = ip[len(IPV4_IN_IPV6_PREFIX):]
    # Bracketed form is the conventional notation for IPv6 host:port.
    # NOTE(review): as shown here the second return is unreachable — a
    # condition selecting between the two forms appears to be missing.
    return '[%s]:%s' % (ip, port)
    return '%s:%s' % (ip, port)
33 "Base class for both requests & responses"
35 # Derived class can override that value:
36 content_length_default = 0
42 self.headers_complete = False
    def set_line1(self, line1):
        """First-line setter; HttpRequest's override parses method/url/version."""
    def get_line1(self, *args, **kargs):
        """First-line getter; HttpRequest's override rebuilds the request line."""
    def add_header_line(self, line):
        """Parse one raw header line and append it to self.headers.

        A well-formed 'Name: value' line is stored as a (name, value) tuple;
        a line without a colon is stored as a 1-tuple (see FIXME below).
        """
        # RFC 2616 section 2.2:
        # Field names are case insensitive
        # RFC 2616 section 4.2:
        # Multiple message-header fields with the same field-name MAY be
        # present in a message if and only if the entire field-value for that
        # header field is defined as a comma-separated list [i.e., #(values)].
        # It MUST be possible to combine the multiple header fields into one
        # "field-name: field-value" pair, without changing the semantics of the
        # message, by appending each subsequent field-value to the first, each
        # separated by a comma.
        # A proxy MUST NOT change the order of fields.
        # Split on the first ':' only — the value may itself contain colons.
        sp = line.split(':', 1)
        #logging.debug(repr(sp))
        # Leading/trailing OWS is stripped from the name; only space/tab from
        # the value (so embedded whitespace is preserved).
        self.headers.append((sp[0].strip(), sp[1].strip(' \t')))
        # FIXME headers can be on multiple lines
        # See RFC 2616 section 2.2:
        # HTTP/1.1 header field values can be folded onto multiple lines if
        # the continuation line begins with a space or horizontal tab.
        # Fallback: keep the raw line as a 1-tuple so it is not lost.
        self.headers.append((line,))
    def get_header_value(self, key):
        """Return the value of header `key`, scanning self.headers in order."""
        # Linear scan over the (name, value) tuples built by add_header_line.
        for header in self.headers:
    def all_headers(self, *args, **kargs):
        """Serialize the first line plus all headers, CRLF-terminated.

        Extra arguments (e.g. abs_path=) are forwarded to get_line1().
        """
        line1 = self.get_line1(*args, **kargs)
        result += line1+'\r\n'
        for header in self.headers:
            # ': '.join also handles the 1-tuple fallback stored by
            # add_header_line (emits the raw line unchanged).
            result += ': '.join(header)+'\r\n'
    def is_data_complete(self):
        """Return True once the whole message body has been buffered.

        Handles two cases: chunked transfer-encoding (decoded incrementally
        into self.chunk_data, then folded back into self.data with fixed-up
        headers) and plain Content-Length bodies.
        """
        # No body can be complete before the headers are.
        if not self.headers_complete:
        if self.get_header_value('Transfer-Encoding')=='chunked':
            # Lazily initialise the chunk decoding state on first call.
            if not hasattr(self, 'chunk_size'):
                # chunk_size == 0 means: expecting a new chunk-size line next.
                if self.chunk_size == 0:
                    hex_chunk_size = self.pop_data_line()
                    if options.debug_length:
                        logging.debug('hex chunk_size=%s', hex_chunk_size) # TODO extensions
                    if hex_chunk_size is None:
                        return False # need more data
                    # Chunk sizes are hexadecimal per RFC 2616 section 3.6.1.
                    self.chunk_size = int(hex_chunk_size, 16) # CRLF
                    if options.debug_length:
                        logging.debug('chunk_size=%s', self.chunk_size)
                    # A zero-size chunk terminates the chunked body.
                    if self.chunk_size == 0:
                        logging.warning('chunk-transfer trailer? :%s', repr(self.data))
                        # Replace the still-encoded buffer with the decoded body.
                        self.data = self.chunk_data # TODO trailers
                        # remove any Transfert-Encoding: chunked
                        # update Content-Length
                        content_length_updated = False
                        while i < len(self.headers):
                            key = self.headers[i][0].lower()
                            if key == 'transfer-encoding':
                            if key == 'content-length':
                                self.headers[i] = ('Content-Length', str(len(self.data)))
                                content_length_updated = True
                        if not content_length_updated:
                            self.headers.append(('Content-Length', str(len(self.data))))
                        #self.headers_complete = False
                        break # we're done with chunking
                # Consume as much of the current chunk as the buffer holds.
                if self.chunk_size <= l:
                    need_more_data = False
                    need_more_data = True
                self.chunk_data += self.data[:l]
                self.data = self.data[l:]
            # Drop the CRLF that terminates each chunk.
            self.chunk_data = self.chunk_data[:-2] # CRLF
        # Non-chunked path: compare buffered length against Content-Length.
        l = self.get_header_value('Content-Length')
        l = int(l) # TODO execpt
        # No Content-Length header: fall back to the class default
        # (0 for requests, sys.maxint i.e. read-until-close for responses).
        l = self.content_length_default
        if options.debug_length:
            logging.debug('Expected length=%s', l)
            logging.debug('Current length=%s', len(self.data))
        return len(self.data) >= l
    def pop_data_line(self):
        """
        Extract a line separated by CRLF from the data buffer
        Returns None if there is no CRLF left
        """
        p = self.data.find('\r\n')
        # Advance past the 2-byte CRLF separator; the line itself is returned
        # without it.
        self.data = self.data[p+2:]
    def recv_from(self, sock):
        """Read a full HTTP message (headers + body) from `sock`.

        Loops on recv(), feeding the buffer through pop_data_line() to parse
        the first line and headers, then waits for is_data_complete().
        Returns early (partial message) on SSL error or connection close.
        """
        self.data = '' # unparsed data
            new_raw_data = sock.recv(1500) # usual IP MTU, for speed
        except SSL.Error, err:
            logging.debug('SSL.Error during sock.recv: %s', repr(err))
            return # connection failure
            return # connection was closed
        self.data += new_raw_data
        # Header-parsing state machine: one CRLF-terminated line at a time.
        while not self.headers_complete:
            line = self.pop_data_line()
                break # no more token, continue recv
            # An empty line ends the header section, but only once the first
            # line has been seen.
            if self.line1 is not None:
                self.headers_complete = True
            # See RFC 2616 section 4.1:
            # If the server is reading the protocol stream at the beginning of a
            # message and receives a CRLF first, it should ignore the CRLF
            elif self.line1 is None:
            self.add_header_line(line)
        if self.is_data_complete():
    def send_to(self, sock, abs_path=False):
        """
        Sends that http information to an opened socked
        If abs_path is true, it will remove the scheme/hostname part
        Otherwise, it will produce a full absolute url
        """
        # Headers first (already CRLF-terminated by all_headers), then body.
        sock.sendall(self.all_headers(abs_path=abs_path))
        sock.sendall(self.data)
        except (socket.error, SSL.SysCallError), err:
            # err.args[1] is the human-readable error string.
            logging.error('Error during sock.send: %s', err.args[1])
207 def debug_dump_line1(self):
208 line1 = self.get_line1()
209 if line1 is not None:
210 logging.debug(self.get_line1())
    def debug_dump_headers(self):
        """Log each stored header at DEBUG level."""
        for header in self.headers:
            # Normal (name, value) pair...
            logging.debug('%s: %s', repr(header[0]), repr(header[1]))
            # ...or the 1-tuple fallback stored by add_header_line.
            logging.debug('%s (NO VALUE)', repr(header[0]))
    def debug_dump_data(self):
        """Log the body, truncated to options.dump_length bytes."""
        data_length = len(self.data)
        truncate = data_length > options.dump_length
        # Truncated dumps get a trailing ellipsis marker.
        printed_data = repr(self.data[:options.dump_length])+'...'
        printed_data = repr(self.data)
        logging.debug('data: (%s bytes) %s', data_length, printed_data)
    def debug_dump(self, title='DUMP'):
        """Log the whole message (line1, headers, body) under a banner.

        Full dumps only happen when --log-full-transfers is set; otherwise
        just the first line is logged.
        """
        if options.log_full_transfers:
            # 80-column banner: title padded with dashes.
            logging.debug(title+' '+('-'*(80-l-1)))
            self.debug_dump_line1()
            self.debug_dump_headers()
            self.debug_dump_data()
            logging.debug('-'*80)
        self.debug_dump_line1()
    def clean_hop_headers(self):
        """Strip headers that must not be forwarded by a proxy."""
        #remove any Proxy-* header, and hop by hop headers
        # Index-based while loop because entries are deleted during the scan.
        while i < len(self.headers):
            # Header names are case-insensitive (RFC 2616 section 2.2).
            key = self.headers[i][0].lower()
            # Hop-by-hop headers listed in RFC 2616 section 13.5.1.
            if key.startswith('proxy-') or key in ('connection', 'keep-alive', 'te', 'trailers', 'transfer-encoding', 'upgrade'):
class HttpRequest(HttpBase):
    """An incoming HTTP request: parses and rebuilds the request line."""
    # default is no data for requests
    content_length_default = 0

        HttpBase.__init__(self)
        # Filled in by set_line1() once the request line is parsed.
        self.http_method = ''
        self.http_version = 'HTTP/1.1'
        self.parsed_url = None
261 def recv_from(self, sock):
262 HttpBase.recv_from(self, sock)
263 if options.debug_raw_messages:
264 self.debug_dump('REQUEST RAW')
266 def send_to(self, sock, *args, **kargs):
267 HttpBase.send_to(self, sock, *args, **kargs)
268 if options.debug_raw_messages:
269 self.debug_dump('REQUEST PATCHED')
    def set_line1(self, line1):
        """Parse a request line 'METHOD url HTTP/x.y' into attributes."""
        splits = line1.split(' ')
        self.http_method, url, self.http_version = splits
        self.parsed_url = urlparse.urlparse(url)
        logging.error("Can't parse http request line %s", line1)
    def get_line1(self, abs_path=False, *args, **kargs):
        """Rebuild the request line; abs_path drops scheme and netloc.

        With abs_path=True only the path/query/fragment part is kept (the
        form sent to an origin server); otherwise the full URL is used.
        """
        url = urlparse.urlunparse(['', ''] + list(self.parsed_url[2:]))
        url = self.parsed_url.geturl()
        return self.http_method + ' ' + url + ' ' + self.http_version
    def clean_host_request(self):
        """Reconcile the request URL's netloc with the 'Host' header.

        Copies the Host header into the URL when the URL has no hostname,
        and conversely rewrites the Host header from the URL when both are
        present but disagree (RFC 2616 section 5.2 precedence).
        """
        def split_it(host_port):
            # 'www.google.com:80' -> 'www.google.com', '80'
            # 'www.google.com' -> 'www.google.com', ''
            sp = host_port.split(':', 1)
        def join_it(host, port):
            # Re-append the port only when one was present.
            result += ':' + str(port)
        # urlparse('host.example:8080') puts the host in .scheme when there is
        # no '//' — detect that case and shift it into the netloc slot.
        if self.parsed_url.scheme and not self.parsed_url.netloc:
            self.parsed_url = urlparse.ParseResult('', self.parsed_url.scheme, *self.parsed_url[2:])
            logging.debug('emptying scheme for netloc')
        request_hostname = self.parsed_url.hostname
        request_port = self.parsed_url.port
        request_netloc = join_it(request_hostname, request_port)
        header_necloc = self.get_header_value('Host')
        header_hostname, header_port = split_it(header_necloc)
        if not request_hostname and header_hostname:
            # copy "Host" header into request netloc
            self.parsed_url = urlparse.ParseResult(self.parsed_url.scheme, header_hostname, *self.parsed_url[2:])
        elif request_hostname:
            if request_netloc != header_necloc:
                # RFC 2616, section 5.2: Host header must be ignored FIXME
                logging.warning('Ignoring necloc value %s in request. Header "Host" value is %s', request_netloc, header_necloc)
                for i in range(len(self.headers)):
                    if self.headers[i][0].lower()=='host':
                        # NOTE(review): headers entries are tuples (see
                        # add_header_line), so this item assignment raises
                        # TypeError — should rebuild the tuple instead.
                        self.headers[i][1] = request_netloc
            elif not header_necloc:
                self.headers.append(('Host', request_netloc))
328 def set_default_scheme(self, scheme):
329 if not self.parsed_url.scheme:
330 self.parsed_url = urlparse.ParseResult(scheme, *self.parsed_url[1:])
332 def check_headers_valid(self):
333 if not self.http_method:
334 raise HttpErrorResponse(400, 'Bad Request', 'Http method is required')
335 if not self.parsed_url:
336 raise HttpErrorResponse(400, 'Bad Request', 'Http url is required')
337 if not self.http_version:
338 raise HttpErrorResponse(400, 'Bad Request', 'Http version is required')
class HttpResponse(HttpBase):
    """An HTTP response received from the upstream server."""
    # for responses, default is data until connection closed :
    # sys.maxint makes is_data_complete() keep reading until recv() returns
    # nothing (Python 2 only; sys.maxint no longer exists in Python 3).
    content_length_default = sys.maxint
344 def recv_from(self, sock):
345 HttpBase.recv_from(self, sock)
346 if options.debug_raw_messages:
347 self.debug_dump('RESPONSE RAW')
349 def send_to(self, sock, *args, **kargs):
350 HttpBase.send_to(self, sock, *args, **kargs)
351 if options.debug_raw_messages:
352 self.debug_dump('RESPONSE PATCHED')
    def decompress_data(self):
        """Inflate a gzip-compressed body in place and fix up the headers.

        Only 'Content-Encoding: gzip' is handled; on success the
        Content-Encoding header is dropped and Content-Length is updated
        to the inflated size.
        """
        compression_scheme = self.get_header_value('Content-Encoding')
        if compression_scheme == 'gzip':
            # GzipFile needs a file-like object; wrap the in-memory body.
            gzf = GzipFile(fileobj=StringIO(self.data))
            plain_data = gzf.read()
            logging.error('Error while decompressing gzip data: %s', err)
            self.debug_dump('RESPONSE BEFORE DECOMPRESSION')
            # remove any Content-Encoding header
            # update Content-Length
            # Index-based loop because entries are removed during the scan.
            while i < len(self.headers):
                key = self.headers[i][0].lower()
                if key == 'content-encoding':
                if key == 'content-length':
                    self.headers[i] = ('Content-Length', str(len(plain_data)))
            self.data = plain_data
class HttpErrorResponse(HttpResponse):
    """A synthetic response built by the proxy itself (also raisable)."""
    def __init__(self, errcode, errtitle, errmsg=None):
        """Build a complete 'HTTP/1.1 errcode errtitle' response.

        errmsg, when given, becomes the body; Content-Length is set to match.
        """
        HttpResponse.__init__(self)
        self.set_line1('HTTP/1.1 %s %s' % (errcode, errtitle))
        self.add_header_line('Server: Spy proxy')
        # RFC 2616 section 14.18: responses should carry a Date header.
        self.add_header_line('Date: %s' % ctime())
        self.headers.append(('Content-Length', str(len(self.data))))
def get_connected_sock(hostname, port):
    """Resolve hostname:port and return a connected TCP socket.

    Tries every address returned by getaddrinfo (IPv4 and IPv6) in order;
    raises HttpErrorResponse(404) when resolution or every connection
    attempt fails.
    """
        HttpErrorResponse(500, 'Internal error', "Can't connect to port %s" % port)
        # AF_UNSPEC: accept both IPv4 and IPv6 results.
        addrinfo = socket.getaddrinfo(hostname, port, socket.AF_UNSPEC, socket.SOCK_STREAM)
    except socket.gaierror, err:
        if err.args[1] == -2: # Name or service not known
        logging.debug('Connection to %s failed: %s', format_ip_port(hostname, port), err.args[1])
        # NOTE(review): message typo — "Can'resolve" should read "Can't resolve".
        raise HttpErrorResponse(404, 'Unknown host', 'Can\'resolve %s.' % hostname)
    # Try each resolved address until one accepts the connection.
    for family, socktype, proto, canonname, sockaddr in addrinfo:
        if options.debug_connections:
            logging.debug('Connecting to %s ...', format_ip_port(*sockaddr))
        sock = socket.socket(family, socktype, proto)
            sock.connect(sockaddr)
        except socket.gaierror, err:
            # -2 = name resolution failure; anything else is unexpected here.
            if err.args[0] != -2:
            if options.debug_connections:
                logging.debug('Connection to %s failed: %s', format_ip_port(*sockaddr), err.args[1])
        except socket.error, err:
            if err.args[0] not in (111, 113): # Connection refused, No route to host
            if options.debug_connections:
                logging.debug('Connection to %s failed: %s', format_ip_port(*sockaddr), err.args[1])
        # Connection successfull
        if options.debug_connections:
            logging.debug('Connected to %s', format_ip_port(*sockaddr))
    # All candidate addresses failed.
    raise HttpErrorResponse(404, 'Not found', 'Can\'t connect to %s.' % hostname)
def run_request_http(request):
    """Forward `request` to its origin server over plain HTTP.

    Returns the (decompressed) HttpResponse received from the server.
    """
    # Default HTTP port is 80 when the URL does not specify one.
    sock = get_connected_sock(request.parsed_url.hostname, request.parsed_url.port or 80)
    # abs_path=True: origin servers expect a path, not an absolute URL.
    request.send_to(sock, abs_path=True)
    response = HttpResponse()
    response.recv_from(sock)
    sock.shutdown(socket.SHUT_RDWR)
    # Inflate gzip bodies so the proxy can inspect/log plain text.
    response.decompress_data()
def run_request_https(request):
    """Forward `request` to its origin server over TLS.

    Wraps the TCP socket in a pyOpenSSL client connection and returns the
    (decompressed) HttpResponse received from the server.
    """
    # Default HTTPS port is 443 when the URL does not specify one.
    sock = get_connected_sock(request.parsed_url.hostname, request.parsed_url.port or 443)
    # SSLv23_METHOD negotiates the highest protocol both sides support.
    ssl_context = SSL.Context(SSL.SSLv23_METHOD)
    ssl_sock = SSL.Connection(ssl_context, sock)
    # Client side of the TLS handshake.
    ssl_sock.set_connect_state()
    request.send_to(ssl_sock, abs_path=True)
    response = HttpResponse()
    response.recv_from(ssl_sock)
    response.decompress_data()
def make_https_sslcontext(hostname):
    """Return an SSL.Context presenting a certificate for `hostname`.

    Certificates are generated on demand with openssl under certs/ and
    signed by the local certs/ca.* CA, so the proxy can impersonate any
    intercepted HTTPS host (classic MITM setup).
    """
    # To generate a certificate:
    # openssl req -nodes -new -x509 -keyout certs/proxy.key -out certs/proxy.crt -days 10000
    # openssl req -nodes -new -x509 -keyout certs/ca.key -out certs/ca.crt -days 10000 -subj "/O=Spy Proxy/CN=*" -newkey rsa:2048
    # openssl req -nodes -new -subj "/CN=*.nirgal.com" -days 10000 -keyout certs/nirgal.com.key -out certs/nirgal.com.csr
    # openssl x509 -req -in certs/nirgal.com.csr -out certs/nirgal.com.crt -CA certs/ca.crt -CAkey certs/ca.key [-CAcreateserial]
    # openssl req -batch -nodes -new -subj "/CN=*.nirgal.com" -days 10000 -keyout certs/nirgal.com.key | openssl x509 -req -out certs/nirgal.com.crt -CA certs/ca.crt -CAkey certs/ca.key
    keyfile = os.path.join('certs', hostname+'.key')
    crtfile = os.path.join('certs', hostname+'.crt')
    csrfile = os.path.join('certs', hostname+'.csr')
    ssl_context = SSL.Context(SSL.SSLv23_METHOD)
    # Generate key + CA-signed certificate once, then reuse the files.
    if not os.path.exists(keyfile) or not os.path.exists(crtfile):
        logging.debug('Generating custom SSL certificates for %s', hostname)
        subprocess.call(['openssl', 'req', '-nodes', '-new', '-subj', '/CN='+hostname, '-days', '10000', '-keyout', keyfile, '-out', csrfile])
        subprocess.call(['openssl', 'x509', '-req', '-in', csrfile, '-out', crtfile, '-CA', 'certs/ca.crt', '-CAkey', 'certs/ca.key'])
    ssl_context.use_privatekey_file (keyfile)
    ssl_context.use_certificate_file(crtfile)
class ProxyConnectionIn(threading.Thread):
    """One thread per accepted client connection."""
    def __init__(self, clientsocket):
        """Remember the accepted client socket; work happens in run()."""
        threading.Thread.__init__(self)
        self.clientsocket = clientsocket
    def check_proxy_auth(self, request_in):
        """Check the Proxy-Authorization header against options.auth.

        Builds a 407 HttpErrorResponse (with a Proxy-Authenticate challenge)
        when Basic credentials are absent or wrong.
        """
        proxy_auth = request_in.get_header_value('Proxy-Authorization')
        if proxy_auth is not None and proxy_auth.startswith('Basic '):
            # Strip the 'Basic ' prefix, leaving the base64 credentials.
            proxy_auth = proxy_auth[len('Basic '):]
            # logging.debug('proxy_auth raw: %s', proxy_auth)
            # Decoded form is 'login:password', compared verbatim to --auth.
            proxy_auth = base64.b64decode(proxy_auth)
            #logging.debug('proxy_auth: %s', proxy_auth)
        if proxy_auth != options.auth:
            response = HttpErrorResponse('407',
                'Proxy Authentication Required',
                'Proxy requires an authorization.')
            response.add_header_line('Proxy-Authenticate: Basic realm="Spy proxy"')
        # --- per-connection worker: read request, relay, send response ---
        request_in = HttpRequest()
        request_in.recv_from(self.clientsocket)
        self.check_proxy_auth(request_in) # raises 407
        request_in.check_headers_valid() # raises 400
        if request_in.http_method in ('OPTIONS', 'GET', 'HEAD', 'POST', 'PUT', 'DELETE', 'TRACE'):
            # Plain HTTP: patch the request, relay it, relay the answer back.
            if request_in.parsed_url and request_in.parsed_url.scheme == 'http':
                request_in.clean_host_request()
                request_in.clean_hop_headers()
                # The proxy speaks one-shot connections only.
                request_in.headers.append(('Connection', 'close'))
                request_in.debug_dump('REQUEST')
                response = run_request_http(request_in)
                response.clean_hop_headers()
                response.headers.append(('Connection', 'close'))
                response.debug_dump('RESPONSE')
                raise HttpErrorResponse(501, 'Not implemented', 'Unsupported scheme %s.' % request_in.parsed_url.scheme)
            # CLF-style access log: method url version status size.
            logging.info("%s %s %s %s %s", request_in.http_method, request_in.parsed_url.geturl(), request_in.http_version, response.line1[9:12], len(response.data) or '-')
        elif request_in.http_method == 'CONNECT':
            # HTTPS interception: accept the tunnel, then MITM the TLS stream
            # with an on-the-fly certificate for the requested host.
            request_in.clean_host_request()
            HttpErrorResponse(200, 'Proceed', '').send_to(self.clientsocket)
            ssl_context = make_https_sslcontext(request_in.parsed_url.hostname)
            ssl_sock = SSL.Connection(ssl_context, self.clientsocket)
            # Server side of the TLS handshake with the client.
            ssl_sock.set_accept_state()
            # The real request arrives inside the TLS stream.
            request_in_ssl = HttpRequest()
            request_in_ssl.recv_from(ssl_sock)
            request_in_ssl.check_headers_valid() # raises 400
            request_in_ssl.clean_hop_headers()
            request_in_ssl.headers.append(('Connection', 'close'))
            request_in_ssl.clean_host_request()
            # Inside a CONNECT tunnel the request line has no scheme.
            request_in_ssl.set_default_scheme('https')
            response = run_request_https(request_in_ssl)
            response.clean_hop_headers()
            response.headers.append(('Connection', 'close'))
            response.send_to(ssl_sock)
            logging.info("%s %s %s %s %s", request_in_ssl.http_method, request_in_ssl.parsed_url.geturl(), request_in_ssl.http_version, response.line1[9:12], len(response.data) or '-')
            #self.clientsocket.shutdown(socket.SHUT_RDWR)
            #self.clientsocket.close()
            return # bypass classic socket shutdown
        request_in.debug_dump('REQUEST')
        logging.error('Method %s not supported', request_in.http_method)
        # FIXME RFC 2616, section 14.7: We should return an "Allow" header
        self.clientsocket.send('HTTP/1.1 405 Method not allowed\r\n\r\nSorry method %s is not supported by the proxy.\r\n' % request_in.http_method)
        self.clientsocket.close()
        except HttpErrorResponse, error:
            # Error responses raised anywhere above are sent to the client.
            response.send_to(self.clientsocket)
        self.clientsocket.shutdown(socket.SHUT_RDWR)
        except socket.error, err:
            logging.error('Error during socket.shutdown: %s', err.args[1])
        self.clientsocket.close()
    # --- logging setup: DEBUG with -d, INFO otherwise ---
    loglevel = logging.DEBUG
    loglevel = logging.INFO
    logging.basicConfig(level=loglevel, format='%(asctime)s %(levelname)s %(message)s')
    # --- resolve the listen address ---
    if options.listen_host:
        # Let getaddrinfo pick the family matching the given address.
        addrinfo = socket.getaddrinfo(options.listen_host, options.listen_port, socket.AF_UNSPEC, socket.SOCK_STREAM)
        family, socktype, proto, canonname, sockaddr = addrinfo[0]
        family = socket.AF_INET6
        family = socket.AF_INET
        socktype = socket.SOCK_STREAM
        # --port accepts either a number or a service name (e.g. 'http').
        options.listen_port = int(options.listen_port)
        options.listen_port = socket.getservbyname(options.listen_port)
        sockaddr = (options.listen_host, options.listen_port)
    # TODO: multicast require more stuff
    # see http://code.activestate.com/recipes/442490/
    serversocket = socket.socket(family, socktype) #, proto) # TODO
    # SO_REUSEADDR avoids 'address already in use' right after a restart.
    serversocket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    logging.info('Listening on %s', format_ip_port(*sockaddr))
    serversocket.bind(sockaddr)
    serversocket.listen(30)
    # --- accept loop: one ProxyConnectionIn thread per client ---
            clientsocket, address = serversocket.accept()
        except KeyboardInterrupt:
            logging.info('Ctrl+C received. Shutting down.')
        logging.debug('thread count: %s', threading.activeCount())
        if options.debug_connections:
            logging.debug('Connection from %s', format_ip_port(*address))
        cnx_thread = ProxyConnectionIn(clientsocket)
if __name__ == '__main__':
    # Command-line interface: all runtime behaviour is driven by `options`,
    # which the rest of the module reads as a global.
    from optparse import OptionParser #, OptionGroup
    parser = OptionParser(usage='%prog [options]')
    parser.add_option('-b', '--bind',
        action='store', type='str', dest='listen_host', default='',
        help="listen address, default='%default'")
    parser.add_option('-p', '--port',
        action='store', type='str', dest='listen_port', default='8080',
        help="listen port, default=%default")
    parser.add_option('--auth',
        action='store', type='str', dest='auth', default='',
        metavar='LOGIN:PASSWORD',
        help="proxy authentification, default='%default'")
    parser.add_option('--log-full-transfers',
        action='store_true', dest='log_full_transfers', default=False,
        help="log full requests and responses")
    parser.add_option('--dump-length',
        action='store', type='int', dest='dump_length', default=160,
        help="length of data dump")
    parser.add_option('-d', '--debug',
        action='store_true', dest='debug', default=False,
    parser.add_option('--debug-raw-messages',
        action='store_true', dest='debug_raw_messages', default=False,
        help="dump raw messages before they are patched")
    parser.add_option('--debug-connections',
        action='store_true', dest='debug_connections', default=False,
        help="dump connections information")
    parser.add_option('--debug-length',
        action='store_true', dest='debug_length', default=False,
        help="dump lengthes information")
    options, args = parser.parse_args()