newspaint

Documenting Problems That Were Difficult To Find The Answer To

Node.JS HTTP and HTTPS Proxy

Writing an HTTP proxy using Node.JS is easy. There are plenty of examples on how to do this on the web. But things get trickier when you want to also proxy HTTPS requests. This is because http.createServer() doesn’t call the callback when a CONNECT request comes through. Instead you have to explicitly add a separate listener for the “connect” event and handle the socket communications directly. But this is very similar to handling an HTTP proxy request – you set up a socket connection to the target and then proxy communications between the client request socket and the proxied socket.

This proxy application accepts two command line arguments: -d switches on debugging (which you won’t need normally) and -p port selects which TCP port the proxy should listen on. For example:

user@myserver:~# /usr/local/node/bin/node webproxy.js -d -p 8080

Here is the source code:

var http = require('http');
var net = require('net');

// Debug logging switch: 0 = quiet. main() sets it to 1 when -d is on the
// command line, and the handlers test it before each console.log call.
var debugging = 0;

// Matches "host", "host:port", "[ipv6]" and "[ipv6]:port".
// Capture groups: 1 = IPv6 literal (without the surrounding brackets),
//                 2 = host name or IPv4 address,
//                 3 = decimal port digits.
var regex_hostport = /^(?:\[([^\]]+)\]|([^:]+))(?::([0-9]+))?$/;

/**
 * Split a "host[:port]" authority string into its host and port parts.
 *
 * Bracketed IPv6 literals ("[::1]:8443") are accepted and returned without
 * the brackets, which is the form socket connect calls expect.
 *
 * @param {string} hostString   authority, e.g. a Host header or CONNECT target
 * @param {number} defaultPort  port to use when hostString does not carry one
 * @returns {Array} two-element array [ host, port ]; port is a number when it
 *                  was parsed from hostString, otherwise defaultPort as given.
 *                  If hostString does not match the expected shape it is
 *                  returned unchanged as the host together with defaultPort.
 */
function getHostPortFromString( hostString, defaultPort ) {
  var host = hostString;
  var port = defaultPort;

  var result = regex_hostport.exec( hostString );
  if ( result !== null ) {
    // exactly one of the two host alternatives takes part in a match
    host = ( result[1] !== undefined ) ? result[1] : result[2];
    if ( result[3] !== undefined ) {
      // convert so callers always get the same type as a numeric default
      port = parseInt( result[3], 10 );
    }
  }

  return( [ host, port ] );
}

/**
 * Handle a plain HTTP proxy request.
 *
 * Re-issues the client's request against the host named in its Host header
 * and relays the upstream status, headers and body back to the client.
 *
 * @param userRequest   incoming client request (as passed by http.createServer)
 * @param userResponse  response object used to answer the client
 */
function httpUserRequest( userRequest, userResponse ) {
  if ( debugging ) {
    console.log( '  > request: %s', userRequest.url );
  }

  // without a Host header there is no target to forward the request to
  var hostHeader = userRequest.headers['host'];
  if ( !hostHeader ) {
    userResponse.writeHead( 400, { 'Content-Type': 'text/html' } );
    userResponse.write(
      "<html><body>\r\n" +
      "<h1>400 Bad Request</h1>\r\n" +
      "<p>Missing Host header</p>\r\n" +
      "</body></html>\r\n"
    );
    userResponse.end();
    return;
  }

  var hostport = getHostPortFromString( hostHeader, 80 );

  // have to extract the path from the requested URL; proxy clients send an
  // absolute URL ("http://host/path") in the request line
  var path = userRequest.url;
  var result = /^[a-zA-Z]+:\/\/[^\/]+(\/.*)?$/.exec( userRequest.url );
  if ( result ) {
    // the path group is absent for "http://host", which means the root path
    path = result[1] ? result[1] : "/";
  }

  // NOTE(review): 'agent' and 'auth' are copied from the incoming request
  // as-is; confirm whether they are ever set on it.
  var options = {
    'host': hostport[0],
    'port': hostport[1],
    'method': userRequest.method,
    'path': path,
    'agent': userRequest.agent,
    'auth': userRequest.auth,
    'headers': userRequest.headers
  };

  if ( debugging ) {
    console.log( '  > options: %s', JSON.stringify( options, null, 2 ) );
  }

  var proxyRequest = http.request(
    options,
    function ( proxyResponse ) {
      if ( debugging ) {
        console.log( '  > request headers: %s', JSON.stringify( options['headers'], null, 2 ) );
      }

      if ( debugging ) {
        console.log( '  < response %d headers: %s', proxyResponse.statusCode, JSON.stringify( proxyResponse.headers, null, 2 ) );
      }

      userResponse.writeHead(
        proxyResponse.statusCode,
        proxyResponse.headers
      );

      proxyResponse.on(
        'data',
        function (chunk) {
          if ( debugging ) {
            console.log( '  < chunk = %d bytes', chunk.length );
          }
          userResponse.write( chunk );
        }
      );

      proxyResponse.on(
        'end',
        function () {
          if ( debugging ) {
            console.log( '  < END' );
          }
          userResponse.end();
        }
      );
    }
  );

  proxyRequest.on(
    'error',
    function ( error ) {
      // If the upstream response head was already relayed, a second
      // writeHead() would throw; all that can be done is to end the response.
      if ( userResponse.headersSent ) {
        userResponse.end();
        return;
      }

      // the error text can contain client-supplied data (e.g. the host name),
      // so escape it before embedding it in HTML
      var safeError = String( error )
        .replace( /&/g, '&amp;' )
        .replace( /</g, '&lt;' )
        .replace( />/g, '&gt;' );

      userResponse.writeHead( 500, { 'Content-Type': 'text/html' } );
      userResponse.write(
        "<html><body>\r\n" +
        "<h1>500 Error</h1>\r\n" +
        "<p>Error was <pre>" + safeError + "</pre></p>\r\n" +
        "</body></html>\r\n"
      );
      userResponse.end();
    }
  );

  // relay the client's request body (if any) to the upstream request
  userRequest.addListener(
    'data',
    function (chunk) {
      if ( debugging ) {
        console.log( '  > chunk = %d bytes', chunk.length );
      }
      proxyRequest.write( chunk );
    }
  );

  userRequest.addListener(
    'end',
    function () {
      proxyRequest.end();
    }
  );
}

/**
 * Entry point: parse the command line and start the proxy.
 *
 * Recognised arguments:
 *   -p <port>  TCP port to listen on (default 5555)
 *   -d         switch on debug logging
 *
 * The server answers ordinary HTTP proxy requests through httpUserRequest
 * and handles CONNECT requests (used by clients for HTTPS) by opening a raw
 * TCP connection to the target and relaying bytes in both directions.
 */
function main() {
  var port = 5555; // default port if none on command line

  // check for any command line arguments
  for ( var argn = 2; argn < process.argv.length; argn++ ) {
    if ( process.argv[argn] === '-p' ) {
      // explicit radix; a missing or non-numeric value gives NaN and is
      // rejected by the range check below
      port = parseInt( process.argv[argn + 1], 10 );
      argn++;
      continue;
    }

    if ( process.argv[argn] === '-d' ) {
      debugging = 1;
      continue;
    }
  }

  if ( isNaN( port ) || port < 0 || port > 65535 ) {
    console.error( 'invalid or missing value for -p (expected a port number from 0 to 65535)' );
    process.exit( 1 );
  }

  if ( debugging ) {
    console.log( 'server listening on port ' + port );
  }

  // start HTTP server with custom request handler callback function
  var server = http.createServer( httpUserRequest ).listen(port);

  // add handler for HTTPS (which issues a CONNECT to the proxy)
  server.addListener(
    'connect',
    function ( request, socketRequest, bodyhead ) {
      var url = request['url'];
      var httpVersion = request['httpVersion'];

      var hostport = getHostPortFromString( url, 443 );

      // becomes true once the "200 Connection established" reply is sent;
      // after that the client socket carries tunnelled bytes only
      var tunnelEstablished = false;

      if ( debugging ) {
        console.log( '  = will connect to %s:%s', hostport[0], hostport[1] );
      }

      // set up TCP connection
      var proxySocket = new net.Socket();
      proxySocket.connect(
        parseInt( hostport[1], 10 ), hostport[0],
        function () {
          if ( debugging ) {
            console.log( '  < connected to %s/%s', hostport[0], hostport[1] );
          }

          if ( debugging ) {
            console.log( '  > writing head of length %d', bodyhead.length );
          }

          proxySocket.write( bodyhead );

          // tell the caller the connection was successfully established
          tunnelEstablished = true;
          socketRequest.write( "HTTP/" + httpVersion + " 200 Connection established\r\n\r\n" );
        }
      );

      proxySocket.on(
        'data',
        function ( chunk ) {
          if ( debugging ) {
            console.log( '  < data length = %d', chunk.length );
          }

          socketRequest.write( chunk );
        }
      );

      proxySocket.on(
        'end',
        function () {
          if ( debugging ) {
            console.log( '  < end' );
          }

          socketRequest.end();
        }
      );

      socketRequest.on(
        'data',
        function ( chunk ) {
          if ( debugging ) {
            console.log( '  > data length = %d', chunk.length );
          }

          proxySocket.write( chunk );
        }
      );

      socketRequest.on(
        'end',
        function () {
          if ( debugging ) {
            console.log( '  > end' );
          }

          proxySocket.end();
        }
      );

      proxySocket.on(
        'error',
        function ( err ) {
          // An HTTP status line is only meaningful while the client is still
          // waiting for the CONNECT reply; once the tunnel is up it would be
          // injected into the tunnelled byte stream.
          if ( !tunnelEstablished && socketRequest.writable ) {
            socketRequest.write( "HTTP/" + httpVersion + " 500 Connection error\r\n\r\n" );
          }
          if ( debugging ) {
            console.log( '  < ERR: %s', err );
          }
          socketRequest.end();
        }
      );

      socketRequest.on(
        'error',
        function ( err ) {
          if ( debugging ) {
            console.log( '  > ERR: %s', err );
          }
          proxySocket.end();
        }
      );
    }
  ); // HTTPS connect listener
}

// run the proxy when the script is executed
main();

Postscript

I wrote this proxy and have documented it because I couldn’t find any examples on the Internet for HTTPS proxies. I was trying to connect my web browser to my Node.JS HTTP proxy (which I’d duplicated from code I’d found elsewhere on the web) – but couldn’t see my callback being fired. After delving into the source of http.js that comes with Node.JS and looking at the documentation for the connect event later (it’s obvious when you think about it…) I eventually got myself a working HTTPS proxy (in the same script as the HTTP proxy on the same port).

9 responses to “Node.JS HTTP and HTTPS Proxy”

  1. Anael Favre 2013-02-14 at 17:47:59

    Thank you for your source code !

    I am implementing a simple HTTP(S) proxy for my needs and your article gave me real help!

  2. got 2014-06-04 at 14:54:27

    is your chunk var the equivalent of the body?
    i want to modify the body of the html page return by your script i can’t figure out what variable to use and do a replace on it?

    • newspaint 2014-06-04 at 15:06:07

      proxyResponse.on(‘data’) does contain the body of the HTML response from the remote website. However this may come in several blocks – not one single block of data. So the data you’re searching for may be spanned across two blocks. In addition the content of the page may be encoded (such as with gzip). If you want to modify the data in-place then you will have to create a connection-specific variable and then append all the chunks to that variable. Upon proxyResponse.on(‘end’) you will need to then decode the data in your variable before doing a search-and-replace. Then finally re-encoding before sending back onto the connected client.

      • thefourtheye (@dFourthi) 2014-06-22 at 06:33:49

        Nope. It is not possible. The chunk is actually a part of the encrypted data. So, you cannot make any sense out of the data you receive, but you can simply relay it :-)

      • newspaint 2014-06-24 at 12:11:21

        You are correct in regards to proxying HTTPS traffic. You simply cannot unencrypt transiting encrypted traffic. You can write a man-in-the-middle HTTPS proxy using Node.JS – I’ve done this – but will not blog about it because it is too dangerous to give that information away for free.

        My earlier reply, however, was about unencrypted HTTP traffic. Sometimes this traffic is encoded – a common encoding being gzip. But I believe base-64 isn’t uncommon. Extracting information transiting the proxy is made more difficult if the page is encoded.

  3. joshuaeric 2014-11-06 at 20:20:49

    Is it possible to use a proxy here?

    // set up TCP connection
    var proxySocket = new net.Socket();
    proxySocket.connect(

    I am trying to setup this:

    REQUEST -> NODE PROXY -> PROXY 1 -> TARGET
    REQUEST -> NODE PROXY -> PROXY 2 -> TARGET
    REQUEST -> NODE PROXY -> PROXY 3 -> TARGET

    I have it working great for http, but https is causing the problem. I receive the connect event, but not sure what to do next.

    Your code works, but I can’t use PROXY 1, PROXY 2, or PROXY 3 with it.

    Great job though, your code is getting me closer!

  4. Ritesh 2016-01-03 at 07:15:11

    I’d like to modify the response data (aka MITM) with potentially encoded traffic. Could you please help? Happy to explain why this is a legitimate application for both HTTP and HTTPS traffic. Feel free to PM me.

    • newspaint 2016-01-03 at 07:40:14

      Hello, I have written a MITM proxy using Node.JS in the past when performing required testing on an internal HTTPS application. However I made the deliberate choice never to publish this code.

      It is my belief that MITM attacks should only be conducted by those with enough knowledge to derive this code on their own – and not handed over to script kiddies so recklessly.

      You say you have a legitimate reason to perform MITM attacks. Perhaps that’s true. I’m still not helping you do it. You need to intimately know the security protocols and network protocol stack as well as the programming language so that you can work it out yourself.

      The fewer people who have the power to do MITM attacks the better.

Leave a comment