newspaint

Documenting Problems That Were Difficult To Find The Answer To

JavaScript Regular Expressions for Perl Developers

Too many regular expression tutorials for JavaScript focus on the basics of regular expression syntax rather than on how regular expressions are actually used in code.

Test String For Match

Some basic examples:

// One subject string is tested against six patterns. test() returns true or
// false; the comment on each log call says whether it is reached.
var subject = "abcdefg";

if ( /abc/.test( subject ) ) {
  console.log( "Match" ); // MATCH
}
if ( /^abc/.test( subject ) ) {
  console.log( "Match" ); // MATCH
}
if ( /bcd/.test( subject ) ) {
  console.log( "Match" ); // MATCH
}
if ( /^bcd/.test( subject ) ) {
  console.log( "Match" ); // NO MATCH ("bcd" is not at the start)
}
if ( /^ABC/.test( subject ) ) {
  console.log( "Match" ); // NO MATCH (matching is case-sensitive by default)
}
if ( /^ABC/i.test( subject ) ) {
  console.log( "Match" ); // MATCH (the "i" flag ignores case)
}

Some more examples:

// Each entry is [ pattern source, flags, string to test ].
var tests = [
  [ 'abc',  '',  'abcdefg' ],
  [ '^abc', '',  'abcdefg' ],
  [ 'bcd',  '',  'abcdefg' ],
  [ '^bcd', '',  'abcdefg' ],
  [ '^ABC', '',  'abcdefg' ],
  [ '^ABC', 'i', 'abcdefg' ]
];

// Build a RegExp from each entry and report the outcome in Perl notation:
// "=~" when the string matches, "!~" when it does not.
tests.forEach( function ( entry ) {
  var pattern = entry[0];
  var flags = entry[1];
  var subject = entry[2];

  var operator = new RegExp( pattern, flags ).test( subject ) ? '=~' : '!~';
  console.log( '  \'' + subject + '\' ' + operator + ' /' + pattern + '/' + flags );
} );

// OUTPUTS:
//  'abcdefg' =~ /abc/
//  'abcdefg' =~ /^abc/
//  'abcdefg' =~ /bcd/
//  'abcdefg' !~ /^bcd/
//  'abcdefg' !~ /^ABC/
//  'abcdefg' =~ /^ABC/i

Test And Capture

// exec() returns null when the pattern does not match; otherwise it returns an
// array with the whole match at index 0 and the captured groups from index 1.
var result = /^(.+?)=(\S+)$/.exec( 'name=Jim' );
if ( result === null ) {
  console.log( 'NO MATCH' );
} else {
  console.log( '  key=' + result[1] + ', value=' + result[2] );
}

Global Search

In this example the regKeyPair variable is created with the global (“g”) flag, so each call to “exec()” on it returns the next match in the string; once no matches remain, “exec()” returns null.

var regKeyPair = new RegExp( '(\\S+?)=(\\S+)', 'g' );
var keyPairs = 'name=Jim age=21';
var result;

// With the "g" flag each exec() call resumes where the previous match ended.
while ( ( result = regKeyPair.exec( keyPairs ) ) !== null ) {
  console.log( '  key=' + result[1] + ', value=' + result[2] );
}

// exec() has returned null: there is nothing further to find
console.log( 'NO MATCH' );

Note that the “m” flag is needed for “^” and “$” to match at the start and end of each line, rather than only at the start and end of the whole string. E.g.:

var multiLine = 'line1\nline2\nline3\n';

// "m" lets ^ and $ anchor at every line; "g" makes successive exec() calls
// step through the string one match at a time.
var getLine = new RegExp( '^(.+)$', 'mg' );
var result;

while ( ( result = getLine.exec( multiLine ) ) !== null ) {
  console.log( '  found line: "' + result[1] + '"' );
}

Resetting the search can be done as follows:

var regGetLine = /^(.+)$/mg;
// ... first series of exec() calls on regGetLine goes here

// Rewind the search to the start of the string; lastIndex is similar to the
// pos() function in Perl.
regGetLine.lastIndex = 0;

// ... second series of exec() calls on regGetLine starts over from the beginning

Simple Replace

Note that replace operates on a string – but it returns a new string with the replacements; the original string is never modified by the replace() call.

// First replace: a literal pattern swapped for a fixed string.
var source = 'abcdefg';
var replacement = source.replace( /abc/, 'ABC' );
// replacement is now 'ABCdefg'; source is still 'abcdefg'

// Second replace: $1 and $2 in the replacement string stand for the text
// captured by the first and second parenthesised groups.
source = 'key=value';
replacement = source.replace( /(.+?)=(.+)/, '$2=$1' );
// replacement is now 'value=key'

This next example demonstrates the use of a (synchronous) callback function that is passed the entire match and then the sub-matches as parameters.

/**
 * Replacement callback for String.prototype.replace().
 *
 * @param {string} all - the entire matched text (unused)
 * @param {string} first - text captured by the first group
 * @param {string} second - text captured by the second group
 * @returns {string} both captures upper-cased and swapped around an "="
 */
function replaceFn( all, first, second ) {
  var swapped = [ second.toUpperCase(), first.toUpperCase() ];
  return swapped.join( '=' );
}

var source = 'key=value';

// replace() calls replaceFn for the match and substitutes whatever it returns
var replacement = source.replace( /(.+?)=(.+)/, replaceFn );
// replacement is now 'VALUE=KEY'

Simple Global Replace

// Swap every "key = value" pair in the string. The "g" flag makes replace()
// rewrite all matches rather than only the first, and \s*=\s* lets a pair be
// written with or without spaces around the "=".
var source = 'key = value; key2 = value2';
var replacement = source.replace(/([^;=\s]+?)\s*=\s*([^;\s]+)/g, "$2=$1"); // 'value=key; value2=key2'

Process a Buffer of Text One Line at a Time

/**
 * Feeds every complete line in `text` to `callback_fn` and returns what is
 * left over.
 *
 * A complete line is a run of one or more non-CR/LF characters followed by at
 * least one CR or LF. Consecutive line terminators are consumed together, so
 * empty lines are never reported.
 *
 * @param {string} text - buffered text, possibly ending in a partial line
 * @param {function(string)} callback_fn - called once per complete line, in
 *   order, with the line's text excluding its terminator
 * @returns {string} the part of `text` after the last complete line
 */
function processLines( text, callback_fn ) {
  var completeLine = /([^\r\n]+)[\r\n]+/;
  var remaining = text;
  var found;

  // The pattern has no "g" flag, so every exec() searches from the start of
  // whatever is still in `remaining`.
  while ( ( found = completeLine.exec( remaining ) ) !== null ) {
    // drop the matched line, its terminator(s) and anything before the match
    remaining = remaining.substring( found.index + found[0].length );

    callback_fn( found[1] );
  }

  return remaining;
}

This is useful when text arrives in chunks. A chunk may end partway through a line, so keep the returned remainder and prepend it to the next chunk before processing again. E.g.:

// Holds text received so far that has not yet formed a complete line.
var buffer = "";

// Reports one completed line.
function printLine( line ) {
  console.log( ' Line was "%s"', line );
}

process.stdin.on( 'data', function ( chunk ) {
  buffer += chunk;

  // processLines hands back the unterminated tail, which becomes the new buffer
  buffer = processLines( buffer, printLine );
} );

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s

%d bloggers like this: