#!/usr/local/bin/perl
# Perl "Pattern Matching" Frequently Asked Questions
#
# This is NOT an exhaustive treatment of pattern
# matching in Perl. This is a collection of fairly
# simple syntax examples.
# Perl's regex engine is quite powerful.
# For a complete treatment of pattern matching with Perl
# see 'Programming Perl' by Larry Wall, Tom Christiansen,
# and Randal Schwartz and 'Mastering Regular Expressions'
# by Jeffrey Friedl.....both published by O'Reilly.
#
# You should be able to copy/paste this entire faq into
# a file and run it. There is one example that is
# commented out completely because it would try to open a
# non-existent file.
#
# Now, from the simple to the more complex.....
################ THE /match/ SYNTAX ####################
# Walk through the basic match operator: default delimiters, the '=~'
# binding operator, the 'i' switch, character classes and quantifiers.
# Each successful test prints what it matched; $& holds the text of the
# last successful match.
$str = 'a string of words with no punctuation';
# Perl's default match punctuation is /pattern/. You
# can use other delimiters, but this is a FAQ, not a book.
# The match operator can be used against $_, Perl's
# 'current' var.
$_ = $str;
if (/string/) { print "matched 'string'\n"; }
# In order to be clear, we will be verbose and explicitly
# operate on a scalar variable ($str) using the '=~' operator.
if ($str =~ /string/) { print "\nmatched 'string' again.\n"; }
# make the match case-insensitive with a switch, 'i'.
if ($str =~ /strinG/i)
{ print "\nmatched strinG to string, ignoring case\n"; }
# use character classes, like digits or white space
# (\d is a digit; the string has none yet, so the else branch runs)
if ($str =~ /\d/) { print "\nmatched a digit, $&\n"; }
else { print "\nThere are no digits in the string.\n"; }
# put digits in the string.
$str = 'string of words and digits 5678 and no punctuation';
# check to see if the str contains a digit
if ($str =~ /\d/) { print "\nmatched one digit, $&\n"; }
# find the first 2 digits in the string
if ($str =~ /\d\d/) { print "\nmatched two digits, $&\n"; }
# that only matched the '56'. Get the others by using the
# '+' to match one or more times.
if ($str =~ /\d+/) { print "\nMatched multiple digits - $&\n"; }
# match 2 or 3 digits with {2,3} - note the match is greedy.
# If it can find 3 it will match 3, not 2.
if ($str =~ /\d{2,3}/) { print "\nMatched 2 or 3 digits - $&\n"; }
# look for white space chars (space, tab, etc...)
# (the dashes in the output make the matched white space visible)
if ($str =~ /\s/) { print "\nmatched white space, -$&-\n"; }
# look for a digit followed by a white space char
if ($str =~ /\d\s/) { print "\nmatched digit space, -$&-\n"; }
# look for an alpha char then space then a digit.
if ($str =~ /[a-z]\s\d/)
{ print "\nmatched alpha space digit, -$&-\n"; }
# A practical example: pull something that looks like an IP address
# out of an access-log record.
$str = '<date>10/16/2000</date><remoteHost>123.234.9.78</remoteHost>';

# Approach 1: match four dot-separated groups of digits.
#   {1,3} asks for at least 1 and no more than 3 of the preceding \d.
#   \.    is an escaped dot; an unescaped dot is a wild card.
if ($str =~ /\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/) {
    # $& is the matched text, $` the part before it, $' the part after it.
    $remoteHost = $&;
    print "\nFound an IP - $remoteHost\n",
          "part before the IP - $`\n",
          "part after the IP - $'\n";
}

# Approach 2: parse the tags and capture what sits between them.
# The three parenthesised groups land in $1, $2 and $3; the
# non-greedy (.*?) in the middle is the address itself.
if ($str =~ /(<remoteHost>)(.*?)(<\/remoteHost>)/) {
    $remoteHost = $2;
    print "\nFound an IP - $remoteHost in between $1 and $3\n";
    # Note that each new successful match resets $1, $2, $3, etc...
}
# You might want to run through a web log looking for
# all the occurrences of the <remoteHost>...</remoteHost>
# chunk. You can read the entire file into a var, and
# catch all of the IP's like this.
# open(IPF,"<your_web_log.txt") or die("Failed to open log, $!.\n");
# while (<IPF>) { $web_log .= $_; }
# close IPF;
# while ($web_log =~ /<remoteHost>(.*?)<\/remoteHost>/g)
# { print "Host: $1\n"; }
#
# 'g' at the end of the match makes the match 'global'.
# If the 'g' were not there, the match would happen once
# and stop. With the 'g', the match will happen as many
# times as the pattern occurs in the string in $web_log.
# You can also use .....
# . is a wild card - matches any single char except a newline
# ? after a quantifier (as in .*?) says match minimally - not greedily
# \d - a digit
# \n - new line
# \r - carriage return
# \t - tab
# [A-Z] - upper case A through Z.
# [a-z] - lower case
# [^a-z] - the '^' makes it a negative class or anything other than a-z
# \s - white space
# \S - non-white space char
# There are others.....see the books.
################ THE 's/find/replace/' SYNTAX ####
print "\n\n", ' ######## PATTERN REPLACEMENT ##########';

# Both plain substitution demos start from the same sample sentence.
my $sample = 'a string of words with no punctuation and the word stRiNg repeated';

# With no switches, s/// replaces only the first (case-sensitive) match.
$str = $sample;
print "\nbefore - $str\n";
$str =~ s/string/STRING/;
print "after - $str\n";

# 'i' ignores case and 'g' (global) replaces every occurrence.
$str = $sample;
print "\nBefore global => $str\n";
$str =~ s/string/STRING/ig;
print "After global =>$str\n\n";

# 'e' evaluates the right side as Perl code: each match is replaced by
# itself ($&) with the next value of the counter $i appended.
$str =~ s/string/$&.++$i/geis;
print "NUMBERED =>$str\n\n";

# The evaluated right side can also be a call to a subroutine; here
# every 'string' followed by a digit is handed to number_them().
$str =~ s/string\d/number_them($&)/gei;
# Append a running sequence number to a piece of text.
#   argument: the text to be numbered
#   returns:  the text with the new counter value appended
# Side effect: increments the package variable $number on every call.
sub number_them
{
    my ($text) = @_;
    return $text . ++$number;
}
# Show $str as it stands at this point in the script.
print("NUMBERED AGAIN=>$str\n\n");
# There are also negative and positive look-aheads, that
# may be addressed in the future. Again, if you want to
# know how to use them, see one of the books.
This site uses cookies to help personalise content, tailor your experience and to keep you logged in if you register.
By continuing to use this site, you are consenting to our use of cookies.