Perl: Difference between revisions

Revision as of 09:24, 14 February 2009

Reference

Perldoc

% perldoc -q duplicate
  "How can I remove ²duplicate elements from a list or array?"
% perldoc -f split
  split /PATTERN/,EXPR,LIMIT
  split /PATTERN/,EXPR
  split /PATTERN/
  ...

Script Examples

Miscellaneous examples

# Various examples in Perl
die "can't run this";

#-----------------------------------------------------------------------------------------------------------------------------------
#Split a multi-line variable/output in line components - method 1.
my @ArrayList = `$CT lsvob -short`;              #any command producing a multi-line output

foreach (@ArrayList)
{
	chop();                                  #remove the trailing newline
	print "Array List - The VOB is $_.\n";
};

#-----------------------------------------------------------------------------------------------------------------------------------
#Split a multi-line variable/output in line components - method 2.
my $ScalarList = `$CT lsvob -short`;    #any command producing a multi-line output
my @ArrayList2 = split/\n/,$ScalarList; #split the scalar into several lines

foreach (@ArrayList2)
{
	print "Scalar List - The VOB is $_.\n";
};

#-----------------------------------------------------------------------------------------------------------------------------------
# Use ${variable} to split scalar identifier from the rest of a text
my $variable;
print "$variable_temp\n";               # NOK! Print a variable named variable_temp
print "${variable}_temp\n";             # OK! Print a $variable, followed by "_temp"

#-----------------------------------------------------------------------------------------------------------------------------------
#Append an array (or single element) to another
push(@array1,@array_or_element);

#-----------------------------------------------------------------------------------------------------------------------------------
#Add use strict; at the beginning to improve compilation warnings.
use strict;

#-----------------------------------------------------------------------------------------------------------------------------------
#Arrays / Hashes
#

#The following actually adds six elements to array VOBRecords. It doesn't create an extra element contains the hash.
my %VOBAttrib = ( VOBName => $VOBName,  IsProjectVOB => $IsProjectVOB, IsProjectAdminVOB => $IsProjectAdminVOB, ProjectName => $ProjectName );
push (@VOBRecords, %VOBAttrib);

#-----------------------------------------------------------------------------------------------------------------------------------
# Handling reference
my $VOBAttrRef = $VOBs{'AdminMask'};                #This return a reference to a Hash
my %VOBAttr = %$VOBAttrRef;                         #This dereference the reference above and return a Hash

print $VOBAttr{'ProjectName'},"\n";                 #We can use our new Hash variable
print $$VOBAttrRef{'ProjectName'},"\n";             # ... or we can dereference our reference variable using the $$ construct
print $VOBAttrRef->{'ProjectName'},"\n";            # ... but -> can also be used to dereference
print $VOBs{'AdminMask'}->{'ProjectName'},"\n";     #We can also skip altoghether the reference variable
print $VOBs{'AdminMask'}{'ProjectName'},"\n";       # ... This notation is also available as a shortcut, -> can be omitted

#-----------------------------------------------------------------------------------------------------------------------------------
#Autovivification - example on how to create a hash of array
#

my %Projects;
foreach my $VOBName (keys %VOBs)
{
	my $ProjectName = $VOBs{$VOBName}{'ProjectName'};
	push(@{$Projects{$ProjectName}}, $VOBName); # <-- we dereference value returned by $Projects{$ProjectName} as 
                                                    #     an array, hence creating automatically an array if undef
}

#-----------------------------------------------------------------------------------------------------------------------------------
#Read something from standard input
$line = <STDIN>;
$line = readline(*STDIN);       # same thing
chop($line = <STDIN>);          # remove trailing newline

#-----------------------------------------------------------------------------------------------------------------------------------
#Read one character}
#print "Press RETURN...";
#$key = getc();

#-----------------------------------------------------------------------------------------------------------------------------------
# Regex Matching

# Use /regex/, or m!regex! (where ! can be any quoting character)
# Use =~ to match a given variable, otherwise $_ is used. Use !~ to reverse the sense of the match.
# SCALAR CONTEXT: // return true/false if matching found
$myvar =~ /World/               #scalar context, returns true if $myvar contains World
/World/                         #scalar context, same as above except that now it is $_ that is matched
"Hello World" =~ /World/        #scalar contect, same as above, to show that left member doesn't need to be an L-Value

# Extracting matches
# The grouping metacharacters () also allow the extraction of the parts of a string that matched. For each grouping, the part that
# matched inside goes into the special variables $1 , $2 , etc. They can be used just as ordinary variables:

# extract hours, minutes, seconds
$time =~ /(\d\d):(\d\d):(\d\d)/;  # match hh:mm:ss format
$hours = $1;
$minutes = $2;
$seconds = $3;

# LIST CONTEXT, a match /regex/ with groupings will return the list of matched values ($1,$2,...) . So we could rewrite it as

($hours, $minutes, $second) = ($time =~ /(\d\d):(\d\d):(\d\d)/);

# If the groupings in a regex are nested, $1 gets the group with the leftmost opening parenthesis, $2 the next opening parenthesis,
# etc. For example, here is a complex regex and the matching variables indicated below it:

# /(ab(cd|ef)((gi)|j))/;
#  1  2      34

#Associated with the matching variables $1 , $2 , ... are the backreferences \1 , \2 , ... Backreferences are matching variables
# that can be used inside a regex:

    /(\w\w\w)\s\1/; # find sequences like 'the the' in string

# $1 , $2 , ... should only be used outside of a regex, and \1 , \2 , ... only inside a regex.

#-----------------------------------------------------------------------------------------------------------------------------------
# Regex Search & Replace

# Use s/regex/replacement/modifiers
# Use =~ to match a given variable, otherwise $_ is used.
# SCALAR CONTEXT: s/// returns the number of matches, or false if no match.

$x = "Time to feed the cat!";
$x =~ s/cat/hacker/;   # $x contains "Time to feed the hacker!"

# VARIABLES:
# $1,$2: matched variables are immediately available in the replacement string.

# MODIFIERS:
# - g: find all matches
# - e: wraps an eval{...} around the replacement string and the evaluated result is substituted for the matched substring.

# reverse all the words in a string
$x = "the cat in the hat";
$x =~ s/(\w+)/reverse $1/ge;    # $x contains "eht tac ni eht tah"

#-----------------------------------------------------------------------------------------------------------------------------------
# The split operator
# split /regex/, string splits string into a list of substrings and returns that list. The regex determines the character sequence
# that string is split with respect to. For example, to extract a comma-delimited list of numbers, use

$x = "1.618,2.718,   3.142";
@const = split /,\s*/, $x;      # $const[0] = '1.618'
                                # $const[1] = '2.718'
                                # $const[2] = '3.142'

# If the empty regex // is used, the string is split into individual characters. If the regex has groupings, then the list produced
# contains the matched substrings from the groupings as well:

$x = "/usr/bin";
@parts = split m!(/)!, $x;      # $parts[0] = ''    Since the first character of $x matched the regex, an initial element was prepended.
                                # $parts[1] = '/'
                                # $parts[2] = 'usr'
                                # $parts[3] = '/'
                                # $parts[4] = 'bin'

#-----------------------------------------------------------------------------------------------------------------------------------
# The lookahead and lookbehind assertions are generalizations of the anchor concept. Lookahead and lookbehind are zero-width
# assertions that let us specify which characters we want to test for. The lookahead assertion is denoted by (?=regexp) and the
# lookbehind assertion is denoted by (?<=fixed-regexp). Some examples are

    $x = "I catch the housecat 'Tom-cat' with catnip";
    $x =~ /cat(?=\s+)/;  # matches 'cat' in 'housecat'
    @catwords = ($x =~ /(?<=\s)cat\w+/g);  # matches,
                                           # $catwords[0] = 'catch'
                                           # $catwords[1] = 'catnip'
    $x =~ /\bcat\b/;  # matches 'cat' in 'Tom-cat'
    $x =~ /(?<=\s)cat(?=\s)/; # doesn't match; no isolated 'cat' in
                              # middle of $x

#-----------------------------------------------------------------------------------------------------------------------------------
# Grep
my @array = ("el1","gel2","el3","gel1","gel2");
my @array2 = grep {s/(.*el)/reverse $1/e} @array;

$,="\n";
print @array2;

#-----------------------------------------------------------------------------------------------------------------------------------
# How to discard stderr on windows
# note: on windows, we use \nul instead of nul because each folder has it's own nul handler, and we want to reduce the number of
#       used handle
my $STDERRNULL = "2>\\nul";             #use this on windows
my $STDERRNULL = "2>/dev/null";         #use this on unix

my $AT="@";

my @CTResults =	qx($CT lstype -local -fmt "%n \\\"%[type_scope]p\\\"\\n" lbtype:$labelName$AT$vobName $STDERRNULL);

#-----------------------------------------------------------------------------------------------------------------------------------
# here's a file-private function as a closure,
# callable as &$priv_func;  it cannot be prototyped.
my $priv_func = sub {
    # stuff goes here.
};

#-----------------------------------------------------------------------------------------------------------------------------------
# Capture command output: use back-ticks ``, qx(), or system("") with redirection.
my @ouput = `ls`;
my @ouput = qx(ls);
system("ls >output.txt");

#-----------------------------------------------------------------------------------------------------------------------------------
# Capture command exit status: use system("")
my $exit_status = system("del file.txt");

#-----------------------------------------------------------------------------------------------------------------------------------
#Handling of @ in scalar / list context.
#                                            RESULT      CONTEXT  EXPLANATION
my @a = ("titi","tutu");
my $varnoquote=@a; print "$varnoquote\n"; # "2"         (SCALAR - @_ is evaluated in scalar context)
my $varquote="@a"; print "$varquote\n";   # "titi tutu" (EXPAND - @_ is quote-expanded, each item being separated by space)
print @a; print"\n";                      # "tititutu"  (LIST   - $, is empty)
print(@a); print"\n";                     # "tititutu"  (LIST   - $, is empty)
printf @a; print"\n";                     # "titi"      (LIST   - 1st element in list is interpreted as the format string)
printf(@a); print"\n";                    # "titi"      (LIST   - 1st element in list is interpreted as the format string)
print @a,"\n";                            # "tititutu"  (LIST   - $, is empty)
printf "%s\n",@a;                         # "titi"      (LIST   - only 1st element is read)

#-----------------------------------------------------------------------------------------------------------------------------------
#Give default value if no parameter in sub
sub myfunc
{
	my($suffix) = @_ ? "@_" : "defaultvalue";
}

#-----------------------------------------------------------------------------------------------------------------------------------
#Report a complete error message when loading a script
#This hack allows for printing a custom error message + file not found-like error message (given by $!) + syntax error messages (@_)
do "your script.pl"
	or (print "Your error message\n$!\n" and die @_);

#-----------------------------------------------------------------------------------------------------------------------------------
#Example on how to embed perl into a w2K shell script.
#Notice how the first rem is actually a multiline assignment to perl array variable @rem, where the value is quoted with ' '.
@rem= 'PERL for Windows NT - ccperl must be in search path
@echo off
ccperl %0 %1 %2 %3 %4 %5 %6 %7 %8 %9
goto endofperl
@rem ';

# Your Perl code comes here

# End of Perl section

__END__
:endofperl

#-----------------------------------------------------------------------------------------------------------------------------------
#Example on use of qw (=''), join, etc to build complex line using variable and function calls (here vobName() is a function)
system "echo", $CT, qw(rmtype), join("",'trtype:MKELEM_POST_OWNER@',vobName($vobname));

#-----------------------------------------------------------------------------------------------------------------------------------
#Example on how to temporarily disable STDERR and restore it afterwards
open(SAVE_STDERR, '>&STDERR');
close(STDERR) unless $ENV{CLEARCASE_TRACE_TRIGGERS};
$exe = qx(file $ptmp) =~ /executable|bourne|commands text|\bscript/i;
open(STDERR, '>&SAVE_STDERR');
close(SAVE_STDERR);

#-----------------------------------------------------------------------------------------------------------------------------------
#REDIRECTION OF STDERR WITH system()
#
#It seems that STDERR can only be redirected if it occurs in the command, not in args!
system "echo hello world! 2>\\nul";             # OK
system qq(echo hello world! 2>\\nul);           # OK
system "echo", "hello world!"," 2>\\nul";       # OK
system "$CT hello world! 2>\\nul";              # OK
system qq(CT hello world! 2>\\nul);             # OK
system "$CT", "hello world!"," 2>\\nul";        # NOK (seems that external pgm can not be redirected like that)

#-----------------------------------------------------------------------------------------------------------------------------------
#Searching or modifying arrays: map / grep
@chars = map(chr, @nums);
@foo = grep(!/^#/, @bar);                       # weed out comments

#-----------------------------------------------------------------------------------------------------------------------------------
#Handling of various quoting
#
# qx ``
#  => $VAR is expanded
#  => "..." quote are conversed
#  => \ is processed (so use \\ for backslash in a windows path for instance)
qx($CTNDEBUG lstype \n -fmt "%n\\n" -kind brtype -invob $VOBAdminName);

#-----------------------------------------------------------------------------------------------------------------------------------
#Choping character: very useful to remove the trailing "\n", also on list!
chop( my $userinput=<STDIN> );                  #Chop the trailing "\n" in user input
chop( my @list=qx(ls);                          #Chop the trailing "\n" in the command output

#-----------------------------------------------------------------------------------------------------------------------------------
#CHOMPing character: safer version (in case for instance the last line doesn't have the \n character
chomp( my $userinput=<STDIN> );	                #Chomp the trailing "\n" in user input IF PRESENT
chomp( my @list=qx(ls);                         #Chomp the trailing "\n" in the command output IF PRESENT

my %VOBs = (
	"AdminMask"                  => { ProjectName => "\"\""          },
	"MaskADKSAM"                 => { ProjectName => "\"MaskADKSAM\""}
);

#-----------------------------------------------------------------------------------------------------------------------------
#passing filehandle as sub parameters and return values --> use reference
#
# First as return values:

sub openTimeOut($)
{
	my $filename = shift;
	my $timeout=15;
	while( !open(LOG,$filename) ) { sleep 1; --$timeout or die "Time out trying to open file $filename"; }
	return \*LOG;
}

sub printToFile($@)
{
	my $filename = shift;
	my $fh = openTimeOut(">$filename");
	print $fh @_;
	close($fh);
}

#
# BUT BEWARE, ACTUALLY OpenTimeOut returns a reference to the same FILEHANDLE in current glob !
# The code below illustrate this:
#
#
	my ($to,$from) = @_;
	$fhto = openTimeOut(\*TO,">>$to");
	$fhfrom = openTimeOut(\*FROM,"<$from");    # This returns same FILEHANDLE reference as $fhto
	while (<$fhfrom>) {print $fhto $_}         # Failed, because now $fhto = $fhfrom, which only open for output
	close($fhfrom);
	close($fhto);

#
# The solution, pass by parameters:
#
sub openTimeOut2(*;$)
{
	my $fh = shift;
	my $filename = shift;
	my $timeout=15;
	while( !open($fh,$filename) ) { sleep 1; --$timeout or die "Time out trying to open file $filename"; }
}

sub printToFile($@)
{
	my $filename = shift;
	openTimeOut2(\*LOG,">$filename");
	print LOG @_;
	close(LOG);
}

#-----------------------------------------------------------------------------------------------------------------------------
#uppercase / lowercase
#
my $lowercase = lc "My StRiNg";                 #mystring
my $uppercase = uc "My StRiNg";	                #MYSTRING
my $firstcharlowercase = lcfirst "My StRiNg";   #my StRiNg
my $firstcharuppercase = ucfirst "My StRiNg";   #My StRiNg

# Using command-line parameters
print scalar @ARGV; #number of parameters
print "1st param: $ARGV[0]";
print "executable name: $0";

# Using reference

$tab{'mpe'} = '...';

process(\$tab);

sub process ()
{
	my $tab = $_[0];
	$tab->{'mpe'} = '...';
}

#Using command-line options

use Getopt::long;

GetOptions ("d|debug+" => \$debug,
			"q|quiet" => \$quiet,
			...
			);

# Concat 2 strings
$stringC = $stringA . $stringB;
$stringC = "$stringA$stringB";
$stringC = join('', ($stringA, $stringB));

Using Anonymous Hash References

#!/usr/bin/perl

use strict;

my @myarray;

foreach my $iter ( 1..10 )
{
	my $value1 = "value1_".$iter;
	my $value2 = "value2_".$iter;
	my $value3 = "value3_".$iter;

   	print "Creating our \$hashref... ";
   	# Construct { key1 => value1, key2 => value2.... } creates a REFERENCE to an anonymous hash.
   	# Since reference are SCALAR, we assign it to a scalar variable
   	my $hashref = { index1 => $value1, index2 => $value2, index3 => $value3 };
	
	print "Done.\n",
	      "  \$hashref: ",$hashref,"\n";
	print "  content: ",$$hashref{'index1'},",",$$hashref{'index2'},",",$$hashref{'index2'},"\n";

	print "Adding \$hashref to our array... ";
	push( @myarray, $hashref );

	print "Done. There are currently ", scalar(@myarray), " elements in \@myarray.\n";
	print "Accessing last element of our array...";
	print " content: @myarray[$#myarray], ${@myarray[$#myarray]}{'index1'} our better yet @myarray[$#myarray]->{'index2'}\n";
}

print "\n\nNow we will traverse our array again...\n";
foreach ( @myarray )
{
	print 	"$_ containing ",
	 		"index1 => $$_{'index1'},",
			"index2 => $$_{'index2'},",
			"index3 => $$_{'index3'}\n";
	print 	"... or using -> operator: ",
	 		"index1 => $_->{'index1'},",
			"index2 => $_->{'index2'},",
			"index3 => $_->{'index3'}\n";
}

Using GetOpt / LWP

TODO: Ugly, sort it out...

#!/usr/bin/perl
#
use strict;
use LWP::UserAgent;
use LWP::Debug;
use HTTP::Cookies;
use Getopt::Long qw(:config no_ignore_case);
use HTML::TableExtract;

my $debug = 0;
my $quite = 0;
my $proxy = 0;
my $username;
my $password;

sub usage {
    print "Usage: $0 <blabla>\n";
    exit;
}

GetOptions ( "d|debug+"     => \$debug,
             "P|proxy"      => \$proxy,
             "u|user=s"     => \$username,
             "p|pass=s"     => \$password,
             "q|quite"      => \$quite ) || usage();
$debug = 0 if $quite;
my $url = $ARGV[0];

LWP::Debug::level('+') if $debug;
my $ua = LWP::UserAgent->new;

# Session cookie
my $jar = HTTP::Cookies->new ();
$ua->cookie_jar($jar);

# Enable proxy...
$ua->env_proxy if $proxy;


my %articles = get_articles ( $ua, $url );

exit;

# Get starting URL....
my $res = $ua->get($url);
unless ($res->is_success) {
    die "Die: " . $res->status_line, "\n";
}
exit;




print $res->content if $debug;

my $res = $ua->post( $url,
                     [
                      'j_username'  =>  $username,
                      'j_password'  =>  $password,
                      'Submit'      =>  'Entrer'
                     ]
    );
unless ($res->is_success) {
    die "Die: ".$res->status_line, "\n";
}

sub get_articles {
    my $ua = shift;
    my $url = shift;

    my $res = $ua->get ($url);
    unless ($res->is_success) {
        die "http-get failed: ".$res->status_line, "\n$url\n";
    }
    
    my $te = HTML::TableExtract->new ( slice_columns => 0,
                                       keep_html => 1,
                                       keep_headers => 1,
                                       subtables => 1,
                                       headers => [qw(Matter)] );

    $te->parse($res->content);
    open(my $fh, ">mpe.html") || die "Cannot create file: $!\n";
    print $fh $res->content;
    close($fh);

    foreach my $ts ( $te->tables ) {
#       print "Table (", join(',', $ts->coords), "):\n";
        foreach my $row ( $ts->rows ) {
            print "Row: " . join (';', @$row ). "\n";
            next unless $row->[0] =~ m/\/content\/(.*)\/fulltext/;
            print $1."\n";
        }
    }
    return 0;
}

Pitfalls

# Frequent Mistakes in Perl
die "can't run this";

#-----------------------------------------------------------------------------------------------------------------------------------
#Forget to chop the trailing "\n"
#-----------------------------------------------------------------------------------------------------------------------------------
my $path = qx(pwd);                     #NOK! trailing \n will corrupt path construction
chop( my $path = qx(pwd) );             #OK!

#-----------------------------------------------------------------------------------------------------------------------------------
#Mix case in name of package
#-----------------------------------------------------------------------------------------------------------------------------------
# Imagine a module file named Vobs.pm

use Vobs;
use VOBs;                               # NOK --> Will complain about double definition (but will not flag the mix case problem)

#-----------------------------------------------------------------------------------------------------------------------------------
# Beware of operator precedence and strange behaviour
chomp my @emptylist = qx("dir");        #NOK ! @emptylist will be empty
chomp ( my @list = qx("dir") );	        #OK !

CPAN - Perl Packages

First time launch:

$ cpan                                       # ... OR ...
$ perl -MCPAN -e shell                       #  --> yes auto config

To adapt config related to proxy:

cpan> o config init /proxy/                  # (to enter an empty string, simply enter 1 space char as a value)
cpan> o conf commit

To install a Perl package (eg. here package GetOpt::Long):

$ cpan
cpan>install GetOpt::Long

Editing the configruation:

cpan> o conf init                            # Reset the configuration
cpan> o conf http_proxy http://proxy:8080/   # Edit a given variable (eg. here changing proxy settings):
cpan> o conf commit                          # To commit changes in the configuration
cpan> o                                      # to get o options
cpan> o conf                                 # To get o conf option

To edit CPAN database url:

cpan> o conf /urllist/
cpan> o conf init /urllist/
cpan> o conf urllist shift
cpan> o conf urllist unshift ftp://my.new.site/
cpan> o conf commit

To update CPAN itself:

cpan> install Bundle::CPAN
cpan> reload cpan

@@ Line 1: / Line 1: @@
 == Reference ==
 * Perldoc
-<source lang="bash">
+<div style="padding-left:2em;"><source lang="perl">
 % perldoc -q duplicate
-"How can I remove ²duplicate elements from a list or array?"
+  "How can I remove ²duplicate elements from a list or array?"
+% perldoc -f split
-</source>
+  split /PATTERN/,EXPR,LIMIT
+  split /PATTERN/,EXPR
+  split /PATTERN/
+  ...
+</source></div>
 == Script Examples ==