Perl
Jump to navigation
Jump to search
Reference
- Perldoc
% perldoc -q duplicate
"How can I remove duplicate elements from a list or array?"
% perldoc -f split
split /PATTERN/,EXPR,LIMIT
split /PATTERN/,EXPR
split /PATTERN/
...
Script Examples
Miscellaneous examples
# Various examples in Perl
die "can't run this";
#-----------------------------------------------------------------------------------------------------------------------------------
#Split a multi-line variable/output in line components - method 1.
my @ArrayList = `$CT lsvob -short`; #any command producing a multi-line output
foreach (@ArrayList)
{
chop(); #remove the trailing newline
print "Array List - The VOB is $_.\n";
};
#-----------------------------------------------------------------------------------------------------------------------------------
#Split a multi-line variable/output in line components - method 2.
my $ScalarList = `$CT lsvob -short`; #any command producing a multi-line output
my @ArrayList2 = split/\n/,$ScalarList; #split the scalar into several lines
foreach (@ArrayList2)
{
print "Scalar List - The VOB is $_.\n";
};
#-----------------------------------------------------------------------------------------------------------------------------------
# Use ${variable} to split scalar identifier from the rest of a text
my $variable;
print "$variable_temp\n"; # NOK! Print a variable named variable_temp
print "${variable}_temp\n"; # OK! Print a $variable, followed by "_temp"
#-----------------------------------------------------------------------------------------------------------------------------------
#Append an array (or single element) to another
push(@array1,@array_or_element);
#-----------------------------------------------------------------------------------------------------------------------------------
#Add use strict; at the beginning to improve compilation warnings.
use strict;
#-----------------------------------------------------------------------------------------------------------------------------------
#Arrays / Hashes
#
#The following actually adds six elements to array VOBRecords. It doesn't create an extra element contains the hash.
my %VOBAttrib = ( VOBName => $VOBName, IsProjectVOB => $IsProjectVOB, IsProjectAdminVOB => $IsProjectAdminVOB, ProjectName => $ProjectName );
push (@VOBRecords, %VOBAttrib);
#-----------------------------------------------------------------------------------------------------------------------------------
# Handling reference
my $VOBAttrRef = $VOBs{'AdminMask'}; #This return a reference to a Hash
my %VOBAttr = %$VOBAttrRef; #This dereference the reference above and return a Hash
print $VOBAttr{'ProjectName'},"\n"; #We can use our new Hash variable
print $$VOBAttrRef{'ProjectName'},"\n"; # ... or we can dereference our reference variable using the $$ construct
print $VOBAttrRef->{'ProjectName'},"\n"; # ... but -> can also be used to dereference
print $VOBs{'AdminMask'}->{'ProjectName'},"\n"; #We can also skip altoghether the reference variable
print $VOBs{'AdminMask'}{'ProjectName'},"\n"; # ... This notation is also available as a shortcut, -> can be omitted
#-----------------------------------------------------------------------------------------------------------------------------------
#Autovivification - example on how to create a hash of array
#
my %Projects;
foreach my $VOBName (keys %VOBs)
{
my $ProjectName = $VOBs{$VOBName}{'ProjectName'};
push(@{$Projects{$ProjectName}}, $VOBName); # <-- we dereference value returned by $Projects{$ProjectName} as
# an array, hence creating automatically an array if undef
}
#-----------------------------------------------------------------------------------------------------------------------------------
#Read something from standard input
$line = <STDIN>;
$line = readline(*STDIN); # same thing
chop($line = <STDIN>); # remove trailing newline
#-----------------------------------------------------------------------------------------------------------------------------------
#Read one character}
#print "Press RETURN...";
#$key = getc();
#-----------------------------------------------------------------------------------------------------------------------------------
# Regex Matching
# Use /regex/, or m!regex! (where ! can be any quoting character)
# Use =~ to match a given variable, otherwise $_ is used. Use !~ to reverse the sense of the match.
# SCALAR CONTEXT: // return true/false if matching found
$myvar =~ /World/ #scalar context, returns true if $myvar contains World
/World/ #scalar context, same as above except that now it is $_ that is matched
"Hello World" =~ /World/ #scalar contect, same as above, to show that left member doesn't need to be an L-Value
# Extracting matches
# The grouping metacharacters () also allow the extraction of the parts of a string that matched. For each grouping, the part that
# matched inside goes into the special variables $1 , $2 , etc. They can be used just as ordinary variables:
# extract hours, minutes, seconds
$time =~ /(\d\d):(\d\d):(\d\d)/; # match hh:mm:ss format
$hours = $1;
$minutes = $2;
$seconds = $3;
# LIST CONTEXT, a match /regex/ with groupings will return the list of matched values ($1,$2,...) . So we could rewrite it as
($hours, $minutes, $second) = ($time =~ /(\d\d):(\d\d):(\d\d)/);
# If the groupings in a regex are nested, $1 gets the group with the leftmost opening parenthesis, $2 the next opening parenthesis,
# etc. For example, here is a complex regex and the matching variables indicated below it:
# /(ab(cd|ef)((gi)|j))/;
# 1 2 34
#Associated with the matching variables $1 , $2 , ... are the backreferences \1 , \2 , ... Backreferences are matching variables
# that can be used inside a regex:
/(\w\w\w)\s\1/; # find sequences like 'the the' in string
# $1 , $2 , ... should only be used outside of a regex, and \1 , \2 , ... only inside a regex.
#-----------------------------------------------------------------------------------------------------------------------------------
# Regex Search & Replace
# Use s/regex/replacement/modifiers
# Use =~ to match a given variable, otherwise $_ is used.
# SCALAR CONTEXT: s/// returns the number of matches, or false if no match.
$x = "Time to feed the cat!";
$x =~ s/cat/hacker/; # $x contains "Time to feed the hacker!"
# VARIABLES:
# $1,$2: matched variables are immediately available in the replacement string.
# MODIFIERS:
# - g: find all matches
# - e: wraps an eval{...} around the replacement string and the evaluated result is substituted for the matched substring.
# reverse all the words in a string
$x = "the cat in the hat";
$x =~ s/(\w+)/reverse $1/ge; # $x contains "eht tac ni eht tah"
#-----------------------------------------------------------------------------------------------------------------------------------
# The split operator
# split /regex/, string splits string into a list of substrings and returns that list. The regex determines the character sequence
# that string is split with respect to. For example, to extract a comma-delimited list of numbers, use
$x = "1.618,2.718, 3.142";
@const = split /,\s*/, $x; # $const[0] = '1.618'
# $const[1] = '2.718'
# $const[2] = '3.142'
# If the empty regex // is used, the string is split into individual characters. If the regex has groupings, then the list produced
# contains the matched substrings from the groupings as well:
$x = "/usr/bin";
@parts = split m!(/)!, $x; # $parts[0] = '' Since the first character of $x matched the regex, an initial element was prepended.
# $parts[1] = '/'
# $parts[2] = 'usr'
# $parts[3] = '/'
# $parts[4] = 'bin'
#-----------------------------------------------------------------------------------------------------------------------------------
# The lookahead and lookbehind assertions are generalizations of the anchor concept. Lookahead and lookbehind are zero-width
# assertions that let us specify which characters we want to test for. The lookahead assertion is denoted by (?=regexp) and the
# lookbehind assertion is denoted by (?<=fixed-regexp). Some examples are
$x = "I catch the housecat 'Tom-cat' with catnip";
$x =~ /cat(?=\s+)/; # matches 'cat' in 'housecat'
@catwords = ($x =~ /(?<=\s)cat\w+/g); # matches,
# $catwords[0] = 'catch'
# $catwords[1] = 'catnip'
$x =~ /\bcat\b/; # matches 'cat' in 'Tom-cat'
$x =~ /(?<=\s)cat(?=\s)/; # doesn't match; no isolated 'cat' in
# middle of $x
#-----------------------------------------------------------------------------------------------------------------------------------
# Grep
my @array = ("el1","gel2","el3","gel1","gel2");
my @array2 = grep {s/(.*el)/reverse $1/e} @array;
$,="\n";
print @array2;
#-----------------------------------------------------------------------------------------------------------------------------------
# How to discard stderr on windows
# note: on windows, we use \nul instead of nul because each folder has it's own nul handler, and we want to reduce the number of
# used handle
my $STDERRNULL = "2>\\nul"; #use this on windows
my $STDERRNULL = "2>/dev/null"; #use this on unix
my $AT="@";
my @CTResults = qx($CT lstype -local -fmt "%n \\\"%[type_scope]p\\\"\\n" lbtype:$labelName$AT$vobName $STDERRNULL);
#-----------------------------------------------------------------------------------------------------------------------------------
# here's a file-private function as a closure,
# callable as &$priv_func; it cannot be prototyped.
my $priv_func = sub {
# stuff goes here.
};
#-----------------------------------------------------------------------------------------------------------------------------------
# Capture command output: use back-ticks ``, qx(), or system("") with redirection.
my @ouput = `ls`;
my @ouput = qx(ls);
system("ls >output.txt");
#-----------------------------------------------------------------------------------------------------------------------------------
# Capture command exit status: use system("")
my $exit_status = system("del file.txt");
#-----------------------------------------------------------------------------------------------------------------------------------
#Handling of @ in scalar / list context.
# RESULT CONTEXT EXPLANATION
my @a = ("titi","tutu");
my $varnoquote=@a; print "$varnoquote\n"; # "2" (SCALAR - @_ is evaluated in scalar context)
my $varquote="@a"; print "$varquote\n"; # "titi tutu" (EXPAND - @_ is quote-expanded, each item being separated by space)
print @a; print"\n"; # "tititutu" (LIST - $, is empty)
print(@a); print"\n"; # "tititutu" (LIST - $, is empty)
printf @a; print"\n"; # "titi" (LIST - 1st element in list is interpreted as the format string)
printf(@a); print"\n"; # "titi" (LIST - 1st element in list is interpreted as the format string)
print @a,"\n"; # "tititutu" (LIST - $, is empty)
printf "%s\n",@a; # "titi" (LIST - only 1st element is read)
#-----------------------------------------------------------------------------------------------------------------------------------
#Give default value if no parameter in sub
sub myfunc
{
my($suffix) = @_ ? "@_" : "defaultvalue";
}
#-----------------------------------------------------------------------------------------------------------------------------------
#Report a complete error message when loading a script
#This hack allows for printing a custom error message + file not found-like error message (given by $!) + syntax error messages (@_)
do "your script.pl"
or (print "Your error message\n$!\n" and die @_);
#-----------------------------------------------------------------------------------------------------------------------------------
#Example on how to embed perl into a w2K shell script.
#Notice how the first rem is actually a multiline assignment to perl array variable @rem, where the value is quoted with ' '.
@rem= 'PERL for Windows NT - ccperl must be in search path
@echo off
ccperl %0 %1 %2 %3 %4 %5 %6 %7 %8 %9
goto endofperl
@rem ';
# Your Perl code comes here
# End of Perl section
__END__
:endofperl
#-----------------------------------------------------------------------------------------------------------------------------------
#Example on use of qw (=''), join, etc to build complex line using variable and function calls (here vobName() is a function)
system "echo", $CT, qw(rmtype), join("",'trtype:MKELEM_POST_OWNER@',vobName($vobname));
#-----------------------------------------------------------------------------------------------------------------------------------
#Example on how to temporarily disable STDERR and restore it afterwards
open(SAVE_STDERR, '>&STDERR');
close(STDERR) unless $ENV{CLEARCASE_TRACE_TRIGGERS};
$exe = qx(file $ptmp) =~ /executable|bourne|commands text|\bscript/i;
open(STDERR, '>&SAVE_STDERR');
close(SAVE_STDERR);
#-----------------------------------------------------------------------------------------------------------------------------------
#REDIRECTION OF STDERR WITH system()
#
#It seems that STDERR can only be redirected if it occurs in the command, not in args!
system "echo hello world! 2>\\nul"; # OK
system qq(echo hello world! 2>\\nul); # OK
system "echo", "hello world!"," 2>\\nul"; # OK
system "$CT hello world! 2>\\nul"; # OK
system qq(CT hello world! 2>\\nul); # OK
system "$CT", "hello world!"," 2>\\nul"; # NOK (seems that external pgm can not be redirected like that)
#-----------------------------------------------------------------------------------------------------------------------------------
#Searching or modifying arrays: map / grep
@chars = map(chr, @nums);
@foo = grep(!/^#/, @bar); # weed out comments
#-----------------------------------------------------------------------------------------------------------------------------------
#Handling of various quoting
#
# qx ``
# => $VAR is expanded
# => "..." quote are conversed
# => \ is processed (so use \\ for backslash in a windows path for instance)
qx($CTNDEBUG lstype \n -fmt "%n\\n" -kind brtype -invob $VOBAdminName);
#-----------------------------------------------------------------------------------------------------------------------------------
#Choping character: very useful to remove the trailing "\n", also on list!
chop( my $userinput=<STDIN> ); #Chop the trailing "\n" in user input
chop( my @list=qx(ls); #Chop the trailing "\n" in the command output
#-----------------------------------------------------------------------------------------------------------------------------------
#CHOMPing character: safer version (in case for instance the last line doesn't have the \n character
chomp( my $userinput=<STDIN> ); #Chomp the trailing "\n" in user input IF PRESENT
chomp( my @list=qx(ls); #Chomp the trailing "\n" in the command output IF PRESENT
my %VOBs = (
"AdminMask" => { ProjectName => "\"\"" },
"MaskADKSAM" => { ProjectName => "\"MaskADKSAM\""}
);
#-----------------------------------------------------------------------------------------------------------------------------
#passing filehandle as sub parameters and return values --> use reference
#
# First as return values:
sub openTimeOut($)
{
my $filename = shift;
my $timeout=15;
while( !open(LOG,$filename) ) { sleep 1; --$timeout or die "Time out trying to open file $filename"; }
return \*LOG;
}
sub printToFile($@)
{
my $filename = shift;
my $fh = openTimeOut(">$filename");
print $fh @_;
close($fh);
}
#
# BUT BEWARE, ACTUALLY OpenTimeOut returns a reference to the same FILEHANDLE in current glob !
# The code below illustrate this:
#
#
my ($to,$from) = @_;
$fhto = openTimeOut(\*TO,">>$to");
$fhfrom = openTimeOut(\*FROM,"<$from"); # This returns same FILEHANDLE reference as $fhto
while (<$fhfrom>) {print $fhto $_} # Failed, because now $fhto = $fhfrom, which only open for output
close($fhfrom);
close($fhto);
#
# The solution, pass by parameters:
#
sub openTimeOut2(*;$)
{
my $fh = shift;
my $filename = shift;
my $timeout=15;
while( !open($fh,$filename) ) { sleep 1; --$timeout or die "Time out trying to open file $filename"; }
}
sub printToFile($@)
{
my $filename = shift;
openTimeOut2(\*LOG,">$filename");
print LOG @_;
close(LOG);
}
#-----------------------------------------------------------------------------------------------------------------------------
#uppercase / lowercase
#
my $lowercase = lc "My StRiNg"; #mystring
my $uppercase = uc "My StRiNg"; #MYSTRING
my $firstcharlowercase = lcfirst "My StRiNg"; #my StRiNg
my $firstcharuppercase = ucfirst "My StRiNg"; #My StRiNg
# Using command-line parameters
print scalar @ARGV; #number of parameters
print "1st param: $ARGV[0]";
print "executable name: $0";
# Using reference
$tab{'mpe'} = '...';
process(\$tab);
sub process ()
{
my $tab = $_[0];
$tab->{'mpe'} = '...';
}
#Using command-line options
use Getopt::long;
GetOptions ("d|debug+" => \$debug,
"q|quiet" => \$quiet,
...
);
# Concat 2 strings
$stringC = $stringA . $stringB;
$stringC = "$stringA$stringB";
$stringC = join('', ($stringA, $stringB));
Using Anonymous Hash References
#!/usr/bin/perl
use strict;
my @myarray;
foreach my $iter ( 1..10 )
{
my $value1 = "value1_".$iter;
my $value2 = "value2_".$iter;
my $value3 = "value3_".$iter;
print "Creating our \$hashref... ";
# Construct { key1 => value1, key2 => value2.... } creates a REFERENCE to an anonymous hash.
# Since reference are SCALAR, we assign it to a scalar variable
my $hashref = { index1 => $value1, index2 => $value2, index3 => $value3 };
print "Done.\n",
" \$hashref: ",$hashref,"\n";
print " content: ",$$hashref{'index1'},",",$$hashref{'index2'},",",$$hashref{'index2'},"\n";
print "Adding \$hashref to our array... ";
push( @myarray, $hashref );
print "Done. There are currently ", scalar(@myarray), " elements in \@myarray.\n";
print "Accessing last element of our array...";
print " content: @myarray[$#myarray], ${@myarray[$#myarray]}{'index1'} our better yet @myarray[$#myarray]->{'index2'}\n";
}
print "\n\nNow we will traverse our array again...\n";
foreach ( @myarray )
{
print "$_ containing ",
"index1 => $$_{'index1'},",
"index2 => $$_{'index2'},",
"index3 => $$_{'index3'}\n";
print "... or using -> operator: ",
"index1 => $_->{'index1'},",
"index2 => $_->{'index2'},",
"index3 => $_->{'index3'}\n";
}
Using GetOpt / LWP
TODO: Ugly, sort it out...
#!/usr/bin/perl
#
use strict;
use LWP::UserAgent;
use LWP::Debug;
use HTTP::Cookies;
use Getopt::Long qw(:config no_ignore_case);
use HTML::TableExtract;
my $debug = 0;
my $quite = 0;
my $proxy = 0;
my $username;
my $password;
sub usage {
print "Usage: $0 <blabla>\n";
exit;
}
GetOptions ( "d|debug+" => \$debug,
"P|proxy" => \$proxy,
"u|user=s" => \$username,
"p|pass=s" => \$password,
"q|quite" => \$quite ) || usage();
$debug = 0 if $quite;
my $url = $ARGV[0];
LWP::Debug::level('+') if $debug;
my $ua = LWP::UserAgent->new;
# Session cookie
my $jar = HTTP::Cookies->new ();
$ua->cookie_jar($jar);
# Enable proxy...
$ua->env_proxy if $proxy;
my %articles = get_articles ( $ua, $url );
exit;
# Get starting URL....
my $res = $ua->get($url);
unless ($res->is_success) {
die "Die: " . $res->status_line, "\n";
}
exit;
print $res->content if $debug;
my $res = $ua->post( $url,
[
'j_username' => $username,
'j_password' => $password,
'Submit' => 'Entrer'
]
);
unless ($res->is_success) {
die "Die: ".$res->status_line, "\n";
}
sub get_articles {
my $ua = shift;
my $url = shift;
my $res = $ua->get ($url);
unless ($res->is_success) {
die "http-get failed: ".$res->status_line, "\n$url\n";
}
my $te = HTML::TableExtract->new ( slice_columns => 0,
keep_html => 1,
keep_headers => 1,
subtables => 1,
headers => [qw(Matter)] );
$te->parse($res->content);
open(my $fh, ">mpe.html") || die "Cannot create file: $!\n";
print $fh $res->content;
close($fh);
foreach my $ts ( $te->tables ) {
# print "Table (", join(',', $ts->coords), "):\n";
foreach my $row ( $ts->rows ) {
print "Row: " . join (';', @$row ). "\n";
next unless $row->[0] =~ m/\/content\/(.*)\/fulltext/;
print $1."\n";
}
}
return 0;
}
Pitfalls
# Frequent Mistakes in Perl
die "can't run this";
#-----------------------------------------------------------------------------------------------------------------------------------
#Forget to chop the trailing "\n"
#-----------------------------------------------------------------------------------------------------------------------------------
my $path = qx(pwd); #NOK! trailing \n will corrupt path construction
chop( my $path = qx(pwd) ); #OK!
#-----------------------------------------------------------------------------------------------------------------------------------
#Mix case in name of package
#-----------------------------------------------------------------------------------------------------------------------------------
# Imagine a module file named Vobs.pm
use Vobs;
use VOBs; # NOK --> Will complain about double definition (but will not flag the mix case problem)
#-----------------------------------------------------------------------------------------------------------------------------------
# Beware of operator precedence and strange behaviour
chomp my @emptylist = qx("dir"); #NOK ! @emptylist will be empty
chomp ( my @list = qx("dir") ); #OK !
CPAN - Perl Packages
First time launch:
$ cpan # ... OR ...
$ perl -MCPAN -e shell # --> yes auto config
To adapt config related to proxy:
cpan> o config init /proxy/ # (to enter an empty string, simply enter 1 space char as a value)
cpan> o conf commit
To install a Perl package (eg. here package GetOpt::Long):
$ cpan
cpan>install GetOpt::Long
Editing the configruation:
cpan> o conf init # Reset the configuration
cpan> o conf http_proxy http://proxy:8080/ # Edit a given variable (eg. here changing proxy settings):
cpan> o conf commit # To commit changes in the configuration
cpan> o # to get o options
cpan> o conf # To get o conf option
To edit CPAN database url:
cpan> o conf /urllist/
cpan> o conf init /urllist/
cpan> o conf urllist shift
cpan> o conf urllist unshift ftp://my.new.site/
cpan> o conf commit
To update CPAN itself:
cpan> install Bundle::CPAN
cpan> reload cpan