Perl: Difference between revisions

From miki
Jump to navigation Jump to search
Line 188: Line 188:
</source>
</source>


== Pitfalls
== Pitfalls ==


<source lang="perl">
<source lang="perl">

Revision as of 22:08, 12 February 2009

Reference

  • Perldoc
% perldoc -q duplicate
"How can I remove ²duplicate elements from a list or array?"

Script Examples

Miscellaneous examples

# Using command-line parameters
print scalar @ARGV; #number of parameters
print "1st param: $ARGV[0]";
print "executable name: $0";

# Using reference

$tab{'mpe'} = '...';

process(\$tab);

sub process ()
{
	my $tab = $_[0];
	$tab->{'mpe'} = '...';
}

#Using command-line options

use Getopt::long;

GetOptions ("d|debug+" => \$debug,
			"q|quiet" => \$quiet,
			...
			);

# Concat 2 strings
$stringC = $stringA . $stringB;
$stringC = "$stringA$stringB";
$stringC = join('', ($stringA, $stringB));

Using Anonymous Hash References

#!/usr/bin/perl

use strict;

my @myarray;

foreach my $iter ( 1..10 )
{
	my $value1 = "value1_".$iter;
	my $value2 = "value2_".$iter;
	my $value3 = "value3_".$iter;

   	print "Creating our \$hashref... ";
   	# Construct { key1 => value1, key2 => value2.... } creates a REFERENCE to an anonymous hash.
   	# Since reference are SCALAR, we assign it to a scalar variable
   	my $hashref = { index1 => $value1, index2 => $value2, index3 => $value3 };
	
	print "Done.\n",
	      "  \$hashref: ",$hashref,"\n";
	print "  content: ",$$hashref{'index1'},",",$$hashref{'index2'},",",$$hashref{'index2'},"\n";

	print "Adding \$hashref to our array... ";
	push( @myarray, $hashref );

	print "Done. There are currently ", scalar(@myarray), " elements in \@myarray.\n";
	print "Accessing last element of our array...";
	print " content: @myarray[$#myarray], ${@myarray[$#myarray]}{'index1'} our better yet @myarray[$#myarray]->{'index2'}\n";
}

print "\n\nNow we will traverse our array again...\n";
foreach ( @myarray )
{
	print 	"$_ containing ",
	 		"index1 => $$_{'index1'},",
			"index2 => $$_{'index2'},",
			"index3 => $$_{'index3'}\n";
	print 	"... or using -> operator: ",
	 		"index1 => $_->{'index1'},",
			"index2 => $_->{'index2'},",
			"index3 => $_->{'index3'}\n";
}

Using GetOpt / LWP

TODO: Ugly, sort it out...

#!/usr/bin/perl
#
use strict;
use LWP::UserAgent;
use LWP::Debug;
use HTTP::Cookies;
use Getopt::Long qw(:config no_ignore_case);
use HTML::TableExtract;

my $debug = 0;
my $quite = 0;
my $proxy = 0;
my $username;
my $password;

sub usage {
    print "Usage: $0 <blabla>\n";
    exit;
}

GetOptions ( "d|debug+"     => \$debug,
             "P|proxy"      => \$proxy,
             "u|user=s"     => \$username,
             "p|pass=s"     => \$password,
             "q|quite"      => \$quite ) || usage();
$debug = 0 if $quite;
my $url = $ARGV[0];

LWP::Debug::level('+') if $debug;
my $ua = LWP::UserAgent->new;

# Session cookie
my $jar = HTTP::Cookies->new ();
$ua->cookie_jar($jar);

# Enable proxy...
$ua->env_proxy if $proxy;


my %articles = get_articles ( $ua, $url );

exit;

# Get starting URL....
my $res = $ua->get($url);
unless ($res->is_success) {
    die "Die: " . $res->status_line, "\n";
}
exit;




print $res->content if $debug;

my $res = $ua->post( $url,
                     [
                      'j_username'  =>  $username,
                      'j_password'  =>  $password,
                      'Submit'      =>  'Entrer'
                     ]
    );
unless ($res->is_success) {
    die "Die: ".$res->status_line, "\n";
}

sub get_articles {
    my $ua = shift;
    my $url = shift;

    my $res = $ua->get ($url);
    unless ($res->is_success) {
        die "http-get failed: ".$res->status_line, "\n$url\n";
    }
    
    my $te = HTML::TableExtract->new ( slice_columns => 0,
                                       keep_html => 1,
                                       keep_headers => 1,
                                       subtables => 1,
                                       headers => [qw(Matter)] );

    $te->parse($res->content);
    open(my $fh, ">mpe.html") || die "Cannot create file: $!\n";
    print $fh $res->content;
    close($fh);

    foreach my $ts ( $te->tables ) {
#       print "Table (", join(',', $ts->coords), "):\n";
        foreach my $row ( $ts->rows ) {
            print "Row: " . join (';', @$row ). "\n";
            next unless $row->[0] =~ m/\/content\/(.*)\/fulltext/;
            print $1."\n";
        }
    }
    return 0;
}

Pitfalls

# Frequent Mistakes in Perl
die "can't run this";

#-----------------------------------------------------------------------------------------------------------------------------------
#Forget to chop the trailing "\n"
#-----------------------------------------------------------------------------------------------------------------------------------
my $path = qx(pwd);                     #NOK! trailing \n will corrupt path construction
chop( my $path = qx(pwd) );             #OK!

#-----------------------------------------------------------------------------------------------------------------------------------
#Mix case in name of package
#-----------------------------------------------------------------------------------------------------------------------------------
# Imagine a module file named Vobs.pm

use Vobs;
use VOBs;                               # NOK --> Will complain about double definition (but will not flag the mix case problem)

#-----------------------------------------------------------------------------------------------------------------------------------
# Beware of operator precedence and strange behaviour
chomp my @emptylist = qx("dir");        #NOK ! @emptylist will be empty
chomp ( my @list = qx("dir") );	        #OK !

CPAN - Perl Packages

First time launch:

$ cpan                                       # ... OR ...
$ perl -MCPAN -e shell                       #  --> yes auto config

To adapt config related to proxy:

cpan> o config init /proxy/                  # (to enter an empty string, simply enter 1 space char as a value)
cpan> o conf commit

To install a Perl package (eg. here package GetOpt::Long):

$ cpan
cpan>install GetOpt::Long

Editing the configruation:

cpan> o conf init                            # Reset the configuration
cpan> o conf http_proxy http://proxy:8080/   # Edit a given variable (eg. here changing proxy settings):
cpan> o conf commit                          # To commit changes in the configuration
cpan> o                                      # to get o options
cpan> o conf                                 # To get o conf option

To edit CPAN database url:

cpan> o conf /urllist/
cpan> o conf init /urllist/
cpan> o conf urllist shift
cpan> o conf urllist unshift ftp://my.new.site/
cpan> o conf commit

To update CPAN itself:

cpan> install Bundle::CPAN
cpan> reload cpan