URI::URL version 2.7
Gisle Aas (aas@oslonett.no)
Thu, 27 Apr 1995 19:31:47 +0200
Today I have used some time on the URI::URL module. The result is something
that I have called URI::URL version 2.7:
Fixed some bugs:
- URI::URL::strict was not implemented
- $url->print_on did not work because of 'strict refs'
- Self test did not work if $Debug was on
- Initializing an URL with '0' did not work
- Made URI::URL::implementor into a function
Clean ups:
- Documentation looks better after pod2man
- Changed all $obj->{'elem'} into $obj->{elem}
- Removed $rcsid
- One function (new) used $this-> instead of $self->
Diff listing is included below. You might also want to obtain the new version
directly from
http://www.nr.no/home/aas/URL-2.7.pm
Martijn, can you update the release at Nexor?
Can anybody tell me if there is any need for `$escapes{$hex}'? I just
commented it out, as it did not seem to be used.
--
Regards,
Gisle Aas
*** URL-2.6.pm Thu Apr 27 14:57:02 1995
--- URL.pm Thu Apr 27 19:05:32 1995
***************
*** 1,12 ****
#!/usr/local/bin/perl -w
! package URI::URL;
!
! require 5.001; # file scoped my's require perl 5.001
!
! $rcsid = '$Id: URL.pm,v 2.6 1995/04/26 23:04:13 timbo Exp $';
! $rcsid = $rcsid; # shut up -w
! $Version = '$Revision: 2.6 $'; $Version =~ s/.*(\d+\.\d+).*/$1/;
#####################################################################
--- 1,7 ----
#!/usr/local/bin/perl -w
! package URI::URL; # $Id: URL.pm,v 2.7 1995/04/27 17:05:32 aas Exp $'
! require 5.001;
#####################################################################
***************
*** 16,94 ****
=head1 SYNOPSIS
! use URI::URL;
!
! # Constructors
!
! $url1 = new URI::URL 'http://www.com/%7Euser/gisle.gif';
!
! $url2 = new URI::URL 'gisle.gif', 'http://www.com/%7Euser';
!
! $url3 = $url2->abs; # get absolute url using base
!
! $url4 = $url2->abs('http:/other/path');
!
! $url5 = newlocal URI::URL; # pwd
! $url6 = newlocal URI::URL '/tmp'; # dir
! $url7 = newlocal URI::URL '/etc/motd'; # file
!
! $url8 = $url->clone;
!
! # Stringify URL
!
! $str1 = $url->as_string; # complete escaped URL string
! $str2 = $url->full_path; # escaped path+query+params+frag
!
! # Retrieving Generic-RL components:
!
! $scheme = $url->scheme;
! $netloc = $url->netloc; # see user,password,host,port below
! $path = $url->path;
! $params = $url->params;
! $query = $url->query;
! $frag = $url->frag;
!
! # Retrieving Network location (netloc) components:
!
! $user = $url->user;
! $password = $url->password;
! $host = $url->host;
! $port = $url->port; # returns default if not defined
!
! # Retrieving other attributes:
!
! $base = $url->base;
!
! # Setting fields:
! # All methods above can set field values for example:
! $url->scheme('http');
! $url->host('www.w3.org');
! $url->path('/welcome.html');
! $url->query('protocol info');
! $url->base($url5); # use string or object
!
! # Specify unsafe characters to be escaped for this url
! $url->unsafe('\x00-\x20"\$#%;<>?\x7E-\xFF');
!
! # General method to get/set field values:
!
! $value = $url->elem($name [, $new_value ]);
!
! $defport= $url->default_port; # default port for scheme
!
!
! # Escaping functions (See 'HOW AND WHEN TO ESCAPE' below)
! $escaped = uri_escape($component);
! $component = uri_unescape($escaped);
!
!
! # Other functions and methods
!
! URI::URL->strict(0); # disable strict schemes
! URI::URL->implementor; # get generic implementor
! URI::URL->implementor($scheme); # get scheme implementor
! URI::URL->implementor($scheme, $class); # set scheme implementor
=head1 DESCRIPTION
--- 11,81 ----
=head1 SYNOPSIS
! use URI::URL;
! # Constructors
! $url1 = new URI::URL 'http://www.perl.com/%7Euser/gisle.gif';
! $url2 = new URI::URL 'gisle.gif', 'http://www.com/%7Euser';
! $url3 = $url2->abs; # get absolute url using base
! $url4 = $url2->abs('http:/other/path');
!
! $url5 = newlocal URI::URL; # pwd
! $url6 = newlocal URI::URL '/tmp'; # dir
! $url7 = newlocal URI::URL '/etc/motd'; # file
!
! $url8 = $url1; # copy references
! $url = $url8->clone; # copy objects
!
! # Stringify URL
! $str1 = $url->as_string; # complete escaped URL string
! $str2 = $url->full_path; # escaped path+query+params+frag
! $str3 = "$url"; # use operator overloading (experimental)
!
! # Retrieving Generic-RL components:
! $scheme = $url->scheme;
! $netloc = $url->netloc; # see user,password,host,port below
! $path = $url->path;
! $params = $url->params;
! $query = $url->query;
! $frag = $url->frag;
!
! # Retrieving Network location (netloc) components:
! $user = $url->user;
! $password = $url->password;
! $host = $url->host;
! $port = $url->port; # returns default if not defined
!
! # Retrieving other attributes:
! $base = $url->base;
!
! # Setting fields:
! # All methods above can set field values for example:
! $url->scheme('http');
! $url->host('www.w3.org');
! $url->port($url->default_port);
! $url->path('/welcome.html');
! $url->query('protocol info');
! $url->base($url5); # use string or object
!
! # Specify unsafe characters to be escaped for this url
! $url->unsafe('\x00-\x20"\$#%;<>?\x7E-\xFF');
!
! # General method to get/set field values:
! $value = $url->elem($name [, $new_value ]);
!
! # Port numbers
! $defport= $url->default_port; # default port for scheme
!
!
! # Escaping functions (See 'HOW AND WHEN TO ESCAPE' below)
! $escaped = uri_escape($component);
! $component = uri_unescape($escaped);
!
! # Other functions
! URI::URL::strict(0); # disable strict schemes
! URI::URL::implementor; # get generic implementor
! URI::URL::implementor($scheme); # get scheme implementor
! URI::URL::implementor($scheme, $class); # set scheme implementor
=head1 DESCRIPTION
***************
*** 118,168 ****
The C<new> method must be able to determine the scheme for the URL.
If a scheme is not specified in the URL it will use the scheme
specified by the base URL. If no base URL scheme is defined then the
! C<new> will croak unless URI::URL->strict(0) has been invoked, in
which case 'http' is silently assumed.
Once the scheme has been determined C<new> then uses the C<implementor>
! method to determine which class implements that scheme.
If no implementor class is defined for the scheme then C<new> will
! croak unless URI::URL->strict(0) has been invoked, in which case the
internal generic class is assumed.
Internally defined schemes are implemented by C<URI::URL::scheme_name>.
! The URI::URL->implementor method can also be used to set the class
used to implement a scheme.
=head1 HOW AND WHEN TO ESCAPE
! An edited extract from a URI specification:
- The printability requirement has been met by specifing a safe
- set of characters, and a general escaping scheme for encoding
- "unsafe" characters. This "safe" set is suitable, for example,
- for use in electronic mail. This is the canonical form of a URI.
-
- There is a conflict between the need to be able to represent many
- characters including spaces within a URI directly, and the need
- to be able to use a URI in environments which have limited
- character sets or in which certain characters are prone to
- corruption. This conflict has been resolved by use of an
- hexadecimal escaping method which may be applied to any
- characters forbidden in a given context. When URLs are moved
- between contexts, the set of characters escaped may be enlarged
- or reduced unambiguously. The canonical form for URIs has all
- white spaces encoded.
-
- Notes:
-
- A URL string *must*, by definition, consist of escaped
- components. Complete URL's are always escaped.
-
- The components of a URL string must be *individually* escaped.
- Each component of a URL may have a separate requirements
- regarding what must be escaped, and those requirements are also
- dependent on the URL scheme.
! Never escape an already escaped component string.
This implementation expects an escaped URL string to be passed to
C<new> and will return an escaped URL string from C<as_string>.
--- 105,159 ----
The C<new> method must be able to determine the scheme for the URL.
If a scheme is not specified in the URL it will use the scheme
specified by the base URL. If no base URL scheme is defined then the
! C<new> will croak unless URI::URL::strict(0) has been invoked, in
which case 'http' is silently assumed.
Once the scheme has been determined C<new> then uses the C<implementor>
! function to determine which class implements that scheme.
If no implementor class is defined for the scheme then C<new> will
! croak unless URI::URL::strict(0) has been invoked, in which case the
internal generic class is assumed.
Internally defined schemes are implemented by C<URI::URL::scheme_name>.
! The URI::URL::implementor function can also be used to set the class
used to implement a scheme.
=head1 HOW AND WHEN TO ESCAPE
! =over 3
!
! =item An edited extract from a URI specification:
!
! The printability requirement has been met by specifing a safe set of
! characters, and a general escaping scheme for encoding "unsafe"
! characters. This "safe" set is suitable, for example, for use in
! electronic mail. This is the canonical form of a URI.
!
! There is a conflict between the need to be able to represent many
! characters including spaces within a URI directly, and the need to be
! able to use a URI in environments which have limited character sets or
! in which certain characters are prone to corruption. This conflict has
! been resolved by use of an hexadecimal escaping method which may be
! applied to any characters forbidden in a given context. When URLs are
! moved between contexts, the set of characters escaped may be enlarged
! or reduced unambiguously. The canonical form for URIs has all white
! spaces encoded.
! =item Notes:
!
! A URL string I<must>, by definition, consist of escaped
! components. Complete URL's are always escaped.
!
! The components of a URL string must be I<individually> escaped. Each
! component of a URL may have a separate requirements regarding what
! must be escaped, and those requirements are also dependent on the URL
! scheme.
!
! Never escape an already escaped component string.
!
! =back
This implementation expects an escaped URL string to be passed to
C<new> and will return an escaped URL string from C<as_string>.
***************
*** 182,193 ****
can be added to your own code. To create a new scheme class use code
like:
! package MYURL::foo;
! @ISA = (URI::URL->implementor); # inherit from generic scheme
! The 'URI::URL->implementor' method call with no parameters returns
the name of the class which implements the generic URL scheme
! behaviour (typically URI::URL::_generic). All schemes should be
derived from this class.
Your class can then define overriding methods (e.g., C<new()>,
--- 173,184 ----
can be added to your own code. To create a new scheme class use code
like:
! package MYURL::foo;
! @ISA = (URI::URL::implementor); # inherit from generic scheme
! The 'URI::URL::implementor' function call with no parameters returns
the name of the class which implements the generic URL scheme
! behaviour (typically C<URI::URL::_generic>). All schemes should be
derived from this class.
Your class can then define overriding methods (e.g., C<new()>,
***************
*** 196,205 ****
To register your new class as the implementor for a specific scheme
use code like:
! URI::URL->implementor('foo', 'MYURL::foo');
Any new URL created for scheme 'foo' will be implemented by your
! 'MYURL::foo' class. Existing URLs will not be affected.
=head1 WHAT A URL IS NOT
--- 187,196 ----
To register your new class as the implementor for a specific scheme
use code like:
! URI::URL::implementor('foo', 'MYURL::foo');
Any new URL created for scheme 'foo' will be implemented by your
! C<MYURL::foo> class. Existing URLs will not be affected.
=head1 WHAT A URL IS NOT
***************
*** 257,264 ****
The latest version of this module is likely to be available from:
! http://www.ics.uci.edu/WebSoft/libwww-perl/contrib/
! http://web.nexor.co.uk/public/perl/perl.html
=head1 INSTALLING
--- 248,255 ----
The latest version of this module is likely to be available from:
! http://www.ics.uci.edu/WebSoft/libwww-perl/contrib/
! http://web.nexor.co.uk/public/perl/perl.html
=head1 INSTALLING
***************
*** 278,284 ****
Note that running the module standalone will execute a substantial
self test.
! =head1 Methods and Functions
Below you'll find some descriptions of methods and functions.
--- 269,275 ----
Note that running the module standalone will execute a substantial
self test.
! =head1 METHODS AND FUNCTIONS
Below you'll find some descriptions of methods and functions.
***************
*** 296,301 ****
--- 287,296 ----
@ISA = qw(Exporter);
@EXPORT_OK = qw(uri_escape uri_unescape);
+ # Make the version number available
+ ($Version) = '$Revision: 2.7 $' =~ /(\d+\.\d+)/;
+ $Version += 0; # shut up -w
+
# Define default unsafe characters.
# Note that you cannot reliably change this at runtime
# because the substitutions which use it use the /o flag.
***************
*** 326,335 ****
my $Implementors = ();
# Build a hex<->char map (HexHex->Char and Char->HexHex)
map {
my($hex, $chr) = (sprintf("%%%02X", $_), chr($_));
! $escapes{$hex} = $chr; $escapes{lc($hex)} = $chr;
! $escapes{$chr} = $hex;
} 0..255;
use strict qw(subs refs);
--- 321,332 ----
my $Implementors = ();
# Build a hex<->char map (HexHex->Char and Char->HexHex)
+ my %escapes;
map {
my($hex, $chr) = (sprintf("%%%02X", $_), chr($_));
! # $escapes{ $hex } = $chr; # not used
! # $escapes{lc($hex)} = $chr;
! $escapes{$chr} = $hex;
} 0..255;
use strict qw(subs refs);
***************
*** 354,366 ****
=head2 new
! $url = new URI::URL $escaped_string [, $optional_base_url]
! Object constructor.
! To trap bad/unknown url schemes use:
! $obj = eval { new URI::URL ... };
=cut
--- 351,365 ----
=head2 new
! $url = new URI::URL $escaped_string [, $optional_base_url]
! This is the object constructor. To trap bad og unknown URL schemes
! use:
! $obj = eval { new URI::URL ... };
! or set C<URI::URL::strict(0)> if you don't care about bad or unknown
! schemes.
=cut
***************
*** 368,377 ****
{
my($class, $init, $base) = @_;
! my $this;
if (ref $init) {
! $this = $init->clone;
! $this->base($base) if $base;
} else {
$init =~ s/^\s+//; # remove leading space
$init =~ s/\s.*//; # remove anything after first word
--- 367,376 ----
{
my($class, $init, $base) = @_;
! my $self;
if (ref $init) {
! $self = $init->clone;
! $self->base($base) if $base;
} else {
$init =~ s/^\s+//; # remove leading space
$init =~ s/\s.*//; # remove anything after first word
***************
*** 389,415 ****
if $StrictSchemes;
$scheme = 'http';
}
! my $impclass = URI::URL->implementor($scheme);
unless ($impclass) {
croak "URI::URL scheme '$scheme' is not supported"
if $StrictSchemes;
! $impclass = URI::URL->implementor; # use generic
}
# hand-off to scheme specific implementation sub-class
! $this = $impclass->new($init, $base);
}
! return $this;
}
! =head2 clone
!
! $url2 = $url1->clone;
!
! Copy constructor.
!
! =cut
sub clone
{
--- 388,408 ----
if $StrictSchemes;
$scheme = 'http';
}
! my $impclass = URI::URL::implementor($scheme);
unless ($impclass) {
croak "URI::URL scheme '$scheme' is not supported"
if $StrictSchemes;
! $impclass = URI::URL::implementor(); # use generic
}
# hand-off to scheme specific implementation sub-class
! $self = $impclass->new($init, $base);
}
! return $self;
}
! # Copy constructor
sub clone
{
***************
*** 420,426 ****
=head2 newlocal
! $url = newlocal URI::URL $path;
Return a URL object that denotes a path on the local filesystem
(current directory by default). Paths not starting with '/' are
--- 413,419 ----
=head2 newlocal
! $url = newlocal URI::URL $path;
Return a URL object that denotes a path on the local filesystem
(current directory by default). Paths not starting with '/' are
***************
*** 447,463 ****
$url->print_on(*FILEHANDLE);
! Print the contents of the URL object to the specified file handle
! (default STDOUT). Useful for debugging.
=cut
sub print_on
{
my $self = shift;
my $fh = shift || 'STDOUT';
my($k, $v);
! print $fh "Dump of $self...\n";
foreach $k (sort keys %$self){
$v = $self->{$k};
$v = 'UNDEF' unless defined $v;
--- 440,457 ----
$url->print_on(*FILEHANDLE);
! Prints a verbose presentation of the contents of the URL object to the
! specified file handle (default STDOUT). Mainly useful for debugging.
=cut
sub print_on
{
+ no strict qw(refs); # because we use strings as filehandles
my $self = shift;
my $fh = shift || 'STDOUT';
my($k, $v);
! print $fh "Dump of URL $self...\n";
foreach $k (sort keys %$self){
$v = $self->{$k};
$v = 'UNDEF' unless defined $v;
***************
*** 465,472 ****
}
}
! =head2 implementor
Get and/or set implementor class for a scheme.
Returns '' if specified scheme is not supported.
--- 459,477 ----
}
}
+ sub strict
+ {
+ return $StrictSchemes unless @_;
+ my $old = $StrictSchemes;
+ $StrictSchemes = $_[0];
+ $old;
+ }
+
+ =head2 URI::URL::implementor
! URI::URL::implementor;
! URI::URL::implementor($scheme);
! URI::URL::implementor($scheme, $class);
Get and/or set implementor class for a scheme.
Returns '' if specified scheme is not supported.
***************
*** 475,481 ****
=cut
sub implementor {
! my($class, $scheme, $impclass) = @_;
my $ic;
$scheme = (defined $scheme) ? lc($scheme) : '_generic';
--- 480,486 ----
=cut
sub implementor {
! my($scheme, $impclass) = @_;
my $ic;
$scheme = (defined $scheme) ? lc($scheme) : '_generic';
***************
*** 507,513 ****
# have we already initialised this class?
return 1 if exists $Implementors{$class};
! no strict 'refs';
# Setup overloading - experimental
%{"${class}::OVERLOAD"} = %URI::URL::_generic::OVERLOAD
unless defined %{"${class}::OVERLOAD"};
--- 512,518 ----
# have we already initialised this class?
return 1 if exists $Implementors{$class};
! no strict qw(refs);
# Setup overloading - experimental
%{"${class}::OVERLOAD"} = %URI::URL::_generic::OVERLOAD
unless defined %{"${class}::OVERLOAD"};
***************
*** 527,544 ****
# Use $url->scheme(undef) to set the value to undefined.
# Generic-RL components:
! sub scheme { shift->elem('scheme', @_); }
! sub netloc { shift->elem('netloc', @_); }
! sub path { shift->elem('path', @_); }
! sub params { shift->elem('params', @_); }
! sub query { shift->elem('query', @_); }
! sub frag { shift->elem('frag', @_); }
# Fields derived from generic netloc:
! sub user { shift->elem('user', @_); }
! sub password { shift->elem('password',@_); }
! sub host { shift->elem('host', @_); }
! sub port { shift->elem('port', @_); }
# optimisation to speed up elem() below:
my %netloc_fields = qw(user 1 password 1 host 1 port 1);
--- 532,549 ----
# Use $url->scheme(undef) to set the value to undefined.
# Generic-RL components:
! sub scheme { shift->elem('scheme', @_); }
! sub netloc { shift->elem('netloc', @_); }
! sub path { shift->elem('path', @_); }
! sub params { shift->elem('params', @_); }
! sub query { shift->elem('query', @_); }
! sub frag { shift->elem('frag', @_); }
# Fields derived from generic netloc:
! sub user { shift->elem('user', @_); }
! sub password { shift->elem('password',@_); }
! sub host { shift->elem('host', @_); }
! sub port { shift->elem('port', @_); }
# optimisation to speed up elem() below:
my %netloc_fields = qw(user 1 password 1 host 1 port 1);
***************
*** 601,612 ****
{
my($self, $text, $patn) = @_;
if ($patn){
! $text =~ s/([$patn])/$URI::URL::escapes{$1}/eg;
return $text;
}
# let perl pre-compile this default for max speed
! $text =~ s/([$DefaultUnsafe])/
! $URI::URL::escapes{$1}/oeg;
$text;
}
--- 606,616 ----
{
my($self, $text, $patn) = @_;
if ($patn){
! $text =~ s/([$patn])/$escapes{$1}/eg;
return $text;
}
# let perl pre-compile this default for max speed
! $text =~ s/([$DefaultUnsafe])/$escapes{$1}/oeg;
$text;
}
***************
*** 618,623 ****
--- 622,628 ----
*_esc_path = \&escape;
*_esc_params = \&escape;
*_esc_frag = \&escape;
+
sub _esc_query {
my($self, $text, @unsafe) = @_;
$text =~ s/ /+/g; # RFC1630
***************
*** 702,722 ****
$self->{'_orig_url'} = $u if $URI::URL::Debug;
# draft-ietf-uri-relative-url-06.txt Section 2.4
# 2.4.1
! $self->{'frag'} = $self->unescape($1) if $u =~ s/#(.*)$//;
# 2.4.2
! $self->{'scheme'} = lc($1) if $u =~ s/^\s*([\w\+\.\-]+)://;
# 2.4.3
! $self->{'netloc'} = $self->unescape($1) if $u =~ s!^//([^/]*)!!;
# 2.4.4
if ($u =~ s/\?(.*)//){ # '+' -> ' ' for queries (RFC1630)
my $query = $1;
$query =~ s/\+/ /g;
! $self->{'query'} = $self->unescape($query)
}
# 2.4.5
! $self->{'params'} = $self->unescape($1) if $u =~ s/;(.*)//;
# 2.4.6
! $self->{'path'} = $self->unescape($u);
# read netloc components: "<user>:<password>@<host>:<port>"
$self->_read_netloc;
1;
--- 707,727 ----
$self->{'_orig_url'} = $u if $URI::URL::Debug;
# draft-ietf-uri-relative-url-06.txt Section 2.4
# 2.4.1
! $self->{frag} = $self->unescape($1) if $u =~ s/#(.*)$//;
# 2.4.2
! $self->{scheme} = lc($1) if $u =~ s/^\s*([\w\+\.\-]+)://;
# 2.4.3
! $self->{netloc} = $self->unescape($1) if $u =~ s!^//([^/]*)!!;
# 2.4.4
if ($u =~ s/\?(.*)//){ # '+' -> ' ' for queries (RFC1630)
my $query = $1;
$query =~ s/\+/ /g;
! $self->{query} = $self->unescape($query)
}
# 2.4.5
! $self->{params} = $self->unescape($1) if $u =~ s/;(.*)//;
# 2.4.6
! $self->{path} = $self->unescape($u);
# read netloc components: "<user>:<password>@<host>:<port>"
$self->_read_netloc;
1;
***************
*** 724,751 ****
sub _read_netloc { # netloc -> user, password, host, post
my($self) = @_;
! my $nl = $self->{'netloc'} || ''; # already unescaped
$self->{'_str'} = ''; # void cache
if ($nl =~ s/^([^:@]*):?(.*?)@//){
! $self->{'user'} = $1;
! $self->{'password'} = $2 if $2 ne '';
}
if ($nl =~ s/^([^:]*):?(\d*)//){
! $self->{'host'} = $1;
! $self->{'port'} = $2 if $2 ne '';
}
}
sub _write_netloc { # user, password, host, post -> netloc
my($self) = @_;
my $tmp;
! my $nl = $self->{'user'} || '';
! $nl .= ":$self->{'password'}" if $nl and $self->{'password'};
$nl .= "\@" if $nl;
! $nl .= ($tmp = $self->{'host'});
! $nl .= ":$tmp" if ($tmp && ($tmp=$self->{'port'})
&& $tmp != $self->default_port);
! $self->{'netloc'} = $nl;
}
--- 729,756 ----
sub _read_netloc { # netloc -> user, password, host, post
my($self) = @_;
! my $nl = $self->{netloc} || ''; # already unescaped
$self->{'_str'} = ''; # void cache
if ($nl =~ s/^([^:@]*):?(.*?)@//){
! $self->{user} = $1;
! $self->{password} = $2 if $2 ne '';
}
if ($nl =~ s/^([^:]*):?(\d*)//){
! $self->{host} = $1;
! $self->{port} = $2 if $2 ne '';
}
}
sub _write_netloc { # user, password, host, post -> netloc
my($self) = @_;
my $tmp;
! my $nl = $self->{user} || '';
! $nl .= ":$self->{password}" if $nl and $self->{password};
$nl .= "\@" if $nl;
! $nl .= ($tmp = $self->{host});
! $nl .= ":$tmp" if ($tmp && ($tmp=$self->{port})
&& $tmp != $self->default_port);
! $self->{netloc} = $nl;
}
***************
*** 809,848 ****
$base = new URI::URL $base unless ref $base; # make obj if needed
! my @u = @{$embed}{qw(scheme host port path params query frag) };
# just use base if we are empty (2a)
! # XXX can we ever be empty? I think scheme is always defined.
! return $base->clone if (scalar(grep($_, @u)) == 0);
!
! my($scheme, $host, $port, $path, $params, $query, $frag) = @u;
# if we have a scheme we must already be absolute (2b)
return $embed if $scheme;
! $embed->{'scheme'} = $base->{'scheme'}; # (2c)
! return $embed if $embed->{'netloc'}; # (3)
! $embed->{'netloc'} = $base->{'netloc'}; # (3)
$embed->_read_netloc();
! return $embed if $embed->{'path'} =~ m:^/:; # (4)
! unless ($embed->{'path'}){ # (5)
! $embed->{'path'} = $base->{'path'}; # (5)
return $embed if $embed->params; # (5a)
! $embed->{'params'} = $base->{'params'}; # (5a)
return $embed if $embed->query; # (5b)
! $embed->{'query'} = $base->{'query'}; # (5b)
return $embed;
}
# (Step 6) # draft 6 suggests stack based approach
! my $basepath = $base->{'path'};
! my $relpath = $embed->{'path'};
$basepath =~ s!^/!!;
$basepath =~ s!/$!/.!; # prevent empty segment
--- 814,856 ----
$base = new URI::URL $base unless ref $base; # make obj if needed
! my($scheme, $host, $port, $path, $params, $query, $frag) =
! @{$embed}{qw(scheme host port path params query frag)};
# just use base if we are empty (2a)
! {
! my @u = grep(defined($_) && $_ ne '',
! $scheme,$host,$port,$path,$params,$query,$frag);
! return $base->clone unless @u;
! }
# if we have a scheme we must already be absolute (2b)
return $embed if $scheme;
! $embed->{'_str'} = ''; # void cached string
! $embed->{scheme} = $base->{scheme}; # (2c)
! return $embed if $embed->{netloc}; # (3)
! $embed->{netloc} = $base->{netloc}; # (3)
$embed->_read_netloc();
! return $embed if $path =~ m:^/:; # (4)
! if ($path eq '') { # (5)
! $embed->{path} = $base->{path}; # (5)
return $embed if $embed->params; # (5a)
! $embed->{params} = $base->{params}; # (5a)
return $embed if $embed->query; # (5b)
! $embed->{query} = $base->{query}; # (5b)
return $embed;
}
# (Step 6) # draft 6 suggests stack based approach
! my $basepath = $base->{path};
! my $relpath = $embed->{path};
$basepath =~ s!^/!!;
$basepath =~ s!/$!/.!; # prevent empty segment
***************
*** 856,861 ****
--- 864,870 ----
my @newpath = ();
my $isdir = 0;
my $segment;
+
foreach $segment (@path) { # left to right
# warn '> ', join('/', @newpath), ": $segment\n";
if ($segment eq '.') { # ignore "same" directory
***************
*** 881,889 ****
}
}
! $embed->{'path'} = join('/', @newpath) . ($isdir ? '/' : '');
!
! return $embed;
}
--- 890,897 ----
}
}
! $embed->{path} = join('/', @newpath) . ($isdir ? '/' : '');
! $embed;
}
***************
*** 896,918 ****
}
- # _expect()
- #
- # Handy low-level object method tester. See test code at end.
- #
- sub _expect {
- my($self, $method, $expect, @args) = @_;
- my $result = $self->$method(@args);
- $expect = 'UNDEF' unless defined $expect;
- $result = 'UNDEF' unless defined $result;
- return 1 if $expect eq $result;
- warn "'$self'->$method(@args) = '$result' " .
- "(expected '$expect')\n";
- $self->print_on('STDERR');
- confess "Test Failed";
- }
-
-
####################################################################
#
# Internal pre-defined basic scheme support
--- 904,909 ----
***************
*** 937,945 ****
# allow the generic parser to do the bulk of the work
$self->URI::URL::_generic::_parse($init);
# then just deal with the effect of rare stray '?'s
! if (defined $self->{'query'}){
! $self->{'path'} .= "?$self->{'query'}";
! delete $self->{'query'};
}
1;
}
--- 928,936 ----
# allow the generic parser to do the bulk of the work
$self->URI::URL::_generic::_parse($init);
# then just deal with the effect of rare stray '?'s
! if (defined $self->{query}){
! $self->{path} .= '?' . $self->{query};
! delete $self->{query};
}
1;
}
***************
*** 947,954 ****
sub _esc_path
{
my($self, $text) = @_;
! $text =~ s/([^-a-zA-Z\d\$_.+!*'(),%?:@&=\/])/
! $URI::URL::escapes{$1}/oeg;
$text;
}
--- 938,944 ----
sub _esc_path
{
my($self, $text) = @_;
! $text =~ s/([^-a-zA-Z\d\$_.+!*'(),%?:@&=\/])/$escapes{$1}/oeg;
$text;
}
***************
*** 975,994 ****
package URI::URL::gopher; @ISA = qw(URI::URL::_generic);
sub default_port { 70 };
- sub gtype { shift->elem('gtype', @_); }
sub _parse {
! my($self, $url) = @_;
! $self->{'scheme'} = lc($1) if $url =~ s/^\s*([\w\+\.\-]+)://;
! $self->{'netloc'} = $self->unescape($1)
! if $url =~ s!^//([^/]*)!!;
! $self->{'gtype'} = $self->unescape($1) if $url =~ s!^/(.)!!;
! my @parts = split(/%09/, $url, 3);
! $self->{'selector'} = $self->unescape(shift @parts);
! $self->{'search'} = $self->unescape(shift @parts);
! $self->{'string'} = $self->unescape(shift @parts);
}
package URI::URL::finger; @ISA = qw(URI::URL::_generic);
--- 965,984 ----
package URI::URL::gopher; @ISA = qw(URI::URL::_generic);
sub default_port { 70 };
sub _parse {
! my($self, $url) = @_;
! $self->{scheme} = lc($1) if $url =~ s/^\s*([\w\+\.\-]+)://;
! $self->{netloc} = $self->unescape($1) if $url =~ s!^//([^/]*)!!;
! $self->{gtype} = $self->unescape($1) if $url =~ s!^/(.)!!;
! my @parts = split(/%09/, $url, 3);
! $self->{selector} = $self->unescape(shift @parts);
! $self->{search} = $self->unescape(shift @parts);
! $self->{string} = $self->unescape(shift @parts);
}
+ sub gtype { shift->elem('gtype', @_); }
+
package URI::URL::finger; @ISA = qw(URI::URL::_generic);
***************
*** 1010,1018 ****
sub _parse {
my($self, $init) = @_;
$self->URI::URL::_generic::_parse($init);
! my @parts = split(/\//, $self->{'path'});
! $self->{'group'} = $self->unescape($parts[1]);
! $self->{'digits'}= $self->unescape($parts[2]);
}
--- 1000,1008 ----
sub _parse {
my($self, $init) = @_;
$self->URI::URL::_generic::_parse($init);
! my @parts = split(/\//, $self->{path});
! $self->{group} = $self->unescape($parts[1]);
! $self->{digits}= $self->unescape($parts[2]);
}
***************
*** 1021,1029 ****
sub _parse {
my($self, $init) = @_;
! $self->{'scheme'} = lc($1) if ($init =~ s/^\s*([\w\+\.\-]+)://);
my $tmp = $self->unescape($init);
! $self->{'grouppart'} = $tmp;
$self->{ ($tmp =~ m/\@/) ? 'article' : 'group' } = $tmp;
}
--- 1011,1019 ----
sub _parse {
my($self, $init) = @_;
! $self->{scheme} = lc($1) if ($init =~ s/^\s*([\w\+\.\-]+)://);
my $tmp = $self->unescape($init);
! $self->{grouppart} = $tmp;
$self->{ ($tmp =~ m/\@/) ? 'article' : 'group' } = $tmp;
}
***************
*** 1036,1045 ****
sub _parse {
my($self, $init) = @_;
$self->URI::URL::_generic::_parse($init);
! my @parts = split(/\//, $self->{'path'});
! $self->{'database'} = $self->unescape($parts[1]);
! $self->{'wtype'} = $self->unescape($parts[2]);
! $self->{'wpath'} = $self->unescape($parts[3]);
}
--- 1026,1035 ----
sub _parse {
my($self, $init) = @_;
$self->URI::URL::_generic::_parse($init);
! my @parts = split(/\//, $self->{'path'});
! $self->{database} = $self->unescape($parts[1]);
! $self->{wtype} = $self->unescape($parts[2]);
! $self->{wpath} = $self->unescape($parts[3]);
}
***************
*** 1060,1067 ****
sub _parse {
my($self, $init) = @_;
! $self->{'scheme'} = lc($1) if ($init =~ s/^\s*([\w\+\.\-]+)://);
! $self->{'encoded822addr'} = $self->unescape($init);
}
--- 1050,1057 ----
sub _parse {
my($self, $init) = @_;
! $self->{scheme} = lc($1) if ($init =~ s/^\s*([\w\+\.\-]+)://);
! $self->{encoded822addr} = $self->unescape($init);
}
***************
*** 1076,1087 ****
# Aliases for old method names. To be deleted in a future version.
{ package URI::URL::_generic;
! no strict 'refs';
*{"dump"} = \&print_on;
*{"str"} = \&as_string;
}
#####################################################################
#
# If we're not use'd or require'd execute self-test.
--- 1066,1082 ----
# Aliases for old method names. To be deleted in a future version.
{ package URI::URL::_generic;
! no strict qw(refs);
*{"dump"} = \&print_on;
*{"str"} = \&as_string;
}
+
+ #####################################################################
+ #
+ # S E L F T E S T S E C T I O N
+ #
#####################################################################
#
# If we're not use'd or require'd execute self-test.
***************
*** 1101,1106 ****
--- 1096,1120 ----
__END__
+ package URI::URL::_generic;
+
+ # _expect()
+ #
+ # Handy low-level object method tester. See test code at end.
+ #
+ sub _expect {
+ my($self, $method, $expect, @args) = @_;
+ my $result = $self->$method(@args);
+ $expect = 'UNDEF' unless defined $expect;
+ $result = 'UNDEF' unless defined $result;
+ return 1 if $expect eq $result;
+ warn "'$self'->$method(@args) = '$result' " .
+ "(expected '$expect')\n";
+ $self->print_on('STDERR');
+ confess "Test Failed";
+ }
+
+
package main;
use Carp;
***************
*** 1110,1116 ****
# Do basic tests first.
# Dies if an error has been detected, prints "ok" otherwise.
! print "Self tests for version $URI::URL::Version...\n";
&scheme_parse_test;
--- 1124,1131 ----
# Do basic tests first.
# Dies if an error has been detected, prints "ok" otherwise.
! print "Self tests for URI::URL version $URI::URL::Version...\n";
!
&scheme_parse_test;
***************
*** 1122,1129 ****
&absolute_test;
! print "$URI::URL::rcsid ok\n";
exit 0;
--- 1137,1147 ----
&absolute_test;
! URI::URL::strict(0);
! $u = new URI::URL "myscheme:something";
! # print $u->as_string, " works after URI::URL::strict(0)\n";
+ print "URI::URL version $URI::URL::Version ok\n";
exit 0;
***************
*** 1235,1240 ****
--- 1253,1259 ----
$url->query(undef);
$url->_expect('query', undef);
+ $url->print_on;
}
#
***************
*** 1467,1473 ****
# add some extra ones for good measure
! push(@absolute_tests, ['x/y//../z', => 'http://a/b/c/x/y/z']);
print " Relative + Base => Expected Absolute URL\n";
print "================================================\n";
--- 1486,1496 ----
# add some extra ones for good measure
! push(@absolute_tests, ['x/y//../z' => 'http://a/b/c/x/y/z'],
! ['1' => 'http://a/b/c/1' ],
! ['0' => 'http://a/b/c/0' ],
! ['/0' => 'http://a/0' ],
! );
print " Relative + Base => Expected Absolute URL\n";
print "================================================\n";
***************
*** 1483,1490 ****
}
print "absolute test ok\n";
}
-
-
- 1;
- exit 0;
-
--- 1506,1508 ----