----------------------------------------------------------------------------- -- -- libwww-ada95 : A World Wide Web client library for Ada95 -- -- U R I -- -- B o d y -- -- Copyright (C) 1997-1998 Regents of the University of California -- -- libwww-ada95 is free software; you can redistribute it and/or modify it -- under the terms of the GNU General Public License as published by the Free -- Software Foundation, with or without the single exception listed below; -- either version 2, or (at your option) any later version. libwww-ada95 is -- distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; -- without even the implied warranty of MERCHANTABILITY or FITNESS FOR A -- PARTICULAR PURPOSE. See the GNU General Public License for more details. -- You should have received a copy of the GNU General Public License -- distributed with libwww-ada95; see the file COPYING. If not, write to the -- Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA -- 02111-1307, USA. -- -- As a special exception, if other files instantiate generics from this -- library, or you link this library with other files to produce an -- executable, this library does not by itself cause the resulting -- executable to be covered by the GNU General Public License. This -- exception does not however invalidate any other reasons why the -- executable file might be covered by the GNU General Public License. -- -- Created in 1997 by Kari Nies ----------------------------------------------------------------------------- with Ada.Strings.Maps; with Text_IO; with Onions; package body URI is package Maps renames Ada.Strings.Maps; Lowalpha : Maps.Character_Set := Maps.To_Set ("abcdefghijklmnopqrstuvwxyz"); Upalpha : Maps.Character_Set := Maps.To_Set ("ABCDEFGHIJKLMNOPQRSTUVWXYZ"); Digit : Maps.Character_Set := Maps.To_Set ("1234567890"); Alpha : Maps.Character_Set := Maps."or"(Lowalpha, Upalpha); Alphanum : Maps.Character_Set := Maps."or"(Alpha, Digit); Schemec : Maps.Character_Set := Maps."or" (Alphanum, Maps.To_Set ("+-.")); Whitespc : Maps.Character_Set := Maps.To_Set (STRING'(' ', ASCII.ht)); Endsite : Maps.Character_Set := Maps.To_Set ("/?#"); Endpath : Maps.Character_Set := Maps.To_Set ("?#"); -- subprogram specs required by style-checker function Check_Chars (UBS : UB_String; Set : Maps.Character_Set) return BOOLEAN; function Check_Chars_Embedded (UBS : UB_String; Set : Maps.Character_Set; Embedded : CHARACTER) return BOOLEAN; procedure Parse_Hostname (Hostname : UB_String); procedure Parse_Hostnumber (Hostnumber : UB_String); procedure Parse_Site (UBS : UB_String; PURI : in out Parsed_URI); ---------------------------------------------------------------------- -- Check if all character in UBS are in the given character set ---------------------------------------------------------------------- function Check_Chars (UBS : UB_String; Set : Maps.Character_Set) return BOOLEAN is begin for Idx in 1 .. UB_Strings.Length (UBS) loop if not Maps.Is_In (UB_Strings.Element (UBS, Idx), Set) then return FALSE; end if; end loop; return TRUE; end Check_Chars; ---------------------------------------------------------------------- -- Check if all character in UBS are in the given character set -- also allows the given Embedded character (not in the set) to be -- embedded in UBS. Embedded characters may not appear at the start -- or end of UBS and they may not be consecutive. ---------------------------------------------------------------------- function Check_Chars_Embedded (UBS : UB_String; Set : Maps.Character_Set; Embedded : CHARACTER) return BOOLEAN is Ch : CHARACTER; Last_Embedded : BOOLEAN := FALSE; Len : NATURAL := UB_Strings.Length (UBS); begin for Idx in 1 .. Len loop Ch := UB_Strings.Element (UBS, Idx); if not Maps.Is_In (Ch, Set) then if Ch = Embedded then if (Idx = 1) or (Idx = Len) or Last_Embedded then return FALSE; else Last_Embedded := TRUE; end if; else return FALSE; end if; else Last_Embedded := FALSE; end if; end loop; return TRUE; end Check_Chars_Embedded; ----------------------------------------------------------------------- -- Parse the given hostname. Should be -- (domainlabel ".")* toplabel -- where labels are alphanumeric with possible embedded hyphens. ----------------------------------------------------------------------- procedure Parse_Hostname (Hostname : UB_String) is Parse_Str : UB_String := Hostname; Pos : NATURAL; Len : NATURAL := UB_Strings.Length (Hostname); Label : UB_String; begin loop Pos := UB_Strings.Index (Parse_Str, "."); exit when Pos = 0; if Pos > 1 and then Pos < UB_Strings.Length (Parse_Str) then Label := UB_Strings.To_Unbounded_String ( UB_Strings.Slice (Parse_Str, 1, Pos - 1)); Parse_Str := UB_Strings.Delete (Parse_Str, 1, Pos); if not Check_Chars_Embedded (Label, Alphanum, '-') then raise Invalid_URI; end if; else raise Invalid_URI; end if; end loop; -- Parse_Str now only contains the toplabel -- note, we already know the toplabel begins with an alpha char if not Check_Chars_Embedded (Parse_Str, Alphanum, '-') then raise Invalid_URI; end if; end Parse_Hostname; ----------------------------------------------------------------------- -- Parse the given hostnumber. Should be -- (Digit)+"."(Digit)+"."(Digit)+"."(Digit)+ ----------------------------------------------------------------------- procedure Parse_Hostnumber (Hostnumber : UB_String) is Parse_Str : UB_String := Hostnumber; Digit_s : UB_String; Pos : NATURAL; begin -- should be (Digit)+"."(Digit)+"."(Digit)+"."(Digit) for i in 1 .. 3 loop Pos := UB_Strings.Index (Hostnumber, "."); if Pos > 1 and then Pos < UB_Strings.Length (Parse_Str) then Digit_s := UB_Strings.To_Unbounded_String ( UB_Strings.Slice (Parse_Str, 1, Pos - 1)); Parse_Str := UB_Strings.Delete (Parse_Str, 1, Pos); if not Check_Chars (Digit_s, Digit) then raise Invalid_URI; end if; else raise Invalid_URI; end if; end loop; -- Parse_Str now only contains last set of digits if not Check_Chars (Parse_Str, Digit) then raise Invalid_URI; end if; end Parse_Hostnumber; ----------------------------------------------------------------------- -- Parse the given site string, extracting the host and port -- and updating the given Parsed_URI. ----------------------------------------------------------------------- procedure Parse_Site (UBS : UB_String; PURI : in out Parsed_URI) is Site : UB_String := UBS; Pos : NATURAL; Port : UB_String; Len : NATURAL := UB_Strings.Length (Site); begin -- skip user[:password]@ if it exists Pos := UB_Strings.Index (Site, "@"); if Pos > 0 then Site := UB_Strings.Delete (Site, 1, Pos); Len := UB_Strings.Length (Site); end if; -- check for port, follows host "host [:port]" Pos := UB_Strings.Index (Site, ":"); if Pos > 0 then if Pos < Len then -- careful, port can be empty after ':' Port := UB_Strings.To_Unbounded_String ( UB_Strings.Slice (Site, Pos + 1, Len)); if not Check_Chars (Port, Digit) then raise Invalid_URI; end if; PURI.Port := Integer'Value (UB_Strings.To_String (Port)); end if; Site := UB_Strings.Delete (Site, Pos, Len); Len := UB_Strings.Length (Site); end if; -- parse host, "hostname | hostnumber" Pos := UB_Strings.Index (Site, ".", Ada.Strings.Backward); if Pos > 0 then if Pos = Len then raise Invalid_URI; end if; if Maps.Is_In (UB_Strings.Element (Site, Pos + 1), Alpha) then Parse_Hostname (Site); else Parse_Hostnumber (Site); end if; else -- no "." found -- site str must only be a toplabel of a hostname -- make sure 1st char is Alpha if not Maps.Is_In (UB_Strings.Element (Site, Pos + 1), Alpha) then raise Invalid_URI; end if; -- make sure the rest is Alphanum with optional embedded "-"s if not Check_Chars_Embedded (Site, Alphanum, '-') then raise Invalid_URI; end if; end if; PURI.Host := Site; end Parse_Site; ----------------------------------------------------------------------- -- Given a URI_String, extracts the host, port, path and query -- Returns a Parsed_URI with this data ----------------------------------------------------------------------- function Parse_URI (URI_String : UB_String) return Parsed_URI is PURI : Parsed_URI; Pos : NATURAL; Parse_Str : UB_String; Scheme : UB_String; Site : UB_String; Path : UB_String; begin -- trim excess whitespace from front and back of URI Parse_Str := UB_Strings.Trim (URI_String, Whitespc, Whitespc); -- parse scheme Pos := UB_Strings.Index (Parse_Str, ":"); if Pos > 0 then -- maker sure scheme is at least one character if Pos = 1 then raise Invalid_URI; end if; Scheme := UB_Strings.To_Unbounded_String ( UB_Strings.Slice (Parse_Str, 1, Pos - 1)); if not Check_Chars (Scheme, Schemec) then raise Invalid_URI; end if; Parse_Str := UB_Strings.Delete (Parse_Str, 1, Pos); PURI.Scheme := Scheme; end if; -- parse site if (UB_Strings.Length (Parse_Str) > 2) and then (UB_Strings.Slice (Parse_Str, 1, 2) = "//") then Parse_Str := UB_Strings.Delete (Parse_Str, 1, 2); -- extract site until we come to /,?,# or eoln Pos := 1; while (Pos <= UB_Strings.Length (Parse_Str)) and then (not Maps.Is_In (UB_Strings.Element (Parse_Str, Pos), Endsite)) loop Pos := Pos + 1; end loop; if Pos > UB_Strings.Length (Parse_Str) then -- at end of URI Site := Parse_Str; Parse_Site (Site, PURI); return PURI; elsif Pos > 1 then -- site is not empty Site := UB_Strings.To_Unbounded_String ( UB_Strings.Slice (Parse_Str, 1, Pos - 1)); Parse_Str := UB_Strings.Delete (Parse_Str, 1, Pos - 1); Parse_Site (Site, PURI); end if; end if; -- parse path -- extract path until we come to ?,# or eoln Pos := 1; while (Pos <= UB_Strings.Length (Parse_Str)) and then (not Maps.Is_In (UB_Strings.Element (Parse_Str, Pos), Endpath)) loop Pos := Pos + 1; end loop; if Pos > UB_Strings.Length (Parse_Str) then -- at end of URI PURI.Path := Parse_Str; return PURI; elsif Pos > 1 then -- path is not empty Path := UB_Strings.To_Unbounded_String ( UB_Strings.Slice (Parse_Str, 1, Pos - 1)); Parse_Str := UB_Strings.Delete (Parse_Str, 1, Pos - 1); PURI.Path := Path; end if; -- check for query if UB_Strings.Element (Parse_Str, 1) = '?' then -- delete "?" Parse_Str := UB_Strings.Delete (Parse_Str, 1, 1); -- extract query, it may only be followed by "# fragment" Pos := UB_Strings.Index (Parse_Str, "#"); if Pos > 0 then -- remove #fragment" Parse_Str := UB_Strings.Delete (Parse_Str, Pos, UB_Strings.Length (Parse_Str)); end if; PURI.Query := Parse_Str; end if; return PURI; end Parse_URI; ----------------------------------------------------------------------- -- Given a URI_Object, Parses the associated URI string and initializes -- the associated parsed URI with the extracted host, port, path and -- query data. ----------------------------------------------------------------------- procedure Parse_URI (URI : in out URI_Object) is begin URI.URI_Parsed := Parse_URI (URI.URI_String); end Parse_URI; ----------------------------------------------------------------------- -- Prints the contents of the given URI_Object. ----------------------------------------------------------------------- procedure Print_URI (URI : URI_Object) is begin Text_IO.Put_Line ("URI_String: " & UB_Strings.To_String (URI.URI_String)); Text_IO.Put ("URI_Parsed: " & UB_Strings.To_String (URI.URI_Parsed.Scheme) & "://" & UB_Strings.To_String (URI.URI_Parsed.Host)); if URI.URI_Parsed.Port /= 0 then Text_IO.Put (":" & Onions.Image (URI.URI_Parsed.Port)); end if; Text_IO.Put (UB_Strings.To_String (URI.URI_Parsed.Path)); if UB_Strings.Length (URI.URI_Parsed.Query) > 0 then Text_IO.Put ("?" & UB_Strings.To_String (URI.URI_Parsed.Query)); end if; Text_IO.NEW_LINE (1); Text_IO.Put_Line ("Scheme|" & UB_Strings.To_String (URI.URI_Parsed.Scheme) & "|"); Text_IO.Put_Line ("Host |" & UB_Strings.To_String (URI.URI_Parsed.Host) & "|"); Text_IO.Put_Line ("Port |" & Onions.Image (URI.URI_Parsed.Port) & "|"); Text_IO.Put_Line ("Path |" & UB_Strings.To_String (URI.URI_Parsed.Path) & "|"); Text_IO.Put_Line ("Query |" & UB_Strings.To_String (URI.URI_Parsed.Query) & "|"); Text_IO.New_Line (2); end Print_URI; end URI;