#!/home/rebelsky/perl/bin/perl
# ============================================================================
# Ravel - Ravel server for filter and processing web pages. The Ravel server
# loads a set of plugins and then sits waiting for incoming HTTP
# client request. When it recieves these, depending on the user and
# the location, it calls some of the plugins to process the page in
# some manner, possibly altering it in the process. When done, it
# returns it to the browser.
#
# Logging in and account information: The account server is
# implemented as an CGI program that operates through a standard
# web server. This allows the Ravel server to use the standard Perl
# web libraries to send the login information and retrieve the user
# information. It also makes it a relatively simple affair to code
# the account server and system for changing user account settings;
# they can all be done through the well know CGI mechanisms. The
# account server takes two parameters: UserID and Password. The
# account server should return either an empty page if the login
# failed or the user data if successfull. The user data is stored
# in a heirarchical XMLish format. For example, the user ID is
# stored as ... This makes it a simple matter
# to retrieve data from. The current tags defined for this format
# are:
#
# - Contains the ID of the user
# - The users password for logging in to Ravel
# - User's e-mail address
# - Comma delimited list of plugins to be
# applied, listed in the order that they are to be
# applied
# - Comma delimited list of groups that the
# user belongs to
#
# Plugin system: The plugins are Perl modules that are stored in a
# specific directory. (This directory can be set either through a
# command line option or through the configuration file.) They must
# have the .pm extension. When the server starts up it scans this
# directory for these files and loads all of them up into Perl,
# placing them in their own packages. (The package is named for the
# filename of the perl module. Thus a plugin called Annotate.pm would
# be placed into a package called Annotate.) Each of these modules
# must define the procedure Execute. Execute takes four parameters
# and does not need to return anything. The first parameter that it
# takes is the user ID of the user who requested the page. The second
# parameter is the full string giving the information on the user who
# requested it, in the account format listed above. The third
# parameter is an HTTP::Request object giving the request from the
# users client. Finally, the fourth parameter is an HTTP::Response
# object. This last object contains the page returned from the
# web server and modified by the other plugins to that point. The
# plugin is free to call any methods on that object to change the
# contents of the page or any other information. When all of the
# plugins have been executed, it will return the response to the
# client.
#
# Exclusion System: A set of rules may be given by a web page to
# dictate what plugins may be used with a web page and by whom. These
# plugins come from two different sources. The first source is the
# web page itself. A rule is embeded on a page using a meta tag of
# the format where the string in the
# content part list the rule. The second source is a file called
# ravel.txt located in the web server root directory. It consist of a
# series of lines, each listing a single rule. Rules listed later take
# precedence over rules listed before and all of the rules listed in
# the ravel.txt file take precedence over rules on a pages meta tag.
# The format of these rules is either:
#
# allow: user-group-plugin-path
# or
# disallow: user-group-plugin-path
#
# The path is the path name from the URL including the leading slash.
# The second part of the string, the one with the hyphens must be a
# Perl regular expression for matching the different attributes. The
# server will try to match this against each combination of groups
# and plugins for each rule. If the first part of the rule was
# disallow, then a plugin will be disabled if the server can find a
# match. If it says allow, then a plugin will be allowed if a rule
# matches for it. Matching is case insensitive. For example:
#
# disallow: .*?-.*?-.*?-.*?$
# allow: .*?-Glimmer-.*?-.*?$
#
# This pair of rules will disable all plugins for regular users, but
# allow anyone in the Glimmer group to view the page with the plugins
# enabled.
#
# Author: Andrew Kensler
#
# Current Version: 0.15
#
# History: 6/30/99 - Created
# - Wrote first version of GetConfig, ProcessClient,
# and MainLoop
# 7/1/99 - Modified GetConfig to use GetOpt package to parse
# the command line for options
# - Added status messages to GetConfig and MainLoop and
# error checking to MainLoop
# - Added InitDaemon mode to go become a daemon like
# background process, plus the configuration option
# to do this.
# - Added display banner.
# 7/2/99 - Added option to chain through another proxy.
# - Began user validation code, GetUser, SendLoginForm.
# 7/4/99 - Fixed an "connection reset by peer" problem with
# SendLoginForm by slurping up request.
# - Changed SendLoginForm to send a file instead of a
# hardcoded page. (Added configuration option for this.)
# - Continued work on log in process in general.
# 7/7/99 - Finished login code.
# 7/8/99 - Wrote code to load plugins and then apply them.
# 7/9/99 - Started PluginList and modified ApplyPlugins so that the
# list of plugins is filtered by rules.
# 7/13/99 - Finished PluginList code for applying exclusion rules.
# - Changed configuration code to read a configuration file.
# 7/16/99 - Using with functioning account server.
# - Fixed (hopefully) the problem with the child processes
# not terminating. Hopefully this will lead to better
# memory usage and resource usage as well.
# 7/22/99 - I was stupid. I forgot to explicitely make the child
# process exit. Doh!! No wonder it was leaving zombies.
# 7/26/99 - Switched over the log in system to use the HTTP
# authentication mechanism. Now the browser should display
# a dialog box, and problems with the browser caching the
# log in page should dissapear. As should problems with
# the first request being a CGI request or something weird.
# The browser just resends with an extra line in the header.
# It also avoids having to reload after logging in.
# 7/27/99 - Fixed the authentication system. It now uses proxy
# authentication instead of normal authentication.
# - Wrote in the virtual URL dispatching mechanism where
# virtual URLs on the server can be registered to a
# particular function to handle them.
# - Wrote the logout code. This is simply an instance of the
# virtual URL. Just go the /logout page.
# 7/28/99 - Changed virtual URL dispatcher to work with request that
# have CGI style parameters. Also made the virtual URL
# mechanism case insensitive.
# 8/2/99 - Fixed package problems in terms of interaction with the
# plugins. It had actually been loading the plugins into
# main package, and this would cause interference between
# plugins and screw things up with allowing and disallowing
# particular plugins. It looks like its working now.
# Hopefully this will solve problems with registering
# proxy URL's now.
# 8/16/99 - Changed the code for chaining through another proxy. Now
# instead of setting the proxy URL in an environment string
# and having the switch simply say to user that, the code
# takes the URL of the proxy as the switch parameter or
# string in the configuration file and uses that URL
# directly.
# 8/19/99 - Changed clients to be determined by the authorization
# string instead of their numeric IP address. This is a
# security precaution and also allows people to run multiple
# browsers on their computer with each logged in to
# different accounts
# - Now strips the Proxy-Authorization field before relaying
# request. This makes it compliant with the HTTP spec.
# 9/26/99 - user_agent was created on initialization and then
# passed between procedures. Now global, eliminating need
# to constantly pass it around.
# - Hoisted the code from ProcessClient that fetched a page
# and ran it throught the Ravel plugins. It now is in its
# own routine, RavelRequest so that it plugins may take
# advantage of it to request pages to process. (Incidently,
# this motivated the change of user_agent to global.)
#
# To Do List: - Work on better handling of addresses (/ vs. /index.html) While
# the servers usually resolve / into /index.html, Ravel needs to
# do this automatically for the benefit of the plugins, so that
# a distinct URL is a distinct URL. Unfortunately, this varies
# among servers.
# - Error checking; make it more robust and bullet proof.
# - What happens when you try to use SSL and an encrypted web
# connection?
# - Change the content length to reflect the new length after
# filtering. Not strictly nescessary to actually work but
# it is needed to bring up to snuff with the HTTP RFC. Either
# that, or it should remove that line from the header entirely.
# - Figure out a protocol for what the plugins should return.
# Right now, we don't check the return value at all. We might
# want to figure out a scheme for this.
# - Add a check to make sure that we don't try to run a plugin
# that doesn't exists on this server.
#
# ============================================================================
# Package Imports
use strict; # Strict checking mode
use Getopt::Long; # Procedure to parse command line
use Sys::Hostname; # Determine our hostname
use HTTP::Daemon; # Server daemon package
use HTTP::Status; # Exports convenient response code constants
use HTTP::Headers; # Headers class (property of messages)
use HTTP::Message; # Base class for request and response classes
use HTTP::Request; # Client request class
use HTTP::Response; # Server response class
use LWP::UserAgent; # Fetch pages for us
use URI::Escape; # Encode a URL string
# ============================================================================
# Global Variables
my $server_addr; # Local address to use for the server
my $server_port; # Port to listen on
my $daemon_mode; # Whether or not to become a background daemon
my $proxy_addr; # URL of another proxy to chain through
my $time_out; # Seconds to go before timing out accounts
my $account_server; # URL string to the account server
my $plugin_path; # Path to the directory for plugins
my $virtual_host; # The base part to virtual URLs
my $logout_page; # The page to send when the person logs out
my %connection_data; # Hash to record data on the active connections
my %user_info; # Hash to record information on current users
my %dispatch_url; # Hash to record virtual URLs
my $user_agent; # User agent to fetch pages (LWP::UserAgent)
# ============================================================================
# DisplayBanner - Display the Web Raveler banner.
# Input: None
# Output: None
sub DisplayBanner
{
print '___________ _____ _____ _____ _____ _____ ___________'."\n";
print '__________ \/ ___ \/ ___ \/ ___ \/ ___ \/ ___ \/ __________'."\n";
print ' \/ / R \/ / A \/ / V \/ / E \/ / L \/ / '."\n";
print '__________/ /\___/ /\___/ /\___/ /\___/ /\___/ /\__________'."\n";
print '___________/\_____/\_____/\_____/\_____/\_____/\___________'."\n";
print "Web Raveler Server, V0.15.\n";
print "By Andrew Kensler.\n";
print "\n";
}
# ============================================================================
# GetConfig - Set the values of certain configuration variables. Then it
# executes the configuration file. Finally, it calls a procedure
# in the GetOpt package to parse the command line to get the
# information.
# Input: None
# Output: None
sub GetConfig
{
# Print status information
print "Getting configuration information...\n";
# Set up defaults for the options
$server_addr=hostname; # Local address for server to be on
$server_port=54321; # Port to listen on
$daemon_mode=undef; # Don't go into daemon mode by default
$proxy_addr=undef; # Direct connection by default
$time_out=1200; # Default in seconds to time out account
# FIXME: DEFAULT ACCOUNT SERVER IS HARDCODED IN RIGHT NOW
$account_server="http://www.math.grin.edu/~kensler/Research/Account/AccountServer.cgi";
$plugin_path="./Plugins"; # Path to the plugins directory
$virtual_host="http://ravel/"; # The first part of the path for the virtual host name
$logout_page="Logout.html"; # The page to access to log off the system
# Load the configuration file. The configuration file should contain
# Perl code like that above for setting defaults. Any valid Perl code
# is fine, but you'll probably want to limit it to setting the global
# variables and comments. However you could always use this to do
# something like prompt the user for these settings. This mechanism
# for configuration files was suggested by Perl guru Tom Christiansen.
do '$ENV{HOME}/.Ravel';
# Parse the command line
Getopt::Long::GetOptions
"address=s"=>\$server_addr,
"port=i"=>\$server_port,
"daemon"=>\$daemon_mode,
"proxy=s"=>\$proxy_addr,
"timeout=i"=>\$time_out,
"accountserver=s"=>\$account_server,
"pluginpath=s"=>\$plugin_path,
"virtualhost=s"=>\$virtual_host,
"logoutpage=s"=>\$logout_page;
}
# ============================================================================
# InitDaemonMode - This turns the program into a daemon. It does this by
# forking off a child process and terminating the parent
# process. This puts the program into the back ground. Next
# for the child, it sets it to trap hang up messages
# Input: None
# Output: None
sub InitDaemonMode
{
# Print status information
print "Going into the background...\n";
# Fork off a child process to handle it. The parent should terminate,
# leaving the child process running.
if (fork()!=0)
{
exit 0;
}
# The child process should ignore hangup signals. This way, when the user
# logs out, it continues running in the background. It continues running
# until it is killed.
$SIG{HUP}='IGNORE';
}
# ============================================================================
# Reaper - This routine is responsible for helping to kill of child processes.
# It reinstalls itself to handle the next signal, just in case it is
# running on a SysV system and calls wait so that the child process
# can exit correctly.
# Input: None
# Output: None
sub Reaper {
$SIG{CHLD} = \&Reaper;
wait;
}
# ============================================================================
# LoadPlugins - Scan the plugin directory for Perl module files and load them
# into their own package namespace.
# Input: None
# Output: None
sub LoadPlugins
{
# Local Variables
my $plugin_file; # The fully qualified file name for a plugin
my $plugin; # The bare plugin name without a path or such
# Print status information.
print "Loading plugins...\n";
# Scan the directory for plugins.
while($plugin_file=<$plugin_path/*.pm>)
{
$plugin_file=~/([^\/]*).pm$/;
$plugin=$1;
# Print more status information.
print " -> $plugin\n";
# Load the plugin into its own namespace.
eval "package $plugin; require '$plugin_file'; package main;";
}
}
# ============================================================================
# SendLoginResponse - Send an HTTP response telling the browser to prompt for
# the users name and password.
# Input: client - A client connection (HTTP::Daemon::ClientConn)
# Output: None
sub SendLoginResponse
{
# Parameters
my $client=shift; # Client connection (HTTP::Daemon::ClientConn)
# Local Variables
my $headers; # A header for our response (HTTP::Headers)
my $response; # Our response to return (HTTP::Response)
# Construct a header to send back with our response. The key to this is
# the proxy-authenticate line. This tells the browser to prompt the
# user for their username and password. This will be sent along with the
# resent request.
$headers=new HTTP::Headers
"Server"=>"Ravel/0.15",
"Content-Type"=>"text/html",
"Proxy-Authenticate"=>"BASIC realm=\"Ravel\"";
# Now we build a response out of that, saying the request is unauthorized.
$response=new HTTP::Response
RC_PROXY_AUTHENTICATION_REQUIRED,
"Proxy Authentication Required",
$headers;
# Now transmit our response to them.
$client->send_response($response);
}
# ============================================================================
# UserLogin - Read the client request and extract the user login information
# from it. Then pass that to the user account server to try to
# get the information for that account, if possible.
# Input: client - A client connection (HTTP::Daemon::ClientConn)
# auth - The encoded authorization string (HTTP::Request)
# Output: User ID of the user at the client end (String)
# or undef if they still need to be authenticated or could not.
sub UserLogin
{
# Parameters
my $client=shift; # Client connection (HTTP::Daemon::ClientConn)
my $auth=shift; # The authorization string (String)
# Local variables
my $request_headers; # Headers from the request (HTTP::Headers)
my $auth_len; # String length for decoding (String)
my $user_id; # The extracted user name (String)
my $password; # The extracted password (String)
my $login_request; # Request from the client (HTTP::Request)
my $login_response; # Response from the server (HTTP::Server)
my $info; # User information (String)
# Strip the Basic part out and decode the authorization information.
# The authorization info is in the form of the user id and password
# joined by a semicolon and encoded in Base64 format. The code for
# decoding the Base64 is adapted from perlfaq9. Then we extract the
# user name and password from this.
$auth=~s/Basic //;
$auth=~tr@A-Za-z0-9+/@@cd;
$auth=~tr@A-Za-z0-9+/@ -_@;
$auth_len=pack("c",32+0.75*length($auth));
$auth=unpack("u",$auth_len.$auth);
($user_id,$password)=split(/:/,$auth);
# URL encode the string so we can send it and the account server can decode
# it properly.
$user_id=URI::Escape::uri_escape($user_id);
$password=URI::Escape::uri_escape($password);
# Construct a request to the account server. Pack the user name and
# password into a CGI style GET request.
$login_request=new HTTP::Request("GET",$account_server."?UserID=".$user_id."&Password=".$password);
# Now fetch the information from the account server and put it into a
# string to work with.
$login_response=$user_agent->request($login_request);
$info=$login_response->content;
# If it was a bad login try, then the server will respond with an empty
# page. If we get that, or fail to contact the server, then we need to
# return an undef to indicate that the login failed. This will prompt
# the proxy authorization response to be sent again.
if (($info eq "")||$login_response->is_error)
{
return undef;
}
# Otherwise, the login is all right. We stuff their account info into the
# user information hash and then return their user id.
$user_info{$user_id}=$info;
return $user_id;
}
# ============================================================================
# GetUser - Look to see if there is a current entry in the hash of authoriz.
# strings for a given connection. If there is not, then we
# bounce back a form asking for the user to login. We basically
# react depending on the state.
# Input: client - A client connection (HTTP::Daemon::ClientConn)
# client_request - Request from the client (HTTP::Request)
# Output: User ID of the user at the client end (String)
# or undef if we're still doing authentication stuff.
sub GetUser
{
# Parameters
my $client=shift; # Client connection (HTTP::Daemon::ClientConn)
my $client_request=shift; # Request from the client (HTTP::Request)
# Local Variables
my $request_headers; # Headers from the request (HTTP::Headers)
my $auth; # The authorization string (String)
my $status; # Status of the client (string)
my $last_time; # Time of last activity from client (integer)
my $user_id; # User id
# Extract the header from the client request. We need this to get the
# authorization string. We'll also need it to remove the proxy
# authorization string later.
$request_headers=$client_request->headers;
# Now get the user authentication string.
$auth=$request_headers->header("Proxy-Authorization");
# If there is no authentication string, then we reject the try and send
# a response requesting for them to log in.
if (!$auth)
{
SendLoginResponse $client;
return undef;
}
# Put the connection status info into an easier variable.
$status=$connection_data{$auth}{"status"};
# If they are in not logged in, but sent the authentication string, then
# verify their authorization information against the account database. If
# they fail, then resend the log in response. Otherwise, we record their
# information and consider them to be logged in.
if ($status ne "logged in")
{
$user_id=UserLogin($client,$auth);
if ($user_id)
{
$connection_data{$auth}{"time"}=time;
$connection_data{$auth}{"user"}=$user_id;
$connection_data{$auth}{"status"}="logged in";
}
else
{
SendLoginResponse $client;
return undef;
}
}
# Look up the last access time. If there hasn't been a connection from
# that address before or the last activity was longer than the time out
# time then tell them that they will have to login in again. We also
# give them the logging in status so when they click the button, we catch
# their data.
$last_time=$connection_data{$auth}{"time"};
if ((time-$last_time)>$time_out)
{
SendLoginResponse $client;
delete $connection_data{$auth}{"status"};
return undef;
}
# If we made it here, they are fine and we can go ahead an process their
# request. One thing we need to do first is to remove the
# Proxy-Authorization header so that is doesn't get passed downstream.
# According to the HTTP spec, that is a no-no.
$request_headers->remove_header("Proxy-Authorization");
# Otherwise, we can update their last access time entry and return the
# user ID associated with that connection.
$connection_data{$auth}{"time"}=time;
return $connection_data{$auth}{"user"};
}
# ============================================================================
# RegisterProxyURL - Given a string containing a pathname and a reference to
# a procedure, this will associate the procedure with the
# URL in a hash. For each page request, this hash will
# be checked and if an entry is found, the system will
# call the procedure.
# Input: path - The path part of the URL (String)
# procedure - Reference to the procedure to invoke (Reference)
# Output: None
sub RegisterProxyURL
{
# Parameters
my $path=shift; # The path part of the URL (String)
my $procedure=shift; # Reference to the procedure (Reference)
# Local Variables
my $virtual_url; # The complete virtual URL (String)
# Join the path to the virtual host name to get the full URL string and
# make this lower case so that we get case insensitivity.
$virtual_url=lc($virtual_host.$path);
# Fill the entry in the dispatch table with our reference to the
# procedure to call when this is invoked.
$dispatch_url{$virtual_url}=$procedure;
}
# ============================================================================
# DispatchProxyURL - For a client request, this looks in the proxy URL
# dispatch table to see if there is an entry. If there
# is an entry, it invokes the referenced procedure,
# passing the parameters to it and offering it a chance
# to handle the request by responding.
# Input: user - The ID of the user for this request (String)
# client - A client connection (HTTP::Daemon::ClientConn)
# client_request - Request from the client (HTTP::Request)
# Output: True if we've short circuited normal processing and handle the
# request at this level. Otherwise, a false value means to go ahead
# and handle the URL as normal.
sub DispatchProxyURL
{
# Parameters
my $user=shift; # ID of the user for this request (String)
my $client=shift; # Client connection (HTTP::Daemon::ClientConn)
my $client_request=shift; # Request from the client (HTTP::Request)
# Local Variables
my $url;
my $procedure;
# Get the URL from the request. We also make it lowercase so that the
# dispatcher is insensitive to the case of the URLs.
$url=lc $client_request->url;
# If there is a question mark and junk after it, then strip it off along
# with everything after it. This makes it work properly even if CGI
# parameters are included with the URL.
$url=~s/\?.*//;
# Look in the table. Is there an entry for this URL? If so, call the
# procedure referenced by the table entry.
$procedure=$dispatch_url{$url};
if ($procedure)
{
&$procedure($user,$user_info{$user},$client,$client_request);
return 1;
}
# Otherwise, processing goes on as normal.
return undef;
}
# ============================================================================
# Logout - This is called with through the virtual URL mechanism to log the
# user out of the system. Really what it just does is to forget the
# users entries in the user information hashes.
# Input: user - The ID of the user for this request (String)
# user_info - A string with information on the user (String)
# client - A client connection (HTTP::Daemon::ClientConn)
# client_request - Request from the client (HTTP::Request)
sub Logout
{
# Parameters
my $user=shift; # ID of the user for this request (String)
my $user_info=shift; # A string with info. on the user (String)
my $client=shift; # Client connection (HTTP::Daemon::ClientConn)
my $client_request=shift; # Request from the client (HTTP::Request)
# Local Variables
my $auth; # Authorization for connection hash (String)
my $page; # A small page to return on logout (String)
my $response; # Our response to return (HTTP::Response)
# Extract the authorization string from the user info. The connection data
# is indexed by this, so we need to know it to delete those entries.
$user_info=~/(.*?)<\/AUTH>/;
$auth=$1;
# Delete the entry from the user information hash
delete $user_info{$user};
# Now delete the connection data
delete $connection_data{$auth}{"time"};
delete $connection_data{$auth}{"user"};
delete $connection_data{$auth}{"status"};
# Send a page to say "Bye-bye!"
$client->send_file_response($logout_page);
}
# ============================================================================
# PluginList - This extract the list of plugins from the user info and then
# applies the exclusion rules associated with the page to the
# list to remove all of the plugins that should be excluded.
# The returned list is the list of plugins that are approved
# to be run on this page by the user.
# Input: user - The ID of the user for this request (String)
# client_request - Request from the client (HTTP::Request)
# server_response - The response from the server (HTTP::Response)
# Output: None
sub PluginList
{
# Parameters
my $user=shift; # ID of the user for this request (String)
my $client_request=shift; # Request from the client (HTTP::Request)
my $server_response=shift; # The response from the server (HTTP::Response)
# Local Variables
my $headers; # Header from the response (HTTP::Headers)
my $content_type; # The content-type from the header (String)
my $html; # The HTML contents of the page (String)
my $url; # The URL of the request (URI::URL)
my $rules_request; # A request for the rules file (HTTP::Request)
my $rules_response; # Rules returned by the server (HTTP::Response)
my @rule_list; # List of rules controlling plugins (Array)
my $rule; # One of the rules in the list (String)
my @plugin_list; # List of plugins in order to apply (Array)
my $plugin; # One of the plugins in the list (String)
my @group_list; # List of groups this user is in (Array)
my $group; # One of the groups in the list
my $path_name; # The directory path from the URL (String)
my $state; # The state to set a plugin to (String)
my $pattern; # A rules pattern to try to match (String)
my $line; # A line to try to match by a rule (String)
my %plugin_state; # On/off state for each plugin (Hash)
# Is this an HTML page? If so, we scan through the HTML on the page and
# look for meta tags that correspond to Ravel rules. Then we extract the
# rule and push it onto an array.
$headers=$server_response->headers;
$content_type=$headers->header("Content-Type");
if ($content_type eq "text/html")
{
$html=$server_response->content;
while($html=~/<\s*meta\s+name\s*=\s*"?Ravel"?\s+content\s*=\s*"?(.*?)"?\s*>/gi)
{
push(@rule_list,$1);
}
}
# Now try to get the rules from a ravel.txt file in the root directory on
# the server. If there is such a file, we add each line to the list of
# rules.
$url=$client_request->url;
# FIXME: QUIT BANGING FOR NONEXISTENT FILES ON THE SERVERS WHILE WE TEST
# $rules_request=new HTTP::Request("GET","http://".$url->host."/ravel.txt");
# $rules_response=$user_agent->request($rules_request);
# if ($rules_response->is_success)
# {
# $html=$rules_response->content;
# push(@rule_list,split(/\n/,$html));
# }
# Extract the list of plugins from the user info.
$user_info{$user}=~/(.*?)<\/PLUGINLIST>/;
@plugin_list=split(/,/,$1);
# Extract the list of groups the user is in.
$user_info{$user}=~/(.*?)<\/GROUPLIST>/;
@group_list=split(/,/,$1);
# Extract the directory and file path from the request URL.
$path_name=$url->path;
# Now for each of the rules, we apply extract the state that should be set
# and the pattern to match to put a plugin into that state. Then we
# construct lines describing each possible group and plugin and try to see
# if we have a match. If we do, then for the time being, we set the plugin
# to whatever state was described for the time being
foreach $rule (@rule_list)
{
$rule=~/(.*?):\s*(.*)/;
$state=$1;
$pattern=$2;
foreach $plugin (@plugin_list)
{
foreach $group (@group_list)
{
$line="$user-$group-$plugin-$path_name";
if ($line=~/$pattern/i)
{
$plugin_state{$plugin}=$state;
last;
}
}
}
}
# Finally, we weed out all of the plugins in the list with entries in the
# state hash that say "disallow". We finish by returning this culled list
# of plugins to use.
@plugin_list=grep $plugin_state{$_}ne"disallow", @plugin_list;
return @plugin_list;
}
# ============================================================================
# ApplyPlugins - For each client request, this is called with the name of the
# user, the client's request and the server's response. It is
# free to do with them as it wishes. Namely, it can invoke
# methods on the response to change the content before the
# server returns them.
# Input: user - The ID of the user for this request (String)
# client_request - Request from the client (HTTP::Request)
# server_response - The response from the server (HTTP::Response)
# Output: None
sub ApplyPlugins
{
# Parameters
my $user=shift; # ID of the user for this request (String)
my $client_request=shift; # Request from the client (HTTP::Request)
my $server_response=shift; # The response from the server (HTTP::Response)
# Local Variables
my @plugin_list; # The plugins to apply in list format.
my $plugin; # The current plugin to work on
# Grab the list of plugins to apply. The list is returned in the order
# to apply them and with all of the rules in the HTML and from the server
# about what plugins to allow taken into account.
@plugin_list=PluginList($user,$client_request,$server_response);
# Loop for each of the plugins in the list and in order and execute
# the plugin on the data. We pass the ID of the user, the client's
# request, and the server's response. The plugin can invoke any
# methods that it likes to alter the server response. This object will
# get returned to the client.
foreach $plugin (@plugin_list)
{
eval $plugin.'::Execute($user,$user_info{$user},$client_request,$server_response)';
}
}
# ============================================================================
# RavelRequest - This is the heart of the ProcessClient routine. It's been
# hoisted out for the benefit of plugin writers who wish to
# to request extra pages and have them processed by the Ravel
# plugins.
# Input: user - The ID of the user for this request (String)
# request - Request for a resource (HTTP::Request)
# Output: The processed response from the server (HTTP::Response)
sub RavelRequest
{
# Parameters
my $user=shift; # ID of the user for this request (String)
my $request=shift; # Request to fetch (HTTP::Request)
# Local Variables
my $response; # The response from the server (HTTP::Response)
# Pass the request to the server to try to get a response. We use the
# simple_request method instead of the request method, because we the
# request method automatically intercepts redirect responses and
# requests the page the redirect points to. The browser never catches
# the redirect and so its expansion of relative URLs gets screwed up.
# So we use the simple_request method because it leaves handling of
# the redirects to the browser. Lesson learned.
$response=$user_agent->simple_request($request);
# Now run it through the plugins.
ApplyPlugins($user,$request,$response);
# Finally, return the response after the plugins
return $response;
}
# ============================================================================
# ProcessClient - Given a client, this will do all of the things nescessary
# to handle it. One of the key things that it will do is to
# fork of a child process to handle it.
# Input: client - A client connection (HTTP::Daemon::ClientConn)
# Output: None
sub ProcessClient
{
# Parameters
my $client=shift; # Client connection (HTTP::Daemon::ClientConn)
# Local Variables
my $pid; # Process ID from fork call (Integer)
my $client_request; # Request from the client (HTTP::Request)
my $server_response; # The response from the server (HTTP::Response)
my $user; # The ID of the user (String)
# Get the request from the client. We need it to pass to the GetUser
# routine, and to check to see if it is a local registered page that we
# should short circuit to.
$client_request=$client->get_request;
# Get the user ID associated with this client. If we don't get one back
# then close the connection and return; they've been served by the login
# routines.
if (!($user=GetUser($client,$client_request)))
{
close $client;
return;
}
# Check to see if the request is to a virtual URL that we have
# registered. If so, the handler should have returned a response to that
# effect, so we should stop processing here.
if (DispatchProxyURL($user,$client,$client_request))
{
close $client;
return;
}
# Fork off a child process to handle it. The parent will return to the
# main loop to recieve more request.
$pid=fork;
if ($pid!=0)
{
close $client;
return;
}
# Run the clients request through Ravel now.
$server_response=RavelRequest($user,$client_request);
# Pass the response back to the client.
$client->send_response($server_response);
# We're done, so close the connection with the client and exit.
close $client;
exit(0);
}
# ============================================================================
# MainLoop - This is the main server loop. It sets itself up to listen on the
# given port for client connections and calls ProcessRequest to
# deal with each one of them.
# Input: None
# Output: None
sub MainLoop
{
# Local Variables
my $server_daemon; # Server daemon object (HTTP::Daemon)
my $client; # Client connection (HTTP::Daemon::ClientConn)
# Print out status information.
print "Starting up server...\n";
# Register the logout virtual URL, so that the user has a way to log out
# from the system by going to the right page.
RegisterProxyURL("logout",\&Logout);
# When a child exits, it sends the CHLD signal to the parent process.
# But we need to properly acknowledge this in order for the child to be
# able to exit properly. UNIX is just weird about this. So we install
# our signal handler and this theoretically prevents zombie child
# processes, which is a Good Thing, considering the resource consumption
# of Perl and the limited process table. We could use an anonymous
# subroutine, but if this is defensive against SysV UNIXes.
$SIG{CHLD} = \&Reaper;
# Set up a user agent for the child processes to use to fetch the pages
# from the servers. This way it will be ready for the child processes
# to use when they need it.
$user_agent=new LWP::UserAgent;
$user_agent->agent("Ravel/0.15");
# If the user wants this to chain through another proxy, then we will
# set the user agent to chain through this other proxy.
$user_agent->proxy("HTTP",$proxy_addr) if $proxy_addr;
# Create a daemon object to list for incoming client request. We'll set
# it to listen on the given port.
$server_daemon=new HTTP::Daemon
LocalAddr=>$server_addr,
LocalPort=>$server_port;
# Tell the user that we are successfully running.
print "Server active at ".$server_daemon->url."\n";
# Here is the main loop. We block on accept and wait for client
# connections. For each one, we call ProcessRequest to handle it.
while($client=$server_daemon->accept)
{
ProcessClient($client);
}
}
# ============================================================================
# Main Sequence
DisplayBanner; # Display our banner to the user.
GetConfig; # Get the configuration information.
InitDaemonMode if $daemon_mode; # Go into the background daemon style if desired.
LoadPlugins; # Load up all of the plugins
MainLoop; # Execute the server loop.