#!/usr/local/bin/perl

##++
##     CGI Lint v1.0
##     Last modified: November 25, 1995
##
##     Copyright (c) 1995, 1996
##     Shishir Gundavaram and O'Reilly & Associates
##     All Rights Reserved
##
##     E-Mail: shishir@ora.com
##
##     Permission to use, copy, modify and distribute is hereby granted,
##     providing that no charges are involved and the above copyright
##     notice and this permission appear in all copies and in supporting
##     documentation. Requests for other distribution rights, including
##     incorporation in commercial products, such as books, magazine
##     articles, or CD-ROMS should be made to the authors.
##
##     This program is distributed in the hope that it will be useful,
##     but WITHOUT ANY WARRANTY; without even the implied warranty of
##     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
##--

############################################################################

#++
# Here is how you should run this script:
#
#      usage: CGI_Lint configuration_file [data_file]
#
#      - The configuration file should contain a list of environment variables
#        and their values, such as:
#
#          REQUEST_METHOD    = GET
#          QUERY_STRING      = name=John Surge&company=ABC Corporation!
#          HTTP_ACCEPT       = image/gif, image/x-xbitmap, image/jpeg, */*
#          SERVER_PROTOCOL   = HTTP/1.0
#          REMOTE_ADDR       = 198.198.198.198
#          DOCUMENT_ROOT     = /usr/local/bin/httpd_1.4.2/public
#          GATEWAY_INTERFACE = CGI/1.1
#          REQUEST_METHOD    = GET
#          SCRIPT_NAME       = /cgi-bin/abc.pl
#          SERVER_SOFTWARE   = NCSA/1.4.2
#          REMOTE_HOST       = gateway.cgi.com
#
#        * The most important field here is SCRIPT_NAME, which should point
#          to the script you want to test.
#
#        * You do not have to encode the information in QUERY_STRING. But,
#          you do have to *escape* the "%", "&", or the "=" characters.
#
#      - The data file (optional) should contain a list of the fields (and
#        values) of your form. Here is an example of a data file for multipart
#        message (you need to set CONTENT_TYPE = multipart/form-data).
#
#          name       = Joe = Joseph
#          company    = JP & Play
#          percentage = 50%
#          review     = */usr/shishir/rev.dat
#
#        * Any time you have a file field, you need to specify a valid
#          filename (with an asterisk before it).
#
#      For non multipart data (i.e application/x-www-form-urlencoded), you
#      can specify the data in the same format as above (except for the fact
#      that you need to escape certain characters).
#--

$| = 1;
$cat = "/usr/bin/cat -u";

#++
# Buffering is turned off, so that messages and data are displayed in the
# correct order in which they were written. The cat UNIX utility is used by
# CGI Lint to send a data file to the CGI scripts. The -u switch forces cat
# to display the file in unbuffered mode. In other words, each line is output
# as soon as it is read.
#--

($config_file, $data_file) = @ARGV;

#++
# The two command line parameters are read from the ARGV array, and are
# stored in config_file and data_file variables.
#--

&load_errors ();

#++
# The load_errors subroutine sets the ERRORS associative array with all of
# the error messages used by this application.
#--

if ($config_file) {
    &parse_config_file ($config_file);

    #++
    # If the user passed a configuration file (containing a list of the
    # environment variables) to this tool, the following block of code is
    # executed. First, the parse_config_file subroutine goes through the
    # configuration file and sets up the environment variables in the ENV
    # associative array. Remember, these new environment variables are
    # available for all processes that this script creates (or executes), but
    # disappear when this process, itself, terminates.
    #--

    &parse_NCSA_srm_file ();

    #++
    # The parse_NCSA_srm_file subroutine reads through the NCSA server
    # resource map configuration file looking for any of the following lines:
    #
    #      ScriptAlias /my-cgi-apps/ /usr/local/bin/httpd_1.4.2/cgi-bin/
    #      AddType application/x-httpd-cgi .cgi .pl
    #
    # The main reason for doing this is to make sure that the script referred
    # to in the environment variable SCRIPT_NAME is a recognizable (or valid)
    # CGI script, as far as the server is concerned. Or else, an error message
    # is displayed.
    #--

    &check_script_path ();

    #++
    # The full path to the script is determined by looking at the value stored
    # in the environment variable DOCUMENT_ROOT (or SERVER_ROOT if the server
    # is not installed normally). We will look at this in more detail when
    # we analyze the subroutine.
    #--

    &check_script_permissions ();

    #++
    # CGI Lint makes sure that the specified script is executable. In order to
    # truly simulate a server, this tool has to be installed as a setuid
    # script. In other words, this tool has to run with the same user
    # identification as that of the server. See the book for a "wrapper"
    # program.
    #--

    &check_script_header ();

    #++
    # The CGI script is checked for the #! header line at the top of the
    # script. This subroutine also ensures that the specified interpreter
    # exists, and is executable.
    #--

    if ($ENV{'INTERPRETER'} =~ /perl/i) {
        &check_perl_syntax ();
        &check_perl_code ();
    }

    #++
    # The environment variable INTERPRETER is set to the name and path of the
    # interpreter (by the check_script_header subroutine) that will execute the
    # CGI script. If it is a Perl script, two subroutines are called. First,
    # the check_perl_syntax looks for syntax errors in the script, and second,
    # check_perl_code searches the script for potential performance and
    # security problems dealing with the eval, open, and system commands.
    #--

    &run_script ($data_file);

    #++
    # Finally, the run_script subroutine is called to execute the script. There
    # is a lot of data processing that goes on before the CGI script is
    # actually executed.
    #--

} else {
    &terminate ("Usage");
}

exit (0);

#++
# If the configuration file is not passed to this tool, the terminate
# subroutine is called with an argument of "Usage", which displays the error
# message of the same name, as stored in the ERRORS associative array, and
# exits. Now, let us look at the various subroutines that provide the core of
# functionality.
#--

#++
#      S U B R O U T I N E S
#--

# parse_config_file ($file)
#
# Reads "key = value" lines from $file and stores every value in %ENV,
# prefixed with "__". Afterwards SERVER_ROOT is derived from DOCUMENT_ROOT
# when the configuration did not supply it, and its "__" prefix is removed.
# Calls terminate ("Config_File") when the file cannot be opened.

sub parse_config_file
{
    local ($file) = @_;

    open (CONFIG, "<" . $file) || &terminate ("Config_File");

    # The handle has to be read explicitly: an empty "while ()" condition is
    # always true in Perl, so the loop would spin forever without ever
    # consuming a line of the file.

    while (<CONFIG>) {

        # The value is captured non-greedily so that the trailing \s* really
        # does swallow trailing blanks and the line terminator; a greedy
        # (.*) keeps trailing blanks as part of the value.

        if (/^\s*(\w+)\s*=\s*(.*?)\s*$/) {
            $ENV{$1} = join ("", "__", $2);
        }
    }

    close (CONFIG);

    #++
    # The configuration file is opened in read mode. If it cannot be opened,
    # the "Config_File" message is displayed. The while loop iterates
    # through the file, looking for lines that start with the following format:
    #
    #      key1 = value1
    #      key2 = value2
    #      key3 = value3
    #
    # The regular expression ignores leading and trailing whitespace, so you
    # can align all of the key/value pairs (environment variables) for better
    # readability. The data is stored in the environment variable array ENV.
    # Notice how each value is preceded with the "__" characters. Later on in
    # this script, this is used to remove all of the other UNIX environment
    # variables to provide a realistic simulation of the server.
    #--

    unless ($ENV{'SERVER_ROOT'}) {
        ($ENV{'SERVER_ROOT'}) = $ENV{'DOCUMENT_ROOT'} =~ m|^(.*)/|;
    }

    #++
    # The subroutine checks for the SERVER_ROOT variable in the configuration
    # file, which should point to the server root directory, in case your
    # server setup is somehow different from the regular (and usual)
    # configuration. However, if your server setup matches the one defined by
    # NCSA, you only need to define the DOCUMENT_ROOT variable.
    #--

    $ENV{'SERVER_ROOT'} =~ s/^__//;
}

#++
# The SERVER_ROOT variable is not defined by any of the servers today, but is
# used by this script exclusively for the purpose of determining the server
# root directory. It is not available to the CGI script (the leading "__" is
# removed from the variable).
#--

# parse_NCSA_srm_file ()
#
# Scans SERVER_ROOT/conf/srm.conf. Every ScriptAlias directive becomes an
# %ENV entry keyed by the alias, and the extensions of an
# "AddType application/x-httpd-cgi" directive are stored in
# $ENV{'CGI_TYPES'} joined by "|". Calls terminate ("Srm_File") when the
# file cannot be opened.

sub parse_NCSA_srm_file
{
    local ($srm_file, $alias, $real);

    $srm_file = join ("/", $ENV{'SERVER_ROOT'}, "conf", "srm.conf");
    open (SRM, "<" . $srm_file) || &terminate ("Srm_File");

    # As in parse_config_file, the handle must be read explicitly; an empty
    # "while ()" condition never reads the file and never terminates.

    while (<SRM>) {

        # Whitespace between the directive and its two arguments is
        # mandatory (\s+). With \s* a directive carrying a single argument
        # could still match, by splitting that argument in two.

        if ( ($alias, $real) = /^\s*ScriptAlias\s+(\S+)\s+(\S+)\s*$/ ) {
            $ENV{$alias} = $real;

            #++
            # This subroutine iterates through the server resource map
            # configuration file, searching for lines that begin with either
            # ScriptAlias or AddType. If the line contains the ScriptAlias
            # directive, the associative array element is created, with the
            # key being the alias (i.e "/cgi-bin") and the value being the
            # true directory (i.e "/usr/local/httpd_1.4.2/cgi-bin"). This
            # makes it very easy to check to see if a script resides in this
            # directory.
            #--

        # The leading \s* allows indented directives (the pattern used to
        # read "^s*", which only matched a run of literal "s" characters).
        # The extension list must start with a non-blank character and is
        # captured non-greedily, so trailing blanks cannot turn into a
        # trailing "|", i.e. an empty alternative that matches everything.

        } elsif (/^\s*AddType\s+application\/x-httpd-cgi\s+(\S.*?)\s*$/) {
            ($ENV{'CGI_TYPES'} = $1) =~ s/\s+/\|/g;
        }
    }

    close (SRM);
}

#++
# All of the file extensions listed on the AddType directive are converted
# into a regular expression (by removing the spaces, and adding the
# alternation character, "|") and are stored in the CGI_TYPES environment
# variable. Notice how the values of both of these variables do not contain
# "__" which indicates that they will not be available for the CGI script.
#
# NOTE(review): the check_script_path definition that follows continues past
# this section of the file and is left exactly as found.
#-- sub check_script_path { local ($script_name, $cgi_path, $cgi_script, $file_type); ($script_name = $ENV{'SCRIPT_NAME'}) =~ s/^__//; ($cgi_path, $cgi_script) = $script_name =~ m|^(.*/)(.*)$|; $ENV{'NPH_SCRIPT'} = ($cgi_script =~ /^nph-/) ? 1 : 0; #++ # This subroutine performs a couple of functions. First, it determines # whether the CGI script is a Non Parse Header script, by searching to see # if the script starts with the string "nph-". 
#-- if (defined ($ENV{$cgi_path})) { $ENV{'SCRIPT_PATH'} = join ("", $ENV{$cgi_path}, $cgi_script); &terminate ("Bad_Script") unless (-e $ENV{'SCRIPT_PATH'}); #++ # The previous subroutine created elements (or variables) in the ENV # associative array for each ScriptAlias directory, like this: # # $ENV{'/cgi-bin/'} = "/usr/local/httpd_1.4.2/cgi-bin" # # This conditional determines the path for the cgi script, and checks to # see if there is a key by the same name. If it is successful, it # indicates that the script is located in a valid directory. The # environment variable SCRIPT_PATH refers to the full path (not relative) # to the script Then, the subroutine makes sure the file exists, or an # error message is output. #-- } elsif ( ($file_type) = $cgi_script =~ /(\.\w+)$/ ) { if ($file_type =~ /$ENV{'CGI_TYPES'}\b/) { $cgi_path = "/" unless ($cgi_path); $ENV{'SCRIPT_PATH'} = join