#!/usr/local/bin/perl # # This is a CGI script to serve HTTPFS Requests # # The HTTPFS requests come from a remote HTTPFS client, which can be # an HTTPFS client framework built into an application, an adapter between # HTTPFS and Midnight Commander's (MC) MCFS, or any other HTTP user agent # (including a web browser or Wget). The purpose of this script is # to list directories on the present computer, send out files, and # accept data for new or existing files. # This script acts effectively as an "NFS daemon" which "exports", well, # the entire file system of this hosts (or a part of it). # # This script is supposed to be called in response to a HEAD, GET, PUT, or # DELETE action. Upon receiving this kind of request, an HHTP server parses # it and calls the present script, passing request headers as env variables, # and the PUT message as this script's standard input. # # Thus this script should receive the following environment # # PATH=/sbin:/usr/sbin:/usr/bin # REMOTE_HOST=10.1.1.1 # HTTP_HOST=10.1.10.1:80 # GATEWAY_INTERFACE=CGI/1.1 # SERVER_SOFTWARE=Netscape-Communications/1.12 # SERVER_URL=http://day-1 # REQUEST_METHOD=PUT # SERVER_NAME=day-1 # HTTP_USER_AGENT=VFS-client/1.1 # SCRIPT_NAME=/cgi-bin/admin/MCHFS-server.pl # SERVER_PORT=80 # SERVER_PROTOCOL=HTTP/1.0 # HTTP_PRAGMA=httpfs="lstat" # REMOTE_ADDR=10.1.1.1 # TZ=GMT0 # PATH_INFO=/aaa/bbb/ccc/ # PATH_TRANSLATED=/w/data/aaa/bbb/ccc/ # # Of interest (importance) to us are REQUEST_METHOD. HTTP_USER_AGENT tells # the name and the version of the agent, just for reference. # PATH_INFO tells the path to the file/directory of interest to the client; # PATH_TRANSLATED is the same path with the server's DocumentRoot # prepended. HTTP_PRAGMA may contain an httpfs command that specializes # the request (for example, to distinguish a 'stat' from a 'lstat' request). # When the request method is PUT, CONTENT_TYPE and CONTENT_LENGTH env # variables _must_ be present, to tell the message size and data format. # # A file/directory path a HTTPFS client specifies is normally a relative path # from this HTTP server's DocumentRoot. For example, if an HTTP server was # configured with DocumentRoot=/w/data, and a client makes a request # GET /cgi-bin/admin/MCHFS-server.pl/images/foo # then the client actually refers to a file /w/data/images/foo. # However, the HTTPFS client may specify a file by its _absolute_ path # on this server: # GET /cgi-bin/admin/MCHFS-server.pl/DeepestRoot/etc/passwd # really refers to a '/etc/passwd' # # # This MCHFS-server.pl script operates in one of the several modes: # # Inquiry: request method HEAD # # 1.1) Inquire of a resource (file or directory) status: # HEAD /cgi-bin/admin/MCHFS-server.pl/dir1/dir2/dirn/file # Pragma: httpfs="stat" # The script returns the status of a specified file or directory in a # ETag: response header # ETag: '"' status-info '"' # where status-info is a string of 11 numbers separated by a single # space: dev ino mode nlink uid gid size atime mtime ctime blocks # where # dev device number of filesystem # ino inode number # mode file mode (type and permissions) # nlink number of (hard) links to the file # uid numeric user ID of file's owner # gid numeric group ID of file's owner # size total size of file, in bytes # atime last access time since the epoch # mtime last modify time since the epoch # ctime inode change time (NOT creation time!) since the epoch # blocks actual number of blocks allocated # all numbers but 'mode' are decimal numbers; mode is an octal number. # see struct stat and 'stat' entry in 'man perlfunc' # # Note the status-info is a "hard validator" - a unique # representation of a resource, file or directory. Indeed, should # a file gets modified, at least its modification timestamp will # change. That's why status-info is being delivered in a ETag, a field # which is designated by the HTTP standard to carry resource identifiers. # # 1.2) Inquire of a 'raw' status of a resource (file or directory) : # HEAD /cgi-bin/admin/MCHFS-server.pl/dir1/dir2/dirn/file # Pragma: httpfs="lstat" # or Pragma omitted # See 1) above; the only difference that a lstat() of a file # is obtained. # # Requesting a resource: GET method # Most of the actions below heed a 'HTTP_IF_MODIFIED_SINCE' request header. # If the header is present and the requested resource was not modified since # the specified time, this script replies '304 Not Modified' and sends # no content. # # 2.1) Listing of a directory: # GET /cgi-bin/admin/MCHFS-server.pl/dir1/dir2/dirn/ # The script should return the contents of a directory # $DocumentRoot/dir1/dir2/dirn # or /dir2/dirn (if dir1 is "DeepestRoot") # The directory in question must exist and be accessible by this script. # The server returns the listing of the directory, a text/plain entity: # for each directory entry (including . and ..) the server writes a line # name/status-info # see above for status-info # The server also sets the Last-Modified: response header to the # modification timestamp of the directory. # # 2.2) Delivery of a file: # GET /cgi-bin/admin/MCHFS-server.pl/dir1/dir2/filename # The script should send the contents of a file # $DocumentRoot/dir1/dir2/filename # or /dir2/filename (if dir1 is "DeepestRoot") # If the file exists and is readable by this server, it sends the file # content as it is, tagged with an "application/octet-stream" MIME type. # # 2.3) Reading the content of a symbolic link: # GET /cgi-bin/admin/MCHFS-server.pl/dir1/dir2/filename # Pragma: httpfs="readlink" # The script should send the content (a resolved target name) of a symbolic link # $DocumentRoot/dir1/dir2/filename # or /dir2/filename (if dir1 is "DeepestRoot") # If the file is indeed a symbolic link and can be successfully resolved # by this server, it sends the resolved name as the reply content (terminated # with \n), tagged with a "text/plain" MIME type. # # 2.3) "Opening" of a file and sending out the existing content # (if necessary) # GET /cgi-bin/admin/MCHFS-server.pl/dir1/dir2/filename # Pragma: httpfs="preopen-xxxx" # where four characters xxxx in the preopen-xxxx pragma above have the following # meaning: # 1. char: 'R' if the file is being open for reading, '-' otherwise # 2. char: 'W' if the file is being open for writing, # 'A' if the file is being open for appending, # '-' otherwise # 3. char: 'C' if the file is being open with an O_CREAT flag, # 'X' if the file is being open with both O_CREAT and O_EXCL flag, # '-' otherwise # 4. char: 'T' if the file is being open with an O_TRUNC flag, # '-' otherwise # This script checks a file # $DocumentRoot/dir1/dir2/filename # or /dir2/filename (if dir1 is "DeepestRoot") # and: # - the file does not exist and O_CREAT is not set: return an error # "404 File not found" # - the file does not exist and O_CREAT is set: create a file with a given # name with a zero size. Return "201 Created" if successful, # "403 Forbidden" if the creation fails. The file is created under the # effective GID and UID of this script, with permissions as granted by # the current umask. # - if the file exists and both O_CREAT and O_EXCL are set, return an # error "403 File already exists" # - if the file exists and O_TRUNC is set, the file is truncated to zero # length. Return "201 Truncated" if successful, or "403 Forbidden" # if the truncation fails. # - If the file is being open for writing or appending, check the permission # of the file. If the file does not permit modifications, send an error # "403 Forbidden" # - otherwise, send the contents of the file, if it wasn't modified # after a specified time, just as in option 2.2 above. # # Modifying/creating a resource: PUT method # Altering contents of a file, creating a new file: # PUT /cgi-bin/admin/MCHFS-server.pl/dir1/dir2/filename # The script should write the submitted data to a file # $DocumentRoot/dir1/dir2/filename # or /dir2/filename (if dir1 is "DeepestRoot") # The file is created if needed. The script must have permissions to # write into the file (or create it). The client was supposed to # send the data for the file as a part of its request. The data should # be tagged with an "application/octet-stream" MIME type. This HTTPFS server # responds in "201 Created" HTTP code, or in one of the error codes. # # Deleting a resource: DELETE method # # Security consideration: # This script obviously opens up the file system of the host computer to # the entire world. If the HTTP daemon runs as a root, then any remote # HTTPFS client can do absolutely anything with this host's files. Clearly this # is not often desirable. Therefore, one may limit access to this script # (via .htaccess or conf/access.conf) to trusted hosts/users, and/or demand # authentication (in a regular HTTP way, by a restrict directive in a # .htaccess file). Note that the access restrictions above are the # responsibility of the httpd; this script doesn't need to do anything about # them (and doesn't even need to be aware of them). # # In addition, this HTTPFS server script may implement its own access # control. For example, it may refuse PUT requests. This effectively makes # exported file systems read-only. The MCHFS-server.pl script may allow # modification/listing of only certain files (based on whatever criteria). # The script may disallow DeepestRoot and ".." in the file paths, thus # restricting user access only to a specific part of the file system tree. # The possibilities are endless. # # This script can also be optimized to take advantage of HTTP/1.1 features: # it may consider conditional range requests (IF_RANGE) and # send out only requested parts of a file (rather than the whole file), # taking advantage of a RANGE: HTTP header. # # # $Id: MCHFS-server.pl,v 3.3 1999/05/21 17:57:04 oleg Exp oleg $ # Tell the client off (with a status in $1 and reason in string $2) # Status may be omitted sub bail_out { my $status = @_ > 1 ? shift : "400 Bad Request"; print "Status: $status\n"; print "Content-type: text/html\n\n"; # Two \n\n terminate the headers print "VFS Server Error\n"; print "

VFS Server Error

\n"; print "This server encountered an error:

$_[0] \n"; exit 4 } # Put file $1 sub put_file { my $file_path = shift; my $to_read = $ENV{CONTENT_LENGTH} || bail_out "411 Length required", "CONTENT_LENGTH is missing"; $ENV{CONTENT_TYPE} eq "application/octet-stream" || bail_out "PUT message must be of a type application/octet-stream"; open(PUT_FILE,">$file_path") || bail_out "403 Forbidden", "$file_path is not writable: $!"; my $chunk_size = 1024; my $chunk; binmode STDIN; binmode PUT_FILE; while ( $to_read > 0 && ($read_res = sysread STDIN,$chunk, ($chunk_size < $to_read ? $chunk_size : $to_read))) { syswrite PUT_FILE,$chunk,$read_res || bail_out "$file_path write error $!"; $to_read -= $read_res; } $to_read == 0 || bail_out "500 Read Error", "Failed to read the input completely"; close PUT_FILE; my @stat_res = lstat($file_path); @stat_res || bail_out "404 Not found or accessible", "$file_path not found or unreadable: $!"; print "Status: 201 Created $file_path\n"; print "ETag: \""; print format_stat_info(@stat_res); print "\"\n\n"; exit 0 } # Format the the list returned by a stat or lstat command sub format_stat_info { my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size, $atime,$mtime,$ctime,$blksize,$blocks) = @_; return sprintf "%d %d %o %d %d %d %d %d %d %d %d", $dev,$ino,$mode,$nlink,$uid,$gid,$size, $atime,$mtime,$ctime,$blocks; } # Inquiry of a file $1 sub inquiry_file { my $file_path = shift; my $sub_command = $ENV{HTTP_PRAGMA}; $sub_command =~ s/httpfs=\"(\w+)\"/$1/; my @stat_res = ($sub_command eq "stat") ? stat($file_path) : ( not $sub_command or $sub_command eq "lstat" ) ? lstat($file_path) : bail_out "Invalid pragma httpfs command $sub_command"; @stat_res || bail_out "404 Not found or accessible", "$file_path not found or unreadable: $!"; print "ETag: \""; print format_stat_info(@stat_res); print "\"\n\n"; exit 0; } # Print a Last-Modified: header corresponding to the epoch timestamp $1 # e.g., Last-modified: Sun, 06 Nov 1994 08:49:37 GMT sub print_last_modif { my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = gmtime(shift); printf "Last-Modified: %s, %02d %s %4d %02d:%02d:%02d GMT\n", ("Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat")[$wday], $mday, ("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")[$mon], 1900+$year, $hour,$min,$sec; } # Resolve the symbolic link given as $1 sub read_link { my $resolved_link = readlink shift; $resolved_link || bail_out "404 Bad symbolic link", "Symbolic link cannot be resolved: $!"; print "Content-type: text/plain\n\n"; print $resolved_link, "\n"; exit 0; } # List a directory $1 sub list_directory { ( chdir shift && opendir(DIR,".") ) || bail_out "404 Not found or accessible", "directory is not found, or unreadable: $!"; print "Content-type: text/plain\n\n"; map { print "$_/"; print format_stat_info( lstat ); print "\n"; } readdir(DIR); closedir DIR; exit 0 } # Send a regular file whose path is in $1 sub send_regular_file { my $file_path = shift; sysopen(GET_FILE,"$file_path",O_RDONLY) || bail_out "404 Not found or accessible", "$file_path not found, or unreadable"; $| = 1; binmode GET_FILE; print "Content-type: application/octet-stream\n\n"; # print "Content-type: text/plain\n\n"; my $chunk_size = 1024; my $chunk; my $read_res; binmode STDOUT; while ( $read_res = sysread GET_FILE,$chunk,$chunk_size ) { syswrite STDOUT,$chunk,$read_res || bail_out "STDOUT write error $!"; } close GET_FILE; exit 0 } # Pre-open a file $1 according to modes in $2, as explained in the # title comments above # Return to continue GET-ting of the file as usual (that is, checking # of a modification timestamp and sending the file content) # Note that the file is already stat(), so we can use _ for all # file tests. sub preopen_file { my $file_path = shift; my $mode_str = shift; $mode_str eq 'R---' and return; my ($mode_r,$mode_w,$mode_c,$mode_t) = unpack("aaaa",$mode_str); if( not -e _ ) { # if the file does not exist $mode_c eq '-' && bail_out "404 File not found", "File does not exist and O_CREAT not set"; $mode_c eq 'C' || $mode_c eq 'X' || bail_out "Invalid mode_c in $mode_str"; open(FILE,">$file_path") or bail_out "403 Forbidden", "Failed to create a non-existent file $file_path due to $!"; close FILE; print "Status: 201 Created\n\n"; exit 0 } # File does exist $mode_c eq 'X' && bail_out "403 File already exists", "File $file_path already exists"; if( $mode_t eq 'T' ) { open(FILE,">$file_path") or bail_out "403 Forbidden", "Truncation of $file_path failed due to $!"; close FILE; print "Status: 201 Truncated\n\n"; exit 0; } $mode_t eq '-' || bail_out "Invalid mode_t in $mode_str"; $mode_w eq '-' || -w _ || bail_out "403 Forbidden", "File $file_path is not writable"; } # Get a file or directory $1 sub get_file { my $file_path = shift; my $sub_command = $ENV{HTTP_PRAGMA}; $sub_command =~ s/httpfs=\"(\S+)\"/$1/; stat($file_path); ($sub_command =~ /preopen-(\S\S\S\S)/) && preopen_file $file_path,$1; my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size, $atime,$mtime,$ctime,$blksize,$blocks) = stat(_); $mtime || bail_out "404 Not found or accessible", "$file_path not found or is unreadable: $!"; print_last_modif $mtime; my $modif_since = $ENV{HTTP_IF_MODIFIED_SINCE}; # like Tue, 28 Jul 1998 22:31:49 GMT if( $_ = $modif_since ) { my ($day,$ascii_mon,$year,$hh,$min,$sec) = m/\A\w\w\w, (\d\d) (\w\w\w) (\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT/; $day || bail_out "wrong date format in $modif_since"; my ($mtime_sec,$mtime_min,$mtime_hour,$mtime_mday,$mtime_mon, $mtime_year,$mtime_wday,$mtime_yday,$mtime_isdst) = gmtime($mtime); my $mon = index "Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec",$ascii_mon; $mon >= 0 || bail_out "wrong month name in $modif_since"; $mon = $mon/4; # $mon is zero based #my $ts1=sprintf("19%2d%02d%02d%02d%02d%02d", # $mtime_year,$mtime_mon,$mtime_mday,$mtime_hour,$mtime_min,$mtime_sec); #my $ts2=($year . sprintf("%02d",$mon) . $day . $hh . $min . $sec); # bail_out "modif >$ts1< >$ts2<"; if( sprintf("%4d%02d%02d%02d%02d%02d", ($mtime_year+1900),$mtime_mon,$mtime_mday,$mtime_hour,$mtime_min,$mtime_sec) le ($year . sprintf("%02d",$mon) . $day . $hh . $min . $sec) ) { print "Status: 304 Not Modified\n\n"; exit 0; } } print "ETag: \""; print format_stat_info(stat(_)); print "\"\n"; $sub_command eq "readlink" && read_link($file_path); -d _ && list_directory($file_path); send_regular_file($file_path); bail_out "404 Not found or accessible", "$file_path not found, or unreadable: $!"; exit 0; } # Main module my $req_method = $ENV{REQUEST_METHOD} || bail_out "REQUEST_METHOD is missing: the script isn't called by a HTTP server"; my $file_path = $ENV{PATH_INFO}; # IIS prepends the name of the script to the PATH_INFO # so we remove it if we come across it $file_path =~ s/\A$ENV{SCRIPT_NAME}//; $file_path || bail_out "PATH_INFO is missing: the script isn't called by a HTTP server"; my $Is_IIS = $ENV{SERVER_SOFTWARE} =~ /\AMicrosoft-IIS/; # URL-unquote any special characters $file_path =~ tr/+/ /; $file_path =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg; $file_path = $file_path =~ /\A\/DeepestRoot/ ? $' : ($Is_IIS ? "C:/InetPub$file_path" : $ENV{PATH_TRANSLATED}); $file_path or ($file_path = "/"); #bail_out "IIS is $Is_IIS"; #bail_out "file $file_path"; #my $env_out=""; while( ($key,$val)=each(%ENV) ) { $env_out .= "$key=$val
" } #bail_out "env $env_out"; $req_method eq "HEAD" && inquiry_file($file_path); $req_method eq "PUT" && put_file($file_path); $req_method eq "GET" && get_file($file_path); #$req_method eq "DELETE" && delete_file($file_path); bail_out "405 Method not allowed", "$req_method method is invalid or not allowed";