#!/usr/local/bin/perl # # This is a CGI script to serve HTTPFS Requests # and to implement a "RFC822 Filesystem" # # This server provides a (read-only) access to a structured document -- # an e-mail message -- as if it were a file system directory. # A particular message can be retrieved as it is, as an unstructured document. # Alternatively, a user may regard the message as a "directory", and access # each header (Date, To, etc) as well as the message body as separate files. # For example, a message # # From jkh@his.com Tue Mar 30 03:01:19 1999 # Received: from his.com (his.com [10.1.1.1]) # by chai.pobox.com (Postfix) with ESMTP for # id 0557C66F7; Mon, 29 Mar 1999 18:47:58 -0500 (EST) # Received: (from jkh@localhost) # by his.com (8.9.3/8.9.3) id PAA42143 # for oleg@pobox.com; Mon, 29 Mar 1999 15:48:00 -0800 (PST) # (envelope-from jkh) # Date: Mon, 29 Mar 1999 15:48:00 -0800 (PST) # From: "Jordan K. Hubbard" # Message-Id: <199903292348.PAA42143@his.com> # To: oleg@pobox.com # Subject: final confirmation for FREENIX track # Content-Length: 1167 # Status: OR # # This message is just a final confirmation that you or one of your # co-conspirators have had a paper accepted at this year's FREENIX track # at the USENIX annual technical conference on June 6th - June 11th. # # can be considered a 'directory' of the following files: # MDA-From, Received, Date, From, Message-Id, To, Subject, # Content-Length, Status, Body # In spirit of RFC822, the file system is case-insensitive. MDA-From # refers to the 'From' line as generated by a Message Delivery Agent # (/usr/bin/mail typically). If a RFC822 header occurs more than once in # a message, the content from all instances of the header is concatenated # together. Although the repeated headers ought to be treated as a # directory of the instances. # The timestamp in the MDA-From line determines the modification and # creation timestamps of the message-as-a-directory, and of all of # its "files". # # This is a proof-of-the-concept implementation, don't be too harsh on it! # # HTTPFS requests come from a remote HTTPFS client, which can be # an HTTPFS client framework built into an application, an adapter between # HTTPFS and Midnight Commander's (MC) MCFS, or any other HTTP user agent # (including a web browser or Wget). This HTTPFS server provides a # read-only access. # # This script is supposed to be called in response to a HEAD, GET, PUT, or # DELETE action. Upon receiving this kind of request, an HHTP server parses # it and calls the present script, passing request headers as env variables, # and the PUT message as this script's standard input. # # Thus this script should receive the following environment # # PATH=/sbin:/usr/sbin:/usr/bin # REMOTE_HOST=10.1.1.1 # HTTP_HOST=10.1.10.1:80 # GATEWAY_INTERFACE=CGI/1.1 # SERVER_SOFTWARE=Netscape-Communications/1.12 # SERVER_URL=http://day-1 # REQUEST_METHOD=HEAD # SERVER_NAME=day-1 # HTTP_USER_AGENT=VFS-client/1.1 # SCRIPT_NAME=/cgi-bin/admin/HFS822-server.pl # SERVER_PORT=80 # SERVER_PROTOCOL=HTTP/1.0 # HTTP_PRAGMA=httpfs="lstat" # REMOTE_ADDR=10.1.1.1 # TZ=GMT0 # PATH_INFO=/home/oleg/message1/Date # PATH_TRANSLATED=/w/data/home/oleg/message1/Date # # Of interest (importance) to us is REQUEST_METHOD. HTTP_USER_AGENT tells # the name and the version of the agent, just for reference. # PATH_INFO tells the path to the file/directory of interest to the client; # PATH_TRANSLATED is the same path with the server's DocumentRoot # prepended. HTTP_PRAGMA may contain an httpfs command that specializes # the request (for example, to distinguish a 'stat' from a 'lstat' request). # When the request method is PUT, CONTENT_TYPE and CONTENT_LENGTH env # variables _must_ be present, to tell the message size and data format. # # # This HFS822-server.pl script operates in one of the several modes: # # Inquiry: request method HEAD # # 1.1) Inquire of a resource (file or directory) status: # HEAD /cgi-bin/admin/HFS822-server.pl/dir1/dir2/dirn/file # Pragma: httpfs="stat" # The script returns the status of a specified file or directory in a # ETag: response header # ETag: '"' status-info '"' # where status-info is a string of 11 numbers separated by a single # space: dev ino mode nlink uid gid size atime mtime ctime blocks # where # dev device number of filesystem # ino inode number # mode file mode (type and permissions) # nlink number of (hard) links to the file # uid numeric user ID of file's owner # gid numeric group ID of file's owner # size total size of file, in bytes # atime last access time since the epoch # mtime last modify time since the epoch # ctime inode change time (NOT creation time!) since the epoch # blocks actual number of blocks allocated # all numbers but 'mode' are decimal numbers; mode is an octal number. # see struct stat and 'stat' entry in 'man perlfunc' # # Note the status-info is a "hard validator" - a unique # representation of a resource, file or directory. Indeed, should # a file gets modified, at least its modification timestamp will # change. That's why status-info is being delivered in a ETag, a field # which is designated by the HTTP standard to carry resource identifiers. # # The meaning of the fields above depends on what kind of resource is # actually being inquired about: # # 1.1.1) A message treated as an unstructured file # HEAD /cgi-bin/admin/HFS822-server.pl/message1 # # The status-info describes the regular UFS file message1. # # # 1.1.2) A message treated as a directory # HEAD /cgi-bin/admin/HFS822-server.pl/message1/ # or # HEAD /cgi-bin/admin/HFS822-server.pl/message1/. # status-info = message1.status-info but # dev = - message1.ino # ino = 1 # mode = ( message1.mode ^ S_IFREG ) | S_IFDIR # nlink = 2 # mtime = MDA-From.timestamp # ctime = MDA-From.timestamp # where S_IFREG is 0x8000, and S_IFDIR is 0x4000 (see sys/stat.h) # # 1.1.3) A parent dir for a message treated as a directory # HEAD /cgi-bin/admin/HFS822-server.pl/message1/.. # same as message.status-info # # 1.1.4) A header of a message (e.g, a "To:" message header), including # a special header "Body" (which represents the body of the message) # HEAD /cgi-bin/admin/HFS822-server.pl/message1/To # status-info = message1.status-info but # dev = - message1.ino # ino = field-number // 2 -based # size = size of the header (or a body) # mtime = MDA-From.timestamp # ctime = MDA-From.timestamp # blocks = ceil(size/512) # # # 1.2) Inquire of a 'raw' status of a resource (file or directory) : # HEAD /cgi-bin/admin/HFS822-server.pl/dir1/dir2/dirn/file # Pragma: httpfs="lstat" # or Pragma omitted # Exactly as 1.1) above # # Requesting a resource: GET method # Most of the actions below heed a 'HTTP_IF_MODIFIED_SINCE' request header. # If the header is present and the requested resource was not modified since # the specified time, this script replies '304 Not Modified' and sends # no content. # # 2.1.1) Delivery of a message treated as an unstructured file # GET /cgi-bin/admin/HFS822-server.pl/message1 # If the message exists and readable by the server, its content is # sent as it is, tagged with an "text/plain" MIME type. # # 2.1.2) Listing a message treated as directory # GET /cgi-bin/admin/HFS822-server.pl/message1/ # The script enumerates messages's headers (and a pseudo-header: "Body") # The message in question must exist and be accessible by this script. # The server returns the listing of the directory, a text/plain entity: # for each directory entry (including . and ..) the server writes a line # name/status-info # see above for status-info # The server also sets the Last-Modified: response header to the # modification timestamp of the directory. # # 2.1.3) Delivery of a separate header (or the "Body") # GET /cgi-bin/admin/HFS822-server.pl/message1/Date # The script sends the contents of the corresponding header, # tagged with a "text/plain" MIME type. # # 2.2) Reading the content of a symbolic link: # GET /cgi-bin/admin/HFS822-server.pl/message1/Date # Pragma: httpfs="readlink" # The script returns "404 Not found or accessible" error code # # 2.3) "Opening" of a file and sending out the existing content # (if necessary) # GET /cgi-bin/admin/HFS822-server.pl/message1/from # Pragma: httpfs="preopen-xxxx" # where four characters xxxx in the preopen-xxxx pragma above have the following # meaning: # 1. char: 'R' if the file is being open for reading, '-' otherwise # 2. char: 'W' if the file is being open for writing, # 'A' if the file is being open for appending, # '-' otherwise # 3. char: 'C' if the file is being open with an O_CREAT flag, # 'X' if the file is being open with both O_CREAT and O_EXCL flag, # '-' otherwise # 4. char: 'T' if the file is being open with an O_TRUNC flag, # '-' otherwise # The only allowed preopen request is 'preopen-R---", which is treated # as if no Pragma was given. All other preopen requests return # "403 Forbidden" error. # # Modifying/creating a resource: PUT method # Deleting a resource: DELETE method # As the HFS822 pseudo-filesystem is read-only, we always reply # "405 Method not allowed" # # This script can also be optimized to take advantage of HTTP/1.1 features: # it may consider conditional range requests (IF_RANGE) and # send out only requested parts of a file (rather than the whole file), # taking advantage of a RANGE: HTTP header. # # # $Id: HFS822-server.pl,v 1.1 1999/05/16 22:07:46 oleg Exp oleg $ use Time::Local; # Tell the client off (with a status in $1 and reason in string $2) # Status may be omitted sub bail_out { my $status = @_ > 1 ? shift : "400 Bad Request"; print "Status: $status\n"; print "Content-type: text/html\n\n"; # Two \n\n terminate the headers print "HFS822 Server Error\n"; print "

HFS822 Server Error

\n"; print "This server encountered an error:

$_[0] \n"; exit 4 } # The following list enumerates field names returned by a stat() command @STAT_KEYS = ("dev", "ino", "mode", "nlink", "uid", "gid", "rdev", "size", "atime", "mtime", "ctime", "blksize", "blocks"); # Given a list as the one returned by a stat or lstat command # return the corresponding hash sub stat_array_to_hash { my %stat = (); foreach (@STAT_KEYS) { $stat{$_} = shift; } return %stat; } @MONTHS = ("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"); $MONTHS_STR = join "|", @MONTHS; # Global parameters $MDA_FROM = ""; # An MDA-FROM line if has been read # Given a VFS path, split it into a message name (a path # to a file that presumably holds a RFC822 message), # and a field name (name of a header in that RFC822 message) # The field name is always the last component of the path. # Return a list (message_path, field_name); # If the message is to be treated as a directory # (i.e., the VFS path is "/tmp/message1/" then we return # ("/tmp/message1", "/") # The path is assumed absolute (it always starts with a slash) sub split_path { $_ = shift; if (/([^\/]+)\/([^\/]*)\Z/) { if ($2) { my $dir_name = $` . $1; -f $dir_name and return ( $dir_name, $2 ); return ( $_, "" ); } else { # The path ends in a slash return ( $` . $1, "/" ); } } else { # The path is like "/aaa" return ( $_, "" ); } } # Format the the list returned by a stat or lstat command sub format_stat_info { my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size, $atime,$mtime,$ctime,$blksize,$blocks) = @_; return sprintf "%d %d %o %d %d %d %d %d %d %d %d", $dev,$ino,$mode,$nlink,$uid,$gid,$size, $atime,$mtime,$ctime,$blocks; } # Given a message file $1, open it and read the MDA_From # line. Extract the time stamp and return it (as the number # of epoch seconds) The message file MSG_FILE remains open # (and positioned after the MDA_From line) # The MDA_from like is expected to be in the following format # From me@your.place Tue Mar 30 03:01:19 1999 sub check_message { my $msg_file = shift; open(MSG_FILE,$msg_file) || bail_out "404 Not found or accessible", "$msg_file not found or unreadable: $!"; $MDA_FROM = ; $MDA_FROM =~ /^From \S+ \w+ (\w+)\s+(\d+) (\d\d):(\d\d):(\d\d) (\d\d\d\d)$/ || bail_out "500 Invalid MDA From line in $msg_file", "MDA line '$MDA_FROM' in $msg_file is invalid"; my $mon = index $MONTHS_STR, $1; defined $mon || bail_out "500 Invalid MDA From line", "MDA line '$mda_from_line' in $msg_file is invalid"; my $mtime = timelocal($5,$4,$3,$2,$mon/4,$6); $mtime > 0 || bail_out "500 Invalid MDA From line", "MDA line '$mda_from_line' in $msg_file is invalid"; return $mtime; } # Read the headers from an opened MSG_FILE up to the body # The file is supposed to be opened, and MDA_FROM line # is read. # Leave the MSG_FILE at the beginning of the BODY # The hash of all the headers is returned sub load_headers { $MDA_FROM or die "\nMDA_FROM should have been read already"; my %headers = ("MDA-From" => $MDA_FROM); my $line = ; chomp $line; for(;;) { $line or last; # An empty line ends headers $line =~ /^([a-zA-Z:.-]+):\s+/ || bail_out "500 Invalid header line", "RFC822 header line '$line' is invalid"; my $key = $1; my $value = $'; while ( chomp($line = ) ) { $line =~ /^\s+/ or last; $value .= " $'"; # it was a continuation line } (exists $headers{$key}) ? ($headers{$key} .= "\n$value") : ($headers{$key} = $value); } defined $line || bail_out "500 Unexpected EOF", "Unexpected EOF while reading an RFC822 message"; return %headers; } # Inquiry of a file $1 sub inquiry_file { my ($msg_path, $field_name) = split_path shift; my $sub_command = $ENV{HTTP_PRAGMA}; $sub_command =~ s/httpfs=\"(\w+)\"/$1/; ($sub_command eq "stat") or (not $sub_command) or ($sub_command eq "lstat" ) or bail_out "Invalid pragma httpfs command $sub_command"; my @msg_stat = stat $msg_path; @msg_stat || bail_out "404 Not found or accessible", "$msg_path not found or unreadable: $!"; if( $field_name eq "" ) { # Message as an unstructured file print "ETag: \"", format_stat_info(@msg_stat), "\"\n\n"; exit 0; } my $MDA_time = check_message $msg_path; my %stat_hash = stat_array_to_hash(@msg_stat); $stat_hash{mtime} = $MDA_time; $stat_hash{ctime} = $MDA_time; $stat_hash{dev} = - $stat_hash{ino}; $stat_hash{nlink} = 1; if( $field_name eq "/" ) { # Message as a directory $stat_hash{ino} = 1; ($stat_hash{mode} & 0xF000) == 0x8000 || bail_out "500 Message format error", "$msg_path is not a regular file"; $stat_hash{mode} = ($stat_hash{mode} ^ 0x8000) | 0x4000; $stat_hash{nlink} = 2; print "ETag: \"", format_stat_info(@stat_hash{@STAT_KEYS}), "\"\n\n"; exit 0; } my %headers = load_headers(); #print join "\n", %headers; if( lc($field_name) eq "body" ) { # The body of the message $stat_hash{ino} = (scalar keys %headers) + 2; my $size = $stat_hash{size} - (tell MSG_FILE); $stat_hash{size} = $size; $stat_hash{blocks} = ($size+511)/512; print "ETag: \"", format_stat_info(@stat_hash{@STAT_KEYS}), "\"\n\n"; exit 0; } my $i = 2; foreach (keys %headers) { if( lc($_ ) eq lc($field_name) ) { $stat_hash{ino} = $i; $stat_hash{size} = length($headers{$_}); $stat_hash{blocks} = ($stat_hash{size}+511)/512; print "ETag: \"", format_stat_info(@stat_hash{@STAT_KEYS}), "\"\n\n"; exit 0; } $i++; } bail_out "404 RFC822 field not found", "Field $field_name was not found in the RFC822 message"; } # Print a Last-Modified: header corresponding to the epoch timestamp $1 # e.g., Last-modified: Sun, 06 Nov 1994 08:49:37 GMT sub print_last_modif { my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = gmtime(shift); printf "Last-Modified: %s, %02d %s %4d %02d:%02d:%02d GMT\n", ("Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat")[$wday], $mday, ("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")[$mon], 1900+$year, $hour,$min,$sec; } # Resolve the symbolic link given as $1 sub read_link { bail_out "404 Bad symbolic link", "Symbolic link cannot be resolved in HFS822"; } # Send a regular file whose path is in $1 sub send_regular_file { my $file_path = shift; sysopen(GET_FILE,"$file_path",O_RDONLY) or die "The file must exist: $!"; $| = 1; binmode GET_FILE; my $chunk_size = 1024; my $chunk; my $read_res; binmode STDOUT; while ( $read_res = sysread GET_FILE,$chunk,$chunk_size ) { syswrite STDOUT,$chunk,$read_res || bail_out "STDOUT write error $!"; } close GET_FILE; } # Pre-open a file $1 according to modes in $2, as explained in the # title comments above. # Return to continue GET-ting of the file as usual (that is, checking # of a modification timestamp and sending the file content) sub preopen_file { my $file_path = shift; my $mode_str = shift; $mode_str eq 'R---' and return; bail_out "403 Forbidden", "RFC message $file_path is not writable under HFS822"; } # Get a file or directory $1 sub get_file { my ($msg_path, $field_name) = split_path shift; my $sub_command = $ENV{HTTP_PRAGMA}; $sub_command =~ s/httpfs=\"(\S+)\"/$1/; ($sub_command =~ /preopen-(\S\S\S\S)/) && preopen_file $msg_path,$1; my @msg_stat = stat $msg_path; ( @msg_stat && -r _ ) || bail_out "404 Not found or accessible", "$msg_path not found or unreadable: $!"; # Message as an unstructured file if ($field_name eq "" ) { print "Content-type: application/octet-stream\n"; print "Content-length: ",$msg_stat[7],"\n"; print "ETag: \"",format_stat_info(@msg_stat),"\"\n\n"; send_regular_file($msg_path); exit 0 } my $MDA_time = check_message $msg_path; print_last_modif $MDA_time; my $modif_since = $ENV{HTTP_IF_MODIFIED_SINCE}; # like Tue, 28 Jul 1998 22:31:49 GMT if( $_ = $modif_since ) { my ($day,$ascii_mon,$year,$hh,$min,$sec) = m/\A\w\w\w, (\d\d) (\w\w\w) (\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT/; $day || bail_out "wrong date format in $modif_since"; my ($mtime_sec,$mtime_min,$mtime_hour,$mtime_mday,$mtime_mon, $mtime_year,$mtime_wday,$mtime_yday,$mtime_isdst) = gmtime($MDA_time); my $mon = index $MONTHS_STR,$ascii_mon; $mon >= 0 || bail_out "wrong month name in $modif_since"; $mon = $mon/4; # $mon is zero based #my $ts1=sprintf("19%2d%02d%02d%02d%02d%02d", # $mtime_year,$mtime_mon,$mtime_mday,$mtime_hour,$mtime_min,$mtime_sec); #my $ts2=($year . sprintf("%02d",$mon) . $day . $hh . $min . $sec); # bail_out "modif >$ts1< >$ts2<"; if( sprintf("%4d%02d%02d%02d%02d%02d", ($mtime_year+1900),$mtime_mon,$mtime_mday,$mtime_hour,$mtime_min,$mtime_sec) le ($year . sprintf("%02d",$mon) . $day . $hh . $min . $sec) ) { print "Status: 304 Not Modified\n\n"; exit 0; } } $sub_command eq "readlink" && read_link($msg_path); my %stat_hash = stat_array_to_hash(@msg_stat); $stat_hash{mtime} = $MDA_time; $stat_hash{ctime} = $MDA_time; $stat_hash{dev} = - $stat_hash{ino}; $stat_hash{nlink} = 1; my %headers = load_headers(); #print join "\n", %headers; if( $field_name eq "/" ) { # Message as a directory $stat_hash{ino} = 1; my $old_mode = $stat_hash{mode}; my $full_size = $stat_hash{size}; ($stat_hash{mode} & 0xF000) == 0x8000 || bail_out "500 Message format error", "$msg_path is not a regular file"; $stat_hash{mode} = ($stat_hash{mode} ^ 0x8000) | 0x4000; $stat_hash{nlink} = 2; print "ETag: \"", format_stat_info(@stat_hash{@STAT_KEYS}), "\"\n"; print "Content-type: text/plain\n\n"; print "./", format_stat_info(@stat_hash{@STAT_KEYS}), "\n"; print "../", format_stat_info(@msg_stat), "\n"; $stat_hash{nlink} = 1; $stat_hash{mode} = $old_mode; my $i = 2; foreach(keys %headers) { $stat_hash{ino} = $i; $stat_hash{size} = length($headers{$_}); $stat_hash{blocks} = ($stat_hash{size}+511)/512; print "$_/", format_stat_info(@stat_hash{@STAT_KEYS}), "\n"; $i++; } $stat_hash{ino} = $i; my $size = $full_size - (tell MSG_FILE); $stat_hash{size} = $size; $stat_hash{blocks} = ($size+511)/512; print "Body/",format_stat_info(@stat_hash{@STAT_KEYS}), "\n"; exit 0; } if( lc($field_name) eq "body" ) { # The body of the message $stat_hash{ino} = (scalar keys %headers) + 2; my $size = $stat_hash{size} - (tell MSG_FILE); $stat_hash{size} = $size; $stat_hash{blocks} = ($size+511)/512; print "ETag: \"", format_stat_info(@stat_hash{@STAT_KEYS}), "\"\n"; print "Content-type: text/plain\n"; print "Content-length: $stat_hash{size}\n\n"; my @lines = ; print @lines; exit 0; } my $i = 2; foreach (keys %headers) { if( lc($_ ) eq lc($field_name) ) { $stat_hash{ino} = $i; my $val = $headers{$_}; $stat_hash{size} = length($val); $stat_hash{blocks} = ($stat_hash{size}+511)/512; print "ETag: \"", format_stat_info(@stat_hash{@STAT_KEYS}), "\"\n"; print "Content-type: text/plain\n"; print "Content-length: $stat_hash{size}\n\n"; print $val; exit 0; } $i++; } bail_out "404 RFC822 field not found", "Field $field_name was not found in the RFC822 message"; exit 0; } # Main module my $req_method = $ENV{REQUEST_METHOD} || bail_out "REQUEST_METHOD is missing: the script isn't called by a HTTP server"; my $file_path = $ENV{PATH_INFO}; # IIS prepends the name of the script to the PATH_INFO # so we remove it if we come across it $file_path =~ s/\A$ENV{SCRIPT_NAME}//; $file_path || bail_out "PATH_INFO is missing: the script isn't called by a HTTP server"; my $Is_IIS = $ENV{SERVER_SOFTWARE} =~ /\AMicrosoft-IIS/; # URL-unquote any special characters $file_path =~ tr/+/ /; $file_path =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg; $file_path = $file_path =~ /\A\/DeepestRoot/ ? $' : ($Is_IIS ? "C:/InetPub$file_path" : $ENV{PATH_TRANSLATED}); $file_path =~ s/\/\.\.\Z//; # trailing /.. is removed $file_path =~ s/\/\.\Z/\//; # trailing /. is replaced by / $file_path or bail_out "A path to a RFC822 message is empty"; #bail_out "IIS is $Is_IIS"; #bail_out "file $file_path"; #my $env_out=""; while( ($key,$val)=each(%ENV) ) { $env_out .= "$key=$val
" } #bail_out "env $env_out"; $req_method eq "HEAD" && inquiry_file($file_path); $req_method eq "GET" && get_file($file_path); bail_out "405 Method not allowed", "$req_method method is invalid or not allowed";