; Reading from a comma-separated stream ; ; function: read-csv NAN-INTERP INPUT-PORT -> LIST-OF-VALS ; ; Here NAN-INTERP is either a procedure STRING-OR-SYMBOL -> VAL or ; an associative list with string or symbolic keys. ; INPUT-PORT should contain the sequence of tokens separated with commas ; and arbitrary amount of white space. The tokens are of the following three ; kinds: ; -- number (as accepted by string->number) ; -- string (double-quoted, may contain backslash escapes) ; we use the 'read' function to read strings; so the string ; may contain any backslash escapes as permitted by R5RS and the ; particular Scheme implementation. ; -- anything else, converted to a symbol. A special type of 'anything' ; else is a `missing token', which `occurs' between two consecutive ; commas (perhaps separated only by the white space), or the ; trailing comma. ; The missing token is a symbol whose string representation is the ; empty string. ; ; The function returns the list of read values: a numeric token is ; converted to a number; strings and anything else are passed to the ; NAN-INTERP function and its result is used in the list of return VALs. ; If NAN-INTERP is an associative list, we do the lookup in the list then. ; ; This file uses the input parsing library: the following files ; from this directory should be present or loaded first: ; (include "myenv.scm") ; or myenv-chez or myenv-scm, etc ; (include "srfi-13-local.scm") ; or import from SRFI-13 if available ; (include "char-encoding.scm") ; (include "util.scm") ; (include "input-parse.scm") ; $Id: read-csv.scm,v 1.1 2006/03/31 02:24:28 oleg Exp oleg $ ; The following two definitions satisfy the import requirement ; of the parsing library (define (parser-error port message . specialising-msgs) (apply cerr (cons message specialising-msgs)) (cerr nl) (exit 4)) (define (ssax:warn port message . specialising-msgs) (apply cerr (cons message specialising-msgs)) (cerr nl)) (define whitespace (list #\space char-return char-tab char-newline)) (define (read-csv nan-interp port) (if (eof-object? (skip-while whitespace port)) '() ; empty port (let loop ((accum '())) (let*-values (((token) (next-token whitespace '(#\space #\, #\" *eof*) "reading token" port)) ((term) (skip-while whitespace port)) ; must be comma, quote or EOF ((term val) (cond ((eqv? term #\") ; expecting string (if (not (equal? token "")) (parser-error port "junk before the string token. Missing comma?" token)) (let* ((str (read port)) (term (skip-while whitespace port))) ; must be , or EOF (if (not (eof-object? term)) (assert-curr-char '(#\,) "comma after string token" port)) (values term (cond ((procedure? nan-interp) (nan-interp str)) ((assoc str nan-interp) => cdr) (else (parser-error port "unexpected string token str" str)))))) (else (values (read-char port) ; read and skip the terminator (cond ((string->number token)) ((procedure? nan-interp) (nan-interp (string->symbol token))) ((assq (string->symbol token) nan-interp) => cdr) (else (parser-error port "unexpected other token" token))))))) ((accum) (cons val accum))) (if (eof-object? term) (reverse accum) (loop accum)))))) (display "test1") (newline) (display (call-with-input-string " 1.223, nil, 22.3334, 2.23 , nil" (lambda (port) (read-csv '((nan . -999) (nil . -1.0)) port)))) (newline) ; (1.223 -1.0 22.3334 2.23 -1.0) (display "test2") (newline) (write (call-with-input-string " 1.223, \"str\" , \"complex \\\\ string, indeed\\\"\" , 2.23 , nil" (lambda (port) (read-csv (lambda (x) x) port)))) (newline) ; (1.223 "str" "complex \\ string, indeed\"" 2.23 nil) ; Please note three `skipped' numbers, the last one at the very end! (display "test3") (newline) (display (call-with-input-string "-1,+2,-15e-15,, nan, 22.3334, 2.23 , , \"nil\"," (lambda (port) (read-csv `((,(string->symbol "") . 0.0) ; missing number (two commas in a row) (nan . -999) ("nil" . -1.0)) port)))) ; zeros stand for skipped numbers ; (-1 2 -1.5e-14 0.0 -999 22.3334 2.23 0.0 -1.0 0.0)