• Re: Fast CSV reading in Common Lisp

    From B. Pym@Nobody447095@here-nor-there.org to comp.lang.lisp on Fri Aug 22 08:47:21 2025
    From Newsgroup: comp.lang.lisp

    taruss wrote:

    First, you could get each line using
    READ-LINE, then use a loop through the
    positions of the delimiter character #\; to
    find the field positions. Now,
    unfortunately, Common Lisp doesn't have a
    PARSE-FLOAT function, so you would have to
    import a library to do that.
    See PARSE-NUMBER from
    http://cliki.net/parse-number or the older
    PARSE-FLOAT from http://www.cs.cmu.edu/Groups/AI/util/lang/lisp/code/math/atof/0.html
    (The latter
    doesn't have the nice floating format
    support, though).

    So, you could do something along the lines of (untested):

    ;; Read "my-file.csv" one line at a time and split every line on #\;.
    ;; Column 0 is kept as a string; all later columns are handed to
    ;; PARSE-NUMBER.  :NCONC splices each row's fields into one flat list.
    (with-open-file (stream "my-file.csv" :direction :input)
      ;; ":for line =" re-evaluates READ-LINE on every pass (":in" would try
      ;; to walk the first line as a list).  With the NIL NIL arguments
      ;; READ-LINE returns NIL at end of file, which stops the :while.
      (loop :for line = (read-line stream nil nil)
            :while line
            :nconc (loop :for column-number :upfrom 0
                         :as start = 0 :then end
                         :as end = (position #\; line :start (1+ start))
                         ;; NOTE(review): column 0 drops one character at each
                         ;; end -- presumably surrounding quotes; confirm
                         ;; against the real data.
                         :collect (if (zerop column-number)
                                      (subseq line (1+ start) (1- end))
                                      ;; PARSE-NUMBER takes its bounds as
                                      ;; keyword arguments; END is NIL on the
                                      ;; last column, meaning "to end of line".
                                      (parse-number line :start (1+ start) :end end))
                         :when (null end) :do (loop-finish))))

    Gauche Scheme

    ;; Sample input: four records separated by newlines, fields separated by #\;.
    ;; The line breaks are written as \n escapes so that read-line sees four
    ;; separate records, matching the four-row result printed further down.
    (define csv-data
      "1999-01-04;1391.12;3034.53;66.515625;86.2;441.39\n1999-01-05;1404.86;3072.41;66.3125;86.17;440.63\n1999-01-06;1435.12;3156.59;66.4375;86.32;441.7\n1999-01-07;1432.32;3106.08;66.25;86.22;447.67")

    ;; Turn one semicolon-separated record into a list: the leading field is
    ;; kept as a string and every remaining field goes through string->number.
    (define (parse-csv str)
      (let* ((pieces (string-split str #\;))
             (label (car pieces))
             (numbers (map string->number (cdr pieces))))
        (cons label numbers)))

    ;; Read csv-data line by line, with current input taken from the string,
    ;; and collect one parsed record per line.
    (with-input-from-string csv-data (lambda ()
    ;; <> in the step position repeats the init expression, so (read-line)
    ;; runs again on every iteration.
    (Do ((line (read-line) <>)
    ;; Each parsed record is consed onto the front of res.
    (res '() (cons (parse-csv line) res)))
    ;; "@ res" makes the loop return (reverse res), i.e. records in input order.
    ((eof-object? line) @ res))))

    (("1999-01-04" 1391.12 3034.53 66.515625 86.2 441.39)
    ("1999-01-05" 1404.86 3072.41 66.3125 86.17 440.63)
    ("1999-01-06" 1435.12 3156.59 66.4375 86.32 441.7)
    ("1999-01-07" 1432.32 3106.08 66.25 86.22 447.67))

    Given:

    ;; Do-aux rewrites the variable specs of a Do form one at a time, moving
    ;; each finished spec into the (seen ...) accumulator, then rewrites the
    ;; termination clause, and finally emits a plain do.  <>, @ and values are
    ;; matched literally.  Rules are tried top to bottom, so their order matters.
    (define-syntax Do-aux
    (syntax-rules (<> @ values)
    ;; (var init <>): reuse the init expression as the step expression.
    [(_ ((a b <>) d ...) (seen ...) z ...)
    (Do-aux (d ...) (seen ... (a b b)) z ...) ]
    ;; (var init step): passed through unchanged.
    [(_ ((a b c) d ...) (seen ...) z ...)
    (Do-aux (d ...) (seen ... (a b c)) z ...) ]
    ;; (var init): passed through unchanged (no step expression).
    [(_ ((a b) d ...) (seen ...) z ...)
    (Do-aux (d ...) (seen ... (a b)) z ...) ]
    ;; (var): bound to the empty list.
    [(_ ((a) d ...) (seen ...) z ...)
    (Do-aux (d ...) (seen ... (a '())) z ...) ]
    ;; Anything else in spec position (e.g. a bare variable name) is also
    ;; bound to the empty list.
    [(_ (a d ...) (seen ...) z ...)
    (Do-aux (d ...) (seen ... (a '())) z ...) ]
    ;; Specs exhausted and the clause ends in "@ (values x ...)": each x is
    ;; wrapped in reverse~.  This rule must come before the next one, whose
    ;; xs would otherwise match the whole (values x ...) form.
    ;; NOTE(review): reverse~ is not defined in this post -- presumably a
    ;; helper of the author's; confirm before use.
    [(_ () seen (a b ... @ (values x ...)) z ...)
    (Do-aux () seen (a b ... (values (reverse~ x) ...)) z ...) ]
    ;; Clause ends in "@ xs": the result expression becomes (reverse xs).
    [(_ () seen (a b ... @ xs) z ...)
    (Do-aux () seen (a b ... (reverse xs)) z ...) ]
    ;; Nothing left to rewrite: expand into the built-in do.
    [(_ () seen till body ...)
    (do seen till body ...) ]))
    ;; Entry point: hands the variable specs to Do-aux together with an empty
    ;; accumulator, followed by the termination clause and any body forms.
    (define-syntax Do
      (syntax-rules ()
        ((_ bindings exit-clause commands ...)
         (Do-aux bindings () exit-clause commands ...))))
    --
    [T]he problem is that lispniks are as cultish as any other devout group and basically fall down frothing at the mouth if they see [heterodoxy].
    --- Kenny Tilton
    The good news is, it's not Lisp that sucks, but Common Lisp. --- Paul Graham
    --- Synchronet 3.21a-Linux NewsLink 1.2
  • From B. Pym@Nobody447095@here-nor-there.org to comp.lang.lisp,comp.lang.scheme on Wed Jul 2 18:20:57 2025
    From Newsgroup: comp.lang.lisp

    taruss wrote:

    First, you could get each line using
    READ-LINE, then use a loop through the
    positions of the delimiter character #\; to
    find the field positions. Now,
    unfortunately, Common Lisp doesn't have a
    PARSE-FLOAT function, so you would have to
    import a library to do that.
    See PARSE-NUMBER from
    http://cliki.net/parse-number or the older
    PARSE-FLOAT from http://www.cs.cmu.edu/Groups/AI/util/lang/lisp/code/math/atof/0.html
    (The latter
    doesn't have the nice floating format
    support, though).

    So, you could do something along the lines of (untested):

    ;; Read "my-file.csv" one line at a time and split every line on #\;.
    ;; Column 0 is kept as a string; all later columns are handed to
    ;; PARSE-NUMBER.  :NCONC splices each row's fields into one flat list.
    (with-open-file (stream "my-file.csv" :direction :input)
      ;; ":for line =" re-evaluates READ-LINE on every pass (":in" would try
      ;; to walk the first line as a list).  With the NIL NIL arguments
      ;; READ-LINE returns NIL at end of file, which stops the :while.
      (loop :for line = (read-line stream nil nil)
            :while line
            :nconc (loop :for column-number :upfrom 0
                         :as start = 0 :then end
                         :as end = (position #\; line :start (1+ start))
                         ;; NOTE(review): column 0 drops one character at each
                         ;; end -- presumably surrounding quotes; confirm
                         ;; against the real data.
                         :collect (if (zerop column-number)
                                      (subseq line (1+ start) (1- end))
                                      ;; PARSE-NUMBER takes its bounds as
                                      ;; keyword arguments; END is NIL on the
                                      ;; last column, meaning "to end of line".
                                      (parse-number line :start (1+ start) :end end))
                         :when (null end) :do (loop-finish))))

    Gauche Scheme

    ;; Sample input: four records separated by newlines, fields separated by #\;.
    ;; The line breaks are written as \n escapes so that read-line sees four
    ;; separate records, matching the four-row result printed further down.
    (define csv
      "1999-01-04;1391.12;3034.53;66.515625;86.2;441.39\n1999-01-05;1404.86;3072.41;66.3125;86.17;440.63\n1999-01-06;1435.12;3156.59;66.4375;86.32;441.7\n1999-01-07;1432.32;3106.08;66.25;86.22;447.67")

    ;; Turn one semicolon-separated record into a list: the leading field is
    ;; kept as a string and every remaining field goes through string->number.
    (define (parse-line str)
      (let* ((pieces (string-split str #\;))
             (label (car pieces))
             (numbers (map string->number (cdr pieces))))
        (cons label numbers)))

    ;; Read csv line by line, with current input taken from the string, and
    ;; return the parsed records in input order.
    (with-input-from-string csv
    (lambda ()
    ;; line has no step expression; it is refreshed by the set! inside the
    ;; termination test.  The "" initial value is never parsed, because the
    ;; test overwrites line before the step for res first runs.
    (do ((line "")
    (res '() (cons (parse-line line) res)))
    ;; The test first reads the next line, then checks it for end of input.
    ((begin (set! line (read-line))
    (eof-object? line))
    ;; Records were consed onto the front, so reverse restores input order.
    (reverse res)))))

    (("1999-01-04" 1391.12 3034.53 66.515625 86.2 441.39)
    ("1999-01-05" 1404.86 3072.41 66.3125 86.17 440.63)
    ("1999-01-06" 1435.12 3156.59 66.4375 86.32 441.7)
    ("1999-01-07" 1432.32 3106.08 66.25 86.22 447.67))
    --- Synchronet 3.21d-Linux NewsLink 1.2
  • From B. Pym@Nobody447095@here-nor-there.org to comp.lang.lisp,comp.lang.scheme on Wed Jul 2 19:05:33 2025
    From Newsgroup: comp.lang.lisp

    B. Pym wrote:

    taruss wrote:

    First, you could get each line using
    READ-LINE, then use a loop through the
    positions of the delimiter character #\; to
    find the field positions. Now,
    unfortunately, Common Lisp doesn't have a
    PARSE-FLOAT function, so you would have to
    import a library to do that.
    See PARSE-NUMBER from
    http://cliki.net/parse-number or the older
    PARSE-FLOAT from http://www.cs.cmu.edu/Groups/AI/util/lang/lisp/code/math/atof/0.html
    (The latter
    doesn't have the nice floating format
    support, though).

    So, you could do something along the lines of (untested):

    ;; Read "my-file.csv" one line at a time and split every line on #\;.
    ;; Column 0 is kept as a string; all later columns are handed to
    ;; PARSE-NUMBER.  :NCONC splices each row's fields into one flat list.
    (with-open-file (stream "my-file.csv" :direction :input)
      ;; ":for line =" re-evaluates READ-LINE on every pass (":in" would try
      ;; to walk the first line as a list).  With the NIL NIL arguments
      ;; READ-LINE returns NIL at end of file, which stops the :while.
      (loop :for line = (read-line stream nil nil)
            :while line
            :nconc (loop :for column-number :upfrom 0
                         :as start = 0 :then end
                         :as end = (position #\; line :start (1+ start))
                         ;; NOTE(review): column 0 drops one character at each
                         ;; end -- presumably surrounding quotes; confirm
                         ;; against the real data.
                         :collect (if (zerop column-number)
                                      (subseq line (1+ start) (1- end))
                                      ;; PARSE-NUMBER takes its bounds as
                                      ;; keyword arguments; END is NIL on the
                                      ;; last column, meaning "to end of line".
                                      (parse-number line :start (1+ start) :end end))
                         :when (null end) :do (loop-finish))))

    Gauche Scheme

    ;; Sample input: four records separated by newlines, fields separated by #\;.
    ;; The line breaks are written as \n escapes so that read-line sees four
    ;; separate records, matching the four-row result printed further down.
    (define csv
      "1999-01-04;1391.12;3034.53;66.515625;86.2;441.39\n1999-01-05;1404.86;3072.41;66.3125;86.17;440.63\n1999-01-06;1435.12;3156.59;66.4375;86.32;441.7\n1999-01-07;1432.32;3106.08;66.25;86.22;447.67")

    ;; Turn one semicolon-separated record into a list: the leading field is
    ;; kept as a string and every remaining field goes through string->number.
    (define (parse-line str)
      (let* ((pieces (string-split str #\;))
             (label (car pieces))
             (numbers (map string->number (cdr pieces))))
        (cons label numbers)))

    ;; Read csv line by line, with current input taken from the string, and
    ;; return the parsed records in input order.
    (with-input-from-string csv
    (lambda ()
    ;; line has no step expression; it is refreshed by the set! inside the
    ;; termination test.  The "" initial value is never parsed, because the
    ;; test overwrites line before the step for res first runs.
    (do ((line "")
    (res '() (cons (parse-line line) res)))
    ;; The test first reads the next line, then checks it for end of input.
    ((begin (set! line (read-line))
    (eof-object? line))
    ;; Records were consed onto the front, so reverse restores input order.
    (reverse res)))))

    (("1999-01-04" 1391.12 3034.53 66.515625 86.2 441.39)
    ("1999-01-05" 1404.86 3072.41 66.3125 86.17 440.63)
    ("1999-01-06" 1435.12 3156.59 66.4375 86.32 441.7)
    ("1999-01-07" 1432.32 3106.08 66.25 86.22 447.67))

    Shorter.

    ;; Using my "do.".
    ;; Same loop as the plain-do version, but the read happens in the variable
    ;; spec instead of in the termination test.
    (with-input-from-string csv
    (lambda ()
    (do. ((line (read-line) <>) ;; <> means repeat the preceding expr.
    ;; Each parsed record is consed onto the front of res.
    (res '() (cons (parse-line line) res)))
    ;; reverse restores input order once end of input is reached.
    ((eof-object? line) (reverse res)))))

    Given:

    ;; do.-aux walks the variable specs of a do. form, moving each one into the
    ;; seen accumulator, and finally emits a plain do.  <> is matched
    ;; literally.  Rules are tried top to bottom.
    (define-syntax do.-aux
    (syntax-rules ( <> )
    ;; (v init <>): replace <> with a copy of init, so init doubles as the
    ;; step expression.  The rewritten spec is then picked up by the next rule.
    [ (do.-aux ((v init <>) more ...) seen stuff ...)
    (do.-aux ((v init init) more ...) seen stuff ...) ]
    ;; Move the first remaining spec, unchanged, to the end of seen.
    [ (do.-aux (what more ...) (seen ...) stuff ...)
    (do.-aux (more ...) (seen ... what) stuff ...) ]
    ;; No specs left: expand into the built-in do.
    [ (do.-aux () seen stuff ...)
    (do seen stuff ...) ] ))
    ;; Entry point: hands the variable specs to do.-aux together with an empty
    ;; accumulator, followed by the termination clause and any body forms.
    (define-syntax do.
      (syntax-rules ()
        ((_ specs rest ...)
         (do.-aux specs () rest ...))))
    --- Synchronet 3.21d-Linux NewsLink 1.2
  • From B. Pym@Nobody447095@here-nor-there.org to comp.lang.lisp,comp.lang.scheme on Wed Jul 2 20:42:17 2025
    From Newsgroup: comp.lang.lisp

    B. Pym wrote:

    B. Pym wrote:

    taruss wrote:

    First, you could get each line using
    READ-LINE, then use a loop through the
    positions of the delimiter character #\; to
    find the field positions. Now,
    unfortunately, Common Lisp doesn't have a
    PARSE-FLOAT function, so you would have to
    import a library to do that.
    See PARSE-NUMBER from
    http://cliki.net/parse-number or the older
    PARSE-FLOAT from http://www.cs.cmu.edu/Groups/AI/util/lang/lisp/code/math/atof/0.html
    (The latter
    doesn't have the nice floating format
    support, though).

    So, you could do something along the lines of (untested):

    ;; Read "my-file.csv" one line at a time and split every line on #\;.
    ;; Column 0 is kept as a string; all later columns are handed to
    ;; PARSE-NUMBER.  :NCONC splices each row's fields into one flat list.
    (with-open-file (stream "my-file.csv" :direction :input)
      ;; ":for line =" re-evaluates READ-LINE on every pass (":in" would try
      ;; to walk the first line as a list).  With the NIL NIL arguments
      ;; READ-LINE returns NIL at end of file, which stops the :while.
      (loop :for line = (read-line stream nil nil)
            :while line
            :nconc (loop :for column-number :upfrom 0
                         :as start = 0 :then end
                         :as end = (position #\; line :start (1+ start))
                         ;; NOTE(review): column 0 drops one character at each
                         ;; end -- presumably surrounding quotes; confirm
                         ;; against the real data.
                         :collect (if (zerop column-number)
                                      (subseq line (1+ start) (1- end))
                                      ;; PARSE-NUMBER takes its bounds as
                                      ;; keyword arguments; END is NIL on the
                                      ;; last column, meaning "to end of line".
                                      (parse-number line :start (1+ start) :end end))
                         :when (null end) :do (loop-finish))))

    Gauche Scheme

    ;; Sample input: four records separated by newlines, fields separated by #\;.
    ;; The line breaks are written as \n escapes so that read-line sees four
    ;; separate records, matching the four-row result printed further down.
    (define csv
      "1999-01-04;1391.12;3034.53;66.515625;86.2;441.39\n1999-01-05;1404.86;3072.41;66.3125;86.17;440.63\n1999-01-06;1435.12;3156.59;66.4375;86.32;441.7\n1999-01-07;1432.32;3106.08;66.25;86.22;447.67")

    ;; Turn one semicolon-separated record into a list: the leading field is
    ;; kept as a string and every remaining field goes through string->number.
    (define (parse-line str)
      (let* ((pieces (string-split str #\;))
             (label (car pieces))
             (numbers (map string->number (cdr pieces))))
        (cons label numbers)))

    ;; Read csv line by line, with current input taken from the string, and
    ;; return the parsed records in input order.
    (with-input-from-string csv
    (lambda ()
    ;; line has no step expression; it is refreshed by the set! inside the
    ;; termination test.  The "" initial value is never parsed, because the
    ;; test overwrites line before the step for res first runs.
    (do ((line "")
    (res '() (cons (parse-line line) res)))
    ;; The test first reads the next line, then checks it for end of input.
    ((begin (set! line (read-line))
    (eof-object? line))
    ;; Records were consed onto the front, so reverse restores input order.
    (reverse res)))))

    (("1999-01-04" 1391.12 3034.53 66.515625 86.2 441.39)
    ("1999-01-05" 1404.86 3072.41 66.3125 86.17 440.63)
    ("1999-01-06" 1435.12 3156.59 66.4375 86.32 441.7)
    ("1999-01-07" 1432.32 3106.08 66.25 86.22 447.67))

    Shorter.

    ;; Using my "do.".
    ;; Same loop as the plain-do version, but the read happens in the variable
    ;; spec instead of in the termination test.
    (with-input-from-string csv
    (lambda ()
    (do. ((line (read-line) <>) ;; <> means repeat the preceding expr.
    ;; Each parsed record is consed onto the front of res.
    (res '() (cons (parse-line line) res)))
    ;; reverse restores input order once end of input is reached.
    ((eof-object? line) (reverse res)))))

    Given:

    ;; do.-aux walks the variable specs of a do. form, moving each one into the
    ;; seen accumulator, and finally emits a plain do.  <> is matched
    ;; literally.  Rules are tried top to bottom.
    (define-syntax do.-aux
    (syntax-rules ( <> )
    ;; (v init <>): replace <> with a copy of init, so init doubles as the
    ;; step expression.  The rewritten spec is then picked up by the next rule.
    [ (do.-aux ((v init <>) more ...) seen stuff ...)
    (do.-aux ((v init init) more ...) seen stuff ...) ]
    ;; Move the first remaining spec, unchanged, to the end of seen.
    [ (do.-aux (what more ...) (seen ...) stuff ...)
    (do.-aux (more ...) (seen ... what) stuff ...) ]
    ;; No specs left: expand into the built-in do.
    [ (do.-aux () seen stuff ...)
    (do seen stuff ...) ] ))
    ;; Entry point: hands the variable specs to do.-aux together with an empty
    ;; accumulator, followed by the termination clause and any body forms.
    (define-syntax do.
      (syntax-rules ()
        ((_ specs rest ...)
         (do.-aux specs () rest ...))))

    ;; Read csv line by line using do/ and return the parsed records in input
    ;; order.
    (with-input-from-string csv
    (lambda ()
    ;; No step expression is given for line, so do/ reuses (read-line) as the
    ;; step and a fresh line is read on every iteration.
    (do/ ((line (read-line))
    (res '() (cons (parse-line line) res)))
    ;; "@ res" makes the loop return (reverse res).
    ((eof-object? line) @ res))))

    Given:

    Consider this:

    (do ((x (read))

    Why should the "(read)" be executed only once?
    Why should "do" in this case perform as "let"?
    We already have "let"; we don't need "do" to perform
    the job of "let".

    ;; If an update expression isn't given, the initialization
    ;; expression is used instead.
    ;; do/-aux walks the variable specs, moving each into the seen accumulator,
    ;; and finally emits a plain do.  @ is matched literally.  Rules are tried
    ;; top to bottom.
    (define-syntax do/-aux
    (syntax-rules ( @ )
    ;; (v init) with no step: duplicate init as the step expression.  The
    ;; rewritten three-element spec no longer matches this rule.
    [ (do/-aux ((v init) more ...) seen stuff ...)
    (do/-aux ((v init init) more ...) seen stuff ...) ]
    ;; Termination clause of the form (bool @ xs): the result expression
    ;; becomes (reverse xs).
    [ (do/-aux specs seen (bool @ xs) stuff ...)
    (do/-aux specs seen (bool (reverse xs)) stuff ...) ]
    ;; Move the first remaining spec, unchanged, to the end of seen.
    [ (do/-aux (what more ...) (seen ...) stuff ...)
    (do/-aux (more ...) (seen ... what) stuff ...) ]
    ;; No specs left: expand into the built-in do.
    [ (do/-aux () seen stuff ...)
    (do seen stuff ...) ] ))
    ;; Entry point: hands the variable specs to do/-aux together with an empty
    ;; accumulator, followed by the termination clause and any body forms.
    (define-syntax do/
      (syntax-rules ()
        ((_ specs rest ...)
         (do/-aux specs () rest ...))))


    --- Synchronet 3.21d-Linux NewsLink 1.2