• Re: Weird problem

    From B. Pym@21:1/5 to Pierre Mai on Tue Aug 6 03:46:09 2024
    Pierre Mai wrote:

    Here is a simple solution, which assumes that only one way exists to
    split a word, and that a simple heuristic suffices to disambiguate
    between possible matches at a given point (implemented are first-match
    and longest-first-match, via the operations SIMPLE-FIND-PART and GREEDY-FIND-PART).

    If this isn't sufficient, you can either try to change the code below
    to support some form of backtracking/non-determinism, or you can check
    out Screamer, which is an extension to Common Lisp for non-determinstic programming, which makes this task much easier IMHO. Screamer is by
    Jeffrey Mark Siskind (http://www.neci.nj.nec.com/homepages/qobi).

    I'd recommend using Screamer, since I'd imagine you will want to
    process your word fragments further, and most things NLP will imply
    some non-determinism.

    Regs, Pierre.

    ;;; Utility function

    (defun starts-with (string start-string &key (start 0))
    (let ((start-length (+ start (length start-string))))
    (and (>= (length string) start-length)
    (string-equal string start-string :start1 start
    :end1 start-length))))

    ;;; The different part-finders, which implement first-match and
    ;;; longest-first-match heuristics respectively.

    (defun simple-find-part (string part-list &key (start 0))
    (dolist (part part-list)
    (when (starts-with string part :start start)
    (return part))))

    (defun greedy-find-part (string part-list &key (start 0))
    (loop with result = nil
    with length = 0
    for part in part-list
    do
    (when (and (starts-with string part :start start)
    (> (length part) length))
    (setq result part
    length (length part)))
    finally
    (return result)))

    ;;; The main function.

    (defun break-apart (word part-finder &rest part-lists)
    (loop with word-length = (length word)
    for index = 0 then (+ index (length part))
    for part-list in part-lists
    for part = (funcall part-finder word part-list :start index)
    never (or (not part) (>= index word-length))
    collect part into result
    finally
    (return (and (= index word-length)
    result))))

    ;;; Examples

    #|
    * (break-apart "astronaut" #'simple-find-part
    '("as" "co" "ast") '("tro" "ro" "mp")
    '("na" "ut") '("ut" "er"))
    ("as" "tro" "na" "ut")
    * (break-apart "astronaut" #'greedy-find-part
    '("as" "co" "ast") '("tro" "ro" "mp")
    '("na" "ut") '("ut" "er"))
    ("ast" "ro" "na" "ut")
    * (break-apart "astronaut" #'simple-find-part
    '("as" "co" "ast") '("tro" "mp")
    '("na" "ut") '("ut" "er"))
    ("as" "tro" "na" "ut")
    * (break-apart "astronaut" #'greedy-find-part
    '("as" "co" "ast") '("tro" "mp")
    '("na" "ut") '("ut" "er"))
    NIL

    newLISP

    (define (cartesian-product lists)
    (if (null? lists)
    '(())
    (let (subproduct (cartesian-product (rest lists)))
    (apply append
    (map
    (fn (x) (map (fn (xs) (cons x xs)) subproduct))
    (first lists))))))

    (define (good? xs) (= (apply string xs) "magnetohydrodynamics"))

    (filter good?
    (cartesian-product
    '(("mag" "ma" "ho" "magn" "in")
    ("eto" "net" "et")
    ("ohy" "o" "od" "oh")
    ("hy" "hyd" "ma" "hi")
    ("od" "drod" "rod")
    ("y" "yj" "yn" "yna")
    ("m" "am" "nam" "nami")
    ("ic" "is" "i")
    ("s" "cs"))))

    (("mag" "net" "o" "hy" "drod" "y" "nam" "ic" "s")
    ("mag" "net" "o" "hy" "drod" "y" "nam" "i" "cs")
    ("mag" "net" "o" "hy" "drod" "yn" "am" "ic" "s")
    ("mag" "net" "o" "hy" "drod" "yn" "am" "i" "cs")
    ("mag" "net" "o" "hy" "drod" "yna" "m" "ic" "s")
    ("mag" "net" "o" "hy" "drod" "yna" "m" "i" "cs")
    ("mag" "net" "o" "hyd" "rod" "y" "nam" "ic" "s")
    ("mag" "net" "o" "hyd" "rod" "y" "nam" "i" "cs")
    ("mag" "net" "o" "hyd" "rod" "yn" "am" "ic" "s")
    ("mag" "net" "o" "hyd" "rod" "yn" "am" "i" "cs")
    ("mag" "net" "o" "hyd" "rod" "yna" "m" "ic" "s")
    ("mag" "net" "o" "hyd" "rod" "yna" "m" "i" "cs")
    ("magn" "et" "o" "hy" "drod" "y" "nam" "ic" "s")
    ("magn" "et" "o" "hy" "drod" "y" "nam" "i" "cs")
    ("magn" "et" "o" "hy" "drod" "yn" "am" "ic" "s")
    ("magn" "et" "o" "hy" "drod" "yn" "am" "i" "cs")
    ("magn" "et" "o" "hy" "drod" "yna" "m" "ic" "s")
    ("magn" "et" "o" "hy" "drod" "yna" "m" "i" "cs")
    ("magn" "et" "o" "hyd" "rod" "y" "nam" "ic" "s")
    ("magn" "et" "o" "hyd" "rod" "y" "nam" "i" "cs")
    ("magn" "et" "o" "hyd" "rod" "yn" "am" "ic" "s")
    ("magn" "et" "o" "hyd" "rod" "yn" "am" "i" "cs")
    ("magn" "et" "o" "hyd" "rod" "yna" "m" "ic" "s")
    ("magn" "et" "o" "hyd" "rod" "yna" "m" "i" "cs"))

    --- SoupGate-Win32 v1.05
    * Origin: fsxNet Usenet Gateway (21:1/5)