Detecting English Rhyme Words In AWK
From
Mike Sanders@21:1/5 to
All on Wed Aug 21 01:12:07 2024
1st attempt...
# An attempt at detecting English rhyme words in AWK - Michael Sanders, 2024
#
# example invocation: awk -f rhyme.awk -v rhyme=skipper < words.txt
# optional: -v maxdist=N widens/constrains results; a word is printed only
# when its levenshtein distance to rhyme is below N (default 2)
{
    # compare in lowercase so the distance check agrees with isrhyme(),
    # which also ignores case
    if (isrhyme($1, rhyme) &&
        levenshtein(tolower($1), tolower(rhyme)) < (maxdist == "" ? 2 : maxdist + 0))
        print $1
}
# -----------------------------------------------------------------
# isrhyme(w1, w2): returns 1 when the two words are considered to rhyme,
# otherwise 0. The comparison ignores case. e1 and e2 are locals.
#
# Two words rhyme when either
#   - the tails returned by lvcs() for both words are identical, or
#   - both words end in an alternative of the same exception pattern.
#
# Patterns are passed as strings (dynamic regexps), not /.../ constants:
# a regexp constant used as an argument to a user-defined function is
# evaluated as ($0 ~ /.../) before the call, so the callee would only
# ever receive 0 or 1 instead of the pattern.
function isrhyme(w1, w2,    e1, e2) {
    # lowercase words for consistency
    w1 = tolower(w1)
    w2 = tolower(w2)

    # general rule: identical tail from the last vowel onward
    e1 = lvcs(w1)
    e2 = lvcs(w2)
    if (e1 == e2) return 1

    # specific exceptions for common english rhyming patterns
    if (ismatch(w1, w2, "(ight|ite)$")) return 1      # tight/lite
    if (ismatch(w1, w2, "(air|ear)$")) return 1       # hair/bear
    if (ismatch(w1, w2, "(ain|ane)$")) return 1       # plain/lane
    if (ismatch(w1, w2, "(ought|aught)$")) return 1   # bought/caught
    if (ismatch(w1, w2, "(ell|ale)$")) return 1       # sell/pale
    if (ismatch(w1, w2, "(ound|own)$")) return 1      # round/crown
    if (ismatch(w1, w2, "(en|in)$")) return 1         # pen/tin
    if (ismatch(w1, w2, "(ow|ou)$")) return 1         # cow/thou
    if (ismatch(w1, w2, "(ine|ign)$")) return 1       # wine/sign

    # common word endings that may not exactly match yet rhyme phonetically
    if (ismatch(w1, w2, "(tion|sion)$")) return 1     # nation/mission
    if (ismatch(w1, w2, "(tion|cian)$")) return 1     # station/magician
    if (ismatch(w1, w2, "(able|ible)$")) return 1     # capable/responsible

    # special cases for shorter vowel/consonant combinations
    if (ismatch(w1, w2, "(op|up)$")) return 1         # hop/cup
    if (ismatch(w1, w2, "(ap|ep)$")) return 1         # clap/step
    if (ismatch(w1, w2, "(ad|ed)$")) return 1         # bad/red
    if (ismatch(w1, w2, "(ox|ock)$")) return 1        # box/rock

    # catch-all...
    if (ismatch(w1, w2, "(ot|ote|oat)$")) return 1    # note/boat
    if (ismatch(w1, w2, "(upe|oop)$")) return 1       # dupe/poop

    return 0
}
# -----------------------------------------------------------------
# levenshtein(w1, w2): edit distance between the two strings, counting
# single-character deletions, insertions and substitutions as cost 1 each.
# Comparison is character-exact (case matters).
# len1, len2, r, c, ch, subcost and dist are locals.
function levenshtein(w1, w2,    len1, len2, r, c, ch, subcost, dist) {
    len1 = length(w1)
    len2 = length(w2)

    # border cells: distance from/to the empty prefix
    for (r = 0; r <= len1; r++) dist[r, 0] = r
    for (c = 0; c <= len2; c++) dist[0, c] = c

    # fill the table row by row
    for (r = 1; r <= len1; r++) {
        ch = substr(w1, r, 1)
        for (c = 1; c <= len2; c++) {
            if (ch == substr(w2, c, 1))
                subcost = 0
            else
                subcost = 1
            dist[r, c] = min3(dist[r - 1, c] + 1,           # deletion
                              dist[r, c - 1] + 1,           # insertion
                              dist[r - 1, c - 1] + subcost) # substitution
        }
    }

    return dist[len1, len2]
}
# -----------------------------------------------------------------
# levenshtein helper: smallest of the three values a, b and c (m is a local)
function min3(a, b, c,    m) {
    m = (a < b) ? a : b
    return (m < c) ? m : c
}
# -----------------------------------------------------------------
# rhyme helper: returns the tail of w starting at its last vowel
# (a, e, i, o, u or y), e.g. skipp'er', educati'on', kit'e'.
# Returns "" when w contains no vowel. Only lowercase vowels are
# recognized, so pass a lowercased word. i is a local.
function lvcs(w,    i) {
    # start from end of word looking for last vowel
    for (i = length(w); i > 0; i--) {
        if (substr(w, i, 1) ~ /[aeiouy]/) return substr(w, i)    # last vowel to end
    }
    return ""    # return empty string if no vowel is found
}
# -----------------------------------------------------------------
# rhyme helper: returns 1 when both w1 and w2 match pattern p, else 0.
# p is handed to match() as a dynamic regexp, so pass it as a string such
# as "(ight|ite)$"; a bare /.../ constant in a call to a user-defined
# function is evaluated as ($0 ~ /.../) before the call.
function ismatch(w1, w2, p) {
    if (!match(w1, p)) return 0    # first word fails: second is not tested
    return match(w2, p) ? 1 : 0
}
# eof
--
:wq
Mike Sanders
--- SoupGate-Win32 v1.05
* Origin: fsxNet Usenet Gateway (21:1/5)