User:Inductiveload/Sandbox/long-s replacements
Appearance
Some regex replacements that can make a substantial dent in long-s corrections.
This uses the fact that a lot of locations in words will never be "f" (e.g. "fhip"), so we can assume it's an OCR error for a long-s.
This list is empirical, and may very occasionally over-correct (e.g. when a very common word is often mistaken, but the correction could rarely break a correct, but uncommon, word), but the hope is that it will still end up with fewer mistakes than before.
/**
 * Ordered table of long-s OCR corrections.
 *
 * Each entry is a `[pattern, replacement]` pair: `pattern` is a RegExp that
 * matches a letter sequence in which an "f" is assumed to be a misread long s,
 * and `replacement` is the corrected text (`$1`, `$2` refer to the pattern's
 * capture groups, as in `String.prototype.replace`).
 *
 * Order matters: more specific rules must come before the general rules that
 * would otherwise consume their input (e.g. "fubf" before "fub", and the
 * Mississippi rules before the generic "mif" rule).
 *
 * The patterns carry no flags; how the consumer applies them (global or
 * first-match only) is not visible here.
 */
var long_s_reps = [
[/([^i])fic\b/, "$1sic"],
[/([Ee])aft/, "$1ast"],
[/([W])eft/, "$1est"], // assume Weft is West, but weft is like fabric
[/affocia/, "associa"], // associate: the leading "af" is itself a long-s error
[/focia/, "socia"], // social, unsociable
[/(A|a)nfwer/, "$1nswer"],
[/effent/, "essent"], // essential: the leading "ef" is itself a long-s error
[/fent/, "sent"], // sent, sentinel
[/(other|like)wife/, "$1wise"],
[/\bfide\b/, "side"],
[/\bfo\b/, "so"],
[/\breft/, "rest"],
[/abfo/, "abso"],
[/ccef[fs]/, "ccess"],
[/affer/, "asser"],
[/affi([fs])/, "assis"], // assis
[/aff(um|ur)/, "ass$1"], // assume, assure
[/Afia/, "Asia"],
[/aftic/, "astic"],
[/afty/, "asty"],
[/alfo/, "also"],
[/apfe/, "apse"],
[/aufp/, "ausp"],
[/baffy/, "bassy"],
[/([Bb])afe/, "$1ase"],
[/([Bb])eft/, "$1est"],
[/([Cc])afua/, "$1asua"],
[/([Cc])auf/, "$1aus"],
[/([Cc])eaf(?!a)/, "$1eas"],
[/ceff/, "cess"], // necessary
[/cefs\b/, "cess"], // princess
[/Chrif/, "Chris"],
[/cife/, "cise"],
[/claf[fs]/, "class"],
[/clofe/, "close"],
[/conf(i|t|eq)/, "cons$1"], // const, conseq...
[/courfe/, "course"],
[/([Cc])roff\B/, "$1ross"], // cross-
[/([Cc])rofs\b/, "$1ross"], // cross
[/defcr/, "descr"],
[/efer([vt])/, "eser$1"], // deserve-, desert-
[/([dD])if([ocprgqst]|ad)/, "$1is$2"], // dis-
[/\b([dD])if([^f]\w)/, "$1is$2"],
[/diffol/, "dissol"],
[/defir/, "desir"],
[/efour/, "esour"],
[/efpe/, "espe"], // especial
[/([Bb])eft(\b|ed|ing)/, "$1est$2"], // keep the matched suffix (group 2), not the initial letter
[/([^kgrdw])eft\b/, "$1est"], // -est
[/([Ee])fta/, "$1sta"], // establish
[/([Ee])fti/, "$1sti"], // estimate
[/enfes/, "enses"],
[/ennf/, "enns"], // Pennsylv etc
[/erfal/, "ersal"],
[/fa(cr|fe|ga|id|le|lut|tis|w\b)/, "sa$1"],
[/fatif(?!e)/, "satis"],
[/fca([^s])/, "sca$1"], // scarce, scant, etc (not briefcase)
[/fchem/, "schem"],
[/fc(ie|ious|ure|en)/, "sc$1"], // science, conscious, secure
[/fenf/, "sens"],
[/fe(a\b|af|cl|co)/, "se$1"], // season, seclude, second
[/fee(m|n|ing)/, "see$1"], // seen, seem
[/fe(ek|gr)/, "se$1"],
[/felec/, "selec"],
[/fel(f|v)/, "sel$1"],
[/fenfe/, "sense"],
[/feri([eo])/, "seri$1"],
[/fervi/, "servi"],
[/fettle(m)/, "settle$1"],
[/fevera/, "severa"],
[/fing(le|u)/, "sing$1"], // single, singular
[/fis\b/, "sis"], // -sis
[/ffidu/, "ssidu"], // Assiduous
[/fh(al|ut|ip|o)/, "sh$1"],
[/inifter/, "inister"],
[/fidera/, "sidera"], // considerable/ation/ate
[/fift(?!h)/, "sist"], // subsist, consist
[/fign/, "sign"],
[/fimi/, "simi"],
[/fion/, "sion"],
[/firft/, "first"],
[/fite\b/, "site"],
[/fitive/, "sitive"],
[/fitu/, "situ"],
[/flowl/, "slowl"],
[/flowne/, "slowne"],
[/fm(an|en|all|oth|ooth)/, "sm$1"], // small, helmsmen, smooth
[/focie/, "socie"],
[/fole/, "sole"],
[/foli/, "soli"],
[/fome/, "some"],
[/foon/, "soon"],
[/foph/, "soph"], // -sopher/y
[/fourc/, "sourc"],
[/fouth/, "south"], // long s is lowercase only, so do not capitalise
[/fov/, "sov"],
[/fpade/, "spade"],
[/fpawn/, "spawn"],
[/fpeak/, "speak"],
[/fpec/, "spec"],
[/fpee/, "spee"],
[/fpir/, "spir"], // spirit, spiral
[/ft(air|an|at|eem|ep|ill|on|oo|r|ud|y)/, "st$1"],
[/\bft(u)/, "st$1"],
[/fubf/, "subs"], // do before fub
[/fub/, "sub"],
[/fucc/, "succ"],
[/fuch/, "such"],
[/fuf(p)/, "sus$1"],
[/fuff/, "suff"],
[/fund(?!rais)/, "sund"],
[/fumm/, "summ"], // summit, summary
[/fuit/, "suit"],
[/fuper/, "super"],
[/fupp/, "supp"],
[/fure/, "sure"],
[/furv/, "surv"],
[/fway/, "sway"],
[/fyf/, "sys"],
[/fym/, "sym"],
[/grefs/, "gress"],
[/hift/, "hist"],
[/ifh/, "ish"],
[/ifm\b/, "ism"],
[/ifon/, "ison"],
[/iftic/, "istic"],
[/illuf/, "illus"],
[/(I|i)nft/, "$1nst"],
[/Jefus/, "Jesus"],
[/([Jj])uft/, "$1ust"],
[/([Ll])aft/, "$1ast"], // last, lastly, etc
[/lefia/, "lesia"],
[/([^ie])efs/, "$1ess"], // -ess
[/leff/, "less"], // -ess-
[/lifh/, "lish"],
[/([MmPp])afs\b/, "$1ass"],
// The Mississippi rules must run before the generic "mif" rule below, which
// would otherwise rewrite their first "f" and stop them from matching.
[/Miffifippi/, "Missisippi"], // NOTE(review): keeps the single-s spelling of the matched form - confirm intended
[/Miffiffippi/, "Mississippi"],
[/([Mm])if\B/, "$1is"], // mistake, misery; "mifs" also becomes "miss"
[/([Mm])oft/, "$1ost"], // preserve the case of the initial letter
[/([Mm])uft/, "$1ust"], // preserve the case of the initial letter
[/nefe/, "nese"],
[/nefs/, "ness"],
[/nfate/, "nsate"],
[/nfive/, "nsive"],
[/oaft/, "oast"], // coast, etc
[/obf/, "obs"],
[/obfe/, "obse"], // observ
[/ofed/, "osed"],
[/offe[fs]s/, "ossess"], // possess; the earlier -ess rule may already have turned "efs" into "ess"
[/poffi/, "possi"], // possible (must not touch "office")
[/ofition/, "osition"], // position, etc
[/ofity/, "osity"],
[/ouf\b/, "ous"],
[/oufly/, "ously"],
[/([Pp])aft/, "$1ast"],
[/erfon/, "erson"],
[/erfua/, "ersua"],
[/erfue/, "ersue"],
[/erfui/, "ersui"],
[/eruf/, "erus"],
[/hraf/, "hras"], // phrase
[/paff/, "pass"], // pass/age, for pafs, see mafs
[/([Pp])leaf/, "$1leas"],
[/([Pp])of(e|t)/, "$1os$2"], // post, pose, compose...
[/ref[fs]/, "ress"], // dress, press, address: both letters become "s"
[/refen/, "resen"],
[/\b([Aa]r|[Rr])ifi/, "$1isi"], // a/rising
[/rofef([sf])/, "rofess"],
[/rofp/, "rosp"],
[/urpof/, "urpos"],
[/queft/, "quest"],
[/reafo/, "reaso"],
[/refea/, "resea"],
[/refi/, "resi"],
[/([Tt])afte/, "$1aste"],
[/terfect/, "tersect"], // intersect, but not perfect, etc
[/hefe/, "hese"], // these
[/hofe/, "hose"], // those, whose
[/traft/, "trast"],
[/ranf/, "rans"], // trans-
[/ufe/, "use"],
[/vaft/, "vast"],
[/([Vv])eff/, "$1ess"], // preserve the case of the initial letter
[/verf([eyo])/, "vers$1"], //verse, verso -versy
[/([Vv])ifi/, "$1isi"],
[/ifdom/, "isdom"],
[/xift/, "xist"],
];