# Source: wiki page "User:TarmstroBot/user-fixes.py".
# (The wiki's "Jump to navigation" / "Jump to search" skip links are page chrome, not part of this script.)
# -*- coding: utf-8 -*- #FDSYS FIXES. Used for volumes 65-116 of the U.S. Statutes at Large #to correct common mistakes introduced by the GPO's OCR. fixes['fdsys'] = { 'regex': True, 'msg': { '_default':u'Robot:fixing common errors in FDsys OCR', }, 'replacements': [ # June 2012: further OCR corrections based on results of April 2012 run. # Estimated number of changes to dataset resulting from this run: 110,500 # ## undoing overzealous edits from earlier run (u"\\bacompany(i?ni?g|ii\\^)", u"accompanying"), (u"internationaly", u"internationally"), (u"\\b([Rr])enlist", u"\\1eenlist"), (u"([A-Z]) ([A-Z]) TO F(?= [A-HJ-TV-Z])", u"\\1\\2T OF"), ## character substitutions (scannos) ### “£m” -> “an”, sometimes (u"£md", u"and"), ### “£ui” -> “an” (u"£ui", u"an"), ### “3d|5d” -> “yi”, sometimes (u"[35]dng", u"ying"), (u"[35]d(?=e)", u"yi"), ### “a” -> “e”, sometimes (u"\\btha\\b", u"the"), (u"\\b([Oo])thar", u"\\1ther"), (u"anothar", u"another"), (u"([Hh])aad", u"\\1ead"), ### “a” -> “m”, sometimes (u"\\bl?aore", u"more"), ### “a” -> “s”, sometimes (u"\\bahall", u"shall"), ### “B|E|K” -> “R”, sometimes (u"T?[BEK](?=evis)", u"R"), (u"[BEK](?=evenu)", u"R"), (u"[BEK](?=ep[oru])", u"R"), ### “d” -> “i”, sometimes (u"[vw]dth", u"with"), ### “e” -> “c”, sometimes (u"\\be(?=ont)", u"c"), (u"\\beons\\B", u"cons"), ### “ei” -> “a”, sometimes (u"\\bei(?=(fte|g[aegr]|mend|mo|n[adnty]|rm|ss))", u"a"), (u"([^b][Ll])eind", u"\\1and"), (u"([a-z])Ei(?=[gik-nrsx])", u"\\1a"), (u"([Pp])(a|ei)r(a|ei)", u"\\1ara"), ### “em” -> “an”, sometimes (u"emd\\b", u"and"), ### “Eui” -> “an” (u"Eui", u"an"), ### “h” -> “b”, sometimes (u"semhle", u"semble"), ### “h” -> “li”, sometimes (u"\\b([ae])h(?=[gm])", u"\\1li"), (u"([aou]b)H(?=[a-z])", u"\\1li"), ### “(I|L)^” -> “U”, sometimes (u"\\b[IL]\^+(?=(ni|rb|\\.S))", u"U"), ### “i” -> “r”, sometimes (u"aiy\\b", u"ary"), ### “ii” -> “n”, sometimes (u"aiid", u"and"), ### “ii” -> “u”, sometimes (u"fii(?=(n[cd]|r[bnt][^']))", u"fu"), (u"\\bii(?=nde)", u"u"), 
### “im” -> “un”, sometimes (u"([Cc])ommimit", u"\\1ommunit"), (u"\\bimless\\b", u"unless"), (u"([Ff])oimd", u"\\1ound"), (u"\\bim\^?der", u"under"), (u"coimt", u"count"), (u"\\Btimit", u"tunit"), (u"Jime\\b", u"June"), ### “imi” -> “um”, sometimes (u"\\bnimi(?=[^s])", u"num"), ### “iu-” -> “ur”, sometimes (u"iu[^t](?=ance)", u"ur"), ### “i'” -> “r”, sometimes (u"i'(?=(a|e[^cdnorsH]|i[a-gi-km-z]|o[^enryU]))", u"r"), ### “Ji” -> “h”, sometimes (u"tJi", u"th"), ### “jr” -> “y”, sometimes (u"([bcdghlmstBLKS]|[^wr ]a|[^bdrwLR][eo]|[^p]f|[^io]n|[^mo ]p|[^eo]r)jr", u"\\1y"), ### “j^” -> “p”, sometimes (u"j\\^(?=r)", u"p"), ### “j^” -> “y”, sometimes (u"([Pp])aj\\^", u"\\1ay"), ### “k)” -> “o”, sometimes (u"([CG])k\\)", u"\\1o"), ### “l)” -> “b”, sometimes (u"l\\)(?=(a[^a-eilmnrtvyzB (]|e[^cnoprtv]|i[^fimnpsM .]|o[^cflrswx]|u[^bir]|y[^el]))", u"b"), ### “li” -> “h”, sometimes (u"\\b([CcSsTtWw])li(?=([io]|a[^n]|e[^dinrs]|u[^r]))", u"\\1h"), (u"([einr])tli\\b", u"\\1th"), (u"([Oo])tlier", u"\\1ther"), ### “m” -> “in”, sometimes (u"\\bm(?=s([eu]r|t[^or]))", u"in"), ### “m” -> “rn”, sometimes (u"([Gg])r?ov[ec]m", u"\\1overn"), (u"bom\\b", u"born"), ### “n” -> “h”, sometimes (u"sucn", u"such"), (u"witn\\b", u"with"), (u"\\bTne\\b", u"The"), ### “o” -> “b”, sometimes (u" oy\\b", u" by"), (u"assemol", u"assembl"), ### “o)r|o}r” -> “oy”, sometimes (u"o[)}]r(?=(i[^a]|[ms]))", u" oy"), ### “sd” -> “al”, sometimes (u"(ib|[cegmnpr-w])sd\\b", u"\\1al"), ### “si” -> “a”, sometimes (u"parsi(?=g)", u"para"), ### “sm” -> “an”, sometimes (u"smd", u"and"), ### “U” -> “li”, sometimes (u"U(?=e[fu])", u"li"), (u"earUer", u"earlier"), ### “U” -> “LI”, sometimes (u"(E|OB)UG", u"\\1LIG"), (u"BIUTY", u"BILITY"), (u"\\bUAB\\B", u"LIAB"), (u"([^BEGLRS])UFE", u"\\1LIFE"), (u"MIUT", u"MILIT"), (u"ELUG", u"ELLIG"), (u"\\b([A-Z].[A-IK-SU-Z][A-Z])UNE\\b", u"\\1LINE"), ### “U” -> “ll”, sometimes (u"U(?=(ee|o[mw]))", u"ll"), (u"\\baU\\b", u"all"), (u"([ou])U(?=er)", u"\\1ll"), ### “v” -> “y”, sometimes 
(u"davs\\b", u"days"), (u"\\bvear", u"year"), (u"dav\\b", u"day"), (u"\\bmav", u"may"), (u"tv\\b", u"ty"), ### “v^” -> “w”, sometimes (u"([Ll])av\^+r?", u"\\1aw"), (u"v[iv]\^(?=(a[nr]|[eh]|i[nt]|n[^go]|s[^o ]))", u"w"), ### “v/” -> “w”, sometimes (u"v/\\^?(?=[^r'*,])", u"w"), ### “w” -> “v”, sometimes (u"twes\\b", u"tives"), ### “X” -> “)(”, sometimes (u"\\(IX([A-E])\\)", u"(I)(\\1)"), ## ## general misspellings (u"asse...bled", u"assembled"), (u"emplo..[mni]ent", u"employment"), (u"Secretan[jr]", u"Secretary"), (u"[C({f][^Cc]{1,2}ongress", u"Congress"), (u"([Pp])ro[mwnH]ded", u"\\1rovided"), (u"\\be..a[ce]ted", u"enacted"), (u"[(<{][^Gg]r?eneral", u"General"), (u"([Ss])ec[^a-z]{1,2}ion", u"\\1ection"), (u"se[^msv]{2,3}[bh]led", u"sembled"), (u"throu\\^h?", u"through"), (u"C.[^m]mm(?=(e[^d]|[iou]))", u"Comm"), (u"\\bb\\^+p?inn", u"beginn"), (u"Act[^ ]?of", u"Act of"), (u"([Oo])f[li][[^i]ce", u"\\1ffice"), (u"([Pp])ub[^i]c", u"\\1ublic"), (u"\\bRE[^C]ORD", u"RECORD"), (u"[ILTUZfrt*][il'\"^]+?\\^+nited", u"United"), (u"([^s])a(u|ii)[ce]{2}\\b", u"\\1ance"), (u"\\b[ao]\\^+s+em", u"assem"), (u"fi.aud", u"fraud"), (u"Hou[^s]e\\b", u"House"), (u"Hou.{1,2}se", u"House"), (u"\\b([Pp])r.[^d](?=ams?)\\b", u"\\1rogr"), (u"\\bsh[^eov][^w]ll?\\b", u"shall"), (u"sh.{1,2}dl", u"shall"), (u"d[fltI]ng\\b", u"ding"), (u"\\b[A-Z].pon\\b", u"Upon"), (u"([Nn])\\^(?=otia)", u"\\1eg"), (u"\\bva [l1I]", u"val"), ## ## proper names (u"WILL[^I]+(?= J\\.)", u"WILLIAM"), ## ## letters for numbers (u"\(lO\)", u"(10)"), (u"lO ?[Uu][Ss][Cce]", u"10 USC"), (u"SO ?(?=USC)", u"50 "), (u"\\b[Iil](?=[89][0-9][0-9])", u"1"), (u"\\biioi\\b", u"1101"), ## ## excess word spacing (with some spelling corrections also) (u"a.?p.?[pn].?[lf].?i.?[cpr].?[aecH].?[bohD].?l[^cfrF]?e", u"applicable"), (u"\\b([Pp])[^aer]?[rvftiTfnw][^aeiou]?[oadpbgh][^cfmnpv]?[vrocyniuT][^emoru]?[iltvJ'][^s]?[dao][^e]?[easciftzB][^aen]?(d|c[li])\\b", u"\\1rovided"), 
(u"([Hh]).?[ea][^wz]?a[^i]?[dao][^z]?[il].?(n|u|i[ir])[^gt]?[gqo]\\b", u"\\1eading"), (u"A.?[MH].?E.?[RBEK].?[Ii].?C.?A", u"AMERICA"), (u"[RBEK].?[ec].?[vyr].?[eco][^o]?(n|u|ii).?u.?[ec]", u"Revenue"), (u"S T A T E S\\b", u"STATES"), (u"U[^A]?[NX][^L^S]?[IFTtfC][^N]?T[^J]?[EBO].?(D|t\\))", u"UNITED"), (u"\\b([Aa])[^fn]?f[^afir]?[tbi][^bilno]?[eaE][^acr]?[ri]\\b", u"\\1fter"), (u"\\bT[^HO]?[RBEKH][^R]?A[^N]?D.?E", u"TRADE"), (u"\\bca se", u"case"), (u"(\^|J )UNE\\b", u"JUNE"), (u"\\bPag.?e\\b", u"Page"), (u"\\bO F(?= [A-Z] [A-Z])", u"OF"), (u"([Aa]).?[pPDdo].?[pj].?[riFT].?[oepQ].?[ri].?[ilfr].?[an].?[tsl].?[if].?o.?([nuh]|r?i+)[^']?s\\b", u"\\1ppropriations"), (u"A.?[Pr].?P.?[RBEK].?O.?[PF].?[RBEKH].?[IL].?A.?T.?I.?O.?[NXK][^']?S\\b", u"APPROPRIATIONS"), (u"([Aa]).?[pPDdo].?[pj].?[riFT][^cv]?[oepQ][^cl]?[ri].?[ilfr][^gluzCN]?[an].?[tsl].?[if].?o.?([nuh]|r?i+)", u"\\1ppropriation"), (u"A.?[Pr].?P.?[RBEK].?O.?[PF].?[RBEKH].?[IL].?A.?T.?I.?O.?[NXK]", u"APPROPRIATION"), (u"Conser[vy]ati[op]n", u"Conservation"), (u"COKSERVATION", u"CONSERVATION"), (u"r.?e.?i.?[mn].?[boB].?u.?r.?[se].?[es].?m.?e.?n.?[tl].?s\\b", u"reimbursements"), (u"K E I M B U R S E M E N T S\\b", u"REIMBURSEMENTS"), (u"([Rr]).?e.?i.?[mn].?[boB].?u.?r.?[se].?[es].?m.?e.?n.?[tl]\\b", u"\\1eimbursement"), (u"[RK].?E.?I.?M.?B.?U.?R.?S.?E.?M.?E.?N.?T\\b", u"REIMBURSEMENT"), (u"([Pp]).?[riTF].?[og].?[ce].?[litIJ].?[ae].?[manseM].?a.?[tl].?i.?[oa].?([nu]|[ir]i)[^']?s\\b", u"\\1roclamations"), (u"P.?[RBEK].?[OC].?[CO].?[LI].?A.?M.?A.?T.?I.?O.?(N|iS')[^']?S\\b", u"PROCLAMATIONS"), (u"([Pp]).?[riTF].?[og][^tv]?[ce].?[litIJ].?[ae].?[manseM][^N]?a.?[tl].?i.?[oa].?([nu]|[irn]i)", u"\\1roclamation"), (u"P.?[RBEK].?[OC].?[CO].?[LI].?A.?M.?A.?T.?I.?O.?(N|iS')", u"PROCLAMATION"), (u"([Cc]).?([obp]|cJ).?[rit][^e]?[pog].?[oa].?[rifl][^z]?a.?t.?[i'].?[oc].?([nu]|[rj]i|i[vlL])[^']?s\\b", u"\\1orporations"), (u"C.?[OC].?[RBEK].?[PF].?O.?[RBEK].?A.?T.?[ITil].?O.?[NXJ][^']?S\\b", u"CORPORATIONS"), 
(u"([Cc]).?([obp]|cJ)[^r]?[rit][^emr]?[pog].?[oa].?[rifl][^z]?a.?t.?[i'].?[oc].?([nu]|[rj]i|i[vlL])", u"\\1orporation"), (u"C.?[OC].?[RBEK].?[PF].?O.?[RBEK].?A.?T.?[ITil].?O.?[NXJ]", u"CORPORATION"), (u"([Ee]).?x.?p[^l]?[op].?[ri].?t.?[ae].?t.?i.?o.?n", u"\\1xportation"), (u"E X P O R T A T I O N", u"EXPORTATION"), (u"([Ii]).?m.?[mn].?i.?[geop].?[riT].?a.?t.?i.?o.?([nH]|f?i:?!?)", u"\\1mmigration"), (u"I.?(M|BII).?M.?I.?G.?R.?A.?T.?I.?O.?N", u"IMMIGRATION"), (u"impartation", u"importation"), (u"([Ll]).?e.?[goq].?i.?[se].?[lih].?a.?[ti].?i.?o.?(n|[iU]j|fv)", u"\\1egislation"), (u"L E G I S L A T I O N", u"LEGISLATION"), (u"([Ii]).?(m|in).?[pD].?r.?o.?v.?e.?m.?e.?[nu].?t[^']?s\\b", u"\\1mprovements"), (u"I.?M.?P.?[RBEK].?O.?V.?[EK].?M.?E.?[NX].?[Tl].?S\\b", u"IMPROVEMENTS"), (u"([Ii]).?(m|in).?[pD].?r.?o.?v.?e.?m.?e.?[nu].?t", u"\\1mprovement"), (u"I.?M.?P.?[RBEK].?O.?V.?[EK].?M.?E.?[NX].?(T|l')", u"IMPROVEMENT"), (u"([Rr]).?[ecot].?[saB].?[tf].?[ri].?i.?c.?[ts].?[if].?[oe].?[nuh][^']?s\\b", u"\\1estrictions"), (u"[RBEK].?E.?S.?T.?[RBEK].?I.?C.?T.?I.?O.?[NX][^']?S\\b", u"RESTRICTIONS"), (u"([Rr]).?[ecot][^r]?[saB].?[tf].?[ri].?i.?c.?[ts].?[if][^v]?[oe].?[nuh]", u"\\1estriction"), (u"[BK]ESTRICTION", u"RESTRICTION"), (u"([Aa]).?[dau].?[jiy].?u.?[sa].?t.?m.?[ec].?(n|u|ii).?t[^']?s\\b", u"\\1djustments"), (u"A.?D.?J.?U.?S.?T.?M.?E.?N.?T.?S\\b", u"ADJUSTMENTS"), (u"A.?D.?J.?U.?S.?T.?M.?E.?N.?T", u"ADJUSTMENT"), (u"([Aa]).?[dau].?[jiy].?u.?[sa].?t.?m.?[ec].?(n|u|ii).?t", u"\\1djustment"), (u"e?m?([Aa]).?[mi].?m.?u.?n.?i.?t.?i.?o.?n", u"\\1mmunition"), (u"DEPARTMENT S\\b", u"DEPARTMENTS"), (u"D.?[Et].?[PrF].?A.?[RBEK].?T.?M.?E.?[NX].?T", u"DEPARTMENT"), (u"i ?n ?s ?p ?e ?c ?tions", u"inspections"), (u"I N S P E C T I O N S\\b", u"INSPECTIONS"), (u"([Ii]).?([nA]|ii).?s[^u]?[pn][^b]?[ecs].?c.?[ti].?i.?o.?[nuQ]", u"\\1nspection"), (u"I.?[NX].?S.?P.?E.?C.?T.?I.?O.?[NX]", u"INSPECTION"), (u"([Ii]).?n.?v.?e.?s.?t.?m e n t s\\b", u"\\1nvestments"), 
(u"I.?N.?V.?E.?S.?T.?M.?E.?N.?T[^']?S\\b", u"INVESTMENTS"), (u"([Ii]).?(n|u|ii).?v.?[ec].?[siT].?t.?m.?[ec].?(n|u|A|ii).?t", u"\\1nvestment"), (u"I.?[NX].?V.?E.?S.?T.?M.?E.?[NX].?T", u"INVESTMENT"), (u"s.?e.?t.?t l e m e n t s\\b", u"settlements"), (u"S.?E[^N]?[TI].?T.?L.?E.?M.?E.?N.?T", u"SETTLEMENT"), (u"([Ss]).?e[^n]?t.?t.?[li].?e.?m.?e.?[nuh].?t", u"\\1ettlement"), (u"([Aa]).?l.?l.?o.?t.?m.?[ec].?n.?[ti][^']?s\\b", u"\\1llotments"), (u"A.?L.?[LX].?O.?T.?M.?E.?[NX].?I?T[^']?S\\b", u"ALLOTMENTS"), (u"([Aa]).?l.?l.?o.?t.?m.?[ec].?n.?[ti]", u"\\1llotment"), (u"A.?L.?[LX].?O.?T.?M.?E.?[NX].?T", u"ALLOTMENT"), (u"E N D O W M E N T", u"ENDOWMENT"), (u"([Ee]).?n.?d.?o.?w.?m.?e.?n.?t", u"\\1ndowment"), (u"([Ee]).?x.?[co].?[ec].?p.?t.?i.?[oa].?[nu][^']?s\\b", u"\\1xceptions"), (u"E X C E P T I O N S", u"EXCEPTIONS"), (u"([Ee]).?x.?[co].?[ec].?p.?t.?i.?[oa].?([nu]|fti|iii)", u"\\1xception"), (u"\\bE.?X.?C.?E.?[PI].?T.?[IT].?O.?N", u"EXCEPTION"), (u"E A S E M E N T S", u"EASEMENTS"), (u"([Ee]).?(a|ei).?s.?e.?m.?e.?n.?t[^'(]?s\\b", u"\\1asements"), (u"([Ee]).?(a|ei).?s.?e.?m.?e.?n.?t", u"\\1asement"), (u"E ?l ?e ?c ?t ?i ?o ?n ?s\\b", u"Elections"), (u"E L E C T I O N S", u"ELECTIONS"), (u"\\b([Ee])[^f]?[li].?[ec].?c[^a]?[tc][^r]?i[^-]?[oc].?n", u"\\1lection"), (u"E.?L.?[EB].?C.?T.?[Ii].?O.?[NX]", u"ELECTION"), (u"([Mm]).?o.?n.?u.?m.?e.?n.?t[^']?s\\b", u"\\1onuments"), (u"M O N U M E N T S\\b", u"MONUMENTS"), (u"([Mm]).?o.?[nJ].?u.?m.?e.?n.?t", u"\\1onument"), (u"M O N U M E N T", u"MONUMENT"), (u"([Mm]).?[oc].?[vr].?[ev].?m.?e.?n.?[ti][^']?s\\b", u"\\1ovements"), (u"\\b([Mm]).?[oc].?[vr].?[ev].?m.?e.?n.?[ti]\\b", u"\\1ovement"), (u"M O V E M E N T", u"MOVEMENT"), (u"\\bC.?[EK].?[RBEKS].?T.?A.?I.?[NXK]\\b", u"CERTAIN"), (u"([Cc]) ?[ec] ?[ri] ?[tc] ?[ae] ?[id] ?(n|u|[ri]?i|h)\\b", u"\\1ertain"), ## ## number spacing (u"\\b([0-9]) ([0-9])(?= USC)", u"\\1\\2"), (u"(tions? 
[0-9]+) (?=\\([a-z0-9]\\))", u"\\1"), ## ## insufficient spacing (u"USC(?=[a-z])", u"USC "), (u"\\bofthe\\b", u"of the"), (u"\\bofthis", u"of this"), ## ## superfluous characters (should be deleted, not substituted with another) ### caret (^) (u"\\^+(?=[A-Z][a-z])", u""), (u"A?\\^nte,", u"Ante,"), (u"o[f|]\\^the", u"of the"), ### comma (,) (u",tion", u"tion"), ### misc. (u"\\bfi[^a-z ]o?m", u"from"), (u"\\ba[^a-z]nd\\b", u"and"), (u"\\b(An|no).te\\b", u"\\1te"), (u"\\bfi[^a-z]ee", u"free"), (u"([Dd])iu.ing", u"\\1uring"), # end of June 2012 run ################################################################################################### # April 2012: more OCR corrections based on results of September 2011 run. # Estimated number of changes to dataset resulting from this run: 37,000 # ## undo overzealous corrections from prior run ### “acompany” -> “accompany” (u"\\bacompany\\b", u"accompany"), ### “generaly” -> “generally” (also in uppercase) (u"\\bgeneraly\\b", u"generally"), (u"\\bGENERALY\\b", u"GENERALLY"), (u"\\bgenerallv\\b", u"generally"), ### “iregular” -> “irregular” (u"\\biregular\\b", u"irregular"), ### “mistatement” -> “misstatement” (u"\\mistatement\\b", u"misstatement"), ### "ofunds" -> "of funds" (u"\\bofunds\\b", u"of funds"), ### "remployment" -> "reemployment" (u"\\bremployment\\b", u"reemployment"), (u"\\bREMPLOYMENT\\b", u"REEMPLOYMENT"), ### closing up letters that should have remained spaced apart (u"E F F E C TO F", u"EFFECT OF"), (u"D I S T R I C TO F", u"DISTRICT OF"), (u"L I S TO F", u"LIST OF"), (u"P R E S I D E N TO F", u"PRESIDENT OF"), ## ## close up spacing and correct additional misspellings ## ordered roughly by length of word(s) to match (u"\\bA N ACT\\b", u"AN ACT"), (u"([Ll])abor.?a.?t.?o.?r.?y", u"\\1aboratory"), (u"([Mm]) and a to r y", u"\\1andatory"), (u"\\bA.?D.?M.?I.?[NX].?I.?S.?T.?[RBEK].?A.?T.?I.?O.?[NX]\\b", u"ADMINISTRATION"), (u"\\b([Aa]).?[da].?[mn].?[i'].?[nua].?i.?[seB].?[tfr].?[ri].?a.?[tsv][^o]?[ir].?o.?n", 
u"\\1dministration"), (u"\\bO.?R.?G.?A.?[NX].?[Ii].?[Zz].?A.?T.?[Ii].?O.?[NX][^']?S\\b", u"ORGANIZATIONS"), (u"\\b([Oo]).?[riz].?[gc].?a.?[nmp].?[ix].?[za].?a.?t.?i.?o.?[nu][^'^(]?s\\b", u"\\1rganizations"), (u"O.?[RBEK].?[GO].?A.?[NX].?[Ii].?[Zz].?A.?[TI].?[Ii].?O.?[NX]", u"ORGANIZATION"), (u"\\b([Oo]).?[riz].?[gc].?a.?[nmp].?[ix].?[za].?a.?t.?i.?o.?[nu]", u"\\1rganization"), (u"[REK].?E.?L.?A.?T.?I.?O.?N.?S.?H.?I.?P[^']?S\\b", u"RELATIONSHIPS"), (u"\\br.?e.?l.?a.?t.?i.?o.?n.?s.?h.?i.?[po].?s\\b", u"relationships"), (u"[REK].?E.?L.?A.?T.?I.?O.?N.?S.?H.?I.?P", u"RELATIONSHIP"), (u"\\b([Rr]).?e.?l.?a.?t.?i.?o.?[nu].?s.?[hn].?i.?(p|o|i\\)|j\\))", u"\\1elationship"), (u"A.?[ULT].?[TI].?H.?O.?[RBEKS].?I.?Z.?A.?[TI].?I.?O.?[NX]", u"AUTHORIZATION"), (u"([Aa]).?[uni].?[tf].?[hna].?[oc].?r.?[if].?z.?[anu].?[ti].?[i'].?[opabq].?[nu]", u"\\1uthorization"), (u"M A N U F ACT U R I N G", u"MANUFACTURING"), (u"([Mm]).?[as].?[nm].?u.?[fi].?[ao].?c.?t.?u.?r.?i.?n.?g", u"\\1anufacturing"), (u"V.?O.?L.?U.?N.?T.?A.?R.?I.?L.?Y", u"VOLUNTARILY"), (u"v[od]luntaril[yv]", u"voluntarily"), (u"V.?O.?L.?U.?N.?T.?A.?[REK].?Y", u"VOLUNTARY"), (u"([Vv]).?o.?l.?u.?[nu].?t.?[aoe].?[ri][^l]?[yv]\\b", u"\\1oluntary"), (u"A.?P.?P.?O.?I.?N.?T.?M.?E.?[NX].?T.?S\\b", u"APPOINTMENTS"), (u"([Aa]).?p.?p.?o.?i.?[nu].?t.?m.?e.?n.?t.?s\\b", u"\\1ppointments"), (u"A.?P.?P.?O.?I.?[NX].?T.?M.?E.?[NX].?T", u"APPOINTMENT"), (u"([Aa]).?[pd].?[png].?[od].?i.?[nu].?t.?m.?e.?[nu].?t\\b", u"\\1ppointment"), (u"A S S O C I A T I O N S\\b", u"ASSOCIATIONS"), (u"([Aa]).?s.?s.?o.?c.?i.?a.?t.?i.?o.?[nu][^']?s\\b", u"\\1ssociations"), (u"A.?S.?S.?O.?C.?[IL].?A.?T.?I.?O.?[NX]", u"ASSOCIATION"), (u"([Aa]).?s.?s.?.o.?c.?i.?a.?t.?i.?[oa].?n\\b", u"\\1ssociation"), (u"E N TITLE M E N T S\\b", u"ENTITLEMENTS"), (u"e.?n.?t.?i.?t.?l.?e.?m.?e.?n.?t.?s\\b", u"entitlements"), (u"E.?N.?T.?[IT].?T.?[LU].?E.?M.?E.?N.?T", u"ENTITLEMENT"), (u"([Ee]).?[nu].?t.?i.?t.?[li].?e.?m.?e.?[nu].?t\\b", u"\\1ntitlement"), (u"E X A M I N A T I O N 
S\\b", u"EXAMINATIONS"), (u"([Ee]).?x.?a.?m.?i.?n.?a.?t.?i.?o.?n.?s\\b", u"\\1xaminations"), (u"E.?X.?A.?M.?I.?[NX].?A.?T.?I.?O.?[NX]\\b", u"EXAMINATION"), (u"([Ee]).?x.?a.?m.?i.?[nu].?[an].?[ts].?[if].?o.?(n|u|ri)\\b", u"\\1xamination"), (u"E.?X.?P.?E.?[NX].?D.?[IF].?T.?U.?[RBEK].?E.?S\\b", u"EXPENDITURES"), (u"([Ee]).?x.?p.?[efs].?[nu].?d.?i[^s]?[td].?u.?[ri].?e.?s\\b", u"\\1xpenditures"), (u"E.?X.?P.?E.?N.?D.?[IF].?T.?U.?[RB].?E\\b", u"EXPENDITURE"), (u"([Ee]).?x.?p.?e.?[nu].?d.?i.?t.?u.?r.?[eo]\\b", u"\\1xpenditure"), (u"DESIGNATIONXS", u"DESIGNATIONS"), (u"([Dd]).?e.?s.?i.?g.?n.?a.?t.?i.?o.?n.?s\\b", u"\\1esignations"), (u"D.?E.?S.?I.?G.?N.?A.?T.?[Ii].?O.?[NX]\\b", u"DESIGNATION"), (u"([Dd]).?[eT].?[sT].?[if].?[goc].?[na].?[ai].?t.?[ix].?[op].?n\\b", u"\\1esignation"), (u"I.?N.?S.?T.?A.?L.?L.?M.?[EK].?N.?T.?S\\b", u"INSTALLMENTS"), (u"([Ii]).?[np].?s.?t.?[ao].?l.?l.?m.?e.?n.?t.?s\\b", u"\\1nstallments"), (u"I N S T A L L M E N T\\b", u"INSTALLMENT"), (u"([Ii]).?(n|ii).?s.?t.?a.?[ld].?l.?m.?e.?n.?t\\b", u"\\1nstallment"), (u"I.?N.?S.?T.?[IF].?T.?U.?T.?I.?O.?N[^']?S\\b", u"INSTITUTIONS"), (u"([Ii]).?n.?s.?t.?i.?t.?[un].?t.?i.?o.?n[^']?s\\b", u"\\1nstitutions"), (u"I.?N.?S.?T.?[IFT].?T.?U.?T.?I.?O.?[NX]\\b", u"INSTITUTION"), (u"([Ii]).?(n|u|q|ii).?s.?[tl].?i.?t.?u.?t.?i.?[op].?n\\b", u"\\1nstitution"), (u"I N S T R U C T I O N A L\\b", u"INSTRUCTIONAL"), (u"I N S T R U C T I O N S\\b", u"INSTRUCTIONS"), (u"\\binst.?r.?uctions\\b", u"instructions"), (u"I.?N.?S.?T.?R.?U.?C.?T.?I.?O.?N\\b", u"INSTRUCTION"), (u"\\b([Ii])[^o]?n.?s[^a]?t.?[ri].?u.?c.?t.?[i'].?o.?(n|ii)", u"\\1nstruction"), (u"[RBEK].?E.?Q.?U.?[Ii].?[RE].?E.?M.?[EK].?[NX].?T.?S\\b", u"REQUIREMENTS"), (u"\\b([Rr]).?[ea].?[qaig].?[uno].?[iu].?[ri].?[eoa].?[ml].?e.?(n|ii).?[til].?s\\b", u"\\1equirements"), (u"R E Q U I R E M E N TO F ", u"REQUIREMENT OF "), (u"[RBEK].?E.?Q.?U.?[Ii].?[RBEK].?E.?M.?[EF].?N.?T", u"REQUIREMENT"), (u"([Rr]).?e.?[qao].?u.?i.?r.?e.?m.?e.?(n|u|ii).?t\\b", u"\\1equirement"), 
(u"J.?U.?[REK].?I.?S.?D.?I.?[CG].?T.?I.?O.?N", u"JURISDICTION"), (u"([Jj]).?u.?[rix].?[il].?s.?[da].?i.?[ceo].?t.?i.?o.?n", u"\\1urisdiction"), (u"R E A S S I G N M E N T S", u"REASSIGNMENTS"), (u"R E A S S I G N M E N T", u"REASSIGNMENT"), (u"R e a s s i g n m e n t", u"Reassignment"), (u"E N L I S T M E N T S", u"ENLISTMENTS"), (u"([Ee])[^e]?n.?l.?i.?s.?t.?m.?e.?n.?t.?s\\b", u"\\1nlistments"), (u"E.?N.?L.?I.?S.?T.?M.?E.?N.?T", u"ENLISTMENT"), (u"([Ee])[^e]?n.?[li].?i.?s.?t.?m.?[eo].?[nr].?t", u"\\1nlistment"), (u"A.?C.?Q.?U.?I.?S.?I.?T.?[IT].?O.?[NX]", u"ACQUISITION"), (u"([Aa]).?c.?[qauog].?u.?i.?[sa].?i.?t.?[ij].?o.?[nuUD]", u"\\1cquisition"), (u"I M P O R T A T I O N", u"IMPORTATION"), (u"([Ii]).?(m|in).?p.?o.?r.?t.?[aeo].?[ti].?i.?o.?n\\b", u"\\1mportation"), (u"([Rr]).?e.?s.?[el].?r.?v.?a.?t.?i.?o.?n", u"\\1eservation"), (u"I.?N.?S.?T.?I.?T.?U.?T.?E[^']?S\\b", u"INSTITUTES"), (u"([Ii]).?n.?s.?t.?i.?t.?u.?t.?e[^']?s\\b", u"\\1nstitutes"), (u"I.?N.?S.?T.?[IFT].?T.?U.?T.?E", u"INSTITUTE"), (u"([Ii]).?n.?s.?t.?[il].?t.?u.?t.?e", u"\\1nstitute"), (u"O.?P.?E.?[RK].?A.?T.?I.?O.?[NX][^']?S\\b", u"OPERATIONS"), (u"\\b([Oo]).?p.?e.?[ri].?a.?[tl].?i.?[oq].?[nu][^']?s\\b", u"\\1perations"), (u"O[^O]?[Pr].?[EK].?[RBEK].?A.?T.?[Ii].?O.?[NX]", u"OPERATION"), (u"([Oo])[^O^o^m]?p.?e.?[ri].?[ak][^c]?[tli].?i.?[opdq].?[nu]", u"\\1peration"), (u"A.?M.?M.?U.?[NM].?I.?T.?I.?[OG].?N", u"AMMUNITION"), (u"\\b([Aa]).?[mn].?m.?u.?[nu].?t.?i.?o.?[nh]", u"\\1mmunition"), (u"C.?O.?M.?M.?[IL].?S.?S.?[Ir].?O.?[NX].?E.?R[^']?S\\b", u"COMMISSIONERS"), (u"C.?O.?M.?M.?[IL].?S.?S.?[Ir].?O.?[NX].?E.?R", u"COMMISSIONER"), (u"C.?O.?M.?M.?[IL].?S.?S.?[Ir].?O.?[NX]", u"COMMISSION"), (u"([Cc])[^u]?[obcp].?[mn].?[mM][^o]?[ilsrx][^c^N]?[saG].?[stgp].?[i'].?[opac].?[nu]", u"\\1ommission"), (u"[RBEK].?E[^Q]?T.?[IE].?[RBEK].?E.?M.?[EK].?N.?T", u"RETIREMENT"), (u"([Rr]).?e[^q]?t.?i.?[ri].?e.?m.?e.?(n|u|r|ii).?[tcU]", u"\\1etirement"), (u"C.?[Oo].?N.?T.?[RBA].?A.?C.?T[^']?S\\b", u"CONTRACTS"), 
(u"\\b([Cc]).?[ocp].?[npu][^r]?[tf][^e]?[ri].?[aer].?[co].?t[^']?s\\b", u"\\1ontracts"), (u"C.?O.?[NX].?T[^E]?[RBEAO][^N^R]?A.?[CP].?T", u"CONTRACT"), (u"\\b([Cc]).?[oj][^u]?[nu][^n^s]?[ti].?[reij].?[aeus].?[cod].?[tT]\\b", u"\\1ontract"), (u"P.?[RB].?I.?S.?O.?N.?E.?[RBE][^']?S\\b", u"PRISONERS"), (u"([Pp]).?r.?i.?s.?o.?n.?e.?r[^']?s\\b", u"\\1risoners"), (u"prisouer", u"prisoner"), (u"P R I S O N S\\b", u"PRISONS"), (u"T.?[RBEK].?A.?[NX].?S.?F.?E.?[RBK][^']?S\\b", u"TRANSFERS"), (u"([Tt]).?r.?a.?[nmu].?s.?[fji].?e.?[riT][^']?s\\b", u"\\1ransfers"), (u"T.?[RBKO].?A.?[NX].?S.?[FEP].?E[^R]?[RBK]", u"TRANSFER"), (u"\\b([Tt]).?[rifaV][^e^w]?a[^i]?[nmhirou][^l]?[srei][^u]?[fijltr][^t]?e[^r]?r", u"\\1ransfer"), (u"C O M M U N I T I E S", u"COMMUNITIES"), (u"([Cc]).?o.?[mn].?[mn].?[un].?i.?t.?i.?e.?s", u"\\1ommunities"), (u"C.?O.?M.?M.?U.?[NXK].?[IFT][^S]?T.?[YT]", u"COMMUNITY"), (u"([Cc]).?[ob].?m.?m.?[un].?[num].?[it].?[tlf].?[yvj]", u"\\1ommunity"), (u"E.?Q.?U.?I.?P.?M.?E.?[NI].?T", u"EQUIPMENT"), (u"([Ee]).?[qao].?[ui].?i.?p[^I]?[mn].?[ec].?(n|u|ii).?t\\b", u"\\1quipment"), (u"\\b([Vv]).?[id][^t]?[oa].?[li].?a.?[ti].?i.?o.?n", u"\\1iolation"), (u"VI O L A T I O N S", u"VIOLATIONS"), (u"VIOLAT I O N", u"VIOLATION"), (u"A.?C.?C.?O.?U.?[NX].?T.?I.?[NX].?G", u"ACCOUNTING"), (u"\\b([Aa]).?[cot].?c.?o.?[un].?[nu].?t.?i.?n.?g", u"\\1ccounting"), (u"A C C O U N T S\\b", u"ACCOUNTS"), (u"\\b([Aa]).?c.?c.?o.?u.?n.?t[^']?s\\b", u"\\1ccounts"), (u"\\bA.?[CG].?C.?O.?U.?[NX].?T", u"ACCOUNT"), (u"\\b([Aa]).?[co].?[cs].?[op].?(u|n|ii).?[nu].?t", u"\\1ccount"), (u"([Cc]) o m m and s:", u"\\1ommands:"), (u"C O M M A N D", u"COMMAND"), (u"([Cc]).?o.?m.?m[^i]?a.?[nu].?d", u"\\1ommand"), (u"S.?[EKB].?[RBEK].?V.?[IiE].?[CO].?E[^'^R]?S\\b", u"SERVICES"), (u"\\b([Ss]).?[ecfos].?[rif].?[vVxy].?[ildfj].?[cep].?[ecot][^'^r]?s\\b(?=[^^])", u"\\1ervices"), (u"\\bS.?[EK].?[RBEK].?V.?[IiE].?[CO].?[EKB]", u"SERVICE"), 
(u"\\b([Ss])[^e]?[ec][^c^x]?[riTtfp][^o]?[vyxrA][^a^o]?[ildnjf].?[coeq][^m^-]?[eB]\\b", u"\\1ervice"), (u"([Uu]).?(n|u|ii).?i.?fo[ri].?m[^']?s\\b", u"\\1niforms"), (u"U[^R]?[NX].?I.?FO[RBEK].?M", u"UNIFORM"), (u"([Uu])[^r]?(n|ii).?i[^n]?f.?o.?[rif].?(m|[aun]i|jn)", u"\\1niform"), (u"P.?[RBEK].?O.?P.?E.?[RBEK].?T.?[YTVy]", u"PROPERTY"), (u"\\b([Pp]).?[ri].?o[^s]?[pnu].?[er].?[rin][^e^s]?[tfi][^n]?(y|v|T|j[rTji'^]?)", u"\\1roperty"), (u"F.?[EK].?D.?[EK].?[RBEK].?A[^L^T]?[LIi]", u"FEDERAL"), (u"([Ff])[^i^r]?[ec][^c^l]?[dao][^l^n]?[ecmo].?[ricd][^i]?[ak][^l]?[lL]", u"\\1ederal"), (u"T[IF]TLE[ ]?S(?=(:| [I5]))", u"TITLES"), (u"([Tt]) i t l e s\\b", u"\\1itles"), (u"\\bT.?[ITfJ].?T.?L.?E", u"TITLE"), (u"\\b([Tt])[^h^r]?[il][^s^y]?t.?[li][^v]?[ec]\\b", u"\\1itle"), ## scannos ### “eh.” for “ch.” (u"\\beh\\. ", u"ch. "), ### “sees.” for “secs.” (u"\\b([Ss])ees\\. ", u"\\1ecs. "), ### “IT” or “IJ” for “U” in “U.S.C.” (u"I[JT]\. ?S\. ?C\.", u"U.S.C."), ### “\bho..ever\b” for “however” (u"\\bho..ever\\b", u"however"), ### “mg” at the end of a word for “ing” (u"([b-hklnoprstvwyz])(m|iii|iu)g\\b", u"\\1ing"), ### “asse...led” for “assembled” (u"asse...led", u"assembled"), ### “however^” -> “however,” (u"ho(w|..)ever\\^", u"however,"), ### miscellaneous character scannos (u"£i(?=([a-mo-z]|n[^c]))", u"a"), (u"\\b[sgilbfoJna^](?=tat\\.)", u"S"), (u"([0-9])['\"* ^]?usc", u"\\1 USC"), (u"\\b([Aa])(n|u|ii)[vy]\\b ", u"\\1ny "), (u"^(?<![(])\^(?=[A-Za-z0-9]{1,3}\) )", u"("), (u"[<*^]?\\^E[CO]", u"SEC"), (u"\\ban\^ ", u"any "), (u"\\bTTie", u"The"), (u"0(?=(MB|PM))", u"O"), (u"[EK](?=el(a|o|i[^i^m]))", u"R"), (u"([Ss])iii(?=(ch|h|j|s))", u"\\1ub"), (u"\\boi\\^(?=a)", u"org"), (u"wii(?=ic(h|[li]i))", u"wh"), (u" \\^(?=cal)", u" fis"), (u"\\boi\\^(?=[ep])", u"op"), # end of April 2012 run ################################################################################################### # September 2011: more corrections resulting from identifying mis-OCR'ed text, plus # further 
elimination of excess whitespace. Begin to standardize citation forms # to Statutes at Large and U.S. Code. Dehyphenate lines to improve accuracy of # concordance. # Estimated number of changes to dataset resulting from this run: 880,000 # # character substitution/scannos: ## a (u"\\bStete", u"State"), (u"Ei(?=nd)", u"a"), (u"\\befter\\b", u"after"), ## al (u"shedl", u"shall"), ## B (u"H(?=e it)", u"B"), ## b (u"J\\)(?=y)", u"b"), ## C (u"\\bSE[^C]\.(?= [0-9])", u"SEC."), (u"\\(['^](?=[hio][a-z][a-z][^)])", u"C"), (u"\\beomm\\b", u"Comm"), (u"\\bCJo(?=[^v])", u"Co"), (u"\\bUSe\\b", u"USC"), (u"[eO](?=ongress)", u"C"), (u"[eO](?=ONGRESS)", u"C"), (u"\\bO(?=o[mn])", u"C"), ## c (u"se[a-z][':;bcjtC^]tion(?=[^e])", u"section"), ## D (u" I\\)\. (?=[A-Z])", u" D. "), ## E (u"([CR])P\\^S", u"\\1ES"), ## e (u"\\b([Tt])(h|li)c\\b", u"\\1he"), ## ee (u"\\bF[ir]( a|a )a", u"Free"), (u"\\bF[ir]w\\b", u"Free"), ## f (u"([EeOo])fiic", u"\\1ffic"), ## fi (u"([EeOo])fHc", u"\\1ffic"), ## G (u"\\bCJ(?=ov)", u"G"), ## gr (u"para\\^aph", u"paragraph"), ## h (u"([^a^c^g^l^C^K])li(?=(arb|av|ere|ea|el[^m]|i[grs]|osp|ous|uma))", u"\\1h"), (u"\\bs(ii|u)cli\\b", u"such"), (u"tli(?=(ir|r|o[a-z]))", u"th"), (u"\\beacli\\b", u"each"), (u"\\bwliicli\\b", u"which"), (u"([Cc])liapt[ce]r", u"\\1hapter"), (u"\\b([Ww])ne(?=[^d^'][^s])", u"\\1he"), (u"liundred", u"hundred"), (u"(m|ui|lu)ontlis\\b", u"months"), (u"cliange", u"change"), (u"liigli", u"high"), (u"([Ff])ra[nu]cliise", u"\\1ranchise"), (u"\\blliis\\b", u"this"), ## i (u"([st])lon", u"\\1ion"), ## J (u"\\.l(?=(anu|un))", u"J"), ## l (u"\\btitie\\b", u"title"), ## LI (u"\\bAPPU", u"APPLI"), (u"E[Ss]TABUSH", u"ESTABLISH"), (u"\\bUMIT", u"LIMIT"), (u"P[UuV]BUC", u"PUBLIC"), (u"\\bFACIU", u"FACILI"), (u"\\bQUAUF", u"QUALIF"), (u"POUCY", u"POLICY"), (u"VAUD", u"VALID"), (u"ABOUSH", u"ABOLISH"), ## li (u"([bdip])U", u"\\1li"), (u"Uc\\b", u"lic"), (u"eU(?=[gv])", u"eli"), (u"Umit(?=[^.])", u"limit"), ## ll (u"shaU", u"shall"), (u"U(?=ment)", 
u"ll"), (u"wiU\\b", u"will"), ## m (u"\\bfroa\\b", u"from"), (u"([Dd])epart..(i?)ent", u"\\1epartment"), (u"\\b([Aa])iiie", u"\\1me"), ## n (u"\\bfollowlii[gs]\\b", u"following"), (u"([Uu])poii", u"\\1pon"), ## of (u"\\boif(?= )", u"of"), (u"\\boif(?=-)", u"off"), ## p (u"[[ij|}][)>](?=[a-z])", u"p"), ## q (u"([Ee])\\(j(?=[a-z])", u"\\1q"), ## r (u"\\bi-(?=[aeiou])", u"r"), (u"([aeou])i['*](?=[^i])\\b", u"\\1r"), ## U (u"[LITtifXUJr]'(?=(nited|NITED))", u"U"), ## u (u"fiill", u"full"), (u"\\b([Ccf]?)ovir", u"\\1our"), ## un (u"\\bcoimt", u"count"), (u"\\bimtil\\b", u"until"), (u"himdred", u"hundred"), (u"Fimd", u"Fund"), (u"moimt", u"mount"), (u"\\bluider\\b", u"under"), (u"([Cc])oimcil", u"\\1ouncil"), ## ur (u"\\boiu[^a-z^ ]", u"our"), (u"[li]u.ban(?=[^d])", u"urban"), ## w (u"\\bw\\^", u"w"), (u"([aeiotu])w\\^", u"\\1w"), (u"Avith", u"with"), (u"Av(h|li)ic(h|li)", u"which"), (u"Av(?=o[^c^d^i^n^y])", u"w"), (u"([aeiou])Av", u"\\1w"), ## y (u"pa[j)]([a-z]+)ents\\b", u"payments"), (u"pa[j)]([a-z]+)ent\\b", u"payment"), (u"([Aa]n|a[rv]|b|[Dd]a|el|it|[Mm]a|n[clt]|or|r[mt]|tl|u[dr])j\\^ ", u"\\1y "), ## yi (u"jd(?=ng)", u"yi"), ## symbols (u"(\\([A-Z])X(?=[ivx])", u"\\1)("), # # dehyphenation (u'([^-][A-Za-z][a-eg-xz])-\r\n([a-z])', u'\\1\\2'), # # close up spacing: ## 15 (u"([Nn]).?o.?t.?w.?[it].?t.?[hnH].?[sS].?t.?[aeo].?[nm].?[dao].?[ij].?[nhpaQ^].?[gof]\\b", u"\\1otwithstanding"), (u"[EKR].?E.?P.?[EKR].?E.?S.?E.?N.?T.?A.?T.?I.?V.?E[^(]S", u"REPRESENTATIVES"), (u"\\b([Rr]).?[et].?[pfvynD].?[rTfvti'y].?e.?[sHnxa].?[ec&].?[nmu].?[tif].?[aeondA].?[tif].?[ivtl/'].?[vrcoeym\].?e[^']?s\\b", u"\\1epresentatives"), ## 14 (u"A.?D.?M.?[IE].?[NX].?[I1].?[S8].?T.?[RKOBE].?A.?[TI].?[IT].?V.?[EB]", u"ADMINISTRATIVE"), (u"([Aa]).?[daoH].?[mn].?i.?[nou].?i.?s.?t.?[rio].?a.?[tc].?i.?[vyr].?[eG©s]", u"\\1dministrative"), (u"\\b([Rr]).?e.?p.?[ri].?e.?[saH].?e.?n.?[ts].?[aeo].?t.?i.?[vrc].?e\\b", u"\\1epresentative"), ## 13 (u"\\bD.?E.?T.?E.?[RHEK].?M.?I.?[NX].?A.?T.?I.?O.?[NX]", 
u"DETERMINATION"), (u"([Dd]).?e[^x]?t.?e.?[ri].?[mn].?[ij].?[nao].?a.?t.?[-ij].?[ocQ].?n\\b", u"\\1etermination"), (u"E.?[NX].?V.?I.?[RKBE].?O.?[NX].?M.?E.?[NX].?T.?A.?L", u"ENVIRONMENTAL"), (u"([Ee]).?n.?[vy].?i.?r.?[ob].?[nh].?m.?e.?n.?t.?[aeo][^n]?[lid]\\b", u"\\1nvironmental"), (u"E.?S.?T.?A.?B.?L.?I.?S.?H.?M.?E.?[NX].?T", u"ESTABLISHMENT"), (u"E S T A B U S H M E N T", u"ESTABLISHMENT"), (u"([Ee]).?[si].?t.?[aeo].?[bho].?[lfiU].?[i'].?s.?[hbn].?m.?e.?[nu].?t", u"\\1stablishment"), (u"I.?[NX].?T.?E.?[RBEK].?[NX].?A.?T.?I.?O.?[NX].?A.?L", u"INTERNATIONAL"), (u"\\b([Ii]).?n.?t[^h]?e.?r.?[nau].?a.?t.?[ij].?o.?n.?[aso].?[lLF]", u"\\1nternational"), (u"Q U A R T E R M A S T E R", u"QUARTERMASTER"), (u"([Qq]).?[nu].?a.?r.?t.?e.?r.?[mn].?a.?s.?t.?e.?r", u"\\1uartermaster"), ## 12 (u"A.?M.?O.?[RE].?T.?I.?Z.?A.?T.?I.?O.?N", u"AMORTIZATION"), (u"a m o r t i z a t.?i.?o.?n\\b", u"amortization"), (u"C.?O.?[NX].?S.?E.?[RBK].?V.?A.?T.?I.?O.?[NX]", u"CONSERVATION"), (u"([Cc]).?o.?[nu].?s.?[ec].?[ri].?v.?a.?[tf].?i.?o.?[nu]\\b", u"\\1onservation"), (u"C O N S T I T U T I O N", u"CONSTITUTION"), (u"([Cc]).?o.?[nu].?s.?t.?i.?t.?u.?t.?[il].?o.?[nr]", u"\\1onstitution"), (u"N O T I F I C A T I O N", u"NOTIFICATION"), (u"([Nn]).?[oq].?t.?i.?[ft].?[il].?c.?a.?t.?i.?o.?n\\b", u"\\1otification"), (u"P.?A.?[RB].?T.?I.?C.?I.?P.?A.?T.?I.?O.?[NX]", u"PARTICIPATION"), (u"\\b([Pp]).?a.?r.?t.?[i\]].?c.?i.?p.?[aM].?t.?i.?o.?n\\b", u"\\1articipation"), (u"P.?[RBEX].?E.?S.?E.?[RB].?V.?A.?T.?I.?O.?[NX]", u"PRESERVATION"), (u"([Pp]).?[ri].?e.?s.?e.?r.?v.?a.?t.?i.?o.?n\\b", u"\\1reservation"), (u"([Ss]).?[up].?[boD][^-]?[pa].?[an].?[rip].?[ae].?g.?[ri].?a.?[po].?[hnk]", u"\\1ubparagraph"), (u"U.?N.?E.?M.?P.?L.?O.?Y.?M.?E.?[NX].?T", u"UNEMPLOYMENT"), (u"([Uu]).?n[^d]?e.?[mn].?p.?[l'].?o.?[yjv].?[mn].?[ce].?[nu].?t", u"\\1nemployment"), ## 11 (u"A.?G.?[REK].?I.?C.?U.?L.?T.?U.?[RBEK].?E", u"AGRICULTURE"), (u"([Aa]).?[g^].?[riTF].?i.?c.?[uo].?[-l].?t.?[um].?[rTiF].?[ec]\\b", u"\\1griculture"), 
(u"A.?P.?P.?(U|L.?I).?C.?A.?T.?I.?O.?[NX]", u"APPLICATION"), (u"([Aa]).?p.?[pn].?[lift].?i.?[ce].?[ao].?t.?[il].?o.?n", u"\\1pplication"), (u"C.?O.?[RB].?[RB].?E.?C.?T.?I.?O.?[NX].?S", u"CORRECTIONS"), (u"([Cc]).?o.?[ri].?r.?e.?c.?t.?[il].?o.?n.?s\\b", u"\\1orrections"), (u"D.?E.?F.?I.?[NX].?I.?T.?I.?O.?[NX].?S", u"DEFINITIONS"), (u"\\b([Dd]).?[ec].?[filrt].?[ilt].?[nau].?i.?t.?i.?o.?[nu].?[sa]\\b", u"\\1efinitions"), (u"([Dd]).?e.?s.?i.?[ge].?[np].?a.?t.?[ij].?n.?[ge]\\b", u"\\1esignating"), (u"D.?E.?T.?E.?R.?M.?I.?N.?I.?N.?G", u"DETERMINING"), (u"([Dd]).?e.?t[^h]?[eac].?[rni].?m.?i.?n.?i.?n.?g", u"\\1etermining"), (u"D.?E.?V.?[EB].?L.?O.?P.?M.?E.?[NX].?T", u"DEVELOPMENT"), (u"([Dd]).?[ec].?v.?[ec].?[li].?o.?p.?m.?[ec].?[nu].?t\\b", u"\\1evelopment"), (u"E.?[NX].?F.?O.?[EKR].?C.?E.?M.?E.?[NX].?T", u"ENFORCEMENT"), (u"([Ee])[^i]?[nr].?[firt].?o.?[rfi].?[coe][^l]?e.?m.?[ec].?[nu].?t\\b", u"\\1nforcement"), (u"\\be.?[sx].?t.?[aeo].?[boh].?[lf].?i.?[sa&].?[hnt].?[ec].?d\\b", u"established"), (u"I.?[NX].?F.?O.?[EKR].?M.?A.?T.?I.?O.?[NX]", u"INFORMATION"), (u"([Ii]) n f o r m a t i o n", u"\\1nformation"), (u"([Ii])[nu][fitl]o[rif][ma][an]ti[op]n", u"\\1nformation"), (u"L.?E.?G.?I.?S.?[LI].?A.?T.?[IJ].?V.?E", u"LEGISLATIVE"), (u"([Ll]).?e.?[gs].?i.?s.?l.?a.?t.?i.?v.?e", u"\\1egislative"), (u"P.?A.?[RB].?T.?N.?E.?[RB].?S.?H.?I.?P", u"PARTNERSHIP"), (u"([Pp]).?a.?[ri].?t.?[na].?e.?[rif][^-]?[se].?[hn].?i.?[pj]\\b", u"\\1artnership"), (u"\\b([Rr]).?e.?[aA].?[d4].?[ji].?u.?s.?t.?m.?e.?n.?t\\b", u"\\1eadjustment"), (u"[RBEK].?E.?S.?E.?[RBEK].?V.?A.?T.?I.?O.?[NX]", u"RESERVATION"), (u"\\b([Rr]).?e.?[si].?[el3].?r.?[vs].?a.?t.?i.?[obc].?n\\b", u"\\1eservation"), (u"T E M P O R A R I L Y", u"TEMPORARILY"), (u"([Tt]).?e.?[mn].?p.?o.?[ri].?a.?r.?i.?l.?[yv]", u"\\1emporarily"), (u"T.?E.?[RBEK].?M.?I.?[NX].?A.?T.?I.?O.?[NX]", u"TERMINATION"), (u"\\b([Tt]).?[ec].?[ri].?[mn].?[ij].?[naou].?a.?[tf].?[ij].?[ocQ].?n", u"\\1ermination"), ## 10 
(u"A.?[MH].?E.?[NX].?D.?M.?[EP].?[NX].?T.?S", u"AMENDMENTS"), (u"([Aa]).?[mn].?[ecio].?[nu].?[daou].?[mn].?[ec].?[nadhu].?[td][^']?[se]\\b", u"\\1mendments"), (u"C.?O.?N.?F.?O.?[EKR].?M.?I.?N.?G", u"CONFORMING"), (u"([Cc]).?o.?[nm].?[fr].?o.?[rnfi].?[maD].?i.?n.?g", u"\\1onforming"), (u"D E D U C T I O N S", u"DEDUCTIONS"), (u"\\b([Dd]).?[ec].?d.?u.?c.?t.?[il'].?o.?[nu].?s\\b", u"\\1eductions"), (u"D.?E.?F.?I.?[NX].?I.?T.?I.?O.?[NX]", u"DEFINITION"), (u"([Dd]).?[ea].?[filtr].?[ilt][^r]?[nha][^v]?[il][^c]?t.?i.?[oc].?n", u"\\1efinition"), (u"D E T E R M I N E D", u"DETERMINED"), (u"d.?e.?t.?e.?[rin].?[ma].?i.?[nau].?e.d\\b", u"determined"), (u"\\bD.?I.?S.?A.?B.?I.?L.?[IT].?T.?[YT]", u"DISABILITY"), (u"([Dd])[^m^v]?i[^n^t]?[sa].?a.?[bo].?[id].?[lf].?i.?t.?[yv]", u"\\1isability"), (u"E[^E]?M.?P.?L.?O.?Y.?M.?E.?[NX].?T", u"EMPLOYMENT"), (u"([Ee])[^e]?[mn].?p.?[li'].?[oc].?[yjvi].?[mn].?[eoc].?[nu].?[tf]", u"\\1mployment"), (u"\\b([Pp]).?r[^C]?o.?p.?o[^s]?[ri].?t.?i.?o.?n\\b", u"\\1roportion"), (u"P.?[REK].?O.?T.?[EB].?C.?T.?I.?O.?[NX]", u"PROTECTION"), (u"\\b([Pp]).?[rit].?[oa].?t.?[eolG].?[cdeij].?[tl].?i.?[ob].?[nfH]\\b", u"\\1rotection"), (u"P.?[RBEK].?O.?V.?I.?S.?[IT].?O.?[NX][^']?S", u"PROVISIONS"), (u"\\b([Pp])[^e^p]?[rip][^m^p]?[oa][^c^p]?[vyTxtrV].?[ilFL][^d^n^z]?[sgaer].?[il'].?[oa][^a^n]?[nuar][^n]?s\\b", u"\\1rovisions"), (u"\\b([Qq]).?u.?[ae].?n.?t.?i.?t.?[il].?e.?s\\b", u"\\1uantities"), (u"\\b[RBE].?E.?S.?O.?L.?U.?T.?I.?O.?[NX]", u"RESOLUTION"), (u"\\b([Rr]).?[eca].?s.?[ou].?l.?(u|y|ii).?t.?[ilj].?o.?(n|u|li|ri)", u"\\1esolution"), (u"PE[BE]MITTING", u"PERMITTING"), (u"\\b([Pp]).?e.?r.?m.?i.?t.?t.?i.?n.?g\\b", u"\\1ermitting"), (u"P U R C H A S I N G", u"PURCHASING"), (u"([Pp]) u r c h a s i n g", u"\\1urchasing"), (u"S.?U.?B.?C.?H.?A.?P.?T.?[EB].?[RBEK]", u"SUBCHAPTER"), (u"([Ss]).?[uad].?[bD].?c.?[hn].?a.?p.?t.?[ec].?[rct]", u"\\1ubchapter"), (u"t h r o u g h o u t", u"throughout"), ## 9 (u"A.?U.?T.?H.?O.?[RBEK].?I.?T.?Y", u"AUTHORITY"), 
(u"\\b([Aa]).?[unJ].?[t ][^M]?[hnH].?[obpq].?r.?[ifl][^z]?[tlfi].?[yv/^]\\b", u"\\1uthority"), (u"A.?M.?E.?[NX].?D.?M.?E.?[NX].?T", u"AMENDMENT"), (u"([Aa])[^m]?m[^m]?[eoc][^u]?[nuh][^n]?[daoTt].?m.?[esc][^h]?[nuhHa].?t", u"\\1mendment"), (u"C.?O.?M.?M.?I.?T.?T.?E.?[EB]", u"COMMITTEE"), (u"([Cc]).?o.?[mn].?[mn].?[i'].?[t'].?t.?[ecio].?e\\b", u"\\1ommittee"), (u"\\b([DR]).?E.?D.?U.?C[^A]?T.?I.?O.?[NX]", u"\\1EDUCTION"), (u"\\b([DdRr])[^e]?e[^e]?d.?u.?c[^a]?t.?[il'].?o.?[nu]", u"\\1eduction"), (u"D E F E N D A N T", u"DEFENDANT"), (u"\\b([Dd]).?e.?[fri].?e.?[nu].?d.?a.?[nu].?t", u"\\1efendant"), (u"D.?E.?T.?E.?[RB].?M.?I.?N.?E", u"DETERMINE"), (u"([Dd]).?e.?t.?[ce].?r.?[mn].?i.?n.?e", u"\\1etermine"), (u"\\b([Ee]).?[damMuo].?[una].?[co].?[au].?[tsl(][^o]?[ir].?[oci9].?[nuiajUt»^]\\b", u"\\1ducation"), (u"E.?F.?F.?E.?C.?T.?I.?V.?E", u"EFFECTIVE"), (u"([Ee])[^a^c^f^i^l^n^s^t]?[fitl][^a^i^l^t]?[fTitFrjEC][^t^x]?[ecs].?[co].?[tcf].?[ilY][^a^e^o]?[vxAyt].?e", u"\\1ffective"), (u"E.?X.?E.?C.?U.?T.?I.?V.?E", u"EXECUTIVE"), (u"([Ee]).?x.?[es].?[co].?[un].?t.?[il].?v.?[ec]", u"\\1xecutive"), (u"i m p o r t e r s", u"importers"), (u"I.?[NX].?S.?U.?[RBEK].?A.?[NX].?C.?E", u"INSURANCE"), (u"([Ii])[^g]?[nu][^']?[sgt][^a^e^o]?[umv][^a]?[ri][^d^e]?[as].?[numap].?c.?[ect]", u"\\1nsurance"), (u"n o r t h e r l y", u"northerly"), (u"O.?P.?E.?[RK].?A.?T.?I[^O]?[NX].?G", u"OPERATING"), (u"([Oo])[^o]?p.?[ce].?[ri].?a.?[tf].?i[^o]?n.?g", u"\\1perating"), (u"P.?A.?[RE].?A.?G.?R.?A.?P.?H", u"PARAGRAPH"), (u"([Pp]).?a.?[ri].?a.?[-efgio^].?[ri].?a.?[pgo].?(h|n|[jl]i)", u"\\1aragraph"), (u"P E R F O R M E D", u"PERFORMED"), (u"p.?[ea].?[rf].?[fr].?o.?r.?[ma][^t]?e[^n]?d\\b", u"performed"), (u"P E R M I T T E D", u"PERMITTED"), (u"\\b([Pp]).?e.?r.?m.?i.?t.?t.?e.?d\\b", u"\\1ermitted"), (u"R E I N S U R E R", u"REINSURER"), (u"r e i n s u r e r", u"reinsurer"), (u"S.?E.?[Ce].?[RBEK].?E.?T.?A.?[RBEK].?Y", u"SECRETARY"), 
(u"\\b([Ss])[^l^p^t]?e.?[cop][^u]?[rilxfT].?[ecv].?[tfilU].?[aoen'][^r]?[rixutzjy].?[yijv/-})>35,^]\\b", u"\\1ecretary"), (u"([Ss]).?o.?u.?t.?h.?e.?r[^n]?l.?y", u"\\1outherly"), (u"S T A T E M E N T", u"STATEMENT"), (u"([Ss])[^s]?t.?[aeos].?t.?[ec].?[mn].?[ec].?[nu].?t\\b", u"\\1tatement"), (u"\\b([Ss]).?[ui].?p.?p.?l.?[yv].?i.?n.?[go]\\b", u"\\1upplying"), (u"T E M P O R A R Y", u"TEMPORARY"), (u"([Tt]).?e.?[mna].?p.?o.?[ri].?a.?[ri][^l]?[yv]", u"\\1emporary"), (u"([TW]) H E [RBEK] E F O [RBEK] E", u"\\1HEREFORE"), ## 8 (u"\\b([Aa]).?m.?e.?n.?d.?i.?[nu].?g\\b", u"\\1mending"), (u"([Aa]).?r.?t.?i.?c.?l.?e[^']?s\\b", u"\\1rticles"), (u"([Cc]).?[ob].?m[^a]?[mn].?[e4].?r.?[coe].?[ec]\\b", u"\\1ommerce"), (u"\\b([Dd]).?[ilL].?[sh^].?[tfI].?[rit][^l]?[ilf][^l]?[cbter][^u]?[-trc]\\b", u"\\1istrict"), (u"D.?R.?A.?W.?B.?A.?C.?K", u"DRAWBACK"), (u"\\be.?a.?s.?t.?e.?r[^n]?l.?y", u"easterly"), (u"\\b([Ii]).?m.?p.?o.?r.?t.?e.?r\\b", u"\\1mporter"), (u"([Ii]).?n.?t.?[ec].?r.?[i.][^r]?[orQ)][^r]?[ri]\\b", u"\\1nterior"), (u"\\b([Nn]).?[an ].?[tl].?[if].?[op].?[nuaHort].?a[^n]?[lidfrLJT]\\b", u"\\1ational"), (u"\\b([Nn]).?o.?r.?t.?[hn].?e.?r.?n\\b", u"\\1orthern"), (u"\\bO.?F.?F.?[Ii].?C.?E.?R[^']?S\\b", u"OFFICERS"), (u"\\b([Pp]).?[a^].?[yvr][^e^t]?m.?e.?[numh].?t.?s\\b", u"\\1ayments"), (u"\\b([Pp]).?r[^R]?e.?s.?e.?r.?v.?e\\b", u"\\1reserve"), (u"\\b([Pp]).?[ri].?e.?v.?i.?o.?u.?[s^]\\b", u"\\1revious"), (u"\\b([Pp]).?u.?r.?[ce].?h.?a.?s.?[ce]\\b", u"\\1urchase"), (u"([Pp]).?[nu].?r.?p.?o.?s.?[ce].?s\\b", u"\\1urposes"), (u"\\b([Pp])[^e]?[un].?[ri].?[seag].?[unai].?[ast^].?[num].?[td]\\b", u"\\1ursuant"), (u"\\b([Qq]).?[uji].?a.?n.?t.?[-i].?t.?[yvj]\\b", u"\\1uantity"), (u"\\b([Ss])[^m]?a.?l.?a.?[ri].?i.?[ec].?s\\b", u"\\1alaries"), (u"\\b([Ss])[esofc][rif7][vxVy./\][ilfdj][cep][eco]s\\b", u"\\1ervices"), (u"([Ss]) e r v i c e s", u"\\1ervices"), (u"([Ss]).?o.?u.?t.?h.?e.?r.?n\\b", u"\\1outhern"), (u"([Ss]).?t.?and.?a.?[ri].?d", u"\\1tandard"), 
(u"\\b([Ss]).?t.?a.?t.?[una][^e]?t.?[eco].?[st]\\b", u"\\1tatutes"), (u"([Ss]).?u.?p.?e.?r[^v]?i.?o.?r", u"\\1uperior"), (u"\\b([Ss]).?u.?p.?p.?l.?i.?e.?(?=[rd])", u"\\1upplie"), (u"([Tt]).?[rVi].?[ec].?a.?s.?u.?[ritT].?[yv]\\b", u"\\1reasury"), (u"([Vv]).?e.?t.?e.?[ri].?a.?[nu][^']?s\\b", u"\\1eterans"), (u"w.?e.?s.?t.?e.?r[^n]?l.?y", u"westerly"), ## 7 (u"\\b([Aa]).?[fi£].?[fTiEGlC].?[aoe].?[il].?r[^e]?s\\b", u"\\1ffairs"), (u"([Aa]).?[mn].?[ce].?[nu].?d.?[ce].?d\\b", u"\\1mended"), (u"\\bA.?[REBK].?[TI].?I.?C.?[LUX].?[EJK]\\b", u"ARTICLE"), (u"([Cc]).?[hn].?a.?p.?t.?[ce].?r", u"\\1hapter"), (u"([Cc])[^c]?o.?[mn].?p.?a.?[nu].?y", u"\\1ompany"), (u"D A M A G E S", u"DAMAGES"), (u"\\b([Dd]).?[el?][^v]?[fir].?[e&].?[nup].?s.?[ecBQ]\\b", u"\\1efense"), (u"\\b([Ee]).?a.?s.?t.?e.?r.?n", u"\\1astern"), (u"\\be.?n.?t.?e.?r.?e.?d\\b", u"entered"), (u"F.?O.?[EKR].?E.?I.?G.?[NX]", u"FOREIGN"), (u"([Ff]).?u.?r.?n.?i.?s.?h", u"\\1urnish"), (u"G.?E.?N.?E.?R.?A[^L]?L", u"GENERAL"), (u"\\b([Hh]).?o.?u.?[so].?[il].?[nuh^].?[gc]\\b", u"\\1ousing"), (u"\\bI n d i a n ([as])\\b", u"Indian\\1"), (u"\\b([Ii])[^t]?s.?l.?and[^']?s\\b", u"\\1slands"), (u"([Jj]).?u.?s.?t.?i.?c.?[eco]\\b", u"\\1ustice"), (u"M.?A.?X.?I.?M.?U.?M", u"MAXIMUM"), (u"([Mm]).?a.?x.?i.?[mn].?[nu].?[mn]", u"\\1aximum"), (u"M.?I.?N.?I.?M.?U.?M", u"MINIMUM"), (u"\\b([Mm]).?i.?n.?i.?[mn].?[nu].?[mn]", u"\\1inimum"), (u"\\b([Mm]).?o.?[nm].?t[^i]?[hnb].?l.?y\\b", u"\\1onthly"), (u"([Nn]) o t h i n g", u"\\1othing"), (u"([Pp]) a r t i a l", u"\\1artial"), (u"\\b([Pp]).?a.?[y35vs}^][^e]?m.?e.?[nmu].?[t1]\\b", u"\\1ayment"), (u"\\b([Pp]).?e.?r.?m.?i.?t.?s\\b", u"\\1ermits"), (u"P.?R.?I.?V.?A.?T.?E", u"PRIVATE"), (u"\\b([Pp]).?[rti].?o[^c]?[geK].?[riT].?[a&].?[mnoe)^]\\b", u"\\1rogram"), (u"([Rr])[^r]?e.?g.?[nu].?[li].?a.?r", u"\\1egular"), (u"\\b([Ss]).?t.?a.?t.?u[^e]?t.?e\\b", u"\\1tatute"), (u"\\b([Ss]).?u.?[bDQ].?[po].?a.?[ri].?t\\b", u"\\1ubpart"), (u"([Tt]).?e.?x.?t.?i.?l.?e\\b", u"\\1extile"), (u"the r e i n", 
u"therein"), (u"the r e t o", u"thereto"), (u"([Tt]).?h[^o]?r.?o.?u.?g.?h\\b", u"\\1hrough"), (u"\\bW.?e.?s.?t.?e.?r.?n\\b", u"Western"), (u"\\bW.?H.?E.?[EKR].?E.?A.?S\\b", u"WHEREAS"), (u"\\bW.?H.?E.?[EKR].?E.?O.?F\\b", u"WHEREOF"), (u"\\b([Ww]).?r.?i.?t[^h]?i.?[nhQ].?[gao]\\b", u"\\1riting"), ## 6 (u"ACT.?I.?O.?[NX]", u"ACTION"), (u"\\ba.?m.?e.?n.?d.?s\\b", u"amends"), (u"\\b([Aa])[^d^s]?[gse^][^e^h^n^s^t]?[eac][^c^i]?[nur].?[cpqge(][^x]?[yjv35]\\b", u"\\1gency"), (u"\\b([Aa]).?[nu][^o]?n.?u.?a.?l\\b", u"\\1nnual"), (u"\\b([Cc])[^h]?e.?[nu].?[tl][^r]?e.?r\\b", u"\\1enter"), (u"\\b([Dd]).?[es].?[po].?u.?t.?[yv]\\b", u"\\1eputy"), (u"\\b([Dd]).?[un][^j]?[ri].?[il].?[nug].?[gj]\\b", u"\\1uring"), (u"([Ee]).?[nu].?e.?[ri].?g.?[yvj3^]\\b", u"\\1nergy"), (u"([Ff]) o r m e r", u"\\1ormer"), (u"([Hh]).?[ce].?[a«].?[ld].?[tf].?[hnk|]\\b", u"\\1ealth"), (u"I.?n.?d.?[iU].?a.?[numh]\\b", u"Indian"), (u"([Ii]) n t e n t", u"\\1ntent"), (u"\\bI[^t]?s.?l.?and\\b", u"Island"), (u"\\bm.?o.?[nm].?t[^i]?[hnb][^']?s\\b", u"months"), (u"\\b([Nn]).?[opQ].?t.?[iV].?[coe][^r]?[epoc]\\b", u"\\1otice"), (u"\\bO.?[FPE].?[FTPE7][^E^R]?[Ii][^J]?[CO].?[EB]\\b", u"OFFICE"), (u"\\b([Pp])[^r]?[eaoc].?[ritljf^].?[itlj].?[op].?d\\b", u"\\1eriod"), (u"\\b([Pp]).?e.?r.?m.?i.?t\\b", u"\\1ermit"), (u"([Pp]).?o[^f^F]?u.?n.?d.?s\\b", u"\\1ounds"), (u"\\b([Rr]).?[ecaf][^a^k^s^x]?p[^e]?[oc°^].?[ri'][^a^n]?[tl1]\\b", u"\\1eport"), (u"\\b([Ss]).?a[^n]?[li][^v]?a.?[ri].?[yvj]\\b", u"\\1alary"), (u"\\b([Tt])[^h^r]?a[^c^l^s]?([kx]).?i.?n.?g\\b", u"\\1a\\2ing"), (u"([Tt]) h a n k s", u"\\1hanks"), (u"\\b([Ww]).?ithin\\b", u"\\1ithin"), ## 5 (u"([Aa]) m e n d\\b", u"\\1mend"), (u"\\b([Aa]).?m.?[oQ].?n.?[gj]\\b", u"\\1mong"), (u"\\b([Dd])[^i^r^u]?a[^m^r]?t.?e[^f]?([ds])\\b", u"\\1ate\\2"), (u"\\b([Dd]).?e[^p]?a.?t.?[hnk]\\b", u"\\1eath"), (u"E a r t h", u"Earth"), (u"\\b([Ee])[^i^r]?n.?t[^h]?[ec].?r\\b", u"\\1nter"), (u"\\b([Ee]).?n.?[tf][^o]?[ri][^t]?[yv]\\b", u"\\1ntry"), (u"([Ff])[^f]?u.?n.?d[^']?s\\b", 
u"\\1unds"), (u"\\b([Hh])[^a^e]?u[^l^-]?[mna].?a.?[nm]\\b", u"\\1uman"), (u"\\b([Ii])[^n]?t[^d^i]?[ce][^o]?[mn][^(^'^d^i]?s\\b", u"\\1tems"), (u"([Jj]).?o.?i.?[nu].?t\\b", u"\\1oint"), (u"\\b([Ll])[^l]?[a^][^g^r]?[boD].?[o6].?[rT]\\b", u"\\1abor"), (u"\\bl and s\\b", u"lands"), (u"\\b([Mm]).?e[^g]?a[^r]?[mn].?s\\b", u"\\1eans"), (u"\\b([Mm]).?o.?[nm].?t[^i]?[hnb]\\b", u"\\1onth"), (u"\\b([Nn])[^f^w]?o.?r.?t[^o]?[hn]\\b", u"\\1orth"), (u"\\bO.?T.?H.?E.?[EKR]\\b", u"OTHER"), (u"([Pp]) a r t y", u"\\1arty"), (u"([Qq]).?[uj].?[opO].?[tf].?a\\b", u"\\1uota"), (u"R e a d y", u"Ready"), (u"\\b[EKR].?U.?L.?E.?S", u"RULES"), (u"\\bT.?[IiFJ].?[TLiU^].?[LUEX].?[EPBFK]\\b", u"TITLE"), (u"\\b([Tt])[^e]?[rVinYTt^][^e]?[amd][^e]?d[^l]?e\\b", u"\\1rade"), (u"\\b([Uu])[^q^s]?[nuimrhdaHD^][^c^f^T]?[daouj^][^T]?[ecp6si^].?r\\b", u"\\1nder"), (u"([Uu]) n d u e", u"\\1ndue"), (u"\\b([Uu]).?[rif][^s]?[bh].?[an][^i]?[nu]\\b", u"\\1rban"), (u"\\b([Ww]).?[hnmfbK%][^l]?[ila'^][^l]?[cdpoi;].?h\\b", u"\\1hich"), ## 4 (u"\\b([Dd])[^i^r^u]?a[^m^n^r^t]?t.?e\\b", u"\\1ate"), (u"([Dd]) e b t", u"\\1ebt"), (u"F o r t\\b", u"Fort"), (u"\\b([Ff])[^o]?r.?o.?m\\b", u"\\1rom"), (u"\\b([Ii])[^n]?t[^i]?[ce][^a^o]?m\\b", u"\\1tem"), (u"\\b([Ll])[^i^o]?[ae^].?w[^e^i^n^'^(]?s\\b", u"\\1aws"), (u"\\b([Ll])[^i^o]?e[^a^d^i^n]?s.?s\\b", u"\\1ess"), (u"\\b([Mm])[^o]?o[^h^n^o^u]?r[^i^s]?e\\b", u"\\1ore"), (u"([Nn]) o t e\\b", u"\\1ote"), (u"\\b([Pp]).?a.?r.?t\\b", u"\\1art"), (u"\\br.?e[^g^l^m^p^w^U]?a[^n^r]?[d4]\\b", u"read"), (u"\\b[EKR].?U.?L.?E\\b", u"RULE"), (u"([Tt]) e r m", u"\\1erm"), (u"\\b([Tt]).?e.?x.?t\\b", u"\\1ext"), (u"\\bt.?h.?a.?n\\b", u"than"), (u"\\b([Tt]).?h[^e^w]?a.?t\\b", u"\\1hat"), (u"([Uu]) p o n", u"\\1pon"), (u"\\bv.?a.?l.?\.", u"val."), (u"\\b([Yy])[^c^/]?[ea6sg^][^d^g^h^m^s^u^v]?[ae&n^].?[r]\\b", u"\\1ear"), ## 3 (u"\\b([Aa]) [nu] y\\b", u"\\1ny"), (u"\\b([Aa])( n|n )y\\b", u"\\1ny"), (u"\\b([Ff]) o r\\b", u"\\1or"), (u"\\b([Ff])( o|o )r\\b", u"\\1or"), (u"\\bF O R\\b", u"FOR"), 
(u"\\bh a s\\b", u"has"), (u"\\b([Ll]) a w\\b", u"\\1aw"), (u"\\bL aw\\b", u"Law"), (u"\\bm a [vy]\\b", u"may"), (u"\\bm( a|a )y\\b", u"may"), (u"\\b([Nn]) o t\\b", u"\\1ot"), (u"\\b([Nn])( o|o )t\\b", u"\\1ot"), (u"\\bN O W\\b", u"NOW"), (u"\\b([Pp]) a j", u"\\1ay"), (u"\\bT H E\\b", u"THE"), (u"T( h|h )e\\b", u"The"), (u"\\bT( H|H )E\\b", u"THE"), (u"T lie", u"The"), ## 2 (u"\\bB Y\\b", u"BY"), (u"\\b([Bb]) [yv]\\b", u"\\1y"), (u"I N(?= GENERAL)", u"IN"), (u"(DEPARTMENT|STATE) O F", u"\\1 OF"), (u"\\b([Tt]) o\\b", u"\\1o"), (u"\\bT O\\b", u"TO"), ## symbols only (u"([A-Za-z0-9]\\)) \\((?=[A-Za-z0-9]\\))", u"\\1("), (u"([^-]) (?=[.,;:?)])", u"\\1"), (u'^" (?=[A-Z(])', u'"'), # # proper names: (u"FRANKUN", u"FRANKLIN"), (u"\\bT.?R.?U.?M.?A.?N", u"TRUMAN"), (u"\\bH.?A.?R[^N^V]?[EKR].?Y", u"HARRY"), (u"\\bD.?W.?I.?G.?H.?T", u"DWIGHT"), (u"E.?I.?S.?E.?N.?H.?O.?W.?E.?R", u"EISENHOWER"), (u"J.?O.?H.?N.?S[^T]?O.?N", u"JOHNSON"), (u"L Y N D O N", u"LYNDON"), (u"J.?O.?H.?N", u"JOHN"), (u"F I T Z G E R A L D", u"FITZGERALD"), (u"K.?E.?N.?N.?E.?D.?Y", u"KENNEDY"), (u"N.?I.?X.?O.?N", u"NIXON"), (u"[EKR].?I.?C.?H.?A.?[EKR].?D", u"RICHARD"), (u"G.?E.?R.?A.?L.?D", u"GERALD"), (u"\. F O R D ,", u". 
FORD,"), (u"J I M M Y", u"JIMMY"), (u"C A R T E R", u"CARTER"), (u"R O N A L D", u"RONALD"), (u"R E A G A N", u"REAGAN"), (u"GEORGE B U S H", u"GEORGE BUSH"), # # months: (u"J.?a.?n.?u.?a.?r.?y", u"January"), (u"J[^e^u]?a.?n.?\.", u"Jan."), (u"F.?e.?b.?r.?u.?a.?r.?y", u"February"), (u"F.?e.?b.?\.", u"Feb."), (u"M.?a.?r.?[^a^e^i^o^u]?c.?h", u"March"), (u"M[^c^e^i^u]?a[^d^i^s^u^v^y]?r.?\.", u"Mar."), (u"A.?p[^a]?r.?i.?l", u"April"), (u"A p r \.", u"Apr."), (u"M a y\\b", u"May"), (u"\\bMa[Vv]\\b", u"May"), (u"\\bJ[^e^i^o]?u[^a]?n[^d^g^k]?e\\b", u"June"), (u"J u n \.", u"Jun."), (u"J.?u.?[lI].?y", u"July"), (u"A.?u.?g.?u.?s.?t", u"August"), (u"Augu[^s]t", u"August"), (u"Au[^g]ust", u"August"), (u"A.?u.?g[^u]?\.", u"Aug."), (u"S.?[ce].?p.?t.?[ce].?[mn].?b.?[ce].?r", u"September"), (u"S e p t \.", u"Sept."), (u"O.?[ce].?t.?o.?[bh].?[ce].?r", u"October"), (u"O c t \.", u"Oct."), (u"[O0]c[']?t[-']?\.", u"Oct."), (u"N.?o.?v.?[ce].?[mn].?b.?[ce].?r", u"November"), (u"N.?o.?v.?\.", u"Nov."), (u"D.?[ce].?[ce].?[ce].?[mn].?[bh].?[ce].?r", u"December"), (u"D e c \.", u"Dec."), (u"D[ft]?e[ce]\.", u"Dec."), # # military: (u"F.?[ao].?r.?c.?[ce].?s\\b", u"Forces"), (u"F o r c e", u"Force"), (u"A.?r[^a]?[mn].?y", u"Army"), (u"N.?a.?v.?y\\b", u"Navy"), (u"N.?a.?v.?a.?l", u"Naval"), (u"M a r i n e s", u"Marines"), (u"M a r i n e", u"Marine"), (u"C.?o.?r.?p[^u]?s", u"Corps"), # # increase spacing: (u"([0-9],)(?=[12][0-9]{3})", u"\\1 "), (u"([123]) ([0-9])(?=,[0-9]{3})", u"\\1\\2"), (u"([Ss])ection(?=[0-9])", u"\\1ection "), (u"([0-9])(USC|usc)", u"\\1 USC"), (u"(USC|usc)(?=[0-9])", u"USC "), (u"ofA(?=[a-z])", u"of A"), (u"\\bterm'", u"term '"), (u"ofthe(?=[A-Z])", u"of the "), (u"PUBLICLAW", u"PUBLIC LAW"), # # citation: (u"[Uu][ ]?\.[ ]?[Ss][ ]?\.[ ]?[Cc][ ]?[.,]", u"U.S.C."), (u"S.?t[^r]?a[^r]?t[^e]?\.", u"Stat."), (u"([0-9])(?=Stat)", u"\\1 "), (u"Stat\.(?=[0-9])", u"Stat. 
"), (u"([0-9])U.(S| S)", u"\\1 U.S"), # # legislative terms: (u"assemblea", u"assembled"), (u"assembled\^", u"assembled,"), (u"assern[boh]", u"assemb"), (u"f[un]rth[ce]r\^", u"further,"), (u"([Pp])rovided\^", u"\\1rovided,"), (u"U.?[nu][^l]?i.?t.?[ce].?d", u"United"), (u"[A-Z][A-Z](m|ni)t[ce]d", u"United"), (u"S.?[ft].?a.?[ft][^i^u]?[ce][^']?s\\b", u"States"), (u"S[^e^i]?[ft][^r]?a.?[ft][^i^u^.]?[ce]\\b", u"State"), (u"A.?[mn].?[ce].?r.?i.?[ce].?a", u"America"), (u"eiiact", u"enact"), (u"it ena[^c]t", u"it enact"), (u"\\bit en.{1,3}ted\\b", u"it enacted"), (u"\\bA c t\\b", u"Act"), # # Roman numerals (u"I ([IVX])\\b", u"I\\1"), (u"\\bV I", u"VI"), # end of September 2011 run ################################################################################################### # August 2011: begin to fix errors identified by running concordance of text through spell checker. # estimated number of changes to dataset resulting from this run: 260,000 # ## excess whitespace: (u"\\( ([a-z0-9A-Z]) \\)", u"(\\1)"), (u"\\b([Tt]) h e\\b", u"\\1he"), (u"\\b([Aa]) n d\\b", u"\\1nd"), (u"([Ff])re e", u"\\1ree"), (u"([Ff])r ee", u"\\1ree"), (u"\\b([Ff]) ree", u"\\1ree"), (u"\\bA C T\\b", u"ACT"), (u"\\b1 9 ([0-9]) ([0-9])\\b", u"19\\1\\2"), (u"\\bt a x\\b", u"tax"), (u"\\bS E C \.", u"SEC."), (u"([Nn]) a tion", u"\\1ation"), ### whitespace fixes that will also catch transposition errors or simple misspellings: (u"A.?p.?p.?r.?o.?p.?r.?i.?a.?t.?i.?o.?n\\b", u"Appropriation"), (u"\\b([Aa]).?t.?t.?e.?n.?d.?a.?n.?[ce].?e\\b", u"\\1ttendance"), (u"\\b([Aa]).?s.?s.?i.?s.?t.?a.?n.?[ce].?e\\b", u"\\1ssistance"), (u"([Cc]).?o.?n.?s.?t.?r.?u.?[ce].?t.?i.?o.?n\\b", u"\\1onstruction"), (u"\\bC.?O.?N.?C.?U.?R.?R.?E.?N.?T\\b", u"CONCURRENT"), (u"\b([DdRr]).?e.?d.?u.?[ce].?t.?i.?o.?n\b", u"\\1eduction"), (u"\\bD.?e.?p.?a.?r.?t.?m.?e.?n.?t\\b", u"Department"), (u"\\bD.?E.?P.?A.?R.?T.?M.?E.?N.?T\\b", u"DEPARTMENT"), (u"d.?e.?s.?i.?g.?n.?a.?t.?e.?d\\b", u"designated"), 
(u"\\b([Ee]).?m.?p.?l.?o.?y.?m.?[ce].?n.?t\\b", u"\\1mployment"), (u"\\bf.?o.?l.?l.?o.?[vw].?i.?n.?g\\b", u"following"), (u"\\b([Ff]).?o.?r.?e.?i.?g.?n\\b", u"\\1oreign"), (u"\\b([Gg]).?o.?v.?[ce].?r.?n.?m.?[ce].?n.?t\\b", u"\\1overnment"), (u"\\b([Hh]).?e.?a.?d.?i.?n.?g\\b", u"\\1eading"), (u"\\b([Ii]).?n.?s.?e.?r.?t.?i.?n.?g\\b", u"\\1nserting"), (u"\\bJ.?O.?I.?N.?T\\b", u"JOINT"), (u"\\b([Mm]).?a[^r^s]?k.?i.?n.?g\\b", u"\\1aking"), (u"([Pp]).?a.?r.?a.?g.?r.?a.?p.?h\\b", u"\\1aragraph"), (u"\\bp.?r.?e.?[ce].?e.?d.?i.?n.?g\\b", u"preceding"), (u"\b([Pp]).?u.?b.?l.?i.?[ce].?a.?t.?i.?o.?n\b", u"\\1ublication"), (u"\\bR.?E.?S.?O.?L.?U.?T.?I.?O.?N\\b", u"RESOLUTION"), (u"\\b([Oo])[^c]?t.?h.?e.?r\\b", u"\\1ther"), (u"\\bs.?t.?r.?i.?k.?i.?n.?g\\b", u"striking"), (u"([Tt]).?r.?a.?n.?s.?p.?o.?r.?t.?a.?t.?i.?o.?n\\b", u"\\1ransportation"), ## character substitution errors in OCR transcription: (u"\\bh[vy]\\b", u"by"), (u"\\bbv\\b", u"by"), (u"FrM", u"Free"), (u"\\bFraa\\b", u"Free"), (u"\\bimder\\b", u"under"), (u"\\b([Tt])[ilr]ie\\b", u"\\1he"), (u"\\bt[bn]e\\b", u"the"), (u"\\bPub.c\\b", u"Public"), (u"([^K][aeioudg])i-(?=[a-z])", u"\\1r"), (u"([^K][aeiou])i'(?=[a-hj-z])", u"\\1r"), (u"\\bOt[>)]", u"(b"), (u"assemhl", u"assembl"), (u"nnent\\b", u"ment"), (u"emment\\b", u"ernment"), (u"\\bPUB[OU]C\\b", u"PUBLIC"), (u"\\]\\)(?=[a-z])", u"p"), (u"\\bfimd", u"fund"), (u"\\bGreneral\\b", u"General"), (u"\\bamoimt\\b", u"amount"), (u"\\bcanying\\b", u"carrying"), (u"\\bslia", u"sha"), (u"t[li]o(u|ii)\\b", u"tion"), (u"([Tt])[li]iere", u"\\1here"), (u"\\busc of\\b", u"use of"), (u"[)}]dng\\b", u"ying"), (u"([^e])nient\\b", u"\\1ment"), (u"([Ss])cct", u"\\1ect"), (u"\\([JT]ove", u"Gove"), (u"d\^.?ree", u"degree"), (u"e<.(?=[ot])", u"ec"), (u"\\bt[il]ic\\b", u"the"), (u'\u00A3uid', u'and'), ### numbering (u"\\(D ", u"(1) "), (u"\.SX", u".5%"), (u"\\bi([0-9] USC)", u"1\\1"), ## citations (u"U\.S \.C\.", u"U.S.C."), (u"U\.S \.C \.", u"U.S.C."), # end of August 2011 run # # July 
2011: initial series of replacements (u"\\(([a-z0-9])X", u"(\\1)("), (u"([0-9]) us[ce]\\b", u"\\1 USC"), (u"([0-9]) u s [ce]\\b", u"\\1 USC"), (u"tiou\\b", u"tion"), (u"([Ss])cction", u"\\1ection"), ] } # TESSERACT FIXES. Used for early volumes of the U.S. Statutes at Large fixes['tesseract'] = { 'regex': True, 'msg': { '_default':u'Robot:correcting common OCR errors', }, 'replacements': [ # dehyphenation. This will correct errors like “hun” and “dred” # being separately identified as misspelled when they appear on # different lines separated by a hyphen. It will introduce new # errors where hyphens appear inside, or just before, a marginal # note in the text, but these should be substantially outnumbered # by the errors fixed from rejoining hyphenated words. (u'([a-zA-Z][a-xz])[-–•]\r\n([a-z])', u'\\1\\2'), # deleting surplusage (u" [`'] ", u" "), (u"\.\.\.\.", u""), (u" _([a-z])", u" \\1"), (u"\bgg\b", u""), (u"if'\b", u"if"), (u"if' ", u"if "), ## garbage strings at front and end of line (u'^(¤|°|;|:|\'|"|¥|\||,|\.)+', u''), (u'(¤|°|;|:|\'|"|¥|\||,|\.)+$', u''), # number ranges (u"ty(—|\u00B7)(?=[efnost])", u"ty-"), # common "scannos" (incorrect character recognition) (u'f`', u'f'), (u' _f', u' f'), (u' 0f', u' of'), (u" of[`‘’_'] ", u' of '), (u" ot[`‘’'] ", u' of '), (u" qf([a-zA-Z0-9])", u' of \\1'), (u' 1n ', u' in '), (u'{i', u'fi'), (u' VV', u' W'), (u'1\u00B7 ', u'r '), (u'1\u00B7(?=[a-z1])', u'r'), (u'\b[Il]\)(?=[^ ])', u'D'), (u'\./[Iil1]', u'A'), (u'\.0(?=n)', u'A'), (u"{'(?=[aeioru])", u"f"), (u"[it{]`", u'f'), (u"f'(?=[a-rt-z01])", u"f"), (u"\.[dH](?=[cen])", u"A"), (u"tl1\b", u"th"), # ligatures seem to be particularly difficult for Tesseract (u' oif', u' off'), (u'suihc', u'suffic'), (u'Ojiice', u'Office'), (u'Q[fj]ic', u'Offic'), (u'o[dHj]ice', u'office'), (u'o[fit]hce', u'office'), (u'O[dHhj]ice', u'Office'), (u'of[A-Z]ce', u'office'), (u'oiii[ceo]', u'offic'), (u'qyic', u'offic'), (u'otfic', u'offic'), (u' o ce', u' office'), (u' o ence', u' 
offence'), (u'afh', u'affi'), (u'[BH]rst', u'first'), (u'speciii', u'specifi'), (u'([eo])tli[ce]', u'\\1ffic'), (u'oilic[ce]', u'office'), (u'eiiec', u'effec'), (u'iift([ehy])', u'fift\\1'), (u'(f1|ii)v[ce]', u'five'), (u' tive ', u' five '), (u'aliirm', u'affirm'), (u'Hft', u'fift'), (u'Hv[ce]', u'five'), (u"eilect", u"effect"), (u"\bilft", u"fift"), (u'[lI1][iIl1]fty', u'fifty'), (u"ty[-–—]liv", u"ty-fiv"), (u"ti[ft]t", u"fift"), (u"oihoe", u"office"), (u"iil(?=(ed|ing))", u"fil"), (u"liv[ce](?= (dol|hun|thou))", u"five"), # "e" is frequently mis-OCRed as "c" (u'\bctc\.', ur'etc.'), (u'rcs', u'res'), (u'([Rr])cp', u'\\1ep'), (u'([lt])cd\b', u'\\1ed'), (u'mcnt', u'ment'), (u'([Pp])rcs', u'\\1res'), (u'tivc', u'tive'), (u"cxt", u"ext"), (u"livcs", u"tives"), (u"\b[ce]nt[ce]r", u"enter"), (u"\b([bBhHwW])c\b", u"\\1e"), (u"([dD])c(?=[np])", u"\\1e"), (u"cth\b", u"eth"), (u"Scp(?=[t.])", u"Sep"), (u"\b[ce][nu]..[nu][ce][ce]r", u"engineer"), # "i" is frequently mis-OCRed as the number "1" or lowercase "l" (u'[lI1]ng\b', u'ing'), (u'h[l1](?=[bms])', u'hi'), (u'w[l1]th', u'with'), (u"th1[sS]", u"this"), (u"sh1p", u"ship"), (u"([Cc])h1[ce]f", u"\\1hief"), (u"w1s[ce]", u"wise"), (u"h1gh", u"high"), (u'mach1ne', u'machine'), (u's 1p\b', u'ship'), (u'([st])[iIl1][oO0]n', u'\\1ion'), (u's[oO0][lI1]di?[ce]r(?=[ a-z])', u'soldier'), (u'cr[il1]on', u'ction'), (u'th[lI1]r(?=[dt])', u'thir'), # "n" is frequently mi-OCRed as "u" (u'\baud\b', u'and'), # "o" is frequently mis-OCRed as the number "0" (u'0ther', u'other'), (u't0wn', u'town'), (u'0h1[co]', u'Ohio'), (u'([Ff])0', u'\\1o'), (u'pr[O0]v', u'prov'), (u'\b[o0]n[ce]\b', u'one'), # "y" is frequently mis-OCRed as "v" (u'trv', u'try'), (u'monev', u'money'), (u' anv ', u' any '), (u'\bmav\b', u'may'), # "rn" and "in" are sometimes mis-OCRed as "m" (u'govem', u'govern'), (u'ordam', u'ordain'), (u'mcreas', u'increas'), # directions (u'(we|We|ea|Ea)stem', u'\\1stern'), (u'(nor|Nor|sou|Sou)them', u'\\1thern'), # other common 
substitutions of one character for another # that cause misspellings ##and (u'\bund\b', u'and'), (u'amd', u'and'), # but the preceding fix can be overzealous sometimes, so: (u'Canden', u'Camden'), ## for (u' ibr', u' for'), (u' (fb|to)r ', u' for '), (u'_/br', u'for'), ## further (u'[fj]i[erstxz]rt[lh][ce]r', u'further'), (u'furlher', u'further'), (u'fuiither', u'further'), (u'fwther', u'further'), (u'j(h|ia)rther', u'further'), (u'jirr.?.?.?er', u'further'), ## in (u' iu ', u' in '), (u' [t1]n ', u' in '), (u'_1n', u'in'), (u' 1n', u' in'), ## is (u' [il1][sS] ', u' is '), ## of (u' [cegprqsu]f ', u' of '), (u' [o0][filty]` ', u' of '), (u',[o0][filty]` ', u', of '), (u" q[`'] ", u' of '), (u"of_([a-zA-Z0-9])", u'of \\1'), (u" [ce]y[`'] ", u' of '), (u'1y"', u'of'), (u'\bot? th[a-z]\b', u'of the'), (u'\bo(ft)? e\b', u'of the'), (u'\bot all\b', u'of all'), (u' Q" ', u' of '), (u'\b[o0] tate', u'of State'), (u'\b0 the\b', u'of the'), (u'q/`', u'of'), (u'1y`', u'of'), (u'0 R e', u'of Re'), ## or (u' 0r', u' or'), (u',0r', u', or'), ## shall (u' s a l ', u' shall '), (u' sha l', u' shall'), (u'\bs all\b', u'shall'), ## the (u't[l/]ze', u'the'), (u'th[cg]', u'the'), (u' tl e', u' the'), (u' t[hl][l1]e', u' the'), (u'\btl[a-z1]e\b', u'the'), (u' of th ', u' of the '), (u'\btot e\b', u'to the'), (u' in th ', u' in the '), (u'{he', u'the'), ## with (u' mth ', u' with '), (u'w[nx]th', u'with'), ## (u' Jet ', u' Act '), (u' [eu]ct ', u' act '), (u' Au ', u' An '), (u'Jn ', u'An '), # (ur'" An', ur'"An'), (u' dn Act', u' An Act'), (u'afl[ce]r', u'after'), (u' unend', u' amend'), (u'arnend', u'amend'), (u' bo ', u' be '), (u'wuse', u'cause'), (u'cer [i1] ca e', u'certificate'), (u'c[il1]t[il1]z[ce]n', u'citizen'), (u'ddllar', u'dollar'), (u"do[l1][l1]ar", u"dollar"), (u'g1ve', u'give'), (u'[ti]iirn', u'furn'), (u' i[fily]` ', u' if '), (u'irnp', u'imp'), (u'im o(?=(rt|se))', u'impo'), (u'([a-z])mg ', u'\\1ing '), (u' mten([dt])', u' inten\\1'), (u' sai ', u' said '), 
(u'samc', u'same'), (u'And e it', u'And be it'), (u'Arrnov[mn]n', u'Approved'), (u'authoriae', u'authorize'), (u'authomze', u'authorize'), (u'D[il1]str[il1]ct', u'District'), (u'enawt', u'enact'), (u'ena[ce]t[ce][dj]', u'enacted'), # (u'intitulcd', u'intituled'), (u'prescri e', u'prescribe'), (u'([Pp])rovi e', u'\\1rovide'), (u'sect[i1][o0]n', u'section'), (u'therc', u'there'), (u' t is act', u' this act'), (u' t0 ', u' to '), (u' te the', u' to the'), #numbers (u'tw0', u'two'), (u'thxrd', u'third'), (u'\bt r[ce][ce]', u'three'), (u'thr[ce][ce]', u'three'), (u'f[co]u(r|1•)', u'four'), (u's1x', u'six'), (u" ix", u" six"), (u's[ce]v[ce][nu]', u'seven'), (u'[ce][i1]ght', u'eight'), (u'e[i1] ht', u'eight'), (u'n1n[ce]', u'nine'), (u'\bmne', u'nine'), (u'tcn', u'ten'), (u'[ce]i ht[ce][ce][nu]', u'eighteen'), (u'twquty', u'twenty'), (u"tw[a-z][a-z]ty", u"twenty"), (u'twent -', u'twenty-'), (u't[^ ][^ ]ir(?=[dt])', u'thir'), (u"\bsix y\b", u"sixty"), (u"\bseven ty", u"seventy"), (u'eigh ty', u'eighty'), (u'\beig.?.?ty', u'eighty'), (u'[^e^E]ig[^h]ty', u'eighty'), (u'nincty', u'ninety'), (u"\bnine y", u"ninety"), (u'hnmdred', u'hundred'), (u'\bun red?\b', u'hundred'), (u'h[un][un]d([^r]e|r[^e]|[^r][^e])d', u'hundred'), (u'himdred', u'hundred'), (u'\bhundre\b', u'hundred'), (u't ousan ', u'thousand '), (u't ousand', u'thousand'), (u' ousan ', u' thousand '), (u'\bthousan\b', u'thousand'), (u'1[Ss](?=[0-9][0-9])', u'18'), # (u'c0untry', u'country'), (u'[ce]na[ce]t[ce]d', u'enacted'), # legislature (u'Am[ce][a-z][a-z][co]a', u'America'), (u'Am[ce][a-z][co]a', u'America'), (u'Arr([a-z01>]+), (?=[A-Z])', ur'Approved, '), (u'(II|H)[oO0]us[ce]', u'House'), (u' Hm[a-z][ce] ', u' House '), (u'Hausa', u'House'), (u'I[a-z]us[ce]', u'House'), (u'and H[a-z][a-z][a-z][a-z]e ', u'and House '), (u'and I[a-zI][a-z][a-z][a-z][ce] ', u'and House '), (u'Senate and H[a-z][a-z][a-z][ce] ', u'Senate and House '), (u'Senate and Hom[ce] ', u'Senate and House '), (u' Slate ', u' State '), 
(u'U[a-z][a-z][a-z][ce]d', u'United'), (u'Umked', u'United'), (u' nite tates', u' United States'), (u"\bnit?e? S?tates?", u"United States"), (u'Sncr[a-z][a-z][a-z] ', u'Section '), (u'Sncr[a-z][a-z][a-z][a-z] ', u'Section '), (u'O[co]([a-z]+)gr([a-z]{0,4})\b', u'Congress'), (u'S[amn]c\.', ur'Sec.'), (ur'S[an]o\.', ur'Sec.'), (u'SE[CG]\.', ur'Sec.'), (u'S[amn]ss\.', ur'Sess.'), (u'SESS\.', ur'Sess.'), (u'C[nuH]\.', ur'Ch.'), (u'ta[^t]iv', u'tativ'), (u'Am[ce]rwa', u'America'), (u'(en|m)ac[^t]ed', u'enacted'), # fixes for erroneously inserted or deleted whitespace (u' t e', u' the'), (u'shallbe', u'shall be'), (u'[oq]fth[ce]', u'of the'), (u'bythe', u'by the'), (u'inthe', u'in the'), (u'tothe', u'to the'), (u'An[au]ct', u'An act'), (u'B e it', u'Be it'), (u'itenact', u'it enact'), (u'i[ft]further', u'it further'), (u'it-further', u'it further'), (u'thereforefurther', u'therefore further'), (u"ji([a-z]+)h[ce]r", u"further"), (u'[aoq]fRep', u'of Rep'), (u',in', u', in'), (u'Actfor', u'Act for'), (u'havethe', u'have the'), (u'atleast', u'at least'), (u'andfor', u'and for'), (u" ofR ", u" of R"), (u' [“"] (?=[A-Z])', ur' “'), (u'ofAmer', u'of Amer'), # months and years (u'Jan\.(?=[0-9])', ur'Jan. '), (u'F[co]b\.', ur'Feb.'), (u'Feb\.(?=[0-9])', ur'Feb. '), (u'Mar\.(?=[0-9])', ur'Mar. '), (u'April(?=[0-9])', ur'April '), (u'M[a-z]y(?=[0-9])', ur'May '), (u'J [nu](?=(ne|ly))', ur'Ju'), (u'J[un]n[ce](?=[0-9])', ur'June '), (u'J[un]ly(?=[0-9])', ur'July '), (u'A[nu]g\.(?=[0-9])', ur'Aug. '), (u'S[ce][p ]t\.(?=[0-9])', ur'Sept. '), (u'O[co]t\.(?=[0-9])', ur'Oct. '), (u'Nov\.(?=[0-9])', ur'Nov. '), (u'D[a-z][a-z]\.(?=[0-9])', ur'Dec. '), (u'([0-9],)(?=1[78][0-9][0-9])', u'\\1 '), # character omissions (u"accor mg", u"according"), (u"ap oin", u"appoin"), (u"ap int", u"appoint"), (u'ap rop', u'approp'), (u"ap? 
r[co] ri", u"appropri"), (u'ap rov', u'approv'), (u"assemb e", u"assemble"), (u'aut or', u'author'), (u"\bui ing", u"building"), (u"com le", u"comple"), (u"Com trol", u"Comptrol"), (u"\bde enden", u"dependen"), (u"\bdis ur", u"disbur"), (u"\bdis os", u"dispos"), (u'di trict', u'district'), (u'\bistrict', u'district'), (u'd[o0] ars', u'dollars'), (u' [o0] ars', u' dollars'), (u' dolars', u' dollars'), (u'do(l+) ar', u'dollar'), (u'emp oy', u'employ'), (u" em lo ", u" employ"), (u'entit[^l]e', u'entitle'), (u'exce t', u'except'), (u'gnmt', u'grant'), (u"\berein\b", u"herein"), (u"\bim ris", u"impris"), (u"\bim os", u"impos"), (u'\bim ro', u'impro'), (u"inc u (in|m)g", u"including"), (u"ju ge", u"judge"), (u'juri diction', u'jurisdiction'), (u' aws', u' laws'), (u"li ht", u"light"), (u"li uor", u"liquor"), (u"\b0 ve\b", u"of five"), (u' ot er', u' other'), (u"\bot [1inwz]er", u"other"), (u"\bp ace", u"place"), (u"\bro er", u"proper"), (u"resi en", u"residen"), (u"ri ht", u"right"), (u'Re ublic', u'Republic'), (u' ublic ', u' public '), (u"\bpu ic\b", u"public"), (u'p.ension', u'pension'), (u"pu is e", u"publishe"), (u"\bro rata", u"pro rata"), (u're pect', u'respect'), (u'regi t', u'regist'), (u"scri e", u"scribe"), (u"sen ence", u"sentence"), (u'\bshal\b', u'shall'), (u"\bsu (?=(ject|mit))", u"sub"), (u'subscrib r', u'subscriber'), (u'\bsuc ', u'such '), (u"\bsu ple", u"supple"), (u"\bsu[p ][p ]ort", u"support"), (u"\bsu reme", u"supreme"), (u"\btra e", u"trade"), (u'assembled, hat', u'assembled, That'), (u'assembled, T t', u'assembled, That'), (u' T at', u' That'), (u'u on\b', u'upon'), (u'ves el', u'vessel'), (u'\bw ic\b', u'which'), (u'I.?.?antry', u'Infantry'), (u'([pP])ubhc', u'\\1ublic'), # extra characters in otherwise correctly spelled words (u'C.?o.?n.?g.?r.?e.?s.?s\b', u'Congress'), (u'S.?e.?n.?a.?t.?e\b', u'Senate'), (u'H.?o.?u.?s.?e\b', u'House'), (u'A.?m.?e.?r.?i.?c.?a\b', u'America'), (u'U.?n.?i.?t.?e.?d\b', u'United'), (u'S.?e.?c.?r.?e.?t.?a.?r.?y\b', 
u'Secretary'), (u'R.?e.?p.?r.?e.?s.?e.?n.?t', u'Represent'), (u'\bd.?o.?l.?l.?a.?r\b', u'dollar'), (u'\bs.?h.?a.?l.?l\b', u'shall'), ] }