# morph-ga.txt: Morphology rules for Irish.
# Copyright (C) 2004-2007 Kevin P. Scannell <kscanne@gmail.com>
#
# This is free software; see the file COPYING for copying conditions.  There
# is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR
# PURPOSE.
#
#  With the exception of some of the non-standard verb endings,
#  each rule decreases the number of capitals or the length of the string,
#  so recursion will stop.
#
#  Each substitution is applied globally (though this is only relevant
#  for the few rules that aren't anchored at ^ or $).
#
#  There is no need to encode the regular inflectional morphology of 
#  Irish here because it is already handled by the C++ code that generates
#  the basic lexicon.   So if "imirceacha" is not in the lexicon (it is)
#  "imirceach" won't be either.   On the other hand it can be quite useful
#  to include *derivational* morphology; so if "gaimbneachas" is not
#  in the lexicon (it is) there is some hope that by stripping the 
#  "eachas" it will find the root "gaimbn".
# Rule layout used by every rule in this file, fields separated by tabs:
#   pattern   replacement   integer code   tag pattern   tag replacement
# NOTE(review): the meaning of the integer code (-1, 0, 1, 2) is not defined
# in this file; confirm against the rule engine.
#
# All-caps words: keep the leading capital(s) and lowercase the remainder.
# \L in the replacement is presumably the Perl-style "lowercase what follows"
# escape -- confirm against the rule engine.
^([ACEFH-LO-SU-Z])([A-Z'-]*[A-Z][A-Z'-]*)$	$1\L$2	-1	<.+>	<&>	# IRE
# potentially eclipsing init chars require more care:
^B([A-GI-OQ-Z][A-Z'-]*)$	B\L$1		-1	 <.+>	<&>
^B'([AEIOUF][A-Z'-]*)$	B'\L$1		-1	 <.+>	<&>
^BH([A-EG-Z][A-Z'-]*)$	Bh\L$1		-1	 <.+>	<&>
^D([A-SU-Z][A-Z'-]*)$	D\L$1		-1	 <.+>	<&>
^D'([AEIOUF][A-Z'-]*)$	D'\L$1		-1	 <.+>	<&>
^G([ABD-Z][A-Z'-]*)$	G\L$1		-1	 <.+>	<&>
^M([AC-Z][A-Z'-]*)$	M\L$1		-1	 <.+>	<&>
^N([ABCEFH-Z-][A-Z'-]*)$	N\L$1		-1	 <.+>	<&>
^T([A-RT-Z-][A-Z'-]*)$	T\L$1		-1	 <.+>	<&>
# Words with a leading capital: lowercase it.  \l is presumably the
# Perl-style "lowercase the next character" escape and \L "lowercase the
# rest" -- confirm against the rule engine.
# The initials B, Bh, D, G, M, N and T have separate rules that refuse to
# fire when the next letter is p/h, f, t, c, b, d/g or s respectively.
# NOTE(review): presumably because those sequences are eclipsed forms that
# need different treatment -- confirm.
^([ACEFH-LO-SU-Z])		\l$1		-1	<.+>	<&> 	# Promhach
^B([^pPh])		b$1		-1	<.+>	<&>
^Bh([^fF])		bh$1		-1	<.+>	<&>
^D([^tT])		d$1		-1	<.+>	<&>
^G([^cC])		g$1		-1	<.+>	<&>
^M([^bB])		m$1		-1	<.+>	<&>
^N([^dDgG])		n$1		-1	<.+>	<&>
^T([^sS])		t$1		-1	<.+>	<&>
# Capital directly after a hyphen (with at least three characters before it).
(...)-([A-Z])	$1-\l$2		-1	<.+>	<&> 	# Promh-Aire
# Lower-case mutation prefix followed by a capital.  Each prefix has two
# rules: the first (anchored at $) handles an all-caps remainder, the second
# only lowercases the letter right after the prefix.
^b'([AEIOUF])	b'\l$1		-1	<.+>	<&>
^bP([A-Z'-]+)$	bp\L$1		-1	<.+>	<&>	# bPROMH-AIRE
^bP			bp		-1	<.+>	<&>	# bPromhach
^bhF([A-Z'-]+)$	bhf\L$1		-1	<.+>	<&>
^bhF			bhf		-1	<.+>	<&>
^d'([AEIOUF])	d'\l$1		-1	<.+>	<&>
^dT([A-Z'-]+)$	dt\L$1		-1	<.+>	<&>
^dT			dt		-1	<.+>	<&>
^gC([A-Z'-]+)$	gc\L$1		-1	<.+>	<&>
^gC			gc		-1	<.+>	<&>
^h([AEIOU])([A-Z'-]+)$	h$1\L$2	-1	<.+>	<&>	# hIREANN
^h([AEIOU])	h\l$1		-1	<.+>	<&>
^m'([AEIOUF])	m'\l$1		-1	<.+>	<&>
^mB([A-Z'-]+)$	mb\L$1		-1	<.+>	<&>
^mB			mb		-1	<.+>	<&>
# n/t before a capital vowel: the all-caps form keeps the capital vowel with
# no hyphen; the mixed-case form inserts a hyphen and lowercases the vowel.
^n([AEIOU])([A-Z'-]+)$	n$1\L$2	-1	<.+>	<&>
^n([AEIOU])	n-\l$1		-1	<.+>	<&>
^nD([A-Z'-]+)$	nd\L$1		-1	<.+>	<&>
^nD			nd		-1	<.+>	<&>
^nG([A-Z'-]+)$	ng\L$1		-1	<.+>	<&>
^nG			ng		-1	<.+>	<&>
^tS([A-Z'-]+)$	ts\L$1		-1	<.+>	<&>
^tS			ts		-1	<.+>	<&>
# The two t+vowel rules are restricted to tags matching <N.*>.
^t([AEIOU])([A-Z'-]+)$	t$1\L$2	-1	<N.*>	<&>	# tUASAL
^t([AEIOU])	t-\l$1		-1	<N.*>	<&>
# Rules deliberately placed early, ahead of the spelling-reform sections.
# these aren't in aspell db by default; better to do early (mb'amhlaidh)
^mb'			b'		-1	<.+>	<&>
# Restores a missing "h" after d'f + vowel (d'f... -> d'fh...).
^d'f([aeiou])	d'fh$1		2	<.+>	<&>
#  do these early - avoids mistakes with "aighthe" (==aghaidheanna)
#  and note that first rule must precede second to get "-aithe" pp's correct
# IMPORTANT - used for -ughadh endings e.g., -> -aghadh which goes to  below 
# NOTE(review): the sentence above seems to have lost a word or character
# before "below"; confirm the intended target against the upstream file.
u(i?[dg]h)			a$1		1	<.+>	<&>  # see next few
i[dg]h(th?.)		i$1		1	<.+>	<&>  # marcaidhthe, masluighthe, beannuighth?ear
#############################################################################
################ CAIGHDEAN OIFIGIUIL SPELLING REFORMS #####################
#############################################################################
#   modernize prefixes (no need to trap mutated versions, stripped below)
#   full list from OD77 is in gaeilge/diolaim/x/OD77alt-prefixes
#
# NOTE(review): many rules in this section map a pattern onto a string that
# is not an Irish prefix (e.g. "^bin" -> "bn", "^clir" -> "clr"), and some
# character classes hold a single letter (e.g. "c[o]imh").  It looks as though
# accented vowels were lost from this copy of the file; compare against the
# upstream source before relying on these rules.
^h?aith			ath		1	<.+>	<&>
^h?aird([^e])	ard$1		1	<.+>	<&>
# handled by more general "nn" rule below
#^h?anns			ans		1	<.+>	<&>
^dh'			d'		1	<.+>	<&>
^h-			h		1	<.+>	<&>    # CO p.126
^n-([AEIOU])	n$1		1	<.+>	<&>    # CO p.125
^t-([AEIOU])	t$1		1	<.+>	<&>    # CO p.125
^h?aoin			aon		1	<.+>	<&>
^h?ana-			an-		1	<.+>	<&>    # ana-eagraithe
#^[]rd(-?..)	ard$1		1	<.+>	<&>
^h?ath-?th		at		1	<[ANV].*>	<&>
^bh'			b'		1	<.+>	<&>		# bh'fhi -> b'fhi
^bain			ban		1	<.+>	<&>
^bin			bn		1	<.+>	<&>
^bairr			barr		1	<.+>	<&>
^baoith			baoth		1	<.+>	<&>
^beig			beag		1	<.+>	<&>
^bil			bal		1	<.+>	<&>
^bioth			bith		1	<.+>	<&>
^boig			bog		1	<.+>	<&>
^boinn			bonn		1	<.+>	<&>
^boirr			borr		1	<.+>	<&>  # boirrphist...
# handled by more general ll rule below
#^bolls			bols		1	<.+>	<&>
^buadh			bua			1	<.+>	<&>
^buain			buan		1	<.+>	<&>  # buainseasamh...
^caim			cam		1	<.+>	<&>  # caimbheart...
# handled by more general tht rule below
#^caitht			cait		1	<.+>	<&>
^caoimh			caomh		1	<.+>	<&>
# N.B. makes string longer
#^ceathar			ceathair		1	<.+>	<&>
^cid			cad		1	<.+>	<&>
# N.B. makes string longer
#^cinn			ceann		1	<.+>	<&>
^ceinn			ceann		1	<.+>	<&>  # Di04 ceinn-litir, srl.
^cionn			ceann		1	<.+>	<&>
^clir			clr		1	<.+>	<&>   # clirfhiacail e.g.
^claoidh			clo		1	<.+>	<&>
^claoin			claon		1	<.+>	<&>
^coilg			colg		1	<.+>	<&>  # coilgsheasamh e.g.
# The comh-/coimh- family.  Several of these rules are restricted to tags
# matching <[ANV].*>; the rest accept any tag.
^c[o]imh-mh		cimh		1	<[ANV].*>	<&>
^coimh			comh		1	<.+>	<&>    # coimhlonadh
^cimhmh			cimh	1	<.+>	<&>    # cimhmheas, srl in Di04
^cimh([^ei])			comh$1	1	<.+>	<&>    # cimh-chealg (Di04)
# general mh[^a] -> omh$1  rule below
^cmha		comha		1	<[ANV].*>	<&>
^cmhmh			cmh	1	<.+>	<&>    # cmhmhaith, srl in Di04
^comh-(mh[aou])		c$1		1	<[ANV].*>	<&>
^comh-(mh[ei])		ci$1		1	<[ANV].*>	<&>
^coimh-n		cin		1	<[ANV].*>	<&>
^c[o]mh-?(n[aou])		c$1		1	<[ANV].*>	<&>
^c[o]mh-?(n[ei])		ci$1		1	<[ANV].*>	<&>
^c-(..)		comh$1		1	<[ANV].*>	<&>
^c([bcdfgpt]h)		comh$1		1	<[ANV].*>	<&>
^c[ou]ir([pr])			cor$1		1	<.+>	<&>
^crainn			crann		1	<.+>	<&>
^crioth			crith		1	<.+>	<&>
^croim			crom		1	<.+>	<&>   # croimleac e.g.
^cruai?dh			crua		1	<.+>	<&>
^cil			cl		1	<.+>	<&>
# Prefix modernisation, prefixes beginning with "d".
# NOTE(review): a few rules here shrink the prefix to a bare consonant
# ("^d-" -> "d", "^dligh" -> "dl"); presumably an accented vowel was lost
# from this copy of the file -- compare against the upstream source.
^d-			d		1	<.+>	<&>		# common in Di04 e.g. d-bheathach
^daill			dall		1	<.+>	<&> # daillintinn
# The character captured after "daoir" (anything but "s") must be put back
# in the output; without $1 the rule silently deleted that letter.
^daoir([^s])			daor$1		1	<.+>	<&>
#  first spelling of dea- is from na Grianna
^dagh-			dea-		1	<.+>	<&>
^de[ai]gh-		dea-		1	<.+>	<&>
^de[ai]gh([^-])	dea-$1		1	<.+>	<&>
^deir([bg])			dear$1		1	<.+>	<&> # deirbhnialas, deirgmheisce
^do-			d-		1	<.+>	<&>   # Di04 common. do-chuimhne 
^diubhr			dir		1	<.+>	<&>
# The optional "i" is matched so that it is consumed; it is intentionally
# not re-emitted.
^dligh(i?)		dl		1	<.+>	<&>
^doi-			do-		1	<.+>	<&>  # doi-bhriathar, etc.  Di04
^doinn			donn		1	<.+>	<&>
^droich			droch		1	<.+>	<&>
^druim			droim		1	<.+>	<&>
^duibh			dubh		1	<.+>	<&>
^dubh-			d		1	<.+>	<&>
# Prefix modernisation, continued (prefixes from "e"/"f" through "n").
# NOTE(review): as elsewhere in this file, several patterns and replacements
# look as though accented vowels were lost (e.g. "^fgbh" -> "fg",
# "^gnith" -> "gnth"); compare against the upstream source.
# handled by more general prefix+gc rule below
#^agc			ag		1	<.+>	<&>
#^igc			ig		1	<.+>	<&>
# handled by more general prefix+dt rule below
#^adt			ad		1	<.+>	<&>
#^idt			id		1	<.+>	<&>
# handled by more general ll rule below
#^falls			fals		1	<.+>	<&>
^fgbh			fg			1	<.+>	<&>
^h?eadar		idir		1	<.+>	<&>
^h?eidir		idir	1	<.+>	<&>
^fiadh			fia		1	<.+>	<&>
^fr-			for		1	<.+>	<&> # include hyphen so len is same
^flith			frith		1	<.+>	<&>
# NOTE(review): "[^]" below is an empty negated character class, which is
# not a well-formed class in Perl-style regexes; its members were presumably
# accented vowels that have been lost -- restore from the upstream source.
^frioth([^])	frith$1		1	<.+>	<&>
^f-			fo-		1	<.+>	<&>
^foir-			for-	1	<.+>	<&>  # foir-chim
^fuair-			fuar-	1	<.+>	<&>  # fuair-chrith
# N.B. makes string longer
#^gamh			geamh		1	<.+>	<&>
# N.B. makes string longer
^girr			gearr		1	<.+>	<&>
^gairbh			garbh		1	<.+>	<&>
^gir			gar		1	<.+>	<&>
^gnith			gnth		1	<.+>	<&>
^gobh			gabh		1	<.+>	<&>
^hala([^aeiou]+[ei])	hili$1		1	<.+>	<&>    # OD77
^hala([^aeiou]+[aou])	hilea$1	1	<.+>	<&>
^h?iair		iar		1	<.+>	<&>
^h?iarann		iarn		1	<.+>	<&>
^h?iodar		idir		1	<.+>	<&> # iodarchaidreamh
^h?iol([^ar])			il$1		1	<.+>	<&>
^h?iomshl			ioml		1	<.+>	<&>  # exception to next
^h?iom([^alpr])			im$1		1	<.+>	<&>
^h?ion			in		1	<.+>	<&>
# handled by more general nn rule below
#^h?ionn([rs])			ion$1		1	<.+>	<&>
^([ls])imh		$1mh	1	<.+>	<&>
^lin		ln		1	<.+>	<&>  # linchinnte
# handled by more general sb rule below
#^lasb			lasp		1	<.+>	<&>
^leig([^h])		lig$1		1	<.+>	<&>  # leigint, but not leigheas words
# risky?
^leith-			leath-		1	<.+>	<&>
^loim			lom		1	<.+>	<&>  # loimeasna
^lir		leor		1	<.+>	<&>
^luaith([^r])	luath$1		1	<.+>	<&>  # luaithintinn
# handled by more general sg rule below
#^luasg			luasc		1	<.+>	<&>
# N.B. makes string longer
^lubh			luibh		1	<.+>	<&> # lubhghort
^lui([mn])		loi$1	1	<.+>	<&>		# luinnir->loinnir
^lith			lth		1	<.+>	<&>
^maill			mall		1	<.+>	<&>
^maoil			maol		1	<.+>	<&>
^maoith([^n])		maoth$1		1	<.+>	<&>  # maoithintinneach
^mairbh		marbh		1	<.+>	<&>   # mairbhghin
# only two math- words anyway?
#^magh		math		1	<.+>	<&>
^meadhn		men		1	<.+>	<&>
^mean		men		1	<.+>	<&>
^mh'([aeiou].)		m'$1		1	<.+>	<&>
# N.B. makes string longer
^min-		mion-		1	<.+>	<&>
^mo-			m-		1	<.+>	<&>
^mo([bcdfgmpst]h)		m$1		1	<[ANV].*>	<&>  # mochothrom
^(mh?)-([^aeiou].)		$1$2		1	<[ANV].*>	<&>
# no midh- words anyway?
#^miodh		midh		1	<.+>	<&>
^mir			mr		1	<.+>	<&>
^naoimh			naomh		1	<.+>	<&>
^neamh-([^m].)		neamh$1		1	<[ANV].*>	<&>  # try stripping hyphen before stripping whole thing!
^neimh(..)		neamh$1		1	<.+>	<&>  # Di04
^nea-mb			neamhbh		1	<.+>	<&>
^nea-			neamh-		1	<.+>	<&>    # strip hyphen later
^nea([cdfgpt]h)		neamh$1		1	<.+>	<&>
^h?i([gr])		$1		1	<.+>	<&>
# Prefix modernisation, continued (prefixes from "p" through "u").
# NOTE(review): several replacements here are bare consonant clusters
# ("^ridh" -> "r", "^sir" -> "sr", "^stit" -> "stt"); presumably accented
# vowels were lost from this copy -- compare against the upstream source.
# handled by more general nn rule below
#^pannc			panc		1	<.+>	<&>
#^poinnt			point		1	<.+>	<&>
^pont			punt		1	<.+>	<&>
^prmh			promh		1	<.+>	<&>
^ridh			r		1	<.+>	<&>
^rimh			ramh		1	<.+>	<&>
^r[i]o?gh([^n])		r$1		1	<.+>	<&>   # righdhamhna, rogh-chorin, but not "righnigh", "righne", etc.
^ro-			r-		1	<.+>	<&>
^ri-			r-		1	<.+>	<&>
^roighn			righn		1	<.+>	<&>
^roimh			ramh		1	<.+>	<&>
^ruadh			rua		1	<.+>	<&>
^rin			rn		1	<.+>	<&>
^sir			sr		1	<.+>	<&>  # sireolas
^saoghl			saol		1	<.+>	<&>
^saoibh			saobh		1	<.+>	<&>
^saoir-			saor-		1	<.+>	<&>
^sclug			sclog		1	<.+>	<&>
^scoith			scoth		1	<.+>	<&> # scoithdhearg
^seana-			sean-		1	<.+>	<&>    # strip hyphen later
^sein-			sean-		1	<.+>	<&>    # strip hyphen later
^siod			sead		1	<.+>	<&>
^sodh			s		1	<.+>	<&>
# N.B. makes it longer
^sr-			sor-		1	<.+>	<&>
^smid			smit		1	<.+>	<&>
^soi-			so-		1	<.+>	<&>  # soi-bhriste, etc.  Di04
# handled by more general ll rule below
#^soills			soils		1	<.+>	<&>
# handled by more general nn rule below
#^sonnr			sonr		1	<.+>	<&>
^sr[a]ic		srac		1	<.+>	<&>
# N.B. makes string longer
^srang			sreang		1	<.+>	<&>
^sream			sram		1	<.+>	<&>
# N.B. makes string longer - risky, plus no "sruim-" words at all?
#^srim			sruim		1	<.+>	<&>
^stit			stt		1	<.+>	<&>  # stitseirbhs
^taidhbh			taibh		1	<.+>	<&>
# handled by more general sb rule below
#^taisb		taisp	1	<.+>	<&>
^teasb([^h])		taispe$1	1	<.+>	<&>
^teint			tint		1	<.+>	<&>
^tele([^aeiou]+[ei])	teili$1		1	<.+>	<&>    # OD77
^tele([^aeiou]+[aou])	teilea$1	1	<.+>	<&>
^tiom			tim		1	<.+>	<&>
^tiugh			ti		1	<.+>	<&>
# risky
#^tir	tabhar		1	<.+>	<&>
# handled by more general rule below
#^treabhth			treaf		1	<.+>	<&>
^treas			tras		1	<.+>	<&>
^trin			tran		1	<.+>	<&>
^troim			trom		1	<.+>	<&>
^tuaith		tuath		1	<.+>	<&>  # tuaithcheantar
^h?uaith		uath		1	<.+>	<&>
^h?ir			r		1	<.+>	<&>
# End of Prefixes - Now general spelling reforms (but not from CO handbook)
# Mostly based on corpus work...
# Most rules in this section are unanchored, so (per the file header) they
# are applied globally within the word.
# NOTE(review): several rules here cannot be right as written -- e.g. the
# bare pattern "e" -> "eo" would rewrite every "e", three rules share the
# identical pattern "(i?r[dlnr])", and some rules have an empty replacement.
# Presumably accented vowels were lost from this copy of the file; compare
# against the upstream source before use.
# den for dan
^(n?dh?)en		$1an		1	<V.*>	<&>
eoracht(a?)$	eoireacht$1		1	<[FN].*>	<&> # spaisteoracht
racht(a?)$		ireacht$1	1	<[FN].*>	<&>   # fidheadracht
air$			ir			1	<[FN].*>	<&>		# pilair, paipair
eor$			eoir		1	<.+>	<&>
# 08-11-05, generalize nouns to everything (fraochta e.g.)
chd(a?)$		cht$1		1	<.+>	<&>    # masc too (achd, m.sh.)
nteacht(a?)$	neacht$1	1	<[FN].*>	<&>    # deisimnteacht, etc. 
anna$			nna		1	<[FN].*>	<&>	# common!  clanna, seanna, gnanna, srl.
# corpus - Dmhnach, etc., but not rmhilis, etc.
#  Handles  all ^cmh- prefixes too except  ^cmha which we do above explicitly
([^Rr])mh([^a])		$1omh$2		1	<.+>	<&>
^(.)gh			$1ogh		1	<.+>	<&>		# fghmhar->foghmhar->fmhar
eamhn		in		1	<.+>	<&>     #  maitheamhnas, breitheamhnas
([^e])amhn	$1n	1	<.+>	<&>     #  rathamhnas
#   warning - makes longer 
^b'f([aeiou])	b'fh$1	1	<.+>	<&>
#   Now various general spelling reforms from CO handbook
ai(cht?n)		a$1		1	<.+>	<&>    # CO p.120, smaichtn
sg			sc		1	<.+>	<&>    # CO p.115
sb			sp		1	<.+>	<&>    # CO p.115
sd			st		1	<.+>	<&>    # CO p.115
dn			nn		1	<.+>	<&>    # CO p.114 cadna
(ir[ld])		e$1		1	<.+>	<&>    # CO p.123
(i?r[dlnr])		a$1		1	<.+>	<&>    # CO p.123-124
(i?r[dlnr])		o$1		1	<.+>	<&>    # CO p.123-124
(i?r[dlnr])		u$1		1	<.+>	<&>    # CO p.123-124
e			eo		1	<.+>	<&>    # go ler, aibhleg, etc.
eamh[au]il$		iil		1	<[ANF].*>	<&>    # dlightheamhail
eamhla			ila		1	<[ANF].*>	<&>
([^e])amh[au]il$		$1il		1	<[ANF].*>	<&>    # CO p.102, a2+baril, etc.
([^e])amhla		$1la		1	<[ANF].*>	<&>    # "-amhlachta?" too
adht([a])		at$1	1	<.+>	<&>	# iadhta, cruadhtn, iarradhtas, srl
ale$			aola		1	<.+>	<&>    # *3* decl.
# Old dative-plural endings in -ibh; restricted to tags beginning <N pl="y".
eachaibh$		igh		1	<N pl="y".*>	<&>    # taoiseachaibh
([^e])achaibh$		$1aigh		1	<N pl="y".*>	<&>    # Albanachaibh
anaibh$			anna		1	<N pl="y".*>	<&>    # sgoileanaibh, ceisteanaibh
naibh$			in		1	<N pl="y".*>	<&>    # uachtarnaibh
aibh$			a		1	<N pl="y".*>	<&>    # sinaggaibh, fiachaibh
(..[^a])ibh$		$1		1	<N pl="y".*>	<&>    # minteoiribh, etc.
(.)eu			$1a		1	<.+>	<&>    # sgeul, ceudna, srl.
tch			t		1	<.+>	<&>    # CO p.103
i[dg]hea		o		1	<.+>	<&>    # CO p.105,107
i[dg]he					1	<.+>	<&>    # CO p.104,106
u([mto])		a$1		1	<.+>	<&>	# cialluonn, ionaduochta, leasutear, mionnum
uith			aith		1	<.+>	<&>    # rialuitheoir
adha					1	<.+>	<&>    # CO p.105
dh					1	<.+>	<&>    # dhbhar, rdh, etc.
[mb]hth			f		1	<.+>	<&>    # CO p.106
th[mb]h			f		1	<.+>	<&>    # CO p.106
ghai			a		1	<.+>	<&>    # CO p.107
thch			ch		1	<.+>	<&>    # CO p.108
tht			t		1	<.+>	<&>    # CO p.108
ll([rst])		l$1		1	<.+>	<&>    # CO p.112-113
nn([cdlrst])		n$1		1	<.+>	<&>    # CO p.114
ghail$			al		1	<N pl="n" gnt="n" gnd="f".*>	<&>
ghaile$			aola		1	<N pl="n" gnt="y" gnd="f".*>	<&>
ighil$			l		1	<N pl="n" gnt="n" gnd="f".*>	<&>
ighile$			ola		1	<N pl="n" gnt="y" gnd="f".*>	<&>
(ain|[e][ai]|iar)gc	$1g		1	<.+>	<&>    # CO p.109
e[au]gc	ag		1	<.+>	<&>    # CO p.109
([ai])dt		$1d		1	<.+>	<&>    # CO p.109
(dh??)omb		$1om		1	<.+>	<&>    # CO p.109
(ai?n)bhf		$1bh		1	<.+>	<&>    # CO p.109  ainbhfiosach
(..)u(i?s)$		$1a$2		1	<.+>	<&>    # solus, rus, etc.
#   some dialect/pre-standard noun/adjective inflections
# NOTE(review): "nna$" -> "nna" and "ais$" -> "ais" below replace a string
# with itself, and two rules have an empty replacement.  Presumably accented
# vowels were lost from this copy; as written the identity rules change
# nothing and may defeat the "every rule shortens the word" guarantee in the
# file header.  Compare against the upstream source.
([o]ga)$		$1		1	<N pl="y".*>	<&>   # duilleoga
nna$			nna		1	<N pl="y".*>	<&>   # ceisteanna
# Same pattern twice with different outputs: -dha -> -tha, then -dha -> -a.
(..)dha$		$1tha		1	<.+>	<&>    # measardha, muinteardha
(..)dha$		$1a		1	<.+>	<&>    # stuamdha, promhdha
ei?g$			eog		1	<[NF].*>	<&>   # nb2 
eoig$			eog		1	<.+>	<&>   # nb2, leave as <.+> for <F>'s
ig$			g		1	<[NF].*>	<&>   # nb2 
lainn$			lann		1	<[NF].*>	<&>   # nb2
ais$			ais		1	<[NF].*>	<&>   # cuntais, bronntanais
dth([ea])$		dt$1		1	<.+>	<&>    # goidthe
# Frsabh -> Frsa (dpl)
bh$				1	<[NF].*>	<&>
ocha$				1	<N pl="y".*>	<&>	# margaidheacha->margaocha->marga
# but note in previous that sometimes this gets interrupted by correct
# plural adjective:  cogaidheacha->cogaocha, or sometimes even correct
# plural noun:  claidheacha->claocha  (so want to keep -ocha intermediate)
##################  NON/PRE-STANDARD VERB ENDINGS #########################
# Most rules in this section constrain the tag pattern to verbs, often to a
# specific p="..." / t="..." combination (t values seen here: fist, coinn,
# gnth, lith, caite, foshuit, ord).  The file header warns that some of these
# rules are the exception to the "each rule shortens the word" guarantee.
# NOTE(review): patterns such as "(|eo)char$", "th(e)$" and "ns$", and the
# empty replacements below, suggest accented vowels were lost from this copy
# of the file; compare against the upstream source.
# future/conditional 2nd declension with -cha?- see CO p.103
e[o]cha(i?dh)$		eo$1		1	<V.*>	<&>
([^e])cha(i?dh)$	$1$2		1	<V.*>	<&>
e[o]chaimi([ds])$	eoimi$1		1	<V.*>	<&>
([^e])chaimi([ds])$	$1imi$2	1	<V.*>	<&>
e[o]chamuid$	eoimid		1	<V p="y" t="fist">	<&>
([^e])chamuid$	$1imid		1	<V p="y" t="fist">	<&>
e[o]chai(nn|ds)$	eoi$1		1	<V p="y" t="coinn">	<&>
([^e])chai(nn|ds)$	$1i$2		1	<V p="y" t="coinn">	<&>
e[o]chth$		eof		1	<V p="y" t="coinn">	<&>
([^e])chth$		$1f		1	<V p="y" t="coinn">	<&>
e[o]ch(th)?aidhe$		eofa		1	<V p="n" t="coinn">	<&>
([^e])ch(th)?aidhe$		$1fa		1	<V p="n" t="coinn">	<&>
(|eo)char$			$1far	1	<V p="n" t="fist">	<&>
(|eo)chthar$			$1far	1	<V p="n" t="fist">	<&>
faidhear$			far	1	<V p="n" t="fist">	<&>
# d n-amharcth, d dtugth, srl
th(e)$			t$1		1	<V p="y" t="gnth">	<&>
#   2nd declension Dinneen-style endings
(..)ad$		$1aonn		1	<V p="y" t="lith">	<&> # ciallud
aghadh					1	<[NF].*>	<&>  # broad and slender
iaghadh			odh		1	<[VF].*>	<&>  # innsiughadh->insodh
(..)thear$		$1tear		1	<[VF].*>	<&> # aut. present
(..)t(te?ar)$	$1$2		1	<[VF].*>	<&> # aut. present
ighdar			odar		1	<[VF].*>	<&>    # broad and slender
ighim			m		1	<[VF].*>	<&>    # broad and slender
ighidh					1	<.+>	<&>    # subjunctive?
ighinn			nn		1	<.+>	<&>    # imperfect
uigh'e$			aithe		1	<.+>	<&>	# U019.txt
fiomuid$		fimid		1	<[VF].*>	<&>
# Future forms: see OS88 p.215 for these (fe?as, etc. on p.63), also CB p.145
#  socrid, dlseoid, 3rd plural forms in achtanna
fa[ds]$			faidh		1	<V p="y" t="fist">	<&>
fea[ds]$		fidh		1	<V p="y" t="fist">	<&>
chas$		idh		1	<V p="y" t="fist">	<&> # meabhrchas->meabhrs->...
i?[ds]$		idh		1	<V p="y" t="fist">	<&>
eoi?[ds]$		eoidh		1	<V p="y" t="fist">	<&>
# Common in achtanna to have -fidh,-fid ending on broad stem, lots of 
# other slender/broad mismatches:
([aou][^aeiou]*)f$	$1fa	1	<V p="n" t="coinn">	<&>
([aou][^aeiou]*)fidh?$	$1faidh	1	<V p="y" t="fist">	<&>
([ei][^aeiou]*)fadh$	$1feadh	1	<V p="y" t="coinn">	<&>
([ei][^aeiou]*)far$	$1fear	1	<V p="n" t="fist">	<&>
([ei][^aeiou]*)tar$	$1tear	1	<V p="n" t="lith">	<&> # * not + for chtar
# Future 2nd p., "r" forms noted on OS88 p.216;
# see achtanna for ocfaid/ocfa, bainfid/bainfe
fa$			faidh		1	<V p="y" t="fist">	<&>
fai[rsd]$		faidh		1	<V p="y" t="fist">	<&>
fe$			fidh		1	<V p="y" t="fist">	<&>
fi[rsd]$		fidh		1	<V p="y" t="fist">	<&>
is$			idh		1	<V p="y" t="fist">	<&>
eois$			eoidh		1	<V p="y" t="fist">	<&>
# Future/Cond autonomous; unwanted prefix h is common in achtanna
^h([aeiou].+fe?ar)$	$1	1	<V p="n" t="fist">	<&>
^h([aeiou].+fa?)$		$1	1	<V p="n" t="coinn">	<&>
# Past; also see Di27 (present).  "r" forms on p.216 (also CB p.145 present)
(...)os$		$1igh		1	<V p="y" t="caite">	<&>
(...)ai[rs]$		$1		1	<V p="y" t="caite">	<&>
(..[^aeiou])i[rs]$	$1		1	<V p="y" t="caite">	<&>
(...)s$		$1igh		1	<V p="y" t="caite">	<&>
#  Thugtaoi in MU44, e.g.
taoi$			ta		1	<V p="n" t="gnth">	<&>
#  present
ains$			ann		1	<V p="y" t="lith">	<&>
ins$			eann		1	<V p="y" t="lith">	<&>
ns$			onn		1	<V p="y" t="lith">	<&>
#  OS88 p.126  present
anns$			ann		1	<V p="y" t="lith">	<&>
onns$			onn		1	<V p="y" t="lith">	<&>
#  corpus; now handled above
# th(e|ear)$		t$1		1	<V.*>	<&>
famuid		faimid		1	<V p="y" t="fist">	<&>
#  -idh, -aidh on 1st declension verbs: Pres. Subj. or 2nd pl. imper (-ig) 
#  according to Dinneen.  Some examples seem to be pres. in corpus,
#  e.g. suidhidh, some apparently past e.g. uneclipsed "tigidh"
#  Here we go with subjunctive which is the most common by far
(.[^a])idh$	$1e		1	<V p="y" t="foshuit">	<&>
# (..)aidh$	$1a		1	<V p="y" t="foshuit">	<&>
#  pre-standard texts commonly have "danfaimd", "tugaims", "rachaimd"...
(...)im([ds])$		$1imi$2	1	<V.*>	<&>
igidh$			ig		1	<V p="y" t="ord">	<&>
mist$			mis		1	<V.*>	<&>
#  various -il  endings should be broad, CO p.120
ilim$			laim		1	<V.*>	<&>
ileann$		lann		1	<V.*>	<&>
ilfidh$		lfaidh		1	<V.*>	<&>
ilfe$			lf		1	<V.*>	<&>
ilfear$		lfar		1	<V.*>	<&>
ilf$			lfa		1	<V.*>	<&>
i(lf?)eadh$		$1adh		1	<V.*>	<&>
i(lf?)im(i[sd])$	$1aim$2	1	<V.*>	<&>
ilig$			laig		1	<V.*>	<&>
ilea([dm]ar)$		la$1		1	<V.*>	<&>
i(lf?)ids$		$1aids	1	<V.*>	<&>
ltar$			iltear		1	<V.*>	<&>
lta$			ilte		1	<A.*>	<&>
laithe$		ilte		1	<A.*>	<&>
lanna$			lacha		1	<N.*>	<&>  # nb3 pl
#############################################################################
#   start some more radical changes, e.g. stripping prefixes completely
#############################################################################
# Each rule removes a prefix and keeps only the captured start of the base
# word.  The capture encodes which initial mutation is expected after that
# prefix, and the integer code differs between variants (0 for some initial
# shapes, 2 for others).
# NOTE(review): the meaning of codes 0 and 2 is not defined in this file;
# presumably 2 marks a form accepted but flagged -- confirm in the engine.
# hyphened prefixes, an-, dea-
^h?an-([bcfgmp]h)	$1		0	<[AN].*>	<&>
^h?an-([bcfgmp][^h])	$1		2	<[AN].*>	<&>
^h?an-([^bcfgmp][^h])	$1		0	<[AN].*>	<&>
^h?an-([^bcfgmp]h)	$1		2	<[AN].*>	<&>   # an-dhuine 
^dea-([bcdfgmpt]h)	$1		0	<[AN].*>	<&>
^dea-([bcdfgmpt][^h])	$1		2	<[AN].*>	<&>
^dea-(sh[aeioulnr])	$1	0	<[AN].*>	<&>
^dea-(s[aeioulnr])	$1		2	<[AN].*>	<&>
^dea-(s[^aeioulnrh])	$1	0	<[AN].*>	<&>   # dea-scal
^dea-([^bcdfgmpst])	$1		0	<[AN].*>	<&>
# other prefixes; most are not independent words (ath, comh, etc.)
# but several are -- these are worth including for several reasons:
# (1) efficiency; morph. analysis happens well before the check for
#  compounds/run-togethers (2) allows for a more refined check for
#  lenition of the prefixed word or other changes (comh-mh, comh->c, etc.)
^h?ain([deilnrst])	$1		0	<[AN].*>	<&>
^h?ain([bcfgmp]h)	$1		0	<[AN].*>	<&>
^h?ath([^bcdfgmpst-])	$1		0	<[ANV].*>	<&>
^h?ath([bcdfgmp]h)	$1		0	<[ANV].*>	<&>
^h?ath(sh[aeioulnr])	$1		0	<[ANV].*>	<&>
^h?ath(s[^aeioulnrh])	$1	0	<[ANV].*>	<&>
^h?ath-?th		th		1	<[ANV].*>	<&>   # -> "at-" above
^comh([^bcdfgmnpst-])	$1		0	<[ANV].*>	<&>
^comh([bcdfgpt]h)	$1		0	<[ANV].*>	<&>
^comh(sh[aeioulnr])	$1	0	<[ANV].*>	<&>
^comh(s[^aeioulnrh])	$1	0	<[ANV].*>	<&>
# cmhargadh, cmhalartach; but pre-standard "cmhoibriughadh" too
^c(mh[aou])		$1		0	<[ANV].*>	<&>
^ci(mh[ei])		$1		0	<[ANV].*>	<&>
^c(n[aou])		$1		0	<[ANV].*>	<&>
^ci(n[ei])		$1		0	<[ANV].*>	<&>
^do-([aeiou])	$1		0	<[AN].*>	<&>
# ^do([lnr]) BELOW
^do([bcdfgmpt]h)	$1		0	<[AN].*>	<&>
^do(sh[aeioulnr])	$1		0	<[AN].*>	<&>
^do(s[^aeioulnrh])	$1		0	<[AN].*>	<&>
# NOTE(review): same pattern as the "^do-([aeiou])" rule a few lines up, with
# a wider tag pattern; presumably one of the two originally had an accented
# prefix that was lost -- confirm against the upstream source.
^do-([aeiou])	$1		0	<[ANV].*>	<&>
^droch([^bcdfgmpst-])	$1		0	<[AN].*>	<&>
^droch-(ch)		$1		0	<[AN].*>	<&>
^droch([bdfgmpt]h)	$1		0	<[AN].*>	<&>
^droch(sh[aeioulnr])	$1		0	<[AN].*>	<&>
^droch(s[^aeioulnrh])	$1		0	<[AN].*>	<&>
# NOTE(review): the next two groups of five "^for" rules are byte-identical,
# so the second group can never do anything the first did not.  Presumably
# one group originally carried an accented prefix that was lost from this
# copy -- restore from the upstream source.
^for([^bcdfgmprst-])	$1		0	<[ANV].*>	<&>
^for-(r)		$1		0	<[ANV].*>	<&>
^for([bcdfgmpt]h)	$1		0	<[ANV].*>	<&>
^for(sh[aeioulnr])	$1		0	<[ANV].*>	<&>
^for(s[^aeioulnrh])	$1		0	<[ANV].*>	<&>
^for([^bcdfgmprst-])	$1		0	<[ANV].*>	<&>
^for-(r)		$1		0	<[ANV].*>	<&>
^for([bcdfgmpt]h)	$1		0	<[ANV].*>	<&>
^for(sh[aeioulnr])	$1		0	<[ANV].*>	<&>
^for(s[^aeioulnrh])	$1		0	<[ANV].*>	<&>
^fo-([aeiou])	$1		0	<[ANV].*>	<&>
# ^fo([lnr]) BELOW
^fo([bcdfgmpt]h)	$1		0	<[ANV].*>	<&>
^fo(sh[aeioulnr])	$1		0	<[ANV].*>	<&>
^fo(s[^aeioulnrh])	$1		0	<[ANV].*>	<&>
^fo-(..)		$1		1	<[ANV].*>	<&>
^frith([^bcdfgmpst-])	$1		0	<[ANV].*>	<&>
^fri(t[^h])		$1		0	<[ANV].*>	<&>
^frith([bcdfgmp]h)	$1		0	<[ANV].*>	<&>
^frith(sh[aeioulnr])	$1		0	<[ANV].*>	<&>
^frith(s[^aeioulnrh])	$1		0	<[ANV].*>	<&>
^h?iar([^bcdfgmprst-])	$1		0	<[ANV].*>	<&>
^h?iar-(r)		$1		0	<[ANV].*>	<&>
^h?iar([bcdfgmpt]h)	$1		0	<[ANV].*>	<&>
^h?iar(sh[aeioulnr])	$1		0	<[ANV].*>	<&>
^h?iar(s[^aeioulnrh])	$1		0	<[ANV].*>	<&>
# ilsiamsa, ilsiollach, etc.  no special case
^h?il([^bcfgmp-]..)	$1		0	<[AN].*>	<&>
^h?il([bcfgmp]h.)	$1		0	<[AN].*>	<&>
^h?im([^bcdfghmpst-]..)	$1		0	<[ANV].*>	<&>
^h?im([bcdfgmpt]h.)	$1		0	<[ANV].*>	<&>
^h?im(sh[aeioulnr])	$1		0	<[ANV].*>	<&>
^h?im(s[^aeioulnrh].)	$1		0	<[ANV].*>	<&>
# ^in([^bcfgmp-]) BELOW
^h?in-(n)		$1		0	<[AN].*>	<&>
^h?in([bcfgmp]h..)	$1		0	<[AN].*>	<&>
^leas-(s)		$1		0	<[AN].*>	<&>
^leas([aeioud]..)	$1		0	<[AN].*>	<&>
^leas([bcfgm]h..)	$1		0	<[AN].*>	<&>
^m-([aeiou])	$1		0	<[ANV].*>	<&>
^m([aeiou]..)	$1		2	<[ANV].*>	<&>  # msid
^m([lnr]..)		$1		0	<[ANV].*>	<&>
^m([bcdfgmpt]h.)	$1		0	<[ANV].*>	<&>
^m(sh[aeioulnr])	$1		0	<[ANV].*>	<&>
^m(s[^aeioulnrh])	$1		0	<[ANV].*>	<&>
^neamh-?([aeiou])	$1		0	<[ANV].*>	<&>  # neamh-aistear but neamhaithne
^neamh([lnr])		$1		0	<[ANV].*>	<&>
^neamh([bcdfgpt]h)	$1		0	<[ANV].*>	<&>
^neamh-(mh)		$1		0	<[ANV].*>	<&>
^neamh(sh[aeioulnr])	$1	0	<[ANV].*>	<&>
^neamh(s[^aeioulnrh])	$1	0	<[ANV].*>	<&>
^promh([^bcdfgmpst-])	$1		0	<N.*>	<&>
^promh([bcdfgpt]h)	$1		0	<N.*>	<&>
^promh-(mh)		$1		0	<N.*>	<&>
^promh(sh[aeioulnr])	$1		0	<N.*>	<&>
^promh(s[^aeioulnrh])	$1		0	<N.*>	<&>
^ramh([^bcdfgmpst-])	$1		0	<[ANV].*>	<&>
^ramh([bcdfgpt]h)	$1		0	<[ANV].*>	<&>
^ramh-(mh)		$1		0	<[ANV].*>	<&>
^ramh(sh[aeioulnr])	$1		0	<[ANV].*>	<&>
^ramh(s[^aeioulnrh])	$1		0	<[ANV].*>	<&>
# NOTE(review): two groups of bare "^r" rules follow; the last four rules of
# the second group repeat patterns from the first with a narrower tag
# pattern.  Presumably they originally had different accented prefixes that
# were lost from this copy -- restore from the upstream source.
^r([lnr]..)		$1		0	<[ANV].*>	<&>
^r([bcdfgmpt]h.)	$1		0	<[ANV].*>	<&>
^r(sh[aeioulnr])	$1		0	<[ANV].*>	<&>
^r(s[^aeioulnrh])	$1		0	<[ANV].*>	<&>
^r-([aeiou]..)	$1		0	<[AN].*>	<&>
^r([aeou]..)	$1		2	<[AN].*>	<&>  # not "i" since several actual words in Dinneen start with ri-
^r([lnr]..)		$1		0	<[AN].*>	<&>
^r([bcdfgmpt]h.)	$1		0	<[AN].*>	<&>
^r(sh[aeioulnr])	$1		0	<[AN].*>	<&>
^r(s[^aeioulnrh])	$1		0	<[AN].*>	<&>
^sain([^bcdfgmpst-])	$1		0	<[ANV].*>	<&>
^sain([bcdfgmpt]h)	$1		0	<[ANV].*>	<&>
^sain(sh[aeioulnr])	$1		0	<[ANV].*>	<&>
^sain(s[^aeioulnrh])	$1		0	<[ANV].*>	<&>
^so-([aeiou]..)	$1		0	<[AN].*>	<&>
^so([lnr]..)		$1		0	<[AN].*>	<&>
^so([bcdfgmpt]h.)	$1		0	<[AN].*>	<&>
^so(sh[aeioulnr].)	$1		0	<[AN].*>	<&>
^so(s[^aeioulnrh]..)	$1		0	<[AN].*>	<&>
^tras([^bcfghmnps-])	$1		0	<[ANV].*>	<&>   # leave "^trasn" alone
^tras([bcfgmp]h)	$1		0	<[ANV].*>	<&>
^tras-(s)		$1		0	<[ANV].*>	<&>
# next batch are non-words so really need to be here, but VERY infrequent
# These keep only the second capture ($2): the combining form, its linking
# vowel and any hyphen are all dropped.
^(cil|gig|h?is|meig|micr|pic|teil)ea-?([^aeiou-]+[aou])	$2	0	 <.+>	<&>
^(cil|gig|h?is|meig|micr|pic|teil)i-?([^aeiou-]+[ei])	$2	0	 <.+>	<&>
^(h?ant|ft|nan|par|pol|h?ultr)a-?([^aeiou-]+[aou])		$2	0	 <.+>	<&>
^(h?ant|ft|nan|par|pol|h?ultr)ai-?([^aeiou-]+[ei])		$2	0	<.+>	<&>
^(h?eachtar|freas|h?os|neas|ralt|tob|h?uas|h?uath)-?([^-])		$2	0	 <.+>	<&>
#############################################################################
#  Spelling standardizations, but ones where I'm less sure that they 
#  always apply, i.e. non-standard bits that are also commonly standard,
#  e.g. -tha$   which could be legit "tortha" but also non-std "glrtha"
#  Because of this, safer to apply them after the prefix stripping, so
#  e.g. "iarchoirpigh" comes out as MOIRF{choirpigh} instead of 
#  CAIGHDEAN{choirpeach}, and "forghanntanas" comes out as 
#  MOIRF{ghanntanas} instead of CAIGHDEAN{ghanntan}
# NOTE(review): the rules in this section carry code 1 and the prefix
# stripping rules carry code 0, which suggests that 1 selects CAIGHDEAN{...}
# output and 0 selects MOIRF{...} -- confirm in the engine.
# NOTE(review): "cha$" -> "cha" and "tha$" -> "tha" replace a string with
# itself; presumably an accented vowel was lost from the pattern or the
# replacement in this copy of the file -- compare against upstream.
cha$			cha		1	<[ANF].*>	<&>
tha$			tha		1	<.+>	<&>    # glrtha, spartha
(.[a])igh$		$1ch		1	<[NF].*>	<&>	# nb2 -a genitives
(..)i[dg]h$		$1each		1	<[NF].*>	<&>   #  same (e.g. stmhaillidh)
(..)aidh$		$1a		1	<.+>	<&>    # bhfgaidh
# past tenses
(...)eas$		$1		1	<V p="y" t="caite">	<&>
(..[^aeiou])as$	$1		1	<V p="y" t="caite">	<&>
#  rule for [^ei] is above with other non-std prefixes, last resort here
#  for words like cimhigean -> comhigean
^cimh([ei])			comh$1	1	<.+>	<&>
#############################################################################
#  derivational morphology
# Unlike most rules in this file, two of the rules below give an explicit
# tag replacement (a feminine noun tag) instead of "<&>".
# NOTE(review): "<&>" presumably means "reuse the matched tag" -- confirm in
# the engine.
([ao])chai?s$		$1ch		0	<A pl="n" gnt="n".*>	<&>
acht$			ach		0	<A pl="n" gnt="n".*>	<N pl="n" gnt="n" gnd="f">
([ao])chta$		$1cht		1	<N pl="n" gnt="n".*>	<&>
ocht$			och		0	<A pl="n" gnt="n".*>	<N pl="n" gnt="n" gnd="f">
lachta?$		il		0	<A pl="n" gnt="n".*>	<&> # CO p.120
chi?n$				0	<N.*>	<&>
# but actually non-standard if it's gs or pp; general  uith -> aith below
uithe$					0	<N.*>	<&>
#############################################################################
#  emphatic suffixes
# The suffix is removed and only the captured stem is kept.  Broad stems
# (last vowel a/o/u) take -sa/-san/-na, slender stems (e/i) take
# -se/-sean/-ne; the last four rules handle a hyphen between a stem ending in
# s or n and a suffix beginning with the same letter.
(.[aou])(san?|na)$	$1		0	<[NV].*>	<&>
(.[ei])(sean|[sn]e)$	$1		0	<[NV].*>	<&>
([aou][^aeiou-]*[^aeious-])san?$	$1	0	<[NV].*>	<&>
([aou][^aeiou-]*[^aeioun-])na$	$1	0	<[NV].*>	<&>
([ei][^aeiou-]*[^aeious-])se(an)?$	$1	0	<[NV].*>	<&>
([ei][^aeiou-]*[^aeioun-])ne$	$1	0	<[NV].*>	<&>
([aou][^aeiou-]*s)-san?$		$1	0	<[NV].*>	<&>
([aou][^aeiou-]*n)-na$			$1	0	<[NV].*>	<&>
([ei][^aeiou-]*s)-se(an)?$		$1	0	<[NV].*>	<&>
([ei][^aeiou-]*n)-ne$			$1	0	<[NV].*>	<&>
#############################################################################
#   common spelling errors
# Every active rule in this section carries code 2.
# NOTE(review): the accompanying comments ("common missing accent", "held
# down alt too long") show that this section is mostly about accented vowels,
# yet no accented character survives in this copy of the file.  Several rules
# are therefore malformed as written: two have an empty pattern, two have an
# empty character class "([])", and others shorten or lengthen words in ways
# that do not match their comments.  Restore this section from the upstream
# source before use.
# iri$			ir		2	<N.*>	<&>
uil$			il		2	<A.*>	<&>
il$			iil		2	<A.*>	<&>
(.[^aeiou])ala$	$1la		2	<N.*>	<&>
ula$			la		2	<A.*>	<&>
eail$			eil		2	<.+>	<&>    # common missing accent
([^ae])oir$		$1ir		2	<.+>	<&>
([^ae])ora$		$1ra		2	<.+>	<&>
aio		ao		2	<.+>	<&>   # ionadaiocht, srl
eor$			eoir		2	<N.*>	<&>
ionn$			onn		2	<V.*>	<&>
itear$			tear		2	<V.*>	<&>
ite$			te		2	<V.*>	<&>
iodh$			odh		2	<V.*>	<&>
([^e])oidh$		$1idh		2	<V.*>	<&>
([^e])ofar$		$1far		2	<V.*>	<&>
# next batch from ngramprobs.pl
u$					2	<N.*>	<&>
#chtai$			chta		2	<N.*>	<&>
#ai$			a		2	<.+>	<&>
(...)i$			$1		2	<.+>	<&>
iu			i		2	<.+>	<&>
no			n		2	<.+>	<&>
# aio			ao		2	<.+>	<&>
#aioch(ta?)$		aoch$1		2	<N.*>	<&>   # see prev.
io			o		2	<.+>	<&>
leir			lir		2	<.+>	<&>
^(g?ch?)om([^hamnpr]..)	$1omh$2		2	<.+>	<&>
# NOTE(review): the next rule has an empty pattern; presumably it was a pair
# of accented vowels -- confirm against the upstream source.
			a		2	<.+>	<&>  # held down alt too long; "a" is only diphthong with "a" as second letter, and none with e/u
# NOTE(review): empty character class below; members presumably lost.
([])		$1i		2	<.+>	<&>
# NOTE(review): empty pattern below, as above.
			o		2	<.+>	<&>
# places where the second vowel is the one with the fada are rarer - most of
# them are "a", then "e", "i", "u", then noise -- all but "i" conflict
# with a resolution above;  is, e.g., almost always a mistake for "i" !
# NOTE(review): empty character class below; members presumably lost.
([])			i$1		2	<.+>	<&>
# i$					2	<.+>	<&>  # lotsa false compounds with this
#############################################################################
#  rules with somewhat lower probability (e.g. fo-, must be after non-stnd
#  verb endings to avoid "foluonn"  parsing as "fo+luonn"
# These three complete the do-/fo-/in- prefix-stripping groups earlier in the
# file, which refer to them with "BELOW" comments.
^do([lnr]..)		$1		0	<[AN].*>	<&>
^fo([lnr]..)		$1		0	<[ANV].*>	<&>
^h?in([^bcfgmp-]..)	$1		0	<[AN].*>	<&>
# these next ones work for proper names like "NGael"; otherwise
# they're caught by general "make everything lower" catch all that follows
# Each rule lowercases only the eclipsing letter(s) and keeps the capital of
# the base word (BP -> bP, BHF -> bhF, ...).
^BP			bP		1	<.+>	<&>
^BHF			bhF		1	<.+>	<&>
^DT			dT		1	<.+>	<&>
^GC			gC		1	<.+>	<&>
^MB			mB		1	<.+>	<&>
^ND			nD		1	<.+>	<&>
^NG			nG		1	<.+>	<&>
^TS			tS		1	<.+>	<&>
# last resort for weird caps
# Unanchored, so (per the file header) it applies to every capital letter.
([A-Z])		\l$1		2	<.+>	<&>	# pROMhach
# NOTE(review): the two "diminutive" rules strip only an optional final "n",
# so they also match words with no "n" and leave them unchanged; presumably
# the pattern originally contained an accented vowel before the "n" --
# confirm against the upstream source.
([ei][^aeiou]+)n?$	$1		0	<.+>	<&>	# diminutive
([aou][^aeiou]+)n?$	$1		2	<.+>	<&>	# diminutive
([o])ign?$		$1g		0	<N.*>	<&>
#  the next rule handles a lot of stuff, including unwanted hyphens
#  after prefixes (since the prefix rules will apply after recursing)
#  and similarly unwanted hyphens before emphatic endings (CO p.126)
(.)-			$1		1	<.+>	<&>       # r-bheag / rbheag
# delenite, de-eclipse.  
# Only helps when we recurse and find a standard unmutated prefix above!
# putting caps back Dec 08 for non-words like "tSneach", "nInach"
# which will only be found by stripping mutation and keeping cap
# De-eclipse: drop the eclipsing letter(s) and keep the base consonant.
^b([Pp][^h])		$1		0	<[ANV].*>	<&>
^bh([Ff][^h])		$1		0	<[ANV].*>	<&>
^d([Tt][^h])		$1		0	<[ANV].*>	<&>
^g([Cc][^h])		$1		0	<[ANV].*>	<&>
^m([Bb][^h])		$1		0	<[ANV].*>	<&>
^n([DdGg][^h])		$1		0	<[ANV].*>	<&>
^t([Ss][^h])		$1		0	<[ANV].*>	<&>
# Delenite: drop the "h" after the initial consonant.
^([bcdfgmpBCDFGMP])h	$1		0	<[ANVY].*>	<&>
^([Tt])h([^s])		$1$2		0	<[ANVY].*>	<&>   # luathscal danger
^([Ss])h([lnraeiou])	$1$2	0	<[ANVY].*>	<&>
# Elided particles (t', b', d', m') and prefixed n-/t- before a vowel.
^t'([AEIOUaeiou]|[Ff]h)	d'$1	1	<[ANVY].*>	<&>	# t'athair
^[bdm]'([AEIOUaeiou]|[Ff]h)	$1	0	<[ANVY].*>	<&>	# d'amonn too
^[nt]-([aeiou])	$1		0	<[ANV].*>	<&>
^[nt]([AEIOU])	$1		0	<[ANV].*>	<&>
