Modulus:grc-pronunciation

Purpose

This module generates pronunciations for {{grc-IPA}}, which is used in Ancient Greek entries.

Use the sandbox module to try things out without causing problems in entries.

Most of the data this module relies on is stored in its data submodule, Module:grc-pronunciation/data.

Testcases

ἄγριος ‎(ágrios)	IPA^(key): /á.ɡri.os/ → /ˈa.ɣri.os/ → /ˈa.ɣri.os/ (5^th BC Attic) IPA^(key): /á.ɡri.os/ (1^st BC Egyptian) IPA^(key): /ˈa.ɡri.os/ (4^th AD Koine) IPA^(key): /ˈa.ɣri.os/ (10^th AD Byzantine) IPA^(key): /ˈa.ɣri.os/ (15^th AD Constantinopolitan) IPA^(key): /ˈa.ɣri.os/ Mark the vowel length of the ambiguous vowels ἄ et ι by adding a macron after each one if it is long, or a breve if it is short. By default, Module:grc-pronunciation assumes it is short if unmarked. [This message shows only in preview mode.]
ἄναρθρος ‎(ánarthros)	IPA^(key): /á.nar.tʰros/ → /ˈa.nar.θros/ → /ˈa.nar.θros/ (5^th BC Attic) IPA^(key): /á.nar.tʰros/ (1^st BC Egyptian) IPA^(key): /ˈa.nar.tʰros/ (4^th AD Koine) IPA^(key): /ˈa.nar.θros/ (10^th AD Byzantine) IPA^(key): /ˈa.nar.θros/ (15^th AD Constantinopolitan) IPA^(key): /ˈa.nar.θros/ Mark the vowel length of the ambiguous vowels ἄ et α by adding a macron after each one if it is long, or a breve if it is short. By default, Module:grc-pronunciation assumes it is short if unmarked. [This message shows only in preview mode.]
ἄνθρωπος ‎(ánthrōpos)	IPA^(key): /án.tʰrɔː.pos/ → /ˈan.θro.pos/ → /ˈan.θro.pos/ (5^th BC Attic) IPA^(key): /án.tʰrɔː.pos/ (1^st BC Egyptian) IPA^(key): /ˈan.tʰro.pos/ (4^th AD Koine) IPA^(key): /ˈan.θro.pos/ (10^th AD Byzantine) IPA^(key): /ˈan.θro.pos/ (15^th AD Constantinopolitan) IPA^(key): /ˈan.θro.pos/ Mark the vowel length of the ambiguous vowel ἄ by adding a macron after it if it is long, or a breve if it is short. By default, Module:grc-pronunciation assumes it is short if unmarked. [This message shows only in preview mode.]
ᾰ̓́νθρωπος ‎(ánthrōpos)	IPA^(key): /án.tʰrɔː.pos/ → /ˈan.θro.pos/ → /ˈan.θro.pos/ (5^th BC Attic) IPA^(key): /án.tʰrɔː.pos/ (1^st BC Egyptian) IPA^(key): /ˈan.tʰro.pos/ (4^th AD Koine) IPA^(key): /ˈan.θro.pos/ (10^th AD Byzantine) IPA^(key): /ˈan.θro.pos/ (15^th AD Constantinopolitan) IPA^(key): /ˈan.θro.pos/
ἀρχιμανδρῑ́της ‎(arkhimandrī́tēs)	IPA^(key): /ar.xi.manˈðri.tis/ → /ar.çi.maˈdri.tis/ (4^th AD Koine) IPA^(key): /ar.xi.manˈðri.tis/ (10^th AD Byzantine) IPA^(key): /ar.çi.manˈðri.tis/ (15^th AD Constantinopolitan) IPA^(key): /ar.çi.maˈdri.tis/
Αὖλος ‎(Aŷlos)	IPA^(key): /áu̯.los/ → /ˈa.βlos/ → /ˈa.vlos/ (5^th BC Attic) IPA^(key): /áu̯.los/ (1^st BC Egyptian) IPA^(key): /ˈaw.los/ (4^th AD Koine) IPA^(key): /ˈa.βlos/ (10^th AD Byzantine) IPA^(key): /ˈa.vlos/ (15^th AD Constantinopolitan) IPA^(key): /ˈa.vlos/
Γᾱ́δ ‎(Gā́d)	IPA^(key): /ɡǎːd/ → /ɣað/ → /ɣað/ (5^th BC Attic) IPA^(key): /ɡǎːd/ (1^st BC Egyptian) IPA^(key): /ɡad/ (4^th AD Koine) IPA^(key): /ɣað/ (10^th AD Byzantine) IPA^(key): /ɣað/ (15^th AD Constantinopolitan) IPA^(key): /ɣað/
γαῖᾰ ‎(gaîa)	IPA^(key): /ɡái̯.a/ → /ˈɣɛ.a/ → /ˈɟe.a/ (5^th BC Attic) IPA^(key): /ɡái̯.a/ (1^st BC Egyptian) IPA^(key): /ˈɡɛ.a/ (4^th AD Koine) IPA^(key): /ˈɣɛ.a/ (10^th AD Byzantine) IPA^(key): /ˈɟe.a/ (15^th AD Constantinopolitan) IPA^(key): /ˈɟe.a/
γένος ‎(génos)	IPA^(key): /ɡé.nos/ → /ˈɣe.nos/ → /ˈɟe.nos/ (5^th BC Attic) IPA^(key): /ɡé.nos/ (1^st BC Egyptian) IPA^(key): /ˈɡɛ.nos/ (4^th AD Koine) IPA^(key): /ˈɣe.nos/ (10^th AD Byzantine) IPA^(key): /ˈɟe.nos/ (15^th AD Constantinopolitan) IPA^(key): /ˈɟe.nos/
Δῐονῡ́σῐᾰ ‎(Dionȳ́sia)	IPA^(key): /di.o.ny̌ː.si.a/ → /ði.oˈny.si.a/ → /ði.oˈni.si.a/ (5^th BC Attic) IPA^(key): /di.o.ny̌ː.si.a/ (1^st BC Egyptian) IPA^(key): /di.oˈny.si.a/ (4^th AD Koine) IPA^(key): /ði.oˈny.si.a/ (10^th AD Byzantine) IPA^(key): /ði.oˈny.si.a/ (15^th AD Constantinopolitan) IPA^(key): /ði.oˈni.si.a/
ἐγγενής ‎(engenḗs)	IPA^(key): /eŋ.ɡe.nɛ̌ːs/ → /eŋ.ɣeˈnis/ → /eɲ.ɟeˈnis/ (5^th BC Attic) IPA^(key): /eŋ.ɡe.nɛ̌ːs/ (1^st BC Egyptian) IPA^(key): /ɛŋ.ɡɛˈnes/ (4^th AD Koine) IPA^(key): /eŋ.ɣeˈnis/ (10^th AD Byzantine) IPA^(key): /eɲ.ɟeˈnis/ (15^th AD Constantinopolitan) IPA^(key): /eɲ.ɟeˈnis/
ἔγγονος ‎(éngonos)	IPA^(key): /éŋ.ɡo.nos/ → /ˈeŋ.ɣo.nos/ → /ˈeŋ.ɣo.nos/ (5^th BC Attic) IPA^(key): /éŋ.ɡo.nos/ (1^st BC Egyptian) IPA^(key): /ˈɛŋ.ɡo.nos/ (4^th AD Koine) IPA^(key): /ˈeŋ.ɣo.nos/ (10^th AD Byzantine) IPA^(key): /ˈeŋ.ɣo.nos/ (15^th AD Constantinopolitan) IPA^(key): /ˈeŋ.ɣo.nos/
ἔγκειμαι ‎(énkeimai)	IPA^(key): /éŋ.keː.mai̯/ → /ˈeŋ.ki.mɛ/ → /ˈeɲ.ci.me/ (5^th BC Attic) IPA^(key): /éŋ.keː.mai̯/ (1^st BC Egyptian) IPA^(key): /ˈɛŋ.ki.mɛ/ (4^th AD Koine) IPA^(key): /ˈeŋ.ki.mɛ/ (10^th AD Byzantine) IPA^(key): /ˈeɲ.ɣi.me/ (15^th AD Constantinopolitan) IPA^(key): /ˈeɲ.ci.me/
ἔκγονος ‎(ékgonos)	IPA^(key): /éɡ.ɡo.nos/ → /ˈeɣ.ɣo.nos/ → /ˈe.ɣo.nos/ (5^th BC Attic) IPA^(key): /éɡ.ɡo.nos/ (1^st BC Egyptian) IPA^(key): /ˈɛɡ.ɡo.nos/ (4^th AD Koine) IPA^(key): /ˈeɣ.ɣo.nos/ (10^th AD Byzantine) IPA^(key): /ˈeɡ.ɣo.nos/ (15^th AD Constantinopolitan) IPA^(key): /ˈe.ɣo.nos/
ἔκδικος ‎(ékdikos)	IPA^(key): /éɡ.di.kos/ → /ˈeɡ.ði.kos/ → /ˈeɡ.ði.kos/ (5^th BC Attic) IPA^(key): /éɡ.di.kos/ (1^st BC Egyptian) IPA^(key): /ˈɛɡ.di.kos/ (4^th AD Koine) IPA^(key): /ˈeɡ.ði.kos/ (10^th AD Byzantine) IPA^(key): /ˈeɡ.ði.kos/ (15^th AD Constantinopolitan) IPA^(key): /ˈeɡ.ði.kos/ Mark the vowel length of the ambiguous vowel ι by adding a macron after it if it is long, or a breve if it is short. By default, Module:grc-pronunciation assumes it is short if unmarked. [This message shows only in preview mode.]
ἐκφῠ́ω ‎(ekphýō)	IPA^(key): /ekʰ.pʰý.ɔː/ → /ekˈɸy.o/ → /ekˈfi.o/ (5^th BC Attic) IPA^(key): /ekʰ.pʰý.ɔː/ (1^st BC Egyptian) IPA^(key): /ɛkˈpʰy.o/ (4^th AD Koine) IPA^(key): /ekˈɸy.o/ (10^th AD Byzantine) IPA^(key): /ekˈfy.o/ (15^th AD Constantinopolitan) IPA^(key): /ekˈfi.o/
ἔμβρυον ‎(émbryon)	IPA^(key): /ém.bry.on/ → /ˈem.βry.on/ → /ˈe.bri.on/ (5^th BC Attic) IPA^(key): /ém.bry.on/ (1^st BC Egyptian) IPA^(key): /ˈɛm.bry.on/ (4^th AD Koine) IPA^(key): /ˈem.βry.on/ (10^th AD Byzantine) IPA^(key): /ˈem.vry.on/ (15^th AD Constantinopolitan) IPA^(key): /ˈe.bri.on/ Mark the vowel length of the ambiguous vowel υ by adding a macron after it if it is long, or a breve if it is short. By default, Module:grc-pronunciation assumes it is short if unmarked. [This message shows only in preview mode.]
ἐρετμόν ‎(eretmón)	IPA^(key): /e.ret.món/ → /e.retˈmon/ → /e.retˈmon/ (5^th BC Attic) IPA^(key): /e.ret.món/ (1^st BC Egyptian) IPA^(key): /ɛ.rɛtˈmon/ (4^th AD Koine) IPA^(key): /e.retˈmon/ (10^th AD Byzantine) IPA^(key): /e.retˈmon/ (15^th AD Constantinopolitan) IPA^(key): /e.retˈmon/
ἐρρήθη ‎(errhḗthē)	IPA^(key): /er̥.r̥ɛ̌ː.tʰɛː/ → /erˈri.θi/ → /eˈri.θi/ (5^th BC Attic) IPA^(key): /er̥.r̥ɛ̌ː.tʰɛː/ (1^st BC Egyptian) IPA^(key): /ɛr̥ˈr̥e.tʰe/ (4^th AD Koine) IPA^(key): /erˈri.θi/ (10^th AD Byzantine) IPA^(key): /erˈri.θi/ (15^th AD Constantinopolitan) IPA^(key): /eˈri.θi/
Ζεύς ‎(Zeýs)	IPA^(key): /sdeú̯s/ → /zeβs/ → /zevs/ (5^th BC Attic) IPA^(key): /sdeú̯s/ (1^st BC Egyptian) IPA^(key): /zews/ (4^th AD Koine) IPA^(key): /zeβs/ (10^th AD Byzantine) IPA^(key): /zevs/ (15^th AD Constantinopolitan) IPA^(key): /zevs/
Ἡρᾰκλέης ‎(Hērakléēs)	IPA^(key): /hɛː.ra.klé.ɛːs/ → /i.raˈkle.is/ → /i.raˈkle.is/ (5^th BC Attic) IPA^(key): /hɛː.ra.klé.ɛːs/ (1^st BC Egyptian) IPA^(key): /(h)e.raˈklɛ.es/ (4^th AD Koine) IPA^(key): /i.raˈkle.is/ (10^th AD Byzantine) IPA^(key): /i.raˈkle.is/ (15^th AD Constantinopolitan) IPA^(key): /i.raˈkle.is/
Θρᾷξ ‎(Thrâix)	IPA^(key): /tʰráːi̯kʰs/ → /θraxs/ → /θraks/ (5^th BC Attic) IPA^(key): /tʰráːi̯kʰs/ (1^st BC Egyptian) IPA^(key): /tʰrakʰs/ (4^th AD Koine) IPA^(key): /θraxs/ (10^th AD Byzantine) IPA^(key): /θraks/ (15^th AD Constantinopolitan) IPA^(key): /θraks/
Κιλικίᾱ ‎(Kilikíā)	IPA^(key): /ki.li.kí.aː/ → /ki.liˈki.a/ → /ci.liˈci.a/ (5^th BC Attic) IPA^(key): /ki.li.kí.aː/ (1^st BC Egyptian) IPA^(key): /ki.liˈki.a/ (4^th AD Koine) IPA^(key): /ki.liˈki.a/ (10^th AD Byzantine) IPA^(key): /ci.liˈci.a/ (15^th AD Constantinopolitan) IPA^(key): /ci.liˈci.a/ Mark the vowel length of the ambiguous vowels ι, ι et ί by adding a macron after each one if it is long, or a breve if it is short. By default, Module:grc-pronunciation assumes it is short if unmarked. [This message shows only in preview mode.]
μάχη ‎(mákhē)	IPA^(key): /má.kʰɛː/ → /ˈma.xi/ → /ˈma.çi/ (5^th BC Attic) IPA^(key): /má.kʰɛː/ (1^st BC Egyptian) IPA^(key): /ˈma.kʰe/ (4^th AD Koine) IPA^(key): /ˈma.xi/ (10^th AD Byzantine) IPA^(key): /ˈma.çi/ (15^th AD Constantinopolitan) IPA^(key): /ˈma.çi/ Mark the vowel length of the ambiguous vowel ά by adding a macron after it if it is long, or a breve if it is short. By default, Module:grc-pronunciation assumes it is short if unmarked. [This message shows only in preview mode.]
ναῦς ‎(naŷs)	IPA^(key): /náu̯s/ → /naβs/ → /navs/ (5^th BC Attic) IPA^(key): /náu̯s/ (1^st BC Egyptian) IPA^(key): /naws/ (4^th AD Koine) IPA^(key): /naβs/ (10^th AD Byzantine) IPA^(key): /navs/ (15^th AD Constantinopolitan) IPA^(key): /navs/
οἷαι ‎(hoîai)	IPA^(key): /hói̯.ai̯/ → /ˈy.ɛ/ → /ˈi.e/ (5^th BC Attic) IPA^(key): /hói̯.ai̯/ (1^st BC Egyptian) IPA^(key): /ˈ(h)y.ɛ/ (4^th AD Koine) IPA^(key): /ˈy.ɛ/ (10^th AD Byzantine) IPA^(key): /ˈy.e/ (15^th AD Constantinopolitan) IPA^(key): /ˈi.e/
πᾶς ‎(pâs)	IPA^(key): /pâːs/ → /pas/ → /pas/ (5^th BC Attic) IPA^(key): /pâːs/ (1^st BC Egyptian) IPA^(key): /pas/ (4^th AD Koine) IPA^(key): /pas/ (10^th AD Byzantine) IPA^(key): /pas/ (15^th AD Constantinopolitan) IPA^(key): /pas/
πατρίς ‎(patrís)	IPA^(key): /pa.trís/ → /paˈtris/ → /paˈtris/ (5^th BC Attic) IPA^(key): /pa.trís/ (1^st BC Egyptian) IPA^(key): /paˈtris/ (4^th AD Koine) IPA^(key): /paˈtris/ (10^th AD Byzantine) IPA^(key): /paˈtris/ (15^th AD Constantinopolitan) IPA^(key): /paˈtris/ Mark the vowel length of the ambiguous vowels α et ί by adding a macron after each one if it is long, or a breve if it is short. By default, Module:grc-pronunciation assumes it is short if unmarked. [This message shows only in preview mode.]
Πηληϊάδης ‎(Pēlēïádēs)	IPA^(key): /pɛː.lɛː.i.á.dɛːs/ → /pi.li.iˈa.ðis/ → /pi.li.iˈa.ðis/ (5^th BC Attic) IPA^(key): /pɛː.lɛː.i.á.dɛːs/ (1^st BC Egyptian) IPA^(key): /pe.le.iˈa.des/ (4^th AD Koine) IPA^(key): /pi.li.iˈa.ðis/ (10^th AD Byzantine) IPA^(key): /pi.li.iˈa.ðis/ (15^th AD Constantinopolitan) IPA^(key): /pi.li.iˈa.ðis/ Mark the vowel length of the ambiguous vowels ϊ et ά by adding a macron after each one if it is long, or a breve if it is short. By default, Module:grc-pronunciation assumes it is short if unmarked. [This message shows only in preview mode.]
πρᾶγμα ‎(prâgma)	IPA^(key): /prâːŋ.ma/ → /ˈpraɣ.ma/ → /ˈpraɣ.ma/ (5^th BC Attic) IPA^(key): /prâːŋ.ma/ (1^st BC Egyptian) IPA^(key): /ˈpraɡ.ma/ (4^th AD Koine) IPA^(key): /ˈpraɣ.ma/ (10^th AD Byzantine) IPA^(key): /ˈpraɣ.ma/ (15^th AD Constantinopolitan) IPA^(key): /ˈpraɣ.ma/ Mark the vowel length of the ambiguous vowel α by adding a macron after it if it is long, or a breve if it is short. By default, Module:grc-pronunciation assumes it is short if unmarked. [This message shows only in preview mode.]
σβέννῡμῐ ‎(sbénnȳmi)	IPA^(key): /zbén.nyː.mi/ → /ˈzβen.ny.mi/ → /ˈzve.ni.mi/ (5^th BC Attic) IPA^(key): /zbén.nyː.mi/ (1^st BC Egyptian) IPA^(key): /ˈzbɛn.ny.mi/ (4^th AD Koine) IPA^(key): /ˈzβen.ny.mi/ (10^th AD Byzantine) IPA^(key): /ˈzven.ny.mi/ (15^th AD Constantinopolitan) IPA^(key): /ˈzve.ni.mi/
σημεῖον ‎(sēmeîon)	IPA^(key): /sɛː.mêː.on/ → /siˈmi.on/ → /siˈmi.on/ (5^th BC Attic) IPA^(key): /sɛː.mêː.on/ (1^st BC Egyptian) IPA^(key): /seˈmi.on/ (4^th AD Koine) IPA^(key): /siˈmi.on/ (10^th AD Byzantine) IPA^(key): /siˈmi.on/ (15^th AD Constantinopolitan) IPA^(key): /siˈmi.on/
σμῑκρός ‎(smīkrós)	IPA^(key): /zmiː.krós/ → /zmiˈkros/ → /zmiˈkros/ (5^th BC Attic) IPA^(key): /zmiː.krós/ (1^st BC Egyptian) IPA^(key): /zmiˈkros/ (4^th AD Koine) IPA^(key): /zmiˈkros/ (10^th AD Byzantine) IPA^(key): /zmiˈkros/ (15^th AD Constantinopolitan) IPA^(key): /zmiˈkros/
τάττω ‎(táttō)	IPA^(key): /tát.tɔː/ → /ˈtat.to/ → /ˈta.to/ (5^th BC Attic) IPA^(key): /tát.tɔː/ (1^st BC Egyptian) IPA^(key): /ˈtat.to/ (4^th AD Koine) IPA^(key): /ˈtat.to/ (10^th AD Byzantine) IPA^(key): /ˈtat.to/ (15^th AD Constantinopolitan) IPA^(key): /ˈta.to/ Mark the vowel length of the ambiguous vowel ά by adding a macron after it if it is long, or a breve if it is short. By default, Module:grc-pronunciation assumes it is short if unmarked. [This message shows only in preview mode.]
τὴν ᾰ̓οιδήν ‎(tḕn aoidḗn)	IPA^(key): /tɛ᷆ːn a.oi̯.dɛ̌ːn/ → /tin a.yˈðin/ → /tin a.iˈðin/ (5^th BC Attic) IPA^(key): /tɛ᷆ːn a.oi̯.dɛ̌ːn/ (1^st BC Egyptian) IPA^(key): /ten a.yˈden/ (4^th AD Koine) IPA^(key): /tin a.yˈðin/ (10^th AD Byzantine) IPA^(key): /tin a.yˈðin/ (15^th AD Constantinopolitan) IPA^(key): /tin a.iˈðin/
τμῆμα ‎(tmêma)	IPA^(key): /tmɛ̂ː.ma/ → /ˈtmi.ma/ → /ˈtmi.ma/ (5^th BC Attic) IPA^(key): /tmɛ̂ː.ma/ (1^st BC Egyptian) IPA^(key): /ˈtme.ma/ (4^th AD Koine) IPA^(key): /ˈtmi.ma/ (10^th AD Byzantine) IPA^(key): /ˈtmi.ma/ (15^th AD Constantinopolitan) IPA^(key): /ˈtmi.ma/ Mark the vowel length of the ambiguous vowel α by adding a macron after it if it is long, or a breve if it is short. By default, Module:grc-pronunciation assumes it is short if unmarked. [This message shows only in preview mode.]
φιλίᾳ ‎(philíāi)	IPA^(key): /pʰi.lí.aːi̯/ → /ɸiˈli.a/ → /fiˈli.a/ (5^th BC Attic) IPA^(key): /pʰi.lí.aːi̯/ (1^st BC Egyptian) IPA^(key): /pʰiˈli.a/ (4^th AD Koine) IPA^(key): /ɸiˈli.a/ (10^th AD Byzantine) IPA^(key): /fiˈli.a/ (15^th AD Constantinopolitan) IPA^(key): /fiˈli.a/ Mark the vowel length of the ambiguous vowels ι et ί by adding a macron after each one if it is long, or a breve if it is short. By default, Module:grc-pronunciation assumes it is short if unmarked. [This message shows only in preview mode.]
χάσμα ‎(khásma)	IPA^(key): /kʰáz.ma/ → /ˈxaz.ma/ → /ˈxaz.ma/ (5^th BC Attic) IPA^(key): /kʰáz.ma/ (1^st BC Egyptian) IPA^(key): /ˈkʰaz.ma/ (4^th AD Koine) IPA^(key): /ˈxaz.ma/ (10^th AD Byzantine) IPA^(key): /ˈxaz.ma/ (15^th AD Constantinopolitan) IPA^(key): /ˈxaz.ma/ Mark the vowel length of the ambiguous vowels ά et α by adding a macron after each one if it is long, or a breve if it is short. By default, Module:grc-pronunciation assumes it is short if unmarked. [This message shows only in preview mode.]
χέω ‎(khéō)	IPA^(key): /kʰé.ɔː/ → /ˈxe.o/ → /ˈçe.o/ (5^th BC Attic) IPA^(key): /kʰé.ɔː/ (1^st BC Egyptian) IPA^(key): /ˈkʰɛ.o/ (4^th AD Koine) IPA^(key): /ˈxe.o/ (10^th AD Byzantine) IPA^(key): /ˈçe.o/ (15^th AD Constantinopolitan) IPA^(key): /ˈçe.o/
ᾠδῇ ‎(ōidêi)	IPA^(key): /ɔːi̯.dɛ́ːi̯/ → /oˈði/ → /oˈði/ (5^th BC Attic) IPA^(key): /ɔːi̯.dɛ́ːi̯/ (1^st BC Egyptian) IPA^(key): /oˈde/ (4^th AD Koine) IPA^(key): /oˈði/ (10^th AD Byzantine) IPA^(key): /oˈði/ (15^th AD Constantinopolitan) IPA^(key): /oˈði/

local export = {}

local strip_accent = require('Module:grc-accent').strip_accent
-- [[Module:grc-utilities]] converts sequences of diacritics to the order required by this module,
-- then replaces combining macrons and breves with spacing ones.
local m_utils = require("Module:grc-utilities")
local m_general_utils = require("Module:utilities")
local rearrangeDiacritics = m_utils.pronunciationOrder
local m_utils_data = require("Module:grc-utilities/data")
local diacritics = m_utils_data.diacritics
local m_data = mw.loadData("Module:grc-pronunciation/data")
local m_IPA = require("Module:IPA")
local m_a = require("Module:accent qualifier")
local lang = require("Module:languages").getByCode("grc")
local sc = require("Module:scripts").getByCode("polytonic")

local full_link = m_utils.link
local tag_text = m_utils.tag

-- Codes of every period a pronunciation can be generated for, in the order
-- they are processed and listed. convert_term starts output at the requested
-- period and includes every later entry of this list.
local periods = {'cla', 'koi1', 'koi2', 'byz1', 'byz2'}
-- Subset of the periods that make_table puts in the one-line summary
-- (joined with arrows); the other periods appear only in the full list.
local inlinePeriods = {'cla', 'koi2', 'byz2'}

-- Short local aliases for the MediaWiki string helpers used in this module.
-- Note that rsplit is mw.text.split (it returns a table of pieces).
local rsplit = mw.text.split
local rfind = mw.ustring.find
local usub = mw.ustring.sub
local rmatch = mw.ustring.match
local rsubn = mw.ustring.gsub
local ulen = mw.ustring.len
local ulower = mw.ustring.lower
local U = mw.ustring.char
--[==[
	Return the single character found at position i of s.
	Single-character lookups are needed all over this module, hence the
	helper. An out-of-bounds fetch gives ''.
]==]
local function fetch(s, i)
	local character = usub(s, i, i)
	return character
end

--Combining diacritics are tricky.
-- Most of the marks below are therefore built from their code points with
-- U (mw.ustring.char) instead of being written as literal characters.
-- aspirated, macron and breve are spacing characters and are written literally.
-- NOTE(review): several of the pitch marks (high, low, rising, falling,
-- midHigh, midLow, highMid) are not referenced anywhere else in the visible
-- code; confirm before removing them.
local tie = U(0x35C)				-- tie bar
local nonsyllabic = U(0x32F)		-- combining inverted breve below
local high = U(0x341)				-- combining acute tone mark
local low = U(0x340)				-- combining grave tone mark
local rising = U(0x30C)				-- combining caron
local falling = diacritics.Latin_circum	-- combining circumflex
local midHigh = U(0x1DC4)			-- mid–high pitch
local midLow = U(0x1DC6)			-- mid–low pitch
local highMid = U(0x1DC7)			-- high–mid pitch
local voiceless = U(0x325)			-- combining ring below
local aspirated = 'ʰ'
local macron = '¯'
local breve = '˘'

--[==[
	Test whether text, taken as a whole, belongs to the character class X.

	X is a key of m_data.chars. For "frontDiphth" and "Greekdiacritic" the
	stored value is used as a complete pattern; for every other class it is
	treated as the contents of a character set and wrapped in [ ].
	In both cases the pattern is anchored at both ends.

	Returns false when text or X is missing; otherwise returns whatever
	rfind returns (match position(s), or nil when there is no match).
	Raises an error, attributed to the caller, when X has no entry in
	m_data.chars.
]==]
local function is(text, X)
	if not text or not X then
		return false
	end
	-- Must be local: this was previously an accidental global that every
	-- call overwrote.
	local pattern = m_data.chars[X] or error('No data for "' .. X .. '".', 2)
	if X == "frontDiphth" or X == "Greekdiacritic" then
		pattern = "^" .. pattern .. "$"
	else
		pattern = "^[" .. pattern .. "]$"
	end
	return rfind(text, pattern)
end

--[==[
	Shared test behind isIDiphth and isUDiphth: true when the letter right
	after index, with its accents stripped, equals base and that letter's
	data entry has no truthy "diaer" field. Returns false when the letter is
	not base at all.
]==]
local function next_letter_is_plain(term, index, base)
	local following = fetch(term, index + 1)
	if strip_accent(following) ~= base then
		return false
	end
	return not m_data[following].diaer
end

--[==[
	Environment tests that conditions can invoke by name with the tilde (~)
	syntax handled in decode. Each one receives the term and a position in
	it, and inspects the letter(s) after that position.
]==]
local env_functions = {
	-- Is the position followed by a front vowel, or by a front diphthong
	-- whose second letter is not in the "iDiaer" class?
	preFront = function(term, index)
		local first = fetch(term, index + 1)
		local second = fetch(term, index + 2)
		local frontVowel = is(strip_accent(first), "frontVowel")
		if frontVowel then
			return frontVowel
		end
		return is(strip_accent(first .. second), "frontDiphth") and not is(second, "iDiaer")
	end,
	-- Is the next letter an iota without diaeresis?
	isIDiphth = function(term, index)
		return next_letter_is_plain(term, index, 'ι')
	end,
	-- Is the next letter an upsilon without diaeresis?
	isUDiphth = function(term, index)
		return next_letter_is_plain(term, index, 'υ')
	end,
	-- Is the next character a spacing macron or breve?
	hasMacronBreve = function(term, index)
		local following = fetch(term, index + 1)
		return following == macron or following == breve
	end,
}

--[==[
	Evaluate a condition string against position x of term.

	Compound conditions
	* a/b means "a or b"; a+b means "a and b".
	* The string is split at its first operator and everything to the right
	  of it is decoded recursively as one unit. There is therefore no
	  operator precedence, and grouping is to the right:
	  "a/b+c" is a or (b and c), "a+b/c" is a and (b or c).

	Simple conditions begin with an offset relative to x
	(-1 is the previous character, 0 the current one, 1 the next):
	* offset=char    the character at that position equals char.
	* offset.word    the field "word" in that character's data table
	                 (false when the character has no data).
	* offset~name    the result of env_functions[name] called on that
	                 position (false when no such function exists).

	Returns nothing when the condition has none of these forms.
]==]
local function decode(condition, x, term)
	if rfind(condition, '[+/]') then
		-- Shortest possible operator-free prefix, the operator, and the rest.
		local left, operator, right = rmatch(condition, "^([^/+]-)([/+])(.*)$")
		if not (left or right) then
			error('Condition "' .. tostring(condition) .. '" is improperly formed')
		end

		if operator == '/' then		-- logical operator: or
			return decode(left, x, term) or decode(right, x, term)
		elseif operator == '+' then	-- logical operator: and
			return decode(left, x, term) and decode(right, x, term)
		end
		return
	end

	if rfind(condition, '=') then					-- check character identity
		local parts = rsplit(condition, "=")
		local offset, char = parts[1], parts[2]
		-- offset is a numeric string; the addition converts it.
		-- An out-of-bounds fetch gives ''.
		return char == fetch(term, x + offset)
	end

	if rfind(condition, '%.') then					-- check character quality
		local parts = rsplit(condition, "%.")
		local offset, quality = parts[1], parts[2]
		local entry = m_data[fetch(term, x + offset)]
		return entry and entry[quality] or false
	end

	if rfind(condition, '~') then					-- check character(s) using function
		local parts = rsplit(condition, "~")
		local offset, func = parts[1], parts[2]
		local handler = env_functions[func]
		if not handler then
			return false
		end
		return handler(term, x + offset) or false
	end
end

--[==[
	Resolve a data value p for position x of term.

	* A string or number is returned unchanged.
	* A table is read as a sequence of alternatives, tried in order:
	  - a string or number entry is returned immediately (a default);
	  - a table entry is a {condition, result} pair. When decode accepts the
	    condition, the result is returned if it is a string, and resolved
	    recursively through check otherwise.
	  When no alternative applies, the function returns no value at all.
	  This is deliberately left as is: callers that pass the result straight
	  to another call (as convert_term does with table.insert) can tell
	  "no value" from nil.
	* Any other type of p raises an error.
]==]
local function check(p, x, term)
	if type(p) == 'string' or type(p) == 'number' then
		return p
	elseif type(p) == 'table' then   --This table is sequential, with a variable number of entries.
		for _, possP in ipairs(p) do
			if type(possP) == 'string' or type(possP) == 'number' then
				return possP
			elseif type(possP) == 'table' then    --This table is paired, with two values: a condition and a result.
				-- Declared local: these used to leak into the global table.
				local rawCondition, rawResult = possP[1], possP[2]
				if decode(rawCondition, x, term) then
					return (type(rawResult) == 'string') and rawResult or check(rawResult, x, term)
				end
			end
		end
	else
		error('"p" is of unrecongized type ' .. type(p))
	end
end

--[==[
	Convert term, letter by letter, into an unsyllabified IPA string for
	each requested period.

	periodstart selects the first period to output; that period and every
	later entry of the module-level periods list are included. When
	periodstart is nil or '', all periods are included.

	Returns two values: a table mapping each included period to
	{ IPA = <string> }, and the list of included periods in order.
	Raises an error when term is nil.
]==]
local function convert_term(term, periodstart)
	if not term then error('The variable "term" in the function "convert_term" is nil.') end

	-- Decide which periods to produce.
	local IPAs, outPeriods = {}, {}
	local start = not (periodstart and periodstart ~= "")
	for _, period in ipairs(periods) do
		start = start or period == periodstart
		if start then
			IPAs[period] = {}
			table.insert(outPeriods, period)
		end
	end

	-- Walk through the term. Characters without a data entry are skipped.
	local position, last = 1, ulen(term)
	while position <= last do
		local data = m_data[fetch(term, position)]
		if data then
			-- data.pre, when present, tells how many following characters
			-- form a unit with this one (0 means none).
			local advance = data.pre and check(data.pre, position, term) or 0
			-- For a multicharacter unit use that unit's own pronunciation
			-- table, falling back to the single letter's.
			local p = (advance ~= 0) and m_data[usub(term, position, position + advance)].p or data.p
			for _, period in ipairs(outPeriods) do
				-- check is called inline on purpose: its "no value" result
				-- must reach table.insert unchanged.
				table.insert(IPAs[period], check(p[period], position, term))
			end
			position = position + advance
		end
		position = position + 1
	end

	--Concatenate the IPAs
	for _, period in ipairs(outPeriods) do
		IPAs[period] = { IPA = table.concat(IPAs[period], '')}
	end

	return IPAs, outPeriods
end

--[==[
	Return the index of the last character of the current syllable.

	word     remaining (unsyllabified) IPA of the word
	nVowel   index at which the next syllable's vowel (or stress mark) was found
	wordEnd  true when there is no further vowel; the syllable then runs
	         to the end of the word

	Otherwise the break is placed in front of the consonant(s) that start
	the next syllable, decided by the characters right before nVowel:
	* obstruent (optionally aspirated) + liquid: both go to the next syllable;
	* any single consonant, an aspirated obstruent, or voiceless r
	  (r + ring below): it goes to the next syllable;
	* nothing consonantal: the break falls directly before nVowel.
]==]
local function find_syllable_break(word, nVowel, wordEnd)
	if not word then error('The variable "word" in the function "find_syllable_break" is nil.') end
	if wordEnd then
		return ulen(word)
	end

	local before = fetch(word, nVowel - 1)

	if is(before, "liquid") then
		local secondBefore = fetch(word, nVowel - 2)
		if is(secondBefore, "obst") then
			return nVowel - 3
		end
		if secondBefore == aspirated and is(fetch(word, nVowel - 3), "obst") then
			return nVowel - 4
		end
		return nVowel - 2
	end

	if is(before, "cons") then
		return nVowel - 2
	end
	if before == aspirated and is(fetch(word, nVowel - 2), "obst") then
		return nVowel - 3
	end
	if before == voiceless and fetch(word, nVowel - 2) == 'r' then
		return nVowel - 3
	end
	return nVowel - 1
end

--[==[
	Split the IPA of a single word into syllables.

	The word is consumed from the left, one syllable per pass:
	1. find the end of the first vowel (a vowel followed by the nonsyllabic
	   mark, or joined by a tie bar, stays in the same nucleus);
	2. find the next vowel or stress mark, or the end of the word;
	3. let find_syllable_break decide where the syllable ends.

	A stress mark found in a syllable is moved to the front of that syllable,
	or dropped when the whole word is a single syllable.

	A word that contains no vowel at all is returned as one syllable.

	Returns the syllables joined with '.', with the period omitted before a
	stress mark, or nil for an empty word.
]==]
local function syllabify_word(word)
	local syllables = {}
	--[[	cVowel means "current vowel", nVowel "next vowel",
			sBreak "syllable break".							]]--
	-- cVowelFound and letter are declared here as well; they used to be
	-- accidental globals.
	local cVowel, nVowel, sBreak, stress, wordEnd, searching, cVowelFound, letter
	while word ~= '' do
		cVowel, nVowel, sBreak, stress, wordEnd = false, false, false, false, false

		--First thing is to find the first vowel.
		searching = 1
		cVowelFound = false
		while not cVowel do
			letter = fetch(word, searching)
			local nextLetter = fetch(word, searching + 1)
			if cVowelFound then
				if (is(letter, "vowel") and nextLetter ~= nonsyllabic) or is(letter, "cons") or letter == '' or letter == 'ˈ' then
					cVowel = searching - 1
				elseif is(letter, "diacritic") then
					searching = searching + 1
				elseif letter == tie then
					-- A tie bar joins the vowel to whatever follows,
					-- so keep looking for the rest of the nucleus.
					cVowelFound = false
					searching = searching + 1
				else
					searching = searching + 1
				end
			elseif letter == '' then
				-- Ran off the end without finding a vowel. Without this
				-- branch the search never terminated for vowelless words.
				-- Treat everything that is left as the current syllable.
				cVowel = searching - 1
			else
				if is(letter, "vowel") then
					cVowelFound = true
				elseif letter == 'ˈ' then
					stress = true
				end
				searching = searching + 1
			end
		end

		--Next we try and find the next vowel or the end.
		searching = cVowel + 1
		while (not nVowel) and (not wordEnd) do
			letter = fetch(word, searching)
			if is(letter, "vowel") or letter == 'ˈ' then
				nVowel = searching
			elseif letter == '' then
				wordEnd = true
			else
				searching = searching + 1
			end
		end

		--Finally we find the syllable break point.
		sBreak = find_syllable_break(word, nVowel, wordEnd)

		--Pull everything up to and including the syllable Break.
		local syllable = usub(word, 1, sBreak)

		--If there is a stress accent, then we need to move it to the
		--beginning of the syllable, unless it is a monosyllabic word,
		--in which case we remove it altogether.
		if stress then
			if next(syllables) or syllable ~= word then
				syllable = 'ˈ' .. rsubn(syllable, 'ˈ', '')
			else
				syllable = rsubn(syllable, 'ˈ', '')
			end
			stress = false
		end
		table.insert(syllables, syllable)
		word = usub(word, sBreak + 1)
	end

	local out = nil

	if #syllables > 0 then
		out = table.concat(syllables, '.')
		-- The stress mark already marks the boundary; drop the period before it.
		out = rsubn(out, '%.ˈ', 'ˈ')
	end
	return out
end

--[==[
	Syllabify the IPA string of every listed period, in place.

	Each string is split on spaces, every word is passed through
	syllabify_word, words for which that returns nil are dropped, and the
	rest are joined with single spaces again.

	Returns the same IPAs table that was passed in.
]==]
local function syllabify(IPAs, periods)
	for _, period in ipairs(periods) do
		local entry = IPAs[period]
		local syllabified = {}
		for _, word in ipairs(rsplit(entry.IPA, ' ')) do
			local word_ipa = syllabify_word(word)
			if word_ipa then
				syllabified[#syllabified + 1] = word_ipa
			end
		end
		entry.IPA = table.concat(syllabified, ' ')
	end
	return IPAs
end

--[==[
	Build the preview-only notice asking editors to mark the length of
	ambiguous vowels, together with the matching tracking categories.

	ambig              sequence of the ambiguous vowels found in the term;
	                   listed in the message
	ambig_letter_list  table whose keys name the per-vowel subcategories to
	                   add; may be nil, in which case only the general
	                   category is added

	Returns '' when ambig is nil or empty.
]==]
local function make_ambig_note(ambig, ambig_letter_list)
	-- The table ambig is filled with all the ambiguous vowels that have been found in the term.
	local ambig_note = ''
	if ambig and #ambig > 0 then
		-- Plural or singular wording.
		local agr = (#ambig > 1) and { 's ', 'each one' } or { ' ', 'it' }

		local categories = {
			'Ancient Greek terms with incomplete pronunciation',
		}

		-- Guard against a missing list so that pairs() cannot fail.
		for vowel in pairs(ambig_letter_list or {}) do
			table.insert(categories, 'Ancient Greek terms with incomplete pronunciation/' .. vowel)
		end

		-- The paragraph used to end in "</span></p>" although no <span> is
		-- ever opened; the stray closing tag has been removed.
		ambig_note = '\n<p class="previewonly">Mark the vowel length of the ambiguous vowel' .. agr[1]
			.. mw.text.listToText(ambig) .. ' by adding a macron after ' .. agr[2]
			.. ' if it is long, or a breve if it is short. By default, [[Module:grc-pronunciation]] assumes it is short if unmarked.'
			.. '<br/><small>[This message shows only in preview mode.]</small>'
			.. m_general_utils.format_categories(categories, lang)
			.. '</p>\n'
	end
	return ambig_note
end

--[==[
	Assemble the final wikitext: a toggleable box with a one-line summary
	(the periods in inlinePeriods, joined by arrows) and a full list with one
	bulleted line per period, followed by the ambiguous-vowel notice.

	IPAs               table mapping period to { IPA = <syllabified string> }
	ambig, ambig_letter_list
	                   passed on unchanged to make_ambig_note
	periods            the periods to show, in order

	Returns the wikitext/HTML string.
]==]
local function make_table(IPAs, ambig, periods, ambig_letter_list)
	--Final format
	local inlineProns = {}
	local listOfProns = {}
	local fullProns = {}
	local periods2 = {}		-- set of the periods being shown

	for _, period in ipairs(periods) do
		table.insert(fullProns, '* ' .. m_a.show({'grc-' .. period}) .. ' ' ..  m_IPA.format_IPA_full(lang, {{pron = '/' .. IPAs[period].IPA .. '/'}}))
		periods2[period] = true
	end

	-- Only periods that are actually being shown go into the summary line.
	for _, period in ipairs(inlinePeriods) do
		if periods2[period] then
			local pron = '/' .. IPAs[period].IPA .. '/'
			table.insert(inlineProns, {pron = pron})
			table.insert(listOfProns, pron)
		end
	end

	-- Character count of the summary line, used below to size the box.
	-- The former trailing 'or ""' never applied: ".." binds tighter than
	-- "or", and the concatenation always yields a string.
	local inlineIPAlength = ulen("IPA(key): " .. table.concat(listOfProns, ' → '))

	local inline = '<div class="vsShow" style="display:none">\n* ' .. m_IPA.format_IPA_full(lang, inlineProns, nil, ' → ') .. '</div>'

	local full = '<div class="vsHide">\n' .. table.concat(fullProns, '\n') .. make_ambig_note(ambig, ambig_letter_list) .. '</div>'

	return '<div class="vsSwitcher vsToggleCategory-pronunciations" style="width: ' .. inlineIPAlength * 0.68 .. 'em;"><span class="vsToggleElement" style="float: right;">&nbsp;</span>' .. inline .. full .. '</div>'
end

--[==[
	Entry point: generate the pronunciation box for a term.

	Parameters (taken from the parent frame's arguments when frame has a
	getParent method, otherwise from frame itself, which lets other Lua code
	pass a plain table):
	* 1 / w    the term; defaults to the current page title
	* period   first period to show; defaults to "cla"

	Raises an error when a macron or breve follows ε, ο, η or ω.
	Ambiguous vowels are looked for only when period is "cla".
]==]
function export.create(frame)
	local parameterSpec = {
		[1] = {alias_of = 'w'},
		["w"] = {default = mw.title.getCurrentTitle().text},
		["period"] = {default = "cla"},
	}
	local process = require("Module:parameters").process
	local args = process(frame.getParent and frame:getParent().args or frame, parameterSpec)

	local term = ulower(args.w)

	-- Length marks make no sense on these letters; the check runs on the
	-- decomposed form of the term.
	local forbiddenLengthMark = "[εοηω]" .. m_utils_data.diacritic .. "*[" .. diacritics.spacing_macron .. diacritics.spacing_breve .. diacritics.breve .. diacritics.macron .. "]"
	if rfind(mw.ustring.toNFD(term), forbiddenLengthMark) then
		error("Macrons and breves cannot be placed after the letters ε, ο, η, or ω.")
	end

	-- This has to happen before the term is normalised below.
	local ambig, ambig_letter_list
	if args.period == "cla" then
		ambig, ambig_letter_list = m_utils.findAmbig(term)
	end

	-- Normalise: final sigma to plain sigma, ῤ to plain rho, then put the
	-- diacritics in the order this module expects.
	term = rsubn(term, 'ς', 'σ')
	term = rsubn(term, 'ῤ', 'ρ')
	term = rearrangeDiacritics(term)

	local IPAs, outPeriods = convert_term(term, args.period)
	IPAs = syllabify(IPAs, outPeriods)

	return make_table(IPAs, ambig, outPeriods, ambig_letter_list)
end

--[==[
	Build a wikitable of example pronunciations.

	Parameter 1 of the parent frame is a list of terms separated by a comma
	and whitespace. A term may be followed by " (period=xxx)" to choose the
	starting period; the default is "cla". Each row holds a link to the term
	and the output of export.create for it.
]==]
function export.example(frame)
	local output = { '{| class="wikitable"' }

	local params = {
		[1] = {}
	}

	local args = require("Module:parameters").process(frame:getParent().args, params)

	local terms = mw.text.split(args[1], ",%s+")

	-- ipairs, not pairs: the rows must appear in the order the terms were given.
	for _, term in ipairs(terms) do
		local period = rmatch(term, "%(period ?= ?([^%)]+)%)") or "cla"
		-- Text before " (" when a parenthesis follows, else the whole term.
		-- (A former "or error(...)" after this could never be reached.)
		local entry = rmatch(term, "([^%(]+) %(") or term
		local link = full_link(entry)
		local IPA = export.create{ entry, ["period"] = period }
		table.insert(output, "\n|-\n| " .. link .. " || " .. IPA)
	end

	table.insert(output, "\n|}")

	return table.concat(output)
end

return export
--Things we still need:
--Voicing of sigma around (after?) voiced stops. 
--Proper alerts for editors, especially on ambiguous vowels.