diff --git a/Makefile b/Makefile index 875fb8cb68..c67e87874c 100644 --- a/Makefile +++ b/Makefile @@ -267,6 +267,10 @@ prune_cache: clean: goodbye hdown prune prune_cache +# clean tests, remove containers and volume (useful if you changed env variables, etc.) +clean_tests: + ${DOCKER_COMPOSE_TEST} down -v --remove-orphans + #-----------# # Utilities # #-----------# diff --git a/data/grammars/terminal_allergen_de.lark b/data/grammars/terminal_allergen_de.lark new file mode 100644 index 0000000000..1ddd8bc83c --- /dev/null +++ b/data/grammars/terminal_allergen_de.lark @@ -0,0 +1,302 @@ +// This file has been generated automatically, DO NOT EDIT! +ALLERGEN_DE.1: /\bgluten enthaltendes getreide\b/ // "en:gluten" + | /\bgluten-enthaltendes-getreide\b/ // "en:gluten" + | /\bvollmilchschokolade[üu]berzug\b/ // "en:milk" + | /\bschwefeldioxid und sulfite\b/ // "en:sulphur-dioxide-and-sulphites" + | /\bmagermilchjoghurtpulver\b/ // "en:milk" + | /\banderen schalenfr[üu]chten\b/ // "en:nuts" + | /\bh[üu]hnerei-trockeneiwei(ß|ss)\b/ // "en:eggs" + | /\bh[üu]hnerei-eiweisspulver\b/ // "en:eggs" + | /\bh[üu]hnertrockeneieiweiss\b/ // "en:eggs" + | /\bmagermilchpulverzusatz\b/ // "en:milk" + | /\bvollkorn-weizenflocken\b/ // "en:gluten" + | /\bweitere schalenfr[üu]chte\b/ // "en:nuts" + | /\bh[üu]hnerei-eiwei(ß|ss)pulver\b/ // "en:eggs" + | /\bh[üu]hnertrockeneieiwei(ß|ss)\b/ // "en:eggs" + | /\bsojaproteinhydrolysat\b/ // "en:soybeans" + | /\bsojaeiweisskonzentrat\b/ // "en:soybeans" + | /\bmilcheiweisserzeugnis\b/ // "en:milk" + | /\bvollkorn-haferflocken\b/ // "en:gluten" + | /\bweizenvollkornflocken\b/ // "en:gluten" + | /\broggenvollkornflocken\b/ // "en:gluten" + | /\bdinkelvollkornflocken\b/ // "en:gluten" + | /\balaska-seelachs-filet\b/ // "en:fish" + | /\bh[üu]hnereieiwei(ß|ss)pulver\b/ // "en:eggs" + | /\bh[üu]hnertrockeneiweiss\b/ // "en:eggs" + | /\bsojaeiwei(ß|ss)konzentrat\b/ // "en:soybeans" + | /\bmagermilchkonzentrat\b/ // "en:milk" + | /\bmilcheiwei(ß|ss)erzeugnis\b/ // "en:milk" + | /\bhafervollkornflocken\b/ // "en:gluten" + | /\bvollkornhaferflocken\b/ // "en:gluten" + | /\broggenvollkornschrot\b/ // "en:gluten" + | /\bweizenvollkornschrot\b/ // "en:gluten" + | /\bdinkelvollkornschrot\b/ // "en:gluten" + | /\balaska-seelachsfilet\b/ // "en:fish" + | /\bh[üu]hnertrockeneiwei(ß|ss)\b/ // "en:eggs" + | /\bh[üu]hnereiweisspulver\b/ // "en:eggs" + | /\bvollmilchschokolade\b/ // "en:milk" + | /\bgerstenvollkornmehl\b/ // "en:gluten" + | /\bvollkornhaferschrot\b/ // "en:gluten" + | /\bh[üu]hnervolleipulver\b/ // "en:eggs" + | /\bh[üu]hnereigelbpulver\b/ // "en:eggs" + | /\bh[üu]hnereiklarpulver\b/ // "en:eggs" + | /\bh[üu]hnereiwei(ß|ss)pulver\b/ // "en:eggs" + | /\bhafer-vollkornmehl\b/ // "en:gluten" + | /\bvollkorn-hafermehl\b/ // "en:gluten" + | /\bvollkornweizenmehl\b/ // "en:gluten" + | /\bweizenvollkornmehl\b/ // "en:gluten" + | /\bgerstenmalzextrakt\b/ // "en:gluten" + | /\broggenvollkornmehl\b/ // "en:gluten" + | /\bdinkelvollkornmehl\b/ // "en:gluten" + | /\bweizenrostmalzmehl\b/ // "en:gluten" + | /\bsojaproteinisolat\b/ // "en:soybeans" + | /\bbuttermilchpulver\b/ // "en:milk" + | /\bmilcherzeugnissen\b/ // "en:milk" + | /\bmagermilchjoghurt\b/ // "en:milk" + | /\bkondensmagermilch\b/ // "en:milk" + | /\bhafervollkornmehl\b/ // "en:gluten" + | /\bvollkornhafermehl\b/ // "en:gluten" + | /\bweizenspeisekleie\b/ // "en:gluten" + | /\bsojasossenpulver\b/ // "en:soybeans" + | /\bsojaeiwei(ß|ss)isolat\b/ // "en:soybeans" + | /\bblauschimmelk[äa]se\b/ // "en:milk" + | 
/\bmilcherzeugnisse\b/ // "en:milk" + | /\bmagermilchpulver\b/ // "en:milk" + | /\bmolkeneiwei(ß|ss)k[äa]se\b/ // "en:milk" + | /\bs[üu]ssmolkenpulver\b/ // "en:milk" + | /\bjoghurterzeugnis\b/ // "en:milk" + | /\barachis hypogaea\b/ // "en:peanuts" + | /\bhartweizengriess\b/ // "en:gluten" + | /\bh[üu]hnerei-eiwei(ß|ss)\b/ // "en:eggs" + | /\bsojaso(ß|ss)enpulver\b/ // "en:soybeans" + | /\bmolkenerzeugnis\b/ // "en:milk" + | /\bmilchschokolade\b/ // "en:milk" + | /\bsauermilchquark\b/ // "en:milk" + | /\bsauerrahmpulver\b/ // "en:milk" + | /\bs[üu](ß|ss)molkenpulver\b/ // "en:milk" + | /\bvollmilchpulver\b/ // "en:milk" + | /\bs[üu]ssmolkepulver\b/ // "en:milk" + | /\bweizensauerteig\b/ // "en:gluten" + | /\bhartweizengrie(ß|ss)\b/ // "en:gluten" + | /\bgerstenmalzmehl\b/ // "en:gluten" + | /\bweizenquellmehl\b/ // "en:gluten" + | /\bweichweizenmehl\b/ // "en:gluten" + | /\bschalenfr[üu]chten\b/ // "en:nuts" + | /\bmandelst[üu]ckchen\b/ // "en:nuts" + | /\bqueenslandn[üu]sse\b/ // "en:nuts" + | /\bhaselnussst[üu]cke\b/ // "en:nuts" + | /\bstaudensellerie\b/ // "en:celery" + | /\bstangensellerie\b/ // "en:celery" + | /\bknollensellerie\b/ // "en:celery" + | /\bselleriebl[äa]tter\b/ // "en:celery" + | /\bsellerieextrakt\b/ // "en:celery" + | /\balaska-seelachs\b/ // "en:fish" + | /\bh[üu]hnereieiwei(ß|ss)\b/ // "en:eggs" + | /\bh[üu]hnereieigelb\b/ // "en:eggs" + | /\bsoja-lecithine\b/ // "en:soybeans" + | /\bbutterreinfett\b/ // "en:milk" + | /\bs[üu](ß|ss)molkepulver\b/ // "en:milk" + | /\bvollkorn-hafer\b/ // "en:gluten" + | /\bweizenmalzmehl\b/ // "en:gluten" + | /\bgerstenflocken\b/ // "en:gluten" + | /\bvollkornweizen\b/ // "en:gluten" + | /\bschalenfr[üu]chte\b/ // "en:nuts" + | /\bhaselnussmasse\b/ // "en:nuts" + | /\bhaselnusskerne\b/ // "en:nuts" + | /\bhaselnusspaste\b/ // "en:nuts" + | /\bmacadamian[üu]sse\b/ // "en:nuts" + | /\bschwefeldioxid\b/ // "en:sulphur-dioxide-and-sulphites" + | /\bs[üu](ß|ss)lupinenmehl\b/ // "en:lupin" + | /\bmakrelenfilets\b/ // "en:fish" + | /\bsojalecithine\b/ // "en:soybeans" + | /\bsojaerzeugnis\b/ // "en:soybeans" + | /\bjoghurtpulver\b/ // "en:milk" + | /\bmolkeneiweiss\b/ // "en:milk" + | /\bmolkenprotein\b/ // "en:milk" + | /\bbuttereinfett\b/ // "en:milk" + | /\bvollkornhafer\b/ // "en:gluten" + | /\bweizenflocken\b/ // "en:gluten" + | /\broggenflocken\b/ // "en:gluten" + | /\bdinkelflocken\b/ // "en:gluten" + | /\bhaselnussmark\b/ // "en:nuts" + | /\bparanusskerne\b/ // "en:nuts" + | /\bsellerieblatt\b/ // "en:celery" + | /\bselleriesamen\b/ // "en:celery" + | /\bsesambr[öo]tchen\b/ // "en:sesame-seeds" + | /\bheringsfilets\b/ // "en:fish" + | /\bmakrelenfilet\b/ // "en:fish" + | /\beigelbpulver\b/ // "en:eggs" + | /\beiklarpulver\b/ // "en:eggs" + | /\bh[üu]hnereiwei(ß|ss)\b/ // "en:eggs" + | /\bh[üu]hnervollei\b/ // "en:eggs" + | /\bvolleipulver\b/ // "en:eggs" + | /\bh[üu]hnereigelb\b/ // "en:eggs" + | /\bh[üu]hnereiklar\b/ // "en:eggs" + | /\bsojalecithin\b/ // "en:soybeans" + | /\bsojabohnen[öo]l\b/ // "en:soybeans" + | /\bkondensmilch\b/ // "en:milk" + | /\bkuhvollmilch\b/ // "en:milk" + | /\bmolkenpulver\b/ // "en:milk" + | /\bsahnejoghurt\b/ // "en:milk" + | /\bk[äa]sereimilch\b/ // "en:milk" + | /\bmilchprotein\b/ // "en:milk" + | /\bmolkeneiwei(ß|ss)\b/ // "en:milk" + | /\berdnusskerne\b/ // "en:peanuts" + | /\bhaferflocken\b/ // "en:gluten" + | /\bweizengluten\b/ // "en:gluten" + | /\bweizenst[äa]rke\b/ // "en:gluten" + | /\bweizenkleber\b/ // "en:gluten" + | /\bweizengriess\b/ // "en:gluten" + | /\broggenschrot\b/ // "en:gluten" 
+ | /\bmandelst[üu]cke\b/ // "en:nuts" + | /\bwalnusskerne\b/ // "en:nuts" + | /\bandere n[üu]sse\b/ // "en:nuts" + | /\bselleriesaft\b/ // "en:celery" + | /\bheringsfilet\b/ // "en:fish" + | /\bsojaprotein\b/ // "en:soybeans" + | /\bsojaflocken\b/ // "en:soybeans" + | /\bbuttermilch\b/ // "en:milk" + | /\bedamer-k[äa]se\b/ // "en:milk" + | /\bmilchzucker\b/ // "en:milk" + | /\bmilcheiwei(ß|ss)\b/ // "en:milk" + | /\bmilchpulver\b/ // "en:milk" + | /\bspeisequark\b/ // "en:milk" + | /\bsahnepulver\b/ // "en:milk" + | /\brahmjoghurt\b/ // "en:milk" + | /\bschlagsahne\b/ // "en:milk" + | /\bziegenmilch\b/ // "en:milk" + | /\bschafsmilch\b/ // "en:milk" + | /\bmolkepulver\b/ // "en:milk" + | /\bgerstenmehl\b/ // "en:gluten" + | /\bgerstenmalz\b/ // "en:gluten" + | /\bweizengrie(ß|ss)\b/ // "en:gluten" + | /\bweizenkleie\b/ // "en:gluten" + | /\bmandelkerne\b/ // "en:nuts" + | /\bhaseln[üu]ssen\b/ // "en:nuts" + | /\bkaschun[üu]sse\b/ // "en:nuts" + | /\bcashewkerne\b/ // "en:nuts" + | /\bcashewn[üu]sse\b/ // "en:nuts" + | /\bsesamk[öo]rner\b/ // "en:sesame-seeds" + | /\blupinenmehl\b/ // "en:lupin" + | /\blachsfilets\b/ // "en:fish" + | /\bkrebstieren\b/ // "en:crustaceans" + | /\bh[üu]hnereier\b/ // "en:eggs" + | /\bsojabohnen\b/ // "en:soybeans" + | /\bsojaeiwei(ß|ss)\b/ // "en:soybeans" + | /\bsojaschrot\b/ // "en:soybeans" + | /\bemmentaler\b/ // "en:milk" + | /\bfrischk[äa]se\b/ // "en:milk" + | /\bgorgonzola\b/ // "en:milk" + | /\bk[äa]sepulver\b/ // "en:milk" + | /\bmagermilch\b/ // "en:milk" + | /\bmagerquark\b/ // "en:milk" + | /\bmozzarella\b/ // "en:milk" + | /\bsauermilch\b/ // "en:milk" + | /\bbutterfett\b/ // "en:milk" + | /\bschafmilch\b/ // "en:milk" + | /\bsauermolke\b/ // "en:milk" + | /\bhaferkleie\b/ // "en:gluten" + | /\bweizenmehl\b/ // "en:gluten" + | /\bhartweizen\b/ // "en:gluten" + | /\broggenmehl\b/ // "en:gluten" + | /\bdinkelmehl\b/ // "en:gluten" + | /\bweizenmalz\b/ // "en:gluten" + | /\bhaferfaser\b/ // "en:gluten" + | /\bhaseln[üu]sse\b/ // "en:nuts" + | /\bpecann[üu]sse\b/ // "en:nuts" + | /\bsesamsamen\b/ // "en:sesame-seeds" + | /\bsesampaste\b/ // "en:sesame-seeds" + | /\bsenfk[öo]rner\b/ // "en:mustard" + | /\bsenfsaaten\b/ // "en:mustard" + | /\bsenfschrot\b/ // "en:mustard" + | /\blachsfilet\b/ // "en:fish" + | /\bkrebstiere\b/ // "en:crustaceans" + | /\bweichtiere\b/ // "en:molluscs" + | /\bsojamilch\b/ // "en:soybeans" + | /\bsojakerne\b/ // "en:soybeans" + | /\bsojakleie\b/ // "en:soybeans" + | /\bsojasauce\b/ // "en:soybeans" + | /\bsojagrie(ß|ss)\b/ // "en:soybeans" + | /\bsojasosse\b/ // "en:soybeans" + | /\bcamembert\b/ // "en:milk" + | /\bsauerrahm\b/ // "en:milk" + | /\bvollmilch\b/ // "en:milk" + | /\bs[üu]ssmolke\b/ // "en:milk" + | /\bmilchfett\b/ // "en:milk" + | /\berdn[üu]ssen\b/ // "en:peanuts" + | /\bhafermehl\b/ // "en:gluten" + | /\bwalnuss[öo]l\b/ // "en:nuts" + | /\bparan[üu]sse\b/ // "en:nuts" + | /\bpistazien\b/ // "en:nuts" + | /\bhaselnuss\b/ // "en:nuts" + | /\berdnuss[öo]l\b/ // "en:nuts" + | /\bsesamkorn\b/ // "en:sesame-seeds" + | /\bsenfsamen\b/ // "en:mustard" + | /\bthunfisch\b/ // "en:fish" + | /\beipulver\b/ // "en:eggs" + | /\bh[üu]hnerei\b/ // "en:eggs" + | /\bfrischei\b/ // "en:eggs" + | /\bsojafett\b/ // "en:soybeans" + | /\bsojamehl\b/ // "en:soybeans" + | /\bsojaso(ß|ss)e\b/ // "en:soybeans" + | /\bbergk[äa]se\b/ // "en:milk" + | /\bhartk[äa]se\b/ // "en:milk" + | /\bkuhmilch\b/ // "en:milk" + | /\brohmilch\b/ // "en:milk" + | /\bpecorino\b/ // "en:milk" + | /\bs[üu](ß|ss)molke\b/ // "en:milk" + | 
/\berdn[üu]sse\b/ // "en:peanuts" + | /\bmalzmehl\b/ // "en:gluten" + | /\bwaln[üu]sse\b/ // "en:nuts" + | /\bsulphite\b/ // "en:sulphur-dioxide-and-sulphites" + | /\bsellerie\b/ // "en:celery" + | /\bsenfmehl\b/ // "en:mustard" + | /\bsenfsaat\b/ // "en:mustard" + | /\bsenfkorn\b/ // "en:mustard" + | /\beiweiss\b/ // "en:eggs" + | /\bcheddar\b/ // "en:milk" + | /\bjoghurt\b/ // "en:milk" + | /\blaktose\b/ // "en:milk" + | /\bricotta\b/ // "en:milk" + | /\berdnuss\b/ // "en:peanuts" + | /\bgersten\b/ // "en:gluten" + | /\bmandeln\b/ // "en:nuts" + | /\bsulfite\b/ // "en:sulphur-dioxide-and-sulphites" + | /\bsesam[öo]l\b/ // "en:sesame-seeds" + | /\blupinen\b/ // "en:lupin" + | /\bmakrele\b/ // "en:fish" + | /\bfisch[öo]l\b/ // "en:fish" + | /\beigelb\b/ // "en:eggs" + | /\beiklar\b/ // "en:eggs" + | /\bvollei\b/ // "en:eggs" + | /\beiwei(ß|ss)\b/ // "en:eggs" + | /\bsoja[öo]l\b/ // "en:soybeans" + | /\bbutter\b/ // "en:milk" + | /\bedamer\b/ // "en:milk" + | /\bgluten\b/ // "en:gluten" + | /\bweizen\b/ // "en:gluten" + | /\broggen\b/ // "en:gluten" + | /\bgerste\b/ // "en:gluten" + | /\bdinkel\b/ // "en:gluten" + | /\bmandel\b/ // "en:nuts" + | /\bfische\b/ // "en:fish" + | /\bhering\b/ // "en:fish" + | /\beiern\b/ // "en:eggs" + | /\bmilch\b/ // "en:milk" + | /\bgouda\b/ // "en:milk" + | /\bquark\b/ // "en:milk" + | /\bsahne\b/ // "en:milk" + | /\bmolke\b/ // "en:milk" + | /\bhafer\b/ // "en:gluten" + | /\bkamut\b/ // "en:gluten" + | /\bn[üu]sse\b/ // "en:nuts" + | /\bsesam\b/ // "en:sesame-seeds" + | /\bfisch\b/ // "en:fish" + | /\blachs\b/ // "en:fish" + | /\beier\b/ // "en:eggs" + | /\bsoja\b/ // "en:soybeans" + | /\bk[äa]se\b/ // "en:milk" + | /\brahm\b/ // "en:milk" + | /\bsenf\b/ // "en:mustard" + | /\bei\b/ // "en:eggs" + diff --git a/data/grammars/terminal_allergen_en.lark b/data/grammars/terminal_allergen_en.lark new file mode 100644 index 0000000000..15124c43a0 --- /dev/null +++ b/data/grammars/terminal_allergen_en.lark @@ -0,0 +1,126 @@ +// This file has been generated automatically, DO NOT EDIT! 
+ALLERGEN_EN.1: /\bother cereals containing gluten\b/ // "en:gluten" + | /\bsulphur dioxide and sulphites\b/ // "en:sulphur-dioxide-and-sulphites" + | /\bcereals containing gluten\b/ // "en:gluten" + | /\bmilk chocolate coating\b/ // "en:milk" + | /\bmalted barley extract\b/ // "en:gluten" + | /\bsoy protein isolate\b/ // "en:soybeans" + | /\bparmigiano reggiano\b/ // "en:milk" + | /\bmalted barley flour\b/ // "en:gluten" + | /\bbarley malt flour\b/ // "en:gluten" + | /\barachis hypogaea\b/ // "en:peanuts" + | /\bqueensland nuts\b/ // "en:nuts" + | /\bsulphur dioxide\b/ // "en:sulphur-dioxide-and-sulphites" + | /\bsoya lecithins\b/ // "en:soybeans" + | /\bsoy lecithines\b/ // "en:soybeans" + | /\bwheat semolina\b/ // "en:gluten" + | /\bpistachio nuts\b/ // "en:nuts" + | /\bmacadamia nuts\b/ // "en:nuts" + | /\bsoy lecithins\b/ // "en:soybeans" + | /\bsoya lecithin\b/ // "en:soybeans" + | /\bsoya products\b/ // "en:soybeans" + | /\bmalted barley\b/ // "en:gluten" + | /\bsoy lecithin\b/ // "en:soybeans" + | /\bgrana padano\b/ // "en:milk" + | /\bmilk protein\b/ // "en:milk" + | /\bsesame seeds\b/ // "en:sesame-seeds" + | /\bmilk powder\b/ // "en:milk" + | /\bwheat flour\b/ // "en:gluten" + | /\bbrazil nuts\b/ // "en:nuts" + | /\bcrustaceans\b/ // "en:crustaceans" + | /\begg whites\b/ // "en:eggs" + | /\bwhole eggs\b/ // "en:eggs" + | /\bsoya flour\b/ // "en:soybeans" + | /\bbuttermilk\b/ // "en:milk" + | /\bspeltflour\b/ // "en:gluten" + | /\bwheatflour\b/ // "en:gluten" + | /\bpecan nuts\b/ // "en:nuts" + | /\bother nuts\b/ // "en:nuts" + | /\bcuttlefish\b/ // "en:molluscs" + | /\begg white\b/ // "en:eggs" + | /\begg yolks\b/ // "en:eggs" + | /\bwhole egg\b/ // "en:eggs" + | /\bsoya bean\b/ // "en:soybeans" + | /\bsoy flour\b/ // "en:soybeans" + | /\brye flour\b/ // "en:gluten" + | /\boat fiber\b/ // "en:gluten" + | /\bhazelnuts\b/ // "en:nuts" + | /\bpistachio\b/ // "en:nuts" + | /\bmacadamia\b/ // "en:nuts" + | /\btree nuts\b/ // "en:nuts" + | /\bsulphites\b/ // "en:sulphur-dioxide-and-sulphites" + | /\bbarn egg\b/ // "en:eggs" + | /\begg yolk\b/ // "en:eggs" + | /\bsoybeans\b/ // "en:soybeans" + | /\btreenuts\b/ // "en:nuts" + | /\bsulfites\b/ // "en:sulphur-dioxide-and-sulphites" + | /\bceleriac\b/ // "en:celery" + | /\bbrassica\b/ // "en:mustard" + | /\bmackerel\b/ // "en:fish" + | /\bflounder\b/ // "en:fish" + | /\bsardines\b/ // "en:fish" + | /\bcrayfish\b/ // "en:crustaceans" + | /\bmolluscs\b/ // "en:molluscs" + | /\bmollusks\b/ // "en:molluscs" + | /\bscallops\b/ // "en:molluscs" + | /\blactose\b/ // "en:milk" + | /\byoghurt\b/ // "en:milk" + | /\bpeanuts\b/ // "en:peanuts" + | /\balmonds\b/ // "en:nuts" + | /\bwalnuts\b/ // "en:nuts" + | /\bcashews\b/ // "en:nuts" + | /\bmustard\b/ // "en:mustard" + | /\bhalibut\b/ // "en:fish" + | /\bhaddock\b/ // "en:fish" + | /\bsardine\b/ // "en:fish" + | /\blobster\b/ // "en:crustaceans" + | /\bmollusc\b/ // "en:molluscs" + | /\bmollusk\b/ // "en:molluscs" + | /\boysters\b/ // "en:molluscs" + | /\bmussels\b/ // "en:molluscs" + | /\bscallop\b/ // "en:molluscs" + | /\bbutter\b/ // "en:milk" + | /\byogurt\b/ // "en:milk" + | /\bcheese\b/ // "en:milk" + | /\bpeanut\b/ // "en:peanuts" + | /\bgluten\b/ // "en:gluten" + | /\bbarley\b/ // "en:gluten" + | /\bcashew\b/ // "en:nuts" + | /\bcelery\b/ // "en:celery" + | /\bsesame\b/ // "en:sesame-seeds" + | /\blupine\b/ // "en:lupin" + | /\bfishes\b/ // "en:fish" + | /\bturbot\b/ // "en:fish" + | /\bsalmon\b/ // "en:fish" + | /\bshrimp\b/ // "en:crustaceans" + | /\boyster\b/ // "en:molluscs" + | 
/\bmussel\b/ // "en:molluscs" + | /\bdairy\b/ // "en:milk" + | /\bcream\b/ // "en:milk" + | /\bkamut\b/ // "en:gluten" + | /\bspelt\b/ // "en:gluten" + | /\bwheat\b/ // "en:gluten" + | /\bpecan\b/ // "en:nuts" + | /\blupin\b/ // "en:lupin" + | /\btrout\b/ // "en:fish" + | /\bprawn\b/ // "en:crustaceans" + | /\bsquid\b/ // "en:molluscs" + | /\bclams\b/ // "en:molluscs" + | /\beggs\b/ // "en:eggs" + | /\bsoya\b/ // "en:soybeans" + | /\bsoja\b/ // "en:soybeans" + | /\bsoia\b/ // "en:soybeans" + | /\bmilk\b/ // "en:milk" + | /\bwhey\b/ // "en:milk" + | /\boats\b/ // "en:gluten" + | /\bnuts\b/ // "en:nuts" + | /\bfish\b/ // "en:fish" + | /\bsole\b/ // "en:fish" + | /\btuna\b/ // "en:fish" + | /\bcrab\b/ // "en:crustaceans" + | /\bclam\b/ // "en:molluscs" + | /\begg\b/ // "en:eggs" + | /\bsoy\b/ // "en:soybeans" + | /\brye\b/ // "en:gluten" + | /\bcod\b/ // "en:fish" + diff --git a/data/grammars/terminal_allergen_en_map.json b/data/grammars/terminal_allergen_en_map.json new file mode 100644 index 0000000000..1fa265fc04 --- /dev/null +++ b/data/grammars/terminal_allergen_en_map.json @@ -0,0 +1 @@ +{"other cereals containing gluten":["en:gluten"],"sulphur dioxide and sulphites":["en:sulphur-dioxide-and-sulphites"],"cereals containing gluten":["en:gluten"],"milk chocolate coating":["en:milk"],"malted barley extract":["en:gluten"],"soy protein isolate":["en:soybeans"],"parmigiano reggiano":["en:milk"],"malted barley flour":["en:gluten"],"barley malt flour":["en:gluten"],"arachis hypogaea":["en:peanuts"],"queensland nuts":["en:nuts"],"sulphur dioxide":["en:sulphur-dioxide-and-sulphites"],"soya lecithins":["en:soybeans"],"soy lecithines":["en:soybeans"],"wheat semolina":["en:gluten"],"pistachio nuts":["en:nuts"],"macadamia nuts":["en:nuts"],"soy lecithins":["en:soybeans"],"soya lecithin":["en:soybeans"],"soya products":["en:soybeans"],"malted barley":["en:gluten"],"soy lecithin":["en:soybeans"],"grana padano":["en:milk"],"milk protein":["en:milk"],"sesame seeds":["en:sesame-seeds"],"milk powder":["en:milk"],"wheat flour":["en:gluten"],"brazil nuts":["en:nuts"],"crustaceans":["en:crustaceans"],"egg whites":["en:eggs"],"whole eggs":["en:eggs"],"soya flour":["en:soybeans"],"buttermilk":["en:milk"],"speltflour":["en:gluten"],"wheatflour":["en:gluten"],"pecan nuts":["en:nuts"],"other nuts":["en:nuts"],"cuttlefish":["en:molluscs"],"egg white":["en:eggs"],"egg yolks":["en:eggs"],"whole egg":["en:eggs"],"soya bean":["en:soybeans"],"soy flour":["en:soybeans"],"rye flour":["en:gluten"],"oat fiber":["en:gluten"],"hazelnuts":["en:nuts"],"pistachio":["en:nuts"],"macadamia":["en:nuts"],"tree nuts":["en:nuts"],"sulphites":["en:sulphur-dioxide-and-sulphites"],"barn egg":["en:eggs"],"egg 
yolk":["en:eggs"],"soybeans":["en:soybeans"],"treenuts":["en:nuts"],"sulfites":["en:sulphur-dioxide-and-sulphites"],"celeriac":["en:celery"],"brassica":["en:mustard"],"mackerel":["en:fish"],"flounder":["en:fish"],"sardines":["en:fish"],"crayfish":["en:crustaceans"],"molluscs":["en:molluscs"],"mollusks":["en:molluscs"],"scallops":["en:molluscs"],"lactose":["en:milk"],"yoghurt":["en:milk"],"peanuts":["en:peanuts"],"almonds":["en:nuts"],"walnuts":["en:nuts"],"cashews":["en:nuts"],"mustard":["en:mustard"],"halibut":["en:fish"],"haddock":["en:fish"],"sardine":["en:fish"],"lobster":["en:crustaceans"],"mollusc":["en:molluscs"],"mollusk":["en:molluscs"],"oysters":["en:molluscs"],"mussels":["en:molluscs"],"scallop":["en:molluscs"],"butter":["en:milk"],"yogurt":["en:milk"],"cheese":["en:milk"],"peanut":["en:peanuts"],"gluten":["en:gluten"],"barley":["en:gluten"],"cashew":["en:nuts"],"celery":["en:celery"],"sesame":["en:sesame-seeds"],"lupine":["en:lupin"],"fishes":["en:fish"],"turbot":["en:fish"],"salmon":["en:fish"],"shrimp":["en:crustaceans"],"oyster":["en:molluscs"],"mussel":["en:molluscs"],"dairy":["en:milk"],"cream":["en:milk"],"kamut":["en:gluten"],"spelt":["en:gluten"],"wheat":["en:gluten"],"pecan":["en:nuts"],"lupin":["en:lupin"],"trout":["en:fish"],"prawn":["en:crustaceans"],"squid":["en:molluscs"],"clams":["en:molluscs"],"eggs":["en:eggs"],"soya":["en:soybeans"],"soja":["en:soybeans"],"soia":["en:soybeans"],"milk":["en:milk"],"whey":["en:milk"],"oats":["en:gluten"],"nuts":["en:nuts"],"fish":["en:fish"],"sole":["en:fish"],"tuna":["en:fish"],"crab":["en:crustaceans"],"clam":["en:molluscs"],"egg":["en:eggs"],"soy":["en:soybeans"],"rye":["en:gluten"],"cod":["en:fish"]} \ No newline at end of file diff --git a/data/grammars/terminal_allergen_es.lark b/data/grammars/terminal_allergen_es.lark new file mode 100644 index 0000000000..febad6d40b --- /dev/null +++ b/data/grammars/terminal_allergen_es.lark @@ -0,0 +1,106 @@ +// This file has been generated automatically, DO NOT EDIT! 
+ALLERGEN_ES.1: /\bcereales que contienen gluten\b/ // "en:gluten" + | /\botros frutos secos de c[áa]scara\b/ // "en:nuts" + | /\bfrutos de c[áa]scara y derivados\b/ // "en:nuts" + | /\bproductos derivados de huevo\b/ // "en:eggs" + | /\bdi[óo]xido de azufre y sulfitos\b/ // "en:sulphur-dioxide-and-sulphites" + | /\bharina de trigo integral\b/ // "en:gluten" + | /\bfrutos secos de c[áa]scara\b/ // "en:nuts" + | /\botros frutos de c[áa]scara\b/ // "en:nuts" + | /\btrigo de joras[áa]n kamut\b/ // "en:gluten" + | /\bleche y sus derivados\b/ // "en:milk" + | /\bsemillas de ajonjol[íi]\b/ // "en:sesame-seeds" + | /\baceite de cacahuete\b/ // "en:peanuts" + | /\baceite de cacahuate\b/ // "en:peanuts" + | /\bcereales con gluten\b/ // "en:gluten" + | /\bnueces del amazonas\b/ // "en:nuts" + | /\bnueces de macadamia\b/ // "en:nuts" + | /\bnueces de australia\b/ // "en:nuts" + | /\bsemillas de mostaza\b/ // "en:mustard" + | /\bfrutos con c[áa]scara\b/ // "en:nuts" + | /\botros frutos secos\b/ // "en:nuts" + | /\bsemillas de s[ée]samo\b/ // "en:sesame-seeds" + | /\bgranos de ajonjol[íi]\b/ // "en:sesame-seeds" + | /\bderivados l[áa]cteos\b/ // "en:milk" + | /\bproductos l[áa]cteos\b/ // "en:milk" + | /\bleche y derivados\b/ // "en:milk" + | /\bprote[íi]na de leche\b/ // "en:milk" + | /\bfrutos de c[áa]scara\b/ // "en:nuts" + | /\bnueces de [áa]rboles\b/ // "en:nuts" + | /\bdi[óo]xido de azufre\b/ // "en:sulphur-dioxide-and-sulphites" + | /\blecitina de soja\b/ // "en:soybeans" + | /\blecitina de soya\b/ // "en:soybeans" + | /\barachis hypogaea\b/ // "en:peanuts" + | /\bnueces de brasil\b/ // "en:nuts" + | /\bnueces macadamia\b/ // "en:nuts" + | /\bgranos de s[ée]samo\b/ // "en:sesame-seeds" + | /\bharina de trigo\b/ // "en:gluten" + | /\bcangrejo de r[íi]o\b/ // "en:crustaceans" + | /\bnuez de brasil\b/ // "en:nuts" + | /\bhabas de soja\b/ // "en:soybeans" + | /\bhabas de soya\b/ // "en:soybeans" + | /\bfrutos secos\b/ // "en:nuts" + | /\botros nueces\b/ // "en:nuts" + | /\bcacahuetes\b/ // "en:peanuts" + | /\bcacahuates\b/ // "en:peanuts" + | /\balf[óo]ncigos\b/ // "en:nuts" + | /\baltramuces\b/ // "en:lupin" + | /\bcrust[áa]ceos\b/ // "en:crustaceans" + | /\blangostino\b/ // "en:crustaceans" + | /\bmejillones\b/ // "en:molluscs" + | /\bcacahuete\b/ // "en:peanuts" + | /\bcacahuate\b/ // "en:peanuts" + | /\balmendras\b/ // "en:nuts" + | /\bavellanas\b/ // "en:nuts" + | /\banacardos\b/ // "en:nuts" + | /\bpistachos\b/ // "en:nuts" + | /\bescal[óo]pas\b/ // "en:molluscs" + | /\bcaracoles\b/ // "en:molluscs" + | /\bcereales\b/ // "en:gluten" + | /\bcoquitos\b/ // "en:nuts" + | /\bpistacho\b/ // "en:nuts" + | /\bcastañas\b/ // "en:nuts" + | /\bsulfitos\b/ // "en:sulphur-dioxide-and-sulphites" + | /\bajonjol[íi]\b/ // "en:sesame-seeds" + | /\baltramuz\b/ // "en:lupin" + | /\bcangrejo\b/ // "en:crustaceans" + | /\blangosta\b/ // "en:crustaceans" + | /\bmoluscos\b/ // "en:molluscs" + | /\bmejill[óo]n\b/ // "en:molluscs" + | /\blactosa\b/ // "en:milk" + | /\bl[áa]cteos\b/ // "en:milk" + | /\bcenteno\b/ // "en:gluten" + | /\bespelta\b/ // "en:gluten" + | /\bpacanas\b/ // "en:nuts" + | /\bcoquito\b/ // "en:nuts" + | /\bpiñones\b/ // "en:nuts" + | /\bcastaña\b/ // "en:nuts" + | /\bmostaza\b/ // "en:mustard" + | /\bpescado\b/ // "en:fish" + | /\bcamar[óo]n\b/ // "en:crustaceans" + | /\bmolusco\b/ // "en:molluscs" + | /\balmejas\b/ // "en:molluscs" + | /\bcalamar\b/ // "en:molluscs" + | /\bcaracol\b/ // "en:molluscs" + | /\bhuevos\b/ // "en:eggs" + | /\bgluten\b/ // "en:gluten" + | /\bcebada\b/ // "en:gluten" + 
| /\bnueces\b/ // "en:nuts" + | /\bs[ée]samo\b/ // "en:sesame-seeds" + | /\bostras\b/ // "en:molluscs" + | /\bvieras\b/ // "en:molluscs" + | /\bhuevo\b/ // "en:eggs" + | /\bleche\b/ // "en:milk" + | /\bqueso\b/ // "en:milk" + | /\btrigo\b/ // "en:gluten" + | /\bkamut\b/ // "en:gluten" + | /\bpiñ[óo]n\b/ // "en:nuts" + | /\bgamba\b/ // "en:crustaceans" + | /\bsepia\b/ // "en:molluscs" + | /\bostra\b/ // "en:molluscs" + | /\bviera\b/ // "en:molluscs" + | /\bsoja\b/ // "en:soybeans" + | /\bsoya\b/ // "en:soybeans" + | /\bnuez\b/ // "en:nuts" + | /\bapio\b/ // "en:celery" + diff --git a/data/grammars/terminal_allergen_es_map.json b/data/grammars/terminal_allergen_es_map.json new file mode 100644 index 0000000000..a5c80ad87d --- /dev/null +++ b/data/grammars/terminal_allergen_es_map.json @@ -0,0 +1 @@ +{"cereales que contienen gluten":["en:gluten"],"otros frutos secos de cáscara":["en:nuts"],"frutos de cáscara y derivados":["en:nuts"],"productos derivados de huevo":["en:eggs"],"dióxido de azufre y sulfitos":["en:sulphur-dioxide-and-sulphites"],"harina de trigo integral":["en:gluten"],"frutos secos de cáscara":["en:nuts"],"otros frutos de cáscara":["en:nuts"],"trigo de jorasán kamut":["en:gluten"],"leche y sus derivados":["en:milk"],"semillas de ajonjolí":["en:sesame-seeds"],"aceite de cacahuete":["en:peanuts"],"aceite de cacahuate":["en:peanuts"],"cereales con gluten":["en:gluten"],"nueces del amazonas":["en:nuts"],"nueces de macadamia":["en:nuts"],"nueces de australia":["en:nuts"],"semillas de mostaza":["en:mustard"],"frutos con cáscara":["en:nuts"],"otros frutos secos":["en:nuts"],"semillas de sésamo":["en:sesame-seeds"],"granos de ajonjolí":["en:sesame-seeds"],"derivados lácteos":["en:milk"],"productos lácteos":["en:milk"],"leche y derivados":["en:milk"],"proteína de leche":["en:milk"],"frutos de cáscara":["en:nuts"],"nueces de árboles":["en:nuts"],"dióxido de azufre":["en:sulphur-dioxide-and-sulphites"],"lecitina de soja":["en:soybeans"],"lecitina de soya":["en:soybeans"],"arachis hypogaea":["en:peanuts"],"nueces de brasil":["en:nuts"],"nueces macadamia":["en:nuts"],"granos de sésamo":["en:sesame-seeds"],"harina de trigo":["en:gluten"],"cangrejo de río":["en:crustaceans"],"nuez de brasil":["en:nuts"],"habas de soja":["en:soybeans"],"habas de soya":["en:soybeans"],"frutos secos":["en:nuts"],"otros 
nueces":["en:nuts"],"cacahuetes":["en:peanuts"],"cacahuates":["en:peanuts"],"alfóncigos":["en:nuts"],"altramuces":["en:lupin"],"crustáceos":["en:crustaceans"],"langostino":["en:crustaceans"],"mejillones":["en:molluscs"],"cacahuete":["en:peanuts"],"cacahuate":["en:peanuts"],"almendras":["en:nuts"],"avellanas":["en:nuts"],"anacardos":["en:nuts"],"pistachos":["en:nuts"],"escalópas":["en:molluscs"],"caracoles":["en:molluscs"],"cereales":["en:gluten"],"coquitos":["en:nuts"],"pistacho":["en:nuts"],"castañas":["en:nuts"],"sulfitos":["en:sulphur-dioxide-and-sulphites"],"ajonjolí":["en:sesame-seeds"],"altramuz":["en:lupin"],"cangrejo":["en:crustaceans"],"langosta":["en:crustaceans"],"moluscos":["en:molluscs"],"mejillón":["en:molluscs"],"lactosa":["en:milk"],"lácteos":["en:milk"],"centeno":["en:gluten"],"espelta":["en:gluten"],"pacanas":["en:nuts"],"coquito":["en:nuts"],"piñones":["en:nuts"],"castaña":["en:nuts"],"mostaza":["en:mustard"],"pescado":["en:fish"],"camarón":["en:crustaceans"],"molusco":["en:molluscs"],"almejas":["en:molluscs"],"calamar":["en:molluscs"],"caracol":["en:molluscs"],"huevos":["en:eggs"],"gluten":["en:gluten"],"cebada":["en:gluten"],"nueces":["en:nuts"],"sésamo":["en:sesame-seeds"],"ostras":["en:molluscs"],"vieras":["en:molluscs"],"huevo":["en:eggs"],"leche":["en:milk"],"queso":["en:milk"],"trigo":["en:gluten"],"kamut":["en:gluten"],"piñón":["en:nuts"],"gamba":["en:crustaceans"],"sepia":["en:molluscs"],"ostra":["en:molluscs"],"viera":["en:molluscs"],"soja":["en:soybeans"],"soya":["en:soybeans"],"nuez":["en:nuts"],"apio":["en:celery"]} \ No newline at end of file diff --git a/data/grammars/terminal_allergen_fr.lark b/data/grammars/terminal_allergen_fr.lark new file mode 100644 index 0000000000..db8b12d9b6 --- /dev/null +++ b/data/grammars/terminal_allergen_fr.lark @@ -0,0 +1,212 @@ +// This file has been generated automatically, DO NOT EDIT! +ALLERGEN_FR.1: /\blaits et d[ée]riv[ée]s y compris lactose\b/ // "en:milk" + | /\banhydride sulfureux et sulfites\b/ // "en:sulphur-dioxide-and-sulphites" + | /\bnoix de coquilles saint-jacques\b/ // "en:molluscs" + | /\bproduits laitiers et d[ée]riv[ée]es\b/ // "en:milk" + | /\bproduits laitiers et d[ée]riv[ée]s\b/ // "en:milk" + | /\bc[ée]r[ée]ales contenant du gluten\b/ // "en:gluten" + | /\bnoix de coquilles st-jacques\b/ // "en:molluscs" + | /\bcoquilles? saint jacques\b/ // "en:molluscs" + | /\bautres? fruits? à coque\b/ // "en:nuts" + | /\bnoix de saint-jacques\b/ // "en:molluscs" + | /\bprot[ée]ines laiti[èe]res\b/ // "en:milk" + | /\bfruits? secs? à coque\b/ // "en:nuts" + | /\bfruits? à coque dure\b/ // "en:nuts" + | /\banhydride sulfureux\b/ // "en:sulphur-dioxide-and-sulphites" + | /\bgraines? de moutarde\b/ // "en:mustard" + | /\bcoquilles? st jacques\b/ // "en:molluscs" + | /\bferments lactiques\b/ // "en:milk" + | /\bnoix du queensland\b/ // "en:nuts" + | /\bautres? fruits? secs?\b/ // "en:nuts" + | /\bnoix de st-jacques\b/ // "en:molluscs" + | /\bl[ée]cithines? de soja\b/ // "en:soybeans" + | /\bfromages? de ch[èe]vre\b/ // "en:milk" + | /\bprot[ée]ines? de lait\b/ // "en:milk" + | /\bproduits? laitiers?\b/ // "en:milk" + | /\bfarine de froment\b/ // "en:gluten" + | /\bfarine d\'[ée]peautre\b/ // "en:gluten" + | /\bnoix de macadamia\b/ // "en:nuts" + | /\bgraines? 
de s[ée]same\b/ // "en:sesame-seeds" + | /\bbeurre patissier\b/ // "en:milk" + | /\bfromage de vache\b/ // "en:milk" + | /\bd[ée]riv[ée]s laitiers\b/ // "en:milk" + | /\bbeurre concentr[ée]\b/ // "en:milk" + | /\blait demi-[ée]cr[ée]m[ée]\b/ // "en:milk" + | /\barachis hypogaea\b/ // "en:peanuts" + | /\bgraines? de s[ée]same\b/ // "en:sesame-seeds" + | /\bgraines? de soja\b/ // "en:soybeans" + | /\bpoudres? de lait\b/ // "en:milk" + | /\blait de ch[èe]vre\b/ // "en:milk" + | /\blait de brebis\b/ // "en:milk" + | /\bpetit [ée]peautre\b/ // "en:gluten" + | /\bgrand [ée]peautre\b/ // "en:gluten" + | /\bfruits? à coque\b/ // "en:nuts" + | /\bnoix du br[ée]sil\b/ // "en:nuts" + | /\bmetabisulphite\b/ // "en:sulphur-dioxide-and-sulphites" + | /\bcolin d\'alaska\b/ // "en:fish" + | /\bcr[èe]me de lait\b/ // "en:milk" + | /\bcr[èe]me fraiche\b/ // "en:milk" + | /\bfromage blanc\b/ // "en:milk" + | /\bfromage fondu\b/ // "en:milk" + | /\bfromage frais\b/ // "en:milk" + | /\blait de vache\b/ // "en:milk" + | /\bgluten de bl[ée]\b/ // "en:gluten" + | /\bfarine de bl[ée]\b/ // "en:gluten" + | /\bnoix de cajou\b/ // "en:nuts" + | /\bnoix de p[ée]can\b/ // "en:nuts" + | /\bmetabisulfite\b/ // "en:sulphur-dioxide-and-sulphites" + | /\bsaint-jacques\b/ // "en:molluscs" + | /\bblanc d\'oeuf\b/ // "en:eggs" + | /\bjaune d\'oeuf\b/ // "en:eggs" + | /\bblanc d\'œufs\b/ // "en:eggs" + | /\bjaune d\'œufs\b/ // "en:eggs" + | /\bfibres? de bl[ée]\b/ // "en:gluten" + | /\bblanc d\'œuf\b/ // "en:eggs" + | /\bjaune d\'œuf\b/ // "en:eggs" + | /\boeufs frais\b/ // "en:eggs" + | /\bson de soja\b/ // "en:soybeans" + | /\blait [ée]cr[ée]m[ée]\b/ // "en:milk" + | /\blait entier\b/ // "en:milk" + | /\bcacahou[èe]tes\b/ // "en:peanuts" + | /\bbl[ée] complet\b/ // "en:gluten" + | /\bmalt d\'orge\b/ // "en:gluten" + | /\bfruits? 
secs?\b/ // "en:nuts" + | /\bc[ée]leri-rave\b/ // "en:celery" + | /\bsaumon fum[ée]\b/ // "en:fish" + | /\blangoustine\b/ // "en:crustaceans" + | /\boeuf frais\b/ // "en:eggs" + | /\bœufs frais\b/ // "en:eggs" + | /\bbas-beurre\b/ // "en:milk" + | /\bpetit-lait\b/ // "en:milk" + | /\blactoserum\b/ // "en:milk" + | /\blactos[ée]rum\b/ // "en:milk" + | /\bmozzarella\b/ // "en:milk" + | /\bmascarpone\b/ // "en:milk" + | /\bgorgonzola\b/ // "en:milk" + | /\blait frais\b/ // "en:milk" + | /\bcacahu[èe]tes\b/ // "en:peanuts" + | /\bcacahou[èe]te\b/ // "en:peanuts" + | /\bson de bl[ée]\b/ // "en:gluten" + | /\bdisulfites\b/ // "en:sulphur-dioxide-and-sulphites" + | /\b[ée]crevisses\b/ // "en:crustaceans" + | /\bmollusques\b/ // "en:molluscs" + | /\bst-jacques\b/ // "en:molluscs" + | /\bœuf frais\b/ // "en:eggs" + | /\blactiques\b/ // "en:milk" + | /\breblochon\b/ // "en:milk" + | /\bmimolette\b/ // "en:milk" + | /\bcas[ée]inate\b/ // "en:milk" + | /\broquefort\b/ // "en:milk" + | /\barachides\b/ // "en:peanuts" + | /\bcacahu[èe]te\b/ // "en:peanuts" + | /\bnoisettes?\b/ // "en:nuts" + | /\bpistaches\b/ // "en:nuts" + | /\bsulphites\b/ // "en:sulphur-dioxide-and-sulphites" + | /\bbisulfite\b/ // "en:sulphur-dioxide-and-sulphites" + | /\bcabillaud\b/ // "en:fish" + | /\bmaquereau\b/ // "en:fish" + | /\bcrustac[ée]s\b/ // "en:crustaceans" + | /\blangouste\b/ // "en:crustaceans" + | /\bcrevettes\b/ // "en:crustaceans" + | /\b[ée]crevisse\b/ // "en:crustaceans" + | /\bmollusque\b/ // "en:molluscs" + | /\bescargots\b/ // "en:molluscs" + | /\bp[ée]toncles\b/ // "en:molluscs" + | /\blaitiere\b/ // "en:milk" + | /\blaitiers\b/ // "en:milk" + | /\bbabeurre\b/ // "en:milk" + | /\blait cru\b/ // "en:milk" + | /\bemmental\b/ // "en:milk" + | /\blactique\b/ // "en:milk" + | /\bparmesan\b/ // "en:milk" + | /\braclette\b/ // "en:milk" + | /\barachide\b/ // "en:peanuts" + | /\b[ée]peautre\b/ // "en:gluten" + | /\bboulgour\b/ // "en:gluten" + | /\bsulfites\b/ // "en:sulphur-dioxide-and-sulphites" + | /\bsulphite\b/ // "en:sulphur-dioxide-and-sulphites" + | /\bmoutarde\b/ // "en:mustard" + | /\bpoissons\b/ // "en:fish" + | /\baiglefin\b/ // "en:fish" + | /\bsardines\b/ // "en:fish" + | /\bcrustac[ée]\b/ // "en:crustaceans" + | /\bcrevette\b/ // "en:crustaceans" + | /\bencornet\b/ // "en:molluscs" + | /\bescargot\b/ // "en:molluscs" + | /\blactose\b/ // "en:milk" + | /\blaitier\b/ // "en:milk" + | /\bfromage\b/ // "en:milk" + | /\bcheddar\b/ // "en:milk" + | /\bricotta\b/ // "en:milk" + | /\bcas[ée]ine\b/ // "en:milk" + | /\bgruy[èe]re\b/ // "en:milk" + | /\bfroment\b/ // "en:gluten" + | /\bbl[ée] dur\b/ // "en:gluten" + | /\bamandes?\b/ // "en:nuts" + | /\bsulfite\b/ // "en:sulphur-dioxide-and-sulphites" + | /\bpoisson\b/ // "en:fish" + | /\bhaddock\b/ // "en:fish" + | /\bsardine\b/ // "en:fish" + | /\banchois\b/ // "en:fish" + | /\bbrochet\b/ // "en:fish" + | /\bharengs\b/ // "en:fish" + | /\blimande\b/ // "en:fish" + | /\bhomards\b/ // "en:crustaceans" + | /\bcalamar\b/ // "en:molluscs" + | /\bhuitres\b/ // "en:molluscs" + | /\bbeurre\b/ // "en:milk" + | /\byaourt\b/ // "en:milk" + | /\bgluten\b/ // "en:gluten" + | /\bseigle\b/ // "en:gluten" + | /\bc[ée]leri\b/ // "en:celery" + | /\bs[ée]same\b/ // "en:sesame-seeds" + | /\bfl[ée]tan\b/ // "en:fish" + | /\bturbot\b/ // "en:fish" + | /\bsaumon\b/ // "en:fish" + | /\btruite\b/ // "en:fish" + | /\bhareng\b/ // "en:fish" + | /\bmerlan\b/ // "en:fish" + | /\brouget\b/ // "en:fish" + | /\btacaud\b/ // "en:fish" + | /\bcrabes\b/ // "en:crustaceans" + | /\bhomard\b/ // 
"en:crustaceans" + | /\bgambas\b/ // "en:crustaceans" + | /\bpoulpe\b/ // "en:molluscs" + | /\bseiche\b/ // "en:molluscs" + | /\bcalmar\b/ // "en:molluscs" + | /\bhuitre\b/ // "en:molluscs" + | /\bmoules\b/ // "en:molluscs" + | /\boeufs\b/ // "en:eggs" + | /\bsojas\b/ // "en:soybeans" + | /\btonyu\b/ // "en:soybeans" + | /\bcr[èe]me\b/ // "en:milk" + | /\bm[ée]ton\b/ // "en:milk" + | /\bcomte\b/ // "en:milk" + | /\bgouda\b/ // "en:milk" + | /\bkamut\b/ // "en:gluten" + | /\blupin\b/ // "en:lupin" + | /\bmorue\b/ // "en:fish" + | /\bcolin\b/ // "en:fish" + | /\bmerlu\b/ // "en:fish" + | /\bcrabe\b/ // "en:crustaceans" + | /\bmoule\b/ // "en:molluscs" + | /\bœufs\b/ // "en:eggs" + | /\boeuf\b/ // "en:eggs" + | /\bsoja\b/ // "en:soybeans" + | /\bsoya\b/ // "en:soybeans" + | /\bsoia\b/ // "en:soybeans" + | /\btofu\b/ // "en:soybeans" + | /\blait\b/ // "en:milk" + | /\bbleu\b/ // "en:milk" + | /\bedam\b/ // "en:milk" + | /\bfeta\b/ // "en:milk" + | /\borge\b/ // "en:gluten" + | /\bmalt\b/ // "en:gluten" + | /\bnoix\b/ // "en:nuts" + | /\bsole\b/ // "en:fish" + | /\bthon\b/ // "en:fish" + | /\bloup\b/ // "en:fish" + | /\blieu\b/ // "en:fish" + | /\bœuf\b/ // "en:eggs" + | /\bbl[ée]\b/ // "en:gluten" + diff --git a/data/grammars/terminal_allergen_fr_map.json b/data/grammars/terminal_allergen_fr_map.json new file mode 100644 index 0000000000..f2be743a45 --- /dev/null +++ b/data/grammars/terminal_allergen_fr_map.json @@ -0,0 +1 @@ +{"laits et dérivés y compris lactose":["en:milk"],"anhydride sulfureux et sulfites":["en:sulphur-dioxide-and-sulphites"],"noix de coquilles saint-jacques":["en:molluscs"],"produits laitiers et dérivées":["en:milk"],"produits laitiers et dérivés":["en:milk"],"céréales contenant du gluten":["en:gluten"],"noix de coquilles st-jacques":["en:molluscs"],"coquille saint jacques":["en:molluscs"],"autres fruits à coque":["en:nuts"],"noix de saint-jacques":["en:molluscs"],"protéines laitières":["en:milk"],"fruits secs à coque":["en:nuts"],"fruits à coque dure":["en:nuts"],"anhydride sulfureux":["en:sulphur-dioxide-and-sulphites"],"graines de moutarde":["en:mustard"],"coquille st jacques":["en:molluscs"],"ferments lactiques":["en:milk"],"noix du queensland":["en:nuts"],"autres fruits secs":["en:nuts"],"noix de st-jacques":["en:molluscs"],"lécithine de soja":["en:soybeans"],"fromage de chèvre":["en:milk"],"protéines de lait":["en:milk"],"produits laitiers":["en:milk"],"farine de froment":["en:gluten"],"farine d'épeautre":["en:gluten"],"noix de macadamia":["en:nuts"],"graines de sésame":["en:sesame-seeds"],"beurre patissier":["en:milk"],"fromage de vache":["en:milk"],"dérivés laitiers":["en:milk"],"beurre concentré":["en:milk"],"lait demi-écrémé":["en:milk"],"arachis hypogaea":["en:peanuts"],"graine de sésame":["en:sesame-seeds"],"graines de soja":["en:soybeans"],"graine de soja":["en:soybeans"],"poudre de lait":["en:milk"],"lait de chèvre":["en:milk"],"lait de brebis":["en:milk"],"petit épeautre":["en:gluten"],"grand épeautre":["en:gluten"],"fruits à coque":["en:nuts"],"noix du brésil":["en:nuts"],"metabisulphite":["en:sulphur-dioxide-and-sulphites"],"colin d'alaska":["en:fish"],"crème de lait":["en:milk"],"crème fraiche":["en:milk"],"fromage blanc":["en:milk"],"fromage fondu":["en:milk"],"fromage frais":["en:milk"],"lait de vache":["en:milk"],"gluten de blé":["en:gluten"],"farine de blé":["en:gluten"],"noix de cajou":["en:nuts"],"noix de pécan":["en:nuts"],"metabisulfite":["en:sulphur-dioxide-and-sulphites"],"saint-jacques":["en:molluscs"],"blanc d'oeuf":["en:eggs"],"jaune 
d'oeuf":["en:eggs"],"blanc d'œufs":["en:eggs"],"jaune d'œufs":["en:eggs"],"fibre de blé":["en:gluten"],"blanc d'œuf":["en:eggs"],"jaune d'œuf":["en:eggs"],"oeufs frais":["en:eggs"],"son de soja":["en:soybeans"],"lait écrémé":["en:milk"],"lait entier":["en:milk"],"cacahouètes":["en:peanuts"],"blé complet":["en:gluten"],"malt d'orge":["en:gluten"],"fruits secs":["en:nuts"],"céleri-rave":["en:celery"],"saumon fumé":["en:fish"],"langoustine":["en:crustaceans"],"oeuf frais":["en:eggs"],"œufs frais":["en:eggs"],"bas-beurre":["en:milk"],"petit-lait":["en:milk"],"lactoserum":["en:milk"],"lactosérum":["en:milk"],"mozzarella":["en:milk"],"mascarpone":["en:milk"],"gorgonzola":["en:milk"],"lait frais":["en:milk"],"cacahuètes":["en:peanuts"],"cacahouète":["en:peanuts"],"son de blé":["en:gluten"],"disulfites":["en:sulphur-dioxide-and-sulphites"],"écrevisses":["en:crustaceans"],"mollusques":["en:molluscs"],"st-jacques":["en:molluscs"],"œuf frais":["en:eggs"],"lactiques":["en:milk"],"reblochon":["en:milk"],"mimolette":["en:milk"],"caséinate":["en:milk"],"roquefort":["en:milk"],"arachides":["en:peanuts"],"cacahuète":["en:peanuts"],"noisettes":["en:nuts"],"pistaches":["en:nuts"],"sulphites":["en:sulphur-dioxide-and-sulphites"],"bisulfite":["en:sulphur-dioxide-and-sulphites"],"cabillaud":["en:fish"],"maquereau":["en:fish"],"crustacés":["en:crustaceans"],"langouste":["en:crustaceans"],"crevettes":["en:crustaceans"],"écrevisse":["en:crustaceans"],"mollusque":["en:molluscs"],"escargots":["en:molluscs"],"pétoncles":["en:molluscs"],"laitiere":["en:milk"],"laitiers":["en:milk"],"babeurre":["en:milk"],"lait cru":["en:milk"],"emmental":["en:milk"],"lactique":["en:milk"],"parmesan":["en:milk"],"raclette":["en:milk"],"arachide":["en:peanuts"],"épeautre":["en:gluten"],"boulgour":["en:gluten"],"sulfites":["en:sulphur-dioxide-and-sulphites"],"sulphite":["en:sulphur-dioxide-and-sulphites"],"moutarde":["en:mustard"],"poissons":["en:fish"],"aiglefin":["en:fish"],"sardines":["en:fish"],"crustacé":["en:crustaceans"],"crevette":["en:crustaceans"],"encornet":["en:molluscs"],"escargot":["en:molluscs"],"lactose":["en:milk"],"laitier":["en:milk"],"fromage":["en:milk"],"cheddar":["en:milk"],"ricotta":["en:milk"],"caséine":["en:milk"],"gruyère":["en:milk"],"froment":["en:gluten"],"blé 
dur":["en:gluten"],"amandes":["en:nuts"],"sulfite":["en:sulphur-dioxide-and-sulphites"],"poisson":["en:fish"],"haddock":["en:fish"],"sardine":["en:fish"],"anchois":["en:fish"],"brochet":["en:fish"],"harengs":["en:fish"],"limande":["en:fish"],"homards":["en:crustaceans"],"calamar":["en:molluscs"],"huitres":["en:molluscs"],"beurre":["en:milk"],"yaourt":["en:milk"],"gluten":["en:gluten"],"seigle":["en:gluten"],"céleri":["en:celery"],"sésame":["en:sesame-seeds"],"flétan":["en:fish"],"turbot":["en:fish"],"saumon":["en:fish"],"truite":["en:fish"],"hareng":["en:fish"],"merlan":["en:fish"],"rouget":["en:fish"],"tacaud":["en:fish"],"crabes":["en:crustaceans"],"homard":["en:crustaceans"],"gambas":["en:crustaceans"],"poulpe":["en:molluscs"],"seiche":["en:molluscs"],"calmar":["en:molluscs"],"huitre":["en:molluscs"],"moules":["en:molluscs"],"oeufs":["en:eggs"],"sojas":["en:soybeans"],"tonyu":["en:soybeans"],"crème":["en:milk"],"méton":["en:milk"],"comte":["en:milk"],"gouda":["en:milk"],"kamut":["en:gluten"],"lupin":["en:lupin"],"morue":["en:fish"],"colin":["en:fish"],"merlu":["en:fish"],"crabe":["en:crustaceans"],"moule":["en:molluscs"],"œufs":["en:eggs"],"oeuf":["en:eggs"],"soja":["en:soybeans"],"soya":["en:soybeans"],"soia":["en:soybeans"],"tofu":["en:soybeans"],"lait":["en:milk"],"bleu":["en:milk"],"edam":["en:milk"],"feta":["en:milk"],"orge":["en:gluten"],"malt":["en:gluten"],"noix":["en:nuts"],"sole":["en:fish"],"thon":["en:fish"],"loup":["en:fish"],"lieu":["en:fish"],"œuf":["en:eggs"],"blé":["en:gluten"]} \ No newline at end of file diff --git a/data/grammars/traces.lark b/data/grammars/traces.lark new file mode 100644 index 0000000000..b35c2787fc --- /dev/null +++ b/data/grammars/traces.lark @@ -0,0 +1,59 @@ +// match any non-whitespace word that is not detected by other rule/terminal +// it has priority 0, lower than packaging related terminals that have priority +// 1, so it matches words that were not detected by other terminals +OTHER: /[^\s]+/ + + +// ## FR ## + +OF_FR.1: /\bdes?\b/ + | /\bd\'/ + | /\bd\b/ + | /\bdu\b/ + +POSSIBLE_FR.1: /\b[ée]ventuelles?\b/ + | /\bpossibles?\b/ + +PRODUCTED_FR.1: /\bfabriqu[ée]\b/ + | /\bélabor[ée]\b/ + +THAT_USES_FR.1: /\bqui utilise\b/ + | /\butilisant\b/ + +// Peut contenir des traces de fruits à coque, de cacahuete de sésame, de sulfites et de gluten +manufactured_in_fr: ("produit"i WS)? PRODUCTED_FR WS "dans" WS "un" WS "atelier" WS THAT_USES_FR WS? (":" WS)? trace_list_fr +can_contain_fr: ("peut"i WS "contenir" WS "des" WS)? "traces"i WS (POSSIBLE_FR? WS)? ("de"? WS? ":" WS?)? trace_list_fr +can_contain_2_fr: "peut"i WS "contenir" WS? (":" WS?)? trace_list_fr +contains_fr: "contient"i (WS "naturellement")? WS trace_list_fr + +trace_list_fr: (OF_FR WS?)? ALLERGEN_FR (WS? ("," WS?)? (("et" WS)? (OF_FR WS?)?)? ALLERGEN_FR)* +traces_fr: can_contain_fr | can_contain_2_fr | contains_fr | manufactured_in_fr + +// ## EN ## + +// It may contain traces of nuts, peanuts, sesame, sulphites and gluten. +can_contain_en: ("it"i WS)? "may" WS "contain" WS ("traces" WS "of" WS)? trace_list_en +contain_en: "contains"i (WS "traces" WS "of")? (WS? ":")? WS? trace_list_en +manufactured_in_en: "prepared"i WS "in" WS "premises" WS "where" WS "traces" WS "of" WS trace_list_en WS "are" WS "used" +trace_list_en: ALLERGEN_EN (WS? ("," WS)? (("and" WS)? ("of" WS)?)? ALLERGEN_EN)* +traces_en: can_contain_en | contain_en | manufactured_in_en + +// ## ES ## + +// PUEDE CONTENER LECHE +can_contain_es: ("este" WS "producto" WS)? "puede" WS "contener"i (WS? ":")? WS? 
trace_list_es +// Contiene leche +contain_es: ("este" WS "producto" WS)? "contiene"i (WS? ":")? WS? trace_list_es +trace_list_es: ALLERGEN_ES (WS? ("," WS)? (("y" WS)?)? ALLERGEN_ES)* +traces_es: contain_es | can_contain_es + + +traces: traces_fr | traces_en | traces_es +start: (traces | junk | WS)+ +// all other words +junk: OTHER+ + +%import common.WS +%import .terminal_allergen_fr.ALLERGEN_FR +%import .terminal_allergen_en.ALLERGEN_EN +%import .terminal_allergen_es.ALLERGEN_ES diff --git a/robotoff/models.py b/robotoff/models.py index ff0a5fb330..2a8db5e571 100644 --- a/robotoff/models.py +++ b/robotoff/models.py @@ -132,7 +132,7 @@ class ProductInsight(BaseModel): # the annotator (or first annotator, if multiple votes were cast). username = peewee.TextField(index=True, null=True) - # Stores the list of counties that are associated with the product. + # Stores the list of countries that are associated with the product. # E.g. possible values are "en:united-states" or "en:france". countries = BinaryJSONField(null=True, index=True, default=list) @@ -171,7 +171,7 @@ class ProductInsight(BaseModel): null=True, max_length=10, help_text="project associated with the insight, " - "one of 'off', 'obf', 'opff', 'opf'", + "one of 'off', 'obf', 'opff', 'opf', 'off-pro'", index=True, ) diff --git a/robotoff/off.py b/robotoff/off.py index 5d67228ea6..b55eb93b1e 100644 --- a/robotoff/off.py +++ b/robotoff/off.py @@ -806,6 +806,78 @@ def send_image( return r +def parse_ingredients(text: str, lang: str, timeout: int = 10) -> list[JSONType]: + """Parse ingredients text using the Product Opener API. + + It is only available for the `off` flavor (food). + + The result is a list of ingredients; each ingredient is a dict with the + following keys: + + - id: the ingredient ID. Having an ID does not mean that the ingredient + is recognized, you must check if it exists in the taxonomy.
+ - text: the ingredient text (as it appears in the input ingredients list) + - percent_min: the minimum percentage of the ingredient in the product + - percent_max: the maximum percentage of the ingredient in the product + - percent_estimate: the estimated percentage of the ingredient in the + product + - vegan (bool): optional key indicating if the ingredient is vegan + - vegetarian (bool): optional key indicating if the ingredient is + vegetarian + + + :param text: the ingredients text to parse + :param lang: the language of the text (used for parsing) as a 2-letter code + :param timeout: the request timeout in seconds, defaults to 10s + :raises ValueError: if `text` is empty + :raises RuntimeError: a RuntimeError is raised if the parsing fails + :return: the list of parsed ingredients + """ + base_url = settings.BaseURLProvider.world(ServerType.off) + # by using "test" as code, we don't save any information to the database + # This endpoint is specifically designed for testing purposes + url = f"{base_url}/api/v3/product/test" + + if len(text) == 0: + raise ValueError("text must be a non-empty string") + + try: + r = http_session.patch( + url, + auth=settings._off_request_auth, + json={ + "fields": "ingredients", + "lc": lang, + "tags_lc": lang, + "product": { + "lang": lang, + f"ingredients_text_{lang}": text, + }, + }, + timeout=timeout, + ) + except ( + requests.exceptions.ConnectionError, + requests.exceptions.SSLError, + requests.exceptions.Timeout, + ) as e: + raise RuntimeError( + f"Unable to parse ingredients: error during HTTP request: {e}" + ) + + if not r.ok: + raise RuntimeError( + f"Unable to parse ingredients (non-200 status code): {r.status_code}, {r.text}" + ) + + response_data = r.json() + + if response_data.get("status") != "success": + raise RuntimeError(f"Unable to parse ingredients: {response_data}") + + return response_data["product"]["ingredients"] + + def normalize_tag(value, lowercase=True): """Given a value normalize it to a tag (as in taxonomies). diff --git a/robotoff/prediction/ingredient_list/__init__.py b/robotoff/prediction/ingredient_list/__init__.py index 555cb470d3..0eb55789a7 100644 --- a/robotoff/prediction/ingredient_list/__init__.py +++ b/robotoff/prediction/ingredient_list/__init__.py @@ -9,17 +9,21 @@ from tritonclient.grpc import service_pb2 from robotoff import settings +from robotoff.prediction.ingredient_list.postprocess import detect_additional_mentions from robotoff.prediction.langid import LanguagePrediction, predict_lang_batch from robotoff.triton import get_triton_inference_stub from robotoff.utils import http_session -from .postprocess import AggregationStrategy, TokenClassificationPipeline +from .transformers_pipeline import AggregationStrategy, TokenClassificationPipeline # The tokenizer assets are stored in the model directory INGREDIENT_NER_MODEL_DIR = settings.TRITON_MODELS_DIR / "ingredient-ner/1/model.onnx" INGREDIENT_ID2LABEL = {0: "O", 1: "B-ING", 2: "I-ING"} +MODEL_NAME = "ingredient-detection" +MODEL_VERSION = "ingredient-detection-1.0" + @dataclasses.dataclass class IngredientPredictionAggregatedEntity: @@ -27,9 +31,12 @@ class IngredientPredictionAggregatedEntity: start: int # character end index of the entity end: int + # character end index of the entity, before postprocessing (i.e.
+ # before adding organic or allergen mentions) + raw_end: int # confidence score score: float - # entity text + # entity text (with organic or allergen mentions, if any) # language prediction of the entity text lang: Optional[LanguagePrediction] = None @@ -175,13 +182,16 @@ def predict_batch( agg_entities = [] for output in pipeline_output: start = int(output["start"]) - end = int(output["end"]) + raw_end = int(output["end"]) + end = detect_additional_mentions(sentence, raw_end) + text = sentence[start:end] agg_entities.append( IngredientPredictionAggregatedEntity( start=start, end=end, + raw_end=raw_end, score=float(output["score"]), - text=sentence[start:end], + text=text, ), ) if predict_lang: diff --git a/robotoff/prediction/ingredient_list/postprocess.py b/robotoff/prediction/ingredient_list/postprocess.py index 047c83e27d..37f8fd6988 100644 --- a/robotoff/prediction/ingredient_list/postprocess.py +++ b/robotoff/prediction/ingredient_list/postprocess.py @@ -1,372 +1,136 @@ -""" -This file has been copied and adapted from -https://github.com/huggingface/transformers/blob/v4.25.1/src/transformers/pipelines/token_classification.py - -The code is under Apache-2.0 license: -https://github.com/huggingface/transformers/blob/main/LICENSE - -We use Triton to serve the request, but still need NER prediction -post-processing, and HuggingFace transformers library provide this feature -nicely using `TokenClassificationPipeline`. - -Most of the code was kept unchanged, the only modifications that were made -were the following: - -- accept numpy array as input instead of Tensorflow/Pytorch tensors -- remove unnecessary code (everything that is not related to post-processing) -- `postprocess` now accepts a single sample (instead of a batched sample of - size 1) -""" -import enum -import warnings -from typing import List, Optional, Tuple - -import numpy as np - - -class AggregationStrategy(enum.Enum): - """All the valid aggregation strategies for TokenClassificationPipeline""" - - NONE = "none" - SIMPLE = "simple" - FIRST = "first" - AVERAGE = "average" - MAX = "max" - - -class TokenClassificationPipeline: - default_input_names = "sequences" - - def __init__(self, tokenizer, id2label): - self.tokenizer = tokenizer - self.id2label = id2label - - def _sanitize_parameters( - self, - ignore_labels=None, - grouped_entities: Optional[bool] = None, - ignore_subwords: Optional[bool] = None, - aggregation_strategy: Optional[AggregationStrategy] = None, - offset_mapping: Optional[List[Tuple[int, int]]] = None, - ): - - preprocess_params = {} - if offset_mapping is not None: - preprocess_params["offset_mapping"] = offset_mapping - - postprocess_params = {} - if grouped_entities is not None or ignore_subwords is not None: - if grouped_entities and ignore_subwords: - aggregation_strategy = AggregationStrategy.FIRST - elif grouped_entities and not ignore_subwords: - aggregation_strategy = AggregationStrategy.SIMPLE +import functools +import re + +from lark import Discard, Lark, Transformer + +from robotoff import settings + +ASTERISK_SYMBOL = r"((\* ?=?|\(¹\)|\") ?)" +FROM_ORGANIC_FARMING_FR = r"issus? de l'agriculture (biologique|bio|durable)" +ORGANIC_MENTIONS_RE = re.compile( + rf"{ASTERISK_SYMBOL}?ingr[ée]dients?( agricoles?)? {FROM_ORGANIC_FARMING_FR}" + rf"|{ASTERISK_SYMBOL}?produits? {FROM_ORGANIC_FARMING_FR}" + rf"|{ASTERISK_SYMBOL}?{FROM_ORGANIC_FARMING_FR}" + rf"|{ASTERISK_SYMBOL}organic( farming)?"
+ rf"|{ASTERISK_SYMBOL}?aus biologischer landwirtschaft" + rf"|{ASTERISK_SYMBOL}?procedentes del cultivo ecol[óo]gico" + rf"|{ASTERISK_SYMBOL}?de cultivo ecol[óo]gico certificado" + rf"|{ASTERISK_SYMBOL}?ingredientes? ecol[óo]gicos?", + re.I, +) + + +def detect_additional_mentions(text: str, end_idx: int) -> int: + """Detect additional mentions that are relevant to include in the + ingredient list (such as organic/fair trade or allergen mentions) but + that are not currently detected by the model (as the model was trained + not to include them in the ingredient list). + + :param text: the full text to process + :param end_idx: the end character index of the current ingredient list + :return: the new end index of the ingredient list, if any mention was + detected. Return the initial end index otherwise. + """ + initial_end_idx = end_idx + last_updated = True + matched = False + + while last_updated: + last_updated = False + lookup_end_idx = end_idx + candidate = text[lookup_end_idx:] + + for char in candidate: + if char.isspace() or char in (".", ","): + lookup_end_idx += 1 else: - aggregation_strategy = AggregationStrategy.NONE - - if grouped_entities is not None: - warnings.warn( - "`grouped_entities` is deprecated and will be removed in version v5.0.0, defaulted to" - f' `aggregation_strategy="{aggregation_strategy}"` instead.' - ) - if ignore_subwords is not None: - warnings.warn( - "`ignore_subwords` is deprecated and will be removed in version v5.0.0, defaulted to" - f' `aggregation_strategy="{aggregation_strategy}"` instead.' - ) - - if aggregation_strategy is not None: - if isinstance(aggregation_strategy, str): - aggregation_strategy = AggregationStrategy[aggregation_strategy.upper()] - if ( - aggregation_strategy - in { - AggregationStrategy.FIRST, - AggregationStrategy.MAX, - AggregationStrategy.AVERAGE, - } - and not self.tokenizer.is_fast - ): - raise ValueError( - "Slow tokenizers cannot handle subwords. Please set the `aggregation_strategy` option" - 'to `"simple"` or use a fast tokenizer.' 
- ) - postprocess_params["aggregation_strategy"] = aggregation_strategy - if ignore_labels is not None: - postprocess_params["ignore_labels"] = ignore_labels - return preprocess_params, {}, postprocess_params + break - def postprocess( - self, - model_outputs, - aggregation_strategy=AggregationStrategy.NONE, - ignore_labels=None, - ): - if ignore_labels is None: - ignore_labels = ["O"] - logits = model_outputs["logits"] - sentence = model_outputs["sentence"] - input_ids = model_outputs["input_ids"] - offset_mapping = ( - model_outputs["offset_mapping"] - if model_outputs["offset_mapping"] is not None - else None - ) - special_tokens_mask = model_outputs["special_tokens_mask"] + candidate = text[lookup_end_idx:] - maxes = np.max(logits, axis=-1, keepdims=True) - shifted_exp = np.exp(logits - maxes) - scores = shifted_exp / shifted_exp.sum(axis=-1, keepdims=True) + if (match := ORGANIC_MENTIONS_RE.search(candidate)) is not None: + if match.start() == 0: + matched = True + last_updated = True + lookup_end_idx += match.end() + end_idx = lookup_end_idx - pre_entities = self.gather_pre_entities( - sentence, - input_ids, - scores, - offset_mapping, - special_tokens_mask, - aggregation_strategy, - ) - grouped_entities = self.aggregate(pre_entities, aggregation_strategy) - # Filter anything that is in self.ignore_labels - entities = [ - entity - for entity in grouped_entities - if entity.get("entity", None) not in ignore_labels - and entity.get("entity_group", None) not in ignore_labels - ] - return entities + if ( + new_end_idx := detect_trace_mention(text, lookup_end_idx) + ) != lookup_end_idx: + matched = True + lookup_end_idx = new_end_idx + end_idx = new_end_idx + last_updated = True - def gather_pre_entities( - self, - sentence: str, - input_ids: np.ndarray, - scores: np.ndarray, - offset_mapping: Optional[List[Tuple[int, int]]], - special_tokens_mask: np.ndarray, - aggregation_strategy: AggregationStrategy, - ) -> List[dict]: - """Fuse various numpy arrays into dicts with all the information - needed for aggregation""" - pre_entities = [] - for idx, token_scores in enumerate(scores): - # Filter special_tokens, they should only occur - # at the sentence boundaries since we're not encoding pairs of - # sentences so we don't have to keep track of those. - if special_tokens_mask[idx]: - continue + # If a mention was detected, return the new end index + if matched: + return end_idx - word = self.tokenizer.convert_ids_to_tokens(int(input_ids[idx])) - if offset_mapping is not None: - start_ind, end_ind = offset_mapping[idx] - word_ref = sentence[start_ind:end_ind] - if getattr( - self.tokenizer._tokenizer.model, "continuing_subword_prefix", None - ): - # This is a BPE, word aware tokenizer, there is a correct - # way to fuse tokens - is_subword = len(word) != len(word_ref) - else: - # This is a fallback heuristic. This will fail most likely - # on any kind of text + punctuation mixtures that will be - # considered "words". Non word aware models cannot do - # better than this unfortunately. 
+
+
+@functools.cache
+def load_trace_grammar() -> Lark:
+    return Lark.open(
+        str(settings.GRAMMARS_DIR / "traces.lark"),
+        start="start",
+        # include start and end positions in the parse tree
+        propagate_positions=True,
+    )
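A side note on the decorator above: `functools.cache` memoizes the zero-argument call, so the grammar file is read and compiled only once per worker process, and every later call returns the same `Lark` instance (a small sketch relying only on standard `functools` semantics):

    parser_a = load_trace_grammar()
    parser_b = load_trace_grammar()
    assert parser_a is parser_b  # cached: no second disk read or grammar build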
+ """ - Example: micro|soft| com|pany| B-ENT I-NAME I-ENT I-ENT will be - rewritten with first strategy as microsoft| - company| B-ENT I-ENT - """ - if aggregation_strategy in { - AggregationStrategy.NONE, - AggregationStrategy.SIMPLE, - }: - raise ValueError( - "NONE and SIMPLE strategies are invalid for word aggregation" - ) + def start(self, items: list): + if items: + return items[0] + return None, None - word_entities = [] - word_group = None - for entity in entities: - if word_group is None: - word_group = [entity] - elif entity["is_subword"]: - word_group.append(entity) - else: - word_entities.append( - self.aggregate_word(word_group, aggregation_strategy) - ) - word_group = [entity] - # Last item - word_entities.append(self.aggregate_word(word_group, aggregation_strategy)) # type: ignore - return word_entities + def value(self, items: list): + return items - def group_sub_entities(self, entities: List[dict]) -> dict: - """ - Group together the adjacent tokens with the same entity predicted. + def traces(self, items): + item = items[0] + return item.meta.start_pos, item.meta.end_pos - Args: - entities (`dict`): The entities predicted by the pipeline. - """ - # Get the first entity in the entity group - entity = entities[0]["entity"].split("-")[-1] - scores = np.nanmean([entity["score"] for entity in entities]) - tokens = [entity["word"] for entity in entities] + def WS(self, token): + return Discard - entity_group = { - "entity_group": entity, - "score": np.mean(scores), - "word": self.tokenizer.convert_tokens_to_string(tokens), - "start": entities[0]["start"], - "end": entities[-1]["end"], - } - return entity_group + def OTHER(self, token): + return Discard - def get_tag(self, entity_name: str) -> Tuple[str, str]: - if entity_name.startswith("B-"): - bi = "B" - tag = entity_name[2:] - elif entity_name.startswith("I-"): - bi = "I" - tag = entity_name[2:] - else: - # It's not in B-, I- format - # Default to I- for continuation. - bi = "I" - tag = entity_name - return bi, tag + def junk(self, items): + return Discard - def group_entities(self, entities: List[dict]) -> List[dict]: - """ - Find and group together the adjacent tokens with the same entity - predicted. - Args: - entities (`dict`): The entities predicted by the pipeline. - """ +def detect_trace_mention(text: str, end_idx: int) -> int: + """Detect trace mentions that are relevant to include in the ingredient + list. 
- entity_groups = [] - entity_group_disagg: list[dict] = [] + :param text: the full text to process + :param end_idx: the end character index of the current ingredient list + :return: the new end index of the ingredient list, if any mention was + detected, or the initial end index otherwise + """ + if not text[end_idx:]: + return end_idx - for entity in entities: - if not entity_group_disagg: - entity_group_disagg.append(entity) - continue + initial_end_idx = end_idx + grammar = load_trace_grammar() + t = grammar.parse(text[end_idx:].lower()) + start_idx, end_idx_offset = TraceDetectionTransformer().transform(t) - # If the current entity is similar and adjacent to the previous - # entity, append it to the disaggregated entity group - # The split is meant to account for the "B" and "I" prefixes - # Shouldn't merge if both entities are B-type - bi, tag = self.get_tag(entity["entity"]) - last_bi, last_tag = self.get_tag(entity_group_disagg[-1]["entity"]) - - if tag == last_tag and bi != "B": - # Modify subword type to be previous_type - entity_group_disagg.append(entity) - else: - # If the current entity is different from the previous entity - # aggregate the disaggregated entity group - entity_groups.append(self.group_sub_entities(entity_group_disagg)) - entity_group_disagg = [entity] - if entity_group_disagg: - # it's the last entity, add it to the entity groups - entity_groups.append(self.group_sub_entities(entity_group_disagg)) + if start_idx != 0: + return initial_end_idx - return entity_groups + end_idx += end_idx_offset + return end_idx diff --git a/robotoff/prediction/ingredient_list/transformers_pipeline.py b/robotoff/prediction/ingredient_list/transformers_pipeline.py new file mode 100644 index 0000000000..047c83e27d --- /dev/null +++ b/robotoff/prediction/ingredient_list/transformers_pipeline.py @@ -0,0 +1,372 @@ +""" +This file has been copied and adapted from +https://github.com/huggingface/transformers/blob/v4.25.1/src/transformers/pipelines/token_classification.py + +The code is under Apache-2.0 license: +https://github.com/huggingface/transformers/blob/main/LICENSE + +We use Triton to serve the request, but still need NER prediction +post-processing, and HuggingFace transformers library provide this feature +nicely using `TokenClassificationPipeline`. 
+
+Most of the code was kept unchanged; the only modifications made were the
+following:
+
+- accept numpy arrays as input instead of Tensorflow/Pytorch tensors
+- remove unnecessary code (everything that is not related to post-processing)
+- `postprocess` now accepts a single sample (instead of a batched sample of
+  size 1)
+"""
+import enum
+import warnings
+from typing import List, Optional, Tuple
+
+import numpy as np
+
+
+class AggregationStrategy(enum.Enum):
+    """All the valid aggregation strategies for TokenClassificationPipeline"""
+
+    NONE = "none"  # one entity per token, no grouping
+    SIMPLE = "simple"  # group adjacent tokens, no subword conflict resolution
+    FIRST = "first"  # on subword disagreement, use the first subword's label
+    AVERAGE = "average"  # average subword scores, then take the best label
+    MAX = "max"  # use the label of the subword with the highest score
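To make the last three strategies concrete, here is a small worked example with toy scores (assuming two labels, ["O", "B-ENT"], and one word split into the subword tokens "micro" + "soft"); it mirrors the logic of `aggregate_word` further below:

    import numpy as np

    # Per-subword label scores; columns are ["O", "B-ENT"].
    scores = [np.array([0.1, 0.9]), np.array([0.8, 0.2])]

    scores[0].argmax()                           # FIRST   -> "B-ENT" (0.9)
    max(scores, key=lambda s: s.max()).argmax()  # MAX     -> "B-ENT" (0.9)
    np.nanmean(np.stack(scores), axis=0)         # AVERAGE -> [0.45, 0.55],
                                                 # i.e. "B-ENT" (0.55)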
+
+
+class TokenClassificationPipeline:
+    default_input_names = "sequences"
+
+    def __init__(self, tokenizer, id2label):
+        self.tokenizer = tokenizer
+        self.id2label = id2label
+
+    def _sanitize_parameters(
+        self,
+        ignore_labels=None,
+        grouped_entities: Optional[bool] = None,
+        ignore_subwords: Optional[bool] = None,
+        aggregation_strategy: Optional[AggregationStrategy] = None,
+        offset_mapping: Optional[List[Tuple[int, int]]] = None,
+    ):
+        preprocess_params = {}
+        if offset_mapping is not None:
+            preprocess_params["offset_mapping"] = offset_mapping
+
+        postprocess_params = {}
+        if grouped_entities is not None or ignore_subwords is not None:
+            if grouped_entities and ignore_subwords:
+                aggregation_strategy = AggregationStrategy.FIRST
+            elif grouped_entities and not ignore_subwords:
+                aggregation_strategy = AggregationStrategy.SIMPLE
+            else:
+                aggregation_strategy = AggregationStrategy.NONE
+
+            if grouped_entities is not None:
+                warnings.warn(
+                    "`grouped_entities` is deprecated and will be removed in version v5.0.0, defaulted to"
+                    f' `aggregation_strategy="{aggregation_strategy}"` instead.'
+                )
+            if ignore_subwords is not None:
+                warnings.warn(
+                    "`ignore_subwords` is deprecated and will be removed in version v5.0.0, defaulted to"
+                    f' `aggregation_strategy="{aggregation_strategy}"` instead.'
+                )
+
+        if aggregation_strategy is not None:
+            if isinstance(aggregation_strategy, str):
+                aggregation_strategy = AggregationStrategy[aggregation_strategy.upper()]
+            if (
+                aggregation_strategy
+                in {
+                    AggregationStrategy.FIRST,
+                    AggregationStrategy.MAX,
+                    AggregationStrategy.AVERAGE,
+                }
+                and not self.tokenizer.is_fast
+            ):
+                raise ValueError(
+                    "Slow tokenizers cannot handle subwords. Please set the `aggregation_strategy` option "
+                    'to `"simple"` or use a fast tokenizer.'
+                )
+            postprocess_params["aggregation_strategy"] = aggregation_strategy
+        if ignore_labels is not None:
+            postprocess_params["ignore_labels"] = ignore_labels
+        return preprocess_params, {}, postprocess_params
+
+    def postprocess(
+        self,
+        model_outputs,
+        aggregation_strategy=AggregationStrategy.NONE,
+        ignore_labels=None,
+    ):
+        if ignore_labels is None:
+            ignore_labels = ["O"]
+        logits = model_outputs["logits"]
+        sentence = model_outputs["sentence"]
+        input_ids = model_outputs["input_ids"]
+        offset_mapping = model_outputs["offset_mapping"]  # may be None
+        special_tokens_mask = model_outputs["special_tokens_mask"]
+
+        # Numerically stable softmax over the label dimension
+        maxes = np.max(logits, axis=-1, keepdims=True)
+        shifted_exp = np.exp(logits - maxes)
+        scores = shifted_exp / shifted_exp.sum(axis=-1, keepdims=True)
+
+        pre_entities = self.gather_pre_entities(
+            sentence,
+            input_ids,
+            scores,
+            offset_mapping,
+            special_tokens_mask,
+            aggregation_strategy,
+        )
+        grouped_entities = self.aggregate(pre_entities, aggregation_strategy)
+        # Filter anything that is in self.ignore_labels
+        entities = [
+            entity
+            for entity in grouped_entities
+            if entity.get("entity", None) not in ignore_labels
+            and entity.get("entity_group", None) not in ignore_labels
+        ]
+        return entities
+
+    def gather_pre_entities(
+        self,
+        sentence: str,
+        input_ids: np.ndarray,
+        scores: np.ndarray,
+        offset_mapping: Optional[List[Tuple[int, int]]],
+        special_tokens_mask: np.ndarray,
+        aggregation_strategy: AggregationStrategy,
+    ) -> List[dict]:
+        """Fuse various numpy arrays into dicts with all the information
+        needed for aggregation"""
+        pre_entities = []
+        for idx, token_scores in enumerate(scores):
+            # Filter special_tokens, they should only occur
+            # at the sentence boundaries since we're not encoding pairs of
+            # sentences so we don't have to keep track of those.
+            if special_tokens_mask[idx]:
+                continue
+
+            word = self.tokenizer.convert_ids_to_tokens(int(input_ids[idx]))
+            if offset_mapping is not None:
+                start_ind, end_ind = offset_mapping[idx]
+                word_ref = sentence[start_ind:end_ind]
+                if getattr(
+                    self.tokenizer._tokenizer.model, "continuing_subword_prefix", None
+                ):
+                    # This is a BPE, word aware tokenizer, there is a correct
+                    # way to fuse tokens
+                    is_subword = len(word) != len(word_ref)
+                else:
+                    # This is a fallback heuristic. This will fail most likely
+                    # on any kind of text + punctuation mixtures that will be
+                    # considered "words". Non word aware models cannot do
+                    # better than this unfortunately.
+ if aggregation_strategy in { + AggregationStrategy.FIRST, + AggregationStrategy.AVERAGE, + AggregationStrategy.MAX, + }: + warnings.warn( + "Tokenizer does not support real words, using fallback heuristic", + UserWarning, + ) + is_subword = ( + start_ind > 0 + and " " not in sentence[start_ind - 1 : start_ind + 1] + ) + + if int(input_ids[idx]) == self.tokenizer.unk_token_id: + word = word_ref + is_subword = False + else: + start_ind = None + end_ind = None + is_subword = False + + pre_entity = { + "word": word, + "scores": token_scores, + "start": start_ind, + "end": end_ind, + "index": idx, + "is_subword": is_subword, + } + pre_entities.append(pre_entity) + return pre_entities + + def aggregate( + self, pre_entities: List[dict], aggregation_strategy: AggregationStrategy + ) -> List[dict]: + if aggregation_strategy in { + AggregationStrategy.NONE, + AggregationStrategy.SIMPLE, + }: + entities = [] + for pre_entity in pre_entities: + entity_idx = pre_entity["scores"].argmax() + score = pre_entity["scores"][entity_idx] + entity = { + "entity": self.id2label[entity_idx], + "score": score, + "index": pre_entity["index"], + "word": pre_entity["word"], + "start": pre_entity["start"], + "end": pre_entity["end"], + } + entities.append(entity) + else: + entities = self.aggregate_words(pre_entities, aggregation_strategy) + + if aggregation_strategy == AggregationStrategy.NONE: + return entities + return self.group_entities(entities) + + def aggregate_word( + self, entities: List[dict], aggregation_strategy: AggregationStrategy + ) -> dict: + word = self.tokenizer.convert_tokens_to_string( + [entity["word"] for entity in entities] + ) + if aggregation_strategy == AggregationStrategy.FIRST: + scores = entities[0]["scores"] + idx = scores.argmax() + score = scores[idx] + entity = self.id2label[idx] + elif aggregation_strategy == AggregationStrategy.MAX: + max_entity = max(entities, key=lambda entity: entity["scores"].max()) + scores = max_entity["scores"] + idx = scores.argmax() + score = scores[idx] + entity = self.id2label[idx] + elif aggregation_strategy == AggregationStrategy.AVERAGE: + scores = np.stack([entity["scores"] for entity in entities]) + average_scores = np.nanmean(scores, axis=0) + entity_idx = average_scores.argmax() + entity = self.id2label[entity_idx] + score = average_scores[entity_idx] + else: + raise ValueError("Invalid aggregation_strategy") + new_entity = { + "entity": entity, + "score": score, + "word": word, + "start": entities[0]["start"], + "end": entities[-1]["end"], + } + return new_entity + + def aggregate_words( + self, entities: List[dict], aggregation_strategy: AggregationStrategy + ) -> List[dict]: + """ + Override tokens from a given word that disagree to force agreement on + word boundaries. 
+ + Example: micro|soft| com|pany| B-ENT I-NAME I-ENT I-ENT will be + rewritten with first strategy as microsoft| + company| B-ENT I-ENT + """ + if aggregation_strategy in { + AggregationStrategy.NONE, + AggregationStrategy.SIMPLE, + }: + raise ValueError( + "NONE and SIMPLE strategies are invalid for word aggregation" + ) + + word_entities = [] + word_group = None + for entity in entities: + if word_group is None: + word_group = [entity] + elif entity["is_subword"]: + word_group.append(entity) + else: + word_entities.append( + self.aggregate_word(word_group, aggregation_strategy) + ) + word_group = [entity] + # Last item + word_entities.append(self.aggregate_word(word_group, aggregation_strategy)) # type: ignore + return word_entities + + def group_sub_entities(self, entities: List[dict]) -> dict: + """ + Group together the adjacent tokens with the same entity predicted. + + Args: + entities (`dict`): The entities predicted by the pipeline. + """ + # Get the first entity in the entity group + entity = entities[0]["entity"].split("-")[-1] + scores = np.nanmean([entity["score"] for entity in entities]) + tokens = [entity["word"] for entity in entities] + + entity_group = { + "entity_group": entity, + "score": np.mean(scores), + "word": self.tokenizer.convert_tokens_to_string(tokens), + "start": entities[0]["start"], + "end": entities[-1]["end"], + } + return entity_group + + def get_tag(self, entity_name: str) -> Tuple[str, str]: + if entity_name.startswith("B-"): + bi = "B" + tag = entity_name[2:] + elif entity_name.startswith("I-"): + bi = "I" + tag = entity_name[2:] + else: + # It's not in B-, I- format + # Default to I- for continuation. + bi = "I" + tag = entity_name + return bi, tag + + def group_entities(self, entities: List[dict]) -> List[dict]: + """ + Find and group together the adjacent tokens with the same entity + predicted. + + Args: + entities (`dict`): The entities predicted by the pipeline. 
+ """ + + entity_groups = [] + entity_group_disagg: list[dict] = [] + + for entity in entities: + if not entity_group_disagg: + entity_group_disagg.append(entity) + continue + + # If the current entity is similar and adjacent to the previous + # entity, append it to the disaggregated entity group + # The split is meant to account for the "B" and "I" prefixes + # Shouldn't merge if both entities are B-type + bi, tag = self.get_tag(entity["entity"]) + last_bi, last_tag = self.get_tag(entity_group_disagg[-1]["entity"]) + + if tag == last_tag and bi != "B": + # Modify subword type to be previous_type + entity_group_disagg.append(entity) + else: + # If the current entity is different from the previous entity + # aggregate the disaggregated entity group + entity_groups.append(self.group_sub_entities(entity_group_disagg)) + entity_group_disagg = [entity] + if entity_group_disagg: + # it's the last entity, add it to the entity groups + entity_groups.append(self.group_sub_entities(entity_group_disagg)) + + return entity_groups diff --git a/robotoff/prediction/ocr/grammar.py b/robotoff/prediction/ocr/grammar.py index 326528177c..2645d8cc9f 100644 --- a/robotoff/prediction/ocr/grammar.py +++ b/robotoff/prediction/ocr/grammar.py @@ -131,7 +131,7 @@ def generate_terminal_symbols_text( """ ignore_ids = ignore_ids or set() texts = [] - taxonomy = get_taxonomy(taxonomy_type.name, offline=True) + taxonomy = get_taxonomy(taxonomy_type.name, offline=False) seen_set: dict[str, str] = {} node_id_names = extract_taxonomy_names( diff --git a/robotoff/settings.py b/robotoff/settings.py index 3282b69403..809701115f 100644 --- a/robotoff/settings.py +++ b/robotoff/settings.py @@ -155,6 +155,8 @@ def event_api() -> str: + "/data/taxonomies/packaging_materials.full.json", "packaging_recycling": BaseURLProvider.static(ServerType.off) + "/data/taxonomies/packaging_recycling.full.json", + "allergen": BaseURLProvider.static(ServerType.off) + + "/data/taxonomies/allergen.full.json", } _off_password = os.environ.get("OFF_PASSWORD", "") diff --git a/robotoff/taxonomy.py b/robotoff/taxonomy.py index 24ef7b1a02..fd5e9143a9 100644 --- a/robotoff/taxonomy.py +++ b/robotoff/taxonomy.py @@ -40,7 +40,7 @@ def generate_category_hierarchy( @cachetools.cached(cache=cachetools.TTLCache(maxsize=100, ttl=12 * 60 * 60)) # 12h -def get_taxonomy(taxonomy_type: str, offline: bool = False) -> Taxonomy: +def get_taxonomy(taxonomy_type: TaxonomyType | str, offline: bool = False) -> Taxonomy: """Return the taxonomy of type `taxonomy_type`. The taxonomy is cached in memory and locally on disk. 
 Every 12h, we check
@@ -57,8 +57,11 @@ def get_taxonomy(taxonomy_type: str, offline: bool = False) -> Taxonomy:
     if offline:
         return Taxonomy.from_path(str(settings.TAXONOMY_PATHS[taxonomy_type]))
 
+    taxonomy_type_enum = (
+        TaxonomyType[taxonomy_type] if isinstance(taxonomy_type, str) else taxonomy_type
+    )
     return _get_taxonomy(
-        TaxonomyType[taxonomy_type],
+        taxonomy_type_enum,
         force_download=False,
         cache_dir=settings.DATA_DIR / "taxonomies",
     )
diff --git a/robotoff/workers/tasks/import_image.py b/robotoff/workers/tasks/import_image.py
index 9a108ae307..f570e35b0a 100644
--- a/robotoff/workers/tasks/import_image.py
+++ b/robotoff/workers/tasks/import_image.py
@@ -1,3 +1,4 @@
+import dataclasses
 import datetime
 from pathlib import Path
 from typing import Optional
@@ -5,6 +6,7 @@
 import elasticsearch
 from elasticsearch.helpers import BulkIndexError
 from openfoodfacts import OCRResult
+from openfoodfacts.types import TaxonomyType
 from PIL import Image
 
 from robotoff import settings
@@ -32,10 +34,12 @@
     db,
     with_db,
 )
-from robotoff.off import generate_image_url, get_source_from_url
+from robotoff.off import generate_image_url, get_source_from_url, parse_ingredients
+from robotoff.prediction import ingredient_list
 from robotoff.prediction.upc_image import UPCImageType, find_image_is_upc
 from robotoff.products import get_product_store
 from robotoff.slack import NotifierFactory
+from robotoff.taxonomy import get_taxonomy
 from robotoff.triton import generate_clip_embedding
 from robotoff.types import (
     JSONType,
@@ -115,6 +119,18 @@ def run_import_image_job(product_id: ProductIdentifier, image_url: str, ocr_url:
         image_url=image_url,
         ocr_url=ocr_url,
     )
+    # Only extract ingredient lists for food products, as the model was not
+    # trained on non-food products
+    enqueue_job(
+        extract_ingredients_job,
+        get_high_queue(product_id),
+        # We add a higher timeout, as we request Product Opener to
+        # parse ingredient lists, which may take a while depending on
+        # the number of ingredient lists (~1s per ingredient list)
+        job_kwargs={"result_ttl": 0, "timeout": "2m"},
+        product_id=product_id,
+        ocr_url=ocr_url,
+    )
     # We make sure there is no concurrent insight processing by sending
     # the job to the same queue. The queue is selected based on the product
     # barcode. See `get_high_queue` documentation for more details.
@@ -573,3 +589,85 @@ def add_image_fingerprint_job(image_model_id: int):
         return
 
     add_image_fingerprint(image_model)
+
+
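Before the job itself, a quick illustration of the counting logic it applies (toy data; in the real code, `parse_ingredients` asks Product Opener to parse the detected text and returns one dict per ingredient, as in the integration test further below):

    ingredient_taxonomy = {"en:water", "en:salt"}  # stand-in for the real taxonomy
    parsed_ingredients = [
        {"id": "en:water", "text": "water"},
        {"id": "en:salt", "text": "salt"},
        {"id": "en:unknownium", "text": "unknownium"},  # made-up ingredient
    ]
    for ingredient in parsed_ingredients:
        ingredient["in_taxonomy"] = ingredient["id"] in ingredient_taxonomy
    known = sum(i["in_taxonomy"] for i in parsed_ingredients)
    # ingredients_n=3, known_ingredients_n=2, unknown_ingredients_n=1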
+ """ + source_image = get_source_from_url(ocr_url) + + with db: + image_model = ImageModel.get_or_none( + source_image=source_image, server_type=product_id.server_type.name + ) + + if not image_model: + logger.info("Missing image in DB for image %s", source_image) + return + + # Stop the job here if the image has already been processed + if ( + ImagePrediction.get_or_none( + image=image_model, model_name=ingredient_list.MODEL_NAME + ) + ) is not None: + return + + output = ingredient_list.predict_from_ocr(ocr_url) + logger.warning("predict_from_ocr output: %s", output) + entities: list[ + ingredient_list.IngredientPredictionAggregatedEntity + ] = output.entities # type: ignore + # (we know it's an aggregated entity, so we can ignore the type) + + image_prediction_data = dataclasses.asdict(output) + ingredient_taxonomy = get_taxonomy(TaxonomyType.ingredient) + + for entity in image_prediction_data["entities"]: + # This is just an extra check, we should have lang information + # available + if entity["lang"]: + lang_id = entity["lang"]["lang"] + try: + # Parse ingredients using Product Opener ingredient parser, + # and add it to the entity data + parsed_ingredients = parse_ingredients(entity["text"], lang_id) + except RuntimeError as e: + logger.info( + "Error while parsing ingredients, skipping " + "to the next ingredient list", + exc_info=e, + ) + continue + + known_ingredients_n = 0 + ingredients_n = len(parsed_ingredients) + for ingredient_data in parsed_ingredients: + ingredient_id = ingredient_data["id"] + ingredient_data["in_taxonomy"] = ( + ingredient_id in ingredient_taxonomy + ) + known_ingredients_n += int(ingredient_data["in_taxonomy"]) + + # We use the same terminology as Product Opener + entity["ingredients_n"] = ingredients_n + entity["known_ingredients_n"] = known_ingredients_n + entity["unknown_ingredients_n"] = ingredients_n - known_ingredients_n + entity["ingredients"] = parsed_ingredients + + ImagePrediction.create( + image=image_model, + type="ner", + model_name=ingredient_list.MODEL_NAME, + model_version=ingredient_list.MODEL_VERSION, + data=image_prediction_data, + timestamp=datetime.datetime.utcnow(), + max_confidence=max(entity.score for entity in entities), + ) + logger.info("create image prediction (ingredient detection) from %s", ocr_url) diff --git a/tests/integration/workers/__init__.py b/tests/integration/workers/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/integration/workers/tasks/__init__.py b/tests/integration/workers/tasks/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/integration/workers/tasks/test_import_image.py b/tests/integration/workers/tasks/test_import_image.py new file mode 100644 index 0000000000..aa98a0f7ed --- /dev/null +++ b/tests/integration/workers/tasks/test_import_image.py @@ -0,0 +1,168 @@ +import pytest + +from robotoff.models import ImagePrediction +from robotoff.prediction.ingredient_list import ( + IngredientPredictionAggregatedEntity, + IngredientPredictionOutput, +) +from robotoff.prediction.langid import LanguagePrediction +from robotoff.types import ProductIdentifier, ServerType +from robotoff.workers.tasks.import_image import extract_ingredients_job + +from ...models_utils import ImageModelFactory, ImagePredictionFactory, clean_db + + +@pytest.fixture(autouse=True) +def _set_up_and_tear_down(peewee_db): + with peewee_db: + clean_db() + # Run the test case. 
+ yield + + with peewee_db: + clean_db() + + +def test_extract_ingredients_job(mocker, peewee_db): + full_text = "Best product ever!\ningredients: water, salt, sugar." + entities = [ + IngredientPredictionAggregatedEntity( + start=19, + end=51, + raw_end=51, + score=0.9, + text="water, salt, sugar.", + lang=LanguagePrediction(lang="en", confidence=0.9), + ) + ] + parsed_ingredients = [ + { + "ciqual_food_code": "18066", + "id": "en:water", + "percent_estimate": 66.6666666666667, + "percent_max": 100, + "percent_min": 33.3333333333333, + "text": "water", + "vegan": "yes", + "vegetarian": "yes", + }, + { + "ciqual_food_code": "11058", + "id": "en:salt", + "percent_estimate": 16.6666666666667, + "percent_max": 50, + "percent_min": 0, + "text": "salt", + "vegan": "yes", + "vegetarian": "yes", + }, + { + "id": "en:sugar", + "percent_estimate": 16.6666666666667, + "percent_max": 33.3333333333333, + "percent_min": 0, + "text": "sugar", + "vegan": "yes", + "vegetarian": "yes", + }, + ] + ingredient_list_mocker = mocker.patch( + "robotoff.workers.tasks.import_image.ingredient_list" + ) + parse_ingredients_mocker = mocker.patch( + "robotoff.workers.tasks.import_image.parse_ingredients", + return_value=parsed_ingredients, + ) + ingredient_list_mocker.predict_from_ocr.return_value = IngredientPredictionOutput( + entities=entities, text=full_text + ) + ingredient_list_mocker.MODEL_NAME = "ingredient-detection" + ingredient_list_mocker.MODEL_VERSION = "ingredient-detection-1.0" + + barcode = "1234567890123" + ocr_url = "https://images.openfoodfacts.org/images/products/123/456/789/0123/1.json" + + with peewee_db: + image = ImageModelFactory( + barcode=barcode, server_type=ServerType.off, image_id="1" + ) + extract_ingredients_job( + ProductIdentifier(barcode, ServerType.off), ocr_url=ocr_url + ) + ingredient_list_mocker.predict_from_ocr.assert_called_once_with(ocr_url) + parse_ingredients_mocker.assert_called_once_with("water, salt, sugar.", "en") + image_prediction = ImagePrediction.get_or_none( + ImagePrediction.model_name == "ingredient-detection", + ImagePrediction.image_id == image.id, + ) + assert image_prediction is not None + assert image_prediction.data == { + "text": full_text, + "entities": [ + { + "end": 51, + "lang": {"lang": "en", "confidence": 0.9}, + "text": "water, salt, sugar.", + "score": 0.9, + "start": 19, + "raw_end": 51, + "ingredients_n": 3, + "known_ingredients_n": 3, + "unknown_ingredients_n": 0, + "ingredients": [ + {"in_taxonomy": True, **ingredient} + for ingredient in parsed_ingredients + ], + } + ], + } + assert image_prediction.max_confidence == 0.9 + assert image_prediction.type == "ner" + assert image_prediction.model_name == "ingredient-detection" + assert image_prediction.model_version == "ingredient-detection-1.0" + + +def test_extract_ingredients_job_missing_image(mocker, peewee_db): + ingredient_list_mocker = mocker.patch( + "robotoff.workers.tasks.import_image.ingredient_list" + ) + parse_ingredients_mocker = mocker.patch( + "robotoff.workers.tasks.import_image.parse_ingredients" + ) + barcode = "1234567890123" + ocr_url = "https://images.openfoodfacts.org/images/products/123/456/789/0123/1.json" + + with peewee_db: + extract_ingredients_job( + ProductIdentifier(barcode, ServerType.off), ocr_url=ocr_url + ) + ingredient_list_mocker.predict_from_ocr.assert_not_called() + parse_ingredients_mocker.assert_not_called() + + +def test_extract_ingredients_job_existing_image_prediction(mocker, peewee_db): + ingredient_list_mocker = mocker.patch( + 
"robotoff.workers.tasks.import_image.ingredient_list" + ) + parse_ingredients_mocker = mocker.patch( + "robotoff.workers.tasks.import_image.parse_ingredients" + ) + ingredient_list_mocker.MODEL_NAME = "ingredient-detection" + ingredient_list_mocker.MODEL_VERSION = "ingredient-detection-1.0" + barcode = "1234567890123" + ocr_url = "https://images.openfoodfacts.org/images/products/123/456/789/0123/1.json" + + with peewee_db: + image = ImageModelFactory( + barcode=barcode, server_type=ServerType.off, image_id="1" + ) + ImagePredictionFactory( + image=image, + model_name="ingredient-detection", + model_version="ingredient-detection-1.0", + ) + extract_ingredients_job( + ProductIdentifier(barcode, ServerType.off), ocr_url=ocr_url + ) + ingredient_list_mocker.predict_from_ocr.assert_not_called() + parse_ingredients_mocker.assert_not_called() diff --git a/tests/unit/prediction/ingredient_list/test_postprocess.py b/tests/unit/prediction/ingredient_list/test_postprocess.py new file mode 100644 index 0000000000..59216bbd90 --- /dev/null +++ b/tests/unit/prediction/ingredient_list/test_postprocess.py @@ -0,0 +1,115 @@ +import pytest + +from robotoff.prediction.ingredient_list.postprocess import ( + ORGANIC_MENTIONS_RE, + detect_additional_mentions, + detect_trace_mention, +) + + +@pytest.mark.parametrize( + "text,match", + [ + ("Ingrédients issus de l'agriculture biologique", True), + ("*Ingrédients agricoles issus de l'agriculture biologique", True), + ("*issu de l'agriculture biologique", True), + ("issu de l'agriculture biologique", True), + ("*Produits issus de l'agriculture biologique", True), + ("Produit issu de l'agriculture biologique", True), + ("\"produit issu de l'agriculture durable", True), + ("*= produits issus de l'agriculture biologique", True), + ("* = ingrédients issus de l'agriculture durable", True), + ("* Produit issu de l'Agriculture Biologique", True), + ("*organic", True), + ('"aus biologischer Landwirtschaft', True), + ("*de cultivo ecologico certificado", True), + ("organic", False), + ("agriculture biologique", False), + ("produit issu", False), + ], +) +def test_organic_mention_detection(text: str, match: bool): + assert (ORGANIC_MENTIONS_RE.match(text) is not None) is match + + +@pytest.mark.parametrize( + "text, initial_end_idx, new_end_idx", + [ + (", *ingrédients issus de l'agriculture biologique", 0, 48), + ( + "Eau, poireaux*, carottes*, navet*. *= produits issus de l'agriculture durable. Valeurs nutritionnelles", + 33, + 77, + ), + ( + "Eau, poireaux*, carottes*, navet*, *ingrédients issus de l'agriculture bio. Valeurs nutritionnelles", + 33, + 74, + ), + ( + "Eau, poireaux*, carottes*, navet*, *ingrédients issus de l'agriculture bio. Peut contenir des traces de noix. Valeurs nutritionnelles", + 33, + 108, + ), + ( + "Eau, poireaux*, carottes*, navet*. Peut contenir des traces de noix. *ingrédients issus de l'agriculture bio. Valeurs nutritionnelles", + 33, + 108, + ), + # If no mention was detected, reset the end index to its initial value + ( + "Eau, poireaux*, carottes*, navet*, ", + 33, + 33, + ), + ], +) +def test_detect_additional_mentions(text: str, initial_end_idx, new_end_idx: int): + assert detect_additional_mentions(text, initial_end_idx) == new_end_idx + + +@pytest.mark.parametrize( + "text, new_end_idx", + [ + ("Peut contenir des traces de fruit à coque.", 41), + ( + "Peut contenir des traces de soja, lait, sésame, amande, noisette, noix de cajou et arachide !", + 91, + ), + ("Eau, banane", 0), + ("peut contenir des traces d'arachides et de cacahuètes. 
Attention", 53), + ( + "produit élaboré dans un atelier utilisant du lait demi-écrémé et du gorgonzola. OTHER", + 78, + ), + # This should not match, as the string does not start with the + # allergen mention + ("OTHER. Peut contenir des traces d'arachides et de cacahuètes", 0), + ("contient naturellement du jaune d'oeuf. Info nutritionnelles", 38), + # This should not match, as the first word is "acontient" and not + # "contient" (we check for word boundaries) + ("acontient naturellement du jaune d'oeuf. Info nutritionnelles", 0), + # EN + ("contains wheat", 14), + ], +) +def test_detect_trace_mention(text: str, new_end_idx: int): + assert detect_trace_mention(text, end_idx=0) == new_end_idx + + +@pytest.mark.parametrize( + "text", + [ + # FR + "Peut contenir des traces de fruit à coque", + # ES + "CONTIENE LECHE", + "Contiene lecitina de soya", + "Este producto contiene espelta, trigo y gluten", + "PUEDE CONTENER LECHE", + ], +) +def test_detect_trace_mention_full_match(text: str): + """Test that the trace mention detection works (only full matches are + tested here).""" + assert detect_trace_mention(text, end_idx=0) == len(text)