Skip to content

A phonetically-aware Python library for parsing IPA transcriptions.

License

Notifications You must be signed in to change notification settings

danmysak/ipa-parser

Folders and files

Name
Last commit message
Last commit date

Latest commit

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 

Repository files navigation

IPA parser for Python

Installation

pip install ipaparser

Quick start

from ipaparser import IPA

print([str(symbol) for symbol in IPA('[ˈpʰɹɛʔt͡sɫ̩]') if 'consonant' in symbol.features()])
['pʰ', 'ɹ', 'ʔ', 't͡s', 'ɫ̩']

Usage/reference

IPA

The IPA class can be used to parse IPA transcriptions. It behaves as a wrapper around a list of symbols:

from ipaparser import IPA

print([
    len(IPA('[aɪ pʰiː eɪ]')),
    # 8
    
    list(IPA('[aɪ pʰiː eɪ]')),
    # [IPASymbol('a'),
    #  IPASymbol('ɪ'),
    #  IPASymbol(' '),
    #  IPASymbol('pʰ'),
    #  IPASymbol('iː'),
    #  IPASymbol(' '),
    #  IPASymbol('e'),
    #  IPASymbol('ɪ')]
    
    IPA('[aɪ pʰiː eɪ]')[0],
    # IPASymbol('a')
    
    IPA('[aɪ pʰiː eɪ]')[-1],
    # IPASymbol('ɪ')
    
    IPA('[aɪ pʰiː eɪ]')[3:5],
    # IPA('[pʰiː]')
])

You can control some aspects of how parsing is performed by additionally passing a configuration:

from ipaparser import IPA, IPAConfig

print([
    list(IPA('[aɪ pʰiː eɪ]', IPAConfig(combined=[('a', 'ɪ'), ('e', 'ɪ')]))),
    # [IPASymbol('a͡ɪ'),
    #  IPASymbol(' '),
    #  IPASymbol('pʰ'),
    #  IPASymbol('iː'),
    #  IPASymbol(' '),
    #  IPASymbol('e͡ɪ')]
])

Objects of the IPA class provide basic information about their transcription type:

from ipaparser import IPA
from ipaparser.definitions import TranscriptionType

print([
    IPA('[aɪ pʰiː eɪ]').type,
    # <TranscriptionType.PHONETIC: 'phonetic'>
    
    IPA('/ˌaɪ.piːˈeɪ/').type == TranscriptionType.PHONEMIC,
    # True
    
    IPA('[aɪ pʰiː eɪ]').left_bracket,
    # '['
    
    IPA('[aɪ pʰiː eɪ]').right_bracket,
    # ']'
])

IPA objects can be compared with other IPA objects as well as with strings:

from ipaparser import IPA

print([
    IPA('[aɪ pʰiː eɪ]') == IPA('[aɪ pʰiː eɪ]'),
    # True
    
    IPA('[aɪ pʰiː eɪ]') == '[aɪ pʰiː eɪ]',
    # True
    
    IPA(transcription := '[ú]') == transcription,
    # False: `IPA` performs unicode normalizations on strings
    
    IPA('[aɪ pʰiː eɪ]') == IPA('[eɪ pʰiː aɪ]'),
    # False
    
    IPA('[aɪ pʰiː eɪ]') == IPA('/aɪ pʰiː eɪ/'),
    # False
    
    IPA('[aɪ pʰiː eɪ]').as_string(),
    # '[aɪ pʰiː eɪ]'
    
    str(IPA('[aɪ pʰiː eɪ]')),
    # '[aɪ pʰiː eɪ]'
])

You can concatenate multiple IPA objects as well as append or prepend symbols to them:

from ipaparser import IPA, IPASymbol

print([
    IPA('[aɪ pʰiː]') + IPASymbol(' ') + IPA('[eɪ]'),
    # IPA('[aɪ pʰiː eɪ]')
    
    IPA('[aɪ]') * 3,
    # IPA('[aɪaɪaɪ]')
])

IPASymbol

IPASymbol represents an individual unit of IPA transcriptions: either a sound (like a, t͡s, or ᶢǁʱ), a break (like . or a space), a suprasegmental letter (stress mark, tone number, etc.), or an unknown grapheme.

from ipaparser import IPA, IPASymbol

print([
    IPA('[aɪ pʰiː]')[0].is_sound(),
    # True
    
    IPA('[aɪ pʰiː]')[2].is_sound(),
    # False
    
    IPA('[aɪ pʰiː]')[2].is_break(),
    # True
    
    IPASymbol('˦').is_suprasegmental(),
    # True
    
    IPASymbol('˦').is_known(),
    # True
    
    IPASymbol('*').is_known(),
    # False
])

Just as with the IPA constructor, you can additionally pass a configuration to IPASymbol:

from ipaparser import IPAConfig, IPASymbol

print([
    IPASymbol('g:').is_known(),
    # False
    
    IPASymbol('g:', IPAConfig(substitutions=True)),
    # IPASymbol('ɡː')
    
    IPASymbol('g:', IPAConfig(substitutions=True)).is_known(),
    # True
])

Symbols can be queried for their features:

from ipaparser import IPASymbol
from ipaparser.features import Aspiration, Backness, Height, Manner, SoundType

print([
    IPASymbol('pʰ').features(),
    # frozenset({<Aspiration.ASPIRATED: 'aspirated'>,
    #            <Place.BILABIAL: 'bilabial'>,
    #            <SoundType.CONSONANT: 'consonant'>,
    #            <PlaceCategory.LABIAL: 'labial'>,
    #            <SoundSubtype.SIMPLE_CONSONANT: 'simple consonant'>,
    #            <SymbolType.SOUND: 'sound'>,
    #            <Manner.STOP: 'stop'>})
    
    IPASymbol('a').features({Backness, Height}),
    # frozenset({<Backness.FRONT: 'front'>,
    #            <Height.OPEN: 'open'>})
    
    IPASymbol('s').features(Manner),
    # frozenset({<Manner.FRICATIVE: 'fricative'>,
    #            <Manner.SIBILANT: 'sibilant'>})
    
    IPASymbol('b').features('voicing'),  # shortcut to `ipaparser.features.Voicing`
    # frozenset({<Voicing.VOICED: 'voiced'>})
    
    IPASymbol(' ').features(),
    # frozenset({<SymbolType.BREAK: 'break'>,
    #            <BreakType.SPACE: 'space'>})
    
    IPASymbol('*').features(),
    # None
    
    IPASymbol('pʰ').has_feature(Aspiration.ASPIRATED),
    # True
    
    IPASymbol('a').has_feature('vowel'),  # shortcut to `ipaparser.features.SoundType.VOWEL`
    # True
    
    IPASymbol('b').has_feature(SoundType.VOWEL),
    # False
    
    IPASymbol('*').has_feature(SoundType.VOWEL),
    # False
])

Alternative interpretations can be requested for some sounds:

  1. Nonsyllabic front/back close vowels (i̯, y̯, ɯ̯, u̯) can be reinterpreted as palatal/velar approximants (j, ɥ, ɰ, w).
  2. Consonants that are “ambiguous” as to their exact place of articulation (such as t, n, ǁ, etc.), which are treated as alveolar by default, can be reinterpreted as dental or as postalveolar.
  3. Ad-hoc grapheme combinations used in the IPA (e.g., ä for the open central unrounded vowel) can be alternatively treated literally (so that ä becomes a centralized open front unrounded vowel).
from ipaparser import IPASymbol
from ipaparser.features import Place, SoundType

print([
    IPASymbol('i̯').features(SoundType),
    # frozenset({<SoundType.VOWEL: 'vowel'>})
    
    IPASymbol('i̯').features(role=SoundType.CONSONANT),
    # frozenset({<Manner.APPROXIMANT: 'approximant'>,
    #            <SoundType.CONSONANT: 'consonant'>,
    #            <PlaceCategory.DORSAL: 'dorsal'>,
    #            <Place.PALATAL: 'palatal'>,
    #            <SoundSubtype.SIMPLE_CONSONANT: 'simple consonant'>,
    #            <SymbolType.SOUND: 'sound'>,
    #            <Voicing.VOICED: 'voiced'>})
    
    IPASymbol('i̯').features(role='consonant') == IPASymbol('j').features(),
    # True
    
    IPASymbol('t').features(Place),
    # frozenset({<Place.ALVEOLAR: 'alveolar'>})
    
    IPASymbol('t').features(Place, role=Place.POSTALVEOLAR),
    # frozenset({<Place.POSTALVEOLAR: 'postalveolar'>})
    
    IPASymbol('t').features(Place, role=Place.DENTAL),
    # frozenset({<Place.DENTAL: 'dental'>})
    
    IPASymbol('t').features(Place, role=Place.BILABIAL),
    # None
    
    IPASymbol('ɹ̠̊˔').features(),
    # frozenset({<SoundType.CONSONANT: 'consonant'>,
    #            <PlaceCategory.CORONAL: 'coronal'>,
    #            <Manner.FRICATIVE: 'fricative'>,
    #            <Place.POSTALVEOLAR: 'postalveolar'>,
    #            <SoundSubtype.SIMPLE_CONSONANT: 'simple consonant'>,
    #            <SymbolType.SOUND: 'sound'>})
    
    IPASymbol('ɹ̠̊˔').features(role=Place.ALVEOLAR),
    # frozenset({<Place.ALVEOLAR: 'alveolar'>,
    #            <Manner.APPROXIMANT: 'approximant'>,
    #            <SoundType.CONSONANT: 'consonant'>,
    #            <PlaceCategory.CORONAL: 'coronal'>,
    #            <Articulation.RAISED: 'raised'>,
    #            <Articulation.RETRACTED: 'retracted'>,
    #            <SoundSubtype.SIMPLE_CONSONANT: 'simple consonant'>,
    #            <SymbolType.SOUND: 'sound'>})
])

Symbols preserve information about their constituents:

from ipaparser import IPASymbol
from ipaparser.features import Manner

print([
    IPASymbol('ts').features(Manner),
    # frozenset({<Manner.AFFRICATE: 'affricate'>,
    #            <Manner.SIBILANT: 'sibilant'>})
    
    IPASymbol('ts').components,
    # (IPASymbol('t'), IPASymbol('s'))
    
    IPASymbol('ts').left,
    # IPASymbol('t')
    
    IPASymbol('ts').left.features(Manner),
    # frozenset({<Manner.STOP: 'stop'>})
    
    IPASymbol('ts').right.features(Manner),
    # frozenset({<Manner.FRICATIVE: 'fricative'>,
    #            <Manner.SIBILANT: 'sibilant'>})
    
    IPASymbol('t͡s').components == IPASymbol('ts').components,
    # True
    
    IPASymbol('t').components,
    # None
    
    IPASymbol('d͢').is_known(),
    # False
    
    IPASymbol('d͢').components,
    # (IPASymbol('d'),)
    
    IPASymbol('d͢').components[0].is_known(),
    # True
])

IPASymbol objects can be compared with other symbols as well as with strings:

from ipaparser import IPASymbol

print([
    IPASymbol('ts') == IPASymbol('ts'),
    # True
    
    IPASymbol('ts') == 'ts',
    # True
    
    IPASymbol(symbol := 'ú') == symbol,
    # False: `IPASymbol` performs unicode normalizations on strings
    
    IPASymbol('ts').features() == IPASymbol('t͡s').features(),
    # True
    
    IPASymbol('ts') == IPASymbol('t͡s'),
    # False: underlying strings are compared, not features
    
    IPASymbol('ts').as_string(),
    # 'ts'
    
    str(IPASymbol('ts')),
    # 'ts'
])

IPAConfig

IPAConfig can be used to control some aspects of how transcriptions and individual symbols are parsed. The following parameters are available for configuration:

Parameter Type(s) Default Description
substitutions bool False Whether to perform normalizing substitutions that allow proper handling of commonly observed simplifications in IPA notation, such as the Latin letter g being used instead of the IPA’s dedicated character ɡ or a colon in place of the length mark ː.
brackets BracketStrategy
str
BracketStrategy.KEEP
'keep'
What to do with content in brackets denoting optional pronunciation, such as in [bə(j)ɪz⁽ʲ⁾ˈlʲivɨj]:
  • keep (and treat brackets as invalid IPA characters);
  • expand: [bəjɪzʲˈlʲivɨj];
  • strip: [bəɪzˈlʲivɨj].
combined Iterable[tuple[str, ...]] () Sound sequences to be treated as though they were connected by a tie, e.g., [('t', 's'), ('d̠', 'ɹ̠˔'), ('a', 'ɪ'), ('u̯', 'e', 'i̯')].
Note that, say, ('a', 'ɪ') will not match 'aɪ̯', and likewise ('a', 'ɪ̯') will not match 'aɪ'.
from ipaparser import IPA, IPAConfig, IPASymbol
from ipaparser.definitions import BracketStrategy

print([
    IPA('/ɹɪˈdʒɔɪndʒə(ɹ)/', IPAConfig(brackets=BracketStrategy.STRIP, combined=[('d', 'ʒ'), ('ɔ', 'ɪ')])),
    # IPA('/ɹɪˈd͡ʒɔ͡ɪnd͡ʒə/')
    
    IPASymbol('o(:)', IPAConfig(substitutions=True, brackets='expand'))
    # IPASymbol('oː')
])

load

Call this function to eagerly load and preprocess supporting data so that the first parse is a little faster. Compare:

from timeit import timeit

from ipaparser import IPA

print([
    timeit(lambda: IPA('[aɪ pʰiː eɪ]'), number=1),
    # 0.007

    timeit(lambda: IPA('[ˈpʰɹɛʔt͡sɫ̩]'), number=1),
    # 0.0004
])
from timeit import timeit

from ipaparser import IPA, load

load()

print([
    timeit(lambda: IPA('[aɪ pʰiː eɪ]'), number=1),
    # 0.0002

    timeit(lambda: IPA('[ˈpʰɹɛʔt͡sɫ̩]'), number=1),
    # 0.0004
])

Definitions

BracketStrategy

For usage, see IPAConfig.

Value String representation
BracketStrategy.KEEP keep
BracketStrategy.EXPAND expand
BracketStrategy.STRIP strip

TranscriptionType

For usage, see IPA transcription type.

Value String representation Brackets
TranscriptionType.PHONETIC phonetic [...]
TranscriptionType.PHONEMIC phonemic /.../
TranscriptionType.LITERAL literal ⟨...⟩

Exceptions

from ipaparser import IPA, IPAConfig, IPASymbol
from ipaparser.exceptions import (
    BracketStrategyError,
    CombinedLengthError,
    CombinedSoundError,
    EnclosingError,
    FeatureError,
    FeatureKindError,
    IncompatibleTypesError,
)

try:
    config = IPAConfig(brackets='custom')
except BracketStrategyError as e:
    print(str(e))  # 'custom' is not a valid strategy; use one of the following: 'keep'/'expand'/'strip'
    print(e.value)  # 'custom'

try:
    config = IPAConfig(combined=[('t', 's'), ('e',)])
except CombinedLengthError as e:
    print(str(e))  # A sound sequence to be combined must contain at least 2 elements (got 1: 'e')
    print(e.sequence)  # ('e',)

try:
    config = IPAConfig(combined=[('t', 's'), ('i', '̯ɐ')])
except CombinedSoundError as e:
    print(str(e))  # A sound to be combined must start with a non-combining character (got ' ̯ɐ')
    print(e.sound)  # '̯ɐ'

try:
    config = IPAConfig(combined=[('t', 's'), ('i', '')])
except CombinedSoundError as e:
    print(str(e))  # A sound to be combined cannot be empty
    print(e.sound)  # ''

try:
    ipa = IPA('aɪ pʰiː eɪ')
except EnclosingError as e:
    print(str(e))  # 'aɪ pʰiː eɪ' is not properly delimited (like [so] or /so/)
    print(e.transcription)  # 'aɪ pʰiː eɪ'

try:
    is_vowel = IPASymbol('a').has_feature('vocalic')
except FeatureError as e:
    print(str(e))  # Invalid feature: 'vocalic'
    print(e.value)  # 'vocalic'

try:
    features = IPASymbol('a').features('sonority')
except FeatureKindError as e:
    print(str(e))  # Invalid feature kind: 'sonority'
    print(e.value)  # 'sonority'

try:
    concatenated = IPA('[a]') + IPA('/b/')
except IncompatibleTypesError as e:
    print(str(e))  # '[a]' and '/b/' have incompatible types and cannot be concatenated
    print(e.left)  # '[a]'
    print(e.right)  # '/b/'

Features

Kind Feature Examples Sources
Airflow
'airflow'
EGRESSIVE_AIRFLOW
'egressive airflow'
[1]
INGRESSIVE_AIRFLOW
'ingressive airflow'
Articulation
'articulation'
APICAL
'apical'
, , ɾ̺ [1]
LAMINAL
'laminal'
, ,
ADVANCED
'advanced'
ɯ̟ᵝ, o̞˖, ʎ̟
RETRACTED
'retracted'
, ð̩˕˗ˠˀ,
CENTRALIZED
'centralized'
æ̈, ɑ̈ː,
MID_CENTRALIZED
'mid-centralized'
, ɯ̥̽, ɤ̽
RAISED
'raised'
ɛ̝, ʎ̥˔, e̝ˀ
LOWERED
'lowered'
ʏ̞, ò˕, ɛ̞̃
Aspiration
'aspiration'
ASPIRATED
'aspirated'
, kʰː, ǀʰ [1], [2]
UNASPIRATED
'unaspirated'
ʔ˭,
PREASPIRATED
'preaspirated'
ʰt͡s, ʰkʰʲ, ʰp
Backness
'backness'
FRONT
'front'
a, ɛ̀ː, ǽ [1]
NEAR_FRONT
'near-front'
ɪ, ʏ˞, ɪˑ
CENTRAL
'central'
ə, ʉ́, ɘ̯
NEAR_BACK
'near-back'
ʊ, ʊ́, ʊ̂
BACK
'back'
o, , ɯ̟̃ᵝ
BacknessCategory
'backness category'
ABOUT_FRONT
'about front'
a, ɪ̙́, ʏ˞
ABOUT_CENTRAL
'about central'
ə, ʉ́, ɘ̯
ABOUT_BACK
'about back'
o, ʊ̥,
BreakType
'break type'
SPACE
'space'
[1]
HYPHEN
'hyphen'
-
LINKING
'linking'
SYLLABLE_BREAK
'syllable break'
.
MINOR_BREAK
'minor break'
|
MAJOR_BREAK
'major break'
EQUIVALENCE
'equivalence'
~,
ELLIPSIS
'ellipsis'
Height
'height'
CLOSE
'close'
i, ṳ̌ː, ʉ̀ [1]
NEAR_CLOSE
'near-close'
ɪ, ʏ˞, ʊ̯ˑ
CLOSE_MID
'close-mid'
e, ɤː,
MID
'mid'
ə, ɚː, ɤ̞
OPEN_MID
'open-mid'
ɛ, ɔ̃ː, ɜ˞
NEAR_OPEN
'near-open'
ɐ, ǽ, ɐ̆
OPEN
'open'
a, ɒ̯̽ˀ, ɑ̃ː
HeightCategory
'height category'
ABOUT_CLOSE
'about close'
i, ʏ˞, ʊ̯ˑ
ABOUT_MID
'about mid'
ə, ɘ̯, ɜˑ
ABOUT_OPEN
'about open'
a, æːˀ, ɑ̆
Intonation
'intonation'
GLOBAL_RISE
'global rise'
[1]
GLOBAL_FALL
'global fall'
Length
'length'
EXTRA_SHORT
'extra-short'
, ø̆, [1]
HALF_LONG
'half-long'
äˑ, e̞ˑ, øˑ
LONG
'long'
, l̺ː, ɞː
EXTRA_LONG
'extra-long'
øːˑ, ɛːː, ɨˤːː
Manner
'manner'
AFFRICATE
'affricate'
t͡s, d͡zː, q͡χʷ [1]
APPROXIMANT
'approximant'
l, w̥ʰ, ɻ̊
FRICATIVE
'fricative'
s, ʂ͜ʲ,
LATERAL
'lateral'
l, t͡ɬʼ, ŋ͜ǁ
NASAL
'nasal'
n, , ɳ̩
SIBILANT
'sibilant'
s, ʑː, t͡ʃʲ
STOP
'stop'
k, tʲʰː,
TAP_FLAP
'tap/flap'
ɾ, ɽ̃, ɺ
TRILL
'trill'
r, ʀ̟, ʙ
CLICK
'click'
ǃ, ᵑǀʱ, ǁ
EJECTIVE
'ejective'
, ɬˤʼ, kʼʷ
IMPLOSIVE
'implosive'
ɓ, ʄ, ɗʲ
Phonation
'phonation'
BREATHY
'breathy'
, ṳ̌, ᵑǀʱ [1], [2]
CREAKY
'creaky'
æ̰ˀ, ɑ̰́ː,
WHISPERY
'whispery'
, ạ̀,
Place
'place'
BILABIAL
'bilabial'
m, b̥ˀ, p͡f [1]
LABIODENTAL
'labiodental'
f, ᶬv, ʋ̥
LINGUOLABIAL
'linguolabial'
, θ̼
DENTAL
'dental'
, ðˠ, ɡ̊ǀ
ALVEOLAR
'alveolar'
n, t͜ɬ,
POSTALVEOLAR
'postalveolar'
ʃ, d͡ʒʲ, t̠̚
RETROFLEX
'retroflex'
ʂ, ʈⁿ, ɽʷ
PALATAL
'palatal'
j, ɟʱ, kǂʰ
VELAR
'velar'
k, ɡ̞,
UVULAR
'uvular'
ʁ, q͡χʷ, ʀ̥
PHARYNGEAL_EPIGLOTTAL
'pharyngeal/epiglottal'
ħ, ʕː, ħʷ
GLOTTAL
'glottal'
ʔ, ɦʲ,
PlaceCategory
'place category'
LABIAL
'labial'
m, ᶬv, b̥ˀ [1]
CORONAL
'coronal'
n, t͡ʃʲ, ɻ̊
DORSAL
'dorsal'
k, q͡χʷ, ʎ̥˔
LARYNGEAL
'laryngeal'
ʔ, ʕː,
Release
'release'
NO_AUDIBLE_RELEASE
'no audible release'
, ʔ̚, d̪̚ [1]
NASAL_RELEASE
'nasal release'
tⁿ, t̪ⁿ, ʈⁿ
LATERAL_RELEASE
'lateral release'
, , ᵐbˡ
VOICELESS_DENTAL_FRICATIVE_RELEASE
'voiceless dental fricative release'
tᶿ
VOICELESS_ALVEOLAR_SIBILANT_FRICATIVE_RELEASE
'voiceless alveolar sibilant fricative release'
, , tˢʰ
VOICELESS_VELAR_FRICATIVE_RELEASE
'voiceless velar fricative release'
Roundedness
'roundedness'
ROUNDED
'rounded'
o, ṳ̌ː, ʉ̀ [1]
RoundednessModifier
'roundedness modifier'
MORE_ROUNDED
'more rounded'
ʌ̹, ə̹, ɔ̹ [1], [2]
LESS_ROUNDED
'less rounded'
, ɒ̜˔ː, ɔ̜ˑ
COMPRESSED
'compressed'
ɯ̟ᵝ, ɨ̃ᵝ, ɰᵝ
LABIAL_SPREADING
'labial spreading'
, u͍ː,
SecondaryModifier
'secondary modifier'
ADVANCED_TONGUE_ROOT
'advanced tongue root'
ɨ̘, ɤ̘, í̘ː [1], [2], [3], [4], [5]
RETRACTED_TONGUE_ROOT
'retracted tongue root'
ɪ̙̞, ɒ̙̀, ʊ̙́
R_COLORED
'r-colored'
ɚ, ɝˑ, ɑ˞
NASALIZED
'nasalized'
, õ̤, ɯ̟̃ᵝ
PRENASALIZED
'prenasalized'
ⁿdˠ, n͡t, ᶬv
VOICELESSLY_PRENASALIZED
'voicelessly prenasalized'
m̥͡bʷ
PRESTOPPED
'prestopped'
ᵈn, ᵇm, ᵈl
PREGLOTTALIZED
'preglottalized'
ˀt, ˀd
SecondaryPlace
'secondary place'
LABIALIZED
'labialized'
w, sʷː, ʍ [1]
PALATALIZED
'palatalized'
, ʃᶣ, k̚ʲ
VELARIZED
'velarized'
ɫ, l̩ˠ,
PHARYNGEALIZED
'pharyngealized'
t̪ˤ, , ɑˤː
GLOTTALIZED
'glottalized'
æ̰ˀ, ɔˀ,
SoundSubtype
'sound subtype'
SIMPLE_CONSONANT
'simple consonant'
n, ʑː, t͡ʃʲ [1], [2], [3], [4], [5]
DOUBLY_ARTICULATED_CONSONANT
'doubly articulated consonant'
ŋ͡m, k͡p̚, ɡ͡b
CONTOUR_CLICK
'contour click'
ᵏǃ͡χʼ, ǃ͡qʰ
SIMPLE_VOWEL
'simple vowel'
a, ə̹, ɯ̟̃ᵝ
DIPHTHONG
'diphthong'
ʉ͜i, u͡ɛ, e͡ɪ
TRIPHTHONG
'triphthong'
œ̞͡ɐ̯͡u̯
SoundType
'sound type'
CONSONANT
'consonant'
n, k͡p̚, ᵏǃ͡χʼ
VOWEL
'vowel'
a, ɔ͜y, ø̯
Strength
'strength'
STRONG
'strong'
t͡s͈, , [1]
WEAK
'weak'
StressSubtype
'stress subtype'
REGULAR_PRIMARY_STRESS
'regular primary stress'
ˈ [1], [2]
EXTRA_STRONG_PRIMARY_STRESS
'extra-strong primary stress'
ˈˈ
REGULAR_SECONDARY_STRESS
'regular secondary stress'
ˌ
EXTRA_WEAK_SECONDARY_STRESS
'extra-weak secondary stress'
ˌˌ
StressType
'stress type'
PRIMARY_STRESS
'primary stress'
ˈ, ˈˈ [1], [2]
SECONDARY_STRESS
'secondary stress'
ˌ, ˌˌ
SuprasegmentalType
'suprasegmental type'
STRESS
'stress'
ˈ, ˌ, ˈˈ [1], [2]
TONE
'tone'
˥, , ¹
INTONATION
'intonation'
,
AIRFLOW
'airflow'
,
Syllabicity
'syllabicity'
SYLLABIC
'syllabic'
, ŋ̍, r̩̂ [1], [2]
NONSYLLABIC
'nonsyllabic'
, ʏ̯ː, ɪ̯ˑ
ANAPTYCTIC
'anaptyctic'
SymbolType
'symbol type'
SOUND
'sound'
a, ɡʲʷ, ʰk
BREAK
'break'
., -,
SUPRASEGMENTAL
'suprasegmental'
ˈ, ,
Tone
'tone'
EXTRA_HIGH_TONE
'extra-high tone'
ɹ̩̋, , [1], [2]
HIGH_TONE
'high tone'
, ɑ̃́, ɯ́ᵝː
MID_TONE
'mid tone'
, ɵ̄, īː
LOW_TONE
'low tone'
, ù̘, æ̀ː
EXTRA_LOW_TONE
'extra-low tone'
ɨ̏ː,
RISING_TONE
'rising tone'
, ěː, m̩̌
FALLING_TONE
'falling tone'
êː, , ɔ̂
HIGH_MID_RISING_TONE
'high/mid rising tone'
a᷄ː, a᷄, u᷄
LOW_RISING_TONE
'low rising tone'
i᷅ː, a᷅ː, ɛ᷅
HIGH_FALLING_TONE
'high falling tone'
a᷇, u᷇ː, u᷇
LOW_MID_FALLING_TONE
'low/mid falling tone'
ɪ᷆, e᷆ː, ə᷆
PEAKING_TONE
'peaking tone'
a̤᷈ː, e̤᷈ː, ṳ᷈ː
DIPPING_TONE
'dipping tone'
a᷉
ToneLetter
'tone letter'
HIGH_TONE_LETTER
'high tone letter'
˥ [1]
HALF_HIGH_TONE_LETTER
'half-high tone letter'
˦, ˦ˀ
MID_TONE_LETTER
'mid tone letter'
˧, , ˧ˀ
HALF_LOW_TONE_LETTER
'half-low tone letter'
˨, ˨ˀ
LOW_TONE_LETTER
'low tone letter'
˩, ˩̰ˀ, ˩̤
ToneNumber
'tone number'
TONE_0
'tone 0'
[1], [2]
TONE_1
'tone 1'
¹
TONE_2
'tone 2'
²
TONE_3
'tone 3'
³
TONE_4
'tone 4'
TONE_5
'tone 5'
TONE_6
'tone 6'
TONE_7
'tone 7'
TONE_NUMBER_SEPARATOR
'tone number separator'
ToneStep
'tone step'
UPSTEP
'upstep'
[1]
DOWNSTEP
'downstep'
ToneType
'tone type'
TONE_LETTER
'tone letter'
˥, ˦, ˨ [1], [2], [3]
TONE_NUMBER
'tone number'
, ¹, ²
TONE_STEP
'tone step'
,
Voicing
'voicing'
VOICED
'voiced'
n, , ɡʰ [1], [2]
DEVOICED
'devoiced'
, ɯ̟̊, ĭ̥

Feature typing and helper methods

You can iterate through the supported features using the tuple FEATURE_KINDS. Feature kinds (such as Height, Manner, or Voicing) are all string-based enums subclassed off of the base class Feature. Feature kinds themselves have the type Type[Feature], aliased FeatureKind. The .kind_values() method can be called to retrieve supported string representations of the feature kind.

from ipaparser.features import Feature, FEATURE_KINDS, FeatureKind

kind: FeatureKind
for kind in FEATURE_KINDS:
    print(kind)
    # <enum 'Airflow'>, <enum 'Articulation'>, ..., <enum 'ToneType'>, <enum 'Voicing'>
    print(kind.kind_values())
    # ('Airflow', 'airflow'), ('Articulation', 'articulation'), ..., ('ToneType', 'tone type'), ('Voicing', 'voicing')
    
    feature: Feature
    for feature in kind:
        print(feature)
        # Airflow.EGRESSIVE_AIRFLOW, Airflow.INGRESSIVE_AIRFLOW, Articulation.APICAL, ..., Voicing.DEVOICED
        print(feature.value)
        # 'egressive airflow', 'ingressive airflow', 'apical', ..., 'devoiced'

FeatureSet (which can be imported from ipaparser.features) is an alias for frozenset[Feature]. The return type of IPASymbol.features() is hence Optional[FeatureSet].

Finally, the .derived() and .extend() methods of individual features may be called to obtain basic hierarchical relationships between features:

from ipaparser.features import Place, ToneLetter

print(Place.ALVEOLAR.derived())  # get the most specific derived feature
# PlaceCategory.CORONAL

print(ToneLetter.HIGH_TONE_LETTER.extend())  # all the derived features, including the caller
# frozenset({<ToneLetter.HIGH_TONE_LETTER: 'high tone letter'>,
#            <ToneType.TONE_LETTER: 'tone letter'>,
#            <SuprasegmentalType.TONE: 'tone'>,
#            <SymbolType.SUPRASEGMENTAL: 'suprasegmental'>})

About

A phonetically-aware Python library for parsing IPA transcriptions.

Resources

License

Stars

Watchers

Forks

Languages