diff --git a/Cargo.toml b/Cargo.toml index af82c4893..10d69c524 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -55,6 +55,7 @@ needless_return = "allow" # Explicit returns are needed from time to time redundant_guards = "allow" # Currently broken for some cases, might enable later into_iter_without_iter = "allow" # This is only going to fire on some internal types, doesn't matter much struct_excessive_bools = "allow" # I have yet to find one case of this being useful +struct_field_names = "allow" [workspace.lints.rustdoc] broken_intra_doc_links = "deny" diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index dc46a9856..2680431c6 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -50,6 +50,12 @@ path = "fuzz_targets/apefile_read_from.rs" test = false doc = false +[[bin]] +name = "ebmlfile_read_from" +path = "fuzz_targets/ebmlfile_read_from.rs" +test = false +doc = false + [[bin]] name = "flacfile_read_from" path = "fuzz_targets/flacfile_read_from.rs" diff --git a/fuzz/fuzz_targets/ebmlfile_read_from.rs b/fuzz/fuzz_targets/ebmlfile_read_from.rs new file mode 100644 index 000000000..21b46a9ed --- /dev/null +++ b/fuzz/fuzz_targets/ebmlfile_read_from.rs @@ -0,0 +1,11 @@ +#![no_main] + +use std::io::Cursor; + +use libfuzzer_sys::fuzz_target; +use lofty::config::ParseOptions; +use lofty::file::AudioFile; + +fuzz_target!(|data: Vec| { + let _ = lofty::ebml::EbmlFile::read_from(&mut Cursor::new(data), ParseOptions::new()); +}); diff --git a/lofty/Cargo.toml b/lofty/Cargo.toml index 6c35f09ce..c1e7810f0 100644 --- a/lofty/Cargo.toml +++ b/lofty/Cargo.toml @@ -18,7 +18,7 @@ byteorder = { workspace = true } # ID3 compressed frames flate2 = { version = "1.0.30", optional = true } # Proc macros -lofty_attr = "0.11.0" +lofty_attr = { path = "../lofty_attr" } # Debug logging log = "0.4.22" # OGG Vorbis/Opus diff --git a/lofty/src/ape/tag/mod.rs b/lofty/src/ape/tag/mod.rs index daecf4285..ddfbc3d42 100644 --- a/lofty/src/ape/tag/mod.rs +++ b/lofty/src/ape/tag/mod.rs @@ -915,6 +915,7 @@ mod tests { fn
skip_reading_cover_art() { let p = Picture::new_unchecked( PictureType::CoverFront, + None, Some(MimeType::Jpeg), None, std::iter::repeat(0).take(50).collect::>(), diff --git a/lofty/src/ebml/element_reader.rs b/lofty/src/ebml/element_reader.rs new file mode 100644 index 000000000..870ba5789 --- /dev/null +++ b/lofty/src/ebml/element_reader.rs @@ -0,0 +1,785 @@ +use crate::ebml::vint::{ElementId, VInt}; +use crate::error::Result; +use crate::macros::{decode_err, try_vec}; + +use std::io::{self, Read}; +use std::ops::{Deref, DerefMut}; + +use byteorder::{BigEndian, ReadBytesExt}; +use lofty_attr::ebml_master_elements; + +#[derive(Copy, Clone, Eq, PartialEq, Debug)] +pub struct ElementHeader { + pub(crate) id: ElementId, + pub(crate) size: VInt, +} + +impl ElementHeader { + fn read(reader: &mut R, max_id_length: u8, max_vint_length: u8) -> Result + where + R: Read, + { + Ok(Self { + id: ElementId::parse(reader, max_id_length)?, + size: VInt::::parse(reader, max_vint_length)?, + }) + } +} + +#[derive(Copy, Clone, Eq, PartialEq, Debug)] +pub enum ElementDataType { + SignedInt, + UnsignedInt, + Float, + String, + Utf8, + Date, + Master, + Binary, +} + +#[derive(Copy, Clone, Debug)] +struct MasterElement { + id: ElementIdent, + children: &'static [(ElementId, ChildElementDescriptor)], +} + +#[derive(Copy, Clone, Debug)] +pub(crate) struct ChildElementDescriptor { + pub(crate) ident: ElementIdent, + pub(crate) data_type: ElementDataType, +} + +// This macro helps us define the EBML master elements and their children. +// +// It will generate the `ElementIdent` enum, and the `master_elements` function. +// +// The `ElementIdent` enum is used to represent **ONLY** the elements that we care about. +// When one of these elements is encountered, `ElementReader::next()` will return an +// `ElementReaderYield::Master` or `ElementReaderYield::Child`. Otherwise, it will return +// `ElementReaderYield::Unknown`. 
+// +// The `master_elements` function is used to map the element IDs to their respective +// `MasterElement` struct, which contains the element's identifier and its children. +// This is used to determine the children of a master element when it is encountered. +// +// If a master element is a child to another master element, it will be defined BOTH as a +// child element in the parent master element, and as a top level master element. +// +// To define a master element, use the following syntax: +// +// ELEMENT_IDENT_VARIANT: { +// id: 0x1234_5678, +// children: [ +// CHILD_ELEMENT_VARIANT: { 0x1234_5679, DataType }, +// CHILD_ELEMENT_VARIANT2: { 0x1234_567A, DataType }, +// ], +// }, +// +// If `CHILD_ELEMENT_VARIANT2` is a master element, it should ALSO be defined at the top level with +// its own children. +// +// Then when parsing, `ELEMENT_IDENT_VARIANT`, `CHILD_ELEMENT_VARIANT`, and `CHILD_ELEMENT_VARIANT2` +// will be available as `ElementIdent` variants. +ebml_master_elements! 
{ + EBML: { + id: 0x1A45_DFA3, + children: [ + EBMLVersion: { 0x4286, UnsignedInt }, + EBMLReadVersion: { 0x42F7, UnsignedInt }, + EBMLMaxIDLength: { 0x42F2, UnsignedInt }, + EBMLMaxSizeLength: { 0x42F3, UnsignedInt }, + DocType: { 0x4282, String }, + DocTypeExtension: { 0x4281, Master }, + DocTypeVersion: { 0x4287, UnsignedInt }, + DocTypeReadVersion: { 0x4285, UnsignedInt }, + ], + }, + DocTypeExtension: { + id: 0x4281, + children: [ + DocTypeExtensionName: { 0x4283, String }, + DocTypeExtensionVersion: { 0x4284, UnsignedInt }, + ], + }, + + // The Root Element that contains all other Top-Level Elements + Segment: { + id: 0x1853_8067, + children: [ + // SeekHead: { 0x114D_9B74, Master }, + Info: { 0x1549_A966, Master }, + Cluster: { 0x1F43_B675, Master }, + Tracks: { 0x1654_AE6B, Master }, + Tags: { 0x1254_C367, Master }, + Attachments: { 0x1941_A469, Master }, + Chapters: { 0x1043_A770, Master }, + ], + }, + + // segment.seekHead + // SeekHead: { + // id: 0x114D_9B74, + // children: [ + // Seek: { 0x4DBB, Master }, + // ], + // }, + + // segment.info + Info: { + id: 0x1549_A966, + children: [ + TimecodeScale: { 0x2AD7_B1, UnsignedInt }, + MuxingApp: { 0x4D80, Utf8 }, + WritingApp: { 0x5741, Utf8 }, + Duration: { 0x4489, Float }, + ], + }, + + // segment.cluster + Cluster: { + id: 0x1F43_B675, + children: [ + Timestamp: { 0xE7, UnsignedInt }, + SimpleBlock: { 0xA3, Binary }, + BlockGroup: { 0xA0, Master }, + ], + }, + + // segment.cluster.blockGroup + BlockGroup: { + id: 0xA0, + children: [ + Block: { 0xA1, Binary }, + ] + }, + + // segment.tracks + Tracks: { + id: 0x1654_AE6B, + children: [ + TrackEntry: { 0xAE, Master }, + ], + }, + + // segment.tracks.trackEntry + TrackEntry: { + id: 0xAE, + children: [ + TrackNumber: { 0xD7, UnsignedInt }, + TrackUid: { 0x73C5, UnsignedInt }, + TrackType: { 0x83, UnsignedInt }, + FlagEnabled: { 0xB9, UnsignedInt }, + FlagDefault: { 0x88, UnsignedInt }, + DefaultDuration: { 0x23E3_83, UnsignedInt }, + TrackTimecodeScale: { 
0x2331_59, Float }, + Language: { 0x22B5_9C, String }, + LanguageBCP47: { 0x22B59D, String }, + CodecID: { 0x86, String }, + CodecPrivate: { 0x63A2, Binary }, + CodecName: { 0x258688, Utf8 }, + CodecDelay: { 0x56AA, UnsignedInt }, + SeekPreRoll: { 0x56BB, UnsignedInt }, + Audio: { 0xE1, Master }, + ], + }, + + // segment.tracks.trackEntry.audio + Audio: { + id: 0xE1, + children: [ + SamplingFrequency: { 0xB5, Float }, + OutputSamplingFrequency: { 0x78B5, Float }, + Channels: { 0x9F, UnsignedInt }, + BitDepth: { 0x6264, UnsignedInt }, + Emphasis: { 0x52F1, UnsignedInt }, + ], + }, + + + // segment.tags + Tags: { + id: 0x1254_C367, + children: [ + Tag: { 0x7373, Master }, + ], + }, + + // segment.tags.tag + Tag: { + id: 0x7373, + children: [ + Targets: { 0x63C0, Master }, + SimpleTag: { 0x67C8, Master }, + ], + }, + + // segment.tags.tag.targets + Targets: { + id: 0x63C0, + children: [ + TargetTypeValue: { 0x68CA, UnsignedInt }, + TargetType: { 0x63CA, String }, + TagTrackUID: { 0x63C5, UnsignedInt }, + TagEditionUID: { 0x63C9, UnsignedInt }, + TagChapterUID: { 0x63C4, UnsignedInt }, + TagAttachmentUID: { 0x63C6, UnsignedInt }, + ], + }, + + // segment.tags.tag.simpleTag + SimpleTag: { + id: 0x67C8, + children: [ + TagName: { 0x45A3, Utf8 }, + TagLanguage: { 0x447A, String }, + TagLanguageBCP47: { 0x447B, String }, + TagDefault: { 0x4484, UnsignedInt }, + TagDefaultBogus: { 0x44B4, UnsignedInt }, + TagString: { 0x4487, Utf8 }, + TagBinary: { 0x4485, Binary }, + ], + }, + + // segment.attachments + Attachments: { + id: 0x1941_A469, + children: [ + AttachedFile: { 0x61A7, Master }, + ], + }, + + // segment.attachments.attachedFile + AttachedFile: { + id: 0x61A7, + children: [ + FileDescription: { 0x467E, String }, + FileName: { 0x466E, Utf8 }, + FileMimeType: { 0x4660, String }, + FileData: { 0x465C, Binary }, + FileUID: { 0x46AE, UnsignedInt }, + FileReferral: { 0x4675, Binary }, + FileUsedStartTime: { 0x4661, UnsignedInt }, + FileUsedEndTime: { 0x4662, UnsignedInt }, 
+ ], + }, +} + +const MAX_DEPTH: u8 = 16; +const ROOT_DEPTH: u8 = 1; + +#[derive(Copy, Clone, Debug)] +struct Depth { + level: u8, + length: VInt, +} + +#[derive(Copy, Clone, Debug)] +struct MasterElementContext { + element: MasterElement, + depth: Depth, +} + +#[derive(Debug)] +struct ElementReaderContext { + depth: u8, + masters: Vec, + /// Maximum size in octets of all element IDs + max_id_length: u8, + /// Maximum size in octets of all element data sizes + max_size_length: u8, + /// Whether the reader is locked to the master element at `lock_depth` + /// + /// This is set with [`ElementReader::lock`], and is used to prevent + /// the reader from reading past the end of the current master element. + locked: bool, + /// The depths at which we are locked + /// + /// When we reach the end of one lock and unlock the reader, we need + /// to know which depth to lock the reader at again (if any). + /// + /// This will **always** be sorted, so the current lock will be at the end. + lock_depths: Vec, +} + +impl Default for ElementReaderContext { + fn default() -> Self { + Self { + depth: 0, + masters: Vec::with_capacity(MAX_DEPTH as usize), + // https://www.rfc-editor.org/rfc/rfc8794.html#name-ebmlmaxidlength-element + max_id_length: 4, + // https://www.rfc-editor.org/rfc/rfc8794.html#name-ebmlmaxsizelength-element + max_size_length: 8, + locked: false, + lock_depths: Vec::with_capacity(MAX_DEPTH as usize), + } + } +} + +impl ElementReaderContext { + fn current_master(&self) -> Option { + if self.depth == 0 { + return None; + } + + self.masters.get((self.depth - 1) as usize).copied() + } + + fn current_master_length(&self) -> VInt { + assert!(self.depth > 0); + self.current_master() + .expect("should have current master element") + .depth + .length + } + + fn propagate_length_change(&mut self, length: u64) { + for master in &mut self.masters { + master.depth.length = master.depth.length.saturating_sub(length); + } + } + + fn remaining_lock_length(&self) -> VInt { + 
assert!(self.locked && !self.lock_depths.is_empty()); + + let lock_depth = *self.lock_depths.last().unwrap(); + self.masters[lock_depth - 1].depth.length + } +} + +#[derive(Debug)] +pub(crate) enum ElementReaderYield { + Master((ElementIdent, VInt)), + Child((ChildElementDescriptor, VInt)), + Unknown(ElementHeader), + Eof, +} + +impl ElementReaderYield { + pub fn ident(&self) -> Option { + match self { + ElementReaderYield::Master((ident, _)) => Some(*ident as u64), + ElementReaderYield::Child((child, _)) => Some(child.ident as u64), + ElementReaderYield::Unknown(header) => Some(header.id.value()), + _ => None, + } + } + + pub fn size(&self) -> Option { + match self { + ElementReaderYield::Master((_, size)) | ElementReaderYield::Child((_, size)) => { + Some(size.value()) + }, + ElementReaderYield::Unknown(header) => Some(header.size.value()), + _ => None, + } + } +} + +/// An EBML element reader. +pub struct ElementReader { + reader: R, + pub(self) ctx: ElementReaderContext, +} + +impl Read for ElementReader +where + R: Read, +{ + fn read(&mut self, buf: &mut [u8]) -> io::Result { + if self.ctx.locked { + let lock_len = self.ctx.remaining_lock_length().value(); + if buf.len() > lock_len as usize { + return Err(io::Error::new( + io::ErrorKind::UnexpectedEof, + "Cannot read past the end of the current master element", + )); + } + } + + let ret = self.reader.read(buf)?; + if self.ctx.current_master().is_none() { + return Ok(ret); + } + + self.ctx.propagate_length_change(ret as u64); + + let current_master = self + .ctx + .current_master() + .expect("should have current master element"); + if current_master.depth.length == 0 { + self.goto_previous_master()?; + } + + Ok(ret) + } +} + +impl ElementReader +where + R: Read, +{ + pub(crate) fn new(reader: R) -> Self { + Self { + reader, + ctx: ElementReaderContext::default(), + } + } + + pub(crate) fn set_max_id_length(&mut self, len: u8) { + self.ctx.max_id_length = len + } + + pub(crate) fn set_max_size_length(&mut self, 
len: u8) { + self.ctx.max_size_length = len + } + + fn push_new_master(&mut self, master: MasterElement, size: VInt) -> Result<()> { + log::debug!("New master element: {:?}", master.id); + + if self.ctx.depth == MAX_DEPTH { + decode_err!(@BAIL Ebml, "Maximum depth reached"); + } + + // If we are at the root level, we do not increment the depth + // since we are not actually inside a master element. + // For example, we are moving from \EBML to \Segment. + let at_root_level = self.ctx.depth == ROOT_DEPTH && self.ctx.current_master_length() == 0; + if at_root_level { + assert_eq!(self.ctx.masters.len(), 1); + self.ctx.masters.clear(); + } else { + self.ctx.depth += 1; + } + + self.ctx.masters.push(MasterElementContext { + element: master, + depth: Depth { + level: self.ctx.depth, + length: size, + }, + }); + + Ok(()) + } + + fn goto_previous_master(&mut self) -> io::Result<()> { + let lock_depth = self + .ctx + .lock_depths + .last() + .copied() + .unwrap_or(ROOT_DEPTH as usize); + if lock_depth == self.ctx.depth as usize || self.ctx.depth == 0 { + return Ok(()); + } + + if self.ctx.depth == ROOT_DEPTH { + return Err(io::Error::new( + io::ErrorKind::Other, + "Cannot go to previous master element, already at root", + )); + } + + while self.ctx.current_master_length() == 0 + && (self.ctx.depth as usize != lock_depth && self.ctx.depth != ROOT_DEPTH) + { + self.ctx.depth -= 1; + let _ = self.ctx.masters.pop(); + } + + Ok(()) + } + + fn goto_next_master(&mut self) -> Result { + self.exhaust_current_master()?; + + let header = ElementHeader::read(self, self.ctx.max_id_length, self.ctx.max_size_length)?; + let Some(master) = master_elements().get(&header.id) else { + // We encountered an unknown master element + return Ok(ElementReaderYield::Unknown(header)); + }; + + self.push_new_master(*master, header.size)?; + + Ok(ElementReaderYield::Master((master.id, header.size))) + } + + pub(crate) fn next(&mut self) -> Result { + let Some(current_master) = 
self.ctx.current_master() else { + return self.goto_next_master(); + }; + + if self.ctx.locked && self.ctx.remaining_lock_length() == 0 { + return Ok(ElementReaderYield::Eof); + } + + if current_master.depth.length == 0 { + return self.goto_next_master(); + } + + let header = ElementHeader::read(self, self.ctx.max_id_length, self.ctx.max_size_length)?; + + let Some((_, child)) = current_master + .element + .children + .iter() + .find(|(id, _)| *id == header.id) + else { + return Ok(ElementReaderYield::Unknown(header)); + }; + + if child.data_type == ElementDataType::Master { + let master = *master_elements() + .get(&header.id) + .expect("Nested master elements should be defined at this level."); + + self.push_new_master(master, header.size)?; + + // We encountered a nested master element + return Ok(ElementReaderYield::Master((child.ident, header.size))); + } + + Ok(ElementReaderYield::Child((*child, header.size))) + } + + pub(crate) fn exhaust_current_master(&mut self) -> Result<()> { + let Some(current_master) = self.ctx.current_master() else { + return Ok(()); + }; + + self.skip(current_master.depth.length.value())?; + Ok(()) + } + + pub(crate) fn lock(&mut self) { + log::trace!("New lock at depth: {}", self.ctx.depth); + + self.ctx.locked = true; + self.ctx.lock_depths.push(self.ctx.depth as usize); + } + + pub(crate) fn unlock(&mut self) { + let _ = self.ctx.lock_depths.pop(); + + let [.., last] = &*self.ctx.lock_depths else { + // We can only ever *truly* unlock if we are at the root level. 
+ log::trace!("Lock freed"); + + self.ctx.locked = false; + return; + }; + + log::trace!("Moving lock to depth: {}", last); + } + + pub(crate) fn children(&mut self) -> ElementChildIterator<'_, R> { + self.lock(); + ElementChildIterator::new(self) + } + + pub(crate) fn skip(&mut self, length: u64) -> Result<()> { + log::trace!("Skipping {} bytes", length); + + let current_master_length = self.ctx.current_master_length(); + if length > current_master_length.value() { + decode_err!(@BAIL Ebml, "Cannot skip past the end of the current master element") + } + + std::io::copy(&mut self.by_ref().take(length), &mut io::sink())?; + Ok(()) + } + + pub(crate) fn skip_element(&mut self, element_header: ElementHeader) -> Result<()> { + log::debug!( + "Encountered unknown EBML element: {:X}, skipping", + element_header.id.0 + ); + self.skip(element_header.size.value())?; + Ok(()) + } + + pub(crate) fn read_signed_int(&mut self, element_length: u64) -> Result { + // https://www.rfc-editor.org/rfc/rfc8794.html#section-7.1 + // A Signed Integer Element MUST declare a length from zero to eight octets + if element_length > 8 { + decode_err!(@BAIL Ebml, "Invalid size for signed int element") + } + + let mut buf = [0; 8]; + self.read_exact(&mut buf[8 - element_length as usize..])?; + let value = u64::from_be_bytes(buf); + + // Signed Integers are stored with two's complement notation with the leftmost bit being the sign bit. 
+ let value_width = element_length * 8; + let shift = (64 - value_width) as u32; + Ok((value.wrapping_shl(shift) as i64).wrapping_shr(shift)) + } + + pub(crate) fn read_unsigned_int(&mut self, element_length: u64) -> Result { + // https://www.rfc-editor.org/rfc/rfc8794.html#section-7.2 + // An Unsigned Integer Element MUST declare a length from zero to eight octets + if element_length > 8 { + decode_err!(@BAIL Ebml, "Invalid size for unsigned int element") + } + + let mut buf = [0; 8]; + self.read_exact(&mut buf[8 - element_length as usize..])?; + Ok(u64::from_be_bytes(buf)) + } + + /// Same as `read_unsigned_int`, but will warn if the value is out of range. + pub(crate) fn read_flag(&mut self, element_length: u64) -> Result { + let val = self.read_unsigned_int(element_length)?; + if val > 1 { + log::warn!("Flag value `{}` is out of range, assuming true", val); + } + + Ok(val != 0) + } + + pub(crate) fn read_float(&mut self, element_length: u64) -> Result { + // https://www.rfc-editor.org/rfc/rfc8794.html#section-7.3 + // A Float Element MUST declare a length of either zero octets (0 bit), + // four octets (32 bit), or eight octets (64 bit) + Ok(match element_length { + 0 => 0.0, + 4 => f64::from(self.read_f32::()?), + 8 => self.read_f64::()?, + _ => decode_err!(@BAIL Ebml, "Invalid size for float element"), + }) + } + + pub(crate) fn read_string(&mut self, element_length: u64) -> Result { + // https://www.rfc-editor.org/rfc/rfc8794.html#section-7.4 + // A String Element MUST declare a length in octets from zero to VINTMAX + let mut content = try_vec![0; element_length as usize]; + self.read_exact(&mut content)?; + + // https://www.rfc-editor.org/rfc/rfc8794.html#section-13 + // Null Octets, which are octets with all bits set to zero, + // MAY follow the value of a String Element or UTF-8 Element to serve as a terminator. 
+ if let Some(i) = content.iter().rposition(|x| *x != 0) { + let new_len = i + 1; + content.truncate(new_len); + } + + String::from_utf8(content).map_err(Into::into) + } + + pub(crate) fn read_utf8(&mut self, element_length: u64) -> Result { + // https://www.rfc-editor.org/rfc/rfc8794.html#section-7.5 + // A UTF-8 Element MUST declare a length in octets from zero to VINTMAX + + // Since the UTF-8 and String elements are both just turned into `String`s, + // we can just reuse the `read_string` method. + self.read_string(element_length) + } + + pub(crate) fn read_date(&mut self) -> Result { + todo!() + } + + pub(crate) fn read_binary(&mut self, element_length: u64) -> Result> { + // https://www.rfc-editor.org/rfc/rfc8794.html#section-7.8 + // A Binary Element MUST declare a length in octets from zero to VINTMAX. + + if element_length > VInt::::MAX { + decode_err!(@BAIL Ebml, "Binary element length is too large") + } + + let mut content = try_vec![0; element_length as usize]; + self.read_exact(&mut content)?; + Ok(content) + } +} + +/// An iterator over the children of an EBML master element. +/// +/// This is created by calling [`ElementReader::children`]. +/// +/// This is essentially a fancy wrapper around `ElementReader` that: +/// +/// * Automatically skips unknown elements ([`ElementReaderYield::Unknown`]). +/// * [`Deref`]s to `ElementReader` so you can access the reader's methods. +/// * Unlocks the reader when dropped. +/// * If the reader is locked at multiple depths (meaning [`ElementReader::children`] was called +/// multiple times), it will move the lock to the previously locked depth. 
+pub(crate) struct ElementChildIterator<'a, R> +where + R: Read, +{ + reader: &'a mut ElementReader, +} + +impl<'a, R> ElementChildIterator<'a, R> +where + R: Read, +{ + pub(crate) fn new(reader: &'a mut ElementReader) -> Self { + Self { reader } + } + + pub(crate) fn next(&mut self) -> Result> { + match self.reader.next() { + Ok(ElementReaderYield::Unknown(header)) => { + self.reader.skip_element(header)?; + self.next() + }, + Err(e) => Err(e), + element => element.map(Some), + } + } + + pub(crate) fn master_exhausted(&self) -> bool { + let lock_depth = *self + .reader + .ctx + .lock_depths + .last() + .expect("a child iterator should always have a lock depth"); + assert!(lock_depth <= self.reader.ctx.depth as usize); + + self.reader.ctx.remaining_lock_length() == 0 + } +} + +impl Read for ElementChildIterator<'_, R> +where + R: Read, +{ + fn read(&mut self, buf: &mut [u8]) -> io::Result { + self.reader.read(buf) + } +} + +impl Deref for ElementChildIterator<'_, R> +where + R: Read, +{ + type Target = ElementReader; + + fn deref(&self) -> &Self::Target { + self.reader + } +} + +impl DerefMut for ElementChildIterator<'_, R> +where + R: Read, +{ + fn deref_mut(&mut self) -> &mut Self::Target { + self.reader + } +} + +impl Drop for ElementChildIterator<'_, R> +where + R: Read, +{ + fn drop(&mut self) { + self.reader.unlock(); + } +} diff --git a/lofty/src/ebml/mod.rs b/lofty/src/ebml/mod.rs new file mode 100644 index 000000000..ce5a179f6 --- /dev/null +++ b/lofty/src/ebml/mod.rs @@ -0,0 +1,26 @@ +//! 
EBML specific items +mod element_reader; +mod properties; +mod read; +pub(crate) mod tag; +mod vint; + +use lofty_attr::LoftyFile; + +// Exports + +pub use properties::*; +pub use tag::*; +pub use vint::*; + +/// An EBML file +#[derive(LoftyFile, Default)] +#[lofty(read_fn = "read::read_from")] +#[lofty(internal_write_module_do_not_use_anywhere_else)] +pub struct EbmlFile { + /// An EBML tag + #[lofty(tag_type = "Matroska")] + pub(crate) ebml_tag: Option, + /// The file's audio properties + pub(crate) properties: EbmlProperties, +} diff --git a/lofty/src/ebml/properties.rs b/lofty/src/ebml/properties.rs new file mode 100644 index 000000000..6d7e8105f --- /dev/null +++ b/lofty/src/ebml/properties.rs @@ -0,0 +1,404 @@ +use super::Language; +use crate::properties::FileProperties; + +use std::time::Duration; + +/// Properties from the EBML header +/// +/// These are present for all EBML formats. +#[derive(Debug, Clone, PartialEq, Default)] +pub struct EbmlHeaderProperties { + pub(crate) version: u64, + pub(crate) read_version: u64, + pub(crate) max_id_length: u8, + pub(crate) max_size_length: u8, + pub(crate) doc_type: String, + pub(crate) doc_type_version: u64, + pub(crate) doc_type_read_version: u64, +} + +impl EbmlHeaderProperties { + /// The EBML version, should be `1` + pub fn version(&self) -> u64 { + self.version + } + + /// The minimum EBML version required to read the file, <= [`Self::version()`] + pub fn read_version(&self) -> u64 { + self.read_version + } + + /// The maximum length of an EBML element ID, in octets + pub fn max_id_length(&self) -> u8 { + self.max_id_length + } + + /// The maximum length of an EBML element size, in octets + pub fn max_size_length(&self) -> u8 { + self.max_size_length + } + + /// A string that describes the type of document + pub fn doc_type(&self) -> &str { + &self.doc_type + } + + /// The version of DocType interpreter used to create the EBML Document + pub fn doc_type_version(&self) -> u64 { + self.doc_type_version + } + + 
/// The minimum DocType interpreter version needed to read the EBML Document + pub fn doc_type_read_version(&self) -> u64 { + self.doc_type_read_version + } +} + +/// An EBML DocType extension +#[derive(Debug, Clone, PartialEq, Default)] +pub struct EbmlExtension { + pub(crate) name: String, + pub(crate) version: u64, +} + +impl EbmlExtension { + /// The name of the extension + pub fn name(&self) -> &str { + &self.name + } + + /// The version of the extension + pub fn version(&self) -> u64 { + self.version + } +} + +/// Information about a segment +#[derive(Debug, Clone, PartialEq)] +pub struct SegmentInfo { + pub(crate) timestamp_scale: u64, + pub(crate) muxing_app: String, + pub(crate) writing_app: String, + pub(crate) duration: Option, +} + +impl SegmentInfo { + /// Base unit for Segment Ticks and Track Ticks, in nanoseconds. + /// + /// A TimestampScale value of 1000000 means scaled timestamps in the Segment are expressed in milliseconds. + pub fn timestamp_scale(&self) -> u64 { + self.timestamp_scale + } + + /// Muxing application or library (example: "libmatroska-0.4.3"). + /// + /// Includes the full name of the application or library followed by the version number. + pub fn muxing_app(&self) -> &str { + &self.muxing_app + } + + /// Writing application (example: "mkvmerge-0.3.3"). + /// + /// Includes the full name of the application followed by the version number. + pub fn writing_app(&self) -> &str { + &self.writing_app + } + + /// The duration of the segment + /// + /// NOTE: This information is not always present in the segment, in which case + /// [`EbmlProperties::duration`] should be used. 
+ pub fn duration(&self) -> Option { + self.duration + } +} + +impl Default for SegmentInfo { + fn default() -> Self { + Self { + // https://matroska.org/technical/elements.html + timestamp_scale: 1_000_000, + muxing_app: String::new(), + writing_app: String::new(), + duration: None, + } + } +} + +/// A full descriptor for an audio track +#[derive(Debug, Clone, PartialEq)] +pub struct AudioTrackDescriptor { + pub(crate) number: u64, + pub(crate) uid: u64, + pub(crate) enabled: bool, + pub(crate) default: bool, + pub(crate) language: Language, + pub(crate) default_duration: u64, + pub(crate) codec_id: String, + pub(crate) codec_private: Option>, + pub(crate) codec_name: Option, + pub(crate) settings: AudioTrackSettings, +} + +impl Default for AudioTrackDescriptor { + fn default() -> Self { + AudioTrackDescriptor { + // Note, these values are not spec compliant and will hopefully be overwritten when + // parsing. It doesn't really matter though, since we aren't an encoder. + number: 0, + uid: 0, + default_duration: 0, + codec_id: String::new(), + + // Spec-compliant defaults + enabled: true, + default: true, + language: Language::Iso639_2(String::from("eng")), + codec_private: None, + codec_name: None, + settings: AudioTrackSettings::default(), + } + } +} + +impl AudioTrackDescriptor { + /// The track number + pub fn number(&self) -> u64 { + self.number + } + + /// A unique ID to identify the track + pub fn uid(&self) -> u64 { + self.uid + } + + /// Whether the track is usable + pub fn is_enabled(&self) -> bool { + self.enabled + } + + /// Whether the track is eligible for automatic selection + pub fn is_default(&self) -> bool { + self.default + } + + /// The language of the track, in the Matroska languages form + /// + /// NOTE: See [basics](https://matroska.org/technical/basics.html#language-codes) on language codes. 
+ pub fn language(&self) -> &Language { + &self.language + } + + /// The default duration of the track + pub fn default_duration(&self) -> u64 { + self.default_duration + } + + /// The codec ID of the track + /// + /// NOTE: See [Matroska codec RFC] for more info. + /// + /// [Matroska codec RFC]: https://matroska.org/technical/codec_specs.html + pub fn codec_id(&self) -> &str { + &self.codec_id + } + + /// Private data only known to the codec + pub fn codec_private(&self) -> Option<&[u8]> { + self.codec_private.as_deref() + } + + /// A human-readable string for the [codec_id](AudioTrackDescriptor::codec_id) + pub fn codec_name(&self) -> Option<&str> { + self.codec_name.as_deref() + } + + /// The audio settings of the track + pub fn settings(&self) -> &AudioTrackSettings { + &self.settings + } +} + +/// Settings for an audio track +#[derive(Debug, Clone, PartialEq, Default)] +pub struct AudioTrackSettings { + // Provided to us for free + pub(crate) sampling_frequency: f64, + pub(crate) output_sampling_frequency: f64, + pub(crate) channels: u8, + pub(crate) bit_depth: Option, + pub(crate) emphasis: Option, + + // Need to be calculated + pub(crate) bitrate: Option, +} + +impl AudioTrackSettings { + /// The sampling frequency of the track + pub fn sampling_frequency(&self) -> f64 { + self.sampling_frequency + } + + /// Real output sampling frequency in Hz (used for SBR techniques). + /// + /// The default value for `output_sampling_frequency` of the same TrackEntry is equal to the [`Self::sampling_frequency`]. 
+ pub fn output_sampling_frequency(&self) -> f64 { + self.output_sampling_frequency + } + + /// The number of channels in the track + pub fn channels(&self) -> u8 { + self.channels + } + + /// The bit depth of the track + pub fn bit_depth(&self) -> Option { + self.bit_depth + } + + /// Audio emphasis applied on audio samples + pub fn emphasis(&self) -> Option { + self.emphasis + } +} + +/// A rarely-used decoder hint that the file must be de-emphasized +#[allow(missing_docs)] +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum EbmlAudioTrackEmphasis { + CdAudio = 1, + Reserved = 2, + CcitJ17 = 3, + Fm50 = 4, + Fm75 = 5, + PhonoRiaa = 10, + PhonoIecN78 = 11, + PhonoTeldec = 12, + PhonoEmi = 13, + PhonoColumbiaLp = 14, + PhonoLondon = 15, + PhonoNartb = 16, +} + +impl EbmlAudioTrackEmphasis { + /// Get the audio emphasis from a `u8` + pub fn from_u8(value: u8) -> Option { + match value { + 1 => Some(Self::CdAudio), + 2 => Some(Self::Reserved), + 3 => Some(Self::CcitJ17), + 4 => Some(Self::Fm50), + 5 => Some(Self::Fm75), + 10 => Some(Self::PhonoRiaa), + 11 => Some(Self::PhonoIecN78), + 12 => Some(Self::PhonoTeldec), + 13 => Some(Self::PhonoEmi), + 14 => Some(Self::PhonoColumbiaLp), + 15 => Some(Self::PhonoLondon), + 16 => Some(Self::PhonoNartb), + _ => None, + } + } +} + +/// EBML audio properties +#[derive(Debug, Clone, PartialEq, Default)] +pub struct EbmlProperties { + pub(crate) header: EbmlHeaderProperties, + pub(crate) extensions: Vec, + pub(crate) segment_info: SegmentInfo, + pub(crate) audio_tracks: Vec, +} + +impl EbmlProperties { + /// The EBML header properties + /// + /// This includes the properties that are part of the EBML base specification. + /// All Matroska-specific properties are in [`Self::segment_info`] and [`Self::default_audio_track`]. 
+ pub fn header(&self) -> &EbmlHeaderProperties { + &self.header + } + + /// The DocType extensions + pub fn extensions(&self) -> &[EbmlExtension] { + &self.extensions + } + + /// Information from the `\Segment\Info` element + pub fn segment_info(&self) -> &SegmentInfo { + &self.segment_info + } + + /// All audio tracks in the file + /// + /// This includes all audio tracks in the Matroska `\Segment\Tracks` element. + pub fn audio_tracks(&self) -> &[AudioTrackDescriptor] { + &self.audio_tracks + } + + /// Information about the default audio track + /// + /// The "default" track is selected as: + /// 1. The first audio track with its `default` flag set + /// 2. If 1 fails, just grab the first audio track with its `enabled` flag set + pub fn default_audio_track(&self) -> Option<&AudioTrackDescriptor> { + if let Some(position) = self.default_audio_track_position() { + return self.audio_tracks.get(position); + } + + None + } + + // TODO: Actually calculate from cluster + /// The duration of the default audio track + /// + /// NOTE: see [`EbmlProperties::default_audio_track`] + /// + /// This will always use the duration written in `\Segment\Info` if present. Otherwise, a zero + /// duration is returned, as calculating it from `\Segment\Cluster` data is not yet implemented. + pub fn duration(&self) -> Duration { + self.segment_info.duration().unwrap_or_default() + } + + /// Audio bitrate (kbps) + /// + /// NOTE: This is the bitrate of the default audio track, see [`EbmlProperties::default_audio_track`] + /// for what this means.
+ pub fn bitrate(&self) -> Option { + self.default_audio_track() + .and_then(|track| track.settings.bitrate) + } + + pub(crate) fn default_audio_track_position(&self) -> Option { + self.audio_tracks + .iter() + .position(|track| track.default) + .or_else(|| { + // Otherwise, it's normal to just pick the first enabled track + self.audio_tracks.iter().position(|track| track.enabled) + }) + } +} + +impl From for FileProperties { + fn from(input: EbmlProperties) -> Self { + let Some(default_audio_track) = input.default_audio_track() else { + let mut properties = FileProperties::default(); + if let Some(duration) = input.segment_info.duration { + properties.duration = duration; + } + + return properties; + }; + + Self { + duration: input.duration(), + overall_bitrate: input.bitrate(), + audio_bitrate: input.bitrate(), + sample_rate: Some(default_audio_track.settings.sampling_frequency as u32), + bit_depth: default_audio_track.settings.bit_depth, + channels: Some(default_audio_track.settings.channels), + channel_mask: None, // TODO: Will require reading into track data + } + } +} diff --git a/lofty/src/ebml/read.rs b/lofty/src/ebml/read.rs new file mode 100644 index 000000000..0b0325058 --- /dev/null +++ b/lofty/src/ebml/read.rs @@ -0,0 +1,169 @@ +mod segment; +mod segment_attachments; +mod segment_chapters; +mod segment_cluster; +mod segment_info; +mod segment_tags; +mod segment_tracks; + +use super::EbmlFile; +use crate::config::ParseOptions; +use crate::ebml::element_reader::{ElementHeader, ElementIdent, ElementReader, ElementReaderYield}; +use crate::ebml::vint::ElementId; +use crate::ebml::EbmlProperties; +use crate::error::Result; +use crate::macros::decode_err; + +use std::io::{Read, Seek}; + +const SUPPORTED_DOC_TYPES: &[&str] = &["matroska", "webm"]; + +const CRC32_ID: ElementId = ElementId(0xBF); +const VOID_ID: ElementId = ElementId(0xEC); + +pub(super) fn read_from(reader: &mut R, parse_options: ParseOptions) -> Result +where + R: Read + Seek, +{ + // Default 
initialize the properties up here since we end up discovering + // new ones all scattered throughout the file + let mut properties = EbmlProperties::default(); + + let ebml_tag; + + let mut element_reader = ElementReader::new(reader); + + // First we need to go through the elements in the EBML master element + read_ebml_header(&mut element_reader, parse_options, &mut properties)?; + + log::debug!("File verified to be EBML"); + + loop { + let res = element_reader.next()?; + match res { + ElementReaderYield::Master((ElementIdent::Segment, _)) => { + ebml_tag = segment::read_from(&mut element_reader, parse_options, &mut properties)?; + break; + }, + // CRC-32 (0xBF) and Void (0xEC) elements can occur at the top level. + // This is valid, and we can just skip them. + ElementReaderYield::Unknown(ElementHeader { + id: id @ (CRC32_ID | VOID_ID), + size, + }) => { + log::debug!("Skipping global element: {:X}", id); + element_reader.skip(size.value())?; + continue; + }, + _ => { + decode_err!(@BAIL Ebml, "File does not contain a segment element") + }, + } + } + + Ok(EbmlFile { + ebml_tag, + properties, + }) +} + +fn read_ebml_header( + element_reader: &mut ElementReader, + parse_options: ParseOptions, + properties: &mut EbmlProperties, +) -> Result<()> +where + R: Read + Seek, +{ + log::trace!("Reading EBML header"); + + match element_reader.next() { + Ok(ElementReaderYield::Master((ElementIdent::EBML, _))) => {}, + Ok(_) => decode_err!(@BAIL Ebml, "File does not start with an EBML master element"), + Err(e) => return Err(e), + } + + let mut child_reader = element_reader.children(); + while let Some(child) = child_reader.next()? 
{ + let ident; + let size; + + match child { + // The only expected master element in the header is `DocTypeExtension` + ElementReaderYield::Master((ElementIdent::DocTypeExtension, size)) => { + child_reader.skip(size.value())?; + continue; + }, + ElementReaderYield::Master(_) => { + decode_err!( + @BAIL Ebml, + "Unexpected master element in the EBML header" + ); + }, + ElementReaderYield::Child((child, size_)) => { + ident = child.ident; + size = size_; + }, + ElementReaderYield::Unknown(header) => { + child_reader.skip_element(header)?; + continue; + }, + ElementReaderYield::Eof => break, + } + + if ident == ElementIdent::EBMLMaxIDLength { + properties.header.max_id_length = child_reader.read_unsigned_int(size.value())? as u8; + child_reader.set_max_id_length(properties.header.max_id_length); + continue; + } + + if ident == ElementIdent::EBMLMaxSizeLength { + properties.header.max_size_length = child_reader.read_unsigned_int(size.value())? as u8; + child_reader.set_max_size_length(properties.header.max_size_length); + continue; + } + + if ident == ElementIdent::DocType { + properties.header.doc_type = child_reader.read_string(size.value())?; + if !SUPPORTED_DOC_TYPES.contains(&properties.header.doc_type.as_str()) { + decode_err!( + @BAIL Ebml, + "Unsupported EBML DocType" + ); + } + + continue; + } + + // Anything else in the header is unnecessary, and only read for the properties + // struct + if !parse_options.read_properties { + child_reader.skip(size.value())?; + continue; + } + + match ident { + ElementIdent::EBMLVersion => { + properties.header.version = child_reader.read_unsigned_int(size.value())? + }, + ElementIdent::EBMLReadVersion => { + properties.header.read_version = child_reader.read_unsigned_int(size.value())? + }, + ElementIdent::DocTypeVersion => { + properties.header.doc_type_version = child_reader.read_unsigned_int(size.value())? 
+ }, + _ => child_reader.skip(size.value())?, + } + } + + debug_assert!( + child_reader.master_exhausted(), + "There should be no remaining elements in the header" + ); + + if properties.header.doc_type.is_empty() { + decode_err!(@BAIL Ebml, "Unable to determine EBML DocType"); + } + + Ok(()) +} diff --git a/lofty/src/ebml/read/segment.rs b/lofty/src/ebml/read/segment.rs new file mode 100644 index 000000000..c2718f2de --- /dev/null +++ b/lofty/src/ebml/read/segment.rs @@ -0,0 +1,87 @@ +use super::{segment_attachments, segment_cluster, segment_info, segment_tags, segment_tracks}; +use crate::config::ParseOptions; +use crate::ebml::element_reader::{ElementHeader, ElementIdent, ElementReader, ElementReaderYield}; +use crate::ebml::properties::EbmlProperties; +use crate::ebml::tag::MatroskaTag; +use crate::ebml::ElementId; +use crate::error::Result; + +use std::io::{Read, Seek}; + +pub(super) fn read_from( + element_reader: &mut ElementReader, + parse_options: ParseOptions, + properties: &mut EbmlProperties, +) -> Result> +where + R: Read + Seek, +{ + let mut tags = None; + let mut children_reader = element_reader.children(); + + while let Some(child) = children_reader.next()? { + match child { + ElementReaderYield::Master((id, size)) => { + match id { + ElementIdent::Info if parse_options.read_properties => { + segment_info::read_from( + &mut children_reader.children(), + parse_options, + properties, + )?; + }, + ElementIdent::Cluster if parse_options.read_properties => { + segment_cluster::read_from( + &mut children_reader.children(), + parse_options, + properties, + )? 
+ }, + ElementIdent::Tracks if parse_options.read_properties => { + segment_tracks::read_from( + &mut children_reader.children(), + parse_options, + properties, + )?; + }, + // TODO: ElementIdent::Chapters + ElementIdent::Tags if parse_options.read_tags => { + let mut tag = tags.unwrap_or_default(); + + segment_tags::read_from( + &mut children_reader.children(), + parse_options, + &mut tag, + )?; + + tags = Some(tag); + }, + ElementIdent::Attachments if parse_options.read_cover_art => { + let mut tag = tags.unwrap_or_default(); + + segment_attachments::read_from( + &mut children_reader.children(), + parse_options, + &mut tag, + )?; + + tags = Some(tag); + }, + _ => { + // We do not end up using information from all of the segment + // elements, so we can just skip any useless ones. + + children_reader.skip_element(ElementHeader { + id: ElementId(id as u64), + size, + })?; + }, + } + }, + ElementReaderYield::Eof => break, + _ => unreachable!("Unhandled child element in \\Segment: {child:?}"), + } + } + + Ok(tags) +} diff --git a/lofty/src/ebml/read/segment_attachments.rs b/lofty/src/ebml/read/segment_attachments.rs new file mode 100644 index 000000000..4db7a2865 --- /dev/null +++ b/lofty/src/ebml/read/segment_attachments.rs @@ -0,0 +1,118 @@ +use crate::config::ParseOptions; +use crate::ebml::element_reader::{ + ElementChildIterator, ElementIdent, ElementReader, ElementReaderYield, +}; +use crate::ebml::{AttachedFile, MatroskaTag}; +use crate::error::Result; +use crate::macros::decode_err; +use crate::picture::MimeType; + +use std::borrow::Cow; +use std::io::{Read, Seek}; + +pub(super) fn read_from( + children_reader: &mut ElementChildIterator<'_, R>, + _parse_options: ParseOptions, + tag: &mut MatroskaTag, +) -> Result<()> +where + R: Read + Seek, +{ + while let Some(child) = children_reader.next()? 
{ + match child { + ElementReaderYield::Master((ElementIdent::AttachedFile, _size)) => { + let attached_file = read_attachment(children_reader)?; + tag.attached_files.push(attached_file); + }, + ElementReaderYield::Eof => break, + _ => unreachable!("Unhandled child element in \\Segment\\Attachments: {child:?}"), + } + } + + Ok(()) +} + +fn read_attachment(element_reader: &mut ElementReader) -> Result> +where + R: Read + Seek, +{ + let mut description = None; + let mut file_name = None; + let mut mime_type = None; + let mut file_data = None; + let mut uid = None; + let mut referral = None; + let mut used_start_time = None; + let mut used_end_time = None; + + let mut children_reader = element_reader.children(); + while let Some(child) = children_reader.next()? { + let ElementReaderYield::Child((child, size)) = child else { + match child { + ElementReaderYield::Eof => break, + _ => unreachable!( + "Unhandled child element in \\Segment\\Attachments\\AttachedFile: {child:?}" + ), + } + }; + + let size = size.value(); + match child.ident { + ElementIdent::FileDescription => { + description = Some(children_reader.read_string(size)?); + }, + ElementIdent::FileName => { + file_name = Some(children_reader.read_string(size)?); + }, + ElementIdent::FileMimeType => { + let mime_str = children_reader.read_string(size)?; + mime_type = Some(MimeType::from_str(&mime_str)); + }, + ElementIdent::FileData => { + file_data = Some(children_reader.read_binary(size)?); + }, + ElementIdent::FileUID => { + uid = Some(children_reader.read_unsigned_int(size)?); + }, + ElementIdent::FileReferral => { + referral = Some(children_reader.read_binary(size)?); + }, + ElementIdent::FileUsedStartTime => { + used_start_time = Some(children_reader.read_unsigned_int(size)?); + }, + ElementIdent::FileUsedEndTime => { + used_end_time = Some(children_reader.read_unsigned_int(size)?); + }, + _ => unreachable!( + "Unhandled child element in \\Segment\\Attachments\\AttachedFile: {child:?}" + ), + } + } + + let 
Some(file_name) = file_name else { + decode_err!(@BAIL Ebml, "File name is required for an attached file"); + }; + + let Some(mime_type) = mime_type else { + decode_err!(@BAIL Ebml, "MIME type is required for an attached file"); + }; + + let Some(file_data) = file_data else { + decode_err!(@BAIL Ebml, "File data is required for an attached file"); + }; + + let Some(uid) = uid else { + decode_err!(@BAIL Ebml, "UID is required for an attached file"); + }; + + Ok(AttachedFile { + description: description.map(Cow::Owned), + file_name: Cow::Owned(file_name), + mime_type, + file_data: Cow::Owned(file_data), + uid, + referral: referral.map(Cow::Owned), + used_start_time, + used_end_time, + }) +} diff --git a/lofty/src/ebml/read/segment_chapters.rs b/lofty/src/ebml/read/segment_chapters.rs new file mode 100644 index 000000000..b75cb5119 --- /dev/null +++ b/lofty/src/ebml/read/segment_chapters.rs @@ -0,0 +1,18 @@ +use crate::config::ParseOptions; +use crate::ebml::element_reader::ElementChildIterator; +use crate::ebml::MatroskaTag; +use crate::error::Result; + +use std::io::{Read, Seek}; + +#[allow(dead_code)] +pub(super) fn read_from( + _children_reader: &mut ElementChildIterator<'_, R>, + _parse_options: ParseOptions, + _tag: &mut MatroskaTag, +) -> Result<()> +where + R: Read + Seek, +{ + unimplemented!("\\Segment\\Chapters") +} diff --git a/lofty/src/ebml/read/segment_cluster.rs b/lofty/src/ebml/read/segment_cluster.rs new file mode 100644 index 000000000..a17ffde63 --- /dev/null +++ b/lofty/src/ebml/read/segment_cluster.rs @@ -0,0 +1,175 @@ +use crate::config::ParseOptions; +use crate::ebml::element_reader::{ + ChildElementDescriptor, ElementChildIterator, ElementIdent, ElementReaderYield, +}; +use crate::ebml::properties::EbmlProperties; +use crate::ebml::VInt; +use crate::error::Result; + +use std::io::{Read, Seek}; + +pub(super) fn read_from( + children_reader: &mut ElementChildIterator<'_, R>, + parse_options: ParseOptions, + properties: &mut EbmlProperties, +) -> 
Result<()> +where + R: Read + Seek, +{ + // TODO: Support Tracks appearing after Cluster (should implement SeekHead first) + let Some(default_audio_track_position) = properties.default_audio_track_position() else { + log::warn!( + "No default audio track found (does \\Segment\\Cluster appear before \ + \\Segment\\Tracks?)" + ); + children_reader.exhaust_current_master()?; + return Ok(()); + }; + + let default_audio_track = &properties.audio_tracks[default_audio_track_position]; + + let target_track_number = default_audio_track.number(); + let mut total_audio_data_size = 0u64; + + while let Some(child) = children_reader.next()? { + let ident; + let size; + match child { + ElementReaderYield::Master((master_ident, master_size)) => { + ident = master_ident; + size = master_size; + }, + ElementReaderYield::Child((descriptor, child_size)) => { + ident = descriptor.ident; + size = child_size; + }, + ElementReaderYield::Unknown(unknown) => { + children_reader.skip_element(unknown)?; + continue; + }, + ElementReaderYield::Eof => break, + } + + match ident { + ElementIdent::Timestamp => { + // TODO: Fancy timestamp durations + children_reader.skip(size.value())?; + continue; + }, + ElementIdent::SimpleBlock => { + let (block_is_applicable, header_size) = check_block( + children_reader, + parse_options, + size.value(), + target_track_number, + properties.header.max_size_length, + )?; + + if !block_is_applicable { + continue; + } + + total_audio_data_size += size.value() - u64::from(header_size); + }, + ElementIdent::BlockGroup => read_block_group( + &mut children_reader.children(), + parse_options, + properties, + target_track_number, + &mut total_audio_data_size, + )?, + _ => unreachable!("Unhandled child element in \\Segment\\Cluster: {child:?}"), + } + } + + if total_audio_data_size == 0 { + log::warn!("No audio data found, audio bitrate will be 0, duration may be 0"); + return Ok(()); + } + + let duration_millis = u128::from(properties.duration().as_secs()); + if 
duration_millis == 0 { + log::warn!("Duration is zero, cannot calculate bitrate"); + return Ok(()); + } + + let default_audio_track = &mut properties.audio_tracks[default_audio_track_position]; // TODO + + let bitrate_bps = ((u128::from(total_audio_data_size) * 8) / duration_millis) as u32; + default_audio_track.settings.bitrate = Some(bitrate_bps / 1000); + + Ok(()) +} + +fn read_block_group( + children_reader: &mut ElementChildIterator<'_, R>, + parse_options: ParseOptions, + properties: &mut EbmlProperties, + target_track_number: u64, + total_audio_data_size: &mut u64, +) -> Result<()> +where + R: Read + Seek, +{ + while let Some(child) = children_reader.next()? { + let size; + match child { + ElementReaderYield::Child(( + ChildElementDescriptor { + ident: ElementIdent::Block, + .. + }, + child_size, + )) => { + size = child_size; + }, + ElementReaderYield::Unknown(unknown) => { + children_reader.skip_element(unknown)?; + continue; + }, + _ => unimplemented!( + "Unhandled child element in \\Segment\\Cluster\\BlockGroup: {child:?}" + ), + } + + let (block_is_applicable, header_size) = check_block( + children_reader, + parse_options, + size.value(), + target_track_number, + properties.header.max_size_length, + )?; + + if !block_is_applicable { + continue; + } + + *total_audio_data_size += size.value() - u64::from(header_size); + } + + Ok(()) +} + +fn check_block( + children_reader: &mut ElementChildIterator<'_, R>, + _parse_options: ParseOptions, + block_size: u64, + target_track_number: u64, + max_size_length: u8, +) -> Result<(bool, u8)> +where + R: Read + Seek, +{ + // The block header is Track number (variable), timestamp (i16), and flags (u8) + const NON_VARIABLE_BLOCK_HEADER_SIZE: u8 = 2 /* Timestamp */ + 1 /* Flags */; + + let track_number = VInt::::parse(children_reader, max_size_length)?; + let track_number_octets = track_number.octet_length(); + + children_reader.skip(block_size - u64::from(track_number_octets))?; + if track_number != target_track_number 
{ + return Ok((false, track_number_octets + NON_VARIABLE_BLOCK_HEADER_SIZE)); + } + + Ok((true, track_number_octets + NON_VARIABLE_BLOCK_HEADER_SIZE)) +} diff --git a/lofty/src/ebml/read/segment_info.rs b/lofty/src/ebml/read/segment_info.rs new file mode 100644 index 000000000..a049817c9 --- /dev/null +++ b/lofty/src/ebml/read/segment_info.rs @@ -0,0 +1,82 @@ +use crate::config::{ParseOptions, ParsingMode}; +use crate::ebml::element_reader::{ElementChildIterator, ElementIdent, ElementReaderYield}; +use crate::ebml::properties::EbmlProperties; +use crate::error::Result; +use crate::macros::decode_err; + +use std::io::{Read, Seek}; +use std::time::Duration; + +pub(super) fn read_from( + children_reader: &mut ElementChildIterator<'_, R>, + parse_options: ParseOptions, + properties: &mut EbmlProperties, +) -> Result<()> +where + R: Read + Seek, +{ + // Deal with duration after parsing, in case the timestamp scale appears after it + // for some reason. + let mut duration = None; + + while let Some(child) = children_reader.next()? { + match child { + ElementReaderYield::Master((id, size)) => { + // We do not end up using information from any of the nested master + // elements, so we can just skip them. 
+ + log::debug!("Skipping EBML master element: {:?}", id); + children_reader.skip(size.value())?; + continue; + }, + ElementReaderYield::Child((child, size)) => { + match child.ident { + ElementIdent::TimecodeScale => { + properties.segment_info.timestamp_scale = + children_reader.read_unsigned_int(size.value())?; + }, + ElementIdent::MuxingApp => { + let muxing_app = children_reader.read_utf8(size.value())?; + properties.segment_info.muxing_app = muxing_app; + }, + ElementIdent::WritingApp => { + let writing_app = children_reader.read_utf8(size.value())?; + properties.segment_info.writing_app = writing_app; + }, + ElementIdent::Duration => { + duration = Some(children_reader.read_float(size.value())?); + }, + _ => { + // We do not end up using information from all of the segment + // elements, so we can just skip any useless ones. + + log::debug!("Skipping EBML child element: {:?}", child.ident); + children_reader.skip(size.value())?; + continue; + }, + } + }, + ElementReaderYield::Unknown(header) => { + children_reader.skip_element(header)?; + continue; + }, + _ => break, + } + } + + if properties.segment_info.timestamp_scale == 0 { + log::warn!("Segment.Info.TimecodeScale is 0, which is invalid"); + if parse_options.parsing_mode == ParsingMode::Strict { + decode_err!(@BAIL Ebml, "Segment.Info.TimecodeScale must be non-zero"); + } + + return Ok(()); + } + + if let Some(duration) = duration { + let scaled_duration = duration * properties.segment_info.timestamp_scale as f64; + properties.segment_info.duration = Some(Duration::from_nanos(scaled_duration as u64)); + } + + Ok(()) +} diff --git a/lofty/src/ebml/read/segment_tags.rs b/lofty/src/ebml/read/segment_tags.rs new file mode 100644 index 000000000..1d5f3e9e1 --- /dev/null +++ b/lofty/src/ebml/read/segment_tags.rs @@ -0,0 +1,222 @@ +use crate::config::ParseOptions; +use crate::ebml::element_reader::{ElementChildIterator, ElementIdent, ElementReaderYield}; +use crate::ebml::{Language, MatroskaTag, SimpleTag, Tag, 
TagValue, Target, TargetType}; +use crate::error::Result; +use crate::macros::decode_err; + +use std::io::{Read, Seek}; + +pub(super) fn read_from( + children_reader: &mut ElementChildIterator<'_, R>, + _parse_options: ParseOptions, + tag: &mut MatroskaTag, +) -> Result<()> +where + R: Read + Seek, +{ + while let Some(child) = children_reader.next()? { + match child { + ElementReaderYield::Master((ElementIdent::Tag, _size)) => { + let tag_element = read_tag(&mut children_reader.children())?; + tag.tags.push(tag_element); + }, + ElementReaderYield::Eof => break, + _ => unimplemented!("Unhandled child element in \\Segment\\Tags: {child:?}"), + } + } + + Ok(()) +} + +fn read_tag(children_reader: &mut ElementChildIterator<'_, R>) -> Result> +where + R: Read + Seek, +{ + let mut target = None; + let mut simple_tags = Vec::new(); + + while let Some(child) = children_reader.next()? { + let ElementReaderYield::Master((master, _size)) = child else { + match child { + ElementReaderYield::Eof => break, + _ => { + unreachable!("Unhandled child element in \\Segment\\Tags\\Tag: {child:?}") + }, + } + }; + + match master { + ElementIdent::Targets => { + if target.is_some() { + decode_err!( + @BAIL Ebml, + "Duplicate Targets element found in \\Segment\\Tags\\Tag" + ); + } + + target = Some(read_targets(&mut children_reader.children())?); + }, + ElementIdent::SimpleTag => { + simple_tags.push(read_simple_tag(&mut children_reader.children())?) + }, + _ => { + unimplemented!("Unhandled child element in \\Segment\\Tags\\Tag: {master:?}"); + }, + } + } + + let Some(target) = target else { + decode_err!(@BAIL Ebml, "\\Segment\\Tags\\Tag is missing the required `Targets` element"); + }; + + Ok(Tag { + target: Some(target), + simple_tags, + }) +} + +fn read_targets(children_reader: &mut ElementChildIterator<'_, R>) -> Result +where + R: Read + Seek, +{ + let mut target = Target::default(); + + while let Some(child) = children_reader.next()? 
{ + let ElementReaderYield::Child((child, size)) = child else { + match child { + ElementReaderYield::Eof => break, + _ => unreachable!( + "Unhandled child element in \\Segment\\Tags\\Tag\\Targets: {child:?}" + ), + } + }; + + match child.ident { + ElementIdent::TargetTypeValue => { + let value = children_reader.read_unsigned_int(size.value())?; + + // Casting the `u64` to `u8` is safe because the value is checked to be within + // the range of `TargetType` anyway. + let target_type = TargetType::try_from(value as u8)?; + target.target_type = target_type; + }, + ElementIdent::TargetType => { + target.name = Some(children_reader.read_string(size.value())?); + }, + ElementIdent::TagTrackUID => { + let mut track_uids = target.track_uids.unwrap_or_default(); + track_uids.push(children_reader.read_unsigned_int(size.value())?); + target.track_uids = Some(track_uids); + }, + ElementIdent::TagEditionUID => { + let mut edition_uids = target.edition_uids.unwrap_or_default(); + edition_uids.push(children_reader.read_unsigned_int(size.value())?); + target.edition_uids = Some(edition_uids); + }, + ElementIdent::TagChapterUID => { + let mut chapter_uids = target.chapter_uids.unwrap_or_default(); + chapter_uids.push(children_reader.read_unsigned_int(size.value())?); + target.chapter_uids = Some(chapter_uids); + }, + ElementIdent::TagAttachmentUID => { + let mut attachment_uids = target.attachment_uids.unwrap_or_default(); + attachment_uids.push(children_reader.read_unsigned_int(size.value())?); + target.attachment_uids = Some(attachment_uids); + }, + _ => { + unreachable!("Unhandled child element in \\Segment\\Tags\\Tag\\Targets: {child:?}") + }, + } + } + + Ok(target) +} + +fn read_simple_tag( + children_reader: &mut ElementChildIterator<'_, R>, +) -> Result> +where + R: Read + Seek, +{ + let mut name = None; + let mut language = None; + let mut default = false; + let mut value = None; + + while let Some(child) = children_reader.next()? 
{ + let ElementReaderYield::Child((child, size)) = child else { + match child { + ElementReaderYield::Eof => break, + _ => unreachable!( + "Unhandled child element in \\Segment\\Tags\\Tag\\SimpleTag: {child:?}" + ), + } + }; + + match child.ident { + ElementIdent::TagName => { + name = Some(children_reader.read_string(size.value())?); + }, + ElementIdent::TagLanguage => { + if language.is_some() { + log::warn!("Duplicate language found in SimpleTag, ignoring"); + children_reader.skip(size.value())?; + continue; + } + + language = Some(Language::Iso639_2( + children_reader.read_string(size.value())?, + )); + }, + ElementIdent::TagLanguageBCP47 => { + if language.is_some() { + log::warn!("Duplicate language found in SimpleTag, ignoring"); + children_reader.skip(size.value())?; + continue; + } + + language = Some(Language::Bcp47(children_reader.read_string(size.value())?)); + }, + ElementIdent::TagDefault => { + default = children_reader.read_flag(size.value())?; + }, + ElementIdent::TagString => { + if value.is_some() { + log::warn!("Duplicate value found in SimpleTag, ignoring"); + children_reader.skip(size.value())?; + continue; + } + + value = Some(TagValue::from(children_reader.read_string(size.value())?)); + }, + ElementIdent::TagBinary => { + if value.is_some() { + log::warn!("Duplicate value found in SimpleTag, ignoring"); + children_reader.skip(size.value())?; + continue; + } + + value = Some(TagValue::from(children_reader.read_binary(size.value())?)); + }, + _ => { + unreachable!( + "Unhandled child element in \\Segment\\Tags\\Tag\\SimpleTag: {child:?}" + ); + }, + } + } + + let Some(name) = name else { + decode_err!( + @BAIL Ebml, + "SimpleTag is missing the required TagName element" + ); + }; + + Ok(SimpleTag { + name: name.into(), + language: language.unwrap_or_default(), + default, + value, + }) +} diff --git a/lofty/src/ebml/read/segment_tracks.rs b/lofty/src/ebml/read/segment_tracks.rs new file mode 100644 index 000000000..da0f4639a --- /dev/null +++ 
b/lofty/src/ebml/read/segment_tracks.rs
@@ -0,0 +1,204 @@
+use crate::config::ParseOptions;
+use crate::ebml::element_reader::{
+    ChildElementDescriptor, ElementChildIterator, ElementIdent, ElementReaderYield,
+};
+use crate::ebml::properties::EbmlProperties;
+use crate::ebml::{AudioTrackDescriptor, EbmlAudioTrackEmphasis, Language};
+use crate::error::Result;
+
+use std::io::{Read, Seek};
+
+/// Read all `TrackEntry` elements of `\Segment\Tracks` into `properties.audio_tracks`
+pub(super) fn read_from<R>(
+    children_reader: &mut ElementChildIterator<'_, R>,
+    parse_options: ParseOptions,
+    properties: &mut EbmlProperties,
+) -> Result<()>
+where
+    R: Read + Seek,
+{
+    while let Some(child) = children_reader.next()? {
+        match child {
+            ElementReaderYield::Master((ElementIdent::TrackEntry, _size)) => {
+                // NOTE(review): Sibling readers hand nested masters a fresh `children()`
+                //               iterator, this one reuses the parent's -- confirm this is intended
+                read_track_entry(children_reader, parse_options, &mut properties.audio_tracks)?;
+            },
+            ElementReaderYield::Eof => break,
+            // Nothing else here is of use to us, skip it rather than panicking on untrusted input
+            ElementReaderYield::Master((_, size)) | ElementReaderYield::Child((_, size)) => {
+                children_reader.skip(size.value())?;
+            },
+            ElementReaderYield::Unknown(header) => {
+                children_reader.skip_element(header)?;
+            },
+        }
+    }
+
+    Ok(())
+}
+
+const AUDIO_TRACK_TYPE: u64 = 2;
+
+/// Read a single `TrackEntry` element
+///
+/// Only audio tracks are stored in `audio_tracks`, all other track types are skipped.
+fn read_track_entry<R>(
+    children_reader: &mut ElementChildIterator<'_, R>,
+    parse_options: ParseOptions,
+    audio_tracks: &mut Vec<AudioTrackDescriptor>,
+) -> Result<()>
+where
+    R: Read + Seek,
+{
+    let mut track = AudioTrackDescriptor::default();
+
+    while let Some(child) = children_reader.next()? {
+        match child {
+            ElementReaderYield::Child((ChildElementDescriptor { ident, .. }, size)) => {
+                match ident {
+                    ElementIdent::TrackNumber => {
+                        let track_number = children_reader.read_unsigned_int(size.value())?;
+                        track.number = track_number;
+                    },
+                    ElementIdent::TrackUid => {
+                        let track_uid = children_reader.read_unsigned_int(size.value())?;
+                        track.uid = track_uid;
+                    },
+                    ElementIdent::TrackType => {
+                        let track_type = children_reader.read_unsigned_int(size.value())?;
+                        log::trace!("Encountered new track of type: {}", track_type);
+
+                        if track_type != AUDIO_TRACK_TYPE {
+                            // Bail out before the track is stored, otherwise the non-audio
+                            // track would end up in the list of audio tracks
+                            children_reader.exhaust_current_master()?;
+                            return Ok(());
+                        }
+                    },
+                    ElementIdent::FlagEnabled => {
+                        let enabled = children_reader.read_flag(size.value())?;
+                        track.enabled = enabled;
+                    },
+                    ElementIdent::FlagDefault => {
+                        let default = children_reader.read_flag(size.value())?;
+                        track.default = default;
+                    },
+                    ElementIdent::DefaultDuration => {
+                        let _default_duration = children_reader.read_unsigned_int(size.value())?;
+                    },
+                    ElementIdent::TrackTimecodeScale => {
+                        let _timecode_scale = children_reader.read_float(size.value())?;
+                    },
+                    ElementIdent::Language => {
+                        let language = children_reader.read_string(size.value())?;
+                        track.language = Language::Iso639_2(language);
+                    },
+                    ElementIdent::LanguageBCP47 => {
+                        let language = children_reader.read_string(size.value())?;
+                        track.language = Language::Bcp47(language);
+                    },
+                    ElementIdent::CodecID => {
+                        let codec_id = children_reader.read_string(size.value())?;
+                        track.codec_id = codec_id;
+                    },
+                    ElementIdent::CodecPrivate => {
+                        let codec_private = children_reader.read_binary(size.value())?;
+                        track.codec_private = Some(codec_private);
+                    },
+                    ElementIdent::CodecDelay => {
+                        let _codec_delay = children_reader.read_unsigned_int(size.value())?;
+                    },
+                    ElementIdent::CodecName => {
+                        let codec_name = children_reader.read_utf8(size.value())?;
+                        track.codec_name = Some(codec_name);
+                    },
+                    ElementIdent::SeekPreRoll => {
+                        let _seek_pre_roll = children_reader.read_unsigned_int(size.value())?;
+                    },
+                    // Not a field we store, skip its data rather than panicking
+                    _ => {
+                        children_reader.skip(size.value())?;
+                    },
+                }
+            },
+            ElementReaderYield::Master((id, size)) => match id {
+                ElementIdent::Audio => {
+                    read_audio_settings(&mut children_reader.children(), parse_options, &mut track)?
+                },
+                // Not a master element we store, skip it rather than panicking
+                _ => {
+                    children_reader.skip(size.value())?;
+                },
+            },
+            ElementReaderYield::Eof => break,
+            ElementReaderYield::Unknown(header) => {
+                children_reader.skip_element(header)?;
+            },
+        }
+    }
+
+    audio_tracks.push(track);
+
+    Ok(())
+}
+
+/// Read a `TrackEntry\Audio` element into `audio_track.settings`
+fn read_audio_settings<R>(
+    children_reader: &mut ElementChildIterator<'_, R>,
+    _parse_options: ParseOptions,
+    audio_track: &mut AudioTrackDescriptor,
+) -> Result<()>
+where
+    R: Read + Seek,
+{
+    while let Some(child) = children_reader.next()? {
+        match child {
+            ElementReaderYield::Child((ChildElementDescriptor { ident, .. }, size)) => {
+                match ident {
+                    ElementIdent::SamplingFrequency => {
+                        let sampling_frequency = children_reader.read_float(size.value())?;
+                        audio_track.settings.sampling_frequency = sampling_frequency;
+                    },
+                    ElementIdent::OutputSamplingFrequency => {
+                        let output_sampling_frequency = children_reader.read_float(size.value())?;
+                        audio_track.settings.output_sampling_frequency = output_sampling_frequency;
+                    },
+                    ElementIdent::Channels => {
+                        let channels = children_reader.read_unsigned_int(size.value())? as u8;
+                        audio_track.settings.channels = channels;
+                    },
+                    ElementIdent::BitDepth => {
+                        let bit_depth = children_reader.read_unsigned_int(size.value())? as u8;
+                        audio_track.settings.bit_depth = Some(bit_depth);
+                    },
+                    ElementIdent::Emphasis => {
+                        let emphasis = children_reader.read_unsigned_int(size.value())?;
+                        if emphasis == 0 {
+                            continue; // No emphasis
+                        }
+
+                        // An `as u8` cast would wrap out-of-range values into valid ones
+                        audio_track.settings.emphasis = u8::try_from(emphasis)
+                            .ok()
+                            .and_then(EbmlAudioTrackEmphasis::from_u8);
+                    },
+                    // Not a field we store, skip its data rather than panicking
+                    _ => {
+                        children_reader.skip(size.value())?;
+                    },
+                }
+            },
+            ElementReaderYield::Eof => break,
+            // Nothing else here is of use to us, skip it rather than panicking
+            ElementReaderYield::Master((_, size)) => {
+                children_reader.skip(size.value())?;
+            },
+            ElementReaderYield::Unknown(header) => {
+                children_reader.skip_element(header)?;
+            },
+        }
+    }
+
+    Ok(())
+}
diff --git a/lofty/src/ebml/tag/attached_file.rs b/lofty/src/ebml/tag/attached_file.rs
new file mode 100644
index 000000000..f528feaf1
--- /dev/null
+++ b/lofty/src/ebml/tag/attached_file.rs
@@ -0,0 +1,141 @@
+use crate::error::Result;
+use crate::macros::encode_err;
+use crate::picture::{MimeType, Picture};
+
+use std::borrow::Cow;
+use std::fmt::Debug;
+
+/// Some attached file
+///
+/// This element contains any attached files, similar to the [GEOB]
+/// frame in ID3v2. The difference is, this is *also* used for images.
+///
+/// **Unsupported in WebM**
+///
+/// [GEOB]: crate::id3::v2::GeneralEncapsulatedObject
+#[derive(Clone, Eq, PartialEq)]
+pub struct AttachedFile<'a> {
+    /// A human-friendly name for the attached file.
+    pub description: Option<Cow<'a, str>>,
+    /// The actual file name of the attached file.
+    pub file_name: Cow<'a, str>,
+    /// Media type of the file following the [RFC6838] format.
+    ///
+    /// [RFC6838]: https://tools.ietf.org/html/rfc6838
+    pub mime_type: MimeType,
+    /// The data of the file.
+    pub file_data: Cow<'a, [u8]>,
+    /// Unique ID representing the file, as random as possible.
+    pub uid: u64,
+    /// A binary value that a track/codec can refer to when the attachment is needed.
+    pub referral: Option<Cow<'a, [u8]>>,
+    /// The timestamp at which this optimized font attachment comes into context.
+    ///
+    /// This is expressed in Segment Ticks which is based on `TimestampScale`. This element is
+    /// reserved for future use and if written **MUST** be the segment start timestamp.
+    pub used_start_time: Option<u64>,
+    /// The timestamp at which this optimized font attachment goes out of context.
+    ///
+    /// This is expressed in Segment Ticks which is based on `TimestampScale`. This element is
+    /// reserved for future use and if written **MUST** be the segment end timestamp.
+    pub used_end_time: Option<u64>,
+}
+
+impl Debug for AttachedFile<'_> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("AttachedFile")
+            .field("description", &self.description)
+            .field("file_name", &self.file_name)
+            .field("mime_type", &self.mime_type)
+            .field("file_data", &format!("<{} bytes>", self.file_data.len()))
+            .field("uid", &self.uid)
+            .field("referral", &self.referral)
+            .field("used_start_time", &self.used_start_time)
+            .field("used_end_time", &self.used_end_time)
+            .finish()
+    }
+}
+
+impl From<Picture> for AttachedFile<'_> {
+    fn from(picture: Picture) -> Self {
+        Self {
+            description: picture.description,
+            file_name: picture.file_name.unwrap_or_default(),
+            mime_type: picture
+                .mime_type
+                .unwrap_or_else(|| MimeType::Unknown(String::from("image/"))),
+            file_data: picture.data,
+            uid: 0,
+            referral: None,
+            used_start_time: None,
+            used_end_time: None,
+        }
+    }
+}
+
+impl AttachedFile<'_> {
+    /// Whether this file is an image
+    ///
+    /// This will check if the [`MimeType`] starts with `image/`.
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// use lofty::ebml::AttachedFile;
+    /// use lofty::picture::MimeType;
+    ///
+    /// let file = AttachedFile {
+    ///     description: None,
+    ///     file_name: "something.png".into(),
+    ///     // PNG MIME type
+    ///     mime_type: MimeType::Png,
+    ///     file_data: vec![1, 2, 3].into(),
+    ///     uid: 0,
+    ///     referral: None,
+    ///     used_start_time: None,
+    ///     used_end_time: None
+    /// };
+    ///
+    /// assert!(file.is_image());
+    /// ```
+    pub fn is_image(&self) -> bool {
+        match &self.mime_type {
+            MimeType::Unknown(mime) if mime.starts_with("image/") => true,
+            MimeType::Unknown(_) => false,
+            // `MimeType` is only ever used for `Picture`s outside of Matroska
+            _ => true,
+        }
+    }
+
+    pub(crate) fn validate(&self) -> Result<()> {
+        if self.uid == 0 {
+            encode_err!(@BAIL Ebml, "The UID of an attachment cannot be 0");
+        }
+
+        Ok(())
+    }
+
+    pub(crate) fn into_owned(self) -> AttachedFile<'static> {
+        let AttachedFile {
+            description,
+            file_name,
+            mime_type,
+            file_data,
+            uid,
+            referral,
+            used_start_time,
+            used_end_time,
+        } = self;
+
+        AttachedFile {
+            description: description.map(|d| Cow::Owned(d.into_owned())),
+            file_name: Cow::Owned(file_name.into_owned()),
+            mime_type,
+            file_data: Cow::Owned(file_data.into_owned()),
+            uid,
+            referral: referral.map(|r| Cow::Owned(r.into_owned())),
+            used_start_time,
+            used_end_time,
+        }
+    }
+}
diff --git a/lofty/src/ebml/tag/generic.rs b/lofty/src/ebml/tag/generic.rs
new file mode 100644
index 000000000..6a1bb0758
--- /dev/null
+++ b/lofty/src/ebml/tag/generic.rs
@@ -0,0 +1,248 @@
+//! Conversions to and from generic types
+//!
+//! NOTE: We can **ONLY** convert `SimpleTags` that come from a target with **NO** uids
+
+use super::{Language, MatroskaTag, SimpleTag, TargetType, TOMBSTONE_SIMPLE_TAG};
+use crate::tag::items::Lang;
+use crate::tag::{ItemKey, ItemValue, Tag, TagItem, TagType};
+
+use std::borrow::Cow;
+use std::collections::HashMap;
+use std::sync::LazyLock;
+
+macro_rules!
matroska_mapping_tables { + ( + $($target:ident => [ + $($matroska_key:literal <=> $item_key:ident),* $(,)? + ]);+ $(;)? + ) => { + const _: () = { + match TargetType::Album { + $( + TargetType::$target => {} + ),+ + } + }; + + pub(crate) const SUPPORTED_ITEMKEYS: &[ItemKey] = &[ + $( + $( + ItemKey::$item_key, + )* + )+ + ]; + + static MAPPINGS: LazyLock> = LazyLock::new(|| { + let mut m = HashMap::new(); + $( + $( + m.insert((TargetType::$target, $matroska_key), ItemKey::$item_key); + )* + )+ + m + }); + + static REVERSE_MAPPINGS: LazyLock> = LazyLock::new(|| { + let mut m = HashMap::new(); + $( + $( + m.insert(ItemKey::$item_key, (TargetType::$target, $matroska_key)); + )* + )+ + m + }); + }; +} + +matroska_mapping_tables!( + Shot => []; + Scene => []; + Track => [ + // Organization Information + "PART_NUMBER" <=> TrackNumber, + + // Titles + "TITLE" <=> TrackTitle, + "SUBTITLE" <=> TrackSubtitle, + + // Nested Information + "SORT_WITH" <=> TrackTitleSortOrder, + + // Entities + "ARTIST" <=> TrackArtist, + "LYRICS" <=> Lyrics, + "COMPOSER" <=> Composer, + "ARRANGER" <=> Arranger, + "LYRICIST" <=> Lyricist, + "CONDUCTOR" <=> Conductor, + "DIRECTOR" <=> Director, + "PRODUCER" <=> Producer, + "ENCODED_BY" <=> EncodedBy, + "MIXED_BY" <=> MixDj, + "REMIXED_BY" <=> Remixer, + "PUBLISHER" <=> Publisher, + "LABEL" <=> Label, + + // Search and Classification + "GENRE" <=> Genre, + "MOOD" <=> Mood, + "INITIAL_KEY" <=> InitialKey, + "ORIGINAL_MEDIA_TYPE" <=> OriginalMediaType, + + // Technical Information + "ENCODER" <=> EncoderSoftware, + "ENCODER_SETTINGS" <=> EncoderSettings, + "BPM" <=> Bpm, + // TODO: ReplayGain? 
The values are binary in Matroska + + // Identifiers + "ISRC" <=> Isrc, + "BARCODE" <=> Barcode, + "CATALOG_NUMBER" <=> CatalogNumber, + ]; + Part => []; + Album => [ + // Organization Information + "TOTAL_PARTS" <=> TrackTotal, + + // Titles + "TITLE" <=> AlbumTitle, + + // Nested Information + "SORT_WITH" <=> AlbumTitleSortOrder, + + // Entities + "ARTIST" <=> AlbumArtist, + + // Temporal Information + "DATE_RELEASED" <=> ReleaseDate, + "DATE_RECORDED" <=> RecordingDate, + + // Technical Information + // TODO: ReplayGain? The values are binary in Matroska + + // Commercial + "PURCHASE_ITEM" <=> PaymentUrl, + "PURCHASE_INFO" <=> CommercialInformationUrl, + "PURCHASE_OWNER" <=> FileOwner, + + // Legal + "COPYRIGHT" <=> CopyrightMessage, + "LICENSE" <=> License, + ]; + Edition => []; + Collection => []; +); + +const TAG_RETAINED: bool = true; +const TAG_CONSUMED: bool = false; + +pub(super) fn split_tag(mut matroska_tag: MatroskaTag) -> (MatroskaTag, Tag) { + let mut tag = Tag::new(TagType::Matroska); + + // TODO: Pictures, can they be handled in a generic way? + // - What about the uid and referral? + // - It seems like the "standard" way of adding cover art is to name it "cover.{ext}" + // - Maybe only support front covers? who knows. 
+ + matroska_tag.tags.retain_mut(|t| { + let target_type = match &t.target { + Some(t) if !t.has_uids() => t.target_type, + // We cannot use any tags bound to uids + Some(_) => return TAG_RETAINED, + None => TargetType::default(), + }; + + t.simple_tags + .retain_mut(|simple_tag| split_simple_tags(target_type, simple_tag, &mut tag)); + if t.simple_tags.is_empty() { + return TAG_CONSUMED; + } + + return TAG_RETAINED; + }); + + (matroska_tag, tag) +} + +fn split_simple_tags( + target_type: TargetType, + simple_tag: &mut SimpleTag<'_>, + tag: &mut Tag, +) -> bool { + let lang: Lang; + let Language::Iso639_2(l) = &simple_tag.language else { + return TAG_RETAINED; + }; + + // `Lang` doesn't support anything outside of a 3 character ISO-639-2 code. + if l.len() != 3 { + return TAG_CONSUMED; + } + + lang = l.as_bytes().try_into().unwrap(); // Infallible + + let Some(item_key) = MAPPINGS.get(&(target_type, &*simple_tag.name)).cloned() else { + return TAG_RETAINED; + }; + + if simple_tag.value.is_none() { + // Ignore empty items, `TagItem` is not made to handle them. 
+ return TAG_RETAINED; + } + + let simple_tag = std::mem::replace(simple_tag, TOMBSTONE_SIMPLE_TAG); + tag.push(TagItem { + lang, + description: String::new(), + item_key, + item_value: simple_tag.value.unwrap().into(), // Infallible + }); + + return TAG_CONSUMED; +} + +pub(super) fn merge_tag(tag: Tag, mut matroska_tag: MatroskaTag) -> MatroskaTag { + for item in tag.items { + let Some((simple_tag, target_type)) = simple_tag_for_item(Cow::Owned(item)) else { + continue; + }; + + let tag = matroska_tag.get_or_insert_tag_for_type(target_type); + + tag.simple_tags.push(simple_tag); + } + + matroska_tag +} + +pub(super) fn simple_tag_for_item( + item: Cow<'_, TagItem>, +) -> Option<(SimpleTag<'static>, TargetType)> { + if !matches!(item.item_value, ItemValue::Text(_) | ItemValue::Locator(_)) { + return None; + } + + let (target_type, simple_tag_name) = REVERSE_MAPPINGS.get(&item.item_key).copied()?; + + let TagItem { + mut lang, + item_value: ItemValue::Text(text) | ItemValue::Locator(text), + .. 
// Generates the string `Accessor` methods for `MatroskaTag`.
//
// For each `method => (Target, "NAME")` entry this expands to:
//
// * `method()`: looks up `NAME` under `TargetType::Target` via `MatroskaTag::get_str`
// * `set_<method>()` and `remove_<method>()`: not implemented yet (`todo!()`)
macro_rules! impl_accessor {
    ($($method:ident => ($target:ident, $name:literal)),+ $(,)?) => {
        paste::paste! {
            $(
                fn $method(&self) -> Option<Cow<'_, str>> {
                    self.get_str(MatroskaTagKey(TargetType::$target, Cow::Borrowed($name)))
                }

                // The argument is unused until the setter is implemented
                fn [<set_ $method>](&mut self, _value: String) {
                    todo!()
                }

                fn [<remove_ $method>](&mut self) {
                    todo!()
                }
            )+
        }
    }
}
+/// +/// For more information, see the following: +/// * [`Tag`] +/// * [`Target`] +/// * [`AttachedFile`] +#[derive(Default, Debug, PartialEq, Eq, Clone)] +#[tag(description = "A Matroska/WebM \"tag\"", supported_formats(Ebml))] +pub struct MatroskaTag { + pub(crate) tags: Vec>, + pub(crate) attached_files: Vec>, +} + +// TODO +#[allow(missing_docs)] +pub struct MatroskaTagKey<'a>(TargetType, Cow<'a, str>); + +impl MatroskaTag { + fn get(&self, key: MatroskaTagKey<'_>) -> Option<&SimpleTag<'_>> { + let MatroskaTagKey(target, key) = key; + + let applicable_tags = self.tags.iter().filter(|tag| tag.matches_target(target)); + for applicable_tag in applicable_tags { + for item in &applicable_tag.simple_tags { + if item.name == key && matches!(&item.language, Language::Iso639_2(l) if l == "und") + { + return Some(item); + } + } + } + + None + } + + fn get_or_insert_tag_for_type(&mut self, target_type: TargetType) -> &mut Tag<'static> { + let mut pos = None; + if let Some(applicable_tag_pos) = self + .tags + .iter() + .position(|tag| tag.matches_target(target_type)) + { + pos = Some(applicable_tag_pos); + } + + if pos.is_none() { + pos = Some(self.tags.len()); + + let mut new_tag = Tag::default(); + if target_type != TargetType::Album { + new_tag.target = Some(Target::from(target_type)); + } + + self.tags.push(new_tag); + } + + self.tags.get_mut(pos.unwrap()).unwrap() + } + + fn get_str(&self, key: MatroskaTagKey<'_>) -> Option> { + let simple_tag = self.get(key)?; + simple_tag.get_str().map(Cow::from) + } + + /// Returns all [`Tag`]s, if there are any + pub fn tags(&self) -> impl Iterator> { + self.tags.iter() + } + + /// Inserts a new [`Tag`] + /// + /// Note that if a tag exists with a matching [`Target`], the two tags will be merged, with the + /// new tag's items taking precedence. 
+ pub fn insert_tag(&mut self, tag: Tag<'_>) { + let tag = tag.into_owned(); + for t in &mut self.tags { + if t.target == tag.target { + t.merge_with(tag); + return; + } + } + + self.tags.push(tag); + } + + /// Returns all pictures, if there are any + /// + /// This will search all [`AttachedFile`]s, returning any with a MIME type beginning with `image/`. + /// + /// # Examples + /// + /// ```rust,no_run + /// use lofty::ebml::MatroskaTag; + /// use lofty::picture::Picture; + /// + /// # fn main() -> lofty::error::Result<()> { + /// let mut tag = MatroskaTag::default(); + /// + /// let mut picture = std::fs::read("something.png")?; + /// let mut picture2 = std::fs::read("something_else.png")?; + /// tag.insert_picture(Picture::from_reader(&mut &picture[..])?); + /// tag.insert_picture(Picture::from_reader(&mut &picture2[..])?); + /// + /// let pictures = tag.pictures(); + /// assert_eq!(pictures.count(), 2); + /// # Ok(()) } + pub fn pictures(&self) -> impl Iterator> { + self.attached_files + .iter() + .filter(|file| file.mime_type.as_str().starts_with("image/")) + } + + /// Inserts a new [`Picture`] + /// + /// Note: See [`MatroskaTag::insert_attached_file`] + /// + /// ```rust,no_run + /// use lofty::ebml::MatroskaTag; + /// use lofty::picture::Picture; + /// + /// # fn main() -> lofty::error::Result<()> { + /// let mut tag = MatroskaTag::default(); + /// + /// let mut picture_file = std::fs::read("something.png")?; + /// tag.insert_picture(Picture::from_reader(&mut &picture_file[..])?); + /// + /// assert_eq!(tag.pictures().count(), 1); + /// # Ok(()) } + pub fn insert_picture(&mut self, picture: Picture) { + let file = AttachedFile::from(picture); + self.insert_attached_file(file); + } + + /// Removes all [`AttachedFile`]s that are pictures + /// + /// Note that this determines whether a file is a picture via [`AttachedFile::is_image`]. 
+ pub fn remove_pictures(&mut self) -> impl Iterator> { + // TODO: drain_filter + let mut split_idx = 0_usize; + + for read_idx in 0..self.attached_files.len() { + if self.attached_files[read_idx].is_image() { + self.attached_files.swap(split_idx, read_idx); + split_idx += 1; + } + } + + self.attached_files.drain(..split_idx) + } + + /// Inserts a new [`AttachedFile`] + /// + /// Note that due to format requirements, all other [`AttachedFile`]s will be checked + /// in order to generate new random [`uid`]. + /// + /// [`uid`]: AttachedFile::uid + pub fn insert_attached_file(&mut self, file: AttachedFile<'_>) { + // TODO: Generate a new uid + self.attached_files.push(file.into_owned()); + } + + /// Removes all [`AttachedFile`]s with `uid` + /// + /// Note that while the IDs are *supposed* to be unique, they aren't guaranteed to be. This means + /// that this method may return multiple files. + pub fn remove_attached_file(&mut self, uid: u64) -> impl Iterator> { + // TODO: drain_filter + let mut split_idx = 0_usize; + + for read_idx in 0..self.attached_files.len() { + if self.attached_files[read_idx].uid == uid { + self.attached_files.swap(split_idx, read_idx); + split_idx += 1; + } + } + + self.attached_files.drain(..split_idx) + } +} + +impl Accessor for MatroskaTag { + impl_accessor!( + artist => (Track, "ARTIST"), + title => (Track, "TITLE"), + album => (Album, "TITLE"), + comment => (Track, "COMMENT"), + ); + + fn track(&self) -> Option { + self.get(MatroskaTagKey( + TargetType::Track, + Cow::Borrowed("PART_NUMBER"), + )) + .and_then(SimpleTag::get_str) + .and_then(|val| val.parse::().ok()) + } + + fn set_track(&mut self, _value: u32) { + todo!() + } + + fn remove_track(&mut self) { + todo!() + } + + fn track_total(&self) -> Option { + self.get(MatroskaTagKey( + TargetType::Album, + Cow::Borrowed("TOTAL_PARTS"), + )) + .and_then(SimpleTag::get_str) + .and_then(|val| val.parse::().ok()) + } + + fn set_track_total(&mut self, _value: u32) { + todo!() + } + + fn 
remove_track_total(&mut self) { + todo!() + } + + fn year(&self) -> Option { + // `DATE_RELEASED` + todo!() + } + + fn set_year(&mut self, _value: u32) { + todo!() + } + + fn remove_year(&mut self) { + todo!() + } +} + +impl TagExt for MatroskaTag { + type Err = LoftyError; + type RefKey<'a> = MatroskaTagKey<'a>; + + #[inline] + fn tag_type(&self) -> TagType { + TagType::Matroska + } + + fn len(&self) -> usize { + self.tags.iter().map(Tag::len).sum::() + self.attached_files.len() + } + + fn contains<'a>(&'a self, key: Self::RefKey<'a>) -> bool { + let MatroskaTagKey(target_type, key) = key; + self.tags.iter().any(|tag| { + if let Some(target) = &tag.target { + return target.target_type == target_type + && tag.simple_tags.iter().any(|t| t.name == key); + } + + false + }) + } + + fn is_empty(&self) -> bool { + self.tags.is_empty() && self.attached_files.is_empty() + } + + fn save_to( + &self, + _file: &mut F, + _write_options: WriteOptions, + ) -> std::result::Result<(), Self::Err> + where + F: FileLike, + LoftyError: From<::Error>, + LoftyError: From<::Error>, + { + todo!() + } + + fn dump_to( + &self, + _writer: &mut W, + _write_options: WriteOptions, + ) -> std::result::Result<(), Self::Err> { + todo!() + } + + fn clear(&mut self) { + self.tags.clear(); + self.attached_files.clear(); + } +} + +#[doc(hidden)] +#[derive(Debug, Clone, Default)] +pub struct SplitTagRemainder(MatroskaTag); + +impl From for MatroskaTag { + fn from(from: SplitTagRemainder) -> Self { + from.0 + } +} + +impl Deref for SplitTagRemainder { + type Target = MatroskaTag; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl SplitTag for MatroskaTag { + type Remainder = SplitTagRemainder; + + fn split_tag(self) -> (Self::Remainder, crate::tag::Tag) { + let (remainder, tag) = generic::split_tag(self); + (SplitTagRemainder(remainder), tag) + } +} + +impl MergeTag for SplitTagRemainder { + type Merged = MatroskaTag; + + fn merge_tag(self, tag: crate::tag::Tag) -> Self::Merged { + 
generic::merge_tag(tag, self.0) + } +} + +impl From for crate::tag::Tag { + fn from(input: MatroskaTag) -> Self { + let (remainder, mut tag) = input.split_tag(); + + if unsafe { global_options().preserve_format_specific_items } && remainder.0.len() > 0 { + tag.companion_tag = Some(CompanionTag::Matroska(remainder.0)); + } + + tag + } +} + +impl From for MatroskaTag { + fn from(mut input: crate::tag::Tag) -> Self { + if unsafe { global_options().preserve_format_specific_items } { + if let Some(companion) = input.companion_tag.take().and_then(CompanionTag::matroska) { + return SplitTagRemainder(companion).merge_tag(input); + } + } + + SplitTagRemainder::default().merge_tag(input) + } +} + +pub(crate) struct MatroskaTagRef<'a, I> +where + I: Iterator>, +{ + pub(crate) tags: I, +} + +pub(crate) fn simple_tags_for_tag(tag: &crate::tag::Tag) -> impl Iterator> { + let mut mapped_tags: HashMap>>> = + HashMap::new(); + for item in &tag.items { + if let Some((simple_tag, target_type)) = generic::simple_tag_for_item(Cow::Borrowed(item)) { + mapped_tags + .entry(target_type) + .or_default() + .push(Cow::Owned(simple_tag)) + } + } + + mapped_tags + .into_iter() + .map(|(target_type, simple_tags)| TagRef { + targets: TargetDescriptor::Basic(target_type), + simple_tags: Box::new(simple_tags.into_iter()), + }) +} + +impl<'a, I> MatroskaTagRef<'a, I> +where + I: Iterator>, +{ + pub(crate) fn write_to(&mut self, _file: &mut F, _write_options: WriteOptions) -> Result<()> + where + F: FileLike, + LoftyError: From<::Error>, + LoftyError: From<::Error>, + { + todo!("Writing matroska tags") + } + + pub(crate) fn dump_to( + &self, + _writer: &mut W, + _write_options: WriteOptions, + ) -> Result<()> { + todo!("Dumping matroska tags") + } +} diff --git a/lofty/src/ebml/tag/simple_tag.rs b/lofty/src/ebml/tag/simple_tag.rs new file mode 100644 index 000000000..99c741add --- /dev/null +++ b/lofty/src/ebml/tag/simple_tag.rs @@ -0,0 +1,248 @@ +use crate::tag::ItemValue; + +use std::borrow::Cow; 
/// The language of a [`SimpleTag`] or chapter
///
/// Notes:
///
/// - When nothing is specified, the language is `Iso639_2("und")`.
/// - Matroska originally used ISO-639-2 codes.
/// - BCP-47 is the newer, **recommended** option.
/// - An ISO-639-2 code may carry an optional country code, which is why the [Lang] type
///   cannot be used here.
///
/// [Lang]: crate::tag::items::Lang
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Language {
    /// An ISO-639-2 language code
    Iso639_2(String),
    /// A BCP-47 language code (recommended)
    Bcp47(String),
}

impl Default for Language {
    /// The undetermined language, `Iso639_2("und")`
    fn default() -> Self {
        Language::Iso639_2("und".to_owned())
    }
}

impl Language {
    /// Borrow the language code as a string, regardless of which standard it uses
    ///
    /// # Examples
    ///
    /// ```rust
    /// use lofty::ebml::Language;
    ///
    /// let lang = Language::Iso639_2(String::from("eng"));
    /// assert_eq!(lang.as_str(), "eng");
    /// ```
    pub fn as_str(&self) -> &str {
        let (Self::Iso639_2(code) | Self::Bcp47(code)) = self;
        code
    }
}
+/// +/// ## Conversions with [`ItemValue`] +/// +/// A `TagValue` can be converted to and from an [`ItemValue`] with the following conversions: +/// +/// ### To [`ItemValue`] +/// +/// - [`TagValue::String`] -> [`ItemValue::Text`] +/// - [`TagValue::Binary`] -> [`ItemValue::Binary`] +/// +/// ### From [`ItemValue`] +/// +/// - [`ItemValue::Text`] | [`ItemValue::Locator`] -> [`TagValue::String`] +/// - [`ItemValue::Binary`] -> [`TagValue::Binary`] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum TagValue<'a> { + /// A UTF-8 string tag value + String(Cow<'a, str>), + /// A binary tag value + Binary(Cow<'a, [u8]>), +} + +impl From> for ItemValue { + fn from(value: TagValue<'_>) -> Self { + match value { + TagValue::String(s) => ItemValue::Text(s.into_owned()), + TagValue::Binary(b) => ItemValue::Binary(b.into_owned()), + } + } +} + +impl From for TagValue<'_> { + fn from(value: ItemValue) -> Self { + match value { + ItemValue::Text(s) | ItemValue::Locator(s) => TagValue::String(Cow::Owned(s)), + ItemValue::Binary(b) => TagValue::Binary(Cow::Owned(b)), + } + } +} + +impl From for TagValue<'_> { + fn from(value: String) -> Self { + TagValue::String(value.into()) + } +} + +impl<'a> From> for TagValue<'a> { + fn from(value: Cow<'a, str>) -> Self { + TagValue::String(value) + } +} + +impl<'a> From<&'a str> for TagValue<'a> { + fn from(value: &'a str) -> Self { + TagValue::String(Cow::Borrowed(value)) + } +} + +impl From> for TagValue<'_> { + fn from(value: Vec) -> Self { + TagValue::Binary(value.into()) + } +} + +impl<'a> From> for TagValue<'a> { + fn from(value: Cow<'a, [u8]>) -> Self { + TagValue::Binary(value) + } +} + +impl<'a> From<&'a [u8]> for TagValue<'a> { + fn from(value: &'a [u8]) -> Self { + TagValue::Binary(Cow::Borrowed(value)) + } +} + +impl TagValue<'_> { + fn into_owned(self) -> TagValue<'static> { + match self { + TagValue::String(s) => TagValue::String(Cow::Owned(s.into_owned())), + TagValue::Binary(b) => 
TagValue::Binary(Cow::Owned(b.into_owned())), + } + } +} + +/// General information about the target +/// +/// Notes on how `SimpleTag`s work: +/// - Their meaning depends on the [`Target`] of their parent [`Tag`] +/// - They **do not** need to have a value. +/// +/// For more information, see [`Language`] and [`TagValue`] +/// +/// [`Tag`]: crate::ebml::Tag +/// [`Target`]: crate::ebml::Target +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct SimpleTag<'a> { + /// The name of the tag as it is stored + /// + /// This field can essentially contain anything, but the following conditions are recommended: + /// + /// - It **SHOULD** consist of capital letters, numbers and the underscore character ‘_’. + /// - It **SHOULD NOT** contain any space. + /// + /// When in doubt, the [`TagName`] enum can be used, which covers all specified tags. + /// + /// [`TagName`]: crate::ebml::TagName + pub name: Cow<'a, str>, + /// The language of the tag + /// + /// See [`Language`] for more information. + pub language: Language, + /// Whether [`language`] is the default/original language to use + /// + /// This is used when multiple languages are present in a file. Otherwise, this + /// will always be `true`. 
+ /// + /// [`language`]: #structfield.language + pub default: bool, + /// The actual tag value + /// + /// For more information, see [`TagValue`] + pub value: Option>, +} + +impl<'a> SimpleTag<'a> { + /// Create a new `SimpleTag` with the given name and value + /// + /// # Example + /// + /// ``` + /// use lofty::ebml::{SimpleTag, TagName, TagValue}; + /// + /// let tag = SimpleTag::new(TagName::Title, "My Title"); + /// ``` + pub fn new(name: N, value: V) -> Self + where + N: Into>, + V: Into>, + { + Self { + name: name.into(), + language: Language::default(), + default: false, + value: Some(value.into()), + } + } + + /// Get the value of the `SimpleTag` if it is [`TagValue::String`] + /// + /// # Example + /// + /// ```rust + /// use lofty::ebml::{SimpleTag, TagName, TagValue}; + /// + /// let tag = SimpleTag::new(TagName::Title, "My Title"); + /// assert_eq!(tag.get_str(), Some("My Title")); + /// ``` + pub fn get_str(&'a self) -> Option<&'a str> { + match &self.value { + Some(TagValue::String(s)) => Some(s), + _ => None, + } + } + + /// Get the value of the `SimpleTag` if it is [`TagValue::Binary`] + /// + /// # Example + /// + /// ```rust + /// use lofty::ebml::{SimpleTag, TagName, TagValue}; + /// + /// let tag = SimpleTag::new(TagName::Title, vec![1, 2, 3]); + /// assert_eq!(tag.get_binary(), Some(&[1, 2, 3][..])); + /// ``` + pub fn get_binary(&'a self) -> Option<&'a [u8]> { + match &self.value { + Some(TagValue::Binary(b)) => Some(b), + _ => None, + } + } + + pub(crate) fn into_owned(self) -> SimpleTag<'static> { + SimpleTag { + name: Cow::Owned(self.name.into_owned()), + language: self.language, + default: self.default, + value: self.value.map(TagValue::into_owned), + } + } +} + +// Used in conversions +pub(super) const TOMBSTONE_SIMPLE_TAG: SimpleTag<'static> = SimpleTag { + name: Cow::Borrowed(""), + language: Language::Iso639_2(String::new()), + default: false, + value: None, +}; diff --git a/lofty/src/ebml/tag/tag.rs b/lofty/src/ebml/tag/tag.rs new 
file mode 100644 index 000000000..8ecf2f822 --- /dev/null +++ b/lofty/src/ebml/tag/tag.rs @@ -0,0 +1,128 @@ +use super::simple_tag::SimpleTag; +use super::target::{Target, TargetDescriptor, TargetType}; + +use std::borrow::Cow; + +/// A single metadata descriptor. +/// +/// This represents a `\Segment\Tags\Tag` element in the EBML tree. It contains a single [`Target`] and +/// its associated [`SimpleTag`]s. +/// +/// Notes on how `Tag`s work: +/// +/// - Multiple [`Tag`]s can exist in a file. +/// - They each describe a single [`Target`]. +/// - This also means that multiple tags can describe the same target. +/// +/// This structure is very different from other formats. See [`Target`] and [`SimpleTag`] for more +/// information on how these work. +#[derive(Default, Debug, PartialEq, Eq, Clone)] +pub struct Tag<'a> { + /// The target for which the tags are applied. + /// + /// Note that `None` is equivalent to `Some(Target::default())`. + pub target: Option, + /// General information about the target + pub simple_tags: Vec>, +} + +impl<'a> Tag<'a> { + /// Get all [`SimpleTag`]s with `name` + /// + /// # Example + /// + /// ``` + /// use lofty::ebml::{SimpleTag, Tag, Target}; + /// use std::collections::HashSet; + /// + /// let tag = Tag { + /// target: None, + /// simple_tags: vec![ + /// SimpleTag::new("TITLE", "My Title"), + /// SimpleTag::new("ARTIST", "My Artist"), + /// ], + /// }; + /// + /// assert_eq!(tag.get("TITLE").count(), 1); + /// assert_eq!(tag.get("ARTIST").count(), 1); + /// assert_eq!(tag.get("SOMETHING_ELSE").count(), 0); + /// ``` + pub fn get(&'a self, name: &'a str) -> impl Iterator> { + self.simple_tags.iter().filter(move |tag| tag.name == name) + } + + /// Get the number of simple tags in this tag. 
+ /// + /// # Example + /// + /// ``` + /// use lofty::ebml::{SimpleTag, Tag, Target}; + /// use std::collections::HashSet; + /// + /// let tag = Tag { + /// target: None, + /// simple_tags: vec![ + /// SimpleTag::new("TITLE", "My Title"), + /// SimpleTag::new("ARTIST", "My Artist"), + /// ], + /// }; + /// + /// assert_eq!(tag.len(), 2); + /// ``` + pub fn len(&self) -> usize { + self.simple_tags.len() + } + + /// Check if there are no simple tags in this tag. + /// + /// # Example + /// + /// ``` + /// use lofty::ebml::{SimpleTag, Tag, Target}; + /// use std::collections::HashSet; + /// + /// let tag = Tag::default(); + /// + /// assert!(tag.is_empty()); + /// ``` + pub fn is_empty(&self) -> bool { + self.simple_tags.is_empty() + } + + /// Whether the tag can be used solely by the TargetType (its target is not bound to any uids) + /// + /// This is used by `MatroskaTag::get` to find applicable tags for `Accessor` methods + pub(crate) fn matches_target(&self, target_type: TargetType) -> bool { + let Some(target) = &self.target else { + // An empty target is implicitly `Album` + return target_type == TargetType::Album; + }; + + target.is_candidate_for_type(target_type) + } + + pub(crate) fn into_owned(self) -> Tag<'static> { + Tag { + target: self.target, + simple_tags: self + .simple_tags + .into_iter() + .map(SimpleTag::into_owned) + .collect(), + } + } +} + +impl Tag<'static> { + pub(crate) fn merge_with(&mut self, other: Tag<'_>) { + assert_eq!(self.target, other.target); + + let other = other.into_owned(); + self.simple_tags.extend(other.simple_tags); + } +} + +pub(crate) struct TagRef<'a> { + pub(crate) targets: TargetDescriptor<'a>, + pub(crate) simple_tags: Box>>>, +} diff --git a/lofty/src/ebml/tag/tag_name.rs b/lofty/src/ebml/tag/tag_name.rs new file mode 100644 index 000000000..2215df0e4 --- /dev/null +++ b/lofty/src/ebml/tag/tag_name.rs @@ -0,0 +1,252 @@ +// !!! DO NOT EDIT !!! +// !!! THIS FILE IS GENERATED BY `scripts/update-matroska-tags.py` !!! 
/// Every tag name defined by the Matroska tagging specification
///
/// See the [official tag list](https://matroska.org/technical/tagging.html) for the meaning and
/// expected data type of each name.
///
/// Converting a `TagName` into a `Cow<'static, str>` yields the upper-case name
/// (for example, `TagName::SortWith` becomes `"SORT_WITH"`).
#[rustfmt::skip]
#[allow(missing_docs)]
#[non_exhaustive]
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub enum TagName {

    // Nesting Information
    Original,
    Sample,
    Country,

    // Organization Information
    TotalParts,
    PartNumber,
    PartOffset,

    // Titles
    Title,
    Subtitle,

    // Nested Information
    Url,
    SortWith,
    Instruments,
    Email,
    Address,
    Fax,
    Phone,

    // Entities
    Artist,
    LeadPerformer,
    Accompaniment,
    Composer,
    Arranger,
    Lyrics,
    Lyricist,
    Conductor,
    Director,
    AssistantDirector,
    DirectorOfPhotography,
    SoundEngineer,
    ArtDirector,
    ProductionDesigner,
    Choregrapher,
    CostumeDesigner,
    Actor,
    Character,
    WrittenBy,
    ScreenplayBy,
    EditedBy,
    Producer,
    Coproducer,
    ExecutiveProducer,
    DistributedBy,
    MasteredBy,
    EncodedBy,
    MixedBy,
    RemixedBy,
    ProductionStudio,
    ThanksTo,
    Publisher,
    Label,

    // Search and Classification
    Genre,
    Mood,
    OriginalMediaType,
    ContentType,
    Subject,
    Description,
    Keywords,
    Summary,
    Synopsis,
    InitialKey,
    Period,
    LawRating,

    // Temporal Information
    DateReleased,
    DateRecorded,
    DateEncoded,
    DateTagged,
    DateDigitized,
    DateWritten,
    DatePurchased,

    // Spatial Information
    RecordingLocation,
    CompositionLocation,
    ComposerNationality,

    // Personal
    Comment,
    PlayCounter,
    Rating,

    // Technical Information
    Encoder,
    EncoderSettings,
    Bps,
    Fps,
    Bpm,
    Measure,
    Tuning,
    ReplaygainGain,
    ReplaygainPeak,

    // Identifiers
    Isrc,
    Mcdi,
    Isbn,
    Barcode,
    CatalogNumber,
    LabelCode,
    Lccn,
    Imdb,
    Tmdb,
    Tvdb,
    Tvdb2,

    // Commercial
    PurchaseItem,
    PurchaseInfo,
    PurchaseOwner,
    PurchasePrice,
    PurchaseCurrency,

    // Legal
    Copyright,
    ProductionCopyright,
    License,
    TermsOfUse,
}

impl From<TagName> for Cow<'static, str> {
    /// Maps a tag name to its upper-case string form, borrowed for `'static` (never allocates).
    fn from(value: TagName) -> Self {
        let name: &'static str = match value {
            TagName::Original => "ORIGINAL",
            TagName::Sample => "SAMPLE",
            TagName::Country => "COUNTRY",
            TagName::TotalParts => "TOTAL_PARTS",
            TagName::PartNumber => "PART_NUMBER",
            TagName::PartOffset => "PART_OFFSET",
            TagName::Title => "TITLE",
            TagName::Subtitle => "SUBTITLE",
            TagName::Url => "URL",
            TagName::SortWith => "SORT_WITH",
            TagName::Instruments => "INSTRUMENTS",
            TagName::Email => "EMAIL",
            TagName::Address => "ADDRESS",
            TagName::Fax => "FAX",
            TagName::Phone => "PHONE",
            TagName::Artist => "ARTIST",
            TagName::LeadPerformer => "LEAD_PERFORMER",
            TagName::Accompaniment => "ACCOMPANIMENT",
            TagName::Composer => "COMPOSER",
            TagName::Arranger => "ARRANGER",
            TagName::Lyrics => "LYRICS",
            TagName::Lyricist => "LYRICIST",
            TagName::Conductor => "CONDUCTOR",
            TagName::Director => "DIRECTOR",
            TagName::AssistantDirector => "ASSISTANT_DIRECTOR",
            TagName::DirectorOfPhotography => "DIRECTOR_OF_PHOTOGRAPHY",
            TagName::SoundEngineer => "SOUND_ENGINEER",
            TagName::ArtDirector => "ART_DIRECTOR",
            TagName::ProductionDesigner => "PRODUCTION_DESIGNER",
            TagName::Choregrapher => "CHOREGRAPHER",
            TagName::CostumeDesigner => "COSTUME_DESIGNER",
            TagName::Actor => "ACTOR",
            TagName::Character => "CHARACTER",
            TagName::WrittenBy => "WRITTEN_BY",
            TagName::ScreenplayBy => "SCREENPLAY_BY",
            TagName::EditedBy => "EDITED_BY",
            TagName::Producer => "PRODUCER",
            TagName::Coproducer => "COPRODUCER",
            TagName::ExecutiveProducer => "EXECUTIVE_PRODUCER",
            TagName::DistributedBy => "DISTRIBUTED_BY",
            TagName::MasteredBy => "MASTERED_BY",
            TagName::EncodedBy => "ENCODED_BY",
            TagName::MixedBy => "MIXED_BY",
            TagName::RemixedBy => "REMIXED_BY",
            TagName::ProductionStudio => "PRODUCTION_STUDIO",
            TagName::ThanksTo => "THANKS_TO",
            TagName::Publisher => "PUBLISHER",
            TagName::Label => "LABEL",
            TagName::Genre => "GENRE",
            TagName::Mood => "MOOD",
            TagName::OriginalMediaType => "ORIGINAL_MEDIA_TYPE",
            TagName::ContentType => "CONTENT_TYPE",
            TagName::Subject => "SUBJECT",
            TagName::Description => "DESCRIPTION",
            TagName::Keywords => "KEYWORDS",
            TagName::Summary => "SUMMARY",
            TagName::Synopsis => "SYNOPSIS",
            TagName::InitialKey => "INITIAL_KEY",
            TagName::Period => "PERIOD",
            TagName::LawRating => "LAW_RATING",
            TagName::DateReleased => "DATE_RELEASED",
            TagName::DateRecorded => "DATE_RECORDED",
            TagName::DateEncoded => "DATE_ENCODED",
            TagName::DateTagged => "DATE_TAGGED",
            TagName::DateDigitized => "DATE_DIGITIZED",
            TagName::DateWritten => "DATE_WRITTEN",
            TagName::DatePurchased => "DATE_PURCHASED",
            TagName::RecordingLocation => "RECORDING_LOCATION",
            TagName::CompositionLocation => "COMPOSITION_LOCATION",
            TagName::ComposerNationality => "COMPOSER_NATIONALITY",
            TagName::Comment => "COMMENT",
            TagName::PlayCounter => "PLAY_COUNTER",
            TagName::Rating => "RATING",
            TagName::Encoder => "ENCODER",
            TagName::EncoderSettings => "ENCODER_SETTINGS",
            TagName::Bps => "BPS",
            TagName::Fps => "FPS",
            TagName::Bpm => "BPM",
            TagName::Measure => "MEASURE",
            TagName::Tuning => "TUNING",
            TagName::ReplaygainGain => "REPLAYGAIN_GAIN",
            TagName::ReplaygainPeak => "REPLAYGAIN_PEAK",
            TagName::Isrc => "ISRC",
            TagName::Mcdi => "MCDI",
            TagName::Isbn => "ISBN",
            TagName::Barcode => "BARCODE",
            TagName::CatalogNumber => "CATALOG_NUMBER",
            TagName::LabelCode => "LABEL_CODE",
            TagName::Lccn => "LCCN",
            TagName::Imdb => "IMDB",
            TagName::Tmdb => "TMDB",
            TagName::Tvdb => "TVDB",
            TagName::Tvdb2 => "TVDB2",
            TagName::PurchaseItem => "PURCHASE_ITEM",
            TagName::PurchaseInfo => "PURCHASE_INFO",
            TagName::PurchaseOwner => "PURCHASE_OWNER",
            TagName::PurchasePrice => "PURCHASE_PRICE",
            TagName::PurchaseCurrency => "PURCHASE_CURRENCY",
            TagName::Copyright => "COPYRIGHT",
            TagName::ProductionCopyright => "PRODUCTION_COPYRIGHT",
            TagName::License => "LICENSE",
            TagName::TermsOfUse => "TERMS_OF_USE",
        };

        Cow::Borrowed(name)
    }
}
+#[repr(u8)] +#[non_exhaustive] +#[derive(Default, Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)] +pub enum TargetType { + /// For video, this represents: SHOT + Shot = 10, + /// This is used to represent the following: + /// + /// - Audio: SUBTRACK / PART / MOVEMENT + /// - Video: SCENE + Scene = 20, + /// This is used to represent the following: + /// + /// - Audio: TRACK / SONG + /// - Video: CHAPTER + Track = 30, + /// For both audio and video, this represents: PART / SESSION + Part = 40, + /// This is used to represent the following: + /// + /// - Audio: ALBUM / OPERA / CONCERT + /// - Video: MOVIE / EPISODE / CONCERT + // The spec defines TargetType 50 (Album) as the default value, as it is the most + // common grouping level. + #[default] + Album = 50, + /// This is used to represent the following: + /// + /// - Audio: EDITION / ISSUE / VOLUME / OPUS + /// - Video: SEASON / SEQUEL / VOLUME + Edition = 60, + /// For both audio and video, this represents: COLLECTION + Collection = 70, +} + +impl TryFrom for TargetType { + type Error = LoftyError; + + fn try_from(value: u8) -> Result { + match value { + 10 => Ok(Self::Shot), + 20 => Ok(Self::Scene), + 30 => Ok(Self::Track), + 40 => Ok(Self::Part), + 50 => Ok(Self::Album), + 60 => Ok(Self::Edition), + 70 => Ok(Self::Collection), + _ => decode_err!(@BAIL Ebml, "TargetType value out of range"), + } + } +} + +/// The target for which a [`SimpleTag`] is applied. +/// +/// In Matroska, tags are specified on the level of targets. For example, there is no "TRACK TITLE" +/// tag, but rather a "TITLE" tag that is applied to a [`TargetType::Track`] target. +/// +/// See [`TargetType`] for more information on the types of targets. +/// +/// [`SimpleTag`]: crate::ebml::SimpleTag +#[derive(Default, Debug, Clone, PartialEq, Eq, Hash)] +pub struct Target { + /// The type of the target. + pub target_type: TargetType, + /// An informational string that can be used to display the logical level of the target. 
+ pub name: Option, + /// A unique ID to identify the track(s) the tags belong to. + /// + /// If the value is 0 at this level, the tags apply to all tracks in the Segment. If set to any + /// other value, it **MUST** match the `TrackUID` value of a track found in this Segment. + /// + /// **Unsupported in WebM** + pub track_uids: Option>, + /// A unique ID to identify the `EditionEntry`(s) the tags belong to. + /// + /// If the value is 0 at this level, the tags apply to all editions in the Segment. If set to + /// any other value, it **MUST** match the `EditionUID` value of an edition found in this Segment. + /// + /// **Unsupported in WebM** + pub edition_uids: Option>, + /// A unique ID to identify the Chapter(s) the tags belong to. + /// + /// If the value is 0 at this level, the tags apply to all chapters in the Segment. If set to + /// any other value, it **MUST** match the `ChapterUID` value of a chapter found in this Segment. + /// + /// **Unsupported in WebM** + pub chapter_uids: Option>, + /// A unique ID to identify the [`AttachedFile`]\(s) the tags belong to. + /// + /// If the value is 0 at this level, the tags apply to all the attachments in the Segment. If + /// set to any other value, it **MUST** match the [`AttachedFile::uid`] value of an attachment + /// found in this Segment. + /// + /// [`AttachedFile`]: crate::ebml::AttachedFile + /// [`AttachedFile::uid`]: crate::ebml::AttachedFile::uid + /// + /// **Unsupported in WebM** + pub attachment_uids: Option>, +} + +impl From for Target { + fn from(target_type: TargetType) -> Self { + Self { + target_type, + ..Default::default() + } + } +} + +impl Target { + /// Used by [`EbmlTag::get`] to find eligible tags to search and edit + /// given a specific target type + pub(super) fn is_candidate_for_type(&self, target_type: TargetType) -> bool { + self.target_type == target_type && !self.has_uids() + } + + // TargetType::Album is the default value. 
If nothing else is set, it is valid to write + // a zero-sized Targets element. + pub(super) fn is_empty_candidate(&self) -> bool { + self.target_type == TargetType::Album && self.name.is_none() && !self.has_uids() + } + + pub(super) fn has_uids(&self) -> bool { + self.track_uids.is_some() + || self.edition_uids.is_some() + || self.chapter_uids.is_some() + || self.attachment_uids.is_some() + } +} + +/// Used to simplify conversions when writing a generic `Tag`, where extra Target information +/// will, of course, not be available. +pub(crate) enum TargetDescriptor<'a> { + Basic(TargetType), + Full(&'a Target), +} + +impl TargetDescriptor<'_> { + pub(crate) fn target_type(&self) -> TargetType { + match self { + Self::Basic(ty) => *ty, + Self::Full(target) => target.target_type, + } + } + + pub(crate) fn is_empty_candidate(&self) -> bool { + match self { + Self::Basic(ty) if *ty == TargetType::Album => true, + Self::Full(target) => target.is_empty_candidate(), + _ => false, + } + } +} + +impl<'a> From<&'a Target> for TargetDescriptor<'a> { + fn from(target: &'a Target) -> Self { + if !target.has_uids() { + return TargetDescriptor::Basic(target.target_type); + } + + TargetDescriptor::Full(target) + } +} diff --git a/lofty/src/ebml/tag/tests.rs b/lofty/src/ebml/tag/tests.rs new file mode 100644 index 000000000..810166403 --- /dev/null +++ b/lofty/src/ebml/tag/tests.rs @@ -0,0 +1,22 @@ +use crate::ebml::MatroskaTag; +use crate::prelude::ItemKey; +use crate::tag::{Accessor, Tag, TagType}; + +#[test_log::test] +fn tag_to_matroska_tag() { + let mut tag = Tag::new(TagType::Matroska); + + tag.insert_text(ItemKey::TrackArtist, String::from("Foo artist")); + tag.insert_text(ItemKey::TrackTitle, String::from("Bar title")); + tag.insert_text(ItemKey::AlbumTitle, String::from("Baz album")); + tag.insert_text(ItemKey::TrackNumber, String::from("1")); + tag.insert_text(ItemKey::TrackTotal, String::from("2")); + + let matroska_tag: MatroskaTag = tag.into(); + + 
assert_eq!(matroska_tag.artist().as_deref(), Some("Foo artist")); + assert_eq!(matroska_tag.title().as_deref(), Some("Bar title")); + assert_eq!(matroska_tag.album().as_deref(), Some("Baz album")); + assert_eq!(matroska_tag.track(), Some(1)); + assert_eq!(matroska_tag.track_total(), Some(2)); +} diff --git a/lofty/src/ebml/tag/write/elements/attached_file.rs b/lofty/src/ebml/tag/write/elements/attached_file.rs new file mode 100644 index 000000000..3486caf8a --- /dev/null +++ b/lofty/src/ebml/tag/write/elements/attached_file.rs @@ -0,0 +1,81 @@ +use crate::ebml::tag::write::{write_element, ElementWriterCtx, WriteableElement}; +use crate::ebml::{AttachedFile, ElementId, VInt}; +use crate::io::FileLike; + +const FileDescription_ID: ElementId = ElementId(0x467E); +const FileName_ID: ElementId = ElementId(0x466E); +const FileMediaType_ID: ElementId = ElementId(0x4660); +const FileData_ID: ElementId = ElementId(0x465C); +const FileUID_ID: ElementId = ElementId(0x46AE); +const FileReferral_ID: ElementId = ElementId(0x4675); +const FileUsedStartTime_ID: ElementId = ElementId(0x4661); +const FileUsedEndTime_ID: ElementId = ElementId(0x4662); + +impl WriteableElement for AttachedFile<'_> { + const ID: ElementId = ElementId(0x61A7); + + fn write_element( + &self, + ctx: ElementWriterCtx, + writer: &mut F, + ) -> crate::error::Result<()> { + self.validate()?; + + let mut element_children = Vec::new(); + if let Some(description) = &self.description { + write_element( + ctx, + FileDescription_ID, + &description.as_ref(), + &mut element_children, + )?; + } + + write_element( + ctx, + FileName_ID, + &self.file_name.as_ref(), + &mut element_children, + )?; + + write_element( + ctx, + FileMediaType_ID, + &self.mime_type.as_str(), + &mut element_children, + )?; + + write_element( + ctx, + FileData_ID, + &self.file_data.as_ref(), + &mut element_children, + )?; + + let uid = VInt::::try_from(self.uid)?; + write_element(ctx, FileUID_ID, &uid, &mut element_children)?; + + if let 
Some(referral) = &self.referral { + write_element( + ctx, + FileReferral_ID, + &referral.as_ref(), + &mut element_children, + )?; + } + + if let Some(start_time) = &self.used_start_time { + let vint = VInt::::try_from(*start_time)?; + write_element(ctx, FileUsedStartTime_ID, &vint, &mut element_children)?; + } + + if let Some(end_time) = &self.used_end_time { + let vint = VInt::::try_from(*end_time)?; + write_element(ctx, FileUsedEndTime_ID, &vint, &mut element_children)?; + } + + write_element(ctx, Self::ID, &element_children.as_slice(), writer)?; + + Ok(()) + } +} diff --git a/lofty/src/ebml/tag/write/elements/mod.rs b/lofty/src/ebml/tag/write/elements/mod.rs new file mode 100644 index 000000000..7d8431d27 --- /dev/null +++ b/lofty/src/ebml/tag/write/elements/mod.rs @@ -0,0 +1,6 @@ +#![allow(non_upper_case_globals)] + +pub(super) mod attached_file; +pub(super) mod simple_tag; +pub(super) mod tags; +pub(super) mod target; diff --git a/lofty/src/ebml/tag/write/elements/simple_tag.rs b/lofty/src/ebml/tag/write/elements/simple_tag.rs new file mode 100644 index 000000000..89bbbecb0 --- /dev/null +++ b/lofty/src/ebml/tag/write/elements/simple_tag.rs @@ -0,0 +1,55 @@ +use crate::ebml::tag::write::{write_element, ElementWriterCtx, WriteableElement}; +use crate::ebml::{ElementId, Language, SimpleTag, TagValue}; +use crate::io::FileLike; + +const TagName_ID: ElementId = ElementId(0x45A3); +const TagLanguage_ID: ElementId = ElementId(0x447A); +const TagLanguageBcp47_ID: ElementId = ElementId(0x447B); +const TagDefault_ID: ElementId = ElementId(0x4484); +const TagString_ID: ElementId = ElementId(0x4487); +const TagBinary_ID: ElementId = ElementId(0x4485); + +impl WriteableElement for SimpleTag<'_> { + const ID: ElementId = ElementId(0x67C8); + + fn write_element( + &self, + ctx: ElementWriterCtx, + writer: &mut F, + ) -> crate::error::Result<()> { + let mut element_children = Vec::new(); + write_element(ctx, TagName_ID, &self.name.as_ref(), &mut element_children)?; + + match 
&self.language { + Language::Iso639_2(iso_639_2) => write_element( + ctx, + TagLanguage_ID, + &iso_639_2.as_str(), + &mut element_children, + )?, + Language::Bcp47(bcp47) => write_element( + ctx, + TagLanguageBcp47_ID, + &bcp47.as_str(), + &mut element_children, + )?, + } + + write_element(ctx, TagDefault_ID, &self.default, &mut element_children)?; + + if let Some(value) = self.value.as_ref() { + match value { + TagValue::String(s) => { + write_element(ctx, TagString_ID, &s.as_ref(), &mut element_children)? + }, + TagValue::Binary(b) => { + write_element(ctx, TagBinary_ID, &b.as_ref(), &mut element_children)? + }, + } + } + + write_element(ctx, Self::ID, &element_children.as_slice(), writer)?; + + Ok(()) + } +} diff --git a/lofty/src/ebml/tag/write/elements/tags.rs b/lofty/src/ebml/tag/write/elements/tags.rs new file mode 100644 index 000000000..d960a38e9 --- /dev/null +++ b/lofty/src/ebml/tag/write/elements/tags.rs @@ -0,0 +1,32 @@ +use crate::ebml::tag::write::{write_element, ElementWriterCtx, WriteableElement}; +use crate::ebml::{ElementId, TagRef}; +use crate::io::FileLike; + +use std::io::Cursor; + +impl WriteableElement for TagRef<'_> { + const ID: ElementId = ElementId(0x7373); + + fn write_element( + &self, + ctx: ElementWriterCtx, + writer: &mut F, + ) -> crate::error::Result<()> { + let mut element_children = Cursor::new(Vec::new()); + self.targets.write_element(ctx, &mut element_children)?; + + // TODO + // for simple_tag in self.simple_tags { + // simple_tag.write_element(ctx, &mut element_children)?; + // } + + write_element( + ctx, + Self::ID, + &element_children.get_ref().as_slice(), + writer, + )?; + + Ok(()) + } +} diff --git a/lofty/src/ebml/tag/write/elements/target.rs b/lofty/src/ebml/tag/write/elements/target.rs new file mode 100644 index 000000000..8b8cd58d5 --- /dev/null +++ b/lofty/src/ebml/tag/write/elements/target.rs @@ -0,0 +1,108 @@ +use crate::ebml::tag::write::{write_element, EbmlWriteExt, ElementWriterCtx, WriteableElement}; +use 
crate::ebml::{ElementId, TargetDescriptor, TargetType, VInt}; +use crate::io::FileLike; + +const TargetTypeValue_ID: ElementId = ElementId(0x68CA); +const TargetType_ID: ElementId = ElementId(0x63CA); +const TagTrackUID_ID: ElementId = ElementId(0x63C5); +const TagEditionUID_ID: ElementId = ElementId(0x63C9); +const TagChapterUID_ID: ElementId = ElementId(0x63C4); +const TagAttachmentUID_ID: ElementId = ElementId(0x63C6); + +impl WriteableElement for TargetDescriptor<'_> { + const ID: ElementId = ElementId(0x63C0); + + fn write_element( + &self, + ctx: ElementWriterCtx, + writer: &mut F, + ) -> crate::error::Result<()> { + if self.is_empty_candidate() { + writer.write_id(ctx, Self::ID)?; + writer.write_size(ctx, VInt::::ZERO)?; + return Ok(()); + } + + let mut element_children = Vec::new(); + + let target_type = self.target_type(); + if target_type == TargetType::Album { + write_element( + ctx, + TargetTypeValue_ID, + &[].as_slice(), + &mut element_children, + )?; + } else { + let vint = VInt::::try_from(target_type as u64)?; + write_element(ctx, TargetTypeValue_ID, &vint, &mut element_children)?; + } + + if let TargetDescriptor::Full(target) = self { + if let Some(name) = &target.name { + write_element(ctx, TargetType_ID, &name.as_str(), &mut element_children)?; + } + + if let Some(track_uids) = &target.track_uids { + for &uid in track_uids { + let vint = VInt::::try_from(uid)?; + write_element(ctx, TagTrackUID_ID, &vint, &mut element_children)?; + } + } + + if let Some(edition_uids) = &target.edition_uids { + for &uid in edition_uids { + let vint = VInt::::try_from(uid)?; + write_element(ctx, TagEditionUID_ID, &vint, &mut element_children)?; + } + } + + if let Some(chapter_uids) = &target.chapter_uids { + for &uid in chapter_uids { + let vint = VInt::::try_from(uid)?; + write_element(ctx, TagChapterUID_ID, &vint, &mut element_children)?; + } + } + + if let Some(attachment_uids) = &target.attachment_uids { + for &uid in attachment_uids { + let vint = 
VInt::::try_from(uid)?; + write_element(ctx, TagAttachmentUID_ID, &vint, &mut element_children)?; + } + } + } + + write_element(ctx, Self::ID, &element_children.as_slice(), writer)?; + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::ebml::Target; + + use std::io::Cursor; + + #[test_log::test] + fn write_empty_default() { + let target = Target::default(); + + let mut buf = Cursor::new(Vec::new()); + let target_descriptor = TargetDescriptor::from(&target); + target_descriptor + .write_element( + ElementWriterCtx { + max_id_len: 4, + max_size_len: 8, + }, + &mut buf, + ) + .unwrap(); + + let expected = vec![0x63, 0xC0, 0x80]; + + assert_eq!(buf.into_inner(), expected); + } +} diff --git a/lofty/src/ebml/tag/write/mod.rs b/lofty/src/ebml/tag/write/mod.rs new file mode 100644 index 000000000..27ace950a --- /dev/null +++ b/lofty/src/ebml/tag/write/mod.rs @@ -0,0 +1,48 @@ +mod elements; +mod type_encodings; + +use crate::ebml::{ElementId, VInt}; +use crate::error::Result; +use crate::io::FileLike; + +use std::io::Write; + +use type_encodings::ElementEncodable; + +#[derive(Copy, Clone)] +pub(crate) struct ElementWriterCtx { + pub(crate) max_id_len: u8, + pub(crate) max_size_len: u8, +} + +pub(crate) trait EbmlWriteExt: Write + Sized { + fn write_id(&mut self, ctx: ElementWriterCtx, id: ElementId) -> Result<()> { + id.write_to(Some(ctx.max_id_len), self)?; + Ok(()) + } + + fn write_size(&mut self, ctx: ElementWriterCtx, size: VInt) -> Result<()> { + VInt::::write_to(size.value(), None, Some(ctx.max_size_len), self)?; + Ok(()) + } +} + +impl EbmlWriteExt for T where T: Write {} + +pub(crate) trait WriteableElement { + const ID: ElementId; + + fn write_element(&self, ctx: ElementWriterCtx, writer: &mut F) -> Result<()>; +} + +pub(crate) fn write_element( + ctx: ElementWriterCtx, + id: ElementId, + element: &E, + writer: &mut W, +) -> Result<()> { + writer.write_id(ctx, id)?; + element.write_to(ctx, writer)?; + + Ok(()) +} diff --git 
a/lofty/src/ebml/tag/write/type_encodings.rs b/lofty/src/ebml/tag/write/type_encodings.rs new file mode 100644 index 000000000..e015ba9c4 --- /dev/null +++ b/lofty/src/ebml/tag/write/type_encodings.rs @@ -0,0 +1,124 @@ +use super::{EbmlWriteExt, ElementWriterCtx}; +use crate::ebml::{TagValue, VInt}; +use crate::error::Result; +use std::io::Write; + +use byteorder::WriteBytesExt; + +pub(crate) trait ElementEncodable { + fn len(&self) -> Result>; + + fn write_to(&self, ctx: ElementWriterCtx, writer: &mut W) -> Result<()>; +} + +impl ElementEncodable for VInt { + fn len(&self) -> Result> { + Ok(VInt(u64::from(self.octet_length()))) + } + + fn write_to(&self, ctx: ElementWriterCtx, writer: &mut W) -> Result<()> { + writer.write_size(ctx, self.len()?)?; + VInt::::write_to(self.value(), None, None, writer)?; + Ok(()) + } +} + +impl ElementEncodable for VInt { + fn len(&self) -> Result> { + Ok(VInt(u64::from(self.octet_length()))) + } + + fn write_to(&self, ctx: ElementWriterCtx, writer: &mut W) -> Result<()> { + writer.write_size(ctx, self.len()?)?; + VInt::::write_to(self.value() as u64, None, None, writer)?; + Ok(()) + } +} + +impl ElementEncodable for f32 { + fn len(&self) -> Result> { + Ok(VInt(size_of::() as u64)) + } + + fn write_to(&self, ctx: ElementWriterCtx, writer: &mut W) -> Result<()> { + if *self == 0.0 { + VInt::::write_to(VInt::::ZERO.value(), None, None, writer)?; + return Ok(()); + } + + writer.write_size(ctx, self.len()?)?; + writer.write_f32::(*self)?; + Ok(()) + } +} + +impl ElementEncodable for f64 { + fn len(&self) -> Result> { + Ok(VInt(size_of::() as u64)) + } + + fn write_to(&self, ctx: ElementWriterCtx, writer: &mut W) -> Result<()> { + if *self == 0.0 { + VInt::::write_to(VInt::::ZERO.value(), None, None, writer)?; + return Ok(()); + } + + writer.write_size(ctx, self.len()?)?; + writer.write_f64::(*self)?; + Ok(()) + } +} + +impl ElementEncodable for bool { + fn len(&self) -> Result> { + Ok(VInt(size_of::() as u64)) + } + + fn write_to(&self, 
ctx: ElementWriterCtx, writer: &mut W) -> Result<()> { + if *self { + VInt::(1).write_to(ctx, writer) + } else { + VInt::::ZERO.write_to(ctx, writer) + } + } +} + +impl ElementEncodable for &[u8] { + fn len(&self) -> Result> { + VInt::try_from(<[u8]>::len(self) as u64) + } + + fn write_to(&self, ctx: ElementWriterCtx, writer: &mut W) -> Result<()> { + writer.write_size(ctx, <&[u8] as ElementEncodable>::len(self)?)?; + writer.write_all(self)?; + Ok(()) + } +} + +impl ElementEncodable for &str { + fn len(&self) -> Result> { + VInt::try_from(str::len(self) as u64) + } + + fn write_to(&self, ctx: ElementWriterCtx, writer: &mut W) -> Result<()> { + writer.write_size(ctx, <&str as ElementEncodable>::len(self)?)?; + writer.write_all(self.as_bytes())?; + Ok(()) + } +} + +impl ElementEncodable for TagValue<'_> { + fn len(&self) -> Result> { + match self { + TagValue::String(s) => <&str as ElementEncodable>::len(&&**s), + TagValue::Binary(b) => <&[u8] as ElementEncodable>::len(&&**b), + } + } + + fn write_to(&self, ctx: ElementWriterCtx, writer: &mut W) -> Result<()> { + match self { + TagValue::String(s) => <&str as ElementEncodable>::write_to(&&**s, ctx, writer), + TagValue::Binary(b) => <&[u8] as ElementEncodable>::write_to(&&**b, ctx, writer), + } + } +} diff --git a/lofty/src/ebml/vint.rs b/lofty/src/ebml/vint.rs new file mode 100644 index 000000000..e428caa61 --- /dev/null +++ b/lofty/src/ebml/vint.rs @@ -0,0 +1,461 @@ +use crate::error::Result; +use crate::macros::err; +use std::fmt::UpperHex; + +use std::io::{Read, Write}; +use std::ops::{Add, Sub}; + +use byteorder::{ReadBytesExt, WriteBytesExt}; + +macro_rules! impl_vint { + ($($t:ty),*) => { + $( + paste::paste! 
{ + #[allow(trivial_numeric_casts)] + impl VInt<$t> { + /// The maximum value that can be represented by a `VInt` + pub const MAX: $t = <$t>::MAX >> (<$t>::BITS as u64 - Self::USABLE_BITS); + /// The minimum value that can be represented by a `VInt` + pub const MIN: $t = <$t>::MIN; + /// A `VInt` with a value of 0 + pub const ZERO: Self = Self(0); + + /// Gets the inner value of the `VInt` + /// + /// # Examples + /// + /// ```rust + /// use lofty::ebml::VInt; + /// + /// # fn main() -> lofty::error::Result<()> { + #[doc = " let vint = VInt::<" $t ">::try_from(2)?;"] + /// assert_eq!(vint.value(), 2); + /// # Ok(()) } + /// ``` + pub fn value(&self) -> $t { + self.0 + } + + /// Parse a `VInt` from a reader + /// + /// `max_length` can be used to specify the maximum number of octets the number should + /// occupy, otherwise it should be `8`. + /// + /// # Errors + /// + /// * The int cannot fit within the maximum width of 54 bits + /// + /// # Examples + /// + /// ```rust + /// use lofty::ebml::VInt; + /// + /// # fn main() -> lofty::error::Result<()> { + /// // This octet count (9) is too large to represent + /// let mut invalid_vint_reader = &[0b0000_0000_1]; + #[doc = " let invalid_vint = VInt::<" $t ">::parse(&mut &invalid_vint_reader[..], 8);"] + /// assert!(invalid_vint.is_err()); + /// + /// // This octet count (4) is too large to represent given our `max_length` + /// let mut invalid_vint_reader2 = &[0b0001_1111]; + #[doc = " let invalid_vint2 = VInt::<" $t ">::parse(&mut &invalid_vint_reader2[..], 3);"] + /// assert!(invalid_vint2.is_err()); + /// + /// // This value is small enough to represent + /// let mut valid_vint_reader = &[0b1000_0010]; + #[doc = " let valid_vint = VInt::<" $t ">::parse(&mut &valid_vint_reader[..], 8)?;"] + /// assert_eq!(valid_vint.value(), 2); + /// # Ok(()) } + /// ``` + pub fn parse(reader: &mut R, max_length: u8) -> Result + where + R: Read, + { + Ok(Self(parse_vint(reader, max_length, false)? 
as $t)) + } + + /// Represents the length of the `VInt` in octets + /// + /// NOTE: The value returned will always be <= 8 + /// + /// # Examples + /// + /// ```rust + /// use lofty::ebml::VInt; + /// + /// # fn main() -> lofty::error::Result<()> { + /// // Anything <= 254 will fit into a single octet + /// let vint = VInt::try_from(100u64)?; + /// assert_eq!(vint.octet_length(), 1); + /// + /// // A larger number will need to + /// let vint = VInt::try_from(500_000u64)?; + /// assert_eq!(vint.octet_length(), 3); + /// # Ok(()) } + /// ``` + pub fn octet_length(&self) -> u8 { + octet_length(self.0 as u64) + } + + /// Converts the `VInt` into a byte Vec + /// + /// * `min_length` can be used to specify the minimum number of octets the number should + /// occupy. + /// * `max_length` can be used to specify the maximum number of octets the number should + /// occupy. + /// + /// # Errors + /// + /// * The octet length is greater than `max_length` (if provided) + /// * `min_length` is greater than `max_length` OR `8` + /// * Unable to write to the buffer + /// + /// # Examples + /// + /// ```rust + /// use lofty::ebml::VInt; + /// + /// # fn main() -> lofty::error::Result<()> { + /// let vint = VInt::try_from(10u64)?; + /// let bytes = vint.as_bytes(None, None)?; + /// + /// assert_eq!(bytes, &[0b1000_1010]); + /// # Ok(()) } + /// ``` + pub fn as_bytes(&self, min_length: Option, max_length: Option) -> Result> { + let mut ret = Vec::with_capacity(8); + VInt::<$t>::write_to(self.0 as u64, min_length, max_length, &mut ret)?; + Ok(ret) + } + } + + impl Add for VInt<$t> { + type Output = Self; + + fn add(self, other: Self) -> Self::Output { + let val = self.0 + other.0; + assert!(val <= Self::MAX, "VInt overflow"); + + Self(val) + } + } + + impl Sub for VInt<$t> { + type Output = Self; + + fn sub(self, other: Self) -> Self::Output { + Self(self.0 - other.0) + } + } + + impl PartialEq<$t> for VInt<$t> { + fn eq(&self, other: &$t) -> bool { + self.0 == *other + } + } + + 
impl TryFrom<$t> for VInt<$t> { + type Error = crate::error::LoftyError; + + fn try_from(value: $t) -> Result { + if value > Self::MAX { + err!(BadVintSize); + } + + Ok(Self(value)) + } + } + } + )* + }; +} + +/// An EBML variable-size integer +/// +/// A `VInt` is an unsigned integer composed of up to 8 octets, with 7 usable bits per octet. +/// +/// To ensure safe construction of `VInt`s, users must create them through the `TryFrom` implementations or [`VInt::parse`]. +#[repr(transparent)] +#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Default)] +pub struct VInt(pub(crate) T); + +impl VInt { + // Each octet will shave a single bit off each byte + const USABLE_BITS_PER_BYTE: u64 = 7; + const MAX_OCTET_LENGTH: u64 = 8; + const USABLE_BITS: u64 = Self::MAX_OCTET_LENGTH * Self::USABLE_BITS_PER_BYTE; + + // Writes `value` big-endian with its length marker bit set, using at least `min_length` + // octets and failing with `BadVintSize` if more than `max_length` (or 8) would be needed + pub(crate) fn write_to( + mut value: u64, + min_length: Option, + max_length: Option, + writer: &mut W, + ) -> Result<()> + where + W: Write, + { + let octets = std::cmp::max(octet_length(value), min_length.unwrap_or(0)); + + // `max_length` may never raise the limit past `MAX_OCTET_LENGTH`, otherwise the marker + // shift below can overflow or more than 8 octets get written + let max_octets = std::cmp::min( + max_length.unwrap_or(Self::MAX_OCTET_LENGTH as u8), + Self::MAX_OCTET_LENGTH as u8, + ); + if octets > max_octets { + err!(BadVintSize); + } + + // Add the octet length + value |= 1 << (octets * (Self::USABLE_BITS_PER_BYTE as u8)); + + let mut byte_shift = (octets - 1) as i8; + while byte_shift >= 0 { + writer.write_u8((value >> (byte_shift * 8)) as u8)?; + byte_shift -= 1; + } + + Ok(()) + } +} + +impl_vint!(u64, i64); + +fn parse_vint(reader: &mut R, max_length: u8, retain_marker: bool) -> Result +where + R: Read, +{ + let start = reader.read_u8()?; + let octet_length = verify_length(start, max_length)?; + + let mut bytes_read = 1; + + let mut val = u64::from(start); + if !retain_marker { + val ^= 1 << start.ilog2(); + } + + while bytes_read < octet_length { + bytes_read += 1; + val = (val << 8) | u64::from(reader.read_u8()?); + } + + Ok(val) +} + +// Verify that the octet length is nonzero and <= 8 +fn verify_length(first_byte: u8, max_length: u8) -> Result { + // A value of
0b0000_0000 indicates either an invalid VInt, or one with an octet length > 8 + if first_byte == 0b0000_0000 { + err!(BadVintSize); + } + + // The marker's bit position gives the length: top bit set (ilog2 == 7) is 1 octet, + // lowest bit set (ilog2 == 0) is 8 octets + let octet_length = (VInt::<()>::MAX_OCTET_LENGTH as u32) - first_byte.ilog2(); + if octet_length > 8 || octet_length as u8 > max_length { + err!(BadVintSize); + } + + Ok(octet_length) +} + +// Counts the octets needed to hold `value` at `USABLE_BITS_PER_BYTE` bits per octet (always at least 1) +fn octet_length(mut value: u64) -> u8 { + let mut octets = 0; + loop { + octets += 1; + + value >>= VInt::<()>::USABLE_BITS_PER_BYTE; + if value == 0 { + break; + } + } + + octets +} + +/// An EBML element ID +/// +/// An `ElementId` is a [`VInt`], but with the following conditions: +/// +/// * The `VINT_MARKER` is retained after parsing +/// * When encoding, the minimum number of octets must be used +#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Default)] +pub struct ElementId(pub(crate) u64); + +impl ElementId { + /// Parse an `ElementId` from a reader + /// + /// An element ID is parsed similarly to a normal [`VInt`], but the `VINT_MARKER` is retained.
+ /// + /// # Errors + /// + /// * The ID cannot fit within the maximum width + /// + /// # Examples + /// + /// ```rust + /// use lofty::ebml::ElementId; + /// + /// # fn main() -> lofty::error::Result<()> { + /// // Parse the EBML header element ID + /// let mut reader = &[0x1A, 0x45, 0xDF, 0xA3][..]; + /// let id = ElementId::parse(&mut reader, 8)?; + /// assert_eq!(id, 0x1A45DFA3); + /// # Ok(()) } + /// ``` + pub fn parse(reader: &mut R, max_id_length: u8) -> Result + where + R: Read, + { + let val = parse_vint(reader, max_id_length, true)?; + Ok(Self(val)) + } + + /// Get the inner value of the `ElementId` + /// + /// # Examples + /// + /// ```rust + /// use lofty::ebml::ElementId; + /// + /// # fn main() -> lofty::error::Result<()> { + /// let id = ElementId::parse(&mut &[0x1A, 0x45, 0xDF, 0xA3][..], 8)?; + /// assert_eq!(id.value(), 0x1A45DFA3); + /// # Ok(()) } + /// ``` + pub fn value(&self) -> u64 { + self.0 + } + + /// Converts the `ElementId` into a byte Vec + /// + /// Unlike a [`VInt`], an `ElementId` **MUST** be encoded with the shortest possible octet length. + /// + /// * `max_length` can be used to specify the maximum number of octets the number should + /// occupy.
+ /// + /// # Errors + /// + /// * The ID is `0` (it has no `VINT_MARKER`) + /// * The octet length is greater than `max_length` (if provided) + /// * Unable to write to the buffer + /// + /// # Examples + /// + /// ```rust + /// use lofty::ebml::ElementId; + /// + /// const EBML_ID: [u8; 4] = [0x1A, 0x45, 0xDF, 0xA3]; + /// + /// # fn main() -> lofty::error::Result<()> { + /// let id = ElementId::parse(&mut &EBML_ID[..], 8)?; + /// let bytes = id.as_bytes(None)?; + /// + /// assert_eq!(bytes, &EBML_ID); + /// # Ok(()) } + /// ``` + pub fn as_bytes(self, max_length: Option) -> Result> { + let mut buf = Vec::with_capacity(8); + self.write_to(max_length, &mut buf)?; + Ok(buf) + } + + // Same as writing a VInt, but we need to remove the VINT_MARKER from the value first + pub(crate) fn write_to(self, max_length: Option, writer: &mut W) -> Result<()> { + let mut val = self.0; + + // A zero ID (e.g. `ElementId::default()`) has no marker to strip, and `ilog2` panics on zero + if val == 0 { + err!(BadVintSize); + } + + val ^= 1 << val.ilog2(); + VInt::<()>::write_to(val, None, max_length, writer)?; + Ok(()) + } +} + +impl PartialEq for ElementId { + fn eq(&self, other: &u64) -> bool { + self.0 == *other + } +} + +impl UpperHex for ElementId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::UpperHex::fmt(&self.0, f) + } +} + +#[cfg(test)] +mod tests { + use crate::ebml::VInt; + use std::io::Cursor; + + const VALID_REPRESENTATIONS_OF_2: [&[u8]; 8] = [ + &[0b1000_0010], + &[0b0100_0000, 0b0000_0010], + &[0b0010_0000, 0b0000_0000, 0b0000_0010], + &[0b0001_0000, 0b0000_0000, 0b0000_0000, 0b0000_0010], + &[0b0000_1000, 0b0000_0000, 0b0000_0000, 0b0000_0000, 0b0010], + &[ + 0b0000_0100, + 0b0000_0000, + 0b0000_0000, + 0b0000_0000, + 0b0000_0000, + 0b0000_0010, + ], + &[ + 0b0000_0010, + 0b0000_0000, + 0b0000_0000, + 0b0000_0000, + 0b0000_0000, + 0b0000_0000, + 0b0000_0010, + ], + &[ + 0b0000_0001, + 0b0000_0000, + 0b0000_0000, + 0b0000_0000, + 0b0000_0000, + 0b0000_0000, + 0b0000_0000, + 0b0000_0010, + ], + ]; + + #[test_log::test] + fn bytes_to_vint() { + for representation in VALID_REPRESENTATIONS_OF_2 { + assert_eq!( +
VInt::::parse(&mut Cursor::new(representation), 8) + .unwrap() + .value(), + 2 + ); + } + } + + #[test_log::test] + fn vint_to_bytes() { + for representation in VALID_REPRESENTATIONS_OF_2 { + let vint = VInt::::parse(&mut Cursor::new(representation), 8).unwrap(); + assert_eq!( + vint.as_bytes(Some(representation.len() as u8), None) + .unwrap(), + representation + ); + } + } + + #[test_log::test] + fn large_integers_should_fail() { + assert!(VInt::try_from(u64::MAX).is_err()); + assert!(VInt::try_from(i64::MAX).is_err()); + + let mut acc = 1000; + for _ in 0..16 { + assert!(VInt::try_from(u64::MAX - acc).is_err()); + acc *= 10; + } + } + + #[test_log::test] + fn maximum_possible_representable_vint() { + assert!(VInt::try_from(u64::MAX >> 8).is_ok()); + } + + #[test_log::test] + fn octet_lengths() { + let n = u64::MAX >> 8; + for i in 1u8..=7 { + assert_eq!(VInt::try_from(n >> (i * 7)).unwrap().octet_length(), 8 - i); + } + } +} diff --git a/lofty/src/error.rs b/lofty/src/error.rs index 4b945e37c..73f19c429 100644 --- a/lofty/src/error.rs +++ b/lofty/src/error.rs @@ -60,6 +60,9 @@ pub enum ErrorKind { /// Arises when attempting to use [`Atom::merge`](crate::mp4::Atom::merge) with mismatching identifiers AtomMismatch, + /// Arises when an EBML variable-size integer exceeds the maximum allowed size + BadVintSize, + // Conversions for external errors /// Errors that arise while parsing OGG pages OggPage(ogg_pager::PageError), @@ -555,6 +558,10 @@ impl Display for LoftyError { f, "MP4 Atom: Attempted to use `Atom::merge()` with mismatching identifiers" ), + ErrorKind::BadVintSize => write!( + f, + "EBML: Attempted to create a VInt with an invalid octet length" + ), // Files ErrorKind::TooMuchData => write!( diff --git a/lofty/src/file/file_type.rs b/lofty/src/file/file_type.rs index 30f7a13e8..6295c4611 100644 --- a/lofty/src/file/file_type.rs +++ b/lofty/src/file/file_type.rs @@ -13,6 +13,7 @@ pub enum FileType { Aac, Aiff, Ape, + Ebml, Flac, Mpeg, Mp4, @@ -34,6 +35,7 
@@ impl FileType { /// | `Ape` , `Mpc`, `WavPack` | `Ape` | /// | `Flac`, `Opus`, `Vorbis`, `Speex` | `VorbisComments` | /// | `Mp4` | `Mp4Ilst` | + /// | `Ebml` | `Matroska` | /// /// # Panics /// @@ -52,6 +54,7 @@ impl FileType { match self { FileType::Aac | FileType::Aiff | FileType::Mpeg | FileType::Wav => TagType::Id3v2, FileType::Ape | FileType::Mpc | FileType::WavPack => TagType::Ape, + FileType::Ebml => TagType::Matroska, FileType::Flac | FileType::Opus | FileType::Vorbis | FileType::Speex => { TagType::VorbisComments }, @@ -90,6 +93,7 @@ impl FileType { match tag_type { TagType::Ape => crate::ape::ApeTag::SUPPORTED_FORMATS.contains(self), + TagType::Matroska => crate::ebml::MatroskaTag::SUPPORTED_FORMATS.contains(self), TagType::Id3v1 => crate::id3::v1::Id3v1Tag::SUPPORTED_FORMATS.contains(self), TagType::Id3v2 => crate::id3::v2::Id3v2Tag::SUPPORTED_FORMATS.contains(self), TagType::Mp4Ilst => crate::mp4::Ilst::SUPPORTED_FORMATS.contains(self), @@ -137,6 +141,7 @@ impl FileType { "opus" => Some(Self::Opus), "flac" => Some(Self::Flac), "ogg" => Some(Self::Vorbis), + "mka" | "mkv" | "webm" => Some(Self::Ebml), "mp4" | "m4a" | "m4b" | "m4p" | "m4r" | "m4v" | "3gp" => Some(Self::Mp4), "mpc" | "mp+" | "mpp" => Some(Self::Mpc), "spx" => Some(Self::Speex), @@ -300,6 +305,7 @@ impl FileType { None }, 119 if buf.len() >= 4 && &buf[..4] == b"wvpk" => Some(Self::WavPack), + 26 if buf.starts_with(&[0x1A, 0x45, 0xDF, 0xA3]) => Some(Self::Ebml), _ if buf.len() >= 8 && &buf[4..8] == b"ftyp" => Some(Self::Mp4), _ if buf.starts_with(b"MPCK") || buf.starts_with(b"MP+") => Some(Self::Mpc), _ => None, diff --git a/lofty/src/id3/v2/items/attached_picture_frame.rs b/lofty/src/id3/v2/items/attached_picture_frame.rs index 79ecfb55a..005626f83 100644 --- a/lofty/src/id3/v2/items/attached_picture_frame.rs +++ b/lofty/src/id3/v2/items/attached_picture_frame.rs @@ -110,6 +110,7 @@ impl<'a> AttachedPictureFrame<'a> { let picture = Picture { pic_type, + file_name: None, mime_type, 
description, data: Cow::from(data), diff --git a/lofty/src/id3/v2/tag/tests.rs b/lofty/src/id3/v2/tag/tests.rs index 94220c6d9..87ac5b942 100644 --- a/lofty/src/id3/v2/tag/tests.rs +++ b/lofty/src/id3/v2/tag/tests.rs @@ -247,6 +247,7 @@ fn create_full_test_tag(version: Id3v2Version) -> Id3v2Tag { TextEncoding::Latin1, Picture { pic_type: PictureType::CoverFront, + file_name: None, mime_type: Some(MimeType::Png), description: None, data: read_path("tests/tags/assets/id3v2/test_full_cover.png").into(), @@ -312,6 +313,7 @@ fn issue_36() { let picture = Picture::new_unchecked( PictureType::CoverFront, + None, Some(MimeType::Jpeg), Some(String::from("cover")), picture_data, @@ -1359,6 +1361,7 @@ fn hold_back_4_character_txxx_description() { fn skip_reading_cover_art() { let p = Picture::new_unchecked( PictureType::CoverFront, + None, Some(MimeType::Jpeg), None, std::iter::repeat(0).take(50).collect::>(), diff --git a/lofty/src/lib.rs b/lofty/src/lib.rs index 22ef81ca3..986b128af 100644 --- a/lofty/src/lib.rs +++ b/lofty/src/lib.rs @@ -104,6 +104,7 @@ // proc macro hacks extern crate self as lofty; + pub(crate) mod _this_is_internal {} pub mod config; @@ -119,6 +120,7 @@ mod util; pub mod aac; pub mod ape; +pub mod ebml; pub mod flac; pub mod id3; pub mod iff; diff --git a/lofty/src/macros.rs b/lofty/src/macros.rs index 2ec5ff582..24e0175ba 100644 --- a/lofty/src/macros.rs +++ b/lofty/src/macros.rs @@ -50,6 +50,34 @@ macro_rules! decode_err { }; } +// Shorthand for FileEncodingError::new(FileType::Foo, "Message") +// +// Usage: +// +// - encode_err!(Variant, Message) +// - encode_err!(Message) +// +// or bail: +// +// - encode_err!(@BAIL Variant, Message) +// - encode_err!(@BAIL Message) +macro_rules! 
encode_err { + ($file_ty:ident, $reason:literal) => { + Into::::into(crate::error::FileEncodingError::new( + crate::file::FileType::$file_ty, + $reason, + )) + }; + ($reason:literal) => { + Into::::into(crate::error::FileEncodingError::from_description( + $reason, + )) + }; + (@BAIL $($file_ty:ident,)? $reason:literal) => { + return Err(encode_err!($($file_ty,)? $reason)) + }; +} + // A macro for handling the different `ParsingMode`s // // NOTE: All fields are optional, if `STRICT` or `RELAXED` are missing, it will @@ -95,4 +123,4 @@ macro_rules! parse_mode_choice { }; } -pub(crate) use {decode_err, err, parse_mode_choice, try_vec}; +pub(crate) use {decode_err, encode_err, err, parse_mode_choice, try_vec}; diff --git a/lofty/src/mp4/ilst/atom.rs b/lofty/src/mp4/ilst/atom.rs index b4d0a17e9..d81e17b9a 100644 --- a/lofty/src/mp4/ilst/atom.rs +++ b/lofty/src/mp4/ilst/atom.rs @@ -327,6 +327,7 @@ impl AtomData { /// /// let data = AtomData::Picture(Picture::new_unchecked( /// PictureType::CoverFront, + /// None, /// Some(MimeType::Jpeg), /// None, /// Vec::new(), diff --git a/lofty/src/mp4/ilst/mod.rs b/lofty/src/mp4/ilst/mod.rs index b01508be1..5d87b0dcc 100644 --- a/lofty/src/mp4/ilst/mod.rs +++ b/lofty/src/mp4/ilst/mod.rs @@ -290,6 +290,7 @@ impl Ilst { /// // Insert pictures /// ilst.insert_picture(Picture::new_unchecked( /// PictureType::Other, + /// None, /// Some(MimeType::Png), /// None, /// png_data, @@ -298,6 +299,7 @@ impl Ilst { /// # let jpeg_data = b"bar".to_vec(); /// ilst.insert_picture(Picture::new_unchecked( /// PictureType::Other, + /// None, /// Some(MimeType::Jpeg), /// None, /// jpeg_data, @@ -334,6 +336,7 @@ impl Ilst { /// // Insert a single picture /// ilst.insert_picture(Picture::new_unchecked( /// PictureType::Other, + /// None, /// Some(MimeType::Png), /// None, /// png_data, @@ -344,6 +347,7 @@ impl Ilst { /// // Insert another picture /// ilst.insert_picture(Picture::new_unchecked( /// PictureType::Other, + /// None, /// 
Some(MimeType::Jpeg), /// None, /// jpeg_data, @@ -1455,6 +1459,7 @@ mod tests { fn skip_reading_cover_art() { let p = Picture::new_unchecked( PictureType::CoverFront, + None, Some(MimeType::Jpeg), None, std::iter::repeat(0).take(50).collect::>(), diff --git a/lofty/src/mp4/ilst/read.rs b/lofty/src/mp4/ilst/read.rs index 42b7f7c70..ef53e4a02 100644 --- a/lofty/src/mp4/ilst/read.rs +++ b/lofty/src/mp4/ilst/read.rs @@ -393,6 +393,7 @@ where let picture_data = AtomData::Picture(Picture { pic_type: PictureType::Other, + file_name: None, mime_type, description: None, data: Cow::from(value), diff --git a/lofty/src/ogg/picture_storage.rs b/lofty/src/ogg/picture_storage.rs index fd3051794..7b2c9e915 100644 --- a/lofty/src/ogg/picture_storage.rs +++ b/lofty/src/ogg/picture_storage.rs @@ -79,6 +79,7 @@ pub trait OggPictureStorage: private::Sealed { /// // Add a front cover /// let front_cover = Picture::new_unchecked( /// PictureType::CoverFront, + /// None, /// Some(MimeType::Png), /// None, /// Vec::new(), @@ -92,6 +93,7 @@ pub trait OggPictureStorage: private::Sealed { /// // Replace the front cover with a back cover /// let back_cover = Picture::new_unchecked( /// PictureType::CoverBack, + /// None, /// Some(MimeType::Png), /// None, /// Vec::new(), @@ -103,8 +105,13 @@ pub trait OggPictureStorage: private::Sealed { /// assert_eq!(tag.pictures()[0].0.pic_type(), PictureType::CoverBack); /// /// // Use an out of bounds index - /// let another_picture = - /// Picture::new_unchecked(PictureType::Band, Some(MimeType::Png), None, Vec::new()); + /// let another_picture = Picture::new_unchecked( + /// PictureType::Band, + /// None, + /// Some(MimeType::Png), + /// None, + /// Vec::new(), + /// ); /// tag.set_picture(100, another_picture, PictureInformation::default()); /// /// assert_eq!(tag.pictures().len(), 2); @@ -135,6 +142,7 @@ pub trait OggPictureStorage: private::Sealed { /// # fn main() -> lofty::error::Result<()> { /// let front_cover = Picture::new_unchecked( /// 
PictureType::CoverFront, + /// None, /// Some(MimeType::Png), /// None, /// Vec::new(), @@ -171,6 +179,7 @@ pub trait OggPictureStorage: private::Sealed { /// // Add front and back covers /// let front_cover = Picture::new_unchecked( /// PictureType::CoverFront, + /// None, /// Some(MimeType::Png), /// None, /// Vec::new(), @@ -180,6 +189,7 @@ pub trait OggPictureStorage: private::Sealed { /// /// let back_cover = Picture::new_unchecked( /// PictureType::CoverBack, + /// None, /// Some(MimeType::Png), /// None, /// Vec::new(), diff --git a/lofty/src/ogg/read.rs b/lofty/src/ogg/read.rs index 32d75bade..ec5b2a3b8 100644 --- a/lofty/src/ogg/read.rs +++ b/lofty/src/ogg/read.rs @@ -150,6 +150,7 @@ where let picture = Picture { pic_type: PictureType::Other, + file_name: None, mime_type: Some(mime_type), description: None, data: Cow::from(picture_data), diff --git a/lofty/src/ogg/tag.rs b/lofty/src/ogg/tag.rs index cb36ed82b..9ac0fc4b8 100644 --- a/lofty/src/ogg/tag.rs +++ b/lofty/src/ogg/tag.rs @@ -905,6 +905,7 @@ mod tests { fn skip_reading_cover_art() { let p = Picture::new_unchecked( PictureType::CoverFront, + None, Some(MimeType::Jpeg), None, std::iter::repeat(0).take(50).collect::>(), diff --git a/lofty/src/picture.rs b/lofty/src/picture.rs index 6a75daf81..adc18c4d3 100644 --- a/lofty/src/picture.rs +++ b/lofty/src/picture.rs @@ -443,6 +443,8 @@ impl PictureInformation { pub struct Picture { /// The picture type according to ID3v2 APIC pub(crate) pic_type: PictureType, + /// A file name for the picture, only used in Matroska + pub(crate) file_name: Option>, /// The picture's mimetype pub(crate) mime_type: Option, /// The picture's description @@ -455,6 +457,7 @@ impl Debug for Picture { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { f.debug_struct("Picture") .field("pic_type", &self.pic_type) + .field("file_name", &self.file_name) .field("mime_type", &self.mime_type) .field("description", &self.description) .field("data", &format!("<{} bytes>", 
self.data.len())) @@ -490,6 +493,7 @@ impl Picture { Ok(Self { pic_type: PictureType::Other, + file_name: None, mime_type: Some(mime_type), description: None, data: data.into(), @@ -498,17 +502,22 @@ impl Picture { /// Create a new `Picture` /// - /// NOTE: This will **not** verify `data`'s signature. + /// NOTES: + /// + /// * This will **not** verify `data`'s signature. /// This should only be used if all data has been verified /// beforehand. + /// * `file_name` is only used in Matroska pub fn new_unchecked( pic_type: PictureType, + file_name: Option, mime_type: Option, description: Option, data: Vec, ) -> Self { Self { pic_type, + file_name: file_name.map(Cow::Owned), mime_type, description: description.map(Cow::Owned), data: Cow::Owned(data), @@ -704,6 +713,7 @@ impl Picture { return Ok(( Self { pic_type: PictureType::from_u8(pic_ty as u8), + file_name: None, mime_type, description, data: Cow::from(data), @@ -785,6 +795,7 @@ impl Picture { Ok(Picture { pic_type, + file_name: None, mime_type: Some(mime_type), description, data, @@ -806,6 +817,7 @@ impl Picture { // A placeholder that is needed during conversions. 
pub(crate) const TOMBSTONE_PICTURE: Picture = Picture { pic_type: PictureType::Other, + file_name: None, mime_type: None, description: None, data: Cow::Owned(Vec::new()), diff --git a/lofty/src/probe.rs b/lofty/src/probe.rs index 110c74101..a2d76b951 100644 --- a/lofty/src/probe.rs +++ b/lofty/src/probe.rs @@ -19,6 +19,7 @@ use crate::ogg::vorbis::VorbisFile; use crate::resolve::custom_resolvers; use crate::wavpack::WavPackFile; +use crate::ebml::EbmlFile; use std::fs::File; use std::io::{BufReader, Cursor, Read, Seek, SeekFrom}; use std::path::Path; @@ -468,6 +469,7 @@ impl Probe { FileType::Aac => AacFile::read_from(reader, options)?.into(), FileType::Aiff => AiffFile::read_from(reader, options)?.into(), FileType::Ape => ApeFile::read_from(reader, options)?.into(), + FileType::Ebml => EbmlFile::read_from(reader, options)?.into(), FileType::Flac => FlacFile::read_from(reader, options)?.into(), FileType::Mpeg => MpegFile::read_from(reader, options)?.into(), FileType::Opus => OpusFile::read_from(reader, options)?.into(), diff --git a/lofty/src/properties/tests.rs b/lofty/src/properties/tests.rs index af65b4345..120194e30 100644 --- a/lofty/src/properties/tests.rs +++ b/lofty/src/properties/tests.rs @@ -1,6 +1,10 @@ use crate::aac::{AACProperties, AacFile}; use crate::ape::{ApeFile, ApeProperties}; use crate::config::ParseOptions; +use crate::ebml::{ + AudioTrackDescriptor, AudioTrackSettings, EbmlFile, EbmlHeaderProperties, EbmlProperties, + Language, SegmentInfo, +}; use crate::file::AudioFile; use crate::flac::{FlacFile, FlacProperties}; use crate::iff::aiff::{AiffFile, AiffProperties}; @@ -67,6 +71,47 @@ const FLAC_PROPERTIES: FlacProperties = FlacProperties { signature: 164_506_065_180_489_231_127_156_351_872_182_799_315, }; +#[allow(non_snake_case)] +fn MKA_PROPERTIES() -> EbmlProperties { + EbmlProperties { + header: EbmlHeaderProperties { + version: 1, + read_version: 1, + max_id_length: 4, + max_size_length: 8, + doc_type: String::from("matroska"), + 
doc_type_version: 4, + doc_type_read_version: 0, + }, + extensions: Vec::new(), + segment_info: SegmentInfo { + timestamp_scale: 1_000_000, + muxing_app: String::from("Lavf60.3.100"), + writing_app: String::from("Lavf60.3.100"), + duration: Some(Duration::from_millis(1431)), + }, + audio_tracks: vec![AudioTrackDescriptor { + number: 1, + uid: 18_181_673_715_630_629_642, + enabled: true, + default: false, + language: Language::Iso639_2(String::from("und")), + default_duration: 0, + codec_id: String::from("A_VORBIS"), + codec_private: None, + codec_name: None, + settings: AudioTrackSettings { + sampling_frequency: 48000.0, + output_sampling_frequency: 0.0, + channels: 2, + bit_depth: Some(32), + emphasis: None, + bitrate: Some(99), // TODO: FFmpeg reports 97, not bad + }, + }], + } +} + const MP1_PROPERTIES: MpegProperties = MpegProperties { version: MpegVersion::V1, layer: Layer::Layer1, @@ -325,6 +370,14 @@ fn flac_properties() { ) } +#[test_log::test] +fn mka_properties() { + assert_eq!( + get_properties::("tests/files/assets/minimal/full_test.mka"), + MKA_PROPERTIES() + ); +} + #[test_log::test] fn mp1_properties() { assert_eq!( diff --git a/lofty/src/tag/companion_tag.rs b/lofty/src/tag/companion_tag.rs index 6c36bf35d..9e37d43de 100644 --- a/lofty/src/tag/companion_tag.rs +++ b/lofty/src/tag/companion_tag.rs @@ -1,3 +1,4 @@ +use crate::ebml::MatroskaTag; use crate::id3::v2::Id3v2Tag; use crate::mp4::Ilst; @@ -5,6 +6,7 @@ use crate::mp4::Ilst; pub(crate) enum CompanionTag { Id3v2(Id3v2Tag), Ilst(Ilst), + Matroska(MatroskaTag), } impl CompanionTag { @@ -21,4 +23,11 @@ impl CompanionTag { _ => None, } } + + pub(crate) fn matroska(self) -> Option { + match self { + CompanionTag::Matroska(tag) => Some(tag), + _ => None, + } + } } diff --git a/lofty/src/tag/item.rs b/lofty/src/tag/item.rs index 7dbcc309e..b38aa5aac 100644 --- a/lofty/src/tag/item.rs +++ b/lofty/src/tag/item.rs @@ -902,6 +902,12 @@ impl TagItem { return VALID_ITEMKEYS.contains(&self.item_key); } + if 
tag_type == TagType::Matroska { + use crate::ebml::tag::SUPPORTED_ITEMKEYS; + + return SUPPORTED_ITEMKEYS.contains(&self.item_key); + } + self.item_key.map_key(tag_type, false).is_some() } } diff --git a/lofty/src/tag/items/lang.rs b/lofty/src/tag/items/lang.rs index 3816e9a8f..854910bd6 100644 --- a/lofty/src/tag/items/lang.rs +++ b/lofty/src/tag/items/lang.rs @@ -1,6 +1,6 @@ /// A three character language code, as specified by [ISO-639-2]. /// -/// For now, this is used exclusively in ID3v2. +/// For now, this is only used in ID3v2 and Matroska (when available). /// /// Excerpt from : /// diff --git a/lofty/src/tag/mod.rs b/lofty/src/tag/mod.rs index 36140ca44..b94b7ca08 100644 --- a/lofty/src/tag/mod.rs +++ b/lofty/src/tag/mod.rs @@ -566,6 +566,7 @@ impl Tag { /// // Add a front cover /// let front_cover = Picture::new_unchecked( /// PictureType::CoverFront, + /// None, /// Some(MimeType::Png), /// None, /// Vec::new(), @@ -578,6 +579,7 @@ impl Tag { /// // Replace the front cover with a back cover /// let back_cover = Picture::new_unchecked( /// PictureType::CoverBack, + /// None, /// Some(MimeType::Png), /// None, /// Vec::new(), @@ -588,8 +590,13 @@ impl Tag { /// assert_eq!(tag.pictures()[0].pic_type(), PictureType::CoverBack); /// /// // Use an out of bounds index - /// let another_picture = - /// Picture::new_unchecked(PictureType::Band, Some(MimeType::Png), None, Vec::new()); + /// let another_picture = Picture::new_unchecked( + /// PictureType::Band, + /// None, + /// Some(MimeType::Png), + /// None, + /// Vec::new(), + /// ); /// tag.set_picture(100, another_picture); /// /// assert_eq!(tag.pictures().len(), 2); @@ -618,6 +625,7 @@ impl Tag { /// /// let picture = Picture::new_unchecked( /// PictureType::CoverFront, + /// None, /// Some(MimeType::Png), /// None, /// Vec::new(), diff --git a/lofty/src/tag/split_merge_tag.rs b/lofty/src/tag/split_merge_tag.rs index 1a76844b7..4d5e59a7e 100644 --- a/lofty/src/tag/split_merge_tag.rs +++ 
b/lofty/src/tag/split_merge_tag.rs @@ -70,6 +70,7 @@ pub trait MergeTag: private::Sealed { // https://rust-lang.github.io/api-guidelines/future-proofing.html#c-sealed mod private { use crate::ape::ApeTag; + use crate::ebml::MatroskaTag; use crate::id3::v1::Id3v1Tag; use crate::id3::v2::Id3v2Tag; use crate::iff::aiff::AiffTextChunks; @@ -85,6 +86,9 @@ mod private { impl Sealed for ApeTag {} impl Sealed for crate::ape::tag::SplitTagRemainder {} + impl Sealed for MatroskaTag {} + impl Sealed for crate::ebml::tag::SplitTagRemainder {} + impl Sealed for Id3v1Tag {} impl Sealed for crate::id3::v1::tag::SplitTagRemainder {} diff --git a/lofty/src/tag/tag_ext.rs b/lofty/src/tag/tag_ext.rs index d7b91e0ec..34f1caca6 100644 --- a/lofty/src/tag/tag_ext.rs +++ b/lofty/src/tag/tag_ext.rs @@ -153,6 +153,7 @@ pub trait TagExt: Accessor + Into + Sized + private::Sealed { // https://rust-lang.github.io/api-guidelines/future-proofing.html#c-sealed mod private { use crate::ape::ApeTag; + use crate::ebml::MatroskaTag; use crate::id3::v1::Id3v1Tag; use crate::id3::v2::Id3v2Tag; use crate::iff::aiff::AiffTextChunks; @@ -165,6 +166,7 @@ mod private { impl Sealed for AiffTextChunks {} impl Sealed for ApeTag {} + impl Sealed for MatroskaTag {} impl Sealed for Id3v1Tag {} impl Sealed for Id3v2Tag {} impl Sealed for Ilst {} diff --git a/lofty/src/tag/tag_type.rs b/lofty/src/tag/tag_type.rs index f995a968d..6abf4b65e 100644 --- a/lofty/src/tag/tag_type.rs +++ b/lofty/src/tag/tag_type.rs @@ -15,6 +15,8 @@ use std::path::Path; pub enum TagType { /// This covers both APEv1 and APEv2 as it doesn't matter much Ape, + /// Represents a `\Segment\Tags` element in Matroska/WebM + Matroska, /// Represents an ID3v1 tag Id3v1, /// This covers all ID3v2 versions since they all get upgraded to ID3v2.4 diff --git a/lofty/src/tag/utils.rs b/lofty/src/tag/utils.rs index a949ba772..bd14d24a9 100644 --- a/lofty/src/tag/utils.rs +++ b/lofty/src/tag/utils.rs @@ -4,7 +4,7 @@ use crate::file::FileType; use 
crate::macros::err; use crate::tag::{Tag, TagType}; use crate::util::io::{FileLike, Length, Truncate}; -use crate::{aac, ape, flac, iff, mpeg, musepack, wavpack}; +use crate::{aac, ape, ebml, flac, iff, mpeg, musepack, wavpack}; use crate::id3::v1::tag::Id3v1TagRef; use crate::id3::v2::tag::Id3v2TagRef; @@ -34,6 +34,7 @@ where FileType::Aac => aac::write::write_to(file, tag, write_options), FileType::Aiff => iff::aiff::write::write_to(file, tag, write_options), FileType::Ape => ape::write::write_to(file, tag, write_options), + FileType::Ebml => ebml::write::write_to(file, tag, write_options), FileType::Flac => flac::write::write_to(file, tag, write_options), FileType::Opus | FileType::Speex | FileType::Vorbis => { crate::ogg::write::write_to(file, tag, file_type, write_options) @@ -98,6 +99,10 @@ pub(crate) fn dump_tag( } } .dump_to(writer, write_options), + TagType::Matroska => ebml::tag::MatroskaTagRef { + tags: ebml::tag::simple_tags_for_tag(tag), + } + .dump_to(writer, write_options), _ => Ok(()), } } diff --git a/lofty/tests/files/aac.rs b/lofty/tests/files/aac.rs index 737ec0519..fd3dacd07 100644 --- a/lofty/tests/files/aac.rs +++ b/lofty/tests/files/aac.rs @@ -12,7 +12,7 @@ fn read() { // Here we have an AAC file with an ID3v2, and an ID3v1 tag let file = Probe::open("tests/files/assets/minimal/full_test.aac") .unwrap() - .options(ParseOptions::new().read_properties(false)) + .options(ParseOptions::new()) .read() .unwrap(); diff --git a/lofty/tests/files/aiff.rs b/lofty/tests/files/aiff.rs index 5076ee211..6ed464427 100644 --- a/lofty/tests/files/aiff.rs +++ b/lofty/tests/files/aiff.rs @@ -12,7 +12,7 @@ fn read() { // Here we have an AIFF file with both an ID3v2 chunk and text chunks let file = Probe::open("tests/files/assets/minimal/full_test.aiff") .unwrap() - .options(ParseOptions::new().read_properties(false)) + .options(ParseOptions::new()) .read() .unwrap(); diff --git a/lofty/tests/files/ape.rs b/lofty/tests/files/ape.rs index 93ae133d1..009179567 
100644 --- a/lofty/tests/files/ape.rs +++ b/lofty/tests/files/ape.rs @@ -12,7 +12,7 @@ fn read() { // Here we have an APE file with an ID3v2, ID3v1, and an APEv2 tag let file = Probe::open("tests/files/assets/minimal/full_test.ape") .unwrap() - .options(ParseOptions::new().read_properties(false)) + .options(ParseOptions::new()) .read() .unwrap(); diff --git a/lofty/tests/files/assets/matroska-test-files/README.md b/lofty/tests/files/assets/matroska-test-files/README.md new file mode 100644 index 000000000..4164da4b4 --- /dev/null +++ b/lofty/tests/files/assets/matroska-test-files/README.md @@ -0,0 +1 @@ +This contains the files from the official Matroska test suite here: https://github.com/ietf-wg-cellar/matroska-test-files diff --git a/lofty/tests/files/assets/matroska-test-files/Release.txt b/lofty/tests/files/assets/matroska-test-files/Release.txt new file mode 100644 index 000000000..b16175ae3 --- /dev/null +++ b/lofty/tests/files/assets/matroska-test-files/Release.txt @@ -0,0 +1,162 @@ +Matroska Test Files - Wave 1 + +This suite of files was created to validate the various Matroska players, +parsers to make sure users get a consistent experience when moving +their files on various programs/hardware. Since Matroska has a lot of +features, it is hard to tell which are essential, which are encouraged and +which are deprecated. The files presented here represent the minimum +support a player should have to fully qualify as a Matroska player. + +Codecs + +Matroska can support any codec that is around. That doesn't mean +softwares should support all of them. For various reasons softwares and +hardwares can't always be upgraded to support all codecs around. But in +the other hand there are a few safe codecs that are often found in +Matroska and WebM that should be supported. 
These codecs can be found +in various resolutions and features so even with the right codec support, +it is not guaranteed that an implementation may support all the possibilities +of a codec (it is hardly ever the case). This document will not cover codec +details. But here is a list of codecs that are commonly found in Matroska: + +Video codecs +- H264/AVC/MPEG4 Part 10, usually up to 1080p +- MPEG4 Part 2, usually up to 720p +- VP8, usually up to 720p +- Theora, usually up to 720p + +Audio codecs +- MPEG Audio Layer 3 (MP3) +- Vorbis +- AAC, AAC+, eAAC+ +- AC-3 +- DTS +- FLAC + +Subtitles codecs +- plain UTF-8 text +- ASS/SSA text +- VOBSUB (bitmaps from DVDs) +- Audio only files + +It is important to note that audio can also be used in audio only files, +usually with the .mka extension. Those files should be handled as well, as +long as the codec is supported. + +Extra features + +There are a number of features that are not essential to the playback +experience but could really improve it, like support for tags, cover art, +embedded fonts, segment linking. We won't blame you if you don't support +these, but your users/customers will probably ask for it at some point. +There is also 3D support that is meant to grow in the coming years. +Matroska should be able to support all the formats, but given the subject +is really new, it's not covered by this suite of files. + +** Test Files ** + +1. Basic file + +This file is the absolute minimum a compliant player should be able to +handle. + +The sample comes from the Big Buck Bunny open project. It contains MPEG4.2 +(DivX) video, (854x480) MP3 audio, uses only SimpleBlock (matroska DocType v2) + +2. Non default timecodescale & aspect ratio + +This file has different features that need to be looked at carefully. The +main one is the global TimecodeScale in the SegmentInfo is set to 100,000 +rather than the default 1,000,000. That value affects the values of the file +Duration in the Segment and the Clusters Timecode.
The aspect ratio has +also been stretched artificially to represent a 2.35 movie (from the original +16:9 aspect ratio). This file also contains CRC-32 values in the EBML +header, the MetaSeek, the Segment Info, the Tracks and the Tags and +PrevSize/Position in the Clusters for better error recovery. + +It contains H264 (1024x576 pixels), and stereo AAC. The source material is +taken from the Elephant Dreams video project + +3. Header stripping & standard block + +This file is using BlockGroup+Block only for audio and video frames. It also +removes 2 bytes off each video and audio frame since they are all equal. +These 2 bytes have to be put back in the frame before decoding. This file +also contains CRC-32 values in the EBML header, the MetaSeek, the +Segment Info, the Tracks and the Tags and PrevSize/Position in the +Clusters for better error recovery. + +It contains H264 (1024x576 pixels), and stereo MP3. The source material +is taken from the Elephant Dreams video project + +4. Live stream recording + +This file is using the EBML feature that allows Master elements to have no +known size. It is used for live streams because they don't know ahead of +time the size of the Segment (virtually infinite) and even sometimes the +size of the Clusters (no caching on the server side). The first timecode of +the file also doesn't start at 0 since it's supposed to be a capture from +something continuous. The SegmentInfo also doesn't contain any Duration +as it is not known. + +The sample comes from the Big Buck Bunny open project. It contains Theora +video (1280x720), Vorbis audio, uses only SimpleBlock (matroska DocType v2) + +A similar file can be created with mkclean using the "--live" option + +5. Multiple audio/subtitles + +This has a main audio track in English and a secondary audio track in +English. It also has subtitles in English, French, German, Hungarian, +Spanish, Italian and Japanese. The player should provide the possibility to +switch between these streams. 
+ +The sample contains H264 (1024x576 pixels), and stereo AAC and +commentary in AAC+ (using SBR). The source material is taken from the +Elephant Dreams video project + +6. Different EBML head sizes & cue-less seeking + +This file is a test of the EBML parser of the player. The size of the +Segment and Block/SimpleBlock is coded using 1 (or the minimum possible +size) and 8 bytes randomly. The file also has no Cues entry. So +seeking should be disabled or look for Cluster boundaries in the stream +(much slower than using Cues). + +The sample comes from the Big Buck Bunny open project. It contains +MPEG4.2 (DivX) video, (854x480) MP3 audio, uses only SimpleBlock +(matroska DocType v2) + +7. Extra unknown/junk elements & damaged + +This file contains junk elements (elements not defined in the specs) either +at the beginning or the end of Clusters. These elements should be skipped. +There is also an invalid element at 451417 that should be skipped until the +next valid Cluster is found. + +The sample contains H264 (1024x576 pixels), and stereo AAC. The source +material is taken from the Elephant Dreams video project + +8. Audio gap + +This file has a few audio frames missing between timecodes 6.019s and +6.360s. The playback should not stop, and if possible the video should not +be skipped where the audio is missing + +The sample contains H264 (1024x576 pixels), and stereo AAC. The source +material is taken from the Elephant Dreams video project + +Tools + +All these files were created with mkvmerge and mkclean. They also pass +the mkvalidator test tool (the test file 4 needs the --live option to +correctly validate the file), except for the damaged file, as it is damaged. 
+ +Contact + +If you have any questions about these files please contact us at contact@matroska.org + +Changelog + +2011-06-02 - w1_1: fix test2.mkv which had a duration not based on TimecodeScale +2010-08-21 - w1: initial version diff --git a/lofty/tests/files/assets/matroska-test-files/test1.mkv b/lofty/tests/files/assets/matroska-test-files/test1.mkv new file mode 100644 index 000000000..37e30f4dc Binary files /dev/null and b/lofty/tests/files/assets/matroska-test-files/test1.mkv differ diff --git a/lofty/tests/files/assets/matroska-test-files/test2.mkv b/lofty/tests/files/assets/matroska-test-files/test2.mkv new file mode 100644 index 000000000..56f290ba2 Binary files /dev/null and b/lofty/tests/files/assets/matroska-test-files/test2.mkv differ diff --git a/lofty/tests/files/assets/matroska-test-files/test3.mkv b/lofty/tests/files/assets/matroska-test-files/test3.mkv new file mode 100644 index 000000000..35c823fa7 Binary files /dev/null and b/lofty/tests/files/assets/matroska-test-files/test3.mkv differ diff --git a/lofty/tests/files/assets/matroska-test-files/test4.mkv b/lofty/tests/files/assets/matroska-test-files/test4.mkv new file mode 100644 index 000000000..0d7ec1676 Binary files /dev/null and b/lofty/tests/files/assets/matroska-test-files/test4.mkv differ diff --git a/lofty/tests/files/assets/matroska-test-files/test5.mkv b/lofty/tests/files/assets/matroska-test-files/test5.mkv new file mode 100644 index 000000000..6aa824020 Binary files /dev/null and b/lofty/tests/files/assets/matroska-test-files/test5.mkv differ diff --git a/lofty/tests/files/assets/matroska-test-files/test6.mkv b/lofty/tests/files/assets/matroska-test-files/test6.mkv new file mode 100644 index 000000000..6e53921ac Binary files /dev/null and b/lofty/tests/files/assets/matroska-test-files/test6.mkv differ diff --git a/lofty/tests/files/assets/matroska-test-files/test7.mkv b/lofty/tests/files/assets/matroska-test-files/test7.mkv new file mode 100644 index 000000000..84a84d4e4 Binary files 
/dev/null and b/lofty/tests/files/assets/matroska-test-files/test7.mkv differ diff --git a/lofty/tests/files/assets/matroska-test-files/test8.mkv b/lofty/tests/files/assets/matroska-test-files/test8.mkv new file mode 100644 index 000000000..2630f73b4 Binary files /dev/null and b/lofty/tests/files/assets/matroska-test-files/test8.mkv differ diff --git a/lofty/tests/files/assets/minimal/full_test.mka b/lofty/tests/files/assets/minimal/full_test.mka new file mode 100644 index 000000000..19fb2cb72 Binary files /dev/null and b/lofty/tests/files/assets/minimal/full_test.mka differ diff --git a/lofty/tests/files/flac.rs b/lofty/tests/files/flac.rs index c79830808..2880bebf4 100644 --- a/lofty/tests/files/flac.rs +++ b/lofty/tests/files/flac.rs @@ -16,16 +16,14 @@ fn multiple_vorbis_comments() { // not allowed by spec. assert!(FlacFile::read_from( &mut file, - ParseOptions::new() - .read_properties(false) - .parsing_mode(ParsingMode::Strict) + ParseOptions::new().parsing_mode(ParsingMode::Strict) ) .is_err()); file.rewind().unwrap(); // But by default, we should just take the last tag in the stream - let f = FlacFile::read_from(&mut file, ParseOptions::new().read_properties(false)).unwrap(); + let f = FlacFile::read_from(&mut file, ParseOptions::new()).unwrap(); // The first tag has the artist "Artist 1", the second has "Artist 2". 
assert_eq!(
diff --git a/lofty/tests/files/main.rs b/lofty/tests/files/main.rs
index 8466dc9f7..ce24f2bb7 100644
--- a/lofty/tests/files/main.rs
+++ b/lofty/tests/files/main.rs
@@ -4,6 +4,7 @@ mod aac;
 mod aiff;
 mod ape;
 mod flac;
+mod matroska;
 mod mp4;
 mod mpc;
 mod mpeg;
diff --git a/lofty/tests/files/matroska.rs b/lofty/tests/files/matroska.rs
new file mode 100644
index 000000000..c83f57f28
--- /dev/null
+++ b/lofty/tests/files/matroska.rs
@@ -0,0 +1,70 @@
+use crate::{set_artist, temp_file, verify_artist};
+use lofty::config::ParseOptions;
+use lofty::file::FileType;
+use lofty::prelude::*;
+use lofty::probe::Probe;
+use lofty::tag::TagType;
+
+use std::io::Seek; // provides `rewind()`, used in `write()`
+
+#[test_log::test]
+fn read() {
+	// This file contains a tags element; it is probed by path with default parse options
+	let file = Probe::open("tests/files/assets/minimal/full_test.mka")
+		.unwrap()
+		.options(ParseOptions::new())
+		.read()
+		.unwrap();
+
+	assert_eq!(file.file_type(), FileType::Ebml);
+
+	// Verify the tag: the primary tag is expected to hold 1 item, with the artist "Foo artist"
+	crate::verify_artist!(file, primary_tag, "Foo artist", 1);
+}
+
+#[test_log::test]
+fn write() {
+	let mut file = temp_file!("tests/files/assets/minimal/full_test.mka");
+
+	let mut tagged_file = Probe::new(&mut file)
+		.options(ParseOptions::new().read_properties(false)) // property parsing is switched off for this test
+		.guess_file_type()
+		.unwrap()
+		.read()
+		.unwrap();
+
+	assert_eq!(tagged_file.file_type(), FileType::Ebml);
+
+	// Tags (NOTE(review): `set_artist!` is defined outside this file; presumably it checks for "Foo artist" and writes "Bar artist" to `file` - confirm)
+	crate::set_artist!(tagged_file, tag_mut, TagType::Matroska, "Foo artist", 1 => file, "Bar artist");
+
+	// Now reread the file from the start; the macro below is invoked with the two artist values swapped
+	file.rewind().unwrap();
+
+	let mut tagged_file = Probe::new(&mut file)
+		.options(ParseOptions::new().read_properties(false))
+		.guess_file_type()
+		.unwrap()
+		.read()
+		.unwrap();
+
+	crate::set_artist!(tagged_file, tag_mut, TagType::Matroska, "Bar artist", 1 => file, "Foo artist");
+}
+
+#[test_log::test]
+fn remove() { // NOTE(review): `remove_tag!` is defined outside this file; presumably it removes the Matroska tag and checks it is gone - confirm
+	crate::remove_tag!(
+		"tests/files/assets/minimal/full_test.mka",
+		TagType::Matroska
+	);
+}
+
+#[test_log::test]
+fn read_no_properties() { // NOTE(review): presumably reads the asset with property parsing disabled - confirm against `no_properties_test!`
+	crate::no_properties_test!("tests/files/assets/minimal/full_test.mka");
+}
+
+#[test_log::test]
+fn read_no_tags() { // NOTE(review): presumably reads the asset with tag parsing disabled - confirm against `no_tag_test!`
+	crate::no_tag_test!("tests/files/assets/minimal/full_test.mka");
+}
diff --git a/lofty/tests/files/mp4.rs b/lofty/tests/files/mp4.rs
index fc9c0d189..375a68591 100644
--- a/lofty/tests/files/mp4.rs
+++ b/lofty/tests/files/mp4.rs
@@ -12,7 +12,7 @@ fn read() {
 	// This file contains an ilst atom
 	let file = Probe::open("tests/files/assets/minimal/m4a_codec_aac.m4a")
 		.unwrap()
-		.options(ParseOptions::new().read_properties(false))
+		.options(ParseOptions::new())
 		.read()
 		.unwrap();
 
diff --git a/lofty/tests/files/mpc.rs b/lofty/tests/files/mpc.rs
index e3a251d53..99cacb518 100644
--- a/lofty/tests/files/mpc.rs
+++ b/lofty/tests/files/mpc.rs
@@ -20,7 +20,7 @@ macro_rules! generate_tests {
 			// Here we have an MPC file with an ID3v2, ID3v1, and an APEv2 tag
 			let file = Probe::open($path)
 				.unwrap()
-				.options(ParseOptions::new().read_properties(false))
+				.options(ParseOptions::new())
 				.read()
 				.unwrap();
 
diff --git a/lofty/tests/files/mpeg.rs b/lofty/tests/files/mpeg.rs
index bae4ae529..7cc4e2d61 100644
--- a/lofty/tests/files/mpeg.rs
+++ b/lofty/tests/files/mpeg.rs
@@ -15,7 +15,7 @@ fn read() {
 	// Here we have an MP3 file with an ID3v2, ID3v1, and an APEv2 tag
 	let file = Probe::open("tests/files/assets/minimal/full_test.mp3")
 		.unwrap()
-		.options(ParseOptions::new().read_properties(false))
+		.options(ParseOptions::new())
 		.read()
 		.unwrap();
 
diff --git a/lofty/tests/files/ogg.rs b/lofty/tests/files/ogg.rs
index 3fb792e83..a9164da70 100644
--- a/lofty/tests/files/ogg.rs
+++ b/lofty/tests/files/ogg.rs
@@ -86,7 +86,7 @@ fn speex_remove() {
 fn read(path: &str, file_type: FileType) {
 	let file = Probe::open(path)
 		.unwrap()
-		.options(ParseOptions::new().read_properties(false))
+		.options(ParseOptions::new())
 		.read()
 		.unwrap();
 
diff --git a/lofty/tests/files/util/mod.rs b/lofty/tests/files/util/mod.rs
index 8fd12fb4a..a9625e12b 100644
--- a/lofty/tests/files/util/mod.rs
+++ b/lofty/tests/files/util/mod.rs @@ -73,13 +73,9 @@ macro_rules! verify_artist { assert_eq!(tag.item_count(), $item_count); - assert_eq!( - tag.get(&lofty::prelude::ItemKey::TrackArtist), - Some(&lofty::tag::TagItem::new( - lofty::prelude::ItemKey::TrackArtist, - lofty::tag::ItemValue::Text(String::from($expected_value)) - )) - ); + let item = tag.get(&lofty::prelude::ItemKey::TrackArtist).expect("tag should contain artist"); + assert_eq!(item.key(), &lofty::prelude::ItemKey::TrackArtist); + assert_eq!(item.value(), &lofty::tag::ItemValue::Text(String::from($expected_value))); tag }}; diff --git a/lofty/tests/files/wav.rs b/lofty/tests/files/wav.rs index 9fc5ede4f..745a1ed72 100644 --- a/lofty/tests/files/wav.rs +++ b/lofty/tests/files/wav.rs @@ -12,7 +12,7 @@ fn read() { // Here we have a WAV file with both an ID3v2 chunk and a RIFF INFO chunk let file = Probe::open("tests/files/assets/minimal/wav_format_pcm.wav") .unwrap() - .options(ParseOptions::new().read_properties(false)) + .options(ParseOptions::new()) .read() .unwrap(); diff --git a/lofty/tests/files/wavpack.rs b/lofty/tests/files/wavpack.rs index 28c97e1df..dbc4df73c 100644 --- a/lofty/tests/files/wavpack.rs +++ b/lofty/tests/files/wavpack.rs @@ -12,7 +12,7 @@ fn read() { // Here we have a WacPack file with both an ID3v1 tag and an APE tag let file = Probe::open("tests/files/assets/minimal/full_test.wv") .unwrap() - .options(ParseOptions::new().read_properties(false)) + .options(ParseOptions::new()) .read() .unwrap(); diff --git a/lofty/tests/fuzz/assets/ebmlfile_read_from/.gitkeep b/lofty/tests/fuzz/assets/ebmlfile_read_from/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/lofty/tests/fuzz/ebmlfile_read_from.rs b/lofty/tests/fuzz/ebmlfile_read_from.rs new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/lofty/tests/fuzz/ebmlfile_read_from.rs @@ -0,0 +1 @@ + diff --git a/lofty/tests/fuzz/main.rs b/lofty/tests/fuzz/main.rs index 1ce29900d..36dc310b2 100644 --- 
a/lofty/tests/fuzz/main.rs
+++ b/lofty/tests/fuzz/main.rs
@@ -11,6 +11,7 @@ use std::time::Instant;
 mod aacfile_read_from;
 mod aifffile_read_from;
 mod apefile_read_from;
+mod ebmlfile_read_from;
 mod flacfile_read_from;
 mod id3v2;
 mod mp4file_read_from;
diff --git a/lofty_attr/src/ebml.rs b/lofty_attr/src/ebml.rs
new file mode 100644
index 000000000..22cd4a4cb
--- /dev/null
+++ b/lofty_attr/src/ebml.rs
@@ -0,0 +1,144 @@
+//! Parsing for the input of the `ebml_master_elements!` macro.
+//!
+//! As implemented by the parsers below, the input is a comma-separated list of master
+//! elements, each of the form:
+//!
+//! ```text
+//! MasterName: {
+//!     id: <integer literal>,
+//!     children: [
+//!         ChildName: { <integer literal>, DataType },
+//!     ],
+//! }
+//! ```
+
+use proc_macro::TokenStream;
+use std::collections::HashMap;
+use syn::parse::{Parse, Parser};
+use syn::punctuated::Punctuated;
+use syn::{braced, bracketed, Ident, Token};
+
+/// A master element declaration: a name, a colon, and a braced [`EbmlMasterInfo`]
+#[derive(Debug)]
+pub(crate) struct EbmlMasterElement {
+	pub(crate) readable_ident: Ident,
+	pub(crate) info: EbmlMasterInfo,
+}
+
+impl Parse for EbmlMasterElement {
+	fn parse(input: syn::parse::ParseStream<'_>) -> syn::Result<Self> {
+		let readable_ident = input.parse::<Ident>()?;
+		let _: syn::Token![:] = input.parse()?;
+
+		let info;
+		braced!(info in input);
+
+		Ok(Self {
+			readable_ident,
+			info: info.parse::<EbmlMasterInfo>()?,
+		})
+	}
+}
+
+/// The braced body of a master element: its ID and the children declared for it
+#[derive(Debug)]
+pub(crate) struct EbmlMasterInfo {
+	pub(crate) id: u64,
+	pub(crate) children: Vec<EbmlChildElement>,
+}
+
+impl Parse for EbmlMasterInfo {
+	fn parse(input: syn::parse::ParseStream<'_>) -> syn::Result<Self> {
+		// The two field labels are consumed positionally; their spelling is not checked
+		let _id_field = input.parse::<Ident>()?;
+		let _: syn::Token![:] = input.parse()?;
+
+		let id = input.parse::<syn::LitInt>()?.base10_parse()?;
+		let _: syn::Token![,] = input.parse()?;
+
+		let _children_field = input.parse::<Ident>()?;
+		let _: syn::Token![:] = input.parse()?;
+
+		let children;
+		bracketed!(children in input);
+
+		let children = children
+			.parse_terminated(EbmlChildElement::parse, syn::Token![,])?
+			.into_iter()
+			.collect();
+
+		// A comma after the closing bracket is optional
+		let _trailing_comma = input.parse::<syn::Token![,]>().ok();
+
+		Ok(Self { id, children })
+	}
+}
+
+/// A child element declaration: a name, a colon, and a braced [`EbmlChildInfo`]
+#[derive(Debug)]
+pub(crate) struct EbmlChildElement {
+	pub(crate) readable_ident: Ident,
+	pub(crate) info: EbmlChildInfo,
+}
+
+impl Parse for EbmlChildElement {
+	fn parse(input: syn::parse::ParseStream<'_>) -> syn::Result<Self> {
+		let readable_ident = input.parse::<Ident>()?;
+		let _: syn::Token![:] = input.parse()?;
+
+		let info;
+		braced!(info in input);
+
+		Ok(Self {
+			readable_ident,
+			info: info.parse::<EbmlChildInfo>()?,
+		})
+	}
+}
+
+/// The braced body of a child element: its ID, a comma, and the name of its data type
+#[derive(Debug)]
+pub(crate) struct EbmlChildInfo {
+	pub(crate) id: u64,
+	pub(crate) data_type: Ident,
+}
+
+impl Parse for EbmlChildInfo {
+	fn parse(input: syn::parse::ParseStream<'_>) -> syn::Result<Self> {
+		let id = input.parse::<syn::LitInt>()?.base10_parse()?;
+		let _: syn::Token![,] = input.parse()?;
+
+		let data_type = input.parse::<Ident>()?;
+
+		Ok(Self { id, data_type })
+	}
+}
+
+/// Records the name -> ID mapping of `element` and of each of its children
+///
+/// A name that is already present has its ID overwritten.
+fn insert_element_identifiers(identifiers: &mut HashMap<Ident, u64>, element: &EbmlMasterElement) {
+	identifiers.insert(element.readable_ident.clone(), element.info.id);
+	for child in &element.info.children {
+		identifiers.insert(child.readable_ident.clone(), child.info.id);
+	}
+}
+
+/// Parses the macro input as a comma-separated list of [`EbmlMasterElement`]s
+///
+/// Returns the name -> ID mapping of every declared master and child element, alongside
+/// the master elements in declaration order.
+pub(crate) fn parse_ebml_master_elements(
+	input: TokenStream,
+) -> syn::Result<(HashMap<Ident, u64>, Vec<EbmlMasterElement>)> {
+	let mut element_identifiers = HashMap::new();
+
+	let parser = Punctuated::<EbmlMasterElement, Token![,]>::parse_terminated;
+	let elements = parser.parse(input)?;
+
+	for element in &elements {
+		insert_element_identifiers(&mut element_identifiers, element);
+	}
+
+	Ok((element_identifiers, elements.into_iter().collect()))
+}
diff --git a/lofty_attr/src/internal.rs b/lofty_attr/src/internal.rs
index 5a1978440..f178c7e20 100644
--- a/lofty_attr/src/internal.rs
+++ b/lofty_attr/src/internal.rs
@@ -9,9 +9,9 @@ use quote::quote;
 pub(crate) fn opt_internal_file_type(
 	struct_name: String,
 ) -> Option<(proc_macro2::TokenStream, bool)> {
-	const LOFTY_FILE_TYPES: [&str; 12] = [
-		"Aac", "Aiff", "Ape", "Flac", "Mpeg", "Mp4", "Mpc", "Opus", "Vorbis", "Speex", "Wav",
-		"WavPack",
+	const LOFTY_FILE_TYPES: [&str; 13] = [
+		"Aac", "Aiff", "Ape", "Ebml", "Flac", "Mpeg", "Mp4", "Mpc", "Opus", "Vorbis", "Speex",
+		"Wav", "WavPack",
 	];
 
 	const ID3V2_STRIPPABLE: [&str; 2] = ["Flac", "Ape"];
@@ -51,6 +51,13 @@ pub(crate) fn init_write_lookup(
 		.write_to(file, write_options)
 	});
 
+	insert!(map, Matroska, {
+		lofty::ebml::tag::MatroskaTagRef {
+			tags: lofty::ebml::tag::simple_tags_for_tag(tag),
+		}
+		.write_to(file, write_options)
+	});
+
 	insert!(map, Id3v1, {
 		Into::>::into(tag).write_to(file, write_options)
 	});
diff --git a/lofty_attr/src/lib.rs b/lofty_attr/src/lib.rs
index 6ba10c031..ad96a45f2 100644
--- a/lofty_attr/src/lib.rs
+++ b/lofty_attr/src/lib.rs
@@ -34,6 +34,7 @@
 )]
 
 mod attribute;
+mod ebml;
 mod internal;
 mod lofty_file;
 mod lofty_tag;
@@ -43,6 +44,7 @@ use crate::lofty_file::LoftyFile;
 use crate::lofty_tag::{LoftyTag, LoftyTagAttribute};
 
 use proc_macro::TokenStream;
+use quote::quote;
 use syn::{parse_macro_input, ItemStruct};
 
 /// Creates a file usable by Lofty
@@ -66,3 +68,71 @@ pub fn tag(args_input: TokenStream, input: TokenStream) -> TokenStream {
 	let lofty_tag = LoftyTag::new(attribute, input);
 	lofty_tag.emit()
 }
+
+/// Generates the `ElementIdent` enum and the `master_elements()` lookup function
+///
+/// The accepted input is defined by the parsers in the `ebml` module. The generated code refers
+/// to `ElementId`, `MasterElement`, `ChildElementDescriptor` and `ElementDataType` by bare name,
+/// so those must be in scope wherever the macro is invoked.
+#[proc_macro]
+#[doc(hidden)]
+pub fn ebml_master_elements(input: TokenStream) -> TokenStream {
+	let (identifiers, elements) = match ebml::parse_ebml_master_elements(input) {
+		Ok(parsed) => parsed,
+		Err(err) => return TokenStream::from(err.to_compile_error()),
+	};
+
+	let elements_map_inserts = elements.iter().map(|element| {
+		let readable_ident = &element.readable_ident;
+		let id = element.info.id;
+		let children = element.info.children.iter().map(|child| {
+			let readable_ident = &child.readable_ident;
+			let id = child.info.id;
+			let data_type = &child.info.data_type;
+			quote! {
+				(ElementId(#id), ChildElementDescriptor {
+					ident: ElementIdent::#readable_ident,
+					data_type: ElementDataType::#data_type,
+				})
+			}
+		});
+
+		quote! {
+			m.insert(
+				ElementId(#id),
+				MasterElement {
+					id: ElementIdent::#readable_ident,
+					children: &[#( #children ),*][..]
+				}
+			);
+		}
+	});
+
+	// `identifiers` comes out of a `HashMap`, whose iteration order is unspecified. Sort by ID
+	// so that the variants are emitted in the same order on every build.
+	let mut identifiers = identifiers.into_iter().collect::<Vec<_>>();
+	identifiers.sort_unstable_by_key(|(_, id)| *id);
+
+	let ident_variants = identifiers.iter().map(|(ident, id)| {
+		quote! {
+			#ident = #id,
+		}
+	});
+
+	TokenStream::from(quote! {
+		#[derive(Copy, Clone, Eq, PartialEq, Debug)]
+		#[repr(u64)]
+		pub(crate) enum ElementIdent {
+			#( #ident_variants )*
+		}
+
+		fn master_elements() -> &'static ::std::collections::HashMap<ElementId, MasterElement> {
+			static INSTANCE: ::std::sync::OnceLock<::std::collections::HashMap<ElementId, MasterElement>> = ::std::sync::OnceLock::new();
+			INSTANCE.get_or_init(|| {
+				let mut m = ::std::collections::HashMap::new();
+				#( #elements_map_inserts )*
+				m
+			})
+		}
+	})
+}
diff --git a/scripts/update-matroska-tags.py b/scripts/update-matroska-tags.py
new file mode 100644
index 000000000..323058cb3
--- /dev/null
+++ b/scripts/update-matroska-tags.py
@@ -0,0 +1,77 @@
+"""Regenerates `lofty/src/ebml/tag/tag_name.rs` from the Matroska tag registry.
+
+Downloads `matroska_tags.xml` from the Matroska specification repository and rewrites the
+`TagName` enum together with its conversion into `Cow<'static, str>`.
+"""
+
+from pathlib import Path
+from requests import get
+import xml.etree.ElementTree as ET
+
+MATROSKA_TAGS_XML = "https://github.com/ietf-wg-cellar/matroska-specification/raw/refs/heads/master/matroska_tags.xml"
+# Anchored to this script's location so the output path does not depend on the
+# directory the script is started from
+TAGNAME_RS = Path(__file__).resolve().parent.parent / "lofty" / "src" / "ebml" / "tag" / "tag_name.rs"
+# Seconds to wait on the server before giving up
+REQUEST_TIMEOUT = 30
+FILE_HEADER = """// !!! DO NOT EDIT !!!
+// !!! THIS FILE IS GENERATED BY `scripts/update-matroska-tags.py` !!!
+
+use std::borrow::Cow;"""
+TAGNAME_ENUM = """/// A list of all specified Matroska tag names
+///
+/// The tag list is available [here](https://matroska.org/technical/tagging.html). It provides
+/// descriptions and expected data types of each tag.
+#[rustfmt::skip]
+#[allow(missing_docs)]
+#[non_exhaustive]
+#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
+pub enum TagName {
+"""
+
+
+def camel_case(s):
+    """Turn a tag name such as `TOTAL_PARTS` into a Rust variant name (`TotalParts`)."""
+    return ''.join(x for x in s.title() if x.isalnum())
+
+
+def get_tags(elem):
+    """Collect the `name` and `class` attributes of every child of `elem`, in document order."""
+    return [{'name': tag_def.get('name'), 'class': tag_def.get('class')} for tag_def in elem]
+
+
+def main():
+    """Download the tag registry and rewrite `TAGNAME_RS` from it."""
+    response = get(MATROSKA_TAGS_XML, timeout=REQUEST_TIMEOUT)
+    # Stop on an HTTP error instead of handing an error page to the XML parser
+    response.raise_for_status()
+    root = ET.fromstring(response.text)
+
+    # NOTE(review): assumes the tag definitions are the second child of the root - confirm against the XML
+    tags = get_tags(root[1])
+
+    file_content = FILE_HEADER + "\n\n" + TAGNAME_ENUM
+    current_class = ""
+    for tag in tags:
+        if tag['class'] != current_class:
+            current_class = tag['class']
+            file_content += f"\n\t// {current_class}\n"
+        file_content += f"\t{camel_case(tag['name'])},\n"
+    file_content += '}'
+
+    file_content += "\n\n"
+
+    file_content += """impl From<TagName> for Cow<'static, str> {
+	fn from(value: TagName) -> Self {
+		match value {"""
+    for tag in tags:
+        file_content += f"\n\t\t\tTagName::{camel_case(tag['name'])} => Cow::Borrowed(\"{tag['name']}\"),"
+    file_content += "\n\t\t}\n\t}\n}\n"
+
+    # Pin the encoding and line endings so the generated file is identical on every platform
+    with open(TAGNAME_RS, 'w', encoding='utf-8', newline='\n') as f:
+        f.write(file_content)
+
+
+if __name__ == "__main__":
+    main()