symphonia-format-mkv-0.5.4/.cargo_vcs_info.json0000644000000001620000000000100151310ustar { "git": { "sha1": "d3b7742fa73674b70d9ab80cc5f8384cc653df3a" }, "path_in_vcs": "symphonia-format-mkv" }symphonia-format-mkv-0.5.4/Cargo.toml0000644000000023440000000000100131330ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2018" rust-version = "1.53" name = "symphonia-format-mkv" version = "0.5.4" authors = ["Dariusz Niedoba "] description = "Pure Rust MKV/WebM demuxer (a part of project Symphonia)." homepage = "https://github.com/pdeljanov/Symphonia" readme = "README.md" keywords = [ "media", "demuxer", "mkv", "matroska", "webm", ] categories = [ "multimedia", "multimedia::audio", "multimedia::encoding", ] license = "MPL-2.0" repository = "https://github.com/pdeljanov/Symphonia" [dependencies.lazy_static] version = "1.4.0" [dependencies.log] version = "0.4" [dependencies.symphonia-core] version = "0.5.4" [dependencies.symphonia-metadata] version = "0.5.4" [dependencies.symphonia-utils-xiph] version = "0.5.4" symphonia-format-mkv-0.5.4/Cargo.toml.orig000064400000000000000000000014121046102023000166070ustar 00000000000000[package] name = "symphonia-format-mkv" version = "0.5.4" description = "Pure Rust MKV/WebM demuxer (a part of project Symphonia)." 
homepage = "https://github.com/pdeljanov/Symphonia" repository = "https://github.com/pdeljanov/Symphonia" authors = ["Dariusz Niedoba "] license = "MPL-2.0" readme = "README.md" categories = ["multimedia", "multimedia::audio", "multimedia::encoding"] keywords = ["media", "demuxer", "mkv", "matroska", "webm"] edition = "2018" rust-version = "1.53" [dependencies] log = "0.4" lazy_static = "1.4.0" symphonia-core = { version = "0.5.4", path = "../symphonia-core" } symphonia-metadata = { version = "0.5.4", path = "../symphonia-metadata" } symphonia-utils-xiph = { version = "0.5.4", path = "../symphonia-utils-xiph" }symphonia-format-mkv-0.5.4/README.md000064400000000000000000000010431046102023000151770ustar 00000000000000# Symphonia MKV/WebM Demuxer MKV/WebM demuxer for Project Symphonia. **Note:** This crate is part of Symphonia. Please use the [`symphonia`](https://crates.io/crates/symphonia) crate instead of this one directly. ## License Symphonia is provided under the MPL v2.0 license. Please refer to the LICENSE file for more details. ## Contributing Symphonia is a free and open-source project that welcomes contributions! To get started, please read our [Contribution Guidelines](https://github.com/pdeljanov/Symphonia/tree/master/CONTRIBUTING.md). symphonia-format-mkv-0.5.4/src/codecs.rs000064400000000000000000000035771046102023000163330ustar 00000000000000// Symphonia // Copyright (c) 2019-2022 The Project Symphonia Developers. // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
use symphonia_core::codecs; use symphonia_core::codecs::CodecType; use crate::segment::TrackElement; pub(crate) fn codec_id_to_type(track: &TrackElement) -> Option { let bit_depth = track.audio.as_ref().and_then(|a| a.bit_depth); match track.codec_id.as_str() { "A_MPEG/L1" => Some(codecs::CODEC_TYPE_MP1), "A_MPEG/L2" => Some(codecs::CODEC_TYPE_MP2), "A_MPEG/L3" => Some(codecs::CODEC_TYPE_MP3), "A_FLAC" => Some(codecs::CODEC_TYPE_FLAC), "A_OPUS" => Some(codecs::CODEC_TYPE_OPUS), "A_VORBIS" => Some(codecs::CODEC_TYPE_VORBIS), "A_AAC/MPEG2/MAIN" | "A_AAC/MPEG2/LC" | "A_AAC/MPEG2/LC/SBR" | "A_AAC/MPEG2/SSR" | "A_AAC/MPEG4/MAIN" | "A_AAC/MPEG4/LC" | "A_AAC/MPEG4/LC/SBR" | "A_AAC/MPEG4/SSR" | "A_AAC/MPEG4/LTP" | "A_AAC" => Some(codecs::CODEC_TYPE_AAC), "A_PCM/INT/BIG" => match bit_depth? { 16 => Some(codecs::CODEC_TYPE_PCM_S16BE), 24 => Some(codecs::CODEC_TYPE_PCM_S24BE), 32 => Some(codecs::CODEC_TYPE_PCM_S32BE), _ => None, }, "A_PCM/INT/LIT" => match bit_depth? { 16 => Some(codecs::CODEC_TYPE_PCM_S16LE), 24 => Some(codecs::CODEC_TYPE_PCM_S24LE), 32 => Some(codecs::CODEC_TYPE_PCM_S32LE), _ => None, }, "A_PCM/FLOAT/IEEE" => match bit_depth? { 32 => Some(codecs::CODEC_TYPE_PCM_F32LE), 64 => Some(codecs::CODEC_TYPE_PCM_F64LE), _ => None, }, _ => { log::info!("unknown codec: {}", &track.codec_id); None } } } symphonia-format-mkv-0.5.4/src/demuxer.rs000064400000000000000000000524021046102023000165330ustar 00000000000000// Symphonia // Copyright (c) 2019-2022 The Project Symphonia Developers. // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
use std::collections::{HashMap, VecDeque}; use std::convert::TryFrom; use std::io::{Seek, SeekFrom}; use symphonia_core::audio::Layout; use symphonia_core::codecs::{CodecParameters, CODEC_TYPE_FLAC, CODEC_TYPE_VORBIS}; use symphonia_core::errors::{ decode_error, end_of_stream_error, seek_error, unsupported_error, Error, Result, SeekErrorKind, }; use symphonia_core::formats::{ Cue, FormatOptions, FormatReader, Packet, SeekMode, SeekTo, SeekedTo, Track, }; use symphonia_core::io::{BufReader, MediaSource, MediaSourceStream, ReadBytes}; use symphonia_core::meta::{Metadata, MetadataLog}; use symphonia_core::probe::Instantiate; use symphonia_core::probe::{Descriptor, QueryDescriptor}; use symphonia_core::sample::SampleFormat; use symphonia_core::support_format; use symphonia_core::units::TimeBase; use symphonia_utils_xiph::flac::metadata::{MetadataBlockHeader, MetadataBlockType}; use crate::codecs::codec_id_to_type; use crate::ebml::{EbmlElement, ElementHeader, ElementIterator}; use crate::element_ids::{ElementType, ELEMENTS}; use crate::lacing::{extract_frames, read_xiph_sizes, Frame}; use crate::segment::{ BlockGroupElement, ClusterElement, CuesElement, InfoElement, SeekHeadElement, TagsElement, TracksElement, }; #[allow(dead_code)] pub struct TrackState { /// Codec parameters. pub(crate) codec_params: CodecParameters, /// The track number. track_num: u32, /// Default frame duration in nanoseconds. pub(crate) default_frame_duration: Option, } /// Matroska (MKV) and WebM demultiplexer. /// /// `MkvReader` implements a demuxer for the Matroska and WebM formats. 
pub struct MkvReader { /// Iterator over EBML element headers iter: ElementIterator, tracks: Vec, track_states: HashMap, current_cluster: Option, metadata: MetadataLog, cues: Vec, frames: VecDeque, timestamp_scale: u64, clusters: Vec, } #[derive(Debug)] struct ClusterState { timestamp: Option, end: Option, } fn vorbis_extra_data_from_codec_private(extra: &[u8]) -> Result> { const VORBIS_PACKET_TYPE_IDENTIFICATION: u8 = 1; const VORBIS_PACKET_TYPE_SETUP: u8 = 5; // Private Data for this codec has the following layout: // - 1 byte that represents number of packets minus one; // - Xiph coded lengths of packets, length of the last packet must be deduced (as in Xiph lacing) // - packets in order: // - The Vorbis identification header // - Vorbis comment header // - codec setup header let mut reader = BufReader::new(extra); let packet_count = reader.read_byte()? as usize; let packet_lengths = read_xiph_sizes(&mut reader, packet_count)?; let mut packets = Vec::new(); for length in packet_lengths { packets.push(reader.read_boxed_slice_exact(length as usize)?); } let last_packet_length = extra.len() - reader.pos() as usize; packets.push(reader.read_boxed_slice_exact(last_packet_length)?); let mut ident_header = None; let mut setup_header = None; for packet in packets { match packet.first().copied() { Some(VORBIS_PACKET_TYPE_IDENTIFICATION) => { ident_header = Some(packet); } Some(VORBIS_PACKET_TYPE_SETUP) => { setup_header = Some(packet); } _ => { log::debug!("unsupported vorbis packet type"); } } } // This is layout expected currently by Vorbis codec. 
Ok([ ident_header.ok_or(Error::DecodeError("mkv: missing vorbis identification packet"))?, setup_header.ok_or(Error::DecodeError("mkv: missing vorbis setup packet"))?, ] .concat() .into_boxed_slice()) } fn flac_extra_data_from_codec_private(codec_private: &[u8]) -> Result> { let mut reader = BufReader::new(codec_private); let marker = reader.read_quad_bytes()?; if marker != *b"fLaC" { return decode_error("mkv (flac): missing flac stream marker"); } let header = MetadataBlockHeader::read(&mut reader)?; loop { match header.block_type { MetadataBlockType::StreamInfo => { break Ok(reader.read_boxed_slice_exact(header.block_len as usize)?); } _ => reader.ignore_bytes(u64::from(header.block_len))?, } } } impl MkvReader { fn seek_track_by_ts_forward(&mut self, track_id: u32, ts: u64) -> Result { let actual_ts = 'out: loop { // Skip frames from the buffer until the given timestamp while let Some(frame) = self.frames.front() { if frame.timestamp + frame.duration >= ts && frame.track == track_id { break 'out frame.timestamp; } else { self.frames.pop_front(); } } self.next_element()? }; Ok(SeekedTo { track_id, required_ts: ts, actual_ts }) } fn seek_track_by_ts(&mut self, track_id: u32, ts: u64) -> Result { if self.clusters.is_empty() { self.seek_track_by_ts_forward(track_id, ts) } else { let mut target_cluster = None; for cluster in &self.clusters { if cluster.timestamp > ts { break; } target_cluster = Some(cluster); } let cluster = target_cluster.ok_or(Error::SeekError(SeekErrorKind::OutOfRange))?; let mut target_block = None; for block in cluster.blocks.iter() { if block.track as u32 != track_id { continue; } if block.timestamp > ts { break; } target_block = Some(block); } let pos = match target_block { Some(block) => block.pos, None => cluster.pos, }; self.iter.seek(pos)?; // Restore cluster's metadata self.current_cluster = Some(ClusterState { timestamp: Some(cluster.timestamp), end: cluster.end }); // Seek to a specified block inside the cluster. 
self.seek_track_by_ts_forward(track_id, ts) } } fn next_element(&mut self) -> Result<()> { if let Some(ClusterState { end: Some(end), .. }) = &self.current_cluster { // Make sure we don't read past the current cluster if its size is known. if self.iter.pos() >= *end { // log::debug!("ended cluster"); self.current_cluster = None; } } // Each Cluster is being read incrementally so we need to keep track of // which cluster we are currently in. let header = match self.iter.read_child_header()? { Some(header) => header, None => { // If we reached here, it must be an end of stream. return end_of_stream_error(); } }; match header.etype { ElementType::Cluster => { self.current_cluster = Some(ClusterState { timestamp: None, end: header.end() }); } ElementType::Timestamp => match self.current_cluster.as_mut() { Some(cluster) => { cluster.timestamp = Some(self.iter.read_u64()?); } None => { self.iter.ignore_data()?; log::warn!("timestamp element outside of a cluster"); return Ok(()); } }, ElementType::SimpleBlock => { let cluster_ts = match self.current_cluster.as_ref() { Some(ClusterState { timestamp: Some(ts), .. }) => *ts, Some(_) => { self.iter.ignore_data()?; log::warn!("missing cluster timestamp"); return Ok(()); } None => { self.iter.ignore_data()?; log::warn!("simple block element outside of a cluster"); return Ok(()); } }; let data = self.iter.read_boxed_slice()?; extract_frames( &data, None, &self.track_states, cluster_ts, self.timestamp_scale, &mut self.frames, )?; } ElementType::BlockGroup => { let cluster_ts = match self.current_cluster.as_ref() { Some(ClusterState { timestamp: Some(ts), .. 
}) => *ts, Some(_) => { self.iter.ignore_data()?; log::warn!("missing cluster timestamp"); return Ok(()); } None => { self.iter.ignore_data()?; log::warn!("block group element outside of a cluster"); return Ok(()); } }; let group = self.iter.read_element_data::()?; extract_frames( &group.data, group.duration, &self.track_states, cluster_ts, self.timestamp_scale, &mut self.frames, )?; } ElementType::Tags => { let tags = self.iter.read_element_data::()?; self.metadata.push(tags.to_metadata()); self.current_cluster = None; } _ if header.etype.is_top_level() => { self.current_cluster = None; } other => { log::debug!("ignored element {:?}", other); self.iter.ignore_data()?; } } Ok(()) } } impl FormatReader for MkvReader { fn try_new(mut reader: MediaSourceStream, _options: &FormatOptions) -> Result where Self: Sized, { let is_seekable = reader.is_seekable(); // Get the total length of the stream, if possible. let total_len = if is_seekable { let pos = reader.pos(); let len = reader.seek(SeekFrom::End(0))?; reader.seek(SeekFrom::Start(pos))?; log::info!("stream is seekable with len={} bytes.", len); Some(len) } else { None }; let mut it = ElementIterator::new(reader, total_len); let ebml = it.read_element::()?; if !matches!(ebml.header.doc_type.as_str(), "matroska" | "webm") { return unsupported_error("mkv: not a matroska / webm file"); } let segment_pos = match it.read_child_header()? { Some(ElementHeader { etype: ElementType::Segment, data_pos, .. 
}) => data_pos, _ => return unsupported_error("mkv: missing segment element"), }; let mut segment_tracks = None; let mut info = None; let mut clusters = Vec::new(); let mut metadata = MetadataLog::default(); let mut current_cluster = None; let mut seek_positions = Vec::new(); while let Ok(Some(header)) = it.read_child_header() { match header.etype { ElementType::SeekHead => { let seek_head = it.read_element_data::()?; for element in seek_head.seeks.into_vec() { let tag = element.id as u32; let etype = match ELEMENTS.get(&tag) { Some((_, etype)) => *etype, None => continue, }; seek_positions.push((etype, segment_pos + element.position)); } } ElementType::Tracks => { segment_tracks = Some(it.read_element_data::()?); } ElementType::Info => { info = Some(it.read_element_data::()?); } ElementType::Cues => { let cues = it.read_element_data::()?; for cue in cues.points.into_vec() { clusters.push(ClusterElement { timestamp: cue.time, pos: segment_pos + cue.positions.cluster_position, end: None, blocks: Box::new([]), }); } } ElementType::Tags => { let tags = it.read_element_data::()?; metadata.push(tags.to_metadata()); } ElementType::Cluster => { // Set state for current cluster for the first call of `next_element`. current_cluster = Some(ClusterState { timestamp: None, end: header.end() }); // Don't look forward into the stream since // we can't be sure that we'll find anything useful. break; } other => { it.ignore_data()?; log::debug!("ignored element {:?}", other); } } } if is_seekable { // Make sure we don't jump backwards unnecessarily. seek_positions.sort_by_key(|sp| sp.1); for (etype, pos) in seek_positions { it.seek(pos)?; // Safety: The element type or position may be incorrect. The element iterator will // validate the type (as declared in the header) of the element at the seeked // position against the element type asked to be read. 
match etype { ElementType::Tracks => { segment_tracks = Some(it.read_element::()?); } ElementType::Info => { info = Some(it.read_element::()?); } ElementType::Tags => { let tags = it.read_element::()?; metadata.push(tags.to_metadata()); } ElementType::Cues => { let cues = it.read_element::()?; for cue in cues.points.into_vec() { clusters.push(ClusterElement { timestamp: cue.time, pos: segment_pos + cue.positions.cluster_position, end: None, blocks: Box::new([]), }); } } _ => (), } } } let segment_tracks = segment_tracks.ok_or(Error::DecodeError("mkv: missing Tracks element"))?; if is_seekable { let mut reader = it.into_inner(); reader.seek(SeekFrom::Start(segment_pos))?; it = ElementIterator::new(reader, total_len); } let info = info.ok_or(Error::DecodeError("mkv: missing Info element"))?; // TODO: remove this unwrap? let time_base = TimeBase::new(u32::try_from(info.timestamp_scale).unwrap(), 1_000_000_000); let mut tracks = Vec::new(); let mut states = HashMap::new(); for track in segment_tracks.tracks.into_vec() { let codec_type = codec_id_to_type(&track); let mut codec_params = CodecParameters::new(); codec_params.with_time_base(time_base); if let Some(duration) = info.duration { codec_params.with_n_frames(duration as u64); } if let Some(audio) = track.audio { codec_params.with_sample_rate(audio.sampling_frequency.round() as u32); let format = audio.bit_depth.and_then(|bits| match bits { 8 => Some(SampleFormat::S8), 16 => Some(SampleFormat::S16), 24 => Some(SampleFormat::S24), 32 => Some(SampleFormat::S32), _ => None, }); if let Some(format) = format { codec_params.with_sample_format(format); } if let Some(bits) = audio.bit_depth { codec_params.with_bits_per_sample(bits as u32); } let layout = match audio.channels { 1 => Some(Layout::Mono), 2 => Some(Layout::Stereo), 3 => Some(Layout::TwoPointOne), 6 => Some(Layout::FivePointOne), other => { log::warn!( "track #{} has custom number of channels: {}", track.number, other ); None } }; if let Some(layout) = layout { 
codec_params.with_channel_layout(layout); } if let Some(codec_type) = codec_type { codec_params.for_codec(codec_type); if let Some(codec_private) = track.codec_private { let extra_data = match codec_type { CODEC_TYPE_VORBIS => { vorbis_extra_data_from_codec_private(&codec_private)? } CODEC_TYPE_FLAC => flac_extra_data_from_codec_private(&codec_private)?, _ => codec_private, }; codec_params.with_extra_data(extra_data); } } } let track_id = track.number as u32; tracks.push(Track { id: track_id, codec_params: codec_params.clone(), language: track.language, }); states.insert( track_id, TrackState { codec_params, track_num: track_id, default_frame_duration: track.default_duration, }, ); } Ok(Self { iter: it, tracks, track_states: states, current_cluster, metadata, cues: Vec::new(), frames: VecDeque::new(), timestamp_scale: info.timestamp_scale, clusters, }) } fn cues(&self) -> &[Cue] { &self.cues } fn metadata(&mut self) -> Metadata<'_> { self.metadata.metadata() } fn seek(&mut self, _mode: SeekMode, to: SeekTo) -> Result { if self.tracks.is_empty() { return seek_error(SeekErrorKind::Unseekable); } match to { SeekTo::Time { time, track_id } => { let track = match track_id { Some(id) => self.tracks.iter().find(|track| track.id == id), None => self.tracks.first(), }; let track = track.ok_or(Error::SeekError(SeekErrorKind::InvalidTrack))?; let tb = track.codec_params.time_base.unwrap(); let ts = tb.calc_timestamp(time); let track_id = track.id; self.seek_track_by_ts(track_id, ts) } SeekTo::TimeStamp { ts, track_id } => { match self.tracks.iter().find(|t| t.id == track_id) { Some(_) => self.seek_track_by_ts(track_id, ts), None => seek_error(SeekErrorKind::InvalidTrack), } } } } fn tracks(&self) -> &[Track] { &self.tracks } fn next_packet(&mut self) -> Result { loop { if let Some(frame) = self.frames.pop_front() { return Ok(Packet::new_from_boxed_slice( frame.track, frame.timestamp, frame.duration, frame.data, )); } self.next_element()?; } } fn into_inner(self: Box) -> 
/// Probe registration for the Matroska / WebM reader.
impl QueryDescriptor for MkvReader {
    /// Returns the single format descriptor of this reader: its short name, description,
    /// file extensions, MIME types and the byte marker searched for when probing.
    fn query() -> &'static [Descriptor] {
        &[support_format!(
            "matroska",
            "Matroska / WebM",
            &["webm", "mkv"],
            &["video/webm", "video/x-matroska"],
            &[b"\x1A\x45\xDF\xA3"] // Top-level element Ebml element
        )]
    }

    /// Returns a fixed score of 255; the supplied context bytes are not inspected.
    fn score(_context: &[u8]) -> u8 {
        255
    }
}
let mut tag = 0u32; loop { let ty = ELEMENTS.get(&tag).map(|(_, ty)| ty).filter(|ty| ty.is_top_level()); if let Some(ty) = ty { log::info!("found next supported tag {:08X} ({:?})", tag, ty); return Ok((tag, 4, true)); } tag = (tag << 8) | u32::from(reader.read_u8()?); } } pub(crate) fn read_size(reader: R) -> Result> { let (size, len) = read_vint(reader)?; if size == u64::MAX && len == 1 { return Ok(None); } Ok(Some(size)) } /// Reads a single unsigned variable size integer (as in RFC8794) from the stream /// and returns it or an error. pub(crate) fn read_unsigned_vint(reader: R) -> Result { Ok(read_vint(reader)?.0) } /// Reads a single signed variable size integer (as in RFC8794) from the stream /// and returns it or an error. pub(crate) fn read_signed_vint(mut reader: R) -> Result { let (value, len) = read_vint(&mut reader)?; // Convert to a signed integer by range shifting. let half_range = i64::pow(2, (len * 7) - 1) - 1; Ok(value as i64 - half_range) } /// Reads a single unsigned variable size integer (as in RFC8794) from the stream /// and returns both its value and length in octects, or an error. fn read_vint(mut reader: R) -> Result<(u64, u32)> { let byte = reader.read_byte()?; if byte == 0xFF { // Special case: unknown size elements. 
return Ok((u64::MAX, 1)); } let vint_width = byte.leading_zeros(); let mut vint = u64::from(byte); // Clear VINT_MARKER bit vint ^= 1 << (7 - vint_width); // Read remaining octets for _ in 0..vint_width { let byte = reader.read_byte()?; vint = (vint << 8) | u64::from(byte); } Ok((vint, vint_width + 1)) } #[cfg(test)] mod tests { use symphonia_core::io::BufReader; use super::{read_signed_vint, read_tag, read_unsigned_vint}; #[test] fn element_tag_parsing() { assert_eq!(read_tag(BufReader::new(&[0x82])).unwrap(), (0x82, 1, false)); assert_eq!(read_tag(BufReader::new(&[0x40, 0x02])).unwrap(), (0x4002, 2, false)); assert_eq!(read_tag(BufReader::new(&[0x20, 0x00, 0x02])).unwrap(), (0x200002, 3, false)); assert_eq!( read_tag(BufReader::new(&[0x10, 0x00, 0x00, 0x02])).unwrap(), (0x10000002, 4, false) ); } #[test] fn variable_unsigned_integer_parsing() { assert_eq!(read_unsigned_vint(BufReader::new(&[0x82])).unwrap(), 2); assert_eq!(read_unsigned_vint(BufReader::new(&[0x40, 0x02])).unwrap(), 2); assert_eq!(read_unsigned_vint(BufReader::new(&[0x20, 0x00, 0x02])).unwrap(), 2); assert_eq!(read_unsigned_vint(BufReader::new(&[0x10, 0x00, 0x00, 0x02])).unwrap(), 2); assert_eq!(read_unsigned_vint(BufReader::new(&[0x08, 0x00, 0x00, 0x00, 0x02])).unwrap(), 2); assert_eq!( read_unsigned_vint(BufReader::new(&[0x04, 0x00, 0x00, 0x00, 0x00, 0x02])).unwrap(), 2 ); assert_eq!( read_unsigned_vint(BufReader::new(&[0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02])) .unwrap(), 2 ); assert_eq!( read_unsigned_vint(BufReader::new(&[0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02])) .unwrap(), 2 ); } #[test] fn variable_signed_integer_parsing() { assert_eq!(read_signed_vint(BufReader::new(&[0x80])).unwrap(), -63); assert_eq!(read_signed_vint(BufReader::new(&[0x40, 0x00])).unwrap(), -8191); } } #[derive(Copy, Clone, Debug)] pub struct ElementHeader { /// The element tag. pub tag: u32, /// The element type. pub etype: ElementType, /// The element's offset in the stream. 
pub pos: u64, /// The total size of the element including the header. pub len: u64, /// The element's data offset in the stream. pub data_pos: u64, /// The size of the payload data. pub data_len: u64, } impl ElementHeader { /// Returns an iterator over child elements of the current element. pub(crate) fn children(&self, reader: R) -> ElementIterator { assert_eq!(reader.pos(), self.data_pos, "unexpected position"); ElementIterator::new_of(reader, *self) } pub(crate) fn end(&self) -> Option { if self.data_len == 0 { None } else { Some(self.data_pos + self.data_len) } } } pub trait Element: Sized { const ID: ElementType; fn read(reader: &mut B, header: ElementHeader) -> Result; } impl ElementHeader { /// Reads a single EBML element header from the stream. pub(crate) fn read(mut reader: &mut R) -> Result<(ElementHeader, bool)> { let (tag, tag_len, reset) = read_tag(&mut reader)?; let header_start = reader.pos() - u64::from(tag_len); // According to spec, elements like Segment and Cluster can have unknown size. // Currently, these cases are represented as `data_len` equal to 0, // but it might be worth changing it to an Option at some point. let size = read_size(&mut reader)?.unwrap_or(0); Ok(( ElementHeader { tag, etype: ELEMENTS.get(&tag).map_or(ElementType::Unknown, |(_, etype)| *etype), pos: header_start, len: reader.pos() - header_start + size, data_len: size, data_pos: reader.pos(), }, reset, )) } } #[derive(Debug)] pub(crate) struct EbmlElement { pub(crate) header: EbmlHeaderElement, } impl Element for EbmlElement { const ID: ElementType = ElementType::Ebml; fn read(reader: &mut B, header: ElementHeader) -> Result { let mut it = header.children(reader); Ok(Self { header: it.read_element_data::()? }) } } pub(crate) struct ElementIterator { /// Reader of the stream containing this element. reader: R, /// Store current element header (for sanity check purposes). current: Option, /// Position of the next element header that would be read. 
next_pos: u64, /// Position immediately past last byte of this element. end: Option, } impl ElementIterator { /// Creates a new iterator over elements starting from the current stream position. pub(crate) fn new(reader: R, end: Option) -> Self { let pos = reader.pos(); Self::new_at(reader, pos, end) } /// Creates a new iterator over elements starting from the given stream position. fn new_at(reader: R, start: u64, end: Option) -> Self { Self { reader, current: None, next_pos: start, end } } /// Creates a new iterator over children of the given parent element. fn new_of(reader: R, parent: ElementHeader) -> Self { Self { reader, current: Some(parent), next_pos: parent.data_pos, end: parent.end() } } /// Seek to a specified offset inside of the stream. pub(crate) fn seek(&mut self, pos: u64) -> Result<()> where R: MediaSource, { let current_pos = self.pos(); self.current = None; if self.reader.is_seekable() { self.reader.seek(SeekFrom::Start(pos))?; } else if pos < current_pos { return seek_error(SeekErrorKind::ForwardOnly); } else { self.reader.ignore_bytes(pos - current_pos)?; } self.next_pos = pos; Ok(()) } /// Consumes this iterator and return the original stream. pub(crate) fn into_inner(self) -> R { self.reader } /// Reads a single element header and moves to its next sibling by ignoring all the children. pub(crate) fn read_header(&mut self) -> Result> { let header = self.read_header_no_consume()?; if let Some(header) = &header { // Move to next sibling. self.next_pos += header.len; } Ok(header) } /// Reads a single element header and shifts the stream to element's child /// if it'a a master element or to next sibling otherwise. pub(crate) fn read_child_header(&mut self) -> Result> { let header = self.read_header_no_consume()?; if let Some(header) = &header { match ELEMENTS.get(&header.tag).map(|it| it.0) { Some(Type::Master) => { // Move to start of a child element. self.next_pos = header.data_pos; } _ => { // Move to next sibling. 
self.next_pos += header.len; } } } Ok(header) } /// Reads element header at the current stream position /// without moving to the end of the parent element. /// Returns [None] if the current element has no more children or reached end of the stream. fn read_header_no_consume(&mut self) -> Result> { let pos = self.reader.pos(); if pos < self.next_pos { // Ignore bytes that were not read self.reader.ignore_bytes(self.next_pos - pos)?; } assert_eq!(self.next_pos, self.reader.pos(), "invalid position"); if self.reader.pos() < self.end.unwrap_or(u64::MAX) { let (header, reset) = ElementHeader::read(&mut self.reader)?; if reset { // After finding a new top-level element in a broken stream // it is necessary to update `next_pos` so it refers to a position // of a child header. self.next_pos = self.reader.pos(); } self.current = Some(header); return Ok(Some(header)); } Ok(None) } /// Reads a single element with its data. pub(crate) fn read_element(&mut self) -> Result { let _header = self.read_header()?; self.read_element_data() } /// Reads data of current element. Must be used after /// [Self::read_header] or [Self::read_child_header]. pub(crate) fn read_element_data(&mut self) -> Result { let header = self.current.expect("EBML header must be read before calling this function"); // Ensure the EBML element header has the same element type as the one being read. if header.etype != E::ID { return decode_error("mkv: unexpected EBML element"); } let element = E::read(&mut self.reader, header)?; // Update position to match the position element reader finished at self.next_pos = self.reader.pos(); Ok(element) } /// Reads a collection of element with the given type. pub(crate) fn read_elements(&mut self) -> Result> { let mut elements = vec![]; while let Some(header) = self.read_header()? 
{ if header.etype == ElementType::Crc32 { // TODO: ignore crc for now continue; } if header.etype != E::ID { log::warn!("found element with invalid type {:?}", header); self.ignore_data()?; continue; } elements.push(E::read(&mut self.reader, header)?); } Ok(elements.into_boxed_slice()) } /// Reads any primitive data inside of the current element. pub(crate) fn read_data(&mut self) -> Result { let hdr = self.current.expect("not in an element"); let value = self .try_read_data(hdr)? .ok_or(Error::DecodeError("mkv: element has no primitive data"))?; Ok(value) } /// Reads data of the current element as an unsigned integer. pub(crate) fn read_u64(&mut self) -> Result { match self.read_data()? { ElementData::UnsignedInt(s) => Ok(s), _ => Err(Error::DecodeError("mkv: expected an unsigned int")), } } /// Reads data of the current element as a floating-point number. pub(crate) fn read_f64(&mut self) -> Result { match self.read_data()? { ElementData::Float(s) => Ok(s), _ => Err(Error::DecodeError("mkv: expected a float")), } } /// Reads data of the current element as a string. pub(crate) fn read_string(&mut self) -> Result { match self.read_data()? { ElementData::String(s) => Ok(s), _ => Err(Error::DecodeError("mkv: expected a string")), } } /// Reads binary data of the current element as boxed slice. pub(crate) fn read_boxed_slice(&mut self) -> Result> { match self.read_data()? { ElementData::Binary(b) => Ok(b), _ => Err(Error::DecodeError("mkv: expected binary data")), } } /// Reads any primitive data of the current element. It returns [None] /// if the it is a master element. pub(crate) fn try_read_data(&mut self, header: ElementHeader) -> Result> { Ok(match ELEMENTS.get(&header.tag) { Some((ty, _)) => { // Position must always be valid, because this function is called // after reading the element header. 
assert_eq!(header.data_pos, self.reader.pos(), "invalid stream position"); if let (Some(cur), Some(end)) = (self.current, self.end) { if cur.pos + cur.len > end { log::debug!("reading element data {:?}; parent end={}", cur, end); return decode_error( "mkv: attempt to read element data past master element ", ); } } Some(match ty { Type::Master => { return Ok(None); } Type::Unsigned => { if header.data_len > 8 { self.ignore_data()?; return decode_error("mkv: invalid unsigned integer length"); } let mut buff = [0u8; 8]; let offset = 8 - header.data_len as usize; self.reader.read_buf_exact(&mut buff[offset..])?; let value = u64::from_be_bytes(buff); ElementData::UnsignedInt(value) } Type::Signed | Type::Date => { if header.data_len > 8 { self.ignore_data()?; return decode_error("mkv: invalid signed integer length"); } let len = header.data_len as usize; let mut buff = [0u8; 8]; self.reader.read_buf_exact(&mut buff[8 - len..])?; let value = u64::from_be_bytes(buff); let value = sign_extend_leq64_to_i64(value, (len as u32) * 8); match ty { Type::Signed => ElementData::SignedInt(value), Type::Date => ElementData::Date(value), _ => unreachable!(), } } Type::Float => { let value = match header.data_len { 0 => 0.0, 4 => self.reader.read_be_f32()? as f64, 8 => self.reader.read_be_f64()?, _ => { self.ignore_data()?; return Err(Error::DecodeError("mkv: invalid float length")); } }; ElementData::Float(value) } Type::String => { let data = self.reader.read_boxed_slice_exact(header.data_len as usize)?; let bytes = data.split(|b| *b == 0).next().unwrap_or(&data); ElementData::String(String::from_utf8_lossy(bytes).into_owned()) } Type::Binary => ElementData::Binary( self.reader.read_boxed_slice_exact(header.data_len as usize)?, ), }) } None => None, }) } /// Ignores content of the current element. It can be used after calling /// [Self::read_child_header] to ignore children of a master element. 
pub(crate) fn ignore_data(&mut self) -> Result<()> { if let Some(header) = self.current { log::debug!("ignoring data of {:?} element", header.etype); self.reader.ignore_bytes(header.data_len)?; self.next_pos = header.data_pos + header.data_len; } Ok(()) } /// Gets the position of the underlying stream. pub(crate) fn pos(&self) -> u64 { self.reader.pos() } } /// An EBML element data. #[derive(Clone, Debug)] pub(crate) enum ElementData { /// A binary buffer. Binary(Box<[u8]>), /// A floating point number. Float(f64), /// A signed integer. SignedInt(i64), /// A string. String(String), /// An unsigned integer. UnsignedInt(u64), /// A point in time referenced in nanoseconds from the precise beginning /// of the third millennium of the Gregorian Calendar in Coordinated Universal Time /// (also known as 2001-01-01T00:00:00.000000000 UTC). Date(i64), } symphonia-format-mkv-0.5.4/src/element_ids.rs000064400000000000000000000340611046102023000173530ustar 00000000000000// Symphonia // Copyright (c) 2019-2022 The Project Symphonia Developers. // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
use std::collections::HashMap; use lazy_static::lazy_static; #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub(crate) enum Type { Master, Unsigned, Signed, Binary, String, Float, Date, } #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub enum ElementType { Ebml, EbmlVersion, EbmlReadVersion, EbmlMaxIdLength, EbmlMaxSizeLength, DocType, DocTypeVersion, DocTypeReadVersion, Crc32, Void, Segment, SeekHead, Seek, SeekId, SeekPosition, Info, TimestampScale, Duration, DateUtc, Title, MuxingApp, WritingApp, Cluster, Timestamp, PrevSize, SimpleBlock, BlockGroup, Block, BlockAdditions, BlockMore, BlockAddId, BlockAdditional, BlockDuration, ReferenceBlock, DiscardPadding, Tracks, TrackEntry, TrackNumber, TrackUid, TrackType, FlagEnabled, FlagDefault, FlagForced, FlagHearingImpaired, FlagVisualImpaired, FlagTextDescriptions, FlagOriginal, FlagCommentary, FlagLacing, DefaultDuration, Name, Language, CodecId, CodecPrivate, CodecName, CodecDelay, SeekPreRoll, Video, FlagInterlaced, StereoMode, AlphaMode, PixelWidth, PixelHeight, PixelCropBottom, PixelCropTop, PixelCropLeft, PixelCropRight, DisplayWidth, DisplayHeight, DisplayUnit, AspectRatioType, Audio, SamplingFrequency, OutputSamplingFrequency, Channels, BitDepth, ContentEncodings, ContentEncoding, ContentEncodingOrder, ContentEncodingScope, ContentEncodingType, ContentEncryption, ContentEncAlgo, ContentEncKeyId, ContentEncAesSettings, AesSettingsCipherMode, Colour, MatrixCoefficients, BitsPerChannel, ChromaSubsamplingHorz, ChromaSubsamplingVert, CbSubsamplingHorz, CbSubsamplingVert, ChromaSitingHorz, ChromaSitingVert, Range, TransferCharacteristics, Primaries, MaxCll, MaxFall, MasteringMetadata, PrimaryRChromaticityX, PrimaryRChromaticityY, PrimaryGChromaticityX, PrimaryGChromaticityY, PrimaryBChromaticityX, PrimaryBChromaticityY, WhitePointChromaticityX, WhitePointChromaticityY, LuminanceMax, LuminanceMin, Cues, CuePoint, CueTime, CueTrackPositions, CueTrack, CueClusterPosition, CueRelativePosition, CueDuration, CueBlockNumber, 
Chapters, EditionEntry, ChapterAtom, ChapterUid, ChapterStringUid, ChapterTimeStart, ChapterTimeEnd, ChapterDisplay, ChapString, ChapLanguage, ChapLanguageIetf, ChapCountry, Tags, Tag, Targets, TargetTypeValue, TargetType, TagTrackUid, SimpleTag, TagName, TagLanguage, TagDefault, TagString, TagBinary, /// Special type for unknown tags. Unknown, } impl ElementType { pub(crate) fn is_top_level(&self) -> bool { matches!( self, ElementType::Cluster | ElementType::Cues | ElementType::Info | ElementType::SeekHead | ElementType::Tags | ElementType::Tracks ) } } lazy_static! { pub(crate) static ref ELEMENTS: HashMap = { let mut elems = HashMap::new(); elems.insert(0x1A45DFA3, (Type::Master, ElementType::Ebml)); elems.insert(0x4286, (Type::Unsigned, ElementType::EbmlVersion)); elems.insert(0x42F7, (Type::Unsigned, ElementType::EbmlReadVersion)); elems.insert(0x42F2, (Type::Unsigned, ElementType::EbmlMaxIdLength)); elems.insert(0x42F3, (Type::Unsigned, ElementType::EbmlMaxSizeLength)); elems.insert(0x4282, (Type::String, ElementType::DocType)); elems.insert(0x4287, (Type::Unsigned, ElementType::DocTypeVersion)); elems.insert(0x4285, (Type::Unsigned, ElementType::DocTypeReadVersion)); elems.insert(0xBF, (Type::Binary, ElementType::Crc32)); elems.insert(0xEC, (Type::Binary, ElementType::Void)); elems.insert(0x18538067, (Type::Master, ElementType::Segment)); elems.insert(0x114D9B74, (Type::Master, ElementType::SeekHead)); elems.insert(0x4DBB, (Type::Master, ElementType::Seek)); elems.insert(0x53AB, (Type::Unsigned, ElementType::SeekId)); elems.insert(0x53AC, (Type::Unsigned, ElementType::SeekPosition)); elems.insert(0x1549A966, (Type::Master, ElementType::Info)); elems.insert(0x2AD7B1, (Type::Unsigned, ElementType::TimestampScale)); elems.insert(0x4489, (Type::Float, ElementType::Duration)); elems.insert(0x4461, (Type::Date, ElementType::DateUtc)); elems.insert(0x7BA9, (Type::String, ElementType::Title)); elems.insert(0x4D80, (Type::String, ElementType::MuxingApp)); 
elems.insert(0x5741, (Type::String, ElementType::WritingApp)); elems.insert(0x1F43B675, (Type::Master, ElementType::Cluster)); elems.insert(0xE7, (Type::Unsigned, ElementType::Timestamp)); elems.insert(0xAB, (Type::Unsigned, ElementType::PrevSize)); elems.insert(0xA3, (Type::Binary, ElementType::SimpleBlock)); elems.insert(0xA0, (Type::Master, ElementType::BlockGroup)); elems.insert(0xA1, (Type::Binary, ElementType::Block)); elems.insert(0x75A1, (Type::Master, ElementType::BlockAdditions)); elems.insert(0xA6, (Type::Master, ElementType::BlockMore)); elems.insert(0xEE, (Type::Unsigned, ElementType::BlockAddId)); elems.insert(0xA5, (Type::Binary, ElementType::BlockAdditional)); elems.insert(0x9B, (Type::Unsigned, ElementType::BlockDuration)); elems.insert(0xFB, (Type::Signed, ElementType::ReferenceBlock)); elems.insert(0x75A2, (Type::Signed, ElementType::DiscardPadding)); elems.insert(0x1654AE6B, (Type::Master, ElementType::Tracks)); elems.insert(0xAE, (Type::Master, ElementType::TrackEntry)); elems.insert(0xD7, (Type::Unsigned, ElementType::TrackNumber)); elems.insert(0x73C5, (Type::Unsigned, ElementType::TrackUid)); elems.insert(0x83, (Type::Unsigned, ElementType::TrackType)); elems.insert(0xB9, (Type::Unsigned, ElementType::FlagEnabled)); elems.insert(0x88, (Type::Unsigned, ElementType::FlagDefault)); elems.insert(0x55AA, (Type::Unsigned, ElementType::FlagForced)); elems.insert(0x55AB, (Type::Unsigned, ElementType::FlagHearingImpaired)); elems.insert(0x55AC, (Type::Unsigned, ElementType::FlagVisualImpaired)); elems.insert(0x55AD, (Type::Unsigned, ElementType::FlagTextDescriptions)); elems.insert(0x55AE, (Type::Unsigned, ElementType::FlagOriginal)); elems.insert(0x55AF, (Type::Unsigned, ElementType::FlagCommentary)); elems.insert(0x9C, (Type::Unsigned, ElementType::FlagLacing)); elems.insert(0x23E383, (Type::Unsigned, ElementType::DefaultDuration)); elems.insert(0x536E, (Type::String, ElementType::Name)); elems.insert(0x22B59C, (Type::String, 
ElementType::Language)); elems.insert(0x86, (Type::String, ElementType::CodecId)); elems.insert(0x63A2, (Type::Binary, ElementType::CodecPrivate)); elems.insert(0x258688, (Type::String, ElementType::CodecName)); elems.insert(0x56AA, (Type::Unsigned, ElementType::CodecDelay)); elems.insert(0x56BB, (Type::Unsigned, ElementType::SeekPreRoll)); elems.insert(0xE0, (Type::Master, ElementType::Video)); elems.insert(0x9A, (Type::Unsigned, ElementType::FlagInterlaced)); elems.insert(0x53B8, (Type::Unsigned, ElementType::StereoMode)); elems.insert(0x53C0, (Type::Unsigned, ElementType::AlphaMode)); elems.insert(0xB0, (Type::Unsigned, ElementType::PixelWidth)); elems.insert(0xBA, (Type::Unsigned, ElementType::PixelHeight)); elems.insert(0x54AA, (Type::Unsigned, ElementType::PixelCropBottom)); elems.insert(0x54BB, (Type::Unsigned, ElementType::PixelCropTop)); elems.insert(0x54CC, (Type::Unsigned, ElementType::PixelCropLeft)); elems.insert(0x54DD, (Type::Unsigned, ElementType::PixelCropRight)); elems.insert(0x54B0, (Type::Unsigned, ElementType::DisplayWidth)); elems.insert(0x54BA, (Type::Unsigned, ElementType::DisplayHeight)); elems.insert(0x54B2, (Type::Unsigned, ElementType::DisplayUnit)); elems.insert(0x54B3, (Type::Unsigned, ElementType::AspectRatioType)); elems.insert(0xE1, (Type::Master, ElementType::Audio)); elems.insert(0xB5, (Type::Float, ElementType::SamplingFrequency)); elems.insert(0x78B5, (Type::Float, ElementType::OutputSamplingFrequency)); elems.insert(0x9F, (Type::Unsigned, ElementType::Channels)); elems.insert(0x6264, (Type::Unsigned, ElementType::BitDepth)); elems.insert(0x6D80, (Type::Master, ElementType::ContentEncodings)); elems.insert(0x6240, (Type::Master, ElementType::ContentEncoding)); elems.insert(0x5031, (Type::Unsigned, ElementType::ContentEncodingOrder)); elems.insert(0x5032, (Type::Unsigned, ElementType::ContentEncodingScope)); elems.insert(0x5033, (Type::Unsigned, ElementType::ContentEncodingType)); elems.insert(0x5035, (Type::Master, 
ElementType::ContentEncryption)); elems.insert(0x47E1, (Type::Unsigned, ElementType::ContentEncAlgo)); elems.insert(0x47E2, (Type::Unsigned, ElementType::ContentEncKeyId)); elems.insert(0x47E7, (Type::Master, ElementType::ContentEncAesSettings)); elems.insert(0x47E8, (Type::Unsigned, ElementType::AesSettingsCipherMode)); elems.insert(0x55B0, (Type::Master, ElementType::Colour)); elems.insert(0x55B1, (Type::Unsigned, ElementType::MatrixCoefficients)); elems.insert(0x55B2, (Type::Unsigned, ElementType::BitsPerChannel)); elems.insert(0x55B3, (Type::Unsigned, ElementType::ChromaSubsamplingHorz)); elems.insert(0x55B4, (Type::Unsigned, ElementType::ChromaSubsamplingVert)); elems.insert(0x55B5, (Type::Unsigned, ElementType::CbSubsamplingHorz)); elems.insert(0x55B6, (Type::Unsigned, ElementType::CbSubsamplingVert)); elems.insert(0x55B7, (Type::Unsigned, ElementType::ChromaSitingHorz)); elems.insert(0x55B8, (Type::Unsigned, ElementType::ChromaSitingVert)); elems.insert(0x55B9, (Type::Unsigned, ElementType::Range)); elems.insert(0x55BA, (Type::Unsigned, ElementType::TransferCharacteristics)); elems.insert(0x55BB, (Type::Unsigned, ElementType::Primaries)); elems.insert(0x55BC, (Type::Unsigned, ElementType::MaxCll)); elems.insert(0x55BD, (Type::Unsigned, ElementType::MaxFall)); elems.insert(0x55D0, (Type::Master, ElementType::MasteringMetadata)); elems.insert(0x55D1, (Type::Float, ElementType::PrimaryRChromaticityX)); elems.insert(0x55D2, (Type::Float, ElementType::PrimaryRChromaticityY)); elems.insert(0x55D3, (Type::Float, ElementType::PrimaryGChromaticityX)); elems.insert(0x55D4, (Type::Float, ElementType::PrimaryGChromaticityY)); elems.insert(0x55D5, (Type::Float, ElementType::PrimaryBChromaticityX)); elems.insert(0x55D6, (Type::Float, ElementType::PrimaryBChromaticityY)); elems.insert(0x55D7, (Type::Float, ElementType::WhitePointChromaticityX)); elems.insert(0x55D8, (Type::Float, ElementType::WhitePointChromaticityY)); elems.insert(0x55D9, (Type::Float, 
ElementType::LuminanceMax)); elems.insert(0x55DA, (Type::Float, ElementType::LuminanceMin)); elems.insert(0x1C53BB6B, (Type::Master, ElementType::Cues)); elems.insert(0xBB, (Type::Master, ElementType::CuePoint)); elems.insert(0xB3, (Type::Unsigned, ElementType::CueTime)); elems.insert(0xB7, (Type::Master, ElementType::CueTrackPositions)); elems.insert(0xF7, (Type::Unsigned, ElementType::CueTrack)); elems.insert(0xF1, (Type::Unsigned, ElementType::CueClusterPosition)); elems.insert(0xF0, (Type::Unsigned, ElementType::CueRelativePosition)); elems.insert(0xB2, (Type::Unsigned, ElementType::CueDuration)); elems.insert(0x5378, (Type::Unsigned, ElementType::CueBlockNumber)); elems.insert(0x1043A770, (Type::Master, ElementType::Chapters)); elems.insert(0x45B9, (Type::Master, ElementType::EditionEntry)); elems.insert(0xB6, (Type::Master, ElementType::ChapterAtom)); elems.insert(0x73C4, (Type::Unsigned, ElementType::ChapterUid)); elems.insert(0x5654, (Type::String, ElementType::ChapterStringUid)); elems.insert(0x91, (Type::Unsigned, ElementType::ChapterTimeStart)); elems.insert(0x92, (Type::Unsigned, ElementType::ChapterTimeEnd)); elems.insert(0x80, (Type::Master, ElementType::ChapterDisplay)); elems.insert(0x85, (Type::String, ElementType::ChapString)); elems.insert(0x437C, (Type::String, ElementType::ChapLanguage)); elems.insert(0x437D, (Type::String, ElementType::ChapLanguageIetf)); elems.insert(0x437E, (Type::String, ElementType::ChapCountry)); elems.insert(0x1254C367, (Type::Master, ElementType::Tags)); elems.insert(0x7373, (Type::Master, ElementType::Tag)); elems.insert(0x63C0, (Type::Master, ElementType::Targets)); elems.insert(0x68CA, (Type::Unsigned, ElementType::TargetTypeValue)); elems.insert(0x63CA, (Type::String, ElementType::TargetType)); elems.insert(0x63C5, (Type::Unsigned, ElementType::TagTrackUid)); elems.insert(0x67C8, (Type::Master, ElementType::SimpleTag)); elems.insert(0x45A3, (Type::String, ElementType::TagName)); elems.insert(0x447A, (Type::String, 
ElementType::TagLanguage)); elems.insert(0x4484, (Type::Unsigned, ElementType::TagDefault)); elems.insert(0x4487, (Type::String, ElementType::TagString)); elems.insert(0x4485, (Type::Binary, ElementType::TagBinary)); elems }; } symphonia-format-mkv-0.5.4/src/lacing.rs000064400000000000000000000117551046102023000163250ustar 00000000000000// Symphonia // Copyright (c) 2019-2022 The Project Symphonia Developers. // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. use std::collections::{HashMap, VecDeque}; use symphonia_core::errors::{decode_error, Result}; use symphonia_core::io::{BufReader, ReadBytes}; use crate::demuxer::TrackState; use crate::ebml::{read_signed_vint, read_unsigned_vint}; enum Lacing { None, Xiph, FixedSize, Ebml, } fn parse_flags(flags: u8) -> Result { match (flags >> 1) & 0b11 { 0b00 => Ok(Lacing::None), 0b01 => Ok(Lacing::Xiph), 0b10 => Ok(Lacing::FixedSize), 0b11 => Ok(Lacing::Ebml), _ => unreachable!(), } } fn read_ebml_sizes(mut reader: R, frames: usize) -> Result> { let mut sizes = Vec::new(); for _ in 0..frames { if let Some(last_size) = sizes.last().copied() { let delta = read_signed_vint(&mut reader)?; sizes.push((last_size as i64 + delta) as u64) } else { let size = read_unsigned_vint(&mut reader)?; sizes.push(size); } } Ok(sizes) } pub(crate) fn read_xiph_sizes(mut reader: R, frames: usize) -> Result> { let mut prefixes = 0; let mut sizes = Vec::new(); while sizes.len() < frames { let byte = reader.read_byte()? as u64; if byte == 255 { prefixes += 1; } else { let size = prefixes * 255 + byte; prefixes = 0; sizes.push(size); } } Ok(sizes) } pub(crate) struct Frame { pub(crate) track: u32, /// Absolute frame timestamp. 
pub(crate) timestamp: u64, pub(crate) duration: u64, pub(crate) data: Box<[u8]>, } pub(crate) fn calc_abs_block_timestamp(cluster_ts: u64, rel_block_ts: i16) -> u64 { if rel_block_ts < 0 { cluster_ts - (-rel_block_ts) as u64 } else { cluster_ts + rel_block_ts as u64 } } pub(crate) fn extract_frames( block: &[u8], block_duration: Option, tracks: &HashMap, cluster_timestamp: u64, timestamp_scale: u64, buffer: &mut VecDeque, ) -> Result<()> { let mut reader = BufReader::new(block); let track = read_unsigned_vint(&mut reader)? as u32; let rel_ts = reader.read_be_u16()? as i16; let flags = reader.read_byte()?; let lacing = parse_flags(flags)?; let default_frame_duration = tracks.get(&track).and_then(|it| it.default_frame_duration).map(|it| it / timestamp_scale); let mut timestamp = calc_abs_block_timestamp(cluster_timestamp, rel_ts); match lacing { Lacing::None => { let data = reader.read_boxed_slice_exact(block.len() - reader.pos() as usize)?; let duration = block_duration.or(default_frame_duration).unwrap_or(0); buffer.push_back(Frame { track, timestamp, data, duration }); } Lacing::Xiph | Lacing::Ebml => { // Read number of stored sizes which is actually `number of frames` - 1 // since size of the last frame is deduced from block size. let frames = reader.read_byte()? as usize; let sizes = match lacing { Lacing::Xiph => read_xiph_sizes(&mut reader, frames)?, Lacing::Ebml => read_ebml_sizes(&mut reader, frames)?, _ => unreachable!(), }; let frame_duration = block_duration .map(|it| it / (frames + 1) as u64) .or(default_frame_duration) .unwrap_or(0); for frame_size in sizes { let data = reader.read_boxed_slice_exact(frame_size as usize)?; buffer.push_back(Frame { track, timestamp, data, duration: frame_duration }); timestamp += frame_duration; } // Size of last frame is not provided so we read to the end of the block. 
let size = block.len() - reader.pos() as usize; let data = reader.read_boxed_slice_exact(size)?; buffer.push_back(Frame { track, timestamp, data, duration: frame_duration }); } Lacing::FixedSize => { let frames = reader.read_byte()? as usize + 1; let total_size = block.len() - reader.pos() as usize; if total_size % frames != 0 { return decode_error("mkv: invalid block size"); } let frame_duration = block_duration.map(|it| it / frames as u64).or(default_frame_duration).unwrap_or(0); let frame_size = total_size / frames; for _ in 0..frames { let data = reader.read_boxed_slice_exact(frame_size)?; buffer.push_back(Frame { track, timestamp, data, duration: frame_duration }); timestamp += frame_duration; } } } Ok(()) } symphonia-format-mkv-0.5.4/src/lib.rs000064400000000000000000000013001046102023000156170ustar 00000000000000// Symphonia // Copyright (c) 2019-2022 The Project Symphonia Developers. // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. #![warn(rust_2018_idioms)] #![forbid(unsafe_code)] // The following lints are allowed in all Symphonia crates. Please see clippy.toml for their // justification. #![allow(clippy::comparison_chain)] #![allow(clippy::excessive_precision)] #![allow(clippy::identity_op)] #![allow(clippy::manual_range_contains)] mod codecs; mod demuxer; mod ebml; mod element_ids; mod lacing; mod segment; pub use crate::demuxer::MkvReader; symphonia-format-mkv-0.5.4/src/segment.rs000064400000000000000000000471431046102023000165320ustar 00000000000000// Symphonia // Copyright (c) 2019-2022 The Project Symphonia Developers. // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
use symphonia_core::errors::{Error, Result}; use symphonia_core::io::{BufReader, ReadBytes}; use symphonia_core::meta::{MetadataBuilder, MetadataRevision, Tag, Value}; use crate::ebml::{read_unsigned_vint, Element, ElementData, ElementHeader}; use crate::element_ids::ElementType; use crate::lacing::calc_abs_block_timestamp; #[allow(dead_code)] #[derive(Debug)] pub(crate) struct TrackElement { pub(crate) number: u64, pub(crate) uid: u64, pub(crate) language: Option, pub(crate) codec_id: String, pub(crate) codec_private: Option>, pub(crate) audio: Option, pub(crate) default_duration: Option, } impl Element for TrackElement { const ID: ElementType = ElementType::TrackEntry; fn read(reader: &mut B, header: ElementHeader) -> Result { let mut number = None; let mut uid = None; let mut language = None; let mut audio = None; let mut codec_private = None; let mut codec_id = None; let mut default_duration = None; let mut it = header.children(reader); while let Some(header) = it.read_header()? { match header.etype { ElementType::TrackNumber => { number = Some(it.read_u64()?); } ElementType::TrackUid => { uid = Some(it.read_u64()?); } ElementType::Language => { language = Some(it.read_string()?); } ElementType::CodecId => { codec_id = Some(it.read_string()?); } ElementType::CodecPrivate => { codec_private = Some(it.read_boxed_slice()?); } ElementType::Audio => { audio = Some(it.read_element_data()?); } ElementType::DefaultDuration => { default_duration = Some(it.read_u64()?); } other => { log::debug!("ignored element {:?}", other); } } } Ok(Self { number: number.ok_or(Error::DecodeError("mkv: missing track number"))?, uid: uid.ok_or(Error::DecodeError("mkv: missing track UID"))?, language, codec_id: codec_id.ok_or(Error::DecodeError("mkv: missing codec id"))?, codec_private, audio, default_duration, }) } } #[allow(dead_code)] #[derive(Debug)] pub(crate) struct AudioElement { pub(crate) sampling_frequency: f64, pub(crate) output_sampling_frequency: Option, pub(crate) channels: 
u64, pub(crate) bit_depth: Option, } impl Element for AudioElement { const ID: ElementType = ElementType::Audio; fn read(reader: &mut B, header: ElementHeader) -> Result { let mut sampling_frequency = None; let mut output_sampling_frequency = None; let mut channels = None; let mut bit_depth = None; let mut it = header.children(reader); while let Some(header) = it.read_header()? { match header.etype { ElementType::SamplingFrequency => { sampling_frequency = Some(it.read_f64()?); } ElementType::OutputSamplingFrequency => { output_sampling_frequency = Some(it.read_f64()?); } ElementType::Channels => { channels = Some(it.read_u64()?); } ElementType::BitDepth => { bit_depth = Some(it.read_u64()?); } other => { log::debug!("ignored element {:?}", other); } } } Ok(Self { sampling_frequency: sampling_frequency.unwrap_or(8000.0), output_sampling_frequency, channels: channels.unwrap_or(1), bit_depth, }) } } #[derive(Debug)] pub(crate) struct SeekHeadElement { pub(crate) seeks: Box<[SeekElement]>, } impl Element for SeekHeadElement { const ID: ElementType = ElementType::SeekHead; fn read(reader: &mut B, header: ElementHeader) -> Result { let mut seeks = Vec::new(); let mut it = header.children(reader); while let Some(header) = it.read_header()? { match header.etype { ElementType::Seek => { seeks.push(it.read_element_data()?); } other => { log::debug!("ignored element {:?}", other); } } } Ok(Self { seeks: seeks.into_boxed_slice() }) } } #[derive(Debug)] pub(crate) struct SeekElement { pub(crate) id: u64, pub(crate) position: u64, } impl Element for SeekElement { const ID: ElementType = ElementType::Seek; fn read(reader: &mut B, header: ElementHeader) -> Result { let mut seek_id = None; let mut seek_position = None; let mut it = header.children(reader); while let Some(header) = it.read_header()? 
{ match header.etype { ElementType::SeekId => { seek_id = Some(it.read_u64()?); } ElementType::SeekPosition => { seek_position = Some(it.read_u64()?); } other => { log::debug!("ignored element {:?}", other); } } } Ok(Self { id: seek_id.ok_or(Error::DecodeError("mkv: missing seek track id"))?, position: seek_position.ok_or(Error::DecodeError("mkv: missing seek track pos"))?, }) } } #[derive(Debug)] pub(crate) struct TracksElement { pub(crate) tracks: Box<[TrackElement]>, } impl Element for TracksElement { const ID: ElementType = ElementType::Tracks; fn read(reader: &mut B, header: ElementHeader) -> Result { let mut it = header.children(reader); Ok(Self { tracks: it.read_elements()? }) } } #[allow(dead_code)] #[derive(Debug)] pub(crate) struct EbmlHeaderElement { pub(crate) version: u64, pub(crate) read_version: u64, pub(crate) max_id_length: u64, pub(crate) max_size_length: u64, pub(crate) doc_type: String, pub(crate) doc_type_version: u64, pub(crate) doc_type_read_version: u64, } impl Element for EbmlHeaderElement { const ID: ElementType = ElementType::Ebml; fn read(reader: &mut B, header: ElementHeader) -> Result { let mut version = None; let mut read_version = None; let mut max_id_length = None; let mut max_size_length = None; let mut doc_type = None; let mut doc_type_version = None; let mut doc_type_read_version = None; let mut it = header.children(reader); while let Some(header) = it.read_header()? 
{ match header.etype { ElementType::EbmlVersion => { version = Some(it.read_u64()?); } ElementType::EbmlReadVersion => { read_version = Some(it.read_u64()?); } ElementType::EbmlMaxIdLength => { max_id_length = Some(it.read_u64()?); } ElementType::EbmlMaxSizeLength => { max_size_length = Some(it.read_u64()?); } ElementType::DocType => { doc_type = Some(it.read_string()?); } ElementType::DocTypeVersion => { doc_type_version = Some(it.read_u64()?); } ElementType::DocTypeReadVersion => { doc_type_read_version = Some(it.read_u64()?); } other => { log::debug!("ignored element {:?}", other); } } } Ok(Self { version: version.unwrap_or(1), read_version: read_version.unwrap_or(1), max_id_length: max_id_length.unwrap_or(4), max_size_length: max_size_length.unwrap_or(8), doc_type: doc_type.ok_or(Error::Unsupported("mkv: invalid ebml file"))?, doc_type_version: doc_type_version.unwrap_or(1), doc_type_read_version: doc_type_read_version.unwrap_or(1), }) } } #[allow(dead_code)] #[derive(Debug)] pub(crate) struct InfoElement { pub(crate) timestamp_scale: u64, pub(crate) duration: Option, title: Option>, muxing_app: Box, writing_app: Box, } impl Element for InfoElement { const ID: ElementType = ElementType::Info; fn read(reader: &mut B, header: ElementHeader) -> Result { let mut duration = None; let mut timestamp_scale = None; let mut title = None; let mut muxing_app = None; let mut writing_app = None; let mut it = header.children(reader); while let Some(header) = it.read_header()? 
{ match header.etype { ElementType::TimestampScale => { timestamp_scale = Some(it.read_u64()?); } ElementType::Duration => { duration = Some(it.read_f64()?); } ElementType::Title => { title = Some(it.read_string()?); } ElementType::MuxingApp => { muxing_app = Some(it.read_string()?); } ElementType::WritingApp => { writing_app = Some(it.read_string()?); } other => { log::debug!("ignored element {:?}", other); } } } Ok(Self { timestamp_scale: timestamp_scale.unwrap_or(1_000_000), duration, title: title.map(|it| it.into_boxed_str()), muxing_app: muxing_app.unwrap_or_default().into_boxed_str(), writing_app: writing_app.unwrap_or_default().into_boxed_str(), }) } } #[allow(dead_code)] #[derive(Debug)] pub(crate) struct CuesElement { pub(crate) points: Box<[CuePointElement]>, } impl Element for CuesElement { const ID: ElementType = ElementType::Cues; fn read(reader: &mut B, header: ElementHeader) -> Result { let mut it = header.children(reader); Ok(Self { points: it.read_elements()? }) } } #[allow(dead_code)] #[derive(Debug)] pub(crate) struct CuePointElement { pub(crate) time: u64, pub(crate) positions: CueTrackPositionsElement, } impl Element for CuePointElement { const ID: ElementType = ElementType::CuePoint; fn read(reader: &mut B, header: ElementHeader) -> Result { let mut it = header.children(reader); let mut time = None; let mut pos = None; while let Some(header) = it.read_header()? 
{ match header.etype { ElementType::CueTime => time = Some(it.read_u64()?), ElementType::CueTrackPositions => { pos = Some(it.read_element_data()?); } other => { log::debug!("ignored element {:?}", other); } } } Ok(Self { time: time.ok_or(Error::DecodeError("mkv: missing time in cue"))?, positions: pos.ok_or(Error::DecodeError("mkv: missing positions in cue"))?, }) } } #[allow(dead_code)] #[derive(Debug)] pub(crate) struct CueTrackPositionsElement { pub(crate) track: u64, pub(crate) cluster_position: u64, } impl Element for CueTrackPositionsElement { const ID: ElementType = ElementType::CueTrackPositions; fn read(reader: &mut B, header: ElementHeader) -> Result { let mut it = header.children(reader); let mut track = None; let mut pos = None; while let Some(header) = it.read_header()? { match header.etype { ElementType::CueTrack => { track = Some(it.read_u64()?); } ElementType::CueClusterPosition => { pos = Some(it.read_u64()?); } other => { log::debug!("ignored element {:?}", other); } } } Ok(Self { track: track.ok_or(Error::DecodeError("mkv: missing track in cue track positions"))?, cluster_position: pos .ok_or(Error::DecodeError("mkv: missing position in cue track positions"))?, }) } } #[derive(Debug)] pub(crate) struct BlockGroupElement { pub(crate) data: Box<[u8]>, pub(crate) duration: Option, } impl Element for BlockGroupElement { const ID: ElementType = ElementType::BlockGroup; fn read(reader: &mut B, header: ElementHeader) -> Result { let mut it = header.children(reader); let mut data = None; let mut block_duration = None; while let Some(header) = it.read_header()? 
{ match header.etype { ElementType::DiscardPadding => { let _nanos = it.read_data()?; } ElementType::Block => { data = Some(it.read_boxed_slice()?); } ElementType::BlockDuration => { block_duration = Some(it.read_u64()?); } other => { log::debug!("ignored element {:?}", other); } } } Ok(Self { data: data.ok_or(Error::DecodeError("mkv: missing block inside block group"))?, duration: block_duration, }) } } #[derive(Debug)] pub(crate) struct BlockElement { pub(crate) track: u64, pub(crate) timestamp: u64, pub(crate) pos: u64, } #[derive(Debug)] pub(crate) struct ClusterElement { pub(crate) timestamp: u64, pub(crate) pos: u64, pub(crate) end: Option, pub(crate) blocks: Box<[BlockElement]>, } impl Element for ClusterElement { const ID: ElementType = ElementType::Cluster; fn read(reader: &mut B, header: ElementHeader) -> Result { let pos = reader.pos(); let mut timestamp = None; let mut blocks = Vec::new(); let has_size = header.end().is_some(); fn read_block(data: &[u8], timestamp: u64, offset: u64) -> Result { let mut reader = BufReader::new(data); let track = read_unsigned_vint(&mut reader)?; let rel_ts = reader.read_be_u16()? as i16; let timestamp = calc_abs_block_timestamp(timestamp, rel_ts); Ok(BlockElement { track, timestamp, pos: offset }) } fn get_timestamp(timestamp: Option) -> Result { timestamp.ok_or(Error::DecodeError("mkv: missing timestamp for a cluster")) } let mut it = header.children(reader); while let Some(header) = it.read_header()? 
{ match header.etype { ElementType::Timestamp => { timestamp = Some(it.read_u64()?); } ElementType::BlockGroup => { let group = it.read_element_data::()?; blocks.push(read_block(&group.data, get_timestamp(timestamp)?, header.pos)?); } ElementType::SimpleBlock => { let data = it.read_boxed_slice()?; blocks.push(read_block(&data, get_timestamp(timestamp)?, header.pos)?); } _ if header.etype.is_top_level() && !has_size => break, other => { log::debug!("ignored element {:?}", other); } } } Ok(ClusterElement { timestamp: get_timestamp(timestamp)?, blocks: blocks.into_boxed_slice(), pos, end: header.end(), }) } } #[derive(Debug)] pub(crate) struct TagsElement { pub(crate) tags: Box<[TagElement]>, } impl Element for TagsElement { const ID: ElementType = ElementType::Tags; fn read(reader: &mut B, header: ElementHeader) -> Result { let mut tags = Vec::new(); let mut it = header.children(reader); while let Some(header) = it.read_header()? { match header.etype { ElementType::Tag => { tags.push(it.read_element_data::()?); } other => { log::debug!("ignored element {:?}", other); } } } Ok(Self { tags: tags.into_boxed_slice() }) } } impl TagsElement { pub(crate) fn to_metadata(&self) -> MetadataRevision { let mut metadata = MetadataBuilder::new(); for tag in self.tags.iter() { for simple_tag in tag.simple_tags.iter() { // TODO: support std_key metadata.add_tag(Tag::new( None, &simple_tag.name, match &simple_tag.value { ElementData::Binary(b) => Value::Binary(b.clone()), ElementData::String(s) => Value::String(s.clone()), _ => unreachable!(), }, )); } } metadata.metadata() } } #[derive(Debug)] pub(crate) struct TagElement { pub(crate) simple_tags: Box<[SimpleTagElement]>, } impl Element for TagElement { const ID: ElementType = ElementType::Tag; fn read(reader: &mut B, header: ElementHeader) -> Result { let mut simple_tags = Vec::new(); let mut it = header.children(reader); while let Some(header) = it.read_header()? 
{ match header.etype { ElementType::SimpleTag => { simple_tags.push(it.read_element_data::()?); } other => { log::debug!("ignored element {:?}", other); } } } Ok(Self { simple_tags: simple_tags.into_boxed_slice() }) } } #[derive(Debug)] pub(crate) struct SimpleTagElement { pub(crate) name: Box, pub(crate) value: ElementData, } impl Element for SimpleTagElement { const ID: ElementType = ElementType::SimpleTag; fn read(reader: &mut B, header: ElementHeader) -> Result { let mut name = None; let mut value = None; let mut it = header.children(reader); while let Some(header) = it.read_header()? { match header.etype { ElementType::TagName => { name = Some(it.read_string()?); } ElementType::TagString | ElementType::TagBinary => { value = Some(it.read_data()?); } other => { log::debug!("ignored element {:?}", other); } } } Ok(Self { name: name.ok_or(Error::DecodeError("mkv: missing tag name"))?.into_boxed_str(), value: value.ok_or(Error::DecodeError("mkv: missing tag value"))?, }) } }