symphonia-format-mkv-0.5.2/.cargo_vcs_info.json0000644000000001620000000000100151270ustar { "git": { "sha1": "412f44daab39920beeb81d78b0e4271b263d33e9" }, "path_in_vcs": "symphonia-format-mkv" }symphonia-format-mkv-0.5.2/Cargo.toml0000644000000023440000000000100131310ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2018" rust-version = "1.53" name = "symphonia-format-mkv" version = "0.5.2" authors = ["Dariusz Niedoba "] description = "Pure Rust MKV/WebM demuxer (a part of project Symphonia)." homepage = "https://github.com/pdeljanov/Symphonia" readme = "README.md" keywords = [ "media", "demuxer", "mkv", "matroska", "webm", ] categories = [ "multimedia", "multimedia::audio", "multimedia::encoding", ] license = "MPL-2.0" repository = "https://github.com/pdeljanov/Symphonia" [dependencies.lazy_static] version = "1.4.0" [dependencies.log] version = "0.4" [dependencies.symphonia-core] version = "0.5.2" [dependencies.symphonia-metadata] version = "0.5.2" [dependencies.symphonia-utils-xiph] version = "0.5.2" symphonia-format-mkv-0.5.2/Cargo.toml.orig000064400000000000000000000014121046102023000166050ustar 00000000000000[package] name = "symphonia-format-mkv" version = "0.5.2" description = "Pure Rust MKV/WebM demuxer (a part of project Symphonia)." 
homepage = "https://github.com/pdeljanov/Symphonia" repository = "https://github.com/pdeljanov/Symphonia" authors = ["Dariusz Niedoba "] license = "MPL-2.0" readme = "README.md" categories = ["multimedia", "multimedia::audio", "multimedia::encoding"] keywords = ["media", "demuxer", "mkv", "matroska", "webm"] edition = "2018" rust-version = "1.53" [dependencies] log = "0.4" lazy_static = "1.4.0" symphonia-core = { version = "0.5.2", path = "../symphonia-core" } symphonia-metadata = { version = "0.5.2", path = "../symphonia-metadata" } symphonia-utils-xiph = { version = "0.5.2", path = "../symphonia-utils-xiph" }symphonia-format-mkv-0.5.2/README.md000064400000000000000000000012311046102023000151740ustar 00000000000000# Symphonia MKV/WebM Demuxer MKV/WebM demuxer for Project Symphonia. **Note:** This crate is part of Symphonia. Please use the [`symphonia`](https://crates.io/crates/symphonia) crate instead of this one directly. ## License Symphonia is provided under the MPL v2.0 license. Please refer to the LICENSE file for more details. ## Contributing Symphonia is an open-source project and contributions are very welcome! If you would like to make a large contribution, please raise an issue ahead of time to make sure your efforts fit into the project goals, and that no duplication of efforts occurs. All contributors will be credited within the CONTRIBUTORS file. symphonia-format-mkv-0.5.2/src/codecs.rs000064400000000000000000000035771046102023000163310ustar 00000000000000// Symphonia // Copyright (c) 2019-2022 The Project Symphonia Developers. // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
use symphonia_core::codecs;
use symphonia_core::codecs::CodecType;

use crate::segment::TrackElement;

/// Maps the Matroska codec ID of `track` to the corresponding Symphonia codec type.
///
/// Returns `None` when the codec ID is not recognised (the ID is logged at info level), or,
/// for the PCM codec IDs, when the track carries no bit depth or a bit depth that has no
/// matching PCM codec type.
pub(crate) fn codec_id_to_type(track: &TrackElement) -> Option<CodecType> {
    // Only the PCM variants need the bit depth; it is looked up once for all of them.
    let bit_depth = track.audio.as_ref().and_then(|a| a.bit_depth);

    match track.codec_id.as_str() {
        "A_MPEG/L1" => Some(codecs::CODEC_TYPE_MP1),
        "A_MPEG/L2" => Some(codecs::CODEC_TYPE_MP2),
        "A_MPEG/L3" => Some(codecs::CODEC_TYPE_MP3),
        "A_FLAC" => Some(codecs::CODEC_TYPE_FLAC),
        "A_OPUS" => Some(codecs::CODEC_TYPE_OPUS),
        "A_VORBIS" => Some(codecs::CODEC_TYPE_VORBIS),
        // Every AAC profile is handled by the same codec type.
        "A_AAC/MPEG2/MAIN"
        | "A_AAC/MPEG2/LC"
        | "A_AAC/MPEG2/LC/SBR"
        | "A_AAC/MPEG2/SSR"
        | "A_AAC/MPEG4/MAIN"
        | "A_AAC/MPEG4/LC"
        | "A_AAC/MPEG4/LC/SBR"
        | "A_AAC/MPEG4/SSR"
        | "A_AAC/MPEG4/LTP"
        | "A_AAC" => Some(codecs::CODEC_TYPE_AAC),
        // `bit_depth?` makes the whole function return `None` when the depth is missing.
        "A_PCM/INT/BIG" => match bit_depth? {
            16 => Some(codecs::CODEC_TYPE_PCM_S16BE),
            24 => Some(codecs::CODEC_TYPE_PCM_S24BE),
            32 => Some(codecs::CODEC_TYPE_PCM_S32BE),
            _ => None,
        },
        "A_PCM/INT/LIT" => match bit_depth? {
            16 => Some(codecs::CODEC_TYPE_PCM_S16LE),
            24 => Some(codecs::CODEC_TYPE_PCM_S24LE),
            32 => Some(codecs::CODEC_TYPE_PCM_S32LE),
            _ => None,
        },
        "A_PCM/FLOAT/IEEE" => match bit_depth? {
            32 => Some(codecs::CODEC_TYPE_PCM_F32LE),
            64 => Some(codecs::CODEC_TYPE_PCM_F64LE),
            _ => None,
        },
        _ => {
            log::info!("unknown codec: {}", &track.codec_id);
            None
        }
    }
}
symphonia-format-mkv-0.5.2/src/demuxer.rs000064400000000000000000000517651046102023000165440ustar 00000000000000
// Symphonia
// Copyright (c) 2019-2022 The Project Symphonia Developers.
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

use std::collections::{HashMap, VecDeque}; use std::convert::TryFrom; use std::io::{Seek, SeekFrom}; use symphonia_core::audio::Layout; use symphonia_core::codecs::{CodecParameters, CODEC_TYPE_FLAC, CODEC_TYPE_VORBIS}; use symphonia_core::errors::{ decode_error, end_of_stream_error, seek_error, unsupported_error, Error, Result, SeekErrorKind, }; use symphonia_core::formats::{ Cue, FormatOptions, FormatReader, Packet, SeekMode, SeekTo, SeekedTo, Track, }; use symphonia_core::io::{BufReader, MediaSource, MediaSourceStream, ReadBytes}; use symphonia_core::meta::{Metadata, MetadataLog}; use symphonia_core::probe::Instantiate; use symphonia_core::probe::{Descriptor, QueryDescriptor}; use symphonia_core::sample::SampleFormat; use symphonia_core::support_format; use symphonia_core::units::TimeBase; use symphonia_utils_xiph::flac::metadata::{MetadataBlockHeader, MetadataBlockType}; use crate::codecs::codec_id_to_type; use crate::ebml::{EbmlElement, ElementHeader, ElementIterator}; use crate::element_ids::{ElementType, ELEMENTS}; use crate::lacing::{extract_frames, read_xiph_sizes, Frame}; use crate::segment::{ BlockGroupElement, ClusterElement, CuesElement, InfoElement, SeekHeadElement, TagsElement, TracksElement, }; #[allow(dead_code)] pub struct TrackState { /// Codec parameters. pub(crate) codec_params: CodecParameters, /// The track number. track_num: u32, /// Default frame duration in nanoseconds. pub(crate) default_frame_duration: Option, } /// Matroska (MKV) and WebM demultiplexer. /// /// `MkvReader` implements a demuxer for the Matroska and WebM formats. 
pub struct MkvReader { /// Iterator over EBML element headers iter: ElementIterator, tracks: Vec, track_states: HashMap, current_cluster: Option, metadata: MetadataLog, cues: Vec, frames: VecDeque, timestamp_scale: u64, clusters: Vec, } #[derive(Debug)] struct ClusterState { timestamp: Option, end: Option, } fn vorbis_extra_data_from_codec_private(extra: &[u8]) -> Result> { const VORBIS_PACKET_TYPE_IDENTIFICATION: u8 = 1; const VORBIS_PACKET_TYPE_SETUP: u8 = 5; // Private Data for this codec has the following layout: // - 1 byte that represents number of packets minus one; // - Xiph coded lengths of packets, length of the last packet must be deduced (as in Xiph lacing) // - packets in order: // - The Vorbis identification header // - Vorbis comment header // - codec setup header let mut reader = BufReader::new(extra); let packet_count = reader.read_byte()? as usize; let packet_lengths = read_xiph_sizes(&mut reader, packet_count)?; let mut packets = Vec::new(); for length in packet_lengths { packets.push(reader.read_boxed_slice_exact(length as usize)?); } let last_packet_length = extra.len() - reader.pos() as usize; packets.push(reader.read_boxed_slice_exact(last_packet_length)?); let mut ident_header = None; let mut setup_header = None; for packet in packets { match packet.first().copied() { Some(VORBIS_PACKET_TYPE_IDENTIFICATION) => { ident_header = Some(packet); } Some(VORBIS_PACKET_TYPE_SETUP) => { setup_header = Some(packet); } _ => { log::debug!("unsupported vorbis packet type"); } } } // This is layout expected currently by Vorbis codec. 
Ok([ ident_header.ok_or(Error::DecodeError("mkv: missing vorbis identification packet"))?, setup_header.ok_or(Error::DecodeError("mkv: missing vorbis setup packet"))?, ] .concat() .into_boxed_slice()) } fn flac_extra_data_from_codec_private(codec_private: &[u8]) -> Result> { let mut reader = BufReader::new(codec_private); let marker = reader.read_quad_bytes()?; if marker != *b"fLaC" { return decode_error("mkv (flac): missing flac stream marker"); } let header = MetadataBlockHeader::read(&mut reader)?; loop { match header.block_type { MetadataBlockType::StreamInfo => { break Ok(reader.read_boxed_slice_exact(header.block_len as usize)?); } _ => reader.ignore_bytes(u64::from(header.block_len))?, } } } impl MkvReader { fn seek_track_by_ts_forward(&mut self, track_id: u32, ts: u64) -> Result { let actual_ts = 'out: loop { // Skip frames from the buffer until the given timestamp while let Some(frame) = self.frames.front() { if frame.timestamp + frame.duration >= ts && frame.track == track_id { break 'out frame.timestamp; } else { self.frames.pop_front(); } } self.next_element()? }; Ok(SeekedTo { track_id, required_ts: ts, actual_ts }) } fn seek_track_by_ts(&mut self, track_id: u32, ts: u64) -> Result { if self.clusters.is_empty() { self.seek_track_by_ts_forward(track_id, ts) } else { let mut target_cluster = None; for cluster in &self.clusters { if cluster.timestamp > ts { break; } target_cluster = Some(cluster); } let cluster = target_cluster.ok_or(Error::SeekError(SeekErrorKind::OutOfRange))?; let mut target_block = None; for block in cluster.blocks.iter() { if block.track as u32 != track_id { continue; } if block.timestamp > ts { break; } target_block = Some(block); } let pos = match target_block { Some(block) => block.pos, None => cluster.pos, }; self.iter.seek(pos)?; // Restore cluster's metadata self.current_cluster = Some(ClusterState { timestamp: Some(cluster.timestamp), end: cluster.end }); // Seek to a specified block inside the cluster. 
self.seek_track_by_ts_forward(track_id, ts) } } fn next_element(&mut self) -> Result<()> { if let Some(ClusterState { end: Some(end), .. }) = &self.current_cluster { // Make sure we don't read past the current cluster if its size is known. if self.iter.pos() >= *end { log::debug!("ended cluster"); self.current_cluster = None; } } // Each Cluster is being read incrementally so we need to keep track of // which cluster we are currently in. let header = match self.iter.read_child_header()? { Some(header) => header, None => { // If we reached here, it must be an end of stream. return end_of_stream_error(); } }; match header.etype { ElementType::Cluster => { self.current_cluster = Some(ClusterState { timestamp: None, end: header.end() }); } ElementType::Timestamp => match self.current_cluster.as_mut() { Some(cluster) => { cluster.timestamp = Some(self.iter.read_u64()?); } None => { self.iter.ignore_data()?; log::warn!("timestamp element outside of a cluster"); return Ok(()); } }, ElementType::SimpleBlock => { let cluster_ts = match self.current_cluster.as_ref() { Some(ClusterState { timestamp: Some(ts), .. }) => *ts, Some(_) => { self.iter.ignore_data()?; log::warn!("missing cluster timestamp"); return Ok(()); } None => { self.iter.ignore_data()?; log::warn!("simple block element outside of a cluster"); return Ok(()); } }; let data = self.iter.read_boxed_slice()?; extract_frames( &data, None, &self.track_states, cluster_ts, self.timestamp_scale, &mut self.frames, )?; } ElementType::BlockGroup => { let cluster_ts = match self.current_cluster.as_ref() { Some(ClusterState { timestamp: Some(ts), .. 
}) => *ts, Some(_) => { self.iter.ignore_data()?; log::warn!("missing cluster timestamp"); return Ok(()); } None => { self.iter.ignore_data()?; log::warn!("block group element outside of a cluster"); return Ok(()); } }; let group = self.iter.read_element_data::()?; extract_frames( &group.data, group.duration, &self.track_states, cluster_ts, self.timestamp_scale, &mut self.frames, )?; } ElementType::Tags => { let tags = self.iter.read_element_data::()?; self.metadata.push(tags.to_metadata()); self.current_cluster = None; } _ if header.etype.is_top_level() => { self.current_cluster = None; } other => { log::debug!("ignored element {:?}", other); self.iter.ignore_data()?; } } Ok(()) } } impl FormatReader for MkvReader { fn try_new(mut reader: MediaSourceStream, _options: &FormatOptions) -> Result where Self: Sized, { let is_seekable = reader.is_seekable(); // Get the total length of the stream, if possible. let total_len = if is_seekable { let pos = reader.pos(); let len = reader.seek(SeekFrom::End(0))?; reader.seek(SeekFrom::Start(pos))?; log::info!("stream is seekable with len={} bytes.", len); Some(len) } else { None }; let mut it = ElementIterator::new(reader, total_len); let ebml = it.read_element::()?; if !matches!(ebml.header.doc_type.as_str(), "matroska" | "webm") { return unsupported_error("mkv: not a matroska / webm file"); } let segment_pos = match it.read_child_header()? { Some(ElementHeader { etype: ElementType::Segment, data_pos, .. 
}) => data_pos, _ => return unsupported_error("mkv: missing segment element"), }; let mut segment_tracks = None; let mut info = None; let mut clusters = Vec::new(); let mut metadata = MetadataLog::default(); let mut current_cluster = None; let mut seek_positions = Vec::new(); while let Ok(Some(header)) = it.read_child_header() { match header.etype { ElementType::SeekHead => { let seek_head = it.read_element_data::()?; for element in seek_head.seeks.into_vec() { let tag = element.id as u32; let etype = match ELEMENTS.get(&tag) { Some((_, etype)) => *etype, None => continue, }; seek_positions.push((etype, segment_pos + element.position)); } } ElementType::Tracks => { segment_tracks = Some(it.read_element_data::()?); } ElementType::Info => { info = Some(it.read_element_data::()?); } ElementType::Cues => { let cues = it.read_element_data::()?; for cue in cues.points.into_vec() { clusters.push(ClusterElement { timestamp: cue.time, pos: segment_pos + cue.positions.cluster_position, end: None, blocks: Box::new([]), }); } } ElementType::Tags => { let tags = it.read_element_data::()?; metadata.push(tags.to_metadata()); } ElementType::Cluster => { // Set state for current cluster for the first call of `next_element`. current_cluster = Some(ClusterState { timestamp: None, end: header.end() }); // Don't look forward into the stream since // we can't be sure that we'll find anything useful. break; } other => { it.ignore_data()?; log::debug!("ignored element {:?}", other); } } } if is_seekable { // Make sure we don't jump backwards unnecessarily. 
seek_positions.sort_by_key(|sp| sp.1); for (etype, pos) in seek_positions { it.seek(pos)?; match etype { ElementType::Tracks => { segment_tracks = Some(it.read_element::()?); } ElementType::Info => { info = Some(it.read_element::()?); } ElementType::Tags => { let tags = it.read_element::()?; metadata.push(tags.to_metadata()); } ElementType::Cues => { let cues = it.read_element::()?; for cue in cues.points.into_vec() { clusters.push(ClusterElement { timestamp: cue.time, pos: segment_pos + cue.positions.cluster_position, end: None, blocks: Box::new([]), }); } } _ => (), } } } let segment_tracks = segment_tracks.ok_or(Error::DecodeError("mkv: missing Tracks element"))?; if is_seekable { let mut reader = it.into_inner(); reader.seek(SeekFrom::Start(segment_pos))?; it = ElementIterator::new(reader, total_len); } let info = info.ok_or(Error::DecodeError("mkv: missing Info element"))?; // TODO: remove this unwrap? let time_base = TimeBase::new(u32::try_from(info.timestamp_scale).unwrap(), 1_000_000_000); let mut tracks = Vec::new(); let mut states = HashMap::new(); for track in segment_tracks.tracks.into_vec() { let codec_type = codec_id_to_type(&track); let mut codec_params = CodecParameters::new(); codec_params.with_time_base(time_base); if let Some(duration) = info.duration { codec_params.with_n_frames(duration as u64); } if let Some(audio) = track.audio { codec_params.with_sample_rate(audio.sampling_frequency.round() as u32); let format = audio.bit_depth.and_then(|bits| match bits { 8 => Some(SampleFormat::S8), 16 => Some(SampleFormat::S16), 24 => Some(SampleFormat::S24), 32 => Some(SampleFormat::S32), _ => None, }); if let Some(format) = format { codec_params.with_sample_format(format); } if let Some(bits) = audio.bit_depth { codec_params.with_bits_per_sample(bits as u32); } let layout = match audio.channels { 1 => Some(Layout::Mono), 2 => Some(Layout::Stereo), 3 => Some(Layout::TwoPointOne), 6 => Some(Layout::FivePointOne), other => { log::warn!( "track #{} has 
custom number of channels: {}", track.number, other ); None } }; if let Some(layout) = layout { codec_params.with_channel_layout(layout); } if let Some(codec_type) = codec_type { codec_params.for_codec(codec_type); if let Some(codec_private) = track.codec_private { let extra_data = match codec_type { CODEC_TYPE_VORBIS => { vorbis_extra_data_from_codec_private(&codec_private)? } CODEC_TYPE_FLAC => flac_extra_data_from_codec_private(&codec_private)?, _ => codec_private, }; codec_params.with_extra_data(extra_data); } } } let track_id = track.number as u32; tracks.push(Track { id: track_id, codec_params: codec_params.clone(), language: track.language, }); states.insert( track_id, TrackState { codec_params, track_num: track_id, default_frame_duration: track.default_duration, }, ); } Ok(Self { iter: it, tracks, track_states: states, current_cluster, metadata, cues: Vec::new(), frames: VecDeque::new(), timestamp_scale: info.timestamp_scale, clusters, }) } fn cues(&self) -> &[Cue] { &self.cues } fn metadata(&mut self) -> Metadata<'_> { self.metadata.metadata() } fn seek(&mut self, _mode: SeekMode, to: SeekTo) -> Result { if self.tracks.is_empty() { return seek_error(SeekErrorKind::Unseekable); } match to { SeekTo::Time { time, track_id } => { let track = match track_id { Some(id) => self.tracks.iter().find(|track| track.id == id), None => self.tracks.first(), }; let track = track.ok_or(Error::SeekError(SeekErrorKind::InvalidTrack))?; let tb = track.codec_params.time_base.unwrap(); let ts = tb.calc_timestamp(time); let track_id = track.id; self.seek_track_by_ts(track_id, ts) } SeekTo::TimeStamp { ts, track_id } => { match self.tracks.iter().find(|t| t.id == track_id) { Some(_) => self.seek_track_by_ts(track_id, ts), None => seek_error(SeekErrorKind::InvalidTrack), } } } } fn tracks(&self) -> &[Track] { &self.tracks } fn next_packet(&mut self) -> Result { loop { if let Some(frame) = self.frames.pop_front() { return Ok(Packet::new_from_boxed_slice( frame.track, 
frame.timestamp, frame.duration, frame.data, )); } self.next_element()?; } } fn into_inner(self: Box) -> MediaSourceStream { self.iter.into_inner() } } impl QueryDescriptor for MkvReader { fn query() -> &'static [Descriptor] { &[support_format!( "matroska", "Matroska / WebM", &["webm", "mkv"], &["video/webm", "video/x-matroska"], &[b"\x1A\x45\xDF\xA3"] // Top-level element Ebml element )] } fn score(_context: &[u8]) -> u8 { 255 } } symphonia-format-mkv-0.5.2/src/ebml.rs000064400000000000000000000462071046102023000160050ustar 00000000000000// Symphonia // Copyright (c) 2019-2022 The Project Symphonia Developers. // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. use std::io::SeekFrom; use symphonia_core::errors::{decode_error, seek_error, Error, Result, SeekErrorKind}; use symphonia_core::io::{MediaSource, ReadBytes}; use symphonia_core::util::bits::sign_extend_leq64_to_i64; use crate::element_ids::{ElementType, Type, ELEMENTS}; use crate::segment::EbmlHeaderElement; /// Reads a single EBML element ID (as in RFC8794) from the stream /// and returns its value, length in bytes (1-4 bytes) /// and a flag indicating whether any data was ignored, or an error. #[allow(clippy::never_loop)] pub(crate) fn read_tag(mut reader: R) -> Result<(u32, u32, bool)> { // Try to read a tag at current reader position. loop { let byte = reader.read_byte()?; let remaining_octets = byte.leading_zeros(); if remaining_octets > 3 { // First byte should be ignored since we know it could not start a tag. // We immediately proceed to seek a first valid tag. 
break; } // Read remaining octets let mut vint = u32::from(byte); for _ in 0..remaining_octets { let byte = reader.read_byte()?; vint = (vint << 8) | u32::from(byte); } log::debug!("element with tag: {:X}", vint); return Ok((vint, remaining_octets + 1, false)); } // Seek to next supported tag of a top level element (`Cluster`, `Info`, etc.) let mut tag = 0u32; loop { let ty = ELEMENTS.get(&tag).map(|(_, ty)| ty).filter(|ty| ty.is_top_level()); if let Some(ty) = ty { log::info!("found next supported tag {:08X} ({:?})", tag, ty); return Ok((tag, 4, true)); } tag = (tag << 8) | u32::from(reader.read_u8()?); } } pub(crate) fn read_size(reader: R) -> Result> { let (size, len) = read_vint(reader)?; if size == u64::MAX && len == 1 { return Ok(None); } Ok(Some(size)) } /// Reads a single unsigned variable size integer (as in RFC8794) from the stream /// and returns it or an error. pub(crate) fn read_unsigned_vint(reader: R) -> Result { Ok(read_vint(reader)?.0) } /// Reads a single signed variable size integer (as in RFC8794) from the stream /// and returns it or an error. pub(crate) fn read_signed_vint(mut reader: R) -> Result { let (value, len) = read_vint(&mut reader)?; // Convert to a signed integer by range shifting. let half_range = i64::pow(2, (len * 7) - 1) - 1; Ok(value as i64 - half_range) } /// Reads a single unsigned variable size integer (as in RFC8794) from the stream /// and returns both its value and length in octects, or an error. fn read_vint(mut reader: R) -> Result<(u64, u32)> { let byte = reader.read_byte()?; if byte == 0xFF { // Special case: unknown size elements. 
return Ok((u64::MAX, 1)); } let vint_width = byte.leading_zeros(); let mut vint = u64::from(byte); // Clear VINT_MARKER bit vint ^= 1 << (7 - vint_width); // Read remaining octets for _ in 0..vint_width { let byte = reader.read_byte()?; vint = (vint << 8) | u64::from(byte); } Ok((vint, vint_width + 1)) } #[cfg(test)] mod tests { use symphonia_core::io::BufReader; use super::{read_signed_vint, read_tag, read_unsigned_vint}; #[test] fn element_tag_parsing() { assert_eq!(read_tag(BufReader::new(&[0x82])).unwrap(), (0x82, 1, false)); assert_eq!(read_tag(BufReader::new(&[0x40, 0x02])).unwrap(), (0x4002, 2, false)); assert_eq!(read_tag(BufReader::new(&[0x20, 0x00, 0x02])).unwrap(), (0x200002, 3, false)); assert_eq!( read_tag(BufReader::new(&[0x10, 0x00, 0x00, 0x02])).unwrap(), (0x10000002, 4, false) ); } #[test] fn variable_unsigned_integer_parsing() { assert_eq!(read_unsigned_vint(BufReader::new(&[0x82])).unwrap(), 2); assert_eq!(read_unsigned_vint(BufReader::new(&[0x40, 0x02])).unwrap(), 2); assert_eq!(read_unsigned_vint(BufReader::new(&[0x20, 0x00, 0x02])).unwrap(), 2); assert_eq!(read_unsigned_vint(BufReader::new(&[0x10, 0x00, 0x00, 0x02])).unwrap(), 2); assert_eq!(read_unsigned_vint(BufReader::new(&[0x08, 0x00, 0x00, 0x00, 0x02])).unwrap(), 2); assert_eq!( read_unsigned_vint(BufReader::new(&[0x04, 0x00, 0x00, 0x00, 0x00, 0x02])).unwrap(), 2 ); assert_eq!( read_unsigned_vint(BufReader::new(&[0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02])) .unwrap(), 2 ); assert_eq!( read_unsigned_vint(BufReader::new(&[0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02])) .unwrap(), 2 ); } #[test] fn variable_signed_integer_parsing() { assert_eq!(read_signed_vint(BufReader::new(&[0x80])).unwrap(), -63); assert_eq!(read_signed_vint(BufReader::new(&[0x40, 0x00])).unwrap(), -8191); } } #[derive(Copy, Clone, Debug)] pub struct ElementHeader { /// The element tag. pub tag: u32, /// The element type. pub etype: ElementType, /// The element's offset in the stream. 
pub pos: u64, /// The total size of the element including the header. pub len: u64, /// The element's data offset in the stream. pub data_pos: u64, /// The size of the payload data. pub data_len: u64, } impl ElementHeader { /// Returns an iterator over child elements of the current element. pub(crate) fn children(&self, reader: R) -> ElementIterator { assert_eq!(reader.pos(), self.data_pos, "unexpected position"); ElementIterator::new_of(reader, *self) } pub(crate) fn end(&self) -> Option { if self.data_len == 0 { None } else { Some(self.data_pos + self.data_len) } } } pub trait Element: Sized { const ID: ElementType; fn read(reader: &mut B, header: ElementHeader) -> Result; } impl ElementHeader { /// Reads a single EBML element header from the stream. pub(crate) fn read(mut reader: &mut R) -> Result<(ElementHeader, bool)> { let (tag, tag_len, reset) = read_tag(&mut reader)?; let header_start = reader.pos() - u64::from(tag_len); // According to spec, elements like Segment and Cluster can have unknown size. // Currently, these cases are represented as `data_len` equal to 0, // but it might be worth changing it to an Option at some point. let size = read_size(&mut reader)?.unwrap_or(0); Ok(( ElementHeader { tag, etype: ELEMENTS.get(&tag).map_or(ElementType::Unknown, |(_, etype)| *etype), pos: header_start, len: reader.pos() - header_start + size, data_len: size, data_pos: reader.pos(), }, reset, )) } } #[derive(Debug)] pub(crate) struct EbmlElement { pub(crate) header: EbmlHeaderElement, } impl Element for EbmlElement { const ID: ElementType = ElementType::Ebml; fn read(reader: &mut B, header: ElementHeader) -> Result { let mut it = header.children(reader); Ok(Self { header: it.read_element_data::()? }) } } pub(crate) struct ElementIterator { /// Reader of the stream containing this element. reader: R, /// Store current element header (for sanity check purposes). current: Option, /// Position of the next element header that would be read. 
next_pos: u64, /// Position immediately past last byte of this element. end: Option, } impl ElementIterator { /// Creates a new iterator over elements starting from the current stream position. pub(crate) fn new(reader: R, end: Option) -> Self { let pos = reader.pos(); Self::new_at(reader, pos, end) } /// Creates a new iterator over elements starting from the given stream position. fn new_at(reader: R, start: u64, end: Option) -> Self { Self { reader, current: None, next_pos: start, end } } /// Creates a new iterator over children of the given parent element. fn new_of(reader: R, parent: ElementHeader) -> Self { Self { reader, current: Some(parent), next_pos: parent.data_pos, end: parent.end() } } /// Seek to a specified offset inside of the stream. pub(crate) fn seek(&mut self, pos: u64) -> Result<()> where R: MediaSource, { let current_pos = self.pos(); self.current = None; if self.reader.is_seekable() { self.reader.seek(SeekFrom::Start(pos))?; } else if pos < current_pos { return seek_error(SeekErrorKind::ForwardOnly); } else { self.reader.ignore_bytes(pos - current_pos)?; } self.next_pos = pos; Ok(()) } /// Consumes this iterator and return the original stream. pub(crate) fn into_inner(self) -> R { self.reader } /// Reads a single element header and moves to its next sibling by ignoring all the children. pub(crate) fn read_header(&mut self) -> Result> { let header = self.read_header_no_consume()?; if let Some(header) = &header { // Move to next sibling. self.next_pos += header.len; } Ok(header) } /// Reads a single element header and shifts the stream to element's child /// if it'a a master element or to next sibling otherwise. pub(crate) fn read_child_header(&mut self) -> Result> { let header = self.read_header_no_consume()?; if let Some(header) = &header { match ELEMENTS.get(&header.tag).map(|it| it.0) { Some(Type::Master) => { // Move to start of a child element. self.next_pos = header.data_pos; } _ => { // Move to next sibling. 
self.next_pos += header.len; } } } Ok(header) } /// Reads element header at the current stream position /// without moving to the end of the parent element. /// Returns [None] if the current element has no more children or reached end of the stream. fn read_header_no_consume(&mut self) -> Result> { let pos = self.reader.pos(); if pos < self.next_pos { // Ignore bytes that were not read self.reader.ignore_bytes(self.next_pos - pos)?; } assert_eq!(self.next_pos, self.reader.pos(), "invalid position"); if self.reader.pos() < self.end.unwrap_or(u64::MAX) { let (header, reset) = ElementHeader::read(&mut self.reader)?; if reset { // After finding a new top-level element in a broken stream // it is necessary to update `next_pos` so it refers to a position // of a child header. self.next_pos = self.reader.pos(); } self.current = Some(header); return Ok(Some(header)); } Ok(None) } /// Reads a single element with its data. pub(crate) fn read_element(&mut self) -> Result { let _header = self.read_header()?; self.read_element_data() } /// Reads data of current element. Must be used after /// [Self::read_header] or [Self::read_child_header]. pub(crate) fn read_element_data(&mut self) -> Result { let header = self.current.expect("EBML header must be read before calling this function"); assert_eq!( header.etype, E::ID, "EBML element type must be checked before calling this function" ); let element = E::read(&mut self.reader, header)?; // Update position to match the position element reader finished at self.next_pos = self.reader.pos(); Ok(element) } /// Reads a collection of element with the given type. pub(crate) fn read_elements(&mut self) -> Result> { let mut elements = vec![]; while let Some(header) = self.read_header()? 
{ if header.etype == ElementType::Crc32 { // TODO: ignore crc for now continue; } if header.etype != E::ID { log::warn!("found element with invalid type {:?}", header); self.ignore_data()?; continue; } elements.push(E::read(&mut self.reader, header)?); } Ok(elements.into_boxed_slice()) } /// Reads any primitive data inside of the current element. pub(crate) fn read_data(&mut self) -> Result { let hdr = self.current.expect("not in an element"); let value = self .try_read_data(hdr)? .ok_or(Error::DecodeError("mkv: element has no primitive data"))?; Ok(value) } /// Reads data of the current element as an unsigned integer. pub(crate) fn read_u64(&mut self) -> Result { match self.read_data()? { ElementData::UnsignedInt(s) => Ok(s), _ => Err(Error::DecodeError("mkv: expected an unsigned int")), } } /// Reads data of the current element as a floating-point number. pub(crate) fn read_f64(&mut self) -> Result { match self.read_data()? { ElementData::Float(s) => Ok(s), _ => Err(Error::DecodeError("mkv: expected a float")), } } /// Reads data of the current element as a string. pub(crate) fn read_string(&mut self) -> Result { match self.read_data()? { ElementData::String(s) => Ok(s), _ => Err(Error::DecodeError("mkv: expected a string")), } } /// Reads binary data of the current element as boxed slice. pub(crate) fn read_boxed_slice(&mut self) -> Result> { match self.read_data()? { ElementData::Binary(b) => Ok(b), _ => Err(Error::DecodeError("mkv: expected binary data")), } } /// Reads any primitive data of the current element. It returns [None] /// if the it is a master element. pub(crate) fn try_read_data(&mut self, header: ElementHeader) -> Result> { Ok(match ELEMENTS.get(&header.tag) { Some((ty, _)) => { // Position must always be valid, because this function is called // after reading the element header. 
assert_eq!(header.data_pos, self.reader.pos(), "invalid stream position"); if let (Some(cur), Some(end)) = (self.current, self.end) { if cur.pos + cur.len > end { log::debug!("reading element data {:?}; parent end={}", cur, end); return decode_error( "mkv: attempt to read element data past master element ", ); } } Some(match ty { Type::Master => { return Ok(None); } Type::Unsigned => { if header.data_len > 8 { self.ignore_data()?; return decode_error("mkv: invalid unsigned integer length"); } let mut buff = [0u8; 8]; let offset = 8 - header.data_len as usize; self.reader.read_buf_exact(&mut buff[offset..])?; let value = u64::from_be_bytes(buff); ElementData::UnsignedInt(value) } Type::Signed | Type::Date => { if header.data_len > 8 { self.ignore_data()?; return decode_error("mkv: invalid signed integer length"); } let len = header.data_len as usize; let mut buff = [0u8; 8]; self.reader.read_buf_exact(&mut buff[8 - len..])?; let value = u64::from_be_bytes(buff); let value = sign_extend_leq64_to_i64(value, (len as u32) * 8); match ty { Type::Signed => ElementData::SignedInt(value), Type::Date => ElementData::Date(value), _ => unreachable!(), } } Type::Float => { let value = match header.data_len { 0 => 0.0, 4 => self.reader.read_be_f32()? as f64, 8 => self.reader.read_be_f64()?, _ => { self.ignore_data()?; return Err(Error::DecodeError("mkv: invalid float length")); } }; ElementData::Float(value) } Type::String => { let data = self.reader.read_boxed_slice_exact(header.data_len as usize)?; let bytes = data.split(|b| *b == 0).next().unwrap_or(&data); ElementData::String(String::from_utf8_lossy(bytes).into_owned()) } Type::Binary => ElementData::Binary( self.reader.read_boxed_slice_exact(header.data_len as usize)?, ), }) } None => None, }) } /// Ignores content of the current element. It can be used after calling /// [Self::read_child_header] to ignore children of a master element. 
pub(crate) fn ignore_data(&mut self) -> Result<()> { if let Some(header) = self.current { log::debug!("ignoring data of {:?} element", header.etype); self.reader.ignore_bytes(header.data_len)?; self.next_pos = header.data_pos + header.data_len; } Ok(()) } /// Gets the position of the underlying stream. pub(crate) fn pos(&self) -> u64 { self.reader.pos() } } /// An EBML element data. #[derive(Clone, Debug)] pub(crate) enum ElementData { /// A binary buffer. Binary(Box<[u8]>), /// A floating point number. Float(f64), /// A signed integer. SignedInt(i64), /// A string. String(String), /// An unsigned integer. UnsignedInt(u64), /// A point in time referenced in nanoseconds from the precise beginning /// of the third millennium of the Gregorian Calendar in Coordinated Universal Time /// (also known as 2001-01-01T00:00:00.000000000 UTC). Date(i64), } symphonia-format-mkv-0.5.2/src/element_ids.rs000064400000000000000000000340611046102023000173510ustar 00000000000000// Symphonia // Copyright (c) 2019-2022 The Project Symphonia Developers. // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
use std::collections::HashMap; use lazy_static::lazy_static; #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub(crate) enum Type { Master, Unsigned, Signed, Binary, String, Float, Date, } #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub enum ElementType { Ebml, EbmlVersion, EbmlReadVersion, EbmlMaxIdLength, EbmlMaxSizeLength, DocType, DocTypeVersion, DocTypeReadVersion, Crc32, Void, Segment, SeekHead, Seek, SeekId, SeekPosition, Info, TimestampScale, Duration, DateUtc, Title, MuxingApp, WritingApp, Cluster, Timestamp, PrevSize, SimpleBlock, BlockGroup, Block, BlockAdditions, BlockMore, BlockAddId, BlockAdditional, BlockDuration, ReferenceBlock, DiscardPadding, Tracks, TrackEntry, TrackNumber, TrackUid, TrackType, FlagEnabled, FlagDefault, FlagForced, FlagHearingImpaired, FlagVisualImpaired, FlagTextDescriptions, FlagOriginal, FlagCommentary, FlagLacing, DefaultDuration, Name, Language, CodecId, CodecPrivate, CodecName, CodecDelay, SeekPreRoll, Video, FlagInterlaced, StereoMode, AlphaMode, PixelWidth, PixelHeight, PixelCropBottom, PixelCropTop, PixelCropLeft, PixelCropRight, DisplayWidth, DisplayHeight, DisplayUnit, AspectRatioType, Audio, SamplingFrequency, OutputSamplingFrequency, Channels, BitDepth, ContentEncodings, ContentEncoding, ContentEncodingOrder, ContentEncodingScope, ContentEncodingType, ContentEncryption, ContentEncAlgo, ContentEncKeyId, ContentEncAesSettings, AesSettingsCipherMode, Colour, MatrixCoefficients, BitsPerChannel, ChromaSubsamplingHorz, ChromaSubsamplingVert, CbSubsamplingHorz, CbSubsamplingVert, ChromaSitingHorz, ChromaSitingVert, Range, TransferCharacteristics, Primaries, MaxCll, MaxFall, MasteringMetadata, PrimaryRChromaticityX, PrimaryRChromaticityY, PrimaryGChromaticityX, PrimaryGChromaticityY, PrimaryBChromaticityX, PrimaryBChromaticityY, WhitePointChromaticityX, WhitePointChromaticityY, LuminanceMax, LuminanceMin, Cues, CuePoint, CueTime, CueTrackPositions, CueTrack, CueClusterPosition, CueRelativePosition, CueDuration, CueBlockNumber, 
Chapters, EditionEntry, ChapterAtom, ChapterUid, ChapterStringUid, ChapterTimeStart, ChapterTimeEnd, ChapterDisplay, ChapString, ChapLanguage, ChapLanguageIetf, ChapCountry, Tags, Tag, Targets, TargetTypeValue, TargetType, TagTrackUid, SimpleTag, TagName, TagLanguage, TagDefault, TagString, TagBinary, /// Special type for unknown tags. Unknown, } impl ElementType { pub(crate) fn is_top_level(&self) -> bool { matches!( self, ElementType::Cluster | ElementType::Cues | ElementType::Info | ElementType::SeekHead | ElementType::Tags | ElementType::Tracks ) } } lazy_static! { pub(crate) static ref ELEMENTS: HashMap = { let mut elems = HashMap::new(); elems.insert(0x1A45DFA3, (Type::Master, ElementType::Ebml)); elems.insert(0x4286, (Type::Unsigned, ElementType::EbmlVersion)); elems.insert(0x42F7, (Type::Unsigned, ElementType::EbmlReadVersion)); elems.insert(0x42F2, (Type::Unsigned, ElementType::EbmlMaxIdLength)); elems.insert(0x42F3, (Type::Unsigned, ElementType::EbmlMaxSizeLength)); elems.insert(0x4282, (Type::String, ElementType::DocType)); elems.insert(0x4287, (Type::Unsigned, ElementType::DocTypeVersion)); elems.insert(0x4285, (Type::Unsigned, ElementType::DocTypeReadVersion)); elems.insert(0xBF, (Type::Binary, ElementType::Crc32)); elems.insert(0xEC, (Type::Binary, ElementType::Void)); elems.insert(0x18538067, (Type::Master, ElementType::Segment)); elems.insert(0x114D9B74, (Type::Master, ElementType::SeekHead)); elems.insert(0x4DBB, (Type::Master, ElementType::Seek)); elems.insert(0x53AB, (Type::Unsigned, ElementType::SeekId)); elems.insert(0x53AC, (Type::Unsigned, ElementType::SeekPosition)); elems.insert(0x1549A966, (Type::Master, ElementType::Info)); elems.insert(0x2AD7B1, (Type::Unsigned, ElementType::TimestampScale)); elems.insert(0x4489, (Type::Float, ElementType::Duration)); elems.insert(0x4461, (Type::Date, ElementType::DateUtc)); elems.insert(0x7BA9, (Type::String, ElementType::Title)); elems.insert(0x4D80, (Type::String, ElementType::MuxingApp)); 
elems.insert(0x5741, (Type::String, ElementType::WritingApp)); elems.insert(0x1F43B675, (Type::Master, ElementType::Cluster)); elems.insert(0xE7, (Type::Unsigned, ElementType::Timestamp)); elems.insert(0xAB, (Type::Unsigned, ElementType::PrevSize)); elems.insert(0xA3, (Type::Binary, ElementType::SimpleBlock)); elems.insert(0xA0, (Type::Master, ElementType::BlockGroup)); elems.insert(0xA1, (Type::Binary, ElementType::Block)); elems.insert(0x75A1, (Type::Master, ElementType::BlockAdditions)); elems.insert(0xA6, (Type::Master, ElementType::BlockMore)); elems.insert(0xEE, (Type::Unsigned, ElementType::BlockAddId)); elems.insert(0xA5, (Type::Binary, ElementType::BlockAdditional)); elems.insert(0x9B, (Type::Unsigned, ElementType::BlockDuration)); elems.insert(0xFB, (Type::Signed, ElementType::ReferenceBlock)); elems.insert(0x75A2, (Type::Signed, ElementType::DiscardPadding)); elems.insert(0x1654AE6B, (Type::Master, ElementType::Tracks)); elems.insert(0xAE, (Type::Master, ElementType::TrackEntry)); elems.insert(0xD7, (Type::Unsigned, ElementType::TrackNumber)); elems.insert(0x73C5, (Type::Unsigned, ElementType::TrackUid)); elems.insert(0x83, (Type::Unsigned, ElementType::TrackType)); elems.insert(0xB9, (Type::Unsigned, ElementType::FlagEnabled)); elems.insert(0x88, (Type::Unsigned, ElementType::FlagDefault)); elems.insert(0x55AA, (Type::Unsigned, ElementType::FlagForced)); elems.insert(0x55AB, (Type::Unsigned, ElementType::FlagHearingImpaired)); elems.insert(0x55AC, (Type::Unsigned, ElementType::FlagVisualImpaired)); elems.insert(0x55AD, (Type::Unsigned, ElementType::FlagTextDescriptions)); elems.insert(0x55AE, (Type::Unsigned, ElementType::FlagOriginal)); elems.insert(0x55AF, (Type::Unsigned, ElementType::FlagCommentary)); elems.insert(0x9C, (Type::Unsigned, ElementType::FlagLacing)); elems.insert(0x23E383, (Type::Unsigned, ElementType::DefaultDuration)); elems.insert(0x536E, (Type::String, ElementType::Name)); elems.insert(0x22B59C, (Type::String, 
ElementType::Language)); elems.insert(0x86, (Type::String, ElementType::CodecId)); elems.insert(0x63A2, (Type::Binary, ElementType::CodecPrivate)); elems.insert(0x258688, (Type::String, ElementType::CodecName)); elems.insert(0x56AA, (Type::Unsigned, ElementType::CodecDelay)); elems.insert(0x56BB, (Type::Unsigned, ElementType::SeekPreRoll)); elems.insert(0xE0, (Type::Master, ElementType::Video)); elems.insert(0x9A, (Type::Unsigned, ElementType::FlagInterlaced)); elems.insert(0x53B8, (Type::Unsigned, ElementType::StereoMode)); elems.insert(0x53C0, (Type::Unsigned, ElementType::AlphaMode)); elems.insert(0xB0, (Type::Unsigned, ElementType::PixelWidth)); elems.insert(0xBA, (Type::Unsigned, ElementType::PixelHeight)); elems.insert(0x54AA, (Type::Unsigned, ElementType::PixelCropBottom)); elems.insert(0x54BB, (Type::Unsigned, ElementType::PixelCropTop)); elems.insert(0x54CC, (Type::Unsigned, ElementType::PixelCropLeft)); elems.insert(0x54DD, (Type::Unsigned, ElementType::PixelCropRight)); elems.insert(0x54B0, (Type::Unsigned, ElementType::DisplayWidth)); elems.insert(0x54BA, (Type::Unsigned, ElementType::DisplayHeight)); elems.insert(0x54B2, (Type::Unsigned, ElementType::DisplayUnit)); elems.insert(0x54B3, (Type::Unsigned, ElementType::AspectRatioType)); elems.insert(0xE1, (Type::Master, ElementType::Audio)); elems.insert(0xB5, (Type::Float, ElementType::SamplingFrequency)); elems.insert(0x78B5, (Type::Float, ElementType::OutputSamplingFrequency)); elems.insert(0x9F, (Type::Unsigned, ElementType::Channels)); elems.insert(0x6264, (Type::Unsigned, ElementType::BitDepth)); elems.insert(0x6D80, (Type::Master, ElementType::ContentEncodings)); elems.insert(0x6240, (Type::Master, ElementType::ContentEncoding)); elems.insert(0x5031, (Type::Unsigned, ElementType::ContentEncodingOrder)); elems.insert(0x5032, (Type::Unsigned, ElementType::ContentEncodingScope)); elems.insert(0x5033, (Type::Unsigned, ElementType::ContentEncodingType)); elems.insert(0x5035, (Type::Master, 
ElementType::ContentEncryption)); elems.insert(0x47E1, (Type::Unsigned, ElementType::ContentEncAlgo)); elems.insert(0x47E2, (Type::Unsigned, ElementType::ContentEncKeyId)); elems.insert(0x47E7, (Type::Master, ElementType::ContentEncAesSettings)); elems.insert(0x47E8, (Type::Unsigned, ElementType::AesSettingsCipherMode)); elems.insert(0x55B0, (Type::Master, ElementType::Colour)); elems.insert(0x55B1, (Type::Unsigned, ElementType::MatrixCoefficients)); elems.insert(0x55B2, (Type::Unsigned, ElementType::BitsPerChannel)); elems.insert(0x55B3, (Type::Unsigned, ElementType::ChromaSubsamplingHorz)); elems.insert(0x55B4, (Type::Unsigned, ElementType::ChromaSubsamplingVert)); elems.insert(0x55B5, (Type::Unsigned, ElementType::CbSubsamplingHorz)); elems.insert(0x55B6, (Type::Unsigned, ElementType::CbSubsamplingVert)); elems.insert(0x55B7, (Type::Unsigned, ElementType::ChromaSitingHorz)); elems.insert(0x55B8, (Type::Unsigned, ElementType::ChromaSitingVert)); elems.insert(0x55B9, (Type::Unsigned, ElementType::Range)); elems.insert(0x55BA, (Type::Unsigned, ElementType::TransferCharacteristics)); elems.insert(0x55BB, (Type::Unsigned, ElementType::Primaries)); elems.insert(0x55BC, (Type::Unsigned, ElementType::MaxCll)); elems.insert(0x55BD, (Type::Unsigned, ElementType::MaxFall)); elems.insert(0x55D0, (Type::Master, ElementType::MasteringMetadata)); elems.insert(0x55D1, (Type::Float, ElementType::PrimaryRChromaticityX)); elems.insert(0x55D2, (Type::Float, ElementType::PrimaryRChromaticityY)); elems.insert(0x55D3, (Type::Float, ElementType::PrimaryGChromaticityX)); elems.insert(0x55D4, (Type::Float, ElementType::PrimaryGChromaticityY)); elems.insert(0x55D5, (Type::Float, ElementType::PrimaryBChromaticityX)); elems.insert(0x55D6, (Type::Float, ElementType::PrimaryBChromaticityY)); elems.insert(0x55D7, (Type::Float, ElementType::WhitePointChromaticityX)); elems.insert(0x55D8, (Type::Float, ElementType::WhitePointChromaticityY)); elems.insert(0x55D9, (Type::Float, 
ElementType::LuminanceMax)); elems.insert(0x55DA, (Type::Float, ElementType::LuminanceMin)); elems.insert(0x1C53BB6B, (Type::Master, ElementType::Cues)); elems.insert(0xBB, (Type::Master, ElementType::CuePoint)); elems.insert(0xB3, (Type::Unsigned, ElementType::CueTime)); elems.insert(0xB7, (Type::Master, ElementType::CueTrackPositions)); elems.insert(0xF7, (Type::Unsigned, ElementType::CueTrack)); elems.insert(0xF1, (Type::Unsigned, ElementType::CueClusterPosition)); elems.insert(0xF0, (Type::Unsigned, ElementType::CueRelativePosition)); elems.insert(0xB2, (Type::Unsigned, ElementType::CueDuration)); elems.insert(0x5378, (Type::Unsigned, ElementType::CueBlockNumber)); elems.insert(0x1043A770, (Type::Master, ElementType::Chapters)); elems.insert(0x45B9, (Type::Master, ElementType::EditionEntry)); elems.insert(0xB6, (Type::Master, ElementType::ChapterAtom)); elems.insert(0x73C4, (Type::Unsigned, ElementType::ChapterUid)); elems.insert(0x5654, (Type::String, ElementType::ChapterStringUid)); elems.insert(0x91, (Type::Unsigned, ElementType::ChapterTimeStart)); elems.insert(0x92, (Type::Unsigned, ElementType::ChapterTimeEnd)); elems.insert(0x80, (Type::Master, ElementType::ChapterDisplay)); elems.insert(0x85, (Type::String, ElementType::ChapString)); elems.insert(0x437C, (Type::String, ElementType::ChapLanguage)); elems.insert(0x437D, (Type::String, ElementType::ChapLanguageIetf)); elems.insert(0x437E, (Type::String, ElementType::ChapCountry)); elems.insert(0x1254C367, (Type::Master, ElementType::Tags)); elems.insert(0x7373, (Type::Master, ElementType::Tag)); elems.insert(0x63C0, (Type::Master, ElementType::Targets)); elems.insert(0x68CA, (Type::Unsigned, ElementType::TargetTypeValue)); elems.insert(0x63CA, (Type::String, ElementType::TargetType)); elems.insert(0x63C5, (Type::Unsigned, ElementType::TagTrackUid)); elems.insert(0x67C8, (Type::Master, ElementType::SimpleTag)); elems.insert(0x45A3, (Type::String, ElementType::TagName)); elems.insert(0x447A, (Type::String, 
ElementType::TagLanguage)); elems.insert(0x4484, (Type::Unsigned, ElementType::TagDefault)); elems.insert(0x4487, (Type::String, ElementType::TagString)); elems.insert(0x4485, (Type::Binary, ElementType::TagBinary)); elems }; } symphonia-format-mkv-0.5.2/src/lacing.rs000064400000000000000000000117551046102023000163230ustar 00000000000000// Symphonia // Copyright (c) 2019-2022 The Project Symphonia Developers. // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. use std::collections::{HashMap, VecDeque}; use symphonia_core::errors::{decode_error, Result}; use symphonia_core::io::{BufReader, ReadBytes}; use crate::demuxer::TrackState; use crate::ebml::{read_signed_vint, read_unsigned_vint}; enum Lacing { None, Xiph, FixedSize, Ebml, } fn parse_flags(flags: u8) -> Result { match (flags >> 1) & 0b11 { 0b00 => Ok(Lacing::None), 0b01 => Ok(Lacing::Xiph), 0b10 => Ok(Lacing::FixedSize), 0b11 => Ok(Lacing::Ebml), _ => unreachable!(), } } fn read_ebml_sizes(mut reader: R, frames: usize) -> Result> { let mut sizes = Vec::new(); for _ in 0..frames { if let Some(last_size) = sizes.last().copied() { let delta = read_signed_vint(&mut reader)?; sizes.push((last_size as i64 + delta) as u64) } else { let size = read_unsigned_vint(&mut reader)?; sizes.push(size); } } Ok(sizes) } pub(crate) fn read_xiph_sizes(mut reader: R, frames: usize) -> Result> { let mut prefixes = 0; let mut sizes = Vec::new(); while sizes.len() < frames { let byte = reader.read_byte()? as u64; if byte == 255 { prefixes += 1; } else { let size = prefixes * 255 + byte; prefixes = 0; sizes.push(size); } } Ok(sizes) } pub(crate) struct Frame { pub(crate) track: u32, /// Absolute frame timestamp. 
pub(crate) timestamp: u64, pub(crate) duration: u64, pub(crate) data: Box<[u8]>, } pub(crate) fn calc_abs_block_timestamp(cluster_ts: u64, rel_block_ts: i16) -> u64 { if rel_block_ts < 0 { cluster_ts - (-rel_block_ts) as u64 } else { cluster_ts + rel_block_ts as u64 } } pub(crate) fn extract_frames( block: &[u8], block_duration: Option, tracks: &HashMap, cluster_timestamp: u64, timestamp_scale: u64, buffer: &mut VecDeque, ) -> Result<()> { let mut reader = BufReader::new(block); let track = read_unsigned_vint(&mut reader)? as u32; let rel_ts = reader.read_be_u16()? as i16; let flags = reader.read_byte()?; let lacing = parse_flags(flags)?; let default_frame_duration = tracks.get(&track).and_then(|it| it.default_frame_duration).map(|it| it / timestamp_scale); let mut timestamp = calc_abs_block_timestamp(cluster_timestamp, rel_ts); match lacing { Lacing::None => { let data = reader.read_boxed_slice_exact(block.len() - reader.pos() as usize)?; let duration = block_duration.or(default_frame_duration).unwrap_or(0); buffer.push_back(Frame { track, timestamp, data, duration }); } Lacing::Xiph | Lacing::Ebml => { // Read number of stored sizes which is actually `number of frames` - 1 // since size of the last frame is deduced from block size. let frames = reader.read_byte()? as usize; let sizes = match lacing { Lacing::Xiph => read_xiph_sizes(&mut reader, frames)?, Lacing::Ebml => read_ebml_sizes(&mut reader, frames)?, _ => unreachable!(), }; let frame_duration = block_duration .map(|it| it / (frames + 1) as u64) .or(default_frame_duration) .unwrap_or(0); for frame_size in sizes { let data = reader.read_boxed_slice_exact(frame_size as usize)?; buffer.push_back(Frame { track, timestamp, data, duration: frame_duration }); timestamp += frame_duration; } // Size of last frame is not provided so we read to the end of the block. 
let size = block.len() - reader.pos() as usize; let data = reader.read_boxed_slice_exact(size)?; buffer.push_back(Frame { track, timestamp, data, duration: frame_duration }); } Lacing::FixedSize => { let frames = reader.read_byte()? as usize + 1; let total_size = block.len() - reader.pos() as usize; if total_size % frames != 0 { return decode_error("mkv: invalid block size"); } let frame_duration = block_duration.map(|it| it / frames as u64).or(default_frame_duration).unwrap_or(0); let frame_size = total_size / frames; for _ in 0..frames { let data = reader.read_boxed_slice_exact(frame_size)?; buffer.push_back(Frame { track, timestamp, data, duration: frame_duration }); timestamp += frame_duration; } } } Ok(()) } symphonia-format-mkv-0.5.2/src/lib.rs000064400000000000000000000013001046102023000156150ustar 00000000000000// Symphonia // Copyright (c) 2019-2022 The Project Symphonia Developers. // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. #![warn(rust_2018_idioms)] #![forbid(unsafe_code)] // The following lints are allowed in all Symphonia crates. Please see clippy.toml for their // justification. #![allow(clippy::comparison_chain)] #![allow(clippy::excessive_precision)] #![allow(clippy::identity_op)] #![allow(clippy::manual_range_contains)] mod codecs; mod demuxer; mod ebml; mod element_ids; mod lacing; mod segment; pub use crate::demuxer::MkvReader; symphonia-format-mkv-0.5.2/src/segment.rs000064400000000000000000000471431046102023000165300ustar 00000000000000// Symphonia // Copyright (c) 2019-2022 The Project Symphonia Developers. // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
use symphonia_core::errors::{Error, Result}; use symphonia_core::io::{BufReader, ReadBytes}; use symphonia_core::meta::{MetadataBuilder, MetadataRevision, Tag, Value}; use crate::ebml::{read_unsigned_vint, Element, ElementData, ElementHeader}; use crate::element_ids::ElementType; use crate::lacing::calc_abs_block_timestamp; #[allow(dead_code)] #[derive(Debug)] pub(crate) struct TrackElement { pub(crate) number: u64, pub(crate) uid: u64, pub(crate) language: Option, pub(crate) codec_id: String, pub(crate) codec_private: Option>, pub(crate) audio: Option, pub(crate) default_duration: Option, } impl Element for TrackElement { const ID: ElementType = ElementType::TrackEntry; fn read(reader: &mut B, header: ElementHeader) -> Result { let mut number = None; let mut uid = None; let mut language = None; let mut audio = None; let mut codec_private = None; let mut codec_id = None; let mut default_duration = None; let mut it = header.children(reader); while let Some(header) = it.read_header()? { match header.etype { ElementType::TrackNumber => { number = Some(it.read_u64()?); } ElementType::TrackUid => { uid = Some(it.read_u64()?); } ElementType::Language => { language = Some(it.read_string()?); } ElementType::CodecId => { codec_id = Some(it.read_string()?); } ElementType::CodecPrivate => { codec_private = Some(it.read_boxed_slice()?); } ElementType::Audio => { audio = Some(it.read_element_data()?); } ElementType::DefaultDuration => { default_duration = Some(it.read_u64()?); } other => { log::debug!("ignored element {:?}", other); } } } Ok(Self { number: number.ok_or(Error::DecodeError("mkv: missing track number"))?, uid: uid.ok_or(Error::DecodeError("mkv: missing track UID"))?, language, codec_id: codec_id.ok_or(Error::DecodeError("mkv: missing codec id"))?, codec_private, audio, default_duration, }) } } #[allow(dead_code)] #[derive(Debug)] pub(crate) struct AudioElement { pub(crate) sampling_frequency: f64, pub(crate) output_sampling_frequency: Option, pub(crate) channels: 
u64, pub(crate) bit_depth: Option, } impl Element for AudioElement { const ID: ElementType = ElementType::Audio; fn read(reader: &mut B, header: ElementHeader) -> Result { let mut sampling_frequency = None; let mut output_sampling_frequency = None; let mut channels = None; let mut bit_depth = None; let mut it = header.children(reader); while let Some(header) = it.read_header()? { match header.etype { ElementType::SamplingFrequency => { sampling_frequency = Some(it.read_f64()?); } ElementType::OutputSamplingFrequency => { output_sampling_frequency = Some(it.read_f64()?); } ElementType::Channels => { channels = Some(it.read_u64()?); } ElementType::BitDepth => { bit_depth = Some(it.read_u64()?); } other => { log::debug!("ignored element {:?}", other); } } } Ok(Self { sampling_frequency: sampling_frequency.unwrap_or(8000.0), output_sampling_frequency, channels: channels.unwrap_or(1), bit_depth, }) } } #[derive(Debug)] pub(crate) struct SeekHeadElement { pub(crate) seeks: Box<[SeekElement]>, } impl Element for SeekHeadElement { const ID: ElementType = ElementType::SeekHead; fn read(reader: &mut B, header: ElementHeader) -> Result { let mut seeks = Vec::new(); let mut it = header.children(reader); while let Some(header) = it.read_header()? { match header.etype { ElementType::Seek => { seeks.push(it.read_element_data()?); } other => { log::debug!("ignored element {:?}", other); } } } Ok(Self { seeks: seeks.into_boxed_slice() }) } } #[derive(Debug)] pub(crate) struct SeekElement { pub(crate) id: u64, pub(crate) position: u64, } impl Element for SeekElement { const ID: ElementType = ElementType::Seek; fn read(reader: &mut B, header: ElementHeader) -> Result { let mut seek_id = None; let mut seek_position = None; let mut it = header.children(reader); while let Some(header) = it.read_header()? 
{ match header.etype { ElementType::SeekId => { seek_id = Some(it.read_u64()?); } ElementType::SeekPosition => { seek_position = Some(it.read_u64()?); } other => { log::debug!("ignored element {:?}", other); } } } Ok(Self { id: seek_id.ok_or(Error::DecodeError("mkv: missing seek track id"))?, position: seek_position.ok_or(Error::DecodeError("mkv: missing seek track pos"))?, }) } } #[derive(Debug)] pub(crate) struct TracksElement { pub(crate) tracks: Box<[TrackElement]>, } impl Element for TracksElement { const ID: ElementType = ElementType::Tracks; fn read(reader: &mut B, header: ElementHeader) -> Result { let mut it = header.children(reader); Ok(Self { tracks: it.read_elements()? }) } } #[allow(dead_code)] #[derive(Debug)] pub(crate) struct EbmlHeaderElement { pub(crate) version: u64, pub(crate) read_version: u64, pub(crate) max_id_length: u64, pub(crate) max_size_length: u64, pub(crate) doc_type: String, pub(crate) doc_type_version: u64, pub(crate) doc_type_read_version: u64, } impl Element for EbmlHeaderElement { const ID: ElementType = ElementType::Ebml; fn read(reader: &mut B, header: ElementHeader) -> Result { let mut version = None; let mut read_version = None; let mut max_id_length = None; let mut max_size_length = None; let mut doc_type = None; let mut doc_type_version = None; let mut doc_type_read_version = None; let mut it = header.children(reader); while let Some(header) = it.read_header()? 
{ match header.etype { ElementType::EbmlVersion => { version = Some(it.read_u64()?); } ElementType::EbmlReadVersion => { read_version = Some(it.read_u64()?); } ElementType::EbmlMaxIdLength => { max_id_length = Some(it.read_u64()?); } ElementType::EbmlMaxSizeLength => { max_size_length = Some(it.read_u64()?); } ElementType::DocType => { doc_type = Some(it.read_string()?); } ElementType::DocTypeVersion => { doc_type_version = Some(it.read_u64()?); } ElementType::DocTypeReadVersion => { doc_type_read_version = Some(it.read_u64()?); } other => { log::debug!("ignored element {:?}", other); } } } Ok(Self { version: version.unwrap_or(1), read_version: read_version.unwrap_or(1), max_id_length: max_id_length.unwrap_or(4), max_size_length: max_size_length.unwrap_or(8), doc_type: doc_type.ok_or(Error::Unsupported("mkv: invalid ebml file"))?, doc_type_version: doc_type_version.unwrap_or(1), doc_type_read_version: doc_type_read_version.unwrap_or(1), }) } } #[allow(dead_code)] #[derive(Debug)] pub(crate) struct InfoElement { pub(crate) timestamp_scale: u64, pub(crate) duration: Option, title: Option>, muxing_app: Box, writing_app: Box, } impl Element for InfoElement { const ID: ElementType = ElementType::Info; fn read(reader: &mut B, header: ElementHeader) -> Result { let mut duration = None; let mut timestamp_scale = None; let mut title = None; let mut muxing_app = None; let mut writing_app = None; let mut it = header.children(reader); while let Some(header) = it.read_header()? 
{ match header.etype { ElementType::TimestampScale => { timestamp_scale = Some(it.read_u64()?); } ElementType::Duration => { duration = Some(it.read_f64()?); } ElementType::Title => { title = Some(it.read_string()?); } ElementType::MuxingApp => { muxing_app = Some(it.read_string()?); } ElementType::WritingApp => { writing_app = Some(it.read_string()?); } other => { log::debug!("ignored element {:?}", other); } } } Ok(Self { timestamp_scale: timestamp_scale.unwrap_or(1_000_000), duration, title: title.map(|it| it.into_boxed_str()), muxing_app: muxing_app.unwrap_or_default().into_boxed_str(), writing_app: writing_app.unwrap_or_default().into_boxed_str(), }) } } #[allow(dead_code)] #[derive(Debug)] pub(crate) struct CuesElement { pub(crate) points: Box<[CuePointElement]>, } impl Element for CuesElement { const ID: ElementType = ElementType::Cues; fn read(reader: &mut B, header: ElementHeader) -> Result { let mut it = header.children(reader); Ok(Self { points: it.read_elements()? }) } } #[allow(dead_code)] #[derive(Debug)] pub(crate) struct CuePointElement { pub(crate) time: u64, pub(crate) positions: CueTrackPositionsElement, } impl Element for CuePointElement { const ID: ElementType = ElementType::CuePoint; fn read(reader: &mut B, header: ElementHeader) -> Result { let mut it = header.children(reader); let mut time = None; let mut pos = None; while let Some(header) = it.read_header()? 
{ match header.etype { ElementType::CueTime => time = Some(it.read_u64()?), ElementType::CueTrackPositions => { pos = Some(it.read_element_data()?); } other => { log::debug!("ignored element {:?}", other); } } } Ok(Self { time: time.ok_or(Error::DecodeError("mkv: missing time in cue"))?, positions: pos.ok_or(Error::DecodeError("mkv: missing positions in cue"))?, }) } } #[allow(dead_code)] #[derive(Debug)] pub(crate) struct CueTrackPositionsElement { pub(crate) track: u64, pub(crate) cluster_position: u64, } impl Element for CueTrackPositionsElement { const ID: ElementType = ElementType::CueTrackPositions; fn read(reader: &mut B, header: ElementHeader) -> Result { let mut it = header.children(reader); let mut track = None; let mut pos = None; while let Some(header) = it.read_header()? { match header.etype { ElementType::CueTrack => { track = Some(it.read_u64()?); } ElementType::CueClusterPosition => { pos = Some(it.read_u64()?); } other => { log::debug!("ignored element {:?}", other); } } } Ok(Self { track: track.ok_or(Error::DecodeError("mkv: missing track in cue track positions"))?, cluster_position: pos .ok_or(Error::DecodeError("mkv: missing position in cue track positions"))?, }) } } #[derive(Debug)] pub(crate) struct BlockGroupElement { pub(crate) data: Box<[u8]>, pub(crate) duration: Option, } impl Element for BlockGroupElement { const ID: ElementType = ElementType::BlockGroup; fn read(reader: &mut B, header: ElementHeader) -> Result { let mut it = header.children(reader); let mut data = None; let mut block_duration = None; while let Some(header) = it.read_header()? 
{ match header.etype { ElementType::DiscardPadding => { let _nanos = it.read_data()?; } ElementType::Block => { data = Some(it.read_boxed_slice()?); } ElementType::BlockDuration => { block_duration = Some(it.read_u64()?); } other => { log::debug!("ignored element {:?}", other); } } } Ok(Self { data: data.ok_or(Error::DecodeError("mkv: missing block inside block group"))?, duration: block_duration, }) } } #[derive(Debug)] pub(crate) struct BlockElement { pub(crate) track: u64, pub(crate) timestamp: u64, pub(crate) pos: u64, } #[derive(Debug)] pub(crate) struct ClusterElement { pub(crate) timestamp: u64, pub(crate) pos: u64, pub(crate) end: Option, pub(crate) blocks: Box<[BlockElement]>, } impl Element for ClusterElement { const ID: ElementType = ElementType::Cluster; fn read(reader: &mut B, header: ElementHeader) -> Result { let pos = reader.pos(); let mut timestamp = None; let mut blocks = Vec::new(); let has_size = header.end().is_some(); fn read_block(data: &[u8], timestamp: u64, offset: u64) -> Result { let mut reader = BufReader::new(data); let track = read_unsigned_vint(&mut reader)?; let rel_ts = reader.read_be_u16()? as i16; let timestamp = calc_abs_block_timestamp(timestamp, rel_ts); Ok(BlockElement { track, timestamp, pos: offset }) } fn get_timestamp(timestamp: Option) -> Result { timestamp.ok_or(Error::DecodeError("mkv: missing timestamp for a cluster")) } let mut it = header.children(reader); while let Some(header) = it.read_header()? 
{ match header.etype { ElementType::Timestamp => { timestamp = Some(it.read_u64()?); } ElementType::BlockGroup => { let group = it.read_element_data::()?; blocks.push(read_block(&group.data, get_timestamp(timestamp)?, header.pos)?); } ElementType::SimpleBlock => { let data = it.read_boxed_slice()?; blocks.push(read_block(&data, get_timestamp(timestamp)?, header.pos)?); } _ if header.etype.is_top_level() && !has_size => break, other => { log::debug!("ignored element {:?}", other); } } } Ok(ClusterElement { timestamp: get_timestamp(timestamp)?, blocks: blocks.into_boxed_slice(), pos, end: header.end(), }) } } #[derive(Debug)] pub(crate) struct TagsElement { pub(crate) tags: Box<[TagElement]>, } impl Element for TagsElement { const ID: ElementType = ElementType::Tags; fn read(reader: &mut B, header: ElementHeader) -> Result { let mut tags = Vec::new(); let mut it = header.children(reader); while let Some(header) = it.read_header()? { match header.etype { ElementType::Tag => { tags.push(it.read_element_data::()?); } other => { log::debug!("ignored element {:?}", other); } } } Ok(Self { tags: tags.into_boxed_slice() }) } } impl TagsElement { pub(crate) fn to_metadata(&self) -> MetadataRevision { let mut metadata = MetadataBuilder::new(); for tag in self.tags.iter() { for simple_tag in tag.simple_tags.iter() { // TODO: support std_key metadata.add_tag(Tag::new( None, &simple_tag.name, match &simple_tag.value { ElementData::Binary(b) => Value::Binary(b.clone()), ElementData::String(s) => Value::String(s.clone()), _ => unreachable!(), }, )); } } metadata.metadata() } } #[derive(Debug)] pub(crate) struct TagElement { pub(crate) simple_tags: Box<[SimpleTagElement]>, } impl Element for TagElement { const ID: ElementType = ElementType::Tag; fn read(reader: &mut B, header: ElementHeader) -> Result { let mut simple_tags = Vec::new(); let mut it = header.children(reader); while let Some(header) = it.read_header()? 
{ match header.etype { ElementType::SimpleTag => { simple_tags.push(it.read_element_data::()?); } other => { log::debug!("ignored element {:?}", other); } } } Ok(Self { simple_tags: simple_tags.into_boxed_slice() }) } } #[derive(Debug)] pub(crate) struct SimpleTagElement { pub(crate) name: Box, pub(crate) value: ElementData, } impl Element for SimpleTagElement { const ID: ElementType = ElementType::SimpleTag; fn read(reader: &mut B, header: ElementHeader) -> Result { let mut name = None; let mut value = None; let mut it = header.children(reader); while let Some(header) = it.read_header()? { match header.etype { ElementType::TagName => { name = Some(it.read_string()?); } ElementType::TagString | ElementType::TagBinary => { value = Some(it.read_data()?); } other => { log::debug!("ignored element {:?}", other); } } } Ok(Self { name: name.ok_or(Error::DecodeError("mkv: missing tag name"))?.into_boxed_str(), value: value.ok_or(Error::DecodeError("mkv: missing tag value"))?, }) } }