From a3d0af607de1d0b542fb622383d4029bd824c1c0 Mon Sep 17 00:00:00 2001 From: Will Hedgecock Date: Wed, 2 Oct 2024 12:28:38 -0500 Subject: [PATCH] Finish ZIP reader implementation --- Makefile | 2 +- musicxml/src/parser/mod.rs | 14 ++-- musicxml/src/parser/zip_parser.rs | 116 +++++++++++++----------------- musicxml_internal/src/lib.rs | 4 ++ musicxml_macros/src/lib.rs | 15 ++++ 5 files changed, 74 insertions(+), 77 deletions(-) diff --git a/Makefile b/Makefile index b7b8a4d..6c0aad1 100644 --- a/Makefile +++ b/Makefile @@ -28,7 +28,7 @@ checknostd: cd ensure_no_std && cargo rustc -- -C link-arg=-nostartfiles check: - cargo clippy -- -W clippy::all -W clippy::correctness -W clippy::suspicious -W clippy::complexity -W clippy::perf -W clippy::style -W clippy::pedantic -A clippy::missing_errors_doc -A clippy::missing_panics_doc -A clippy::doc_markdown -A clippy::wildcard_imports -A clippy::module_name_repetitions -D warnings + cargo clippy -- -W clippy::all -W clippy::correctness -W clippy::suspicious -W clippy::complexity -W clippy::perf -W clippy::style -W clippy::pedantic -W clippy::panic -A clippy::doc_markdown -A clippy::wildcard_imports -A clippy::module_name_repetitions -D warnings testunit: cargo test --features debug -- --nocapture diff --git a/musicxml/src/parser/mod.rs b/musicxml/src/parser/mod.rs index bda26df..29d6672 100644 --- a/musicxml/src/parser/mod.rs +++ b/musicxml/src/parser/mod.rs @@ -35,10 +35,9 @@ fn get_musicxml_contents_from_file(path: &str) -> Result { .read_to_end(&mut mxl_data.content) .unwrap_or(0); let archive = zip_parser::ZipArchive::new(&mut mxl_data); - for item in archive.iter() { - if item.file_name == "META-INF/container.xml" { - let container = - xml_parser::parse_from_string(core::str::from_utf8(item.data.as_slice()).map_err(|e| e.to_string())?)?; + for file_name in archive.iter() { + if file_name == "META-INF/container.xml" { + let container = xml_parser::parse_from_string(archive.read_file_to_string(file_name)?.as_str())?; xml_path = container .elements .iter() @@ -49,12 +48,7 @@ fn get_musicxml_contents_from_file(path: &str) -> Result { } } if let Some(full_path) = &xml_path { - let file = archive - .get_file(full_path.as_str()) - .ok_or("MXL file missing expected contents")?; - core::str::from_utf8(file.data.as_slice()) - .map_err(|e| e.to_string())? - .clone_into(&mut contents); + contents = archive.read_file_to_string(full_path)?; } else { Err(String::from("Cannot find MusicXML file in compressed archive"))?; } diff --git a/musicxml/src/parser/zip_parser.rs b/musicxml/src/parser/zip_parser.rs index 080b079..d0fabc6 100644 --- a/musicxml/src/parser/zip_parser.rs +++ b/musicxml/src/parser/zip_parser.rs @@ -4,13 +4,13 @@ use alloc::{collections::BTreeMap, vec::Vec}; use core2::io::Read; use libflate::deflate::Decoder; -const DEFLATE_METHOD: u16 = 8; +const DEFLATE_METHOD_CODE: u16 = 8; const LOCAL_FILE_HEADER_LEN: usize = core::mem::size_of::(); const CENTRAL_FILE_HEADER_LEN: usize = core::mem::size_of::(); const CENTRAL_DIR_END_LEN: usize = core::mem::size_of::(); #[repr(packed)] -pub(crate) struct LocalFileHeader { +struct LocalFileHeader { signature: u32, version_needed_to_extract: u16, general_purpose_bit_flag: u16, @@ -86,23 +86,20 @@ impl CentralDirEnd { } } -pub(crate) struct LocalFile { +struct LocalFile { pub file_name: String, - pub compressed_size: u64, - pub uncompressed_size: u64, - pub data: Vec, + pub relative_offset: usize, + pub compressed_size: usize, + pub uncompressed_size: usize, } impl LocalFile { - pub fn new(file_name: String, data: &[u8], details: &LocalFileHeader) -> Self { - let mut decoded_data = Vec::new(); - let mut decoder = Decoder::new(&data[details.len()..(details.len() + details.compressed_size as usize)]); - let _ = decoder.read_to_end(&mut decoded_data); + pub fn new(file_name: String, file_offset: usize, details: &LocalFileHeader) -> Self { Self { file_name, - compressed_size: u64::from(details.compressed_size), - uncompressed_size: u64::from(details.uncompressed_size), - data: decoded_data, + relative_offset: file_offset, + compressed_size: details.compressed_size as usize, + uncompressed_size: details.uncompressed_size as usize, } } } @@ -164,9 +161,7 @@ impl<'a> Iterator for ZipParser<'a> { for i in 0..(self.buffer_len + bytes_read - 4) { if self.buffer[i..i + 4] == [0x50, 0x4b, 0x01, 0x02] { let len = self.buffer_len + bytes_read - i; - unsafe { - core::ptr::copy(self.buffer.as_ptr().add(i), self.buffer.as_mut_ptr(), len); - } + self.buffer.copy_within(i..(len + i), 0); self.buffer_len = len; entry_found = true; break; @@ -178,22 +173,14 @@ impl<'a> Iterator for ZipParser<'a> { bytes_read => { self.buffer_len += bytes_read; if self.buffer[28] < 8 || self.buffer[29] > 0 { - unsafe { - core::ptr::copy( - self.buffer.as_ptr().add(4), - self.buffer.as_mut_ptr(), - self.buffer_len - 4, - ); - } + self.buffer.copy_within(4..self.buffer_len, 0); self.buffer_len -= 4; entry_found = false; } } } } else if bytes_read > 2 { - unsafe { - core::ptr::copy(self.buffer.as_ptr().add(bytes_read - 3), self.buffer.as_mut_ptr(), 3); - } + self.buffer.copy_within((bytes_read - 3).., 0); self.buffer_len = 3; } else { return None; @@ -203,66 +190,62 @@ impl<'a> Iterator for ZipParser<'a> { } // Read the file name, header, and compression method - let (file_name, file_header, compression_method, entry_length) = { + let (file_name, file_header, file_offset, compression_method, entry_length) = { let file_info = CentralFileHeader::from_bytes(&self.buffer); - let mut file_name_buffer = vec![0; file_info.file_name_length as usize]; - unsafe { - core::ptr::copy( - self.buffer.as_ptr().add(CENTRAL_FILE_HEADER_LEN), - file_name_buffer.as_mut_ptr(), - file_name_buffer.len(), - ); - ( - String::from_utf8_unchecked(file_name_buffer), - &self.contents.content[file_info.relative_offset_of_local_header as usize..], - file_info.compression_method, - file_info.len(), - ) - } + ( + self.buffer[CENTRAL_FILE_HEADER_LEN..(CENTRAL_FILE_HEADER_LEN + file_info.file_name_length as usize)] + .iter() + .map(|&x| x as char) + .collect::(), + &self.contents.content[file_info.relative_offset_of_local_header as usize..], + file_info.relative_offset_of_local_header as usize, + file_info.compression_method, + file_info.len(), + ) }; // Move remainder of the contents to beginning of the buffer for next iteration - unsafe { - core::ptr::copy( - self.buffer.as_ptr().add(entry_length), - self.buffer.as_mut_ptr(), - self.buffer_len - entry_length, - ); - } + self.buffer.copy_within(entry_length..self.buffer_len, 0); self.buffer_len -= entry_length; - // Decompress the file if its method is DEFLATE - if compression_method == DEFLATE_METHOD { - Some(LocalFile::new( - file_name, - file_header, - LocalFileHeader::from_bytes(file_header), - )) + // Decompress the file if its compression method is DEFLATE + if compression_method == DEFLATE_METHOD_CODE { + let header = LocalFileHeader::from_bytes(file_header); + Some(LocalFile::new(file_name, file_offset + header.len(), header)) } else { self.next() } } } -pub(crate) struct ZipArchive { +pub(crate) struct ZipArchive<'a> { + zip_data: &'a mut ZipData, file_map: BTreeMap, } -impl ZipArchive { - pub fn new(zip_data: &mut ZipData) -> Self { +impl<'a> ZipArchive<'a> { + pub fn new(zip_data: &'a mut ZipData) -> Self { let mut file_map = BTreeMap::new(); for file in ZipParser::new(zip_data) { file_map.insert(file.file_name.clone(), file); } - Self { file_map } + Self { zip_data, file_map } } - pub fn get_file(&self, file_name: &str) -> Option<&LocalFile> { - self.file_map.get(file_name) + pub fn read_file_to_string(&self, file_name: &str) -> Result { + let mut decoded_data = Vec::new(); + let file = self + .file_map + .get(file_name) + .ok_or(format!("File \"{file_name}\" not found within compressed archive"))?; + let mut decoder = + Decoder::new(&self.zip_data.content[file.relative_offset..(file.relative_offset + file.compressed_size)]); + decoder.read_to_end(&mut decoded_data).unwrap_or(0); + String::from_utf8(decoded_data).map_err(|e| e.to_string()) } - pub fn iter(&self) -> impl Iterator { - self.file_map.values() + pub fn iter(&self) -> impl Iterator { + self.file_map.keys() } } @@ -278,9 +261,10 @@ mod tests { .unwrap() .read_to_end(&mut zip_data.content) .unwrap_or(0); - for item in ZipArchive::new(&mut zip_data).iter() { - println!("File: {}, Size: {}", item.file_name, item.uncompressed_size); - println!("Content: {}", core::str::from_utf8(item.data.as_slice()).unwrap()); + let zip_archive = ZipArchive::new(&mut zip_data); + for item in zip_archive.iter() { + println!("File: {}", item); + println!("Content: {}", zip_archive.read_file_to_string(item).unwrap()); } } } diff --git a/musicxml_internal/src/lib.rs b/musicxml_internal/src/lib.rs index ad68234..6a114b2 100644 --- a/musicxml_internal/src/lib.rs +++ b/musicxml_internal/src/lib.rs @@ -13,18 +13,22 @@ pub struct XmlElement { pub text: String, } +#[allow(clippy::missing_errors_doc)] pub trait DatatypeDeserializer: Sized { fn deserialize(value: &str) -> Result; } +#[allow(clippy::missing_errors_doc)] pub trait AttributeDeserializer: Sized { fn deserialize(attributes: &[(String, String)]) -> Result; } +#[allow(clippy::missing_errors_doc)] pub trait ContentDeserializer: Sized { fn deserialize(elements: &[XmlElement]) -> Result; } +#[allow(clippy::missing_errors_doc)] pub trait ElementDeserializer: Sized { fn deserialize(element: &XmlElement) -> Result; } diff --git a/musicxml_macros/src/lib.rs b/musicxml_macros/src/lib.rs index 0dcfe0a..485dfd7 100644 --- a/musicxml_macros/src/lib.rs +++ b/musicxml_macros/src/lib.rs @@ -104,6 +104,7 @@ fn serialize_datatype_enum(element_type: &syn::Ident, data: &syn::DataEnum) -> T }) } +#[allow(clippy::panic, clippy::missing_panics_doc)] fn deserialize_datatype_tuple_struct(element_type: &syn::Ident, fields: &syn::FieldsUnnamed) -> TokenStream { // Deserialize first unnamed struct field let deserialized_field = match &fields.unnamed.first().unwrap().ty { @@ -132,6 +133,7 @@ fn serialize_datatype_tuple_struct(element_type: &syn::Ident, _fields: &syn::Fie }) } +#[allow(clippy::panic, clippy::missing_panics_doc)] #[proc_macro_derive(DatatypeDeserialize, attributes(rename))] pub fn deserialize_datatype(tokens: TokenStream) -> TokenStream { let ast: syn::DeriveInput = syn::parse(tokens).unwrap(); @@ -145,6 +147,7 @@ pub fn deserialize_datatype(tokens: TokenStream) -> TokenStream { } } +#[allow(clippy::panic, clippy::missing_panics_doc)] #[proc_macro_derive(DatatypeSerialize, attributes(rename))] pub fn serialize_datatype(tokens: TokenStream) -> TokenStream { let ast: syn::DeriveInput = syn::parse(tokens).unwrap(); @@ -162,6 +165,7 @@ pub fn serialize_datatype(tokens: TokenStream) -> TokenStream { // ATTRIBUTE FUNCTIONALITY -------------------------------------------------------------------------------------------- // -------------------------------------------------------------------------------------------------------------------- +#[allow(clippy::panic, clippy::missing_panics_doc)] fn deserialize_attribute_named_struct(element_type: &syn::Ident, fields: &syn::FieldsNamed) -> TokenStream { let mut deserialized_fields: Vec = Vec::new(); let element_type_string = element_type.to_string(); @@ -218,6 +222,7 @@ fn deserialize_attribute_named_struct(element_type: &syn::Ident, fields: &syn::F }) } +#[allow(clippy::panic, clippy::missing_panics_doc)] fn serialize_attribute_named_struct(element_type: &syn::Ident, fields: &syn::FieldsNamed) -> TokenStream { let mut serialized_fields: Vec = Vec::new(); @@ -269,6 +274,7 @@ fn serialize_attribute_named_struct(element_type: &syn::Ident, fields: &syn::Fie }) } +#[allow(clippy::panic, clippy::missing_panics_doc)] #[proc_macro_derive(AttributeDeserialize, attributes(rename))] pub fn deserialize_attribute(tokens: TokenStream) -> TokenStream { let ast: syn::DeriveInput = syn::parse(tokens).unwrap(); @@ -281,6 +287,7 @@ pub fn deserialize_attribute(tokens: TokenStream) -> TokenStream { } } +#[allow(clippy::panic, clippy::missing_panics_doc)] #[proc_macro_derive(AttributeSerialize, attributes(rename))] pub fn serialize_attribute(tokens: TokenStream) -> TokenStream { let ast: syn::DeriveInput = syn::parse(tokens).unwrap(); @@ -297,6 +304,7 @@ pub fn serialize_attribute(tokens: TokenStream) -> TokenStream { // CONTENT FUNCTIONALITY ---------------------------------------------------------------------------------------------- // -------------------------------------------------------------------------------------------------------------------- +#[allow(clippy::panic, clippy::missing_panics_doc)] fn deserialize_content_named_struct(element_type: &syn::Ident, fields: &syn::FieldsNamed) -> TokenStream { let mut deserialized_fields: Vec = Vec::new(); let element_type_string = element_type.to_string(); @@ -350,6 +358,7 @@ fn deserialize_content_named_struct(element_type: &syn::Ident, fields: &syn::Fie }) } +#[allow(clippy::panic, clippy::missing_panics_doc)] fn serialize_content_named_struct(element_type: &syn::Ident, fields: &syn::FieldsNamed) -> TokenStream { let mut serialized_fields: Vec = Vec::new(); @@ -403,6 +412,7 @@ fn serialize_content_named_struct(element_type: &syn::Ident, fields: &syn::Field }) } +#[allow(clippy::panic, clippy::missing_panics_doc)] #[proc_macro_derive(ContentDeserialize, attributes(rename))] pub fn deserialize_content(tokens: TokenStream) -> TokenStream { let ast: syn::DeriveInput = syn::parse(tokens).unwrap(); @@ -415,6 +425,7 @@ pub fn deserialize_content(tokens: TokenStream) -> TokenStream { } } +#[allow(clippy::panic, clippy::missing_panics_doc)] #[proc_macro_derive(ContentSerialize, attributes(rename))] pub fn serialize_content(tokens: TokenStream) -> TokenStream { let ast: syn::DeriveInput = syn::parse(tokens).unwrap(); @@ -531,6 +542,7 @@ fn serialize_element_enum(element_type: &syn::Ident, data: &syn::DataEnum) -> To }) } +#[allow(clippy::panic, clippy::missing_panics_doc)] fn deserialize_element_named_struct(element_type: &syn::Ident, fields: &syn::FieldsNamed) -> TokenStream { let mut deserialized_fields: Vec = Vec::new(); @@ -576,6 +588,7 @@ fn deserialize_element_named_struct(element_type: &syn::Ident, fields: &syn::Fie }) } +#[allow(clippy::panic, clippy::missing_panics_doc)] fn serialize_element_named_struct( element_type: &syn::Ident, element_type_name: &String, @@ -631,6 +644,7 @@ fn serialize_element_named_struct( }) } +#[allow(clippy::panic, clippy::missing_panics_doc)] #[proc_macro_derive(ElementDeserialize, attributes(rename, flatten))] pub fn deserialize_element(tokens: TokenStream) -> TokenStream { let ast: syn::DeriveInput = syn::parse(tokens).unwrap(); @@ -644,6 +658,7 @@ pub fn deserialize_element(tokens: TokenStream) -> TokenStream { } } +#[allow(clippy::panic, clippy::missing_panics_doc)] #[proc_macro_derive(ElementSerialize, attributes(rename))] pub fn serialize_element(tokens: TokenStream) -> TokenStream { // Fetch the actual or renamed name of the element to serialize