Skip to content

Commit

Permalink
Use chars() iterator to allow Unicode characters
Browse files Browse the repository at this point in the history
  • Loading branch information
hedgecrw committed Jul 11, 2024
1 parent ab95a01 commit 95cd565
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 34 deletions.
8 changes: 6 additions & 2 deletions musicxml/src/elements/part_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,13 @@ impl ContentDeserializer for PartListContents {
let mut content = PartListContents { content: Vec::new() };
for element in elements {
if element.name == "part-group" {
content.content.push(PartListElement::PartGroup(PartGroup::deserialize(element)?));
content
.content
.push(PartListElement::PartGroup(PartGroup::deserialize(element)?));
} else if element.name == "score-part" {
content.content.push(PartListElement::ScorePart(ScorePart::deserialize(element)?));
content
.content
.push(PartListElement::ScorePart(ScorePart::deserialize(element)?));
} else {
return Err(format!("Unexpected <part-list> element '{}'", element.name));
}
Expand Down
111 changes: 79 additions & 32 deletions musicxml/src/parser/xml_parser.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
use musicxml_internal::XmlElement;

enum TagType {
Opening { tag_length: usize, tag: XmlElement },
Closing { tag_length: usize, tag: XmlElement },
SelfClosing { tag_length: usize, tag: XmlElement },
Ignored { tag_length: usize },
Opening(XmlElement),
Closing(XmlElement),
SelfClosing(XmlElement),
Ignored,
Done,
}

fn read_tag_str(str: &str) -> TagType {
fn read_tag_str(str: &mut std::str::Chars) -> TagType {
let mut tag = XmlElement {
name: String::new(),
attributes: Vec::new(),
Expand All @@ -18,20 +18,20 @@ fn read_tag_str(str: &str) -> TagType {
let (mut is_closing, mut is_self_closing, mut in_attribute, mut in_string, mut in_tag, mut ignore) =
(false, false, true, false, true, false);
let (mut attribute, mut value) = (String::new(), String::new());
for (i, c) in str.chars().enumerate() {
for (i, c) in str.enumerate() {
match c {
'>' => {
if !in_attribute {
tag.attributes.push((attribute.clone(), value.clone()));
}
return if ignore {
TagType::Ignored { tag_length: i + 1 }
TagType::Ignored
} else if is_closing {
TagType::Closing { tag_length: i + 1, tag }
TagType::Closing(tag)
} else if is_self_closing {
TagType::SelfClosing { tag_length: i + 1, tag }
TagType::SelfClosing(tag)
} else {
TagType::Opening { tag_length: i + 1, tag }
TagType::Opening(tag)
};
}
'\r' => (),
Expand Down Expand Up @@ -110,25 +110,19 @@ pub fn parse_to_string(xml: &XmlElement, depth: i16) -> String {
xml_str
}

pub fn parse_from_string(mut str: &str) -> Result<XmlElement, String> {
pub fn parse_from_string(str: &str) -> Result<XmlElement, String> {
let mut it = str.chars();
let mut open_tags: Vec<XmlElement> = Vec::new();
while !str.is_empty() {
if str.starts_with('<') {
match read_tag_str(&str[1..]) {
TagType::Ignored { tag_length } => str = &str[tag_length..],
TagType::Opening { tag_length, tag } => {
str = &str[tag_length..];
open_tags.push(tag)
}
TagType::SelfClosing { tag_length, tag } => {
str = &str[tag_length..];
match open_tags.last_mut() {
Some(last_open_tag) => last_open_tag.elements.push(tag),
None => return Err(format!("Root tag cannot be self-closing")),
}
}
TagType::Closing { tag_length, tag } => {
str = &str[tag_length..];
while let Some(ch) = it.next() {
if ch == '<' {
match read_tag_str(&mut it) {
TagType::Ignored => (),
TagType::Opening(tag) => open_tags.push(tag),
TagType::SelfClosing(tag) => match open_tags.last_mut() {
Some(last_open_tag) => last_open_tag.elements.push(tag),
None => return Err(format!("Root tag cannot be self-closing")),
},
TagType::Closing(tag) => {
let mut element = open_tags.pop().unwrap();
element.text.truncate(element.text.trim().len());
if tag.name != element.name {
Expand All @@ -143,17 +137,15 @@ pub fn parse_from_string(mut str: &str) -> Result<XmlElement, String> {
return Ok(element);
}
}
TagType::Done => str = &str[str.chars().count() - 1..],
TagType::Done => break,
}
} else if !str.starts_with('\r') && !str.starts_with('\n') && !str.starts_with('\t') {
} else if ch != '\r' && ch != '\n' && ch != '\t' {
if let Some(item) = open_tags.last_mut() {
let ch = str.chars().next().unwrap();
if !item.text.is_empty() || ch != ' ' {
item.text.push(ch)
}
}
}
str = &str[1..];
}
Err(format!("Missing one or more matched tags"))
}
Expand Down Expand Up @@ -276,4 +268,59 @@ mod xml_parser_tests {
}
);
}

/// Serializing a tree whose text contains multi-byte characters (`♭`, `é`, `ç`)
/// must yield a string in which those characters appear unchanged.
#[test]
fn serialize_valid_unicode_str() {
    // Builds an element with no attributes and no children, only text.
    let leaf = |name: &str, text: &str| XmlElement {
        name: String::from(name),
        attributes: vec![],
        elements: vec![],
        text: String::from(text),
    };

    let document = XmlElement {
        name: String::from("element"),
        attributes: vec![],
        elements: vec![
            leaf("test1", "Waltz in E♭ Major"),
            leaf("test2", "Frédéric François Chopin"),
        ],
        text: String::new(),
    };
    let expected = "<element><test1>Waltz in E♭ Major</test1><test2>Frédéric François Chopin</test2></element>";

    let serialized = parse_to_string(&document, -1);
    assert_eq!(serialized.as_str(), expected);
}

/// Parsing a document whose element text contains multi-byte characters
/// (`♭`, `é`, `ç`) must succeed and keep that text intact in the resulting tree.
#[test]
fn deserialize_valid_unicode_str() {
    // Builds an element with no attributes and no children, only text.
    let leaf = |name: &str, text: &str| XmlElement {
        name: String::from(name),
        attributes: vec![],
        elements: vec![],
        text: String::from(text),
    };

    let expected = XmlElement {
        name: String::from("element"),
        attributes: vec![],
        elements: vec![
            leaf("test1", "Waltz in E♭ Major"),
            leaf("test2", "Frédéric François Chopin"),
        ],
        text: String::new(),
    };
    let input = "<element><test1>Waltz in E♭ Major</test1><test2>Frédéric François Chopin</test2></element>";

    let parsed = parse_from_string(input);
    // Check success separately so a parse failure is reported before the comparison.
    assert!(parsed.is_ok());
    assert_eq!(parsed.unwrap(), expected);
}
}

0 comments on commit 95cd565

Please sign in to comment.