Skip to content

Commit

Permalink
Fix handling of non-UTF-8 charsets for plain-text messages
Browse files Browse the repository at this point in the history
  • Loading branch information
r10s committed Jan 2, 2021
1 parent 0ed0adb commit a8fb946
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 14 deletions.
36 changes: 22 additions & 14 deletions src/originalhtml.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,13 @@ impl Message {
pub struct HtmlMsgParser {
pub html: String,
pub plain: Option<String>,
pub plain_charset: Option<String>,
}

impl HtmlMsgParser {
pub async fn from_bytes(context: &Context, rawmime: &[u8]) -> Result<Self> {
let mut parser = HtmlMsgParser {
html: "".to_string(),
plain: None,
plain_charset: None,
};

let parsedmail = mailparse::parse_mail(rawmime)?;
Expand All @@ -45,7 +43,7 @@ impl HtmlMsgParser {

if parser.html.is_empty() {
if let Some(plain) = parser.plain.clone() {
parser.html = plain_to_html(&plain, parser.plain_charset.clone()).await;
parser.html = plain_to_html(&plain).await;
}
}

Expand Down Expand Up @@ -130,10 +128,6 @@ impl HtmlMsgParser {
} else if mimetype == mime::TEXT_PLAIN {
if let Ok(decoded_data) = mail.get_body() {
self.plain = Some(decoded_data);
if let Some(charset) = mimetype.get_param(mime::CHARSET) {
// TODO: is that working? add a test!
self.plain_charset = Some(charset.to_string());
}
return Ok(true);
}
}
Expand All @@ -142,16 +136,14 @@ impl HtmlMsgParser {
}

// convert plain text to html
async fn plain_to_html(plain: &str, charset: Option<String>) -> String {
let lines = split_lines(&plain);
async fn plain_to_html(plain_utf8: &str) -> String {
let lines = split_lines(&plain_utf8);

let charset = charset.unwrap_or_else(|| "utf-8".to_string());
let mut ret =
"<!DOCTYPE html>\n<html><head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" /></head><body>\n".to_string();

let mut ret = format!(
"<!DOCTYPE html>\n<html><head><meta http-equiv=\"Content-Type\" content=\"text/html; charset={}\" /></head><body>\n",
charset
);
for line in lines {
// TODO: make links clickable
ret += &*escaper::encode_minimal(line);
ret += "<br/>\n";
}
Expand Down Expand Up @@ -201,6 +193,22 @@ This message does not have Content-Type nor Subject.<br/>
);
}

/// Parses the `text_plain_iso88591.eml` fixture (a `text/plain` message whose
/// `Content-Type` header declares `charset=iso-8859-1`) and checks the HTML
/// generated for it: the document must announce `charset=utf-8` in its meta
/// tag and contain the umlauts of the message body as readable text.
#[async_std::test]
async fn test_htmlparse_plain_iso88591() {
    // The complete HTML document the parser is expected to produce.
    let expected_html = r##"<!DOCTYPE html>
<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body>
message with a non-UTF-8 encoding: äöüßÄÖÜ<br/>
<br/>
</body></html>
"##;

    let test_context = TestContext::new().await;
    let rawmime = include_bytes!("../test-data/message/text_plain_iso88591.eml");
    let parsed = HtmlMsgParser::from_bytes(&test_context.ctx, rawmime)
        .await
        .unwrap();

    assert_eq!(parsed.html, expected_html);
}

#[async_std::test]
async fn test_htmlparse_alt_plain() {
let t = TestContext::new().await;
Expand Down
7 changes: 7 additions & 0 deletions test-data/message/text_plain_iso88591.eml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Message-Id: <[email protected]>
Date: Sat, 14 Sep 2019 19:00:13 +0200
From: lmn <[email protected]>
To: abc <[email protected]>
Content-Type: text/plain; charset=iso-8859-1

message with a non-UTF-8 encoding: äöüßÄÖÜ

0 comments on commit a8fb946

Please sign in to comment.