Skip to content

Commit

Permalink
Merge pull request #6 from deepsourcelabs/fix-unicode-handling
Browse files Browse the repository at this point in the history
fix: possible panic when slicing source text at a non-char-boundary (Unicode) byte index
  • Loading branch information
tushar-deepsource authored Feb 14, 2023
2 parents a3dd26b + c929e51 commit 8a0fe70
Show file tree
Hide file tree
Showing 7 changed files with 47 additions and 34 deletions.
6 changes: 3 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ members = [
[package]
name = "dracula"
description = "🧛 Count-ing lines, AH AH AHH!"
version = "0.1.0"
version = "0.1.2"
authors = ["Swarnim Arun <[email protected]>"]
edition = "2021"
license-file = "LICENSE"
Expand Down
2 changes: 1 addition & 1 deletion cdracula/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "cdracula"
description = "🧛 Count-ing lines, AH AH AHH!"
version = "0.1.0"
version = "0.1.2"
authors = ["Swarnim Arun <[email protected]>"]
edition = "2021"

Expand Down
5 changes: 4 additions & 1 deletion cdracula/tests/test_capi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ mod python {
let src = CString::from_vec_unchecked(
(String::from(
r#"
# entp için anayzer
if index == 10:
pass
# skip this
def python():
"""
Expand All @@ -47,7 +50,7 @@ def python():
) + "\0")
.into(),
);
assert_eq!(get_meaningful_line_count(src.as_ptr(), PYTHON_LANG, 0), 2);
assert_eq!(get_meaningful_line_count(src.as_ptr(), PYTHON_LANG, 0), 4);
}
}

Expand Down
2 changes: 1 addition & 1 deletion pydracula/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "pydracula"
version = "0.1.0"
version = "0.1.2"
authors = ["Swarnim Arun <[email protected]>"]
edition = "2018"

Expand Down
49 changes: 27 additions & 22 deletions src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ pub struct Parser<'a, L: Language> {
src: &'a str,
index: usize,
language_items: &'static [ParseItem],
_marker: PhantomData<L>
_marker: PhantomData<L>,
}

// mostly, this is only used in tests at the moment!
Expand All @@ -290,7 +290,7 @@ impl<L: Language> Parser<'_, L> {
src,
language_items: L::PARSE_ITEMS,
index: 0,
_marker: PhantomData::default()
_marker: PhantomData::default(),
}
}

Expand All @@ -304,35 +304,40 @@ impl<L: Language> Parser<'_, L> {
.find_map(|i| Some((i, items[i].begin().matches(src)?)))
.and_then(|(i, matches)| {
(matches[2].end..src.len()).find_map(|b| {
Some((
i,
b,
if items[i].is_key_matched() {
items[i].end().matches_with_key(
&src[b..],
&src[matches[1].start..matches[1].end],
)?
} else {
items[i].end().matches(&src[b..])?
},
))
if src.is_char_boundary(b) {
Some((
i,
b,
if items[i].is_key_matched() {
items[i].end().matches_with_key(
&src[b..],
&src[matches[1].start..matches[1].end],
)?
} else {
items[i].end().matches(&src[b..])?
},
))
} else {
None
}
})
})
{
Ok(items[i].to_parse_output(&src[0..b + end_matches[2].end]))
Ok(items[i].to_parse_output(&src[..b + end_matches[2].end]))
} else if let Some(end) = (1..=src.len()).find(|&idx| {
idx == src.len()
|| src[idx..].starts_with('\n')
|| items
.iter()
.find_map(|i| i.begin().matches(&src[idx..]))
.is_some()
src.is_char_boundary(idx)
&& (idx == src.len()
|| src[idx..].starts_with('\n')
|| items
.iter()
.find_map(|i| i.begin().matches(&src[idx..]))
.is_some())
}) {
// if it's not a range then it's a source line
if end == 0 {
Err("Failed to parse, for some random reason, pls lookie here")?;
}
Ok(ParseOutput::Source(&src[0..end]))
Ok(ParseOutput::Source(&src[..end]))
} else {
Err("Failed to parse the rest.")?
}
Expand Down
15 changes: 10 additions & 5 deletions src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,10 @@ mod simple_python {
#[test]
fn try_parse() {
let parsed = Parser::<Python>::new(
r#"# some top level comments
r#"# entp için anayzer
if index == 10:
pass
# some top level comments
def main():
print("s");"""
Multi-line Comments
Expand All @@ -53,6 +56,7 @@ mod simple_python {
let mut line_count: usize = 0;
let mut stack = vec![];
for p in parsed {
eprintln!("{:?}", p);
if matches!(p, ParseOutput::EOL(_) | ParseOutput::EOF) {
if stack.iter().any(|i| match i {
ParseOutput::Source(s) => Python::is_meaningful_src(s),
Expand All @@ -65,7 +69,7 @@ mod simple_python {
stack.push(p);
}
}
assert_eq!(line_count, 3)
assert_eq!(line_count, 5)
}
}

Expand All @@ -86,10 +90,11 @@ mod simple_rust {
// the platform to be present.
fn main() {
let c = 2;
/* Multi-Line Comments
/* Multi-Line Comments यह काम करना चाहिए
seems to work as well */
let src = "hello, World!";
let src2 = r#"hello, World!"#;
let यह = "hello, World!";
let src = "Gello, World!";
let src2 = r#"यह, काम!"#;
return 0;
}
"##,
Expand Down

0 comments on commit 8a0fe70

Please sign in to comment.