Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Split tables #218

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 19 additions & 25 deletions gen/src/writer/ucd/name.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
// except according to those terms.

use std::char;
use std::collections::{BTreeMap, BTreeSet};
use std::fmt::Write;
use std::collections::{BTreeMap, /*BTreeSet*/};
// use std::fmt::Write;
use std::path::Path;

use source::ucd::jamo::JAMO_DATA;
Expand All @@ -29,45 +29,39 @@ pub fn generate(dir: &Path) {

#[derive(Clone, Debug)]
struct NameRecord<'a> {
pieces: Vec<&'a str>,
name: &'a str,
}

fn emit_name_tables(dir: &Path) {
let mut values: BTreeSet<&str> = BTreeSet::default();
// let mut values: BTreeSet<&str> = BTreeSet::default();
let map: BTreeMap<char, NameRecord> = UNICODE_DATA
.entries
.iter()
.filter(|x| !x.name.starts_with('<'))
.map(|x| {
let pieces = x.name.split_whitespace().collect::<Vec<_>>();
values.extend(pieces.iter());
(x.character, NameRecord { pieces })
}).collect();
(x.character, NameRecord { name: &x.name })
})
.collect();

let mut values_contents = String::new();
for piece in values.iter() {
writeln!(
values_contents,
"const {}: &str = \"{}\";",
piece.replace('-', "_"),
piece
).unwrap();
}
write(dir, "name_values.rsd", &values_contents);
// let mut values_contents = String::new();
// for piece in values.iter() {
// writeln!(
// values_contents,
// "const {}: &str = \"{}\";",
// piece.replace('-', "_"),
// piece
// ).unwrap();
// }
// write(dir, "name_values.rsd", &values_contents);

write(
dir,
"name_map.rsv",
&map.to_direct_char_table(|record, f| {
write!(
f,
"&[{}]",
record
.pieces
.iter()
.map(|s| s.replace('-', "_"))
.collect::<Vec<_>>()
.join(", ")
"\"{}\"",
record.name
)
}),
);
Expand Down
16 changes: 8 additions & 8 deletions gen/src/writer/ucd/normal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,16 +109,16 @@ fn emit_canonical_composition_mapping(dir: &Path) {
dir,
"canonical_composition_mapping.rsv",
&map.to_direct_char_table(|val, f| {
write!(f, "CharDataTable::Direct(&[")?;
// TODO(CAD97): get this using the traits so it doesn't break on refactor
write!(f, "CharMap{{chars:&[")?;
for pair in val.iter() {
write!(
f,
"('{}','{}'),",
pair.0.escape_unicode(),
pair.1.escape_unicode(),
)?;
write!(f, "'{}',", pair.0.escape_unicode(), )?;
}
write!(f, "])")
write!(f, "],values:&[")?;
for pair in val.iter() {
write!(f, "'{}',", pair.1.escape_unicode(), )?;
}
write!(f, "]}}")
}),
);
}
Expand Down
49 changes: 31 additions & 18 deletions gen/src/writer/utils/tables/direct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,19 +26,22 @@ impl<T> ToDirectCharTable<T> for BTreeMap<char, T> {
where
F: Fn(&T, &mut fmt::Formatter) -> fmt::Result,
{
let entries = self.iter();
let mut out = String::from("CharDataTable::Direct(&[\n");
let mut out = String::from("CharMap {\n");

for (char, property) in entries {
writeln!(
out,
" ('{}', {}),",
char.escape_unicode(),
DisplayWrapper(property, &display_fn)
).expect("`String` `Write` failed");
out.push_str(" chars: &[\n");
for ch in self.keys() {
writeln!(out, " '{}',", ch.escape_unicode()).expect("`String` `Write` failed");
}
out.push_str(" ],\n");

out.push_str("])");
out.push_str(" values: &[\n");
for val in self.values() {
writeln!(out, " {},", DisplayWrapper(val, &display_fn))
.expect("`String` `Write` failed");
}
out.push_str(" ],\n");

out.push_str("}");
out
}
}
Expand All @@ -61,14 +64,24 @@ mod test {
assert_eq!(
map.to_direct_char_table(Display::fmt),
"\
CharDataTable::Direct(&[
('\\u{61}', A),
('\\u{62}', B),
('\\u{63}', C),
('\\u{78}', X),
('\\u{79}', Y),
('\\u{7a}', Z),
])"
CharMap {
chars: &[
'\\u{61}',
'\\u{62}',
'\\u{63}',
'\\u{78}',
'\\u{79}',
'\\u{7a}',
],
values: &[
A,
B,
C,
X,
Y,
Z,
],
}"
);
}
}
72 changes: 44 additions & 28 deletions gen/src/writer/utils/tables/range.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,10 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use super::DisplayWrapper;
use std::collections::BTreeMap;
use std::fmt::{self, Write};

use super::DisplayWrapper;

/// Create the source for a `CharRangeMap`, using `CharRange`s to deduplicate data.
pub trait ToRangeCharTable<T: Eq> {
/// Convert this mapping to a `String`.
Expand All @@ -26,38 +25,48 @@ impl<T: Eq> ToRangeCharTable<T> for BTreeMap<char, T> {
where
F: Fn(&T, &mut fmt::Formatter) -> fmt::Result,
{
let mut entries = self.iter();
let mut out = String::from("CharDataTable::Range(&[\n");
let mut range_map = vec![];

let mut entries = self.iter();
if let Some((&low, mut value)) = entries.next() {
let (mut low, mut high) = (low, low);

let append_entry = |out: &mut String, low: char, high: char, c: &T| {
writeln!(
out,
" (chars!('{}'..='{}'), {}),",
low.escape_unicode(),
high.escape_unicode(),
DisplayWrapper(c, &display_fn),
).expect("`String` `Write` failed");
};

for (&char, property) in entries {
if property != value || (char as u32) > (high as u32 + 1) {
append_entry(&mut out, low, high, value);
low = char;
high = char;
for (&codepoint, property) in entries {
if property != value || (codepoint as u32) > (high as u32 + 1) {
range_map.push(((low, high), value));
low = codepoint;
high = codepoint;
value = property;
} else {
assert_eq!(char as u32, high as u32 + 1);
high = char;
assert_eq!(codepoint as u32, high as u32 + 1);
high = codepoint;
}
}

append_entry(&mut out, low, high, value);
range_map.push(((low, high), value));
}

let mut out = String::from("CharRangeMap {\n");

out.push_str(" ranges: &[\n");
for &((low, high), _) in range_map.iter() {
writeln!(
out,
" chars!('{}'..='{}'),",
low.escape_unicode(),
high.escape_unicode(),
).expect("`String` `Write` failed");
}
out.push_str(" ],\n");

out.push_str(" values: &[\n");
for &(_, val) in range_map.iter() {
writeln!(out, " {},", DisplayWrapper(val, &display_fn))
.expect("`String` `Write` failed");
}
out.push_str(" ],\n");

out.push_str("])");
out.push_str("}");
out
}
}
Expand All @@ -83,11 +92,18 @@ mod test {
assert_eq!(
map.to_range_char_table(Display::fmt),
"\
CharDataTable::Range(&[
(chars!('\\u{61}'..='\\u{63}'), Low),
(chars!('\\u{64}'..='\\u{66}'), Mid),
(chars!('\\u{78}'..='\\u{7a}'), High),
])"
CharRangeMap {
ranges: &[
chars!('\\u{61}'..='\\u{63}'),
chars!('\\u{64}'..='\\u{66}'),
chars!('\\u{78}'..='\\u{7a}'),
],
values: &[
Low,
Mid,
High,
],
}"
);
}
}
64 changes: 40 additions & 24 deletions gen/src/writer/utils/tables/set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,36 +19,46 @@ pub trait ToRangeCharSet {

impl ToRangeCharSet for BTreeSet<char> {
fn to_range_char_set(&self) -> String {
let mut entries = self.iter();
let mut out = String::from("CharDataTable::Range(&[\n");
let mut range_map = vec![];

let mut entries = self.iter();
if let Some(&low) = entries.next() {
let (mut low, mut high) = (low, low);

let append_entry = |out: &mut String, low: char, high: char| {
writeln!(
out,
" (chars!('{}'..='{}'), ()),",
low.escape_unicode(),
high.escape_unicode(),
).expect("`String` `Write` failed");
};

for &char in entries {
if (char as u32) > (high as u32 + 1) {
append_entry(&mut out, low, high);
low = char;
high = char;
for &codepoint in entries {
if (codepoint as u32) > (high as u32 + 1) {
range_map.push((low, high));
low = codepoint;
high = codepoint;
} else {
assert_eq!(char as u32, high as u32 + 1);
high = char;
assert_eq!(codepoint as u32, high as u32 + 1);
high = codepoint;
}
}

append_entry(&mut out, low, high);
range_map.push((low, high));
}

let mut out = String::from("CharRangeMap {\n");

out.push_str(" ranges: &[\n");
for &(low, high) in range_map.iter() {
writeln!(
out,
" chars!('{}'..='{}'),",
low.escape_unicode(),
high.escape_unicode(),
).expect("`String` `Write` failed");
}
out.push_str(" ],\n");

out.push_str(" values: &[\n");
for _ in range_map.iter() {
out.push_str(" (),\n");
}
out.push_str(" ],\n");

out.push_str("])");
out.push_str("}");
out
}
}
Expand All @@ -72,10 +82,16 @@ mod test {
assert_eq!(
set.to_range_char_set(),
"\
CharDataTable::Range(&[
(chars!('\\u{61}'..='\\u{66}'), ()),
(chars!('\\u{78}'..='\\u{7a}'), ()),
])"
CharRangeMap {
ranges: &[
chars!('\\u{61}'..='\\u{66}'),
chars!('\\u{78}'..='\\u{7a}'),
],
values: &[
(),
(),
],
}"
);
}
}
1 change: 0 additions & 1 deletion unic/char/property/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
//!
//! * [PropertyAliases.txt](https://www.unicode.org/Public/UCD/latest/ucd/PropertyAliases.txt).

#[macro_use]
extern crate unic_char_range;

mod pkg_info;
Expand Down
5 changes: 3 additions & 2 deletions unic/char/property/src/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -204,8 +204,9 @@ macro_rules! char_property {
impl $prop_name {
/// Get (struct) property value of the character.
pub fn of(ch: char) -> Self {
use $crate::tables::CharDataTable;
const TABLE: CharDataTable<()> = include!($data_path);
// TODO(CAD97): Force BoolTrie (currently RangeMap) or allow specification?
use $crate::tables::CharRangeMap;
const TABLE: CharRangeMap<()> = include!($data_path);
$prop_name(TABLE.contains(ch))
}

Expand Down
Loading