Skip to content

Commit baaed8c

Browse files
authored
重写 remove_emoji_overlap.py
1 parent 1153c60 commit baaed8c

1 file changed

Lines changed: 38 additions & 20 deletions

File tree

script/remove_emoji_overlap.py

Lines changed: 38 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,23 +2,46 @@
22
# -*- coding: utf-8 -*-
33

44
"""
5-
Remove glyphs from target fonts that overlap with NotoColorEmoji.ttf
5+
Remove glyphs from target fonts that overlap with emoji defined in all.toml
66
77
Logic:
8-
1. Read all Unicode codepoints from NotoColorEmoji.ttf
8+
1. Parse all.toml and extract Unicode codepoints from svg filenames
99
2. Remove these codepoints from target fonts
10-
3. Automatically clean unused glyphs via fontTools Subsetter
10+
3. Clean unused glyphs via fontTools Subsetter
1111
"""
1212

1313
import sys
14+
import re
1415
from fontTools.ttLib import TTFont
1516
from fontTools.subset import Subsetter, Options
1617

1718

18-
def get_codepoints(font_path):
19+
def parse_all_toml(toml_path):
    """
    Extract emoji Unicode codepoints from all.toml

    The file is scanned as plain text, line by line, for filenames of the
    form ``emoji_uXXXX.svg`` or ``emoji_uXXXX_YYYY.svg``; it is not parsed
    as TOML.  Every filename found on a line is processed, so several
    entries written on one line are all picked up.

    Args:
        toml_path: Path of the file to scan; it is read as UTF-8.

    Returns:
        A set of int codepoints.  Each hexadecimal component of a
        multi-codepoint filename is added to the set individually.

    Raises:
        OSError: If the file cannot be opened.
    """
    # match: emoji_uXXXX.svg or emoji_uXXXX_XXXX.svg
    svg_name = re.compile(r'emoji_u([0-9a-fA-F_]+)\.svg')
    emoji_codepoints = set()

    with open(toml_path, "r", encoding="utf-8") as f:
        for line in f:
            # finditer rather than search: a single line may list more than
            # one svg filename and none of them should be dropped.
            for m in svg_name.finditer(line):
                # NOTE(review): components are added one by one, so any
                # character that only appears as part of a sequence also
                # ends up in the result -- confirm this is intended.
                for piece in m.group(1).split("_"):
                    # The pattern only admits hex digits and "_", so the
                    # only non-parsable piece is an empty one (leading,
                    # trailing or doubled underscore); skip it.
                    if piece:
                        emoji_codepoints.add(int(piece, 16))

    return emoji_codepoints
42+
43+
44+
def get_font_codepoints(font_path):
2245
font = TTFont(font_path)
2346
codepoints = set()
2447

@@ -30,25 +53,20 @@ def get_codepoints(font_path):
3053

3154

3255
def remove_overlap(source_font_path, emoji_codepoints, output_path):
33-
"""
34-
Remove overlapping Unicode codepoints from source font
35-
"""
3656
font = TTFont(source_font_path)
3757

38-
# Collect existing codepoints in source font
3958
existing_codepoints = set()
4059
for table in font["cmap"].tables:
4160
existing_codepoints.update(table.cmap.keys())
4261

43-
# Determine which codepoints to keep
62+
overlap = existing_codepoints & emoji_codepoints
4463
keep_codepoints = existing_codepoints - emoji_codepoints
4564

4665
print(f"\nProcessing: {source_font_path}")
4766
print(f"Total glyphs before: {len(existing_codepoints)}")
48-
print(f"Removing: {len(existing_codepoints & emoji_codepoints)}")
49-
print(f"Keeping: {len(keep_codepoints)}")
67+
print(f"Emoji removed: {len(overlap)}")
68+
print(f"Remaining: {len(keep_codepoints)}")
5069

51-
# Configure subsetter
5270
options = Options()
5371
options.set(layout_features='*')
5472
options.recalc_average_width = True
@@ -62,21 +80,21 @@ def remove_overlap(source_font_path, emoji_codepoints, output_path):
6280
font.save(output_path)
6381
font.close()
6482

65-
print(f"Saved to: {output_path}")
83+
print(f"Font saved to: {output_path}")
6684

6785

6886
def main():
69-
if len(sys.argv) < 4:
87+
if len(sys.argv) < 3:
7088
print("Usage:")
71-
print("python remove_emoji_overlap.py NotoColorEmoji.ttf target1.otf target2.otf ...")
89+
print("python remove_emoji_overlap.py all.toml target1.otf target2.otf ...")
7290
sys.exit(1)
7391

74-
emoji_font_path = sys.argv[1]
92+
toml_path = sys.argv[1]
7593
target_fonts = sys.argv[2:]
7694

77-
print("Reading emoji font...")
78-
emoji_codepoints = get_codepoints(emoji_font_path)
79-
print(f"Emoji codepoints found: {len(emoji_codepoints)}")
95+
print("Parsing emoji list from all.toml...")
96+
emoji_codepoints = parse_all_toml(toml_path)
97+
print(f"Total emoji codepoints from TOML: {len(emoji_codepoints)}")
8098

8199
for font_path in target_fonts:
82100
output_path = font_path.replace(".otf", "_noEmoji.otf").replace(".ttf", "_noEmoji.ttf")

0 commit comments

Comments
 (0)