22# -*- coding: utf-8 -*-
33
44"""
5- Remove glyphs from target fonts that overlap with NotoColorEmoji.ttf
5+ Remove glyphs from target fonts that overlap with emoji defined in all.toml
66
77Logic:
8- 1. Read all Unicode codepoints from NotoColorEmoji.ttf
8+ 1. Parse all.toml and extract Unicode codepoints from svg filenames
992. Remove these codepoints from target fonts
10- 3. Automatically clean unused glyphs via fontTools Subsetter
10+ 3. Clean unused glyphs via fontTools Subsetter
1111"""
1212
1313import sys
14+ import re
1415from fontTools .ttLib import TTFont
1516from fontTools .subset import Subsetter , Options
1617
1718
18- def get_codepoints ( font_path ):
def parse_all_toml(toml_path):
    """
    Extract emoji Unicode codepoints from all.toml.

    The file is scanned line by line as plain text (it is not parsed as
    TOML). Every occurrence of a filename shaped like ``emoji_uXXXX.svg``
    or ``emoji_uXXXX_YYYY.svg`` is picked up, including several filenames
    on the same line, and each underscore-separated hexadecimal component
    is converted to an integer codepoint.

    Args:
        toml_path: Path to the file that lists the emoji SVG filenames.

    Returns:
        A set of integer codepoints. The set is empty when the file
        contains no matching filename.
    """
    # Compiled once, outside the loop, instead of once per line.
    svg_name = re.compile(r'emoji_u([0-9a-fA-F_]+)\.svg')
    emoji_codepoints = set()

    with open(toml_path, "r", encoding="utf-8") as f:
        for line in f:
            # finditer (rather than search) so that a line holding more
            # than one filename contributes all of them.
            for m in svg_name.finditer(line):
                # The character class only admits hex digits and "_", so
                # after splitting on "_" the only component that cannot be
                # parsed is the empty string left by a doubled or trailing
                # underscore; those are skipped.
                # NOTE(review): every component of a multi-codepoint
                # sequence is added individually - confirm that removing
                # each component from the target fonts is intended.
                emoji_codepoints.update(
                    int(part, 16) for part in m.group(1).split("_") if part
                )

    return emoji_codepoints
42+
43+
44+ def get_font_codepoints (font_path ):
2245 font = TTFont (font_path )
2346 codepoints = set ()
2447
@@ -30,25 +53,20 @@ def get_codepoints(font_path):
3053
3154
3255def remove_overlap (source_font_path , emoji_codepoints , output_path ):
33- """
34- Remove overlapping Unicode codepoints from source font
35- """
3656 font = TTFont (source_font_path )
3757
38- # Collect existing codepoints in source font
3958 existing_codepoints = set ()
4059 for table in font ["cmap" ].tables :
4160 existing_codepoints .update (table .cmap .keys ())
4261
43- # Determine which codepoints to keep
62+ overlap = existing_codepoints & emoji_codepoints
4463 keep_codepoints = existing_codepoints - emoji_codepoints
4564
4665 print (f"\n Processing: { source_font_path } " )
4766 print (f"Total glyphs before: { len (existing_codepoints )} " )
48- print (f"Removing : { len (existing_codepoints & emoji_codepoints )} " )
49- print (f"Keeping : { len (keep_codepoints )} " )
67+ print (f"Emoji removed : { len (overlap )} " )
68+ print (f"Remaining : { len (keep_codepoints )} " )
5069
51- # Configure subsetter
5270 options = Options ()
5371 options .set (layout_features = '*' )
5472 options .recalc_average_width = True
@@ -62,21 +80,21 @@ def remove_overlap(source_font_path, emoji_codepoints, output_path):
6280 font .save (output_path )
6381 font .close ()
6482
65- print (f"Saved to: { output_path } " )
83+ print (f"Font saved to: { output_path } " )
6684
6785
6886def main ():
69- if len (sys .argv ) < 4 :
87+ if len (sys .argv ) < 3 :
7088 print ("Usage:" )
71- print ("python remove_emoji_overlap.py NotoColorEmoji.ttf target1.otf target2.otf ..." )
89+ print ("python remove_emoji_overlap.py all.toml target1.otf target2.otf ..." )
7290 sys .exit (1 )
7391
74- emoji_font_path = sys .argv [1 ]
92+ toml_path = sys .argv [1 ]
7593 target_fonts = sys .argv [2 :]
7694
77- print ("Reading emoji font ..." )
78- emoji_codepoints = get_codepoints ( emoji_font_path )
79- print (f"Emoji codepoints found : { len (emoji_codepoints )} " )
95+ print ("Parsing emoji list from all.toml ..." )
96+ emoji_codepoints = parse_all_toml ( toml_path )
97+ print (f"Total emoji codepoints from TOML : { len (emoji_codepoints )} " )
8098
8199 for font_path in target_fonts :
82100 output_path = font_path .replace (".otf" , "_noEmoji.otf" ).replace (".ttf" , "_noEmoji.ttf" )
0 commit comments