-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathytanno2ass.py
More file actions
420 lines (361 loc) · 16.2 KB
/
ytanno2ass.py
File metadata and controls
420 lines (361 loc) · 16.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
# ytanno2ass.py
# Paul <paulguy on GitHub>
# This code is in the public domain
import xml.etree.ElementTree as ET
from Tkinter import * #needed for fonts
import tkFont
import re
import sys
# Font family used both in the ASS style line and for measuring text with Tk.
defaultFont = "Arial"
# Alpha values handed to eightBitToHex() for the \1a / \3a override tags.
opaque = 0
translucent = 127
transparent = 255
# Border colour (hex string) and border width used for boxes drawn behind text.
boxBorders = "000000"
defaultBorderWidth = 1
# Fallback appearance for 'speech' annotations: sometimes there's no appearance tag.
speechDefaultEffects = ""
speechDefaultTextSize = 12 #probably wrong
speechDefaultFGColor = "000000"
speechDefaultBGColor = "FFFFFF"
speechDefaultBGAlpha = 1
# Try experimenting with WrapStyle. Not super interested in perfect text placement, just getting it in the box.
# First part of the output file; makeASSHeader() appends the PlayResX/PlayResY lines to it.
ASSHeader = """[Script Info]
Title: YouTube Annotations
ScriptType: v4.00+
WrapStyle: 1
ScaleBorderAndShadow: yes
YCbCr Matrix: None
"""
# Keys for the video resolution lines: (width key, height key).
videoResKeys = ("PlayResX: ", "PlayResY: ")
# May need more values here but let's stay lean for now
styleHeading = """[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
"""
# The single style ("def") that every event written by makeASSEvent() refers to.
# Size is going to be overridden every time so this value is meaningless. Font is totally meaningless for purely vector draws
styles = """Style: def,""" + defaultFont + """,12,&H00000000,&H00000000,&HFF000000,&HEE000000,0,0,0,0,100,100,0,0,0,0,2,7,0,0,0,1
"""
eventsHeading = """[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
"""
# Prefix of every event line; the fields follow in the order given by eventsHeading.
eventKey = "Dialogue: "
def makeASSHeader(width, height):
    """Return everything that precedes the event lines of the ASS file.

    The result is the script info section, the PlayResX/PlayResY lines built
    from *width* and *height*, the style section and the events heading.
    """
    resolution = "".join([
        videoResKeys[0], str(width), "\n",
        videoResKeys[1], str(height), "\n\n",
    ])
    return "".join([ASSHeader, resolution, styleHeading, styles, "\n", eventsHeading])
# just place everything at a 0, 0 margin since it doesn't seem to work reliably
def makeASSEvent(num, start, end, text):
    """Return one event line for the [Events] section, newline included.

    num   - written as the first field (Layer in the events Format line)
    start - start time, already formatted as an ASS time string
    end   - end time, already formatted as an ASS time string
    text  - the event text, including any override blocks

    The style is always "def" and all three margins are 0.
    """
    fields = [eventKey, str(num), ",", start, ",", end, ",def,,0,0,0,,", text, "\n"]
    return "".join(fields)
def makeASSBox(x, y, w, h):
    """Return ASS drawing commands for a rectangle.

    The path moves to the top left corner (x, y) and then draws lines to the
    top right, bottom right and bottom left corners before returning to the
    top left.
    """
    right = x + w
    bottom = y + h
    corners = [(right, y), (right, bottom), (x, bottom), (x, y)]
    path = " ".join(str(px) + " " + str(py) for px, py in corners)
    return "m " + str(x) + " " + str(y) + " l " + path
def eightBitToHex(val):
    """Return the integer *val* (0..255) as two uppercase hexadecimal digits.

    Raises IndexError when *val* is 256 or larger.
    """
    lookup = "0123456789ABCDEF"
    # divmod keeps both digits integral regardless of how "/" divides, so the
    # table lookups never see a float index.
    high, low = divmod(val, 16)
    return lookup[high] + lookup[low]
#these return unicode byte arrays for writing to a file
def makeASSBoxWithStyle(x, y, w, h, bcolor, balpha, bsize, fcolor, falpha, keepopen=False):
    """Return the utf8-encoded event text that draws a styled rectangle.

    x, y, w, h - rectangle passed on to makeASSBox
    bcolor     - border colour hex string (\\3c tag)
    balpha     - border alpha, 0..255 (\\3a tag)
    bsize      - border width (\\bord tag)
    fcolor     - fill colour hex string (\\1c tag)
    falpha     - fill alpha, 0..255 (\\1a tag)
    keepopen   - accepted but not used
    """
    pieces = [
        "{\\3a&H", eightBitToHex(balpha),
        "\\1a&H", eightBitToHex(falpha),
        "\\1c&H", fcolor,
        "\\3c&H", bcolor,
        "\\bord", str(bsize),
        # switch drawing mode on, emit the shape, switch drawing mode off
        "\\p1}", makeASSBox(x, y, w, h), "{\\p0}",
    ]
    return "".join(pieces).encode('utf8', errors='replace')
def makeASSTextWithStyle(text, x, y, color, size):
    """Return the utf8-encoded event text for *text* placed at (x, y).

    color - text colour hex string (\\1c tag)
    size  - font size; truncated to an integer for the \\fs tag
    """
    override = "".join([
        "{\\pos(", str(x), ",", str(y), ")",
        "\\1c&H", color,
        "\\fs", str(int(size)),
        "}",
    ])
    styled = override + text
    return styled.encode('utf8', errors='replace')
def wordWrap2(text, size, width):
    """Split *text* into lines by measuring its words with a Tk font.

    text  - one line of text; it is split on whitespace into words
    size  - font size, truncated to an integer for the Tk font
    width - target width; it is doubled before use (see below)

    Returns a list of strings, one per wrapped line.  A word that is wider
    than the (doubled) width on its own is cut into pieces, the cut position
    being searched by repeatedly halving the candidate prefix length.

    tkFont.Font is used for measuring; the caller in this file creates a Tk
    root window before calling.
    """
    font = tkFont.Font(family = defaultFont, size = int(size), weight = tkFont.NORMAL, slant = tkFont.ROMAN)
    lines = list()
    width *= 2 #awful assumption but seems to help a bit...
    spcwidth = font.measure(" ")
    wordsl = text.split()
    words = list()
    # pair every word with its measured width so each is only measured once
    for word in wordsl:
        words.append((word, font.measure(word)))
    line = ""
    linewidth = 0
    while len(words) > 0:
        #print(words)
        if linewidth + words[0][1] <= width:
            line += words[0][0] # add word to line
            linewidth += words[0][1]
            # NOTE(review): linewidth already contains this word's width here,
            # so the word is counted twice in this test - confirm intent.
            if linewidth + words[0][1] + spcwidth > width: #if a space won't fit, we're at the end
                linewidth += spcwidth
                #print(line)
                lines.append(line) # add the line to the list
                line = "" # clear the line
                linewidth = 0
            else:
                # NOTE(review): the space is appended to the text but its width
                # is not added to linewidth.
                line += " " # add a space
            del words[0] # delete the word from the list
        elif linewidth == 0 and words[0][1] > width: #cut up a too long word to lines
            #print("too long")
            minlen = 0 #we start with the entire string being a possibility
            maxlen = len(words[0][0])
            curlen = maxlen
            while True:
                changed = 0
                #shrink string in half increments until it fits
                while font.measure(words[0][0][:curlen]) > width:
                    changed = 1
                    maxlen = curlen # string still doesn't fit, so invalidate possibilities that don't fit
                    if curlen == minlen + 1:
                        break
                    # "//" keeps curlen an integer so it stays usable as a slice index
                    curlen -= ((curlen - minlen) // 2) # halfway between min and maximum validated size
                    #print("%d" % curlen)
                #at this point, max length is the previous size before the last halving
                #we've halved the string until it fits, now try growing the string in 1/2 increments between min and max until it's too big
                #print("%d %d" % (curlen, maxlen))
                #curlen wasn't greater, so it's the new largest value we know fits
                while font.measure(words[0][0][:curlen]) < width:
                    changed = 1
                    minlen = curlen # string still fits so invalidate possibilities that we know are too short
                    if curlen == maxlen - 1:
                        if curlen == 0:
                            minlen = 1
                        break
                    curlen += ((maxlen - curlen) // 2) # halfway between current and max
                    #print("%d" % curlen)
                #print("%d %d" % (minlen, curlen))
                if changed == 0 or minlen == maxlen or minlen + 1 == maxlen: #we've found the length that'll fit
                    lines.append(words[0][0][:minlen]) # create a line with the head
                    # replace the original word with the tail
                    # and find the new length of the tail
                    tail = (words[0][0][minlen:], font.measure(words[0][0][minlen:]))
                    words[0] = tail
                    break #we're done
        else: # couldn't append next word
            #print(line)
            lines.append(line) # add the line to the list
            line = "" # clear the line
            linewidth = 0
    # flush the last, partially filled line
    if linewidth != 0:
        lines.append(line)
    return lines
def annoAlphaToASSAlpha(alpha):
    """Map an annotation background alpha to one of the two ASS alphas used.

    Anything above 0.1 becomes `translucent`; everything else is `opaque`.
    """
    return translucent if alpha > 0.1 else opaque
def annosToASSFile(annos, assfile, width, height):
    """Write the ASS header and one or two events per annotation to *assfile*.

    annos         - list of annotation dicts (keys used here: 'type', 'style',
                    'x', 'y', 'w', 'h', 'start', 'end', 'text', 'textSize',
                    'fgColor', 'bgColor', 'bgAlpha', 'highlightWidth')
    assfile       - object with a write() method
    width, height - passed to makeASSHeader for the PlayResX/PlayResY lines

    Events are numbered consecutively; the number goes into the first field
    of each event line.  Raises Exception for an annotation type or style
    that has no implementation here.
    """
    assfile.write(makeASSHeader(width, height))
    num = 0
    for anno in annos:
        events = list()
        if anno['type'] == 'highlight': #just a box
            events.append(makeASSBoxWithStyle(anno['x'], anno['y'], anno['w'], anno['h'], anno['bgColor'], translucent, anno['highlightWidth'], "000000", transparent))
        elif anno['type'] == 'text':
            style = anno['style']
            if style == 'anchored' or style == 'popup': #speech bubble or box, both with text inside
                alpha = annoAlphaToASSAlpha(anno['bgAlpha'])
                events.append(makeASSBoxWithStyle(anno['x'], anno['y'], anno['w'], anno['h'], boxBorders, opaque, defaultBorderWidth, anno['bgColor'], alpha))
                events.append(makeASSTextWithStyle(anno['text'], anno['x'], anno['y'], anno['fgColor'], anno['textSize']))
            elif style == 'title' or style == 'highlightText': #just text
                events.append(makeASSTextWithStyle(anno['text'], anno['x'], anno['y'], anno['fgColor'], anno['textSize']))
            elif style == 'label': #box with bottom-aligned text, just top align it...
                alpha = annoAlphaToASSAlpha(anno['bgAlpha'])
                events.append(makeASSBoxWithStyle(anno['x'], anno['y'], anno['w'], anno['h'], anno['bgColor'], alpha, defaultBorderWidth, "000000", transparent))
                border = "{\\3a&H00" #turn border back on for readability
                if anno['fgColor'] == "000000": # as of this writing, youtube only supports black and white so do simple invert
                    border += "\\3c&HFFFFFF"
                # always close the override block, also when the colour is not inverted
                border += "}"
                events.append(border + makeASSTextWithStyle(anno['text'], anno['x'], anno['y'], anno['fgColor'], anno['textSize']))
            else:
                raise Exception("Unimplemented annotation style")
        else:
            raise Exception("Unimplemented annotation type")
        for text in events:
            assfile.write(makeASSEvent(num, anno['start'], anno['end'], text))
            num += 1
def getXMLFromFile(filename):
    """Parse the XML document *filename* and return its root element."""
    return ET.parse(filename).getroot()
def videoTimeToMS(time):
    """Convert a time string to integer milliseconds.

    Accepted forms are "H:M:S", "M:S" and "S", where S may have a fractional
    part.  The special value 'never' yields -1.

    Raises Exception("Unrecognized time format") when there are more than
    three colon-separated parts, and ValueError when a part is not a number.
    """
    if time == 'never': # sillyness with highlight type, gets overwritten
        return -1
    parts = time.split(':')
    if len(parts) > 3:
        raise Exception("Unrecognized time format")
    # left-pad with zeros so there is always an hours, a minutes and a seconds part
    hours, mins, secs = ['0'] * (3 - len(parts)) + parts
    # round rather than truncate: float("1.001") * 1000 is 1000.9999999999999
    msecs = int(round(float(secs) * 1000))
    return (int(hours) * 60 * 60 * 1000) + (int(mins) * 60 * 1000) + msecs
def MSToASSTime(time):
    """Format *time* (integer milliseconds) as an ASS time string h:MM:SS.CC."""
    # ASS time is only down to centiseconds, so cut off the thousandths.
    # Floor division keeps every component integral regardless of how "/" divides.
    centis = time // 10
    hours, rest = divmod(centis, 100 * 60 * 60)
    mins, rest = divmod(rest, 100 * 60)
    secs, csecs = divmod(rest, 100)
    return "%d:%02d:%02d.%02d" % (hours, mins, secs, csecs)
#slow but should be safe. Not terribly speed-critical
def RGBIntToBGRHex(color):
    """Convert an integer 0xRRGGBB colour to the hex string "BBGGRR".

    The channel order is reversed because that is the order in which the
    result is written into the \\1c / \\3c colour tags.
    """
    # divmod keeps the channels integral regardless of how "/" divides
    red, rest = divmod(color, 256 * 256)
    green, blue = divmod(rest, 256)
    return eightBitToHex(blue) + eightBitToHex(green) + eightBitToHex(red)
def _readTextAppearance(anno, xmlanno, appearance):
    """Copy the style-specific appearance values of a text annotation into *anno*.

    Returns True when anno['style'] is supported and False otherwise.  A
    'speech' style is stored as 'anchored'.
    """
    style = anno['style']
    # popup - big ugly box
    # label - box with text at bottom on hover
    # highlightText - refers to a highlight type by id, x and y are RELATIVE
    # anchored - speech bubble
    # title - undecorated text
    if style == 'anchored':
        anno['textSize'] = float(appearance.get('textSize'))
        anno['fgColor'] = RGBIntToBGRHex(int(appearance.get('fgColor')))
        anno['bgColor'] = RGBIntToBGRHex(int(appearance.get('bgColor')))
        anno['bgAlpha'] = float(appearance.get('bgAlpha'))
    elif style == 'speech': #like an anchored, but may be missing an appearance
        anno['style'] = 'anchored'
        # start from the defaults, then take whatever the appearance provides
        anno['textSize'] = speechDefaultTextSize
        anno['fgColor'] = speechDefaultFGColor
        anno['bgColor'] = speechDefaultBGColor
        anno['bgAlpha'] = speechDefaultBGAlpha
        if appearance is not None:
            if appearance.get('textSize') is not None:
                anno['textSize'] = float(appearance.get('textSize'))
            if appearance.get('fgColor') is not None:
                anno['fgColor'] = RGBIntToBGRHex(int(appearance.get('fgColor')))
            if appearance.get('bgColor') is not None:
                anno['bgColor'] = RGBIntToBGRHex(int(appearance.get('bgColor')))
            if appearance.get('bgAlpha') is not None:
                anno['bgAlpha'] = float(appearance.get('bgAlpha'))
    elif style == 'popup':
        anno['effects'] = appearance.get('effects')
        anno['textSize'] = float(appearance.get('textSize'))
        anno['fgColor'] = RGBIntToBGRHex(int(appearance.get('fgColor')))
        anno['bgColor'] = RGBIntToBGRHex(int(appearance.get('bgColor')))
        anno['bgAlpha'] = float(appearance.get('bgAlpha'))
    elif style == 'title':
        anno['textSize'] = float(appearance.get('textSize'))
        anno['fgColor'] = RGBIntToBGRHex(int(appearance.get('fgColor')))
    elif style == 'highlightText':
        anno['textSize'] = float(appearance.get('textSize'))
        anno['fgColor'] = RGBIntToBGRHex(int(appearance.get('highlightFontColor')))
        # get id of relative highlight
        anno['relativeid'] = xmlanno.find('segment').get('spaceRelative')
    elif style == 'label':
        anno['effects'] = appearance.get('effects')
        anno['textSize'] = float(appearance.get('textSize'))
        anno['fgColor'] = RGBIntToBGRHex(int(appearance.get('highlightFontColor')))
        anno['bgColor'] = RGBIntToBGRHex(int(appearance.get('fgColor'))) # this may be wrong
        anno['bgAlpha'] = float(appearance.get('bgAlpha'))
    else:
        return False
    return True


def XMLElementToAnnotationsList(elem, width, height):
    """Turn the parsed annotation XML into a list of annotation dicts.

    elem          - root element; its tag must be 'document'
    width, height - video size; positions and sizes in the XML are scaled
                    from a 0..100 range to these dimensions

    Every returned dict has 'type', 'x', 'y', 'w', 'h', 'start' and 'end'
    (the times as ASS time strings).  Text annotations additionally carry
    'style', 'textSize' (scaled by height), colours and 'text', which is word
    wrapped and joined with "\\N".  The list is sorted by start time.
    Annotations with an unsupported type or style are skipped with a warning.

    Raises Exception when the root tag is wrong, when there is no
    'annotations' element, or when a highlightText cannot be resolved.
    A Tk window is created while the text is being wrapped.
    """
    if elem.tag != 'document':
        raise Exception("root tag isn't 'document'")
    container = elem.find('annotations')
    if container is None:
        raise Exception("didn't find any annotations")

    annos = list()
    for xmlanno in container.findall('annotation'):
        anno = dict()
        # text - any sort of text annotation
        # highlight - just a box
        anno['type'] = xmlanno.get('type')
        appearance = xmlanno.find('appearance')
        if anno['type'] == 'text':
            textelem = xmlanno.find('TEXT') # only text types have text.
            if textelem is None:
                anno['text'] = ""
            else:
                # an empty <TEXT/> element has text None
                anno['text'] = (textelem.text or "").splitlines()
            anno['style'] = xmlanno.get('style') # also the only ones with a style
            if not _readTextAppearance(anno, xmlanno, appearance):
                print("WARNING: Unsupported style \"%s\"" % anno['style'])
                continue
        elif anno['type'] == 'highlight':
            anno['id'] = xmlanno.get('id') # annotation id, used by highlight
            anno['bgColor'] = RGBIntToBGRHex(int(appearance.get('bgColor')))
            anno['bgAlpha'] = float(appearance.get('borderAlpha'))
            anno['highlightWidth'] = float(appearance.get('highlightWidth'))
        else:
            print("WARNING: Unsupported type \"%s\"" % anno['type'])
            continue
        # effects - bevel, dropshadow, textdropshadow
        # textSize - text height, 100 = video height?
        # fgColor - text color
        # bgColor - box color
        # bgAlpha - solid is almost 0 and transparent is nonzero?
        # highlightWidth - box line width for highlights
        movingRegion = xmlanno.find('segment').find('movingRegion')
        if anno['type'] == 'text' and anno['style'] == 'anchored': # speech bubble ones use a different name
            annoregion = movingRegion.findall('anchoredRegion')
            anno['sx'] = float(annoregion[0].get('sx')) # TODO figure this out, speech bubble pointer location
            anno['sy'] = float(annoregion[0].get('sy'))
        else:
            annoregion = movingRegion.findall('rectRegion')
        anno['x'] = int(float(annoregion[0].get('x')) / 100 * width) # location. all location values seem to be from 0 to 100
        anno['y'] = int(float(annoregion[0].get('y')) / 100 * height) # 0,0 being top left, 100,100 being bottom right
        anno['w'] = int(float(annoregion[0].get('w')) / 100 * width) # size
        anno['h'] = int(float(annoregion[0].get('h')) / 100 * height)
        anno['start'] = videoTimeToMS(annoregion[0].get('t')) # start and end time in video
        anno['end'] = videoTimeToMS(annoregion[1].get('t'))
        action = xmlanno.find('action')
        if action is not None and action.get('type') == 'openUrl': # get URLs to place on link annotations
            anno['link'] = action.find('url').get('value')
        annos.append(anno)
        print("%s" % anno['type'])
        if anno['type'] == 'text':
            print("%s \"%s\"" % (anno['style'], anno['text']))

    #resolve highlights and make relative values absolute, copy time to highlightText
    byid = dict()
    for anno in annos:
        if 'id' in anno:
            byid[anno['id']] = anno # a later annotation with the same id wins
    for anno in annos:
        if anno['type'] == 'text' and anno['style'] == 'highlightText':
            if anno['relativeid'] is None or anno['relativeid'] == "":
                raise Exception("No spaceRelative for highlightText")
            relanno = byid.get(anno['relativeid'])
            if relanno is None:
                raise Exception("highlightText refers to id that does not exist")
            anno['x'] = relanno['x'] + anno['x']
            anno['y'] = relanno['y'] + anno['y']
            anno['start'] = relanno['start']
            anno['end'] = relanno['end']

    #sort annotations by start time
    annos.sort(key = lambda x: x['start'])

    #convert times to ASS times h:MM:SS.CC, scale font heights
    root = Tk() # have the window open as short a time as possible
    try:
        for anno in annos:
            anno['start'] = MSToASSTime(anno['start'])
            anno['end'] = MSToASSTime(anno['end'])
            if anno['type'] == 'text':
                # float() so that an integer text size is not floored to 0 by "/ 100"
                anno['textSize'] = float(anno['textSize']) / 100 * height
                #also wrap text. This part is ugly and requires creating a window
                lines = list()
                for text in anno['text']:
                    lines.extend(wordWrap2(text, anno['textSize'], anno['w']))
                # "\N" between lines, none after the last one
                anno['text'] = "\\N".join(lines)
    finally:
        # close the window even when wrapping fails
        root.destroy()
    return annos
# Command line entry: ytanno2ass.py <file> <width> <height>
# The subtitles are written next to the input as "<file>.ass".
if len(sys.argv) == 4:
    root = getXMLFromFile(sys.argv[1])
    annos = XMLElementToAnnotationsList(root, int(sys.argv[2]), int(sys.argv[3]))
    with open(sys.argv[1] + ".ass", "w") as assfile:
        annosToASSFile(annos, assfile, int(sys.argv[2]), int(sys.argv[3]))
else:
    print("USAGE: ytanno2ass.py <file> <width> <height>")