# Source: minitools/ytanno2ass.py (master branch of paulguy/minitools on GitHub)
# ytanno2ass.py
# Paul <paulguy on GitHub>
# This code is in the public domain

import xml.etree.ElementTree as ET
from Tkinter import * #needed for fonts
import tkFont
import re
import sys

# Font family named in the ASS style line and used when measuring text for wrapping
defaultFont = "Arial"
# Alpha levels as used in ASS alpha tags (0 = fully visible, 255 = invisible)
opaque = 0
translucent = 127
transparent = 255
# Border color (BGR hex) and width used for annotation boxes
boxBorders = "000000"
defaultBorderWidth = 1

# Fallback appearance for speech annotations: sometimes there's no appearance tag
speechDefaultEffects = ""
speechDefaultTextSize = 12 #probably wrong
speechDefaultFGColor = "000000"
speechDefaultBGColor = "FFFFFF"
speechDefaultBGAlpha = 1

# Try experimenting with WrapStyle.  Not super interested in perfect text
# placement, just getting it in the box.
ASSHeader = (
  "[Script Info]\n"
  "Title: YouTube Annotations\n"
  "ScriptType: v4.00+\n"
  "WrapStyle: 1\n"
  "ScaleBorderAndShadow: yes\n"
  "YCbCr Matrix: None\n"
)
# Keys for the two resolution lines; the values are appended by makeASSHeader
videoResKeys = ("PlayResX: ", "PlayResY: ")
# May need more values here but let's stay lean for now
styleHeading = "[V4+ Styles]\nFormat: " + ", ".join((
  "Name", "Fontname", "Fontsize",
  "PrimaryColour", "SecondaryColour", "OutlineColour", "BackColour",
  "Bold", "Italic", "Underline", "StrikeOut",
  "ScaleX", "ScaleY", "Spacing", "Angle",
  "BorderStyle", "Outline", "Shadow", "Alignment",
  "MarginL", "MarginR", "MarginV", "Encoding",
)) + "\n"
# Size is going to be overridden every time so this value is meaningless.
# Font is totally meaningless for purely vector draws.
# The values are listed in the same order as the Format line above.
styles = "Style: " + ",".join((
  "def", defaultFont, "12",
  "&H00000000", "&H00000000", "&HFF000000", "&HEE000000",
  "0", "0", "0", "0",
  "100", "100", "0", "0",
  "0", "0", "2", "7",
  "0", "0", "0", "1",
)) + "\n"
eventsHeading = "[Events]\nFormat: " + ", ".join((
  "Layer", "Start", "End", "Style", "Name",
  "MarginL", "MarginR", "MarginV", "Effect", "Text",
)) + "\n"
# Prefix of every event line written to the [Events] section
eventKey = "Dialogue: "


def makeASSHeader(width, height):
  """Return everything that precedes the event lines of the ASS file.

  The result is the script-info section, the PlayResX/PlayResY lines filled
  in from width and height, the style section, and the events heading.
  """
  sections = [
    ASSHeader,
    videoResKeys[0], str(width), "\n",
    videoResKeys[1], str(height), "\n\n",
    styleHeading, styles, "\n",
    eventsHeading,
  ]
  return "".join(sections)


# just place everything at a 0, 0 margin since it doesn't seem to work reliably
def makeASSEvent(num, start, end, text):
  """Return one event line for the [Events] section, newline included.

  num is written as the first field after the event key; start, end and text
  must already be strings.  The style is always "def" and all three margins
  are 0.
  """
  fields = [eventKey, str(num), ",", start, ",", end, ",def,,0,0,0,,", text, "\n"]
  return "".join(fields)


def makeASSBox(x, y, w, h):
  """Return drawing commands for the rectangle at (x, y) of size w by h.

  The path moves to the top left corner and then draws lines through the top
  right, bottom right and bottom left corners back to the top left.
  """
  right = x + w
  bottom = y + h
  corners = [(x, y), (right, y), (right, bottom), (x, bottom), (x, y)]
  points = ["%s %s" % corner for corner in corners]
  # the first point follows the "m" (move) command, the rest follow "l" (line)
  return "m " + points[0] + " l " + " ".join(points[1:])


def eightBitToHex(val):
  """Return the integer val (expected range 0-255) as two uppercase hex digits.

  Values above 255 raise IndexError because the high digit falls outside the
  lookup table.
  """
  lookup = "0123456789ABCDEF"
  # divmod floors explicitly; a plain "/" only yields a usable index under
  # Python 2's classic integer division
  high, low = divmod(val, 16)
  return lookup[high] + lookup[low]


#these return unicode byte arrays for writing to a file
def makeASSBoxWithStyle(x, y, w, h, bcolor, balpha, bsize, fcolor, falpha, keepopen=False):
  """Return the UTF-8 encoded event text that draws one styled rectangle.

  x, y, w, h     -- rectangle position and size, passed to makeASSBox
  bcolor, balpha -- border color (hex string) and alpha (0-255 integer)
  bsize          -- border width
  fcolor, falpha -- fill color (hex string) and alpha (0-255 integer)
  keepopen       -- accepted but not used by this function
  """
  # override block: border alpha, fill alpha, fill color, border color,
  # border width, then switch drawing mode on
  overrides = "".join([
    "{\\3a&H", eightBitToHex(balpha),
    "\\1a&H", eightBitToHex(falpha),
    "\\1c&H", fcolor,
    "\\3c&H", bcolor,
    "\\bord", str(bsize),
    "\\p1}",
  ])
  drawing = makeASSBox(x, y, w, h)
  # drawing mode is switched off again after the path
  return (overrides + drawing + "{\\p0}").encode('utf8', errors='replace')


def makeASSTextWithStyle(text, x, y, color, size):
  """Return the UTF-8 encoded event text that shows text at (x, y).

  color is a hex color string used for the primary color; size is truncated
  to an integer font size.
  """
  pieces = [
    "{\\pos(", str(x), ",", str(y), ")",
    "\\1c&H", color,
    "\\fs", str(int(size)),
    "}",
    text,
  ]
  return "".join(pieces).encode('utf8', errors='replace')


def wordWrap2(text, size, width):
  """Split text into lines that fit a box, using Tk font metrics.

  text  -- one line of annotation text; it is split on whitespace
  size  -- font size, truncated to an integer for the Tk font
  width -- box width; it is doubled before use (see below)

  Returns a list of strings.  Words are measured with defaultFont at the
  given size.  A single word wider than the limit is cut into pieces, each
  the longest prefix that still fits, and at least one character long so
  that the loop always makes progress.  Lines that were ended because the
  next word did not fit keep their trailing space.

  Creating the font needs Tk; the caller in this file creates a Tk root
  before calling this function.
  """
  font = tkFont.Font(family = defaultFont, size = int(size), weight = tkFont.NORMAL, slant = tkFont.ROMAN)
  lines = list()
  width *= 2 #awful assumption but seems to help a bit...

  spcwidth = font.measure(" ")
  # pair every whitespace-separated word with its measured width
  words = [(word, font.measure(word)) for word in text.split()]

  line = ""
  linewidth = 0 # sum of the widths of the words on the current line
  while len(words) > 0:
    word, wordwidth = words[0]
    if linewidth + wordwidth <= width:
      line += word # add word to line
      linewidth += wordwidth
      # NOTE(review): wordwidth has already been added to linewidth above, so
      # this test counts the word twice, and the space added in the else
      # branch is never added to linewidth.  Left as is because the doubled
      # width above was tuned against this behavior -- confirm before changing.
      if linewidth + wordwidth + spcwidth > width: #if a space won't fit, we're at the end
        lines.append(line) # add the line to the list
        line = "" # clear the line
        linewidth = 0
      else:
        line += " " # add a space
      del words[0] # delete the word from the list
    elif linewidth == 0 and wordwidth > width: #cut up a too long word to lines
      # Bisect for the longest prefix that fits.  Invariant: word[:fits] fits
      # (trivially when fits == 0) and word[:toolong] does not.
      fits = 0
      toolong = len(word)
      while toolong - fits > 1:
        mid = (fits + toolong) // 2
        if font.measure(word[:mid]) <= width:
          fits = mid
        else:
          toolong = mid
      # Always take at least one character, even if that character alone is
      # too wide; an empty head would leave the word unchanged and the loop
      # would never end.
      cut = max(fits, 1)
      lines.append(word[:cut]) # create a line with the head
      tail = word[cut:]
      if tail:
        words[0] = (tail, font.measure(tail)) # continue with the tail
      else:
        del words[0]
    else: # couldn't append next word
      lines.append(line) # add the line to the list
      line = "" # clear the line
      linewidth = 0
  if linewidth != 0:
    lines.append(line)

  return lines


def annoAlphaToASSAlpha(alpha):
  """Map an annotation alpha onto one of two ASS alpha levels.

  Values above 0.1 give translucent, everything else gives opaque.
  """
  return translucent if alpha > 0.1 else opaque


def _annoToASSTexts(anno):
  """Return the event texts, in drawing order, needed to show one annotation.

  Raises Exception for an annotation type or text style that is not handled.
  """
  if anno['type'] == 'highlight': #just a box
    return [makeASSBoxWithStyle(anno['x'], anno['y'], anno['w'], anno['h'],
                                anno['bgColor'], translucent, anno['highlightWidth'],
                                "000000", transparent)]
  if anno['type'] != 'text':
    raise Exception("Unimplemented annotation type")

  style = anno['style']
  if style not in ('anchored', 'popup', 'title', 'highlightText', 'label'):
    raise Exception("Unimplemented annotation style")

  caption = makeASSTextWithStyle(anno['text'], anno['x'], anno['y'], anno['fgColor'], anno['textSize'])
  if style == 'title' or style == 'highlightText': #just text
    return [caption]

  alpha = annoAlphaToASSAlpha(anno['bgAlpha'])
  if style == 'anchored' or style == 'popup': #speech bubble or box, with text inside
    box = makeASSBoxWithStyle(anno['x'], anno['y'], anno['w'], anno['h'],
                              boxBorders, opaque, defaultBorderWidth, anno['bgColor'], alpha)
    return [box, caption]

  # label: box with bottom-aligned text, just top align it...
  box = makeASSBoxWithStyle(anno['x'], anno['y'], anno['w'], anno['h'],
                            anno['bgColor'], alpha, defaultBorderWidth, "000000", transparent)
  prefix = "{\\3a&H00" #turn border back on for readability
  if anno['fgColor'] == "000000": # as of this writing, youtube only supports black and white so do simple invert
    prefix += "\\3c&HFFFFFF"
  # close the override block for every text color, not just for black text
  prefix += "}"
  return [box, prefix + caption]


def annosToASSFile(annos, assfile, width, height):
  """Write the annotations as a complete ASS file to assfile.

  annos   -- annotation dicts as produced by XMLElementToAnnotationsList
  assfile -- object with a write() method, e.g. a file opened for writing
  width, height -- video resolution written into the header

  The header is written first, then one event line per box or text.  Events
  are numbered consecutively from 0 and the number is written as the first
  field of each event line.

  Raises Exception for an annotation type or text style that is not handled;
  events of earlier annotations have already been written at that point.
  """
  assfile.write(makeASSHeader(width, height))

  num = 0
  for anno in annos:
    for text in _annoToASSTexts(anno):
      assfile.write(makeASSEvent(num, anno['start'], anno['end'], text))
      num += 1


def getXMLFromFile(filename):
  """Parse the XML file at filename and return its root element."""
  return ET.parse(filename).getroot()


def videoTimeToMS(time):
  """Convert an annotation time string to integer milliseconds.

  Accepted forms are "S", "M:S" and "H:M:S", where the seconds part may have
  a fraction.  The special value 'never' gives -1.

  Raises Exception("Unrecognized time format") when there are more than
  three colon-separated parts.
  """
  if time == 'never': # sillyness with highlight type, gets overwritten
    return -1

  parts = time.split(':')
  if len(parts) > 3:
    raise Exception("Unrecognized time format")

  # split() always returns at least one part; pad the missing leading
  # hours/minutes fields with zero
  fields = ['0'] * (3 - len(parts)) + parts
  hours = int(fields[0])
  mins = int(fields[1])
  # round before converting: float("1.001") * 1000 is 1000.9999999999999,
  # which plain int() would truncate to 1000
  msecs = int(round(float(fields[2]) * 1000))

  return (hours * 60 * 60 * 1000) + (mins * 60 * 1000) + msecs


def MSToASSTime(time):
  """Format an integer millisecond count as an ASS time string h:MM:SS.CC."""
  # ASS time is only down to centiseconds, so cut off the thousandths.
  # Floor division is spelled out so the result does not depend on Python 2's
  # classic "/" behavior.
  total = time // 10
  hours, rest = divmod(total, 100 * 60 * 60)
  mins, rest = divmod(rest, 100 * 60)
  secs, csecs = divmod(rest, 100)

  return "%d:%02d:%02d.%02d" % (hours, mins, secs, csecs)


#slow but should be safe.  Not terribly speed-critical
def RGBIntToBGRHex(color):
  """Convert an integer RGB color (0xRRGGBB) to the BGR hex string "BBGGRR"."""
  # divmod floors explicitly instead of relying on Python 2's classic "/"
  red, rest = divmod(color, 256 * 256)
  green, blue = divmod(rest, 256)

  return eightBitToHex(blue) + eightBitToHex(green) + eightBitToHex(red)


def XMLElementToAnnotationsList(elem, width, height):
  """Convert a parsed annotations document into a list of annotation dicts.

  elem   -- root element of the annotations XML; its tag must be 'document'
  width  -- video width, used to scale x and w percentages
  height -- video height, used to scale y, h and text size percentages

  Returns a list of dicts sorted by start time.  Every dict has 'type', 'x',
  'y', 'w', 'h', 'start' and 'end', the last two as ASS time strings.  Text
  annotations also have 'style', 'textSize', 'fgColor' and 'text', the
  latter word-wrapped with the lines joined by the ASS line-break tag.
  Annotations of an unsupported type or style are skipped with a warning.

  A Tk root window is created while the text is wrapped and destroyed again
  before returning.

  Raises Exception when the root tag is not 'document', when there is no
  'annotations' element, or when a highlightText annotation refers to a
  highlight that is missing.
  """
  annos = list()

  if elem.tag != 'document':
    raise Exception("root tag isn't 'document'")

  # find() returns None for a missing element; findall() always returns a
  # list, so only the container needs checking
  annotations = elem.find('annotations')
  if annotations is None:
    raise Exception("didn't find any annotations")

  for xmlanno in annotations.findall('annotation'):
    anno = dict()

    # text - any sort of text annotation
    # highlight - just a box
    anno['type'] = xmlanno.get('type')

    appearance = xmlanno.find('appearance')

    if anno['type'] == 'text':
      textelem = xmlanno.find('TEXT') # only text types have text.
      if textelem is None or textelem.text is None:
        anno['text'] = ""
      else:
        anno['text'] = textelem.text.splitlines()

      # popup - big ugly box
      # label - box with text at bottom on hover
      # highlightText - refers to a highlight type by id, x and y are RELATIVE
      # anchored - speech bubble
      # title - undecorated text
      anno['style'] = xmlanno.get('style') # also the only ones with a style
      if anno['style'] == 'anchored':
        anno['textSize'] = float(appearance.get('textSize'))
        anno['fgColor'] = RGBIntToBGRHex(int(appearance.get('fgColor')))
        anno['bgColor'] = RGBIntToBGRHex(int(appearance.get('bgColor')))
        anno['bgAlpha'] = float(appearance.get('bgAlpha'))
      elif anno['style'] == 'speech': #like an anchored, but may be missing an appearance
        anno['style'] = 'anchored'
        if appearance is None:
          anno['textSize'] = speechDefaultTextSize
          anno['fgColor'] = speechDefaultFGColor
          anno['bgColor'] = speechDefaultBGColor
          anno['bgAlpha'] = speechDefaultBGAlpha
        else:
          # look for the attribute on the appearance element; the anno dict
          # never has a 'textSize' key at this point
          textSize = appearance.get('textSize')
          if textSize is None:
            anno['textSize'] = speechDefaultTextSize
          else:
            anno['textSize'] = float(textSize)
          anno['fgColor'] = RGBIntToBGRHex(int(appearance.get('fgColor')))
          anno['bgColor'] = RGBIntToBGRHex(int(appearance.get('bgColor')))
          anno['bgAlpha'] = float(appearance.get('bgAlpha'))
      elif anno['style'] == 'popup':
        anno['effects'] = appearance.get('effects')
        anno['textSize'] = float(appearance.get('textSize'))
        anno['fgColor'] = RGBIntToBGRHex(int(appearance.get('fgColor')))
        anno['bgColor'] = RGBIntToBGRHex(int(appearance.get('bgColor')))
        anno['bgAlpha'] = float(appearance.get('bgAlpha'))
      elif anno['style'] == 'title':
        anno['textSize'] = float(appearance.get('textSize'))
        anno['fgColor'] = RGBIntToBGRHex(int(appearance.get('fgColor')))
      elif anno['style'] == 'highlightText':
        anno['textSize'] = float(appearance.get('textSize'))
        anno['fgColor'] = RGBIntToBGRHex(int(appearance.get('highlightFontColor')))
        # get id of relative highlight
        anno['relativeid'] = xmlanno.find('segment').get('spaceRelative')
      elif anno['style'] == 'label':
        anno['effects'] = appearance.get('effects')
        anno['textSize'] = float(appearance.get('textSize'))
        anno['fgColor'] = RGBIntToBGRHex(int(appearance.get('highlightFontColor')))
        anno['bgColor'] = RGBIntToBGRHex(int(appearance.get('fgColor'))) # this may be wrong
        anno['bgAlpha'] = float(appearance.get('bgAlpha'))
      else:
        print("WARNING: Unsupported style \"%s\"" % anno['style'])
        continue
    elif anno['type'] == 'highlight':
      anno['id'] = xmlanno.get('id') # annotation id, used by highlight
      anno['bgColor'] = RGBIntToBGRHex(int(appearance.get('bgColor')))
      anno['bgAlpha'] = float(appearance.get('borderAlpha'))
      anno['highlightWidth'] = float(appearance.get('highlightWidth'))
    else:
      print("WARNING: Unsupported type \"%s\"" % anno['type'])
      continue
    # effects - bevel, dropshadow, textdropshadow
    # textSize - text height, 100 = video height?
    # fgColor - text color
    # bgColor - box color
    # bgAlpha - solid is almost 0 and transparent is nonzero?
    # highlightWidth - box line width for highlights

    movingRegion = xmlanno.find('segment').find('movingRegion')
    if anno['type'] == 'text' and anno['style'] == 'anchored': # speech bubble ones use a different name
      annoregion = movingRegion.findall('anchoredRegion')
      anno['sx'] = float(annoregion[0].get('sx')) # TODO figure this out, speech bubble pointer location
      anno['sy'] = float(annoregion[0].get('sy'))
    else:
      annoregion = movingRegion.findall('rectRegion')
    # the first region gives position, size and start time, the second the end time
    anno['x'] = int(float(annoregion[0].get('x')) / 100 * width) # location.  all location values seem to be from 0 to 100
    anno['y'] = int(float(annoregion[0].get('y')) / 100 * height) # 0,0 being top left, 100,100 being bottom right
    anno['w'] = int(float(annoregion[0].get('w')) / 100 * width) # size
    anno['h'] = int(float(annoregion[0].get('h')) / 100 * height)
    anno['start'] = videoTimeToMS(annoregion[0].get('t')) # start and end time in video
    anno['end'] = videoTimeToMS(annoregion[1].get('t'))

    action = xmlanno.find('action')
    if action is not None and action.get('type') == 'openUrl': # get URLs to place on link annotations
      anno['link'] = action.find('url').get('value')
    annos.append(anno)

    print("%s" % anno['type'])
    if anno['type'] == 'text':
      print("%s \"%s\"" % (anno['style'], anno['text']))

  #resolve highlights and make relative values absolute, copy time to highlightText
  for anno in annos:
    if anno['type'] == 'text' and anno['style'] == 'highlightText':
      if anno['relativeid'] is None or anno['relativeid'] == "":
        raise Exception("No spaceRelative for highlightText")
      relanno = None
      for findanno in annos:
        if 'id' in findanno and findanno['id'] == anno['relativeid']:
          relanno = findanno
      if relanno is None:
        raise Exception("highlightText refers to id that does not exist")
      anno['x'] = relanno['x'] + anno['x']
      anno['y'] = relanno['y'] + anno['y']
      anno['start'] = relanno['start']
      anno['end'] = relanno['end']

  #sort annotations by start time
  annos.sort(key = lambda x: x['start'])

  #convert times to ASS times h:MM:SS.CC, scale font heights
  root = Tk()  # have the window open as short a time as possible
  try:
    for anno in annos:
      anno['start'] = MSToASSTime(anno['start'])
      anno['end'] = MSToASSTime(anno['end'])
      if anno['type'] == 'text':
        # float() keeps this from being an integer division when textSize is
        # the integer default, which would scale every such size to 0
        anno['textSize'] = float(anno['textSize']) / 100 * height

        #also wrap text.  This part is ugly and requires creating a window
        lines = list()
        for text in anno['text']:
          lines.extend(wordWrap2(text, anno['textSize'], anno['w']))
        # join with the ASS line break; no break after the last line
        anno['text'] = "\\N".join(lines)
  finally:
    # close the window even when measuring or wrapping fails
    root.destroy()

  return annos

# Script entry: expects exactly <file> <width> <height> on the command line
# and writes the result next to the input file as "<file>.ass".
if len(sys.argv) == 4:
  root = getXMLFromFile(sys.argv[1])
  width = int(sys.argv[2])
  height = int(sys.argv[3])
  annos = XMLElementToAnnotationsList(root, width, height)
  with open("%s.ass" % sys.argv[1], "w") as assfile:
    annosToASSFile(annos, assfile, width, height)
else:
  print("USAGE: ytanno2ass.py <file> <width> <height>")