-
Notifications
You must be signed in to change notification settings - Fork 1
/
smi.go
153 lines (130 loc) · 2.66 KB
/
smi.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
// Copyright 2013-2015, Homin Lee. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package subtitle
import (
	"fmt"
	"io"
	"strconv"
	"strings"
	"time"

	"golang.org/x/net/html"
)
// nextTs is the timestamp taken from the most recent sync tag seen by ReadSmi.
// ReadSmi uses it as the Start of each new script and as the End of the
// previous script when that End is still zero.
// NOTE(review): this is package-level state written by ReadSmi without any
// synchronization, so concurrent ReadSmi calls would race on it.
var nextTs time.Duration

// lastBr is set by ReadSmi when a br tag is seen and cleared by the next text
// token, whose text is then appended to the previous script on a new line.
var lastBr bool
// State identifies the step the SAMI parser is currently executing.
type State uint8

// Parser states, numbered from zero in declaration order.
const (
	StateFindTag State = iota
	StateIdle
	StateSync
	StateText
	StateBr
)

// String returns the identifier of s, or "StateUnknown" when s is not one of
// the declared states.
func (s State) String() string {
	names := [...]string{
		StateFindTag: "StateFindTag",
		StateIdle:    "StateIdle",
		StateSync:    "StateSync",
		StateText:    "StateText",
		StateBr:      "StateBr",
	}
	if int(s) >= len(names) {
		return "StateUnknown"
	}
	return names[s]
}
// ReadSmi parses a SAMI (.smi) subtitle stream from r and returns the scripts
// it contains, in the order they appear.
//
// The stream is tokenized as HTML and handled as follows:
//   - A sync start tag sets the current timestamp, read as an integer number
//     of milliseconds from the tag's "start" attribute (attribute keys are
//     lower-cased by the tokenizer). If no attribute is named "start", the
//     first attribute is used.
//   - A text token closes the previous script by setting its End to the
//     current timestamp if End is still zero. If the text is non-empty after
//     trimming, it also opens a new script starting at the current timestamp.
//   - A text token that follows a br start tag is instead appended to the
//     previous script after a newline.
//   - Text that is entirely an HTML comment ("<!--" ... "-->") is ignored.
//
// Tokens whose raw form is only whitespace are skipped.
//
// A nil Book and a non-nil error are returned when the tokenizer fails with
// anything other than io.EOF, when a sync tag has no attributes, or when its
// start value is not an integer.
//
// ReadSmi keeps its position in the package-level variables nextTs and
// lastBr, so it must not be called concurrently.
func ReadSmi(r io.Reader) (book Book, err error) {
	// Clear whatever a previous call left behind so that every stream is
	// parsed from a clean state.
	nextTs = 0
	lastBr = false

	z := html.NewTokenizer(r)

	var (
		state State
		raw   string
		t     html.Token
	)

	for {
		switch state {
		case StateFindTag:
			if z.Next() == html.ErrorToken {
				if tokErr := z.Err(); tokErr != io.EOF {
					return nil, tokErr
				}
				// End of input: parsing finished normally.
				return book, nil
			}

			// Raw must be captured before Token, which may rewrite the
			// tokenizer's buffer.
			raw = string(z.Raw())
			t = z.Token()
			if strings.TrimSpace(raw) == "" {
				continue
			}

			// Pick the handler for this token; anything else is ignored and
			// the next token is fetched.
			switch {
			case t.Type == html.StartTagToken && t.Data == "sync":
				state = StateSync
			case t.Type == html.TextToken:
				state = StateText
			case t.Type == html.StartTagToken && t.Data == "br":
				state = StateBr
			}

		case StateSync:
			if len(t.Attr) == 0 {
				return nil, fmt.Errorf("smi: sync tag %q has no start attribute", raw)
			}
			// Prefer the attribute actually named "start"; fall back to the
			// first attribute so that previously accepted input still parses.
			start := t.Attr[0].Val
			for _, a := range t.Attr {
				if a.Key == "start" {
					start = a.Val
					break
				}
			}
			ms, convErr := strconv.Atoi(start)
			if convErr != nil {
				return nil, fmt.Errorf("smi: invalid sync start %q: %v", start, convErr)
			}
			nextTs = time.Duration(ms) * time.Millisecond
			state = StateFindTag

		case StateText:
			state = StateFindTag
			s := strings.TrimSpace(t.Data)

			// Ignore text that is an HTML comment in its entirety.
			if strings.HasPrefix(s, "<!--") && strings.HasSuffix(s, "-->") {
				continue
			}

			// Text right after a br tag continues the previous script on a
			// new line instead of starting a new one.
			if lastBr {
				lastBr = false
				if len(book) > 0 {
					last := &book[len(book)-1]
					last.Text += "\n" + s
				}
				continue
			}

			// Any text, including blank text used to clear the screen, ends
			// the previous script if it has no End yet.
			if len(book) > 0 {
				if last := &book[len(book)-1]; last.End == 0 {
					last.End = nextTs
				}
			}

			// Only text with actual content opens a new script.
			if s != "" {
				book = append(book, Script{Text: s, Start: nextTs})
			}

		case StateBr:
			lastBr = true
			state = StateFindTag
		}
	}
}