forked from benbjohnson/hashfs
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhashfs.go
553 lines (485 loc) · 19.5 KB
/
hashfs.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
/*
Package hashfs handles cache-busting of files by adding a hash of each file's
contents to the filename.
# How This Works:
- You provide your files, as an [fs.FS].
- When your binary runs, a hash is calculated of a static file's contents.
- The hash is appended to the file's name.
- The new filename is rewritten into your HTML code.
- When a browser requests a static file, using the filename-with-hash, the underlying
file is looked up and served with aggressive caching headers.
# Usage:
- Call [hashfs.NewFS] before parsing your HTML templates.
- Define a func to call [HFS.GetHashPath], and add it to your [html/template.FuncMap].
- Modify your HTML templates to use the func defined in your [html/template.FuncMap]
for each static file you want to cache-bust.
- Call [hashfs.FileServer] in your HTTP router on the endpoint you serve static
files from.
# Example:
See the example/example.go file in the source repo.
# Example FuncMap func:
func static(originalPath string) (hashPath string) {
//If in development mode, just return path as-is. We will serve a non-cache-
//busted version of the file since during development we refresh the browser
//a lot and don't want to cache things mistakenly.
if devMode {
return originalPath
}
//Trim path, if needed.
//
//For example, if your static files are served off of www.example.co/static/
//and your fs.FS lists files "inside" the /static/ directory from your source
//code repo, you need to remove the /static/ part of the URL to find the
//matching source file. The fs.FS files will not have /static/ in their paths
since the fs.FS just contains the files "inside" the /static/ directory.
trimmedPath := strings.TrimPrefix(originalPath, "/static/")
//Get the hashPath. This is where the hash is calculated, if it has not been
//already (this static func was already called on this originalPath when
//another template was being built in this run of your binary).
hashPath = yourHashFS.GetHashPath(trimmedPath)
//Now, we need to add the /static/ back to the path since that is how the
//browser expects it.
return path.Join("/", "static", hashPath)
}
# Definitions:
- original path: the path to the on-disk source file.
- hash path: the path where the filename includes the hash of the file's contents.
- original name: the filename of the on-disk source file.
- hash name: the filename inclusive of the hash of the file's contents.
*/
package hashfs
import (
"crypto"
"crypto/md5"
"crypto/sha256"
"encoding/hex"
"io"
"io/fs"
"net/http"
"os"
"path"
"strconv"
"strings"
"sync"
"time"
)
// Ensure file system implements interface.
//
// Compile-time assertion that *HFS satisfies fs.FS (via the Open method below).
var _ fs.FS = (*HFS)(nil)
// HFS represents an fs.FS with additional lookup tables for storing the calculated
// hashes of each file's contents. The hashes are used for aggressive client-side
// caching and cache-busting.
type HFS struct {
	fsys fs.FS //the underlying file system whose files are hashed and served.

	//Lookup tables.
	//Note that the lookup tables store a path to each file, not just a filename.
	mu                     sync.RWMutex      //guards both maps; HFS is read concurrently by HTTP handlers.
	originalPathToHashPath map[string]string //a cache so we don't have to recalculate hash over and over.
	hashPathReverse        map[string]reverse //get original path and hash from hash path.

	//Options.
	hashLocation hashLocation  //where in the filename the hash is placed (start, first period, or end).
	hashAlgo     crypto.Hash   //hash algorithm used by calculateHash; SHA256 (default) or MD5.
	maxAge       time.Duration //Cache-Control max-age value; default is 1 year (see NewFS).
	hashLength   uint          //0 = full hash length; otherwise the hex hash is trimmed to this many chars.
}
// reverse stores the original name and the calculated hash for a file for use in
// the reverse lookup table. This information is used to serve the on-disk source
// file from the hash path when the file is requested.
//
// I.e.: when the browser requests a file via the hash path, we need a way to look
// up the actual file's contents to serve. This is used to get the originalPath from
// the hashPath which is then used to look up the file in the fs.FS.
//
// The hash is used to set the Etag header. This way we don't have to "rip out" the
// hash from the hashPath.
type reverse struct {
	originalPath string //path to the on-disk source file, used to Open the real file.
	hash         string //hex-encoded content hash, served as the ETag header value.
}
// hashLocation defines the position of the hash in the filename.
//
// The example outputs below match addHashToFilname: the hash is always separated
// with a dash, and the "end" location duplicates the file extension after the hash.
type hashLocation int

const (
	hashLocationStart       hashLocation = iota //script.min.js -> a1b2c3...d4e5f6-script.min.js
	hashLocationFirstPeriod                     //script.min.js -> script-a1b2c3...d4e5f6.min.js; original designed hash location
	hashLocationEnd                             //script.min.js -> script.min.js-a1b2c3...d4e5f6.js

	//default is "end" since this looks the best in browser dev tools.
	//"first period" was the legacy location.
	hashLocationDefault = hashLocationEnd
)
// optionFunc is used to modify the way an HFS works. Option funcs are applied by
// NewFS; see HashLocationStart, HashAlgo, MaxAge, and HashLength for examples.
type optionFunc func(*HFS)
// NewFS returns the provided fs.FS with additional tooling to support calculating the
// hash of each file's contents for caching purposes.
//
// optionFuncs are used for modifying the HFS. Optional funcs were used, versus just
// additional arguments, since this allows for future expansion without breaking
// existing uses and is cleaner than empty unused arguments.
func NewFS(fsys fs.FS, options ...optionFunc) *HFS {
	f := &HFS{
		fsys:                   fsys,
		originalPathToHashPath: make(map[string]string),
		hashPathReverse:        make(map[string]reverse),

		//Defaults; each can be overridden via an optionFunc.
		hashLocation: hashLocationDefault,
		hashAlgo:     crypto.SHA256,
		maxAge:       365 * 24 * time.Hour, //1 year; the product of untyped constants and time.Hour is already a time.Duration.
	}

	//Apply any options.
	for _, option := range options {
		option(f)
	}

	return f
}
//
// There are a few ways to provide the hash location to the NewFS() func. Using a
// func-per-location seems like the cleanest.
//
// We could have exported the hashLocationStart/FirstPeriod/End consts and used
// them directly in NewFS(), but we would have needed a "SetHashLocation()" option
// func anyway (because option funcs are nice for future expansion over just an
// argument to NewFS()) and then you end up with ugly NewFS() calls:
// hashfs.NewFS(f, hashfs.SetHashLocation(hashfs.HashLocationStart)).
//
// We could have not used optional funcs, and instead added an argument to the NewFS()
// func, but this would break existing usage. It would also be annoying to have to
// add a new argument for future options.
//
// HashLocationStart sets the hash to be prepended to the beginning of the filename.
// script.min.js becomes a1b2c3...d4e5f6-script.min.js.
//
// This keeps the filename in one piece, but is a bit ugly for debugging in browser
// devtools: on small/narrow screens the leading hash can consume all of the visible
// width, making it difficult to identify a specific file.
func HashLocationStart() optionFunc {
	return func(h *HFS) {
		h.hashLocation = hashLocationStart
	}
}
// HashLocationEnd sets the hash to be appended to the end of the filename with the
// extension copied after the hash. script.min.js becomes script.min.js-a1b2c3...d4e5f6.js.
// This is the default hash location.
//
// This keeps the filename all together; there really is no downside to this
// location.
func HashLocationEnd() optionFunc {
	return func(h *HFS) {
		h.hashLocation = hashLocationEnd
	}
}
// HashLocationFirstPeriod sets the hash to be added in the middle of the filename,
// specifically at the first period in the filename. This was the original designed
// hash location. script.min.js becomes script-a1b2c3...d4e5f6.min.js
//
// There is really no benefit to this location, and it is a bit ugly since it breaks
// up the filename.
func HashLocationFirstPeriod() optionFunc {
	return func(h *HFS) {
		h.hashLocation = hashLocationFirstPeriod
	}
}
// HashAlgo specifies the algorithm to use to calculate the hash of each file's
// contents. Default is SHA256. MD5 is what S3 uses. This will panic if an
// unsupported algorithm is provided.
//
// This should rarely be needed, since typically you don't really care about the hash
// algorithm. This is provided mostly for people who like looking at shorter MD5 sums.
func HashAlgo(algo crypto.Hash) optionFunc {
	return func(h *HFS) {
		h.hashAlgo = algo

		//Verify the algorithm is supported by hashing a known value; an empty
		//result means calculateHash did not recognize the algorithm.
		if h.calculateHash([]byte("hello world")) == "" {
			panic("unsupported hash algorithm used")
		}
	}
}
// MaxAge specifies the max-age value you want to set for the Cache-Control header.
// Default is 1 year. If an invalid value is given, the default is used.
//
// This should rarely be needed, since typically you want to cache files for a really
// long time. This is provided mostly for development and testing.
func MaxAge(d time.Duration) optionFunc {
	return func(h *HFS) {
		//Negative durations are invalid and ignored; the default stays in effect.
		if d >= 0 {
			h.maxAge = d
		}
	}
}
// HashLength trims the length of the hash added to a filename. Default is the full
// hash length, based on the hash algorithm. Values less than 8 should not be used
// since a collision is highly likely. If 0 is provided, the default hash length is
// used.
//
// This should rarely be needed, since typically you want as long of a hash as possible
// to alleviate collision concerns. This is helpful if you want shorter filenames.
func HashLength(l uint) optionFunc {
	return func(h *HFS) {
		//Zero means "use the full hash"; leave the field at its zero value.
		if l > 0 {
			h.hashLength = l
		}
	}
}
// Open returns a reference to the file at the provided path. The path could be an
// original path or a hash path. If a hash path is given, the original path will be
// looked up to return the file with.
//
// This func is necessary for HFS to implement fs.FS. You should not need to call
// this func directly.
func (hfs *HFS) Open(path string) (fs.File, error) {
	file, _, err := hfs.open(path)
	return file, err
}
// open returns a reference to the file at the provided path. The path could be an
// original path or a hash path. If a hash path is given, the original path will be
// looked up to return the file with.
//
// This differs from Open because the hash of the file at the provided path is also
// returned. The hash is used to set the Etag header.
func (hfs *HFS) open(path string) (f fs.File, hash string, err error) {
	//A hit in the reverse table means the caller passed a hash path: swap in the
	//original path for the fs.FS lookup and report the stored hash. A miss means
	//the path is most likely an original path and is used as-is.
	hfs.mu.RLock()
	defer hfs.mu.RUnlock()

	if r, ok := hfs.hashPathReverse[path]; ok {
		hash = r.hash
		path = r.originalPath
	}

	f, err = hfs.fsys.Open(path)
	return f, hash, err
}
// GetHashPath returns the hashPath for a provided originalPath. The hashPath is the
// originalPath with a hash of the file's contents added to the filename. The hash
// of the contents of the file located at the originalPath will be calculated if it
// has not already been done so. The hash will be saved for future reuse and to
// prevent unnecessary recalculation of the hash each time the same originalPath is
// requested.
func (hfs *HFS) GetHashPath(originalPath string) (hashPath string) {
	//Serve from the cache when this path has already been hashed.
	hfs.mu.RLock()
	cached, ok := hfs.originalPathToHashPath[originalPath]
	hfs.mu.RUnlock()
	if ok {
		return cached
	}

	//Not cached yet: read the file and hash its contents.
	//
	//On any read error, return the original path unchanged so the file can still
	//be served.
	//TODO: somehow notify of this error? log = ugly. panic = ugly. return err?
	contents, err := fs.ReadFile(hfs.fsys, originalPath)
	if err != nil {
		return originalPath
	}

	//Splice the content hash into the filename and rebuild the full path.
	hash := hfs.calculateHash(contents)
	dir, name := path.Split(originalPath)
	hashPath = path.Join(dir, hfs.addHashToFilname(name, hash))

	//Record both directions of the mapping for reuse in the future.
	hfs.mu.Lock()
	hfs.originalPathToHashPath[originalPath] = hashPath
	hfs.hashPathReverse[hashPath] = reverse{originalPath, hash}
	hfs.mu.Unlock()

	return hashPath
}
// calculateHash calculates the hash of a file's contents and returns it with hex
// encoding. If a non-supported hash algorithm is set, the resulting encodedHash will
// be blank (""), however, this should have already been caught in the HashAlgo option
// func when NewFS was called.
func (hfs *HFS) calculateHash(fileContents []byte) (encodedHash string) {
	var hash []byte
	switch hfs.hashAlgo {
	case crypto.SHA256:
		h := sha256.Sum256(fileContents)
		hash = h[:] //h[:] is already a []byte; no conversion needed.
	case crypto.MD5:
		h := md5.Sum(fileContents)
		hash = h[:]
	default:
		//This should never occur since we check if the hash algorithm is supported
		//when NewFS is called. This is here mostly for tests. hash stays nil and
		//hex-encoding a nil slice yields "".
	}
	encodedHash = hex.EncodeToString(hash)

	//Check if the encoded hash should be trimmed to a certain length.
	if hfs.hashLength > 0 && int(hfs.hashLength) < len(encodedHash) {
		encodedHash = encodedHash[:hfs.hashLength]
	}

	return
}
// addHashToFilname adds the hash to the originalName at the location specified by
// hfs.hashLocation. If originalName or hash is blank, the returned hashName will
// also be blank.
func (hfs *HFS) addHashToFilname(originalName, hash string) (hashName string) {
	//Quick validation. Neither of these should ever be blank.
	if originalName == "" || hash == "" {
		return ""
	}

	//Add the hash to the filename. The hash is always separated with a dash to
	//make it stand out a bit.
	switch hfs.hashLocation {
	case hashLocationStart:
		//Prepend the hash to the filename.
		return hash + "-" + originalName

	case hashLocationFirstPeriod:
		//Splice the hash in just before the first period. If the filename has no
		//period at all (no extension; shouldn't really occur), just append the
		//hash instead.
		before, after, found := strings.Cut(originalName, ".")
		if !found {
			return originalName + "-" + hash
		}
		return before + "-" + hash + "." + after

	case hashLocationEnd:
		//Append the hash and duplicate the file's extension after it to prevent
		//breaking MIME type determination in browsers.
		//
		//Note, path.Ext() returns a value starting with a period (i.e.: .css).
		return originalName + "-" + hash + path.Ext(originalName)

	default:
		//This should never occur since hashLocation is set by default and can
		//only be set to one of our defined consts. This is just here since all
		//switches should have a default.
		return ""
	}
}
// hfsHandler is used to define a ServeHTTP func that uses our customized fs.FS.
type hfsHandler struct {
	hfs *HFS //the hash-aware file system that files are looked up in and served from.
}
// FileServer returns an http.Handler for serving files from our custom FS. It
// provides a simplified implementation of http.FileServer which is used to
// aggressively cache files on the client. You would use this in the same manner as
// http.FileServer. Ex.: http.FileServer(http.FS(someStaticFS)) -> hashfs.FileServer(hfs).
//
// Because FileServer is focused on small known path files, several features
// of http.FileServer have been removed including canonicalizing directories,
// defaulting index.html pages, precondition checks, & content range headers.
func FileServer(fsys fs.FS) http.Handler {
	//Wrap a plain fs.FS in an HFS when needed so the handler always serves from
	//our custom type.
	h, isHFS := fsys.(*HFS)
	if !isHFS {
		h = NewFS(fsys)
	}

	return &hfsHandler{hfs: h}
}
// ServeHTTP serves files from our custom FS.
//
// This func is necessary to fulfill the requirements of hfsHandler to be used as
// an http.Handler.
func (hh *hfsHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	//Get path of file being requested. This should match a hash path, but could be
	//an original path if a hash was never calculated for the file.
	filePath := r.URL.Path

	//Clean up filePath based on URL path.
	if filePath == "/" {
		filePath = "."
	} else {
		filePath = strings.TrimPrefix(filePath, "/")
	}
	filePath = path.Clean(filePath)

	//Get the file from our fs.FS.
	//
	//This will look up the original file if the filePath is a hash path. If the
	//filePath is an original path (i.e. we don't have this original path in our
	//lookup tables), then the given path is used to look up the file with.
	f, hash, err := hh.hfs.open(filePath)
	if os.IsNotExist(err) {
		//Handle if no file exists at the given path.
		http.Error(w, http.StatusText(http.StatusNotFound), http.StatusNotFound)
		return
	} else if err != nil {
		//Handle if some other error occurred.
		//TODO: not sure how to test this. How do we get Open to return an error?
		http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
		return
	}
	defer f.Close()

	//Get file's info.
	//
	//This is used to make sure a directory wasn't mistakenly requested or some
	//other strange error occurred with the file.
	info, err := f.Stat()
	if err != nil {
		//TODO: not sure how to test this. How do we get Stat to return an error?
		http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
		return
	} else if info.IsDir() {
		http.Error(w, http.StatusText(http.StatusForbidden), http.StatusForbidden)
		return
	}

	//Set aggressive caching headers.
	//
	//We check if a hash exists to prevent setting caching headers on non-hashed
	//files. We don't want to cache these files aggressively since if the source
	//changes, the browser won't know this and thus continue serving the old files.
	//
	//Note that if you use Cloudflare free tier, Cloudflare will apply a "W/" to
	//the beginning of the Etag value automatically. The "W" represents a weak Etag
	//value. For some reason Cloudflare thinks they know better here about strong
	//versus weak Etag values.
	//https://developers.cloudflare.com/cache/reference/etag-headers/#strong-etags
	if hash != "" {
		w.Header().Set("Cache-Control", hh.hfs.getCacheControl())
		w.Header().Set("ETag", hash)

		//We don't set a Last-Modified header since the file info available for
		//files in an fs.FS does not include when the file was modified. Instead,
		//the ModTime() is when the binary was built and the files were embedded.
	}

	//Write out the file's contents.
	switch f := f.(type) {
	case io.ReadSeeker:
		http.ServeContent(w, r, filePath, info.ModTime(), f)
	default:
		//Only write out file's data on non-HEAD requests.
		//
		//Content-Length must be set BEFORE WriteHeader is called; any header set
		//after the status line is written is silently ignored (this was a bug:
		//the header was previously set after WriteHeader and never sent).
		//TODO: not sure how to test this "default" case.
		w.Header().Set("Content-Length", strconv.FormatInt(info.Size(), 10))
		w.WriteHeader(http.StatusOK)
		if r.Method != "HEAD" {
			//Copy error intentionally ignored: the response is already committed
			//and there is nothing useful to do if the client disconnects mid-copy.
			io.Copy(w, f)
		}
	}
}
// getCacheControl creates the value stored in the Cache-Control header. This was
// separated out into a function for better testing and future ability to customize
// the max-age via an optionFunc.
func (hfs *HFS) getCacheControl() string {
	seconds := int64(hfs.maxAge.Seconds())
	return "public, max-age=" + strconv.FormatInt(seconds, 10) + ", immutable"
}
//printEmbeddedFileList used as development tool only.
// func (hfs *HFS) printEmbeddedFileList() (output []string) {
// //the directory "." means the root directory of the embedded file.
// const startingDirectory = "."
// err := fs.WalkDir(hfs.fsys, startingDirectory, func(path string, d fs.DirEntry, err error) error {
// output = append(output, path)
// return nil
// })
// if err != nil {
// output = []string{"error walking embedded directory", err.Error()}
// return
// }
// return
// }