-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathscraper.coffee
93 lines (70 loc) · 1.89 KB
/
scraper.coffee
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# Import Libraries
request = require "request-promise"
Promise = require "bluebird"
fs = require "fs"
# Scraper Class
class Scraper
  # Base URL that every API request in this class is built from.
  _endpoint: "https://api.justwatch.com"

  # Upper bound on simultaneous show-detail requests issued by `scrape`.
  _concurrency: 10

  # Downloads the "popular" listing (shows and movies) for `locale`.
  #
  # Pages are requested one after another (Promise.each is sequential) and
  # their `items` arrays are appended in page order.
  #
  # locale - locale string interpolated into the request path.
  #
  # Returns a promise resolving to the accumulated array of title items.
  _popular: (locale) ->
    page_size  = 200
    item_count = 1000
    page_count = Math.ceil(item_count / page_size)
    items_list = []

    # Fetches one 1-based page and appends its items to `items_list`.
    fetchPage = (page) =>
      console.log "* Downloading titles #{page_size * page}/#{item_count}"
      options =
        uri: "#{@_endpoint}/titles/#{locale}/popular"
        method: 'POST'
        gzip: true
        json: true
        body:
          content_types: [ "show", "movie" ]
          page: page
          page_size: page_size
      request(options).then (data) ->
        # A response without `items` contributes nothing, instead of pushing
        # `undefined` into the list and breaking later property access.
        items_list = items_list.concat(data?.items ? [])

    Promise.each([1..page_count], fetchPage).then -> items_list

  # Downloads the detail record of a single show.
  #
  # id     - show identifier interpolated into the request path.
  # locale - locale string interpolated into the request path.
  #
  # Returns a promise resolving to the parsed JSON response.
  _show: (id, locale) ->
    console.log "* Downloading show data for ID: ##{id}"
    url = "#{@_endpoint}/titles/show/#{id}/locale/#{locale}"
    request.get
      uri: url
      json: true

  # Scrapes the popular titles and reduces each one to a small summary.
  #
  # Movies are summarised from the listing alone; every other title gets an
  # extra `_show` request so that `season_count` can be filled in.
  #
  # locale - optional locale string; defaults to "en_US" when null/undefined.
  #
  # Returns a promise resolving to `{ titles, title_count, locale }`, where
  # each entry of `titles` is `{ title, year, type }` plus `season_count`
  # for non-movie titles. Rejects if any underlying request fails.
  scrape: (locale) ->
    locale ?= "en_US"

    # Builds the summary object for one listing item.
    summarizeTitle = (title) =>
      data =
        title: title.title
        year: title.original_release_year
        type: title.object_type
      return data if data.type == "movie"
      @_show(title.id, locale).then (show) ->
        # A payload without `seasons` counts as zero seasons rather than
        # throwing and rejecting the whole scrape.
        data.season_count = show?.seasons?.length ? 0
        data

    # Promise.map keeps input order in its result but caps the number of
    # requests in flight, unlike Promise.all over an eagerly mapped array.
    summarizeAll = (titles) =>
      Promise.map titles, summarizeTitle, { concurrency: @_concurrency }

    # Wraps the summaries in the final result envelope.
    buildResult = (titles) ->
      return {
        titles: titles
        title_count: titles.length
        locale: locale
      }

    @_popular(locale).then(summarizeAll).then(buildResult)
# Export the Scraper class so other modules can `require` this file and
# instantiate it themselves.
module.exports = Scraper
# Initialize
# --------------------------------
# When this file is run directly from the command line (rather than being
# required by another module), scrape JustWatch.com for the locale given as
# the first CLI argument and save the JSON result to output.txt next to
# this file. Failures are reported and reflected in the exit code.
if require.main == module
  filePath = "#{__dirname}/output.txt"

  # Serialises the scrape result and writes it to `filePath`; the returned
  # promise rejects if the write fails, so "File saved" is only printed on
  # success.
  saveResponse = (response) ->
    data = JSON.stringify response
    new Promise (resolve, reject) ->
      fs.writeFile filePath, data, (err) ->
        return reject(err) if err?
        console.log "* File saved to #{filePath}"
        resolve()

  # Reports any scrape or write failure and marks the process as failed.
  reportFailure = (err) ->
    console.error "* Scrape failed: #{err?.message ? err}"
    process.exitCode = 1

  scraper = new Scraper()
  scraper.scrape(process.argv[2]).then(saveResponse).catch(reportFailure)