This repository was archived by the owner on Jan 30, 2025. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 35
/
Copy pathstars.js
157 lines (126 loc) · 4.11 KB
/
stars.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
const normalize = require('normalize-package-data')
const parallel = require('parallel-transform')
const gitInfo = require('hosted-git-info')
const sublevel = require('level-sublevel')
const rapid = require('rapid-stream')
const sort = require('sort-stream2')
const json = require('JSONStream')
const through = require('through2')
const request = require('request')
const ghauth = require('ghauth')
const level = require('level')
const path = require('path')
const url = require('url')
const fs = require('fs')
// Number of milliseconds in one week (ms * s * min * h * days).
const WEEK = 1000 * 60 * 60 * 24 * 7
// dat handle rooted at this script's directory. kickOff is handed over as
// the callback (presumably invoked once the store is ready, receiving an
// error as its first argument -- confirm against the dat API).
const dat = require('dat')(__dirname, kickOff)
// On-disk location of the local cache database, next to this script.
const dbdir = path.join(__dirname, '.stars')
// level database with JSON-encoded values, wrapped by level-sublevel so it
// can be split into the named sections below.
const db = sublevel(level(dbdir, {
valueEncoding: 'json'
}))
// Section keyed by package name; holds the star count last stored for it.
const counts = db.sublevel('counts')
// Section keyed by package name; holds the Date.now() timestamp recorded
// when the count was stored.
const dates = db.sublevel('dates')
// GitHub token; stays null until kickOff assigns it from ghauth's authData.
var token = null
/**
 * Entry point: authenticates against GitHub, then streams every row of
 * data/_downloads.json through two stages (metadata lookup, star lookup)
 * and writes the collected `{ name, count }` rows to data/_stars.json.
 *
 * @param {Error} [err] error reported by the caller; rethrown as-is.
 * @throws {Error} when `err` is set, or when ghauth reports an error.
 */
function kickOff(err) {
  if (err) throw err

  ghauth({
    configName: 'unpm',
    scopes: ['user'],
    note: 'npm data aggregation project',
    userAgent: 'unpm'
  }, function(err, authData) {
    if (err) throw err

    const stars = []
    let count = 0

    token = authData.token

    fs.createReadStream(path.join(__dirname, 'data', '_downloads.json'))
      .pipe(json.parse([true]))
      .pipe(rapid(100, grabMeta))
      .pipe(rapid(20, grabStars))
      .on('data', function(row) {
        // The divisor 256 * 128 is hard-coded; presumably the expected
        // number of rows -- TODO confirm. The figure is only a progress hint.
        console.log((100*(count++)/256/128).toFixed(3) + '%', row.name)
        stars.push(row)
      })
      .once('end', function() {
        fs.writeFileSync(path.join(__dirname, 'data', '_stars.json'), JSON.stringify(stars))
      })
  })

  /**
   * Stage 1: looks up the record stored in dat under the row's name and
   * hands dat's result straight to `next`.
   */
  function grabMeta(row, _, next) {
    dat.get(row.name, next)
  }

  /**
   * Stage 2: resolves the GitHub star count for one package record.
   * Emits `{ name, count }` through `next`, or calls `next()` with no
   * value for records that should not be counted.
   */
  function grabStars(pkg, _, next) {
    if (pkg._deleted) return next()
    if (!pkg['dist-tags']) return next()
    if (!pkg['dist-tags'].latest) return next()
    // Skip design documents. The type check keeps a record without a
    // `key` from throwing here.
    if (typeof pkg.key === 'string' && pkg.key.indexOf('_design/') === 0) return next()

    const name = pkg.name

    // Serve from the local cache unless either lookup fails or the stored
    // timestamp is more than 99 weeks old.
    dates.get(name, function(err, date) {
      if (err) return ping(pkg)

      const age = Date.now() - new Date(date)
      if (age > WEEK * 99) return ping(pkg)

      counts.get(name, function(err, cached) {
        if (err) return ping(pkg)
        next(null, {
          name: name,
          count: cached
        })
      })
    })

    /**
     * Asks the GitHub API for the repository's star count. Packages
     * without a resolvable GitHub repository are stored with a count of 0.
     */
    function ping(pkg) {
      // Work on a local value so the incoming record is not modified.
      const repository = pkg.repository || {}
      const uri = typeof repository === 'string'
        ? repository
        : repository.type === 'git' && repository.url

      if (!uri) return submit(0)

      const info = gitInfo.fromUrl(uri)
      if (!info) return submit(0)
      if (info.type !== 'github') return submit(0)

      request.get('https://api.github.com/repos/'+info.user+'/'+info.project, {
        json: true,
        headers: {
          'Authorization': 'token ' + token,
          'User-Agent': 'unpm'
        }
      }, function(err, res, body) {
        if (err) return next(err)

        const remain = parseInt(res.headers['x-ratelimit-remaining'], 10)
        const delay = rateLimitDelay(res.headers, Date.now())

        console.log('retrieved:', name, '('+(body && body.stargazers_count)+')')
        console.log('remaining:', remain)
        console.log('limit delay:', delay)

        // Hold this pipeline slot for `delay` ms before reporting the
        // result, which spaces out the requests that follow.
        setTimeout(function() {
          if (!body) return submit(0)
          // A body that did not parse as JSON arrives as a string; the
          // `in` operator below would throw on it.
          if (typeof body !== 'object') {
            return next(new Error('unexpected response body for ' + name))
          }
          if ('stargazers_count' in body) {
            return submit(body.stargazers_count)
          }
          if (body.message === 'Not Found') {
            return submit(0)
          }
          return next(new Error(body.message))
        }, delay)
      })
    }

    /**
     * Stores the count and the current timestamp in the local cache, then
     * emits `{ name, count }` downstream.
     */
    function submit(count) {
      counts.put(name, count, function(err) {
        if (err) return next(err)
        dates.put(name, Date.now(), function(err) {
          if (err) return next(err)
          next(null, {
            name: name,
            count: count
          })
        })
      })
    }
  }
}

/**
 * Computes how long to wait after a GitHub API response, in milliseconds.
 *
 * The time left until the rate-limit window resets is divided evenly over
 * the remaining requests and then scaled by 30 (fewer than 1500 requests
 * left) or by 10 (otherwise).
 *
 * @param {Object} headers response headers; reads `x-ratelimit-reset`
 *   (seconds) and `x-ratelimit-remaining`.
 * @param {number} now current time in milliseconds.
 * @returns {number} non-negative, finite delay in milliseconds.
 */
function rateLimitDelay(headers, now) {
  const resetAt = parseInt(headers['x-ratelimit-reset'], 10) * 1000 || now
  const remaining = parseInt(headers['x-ratelimit-remaining'], 10)
  // A reset time already in the past must not produce a negative delay.
  const untilReset = Math.max(resetAt - now, 0)

  // No usable header: nothing to base a delay on.
  if (!Number.isFinite(remaining)) return 0
  // Quota exhausted: dividing by zero would give Infinity, which
  // setTimeout treats as 1 ms. Wait for the window to reset instead.
  if (remaining <= 0) return untilReset

  const spacing = untilReset / remaining
  return remaining < 1500 ? spacing * 30 : spacing * 10
}
/**
 * Comparator that orders rows by their `count` property, largest first.
 *
 * @param {{count: number}} a first row.
 * @param {{count: number}} b second row.
 * @returns {number} positive when `b` has the larger count, negative when
 *   `a` does, zero when they are equal.
 */
function sorter(a, b) {
  const rightCount = b.count
  const leftCount = a.count
  return rightCount - leftCount
}