@@ -171,96 +171,183 @@ Parse.Cloud.job("updateLanguageRecords", async (request) => {
171
171
request . message ( "Completed successfully." ) ;
172
172
} ) ;
173
173
174
- // A background job to populate the analytics_* fields in the books table.
174
+ // A background job to populate the analytics_* fields in our books table
175
+ // from api.bloomlibrary.org/stats. Data comes from our postgresql analytics database populated from Segment.
175
176
//
176
177
// This is scheduled on Azure under bloom-library-maintenance-{prod|dev}-daily.
177
178
// You can also run it manually via REST:
178
179
// curl -X POST -H "X-Parse-Application-Id: <app ID>" -H "X-Parse-Master-Key: <master key>" -d "{}" https://bloom-parse-server-develop.azurewebsites.net/parse/jobs/updateBookAnalytics
179
180
Parse . Cloud . job ( "updateBookAnalytics" , async ( request ) => {
180
181
request . log . info ( "updateBookAnalytics - Starting." ) ;
181
182
182
// api.bloomlibrary.org/stats looks up analytics based on a parse server query.
// With that query, we have to provide which parse server we want to get data for (prod|dev|unittest).
// We want to look up books from the instance we are currently running in,
// so we gather our parse server metadata from the environment variables.
//
// Returns { url, appId }:
//   url   - SERVER_URL with any trailing slashes removed. The caller appends
//           "/classes/books", so a trailing slash here would produce "//".
//   appId - APP_ID, sent by the caller as the X-Parse-Application-Id header.
//
// When testing locally, you need to explicitly set which environment you want
// to collect analytics data for. You'll need to override using something like
// return {
//   url: "https://dev-server.bloomlibrary.org/parse",
//   appId: "yrXftBF6mbAuVu3fO6LnhCJiHxZPIdE7gl1DUVGR",
// };
function getCurrentInstanceInfoForApiQuery() {
    const serverUrl = process.env.SERVER_URL;
    return {
        // An unset SERVER_URL is passed through unchanged (undefined) rather than masked.
        url:
            typeof serverUrl === "string"
                ? serverUrl.replace(/\/+$/, "")
                : serverUrl,
        appId: process.env.APP_ID,
    };
}
198
/**
 * Coerce a raw analytics value into a number that is safe to store.
 *
 * @param {*} value - Raw value from the stats results. Anything falsy
 *   (undefined, null, "", 0) yields 0.
 * @param {boolean} [isDecimal=false] - When true, parse with parseFloat;
 *   otherwise parse as a base-10 integer with parseInt.
 * @returns {number} The parsed number, or 0 when the value is missing or
 *   does not parse to a finite number.
 */
function getNumberOrZero(value, isDecimal = false) {
    if (!value) return 0;

    const number = isDecimal ? parseFloat(value) : parseInt(value, 10);

    // Number.isFinite rejects NaN and also +/-Infinity. parseFloat("Infinity")
    // is Infinity, which serializes to null in JSON, so treat it as "no data".
    return Number.isFinite(number) ? number : 0;
}
210
+ // key/value pairs of column names to analytics results metadata
211
+ const analyticsColumnsMap = {
212
+ analytics_startedCount : {
213
+ apiResultName : "started" ,
214
+ } ,
215
+ analytics_finishedCount : {
216
+ apiResultName : "finished" ,
217
+ } ,
218
+ analytics_shellDownloads : {
219
+ apiResultName : "shelldownloads" ,
220
+ } ,
221
+ analytics_pdfDownloads : {
222
+ apiResultName : "pdfdownloads" ,
223
+ } ,
224
+ analytics_epubDownloads : {
225
+ apiResultName : "epubdownloads" ,
226
+ } ,
227
+ analytics_bloompubDownloads : {
228
+ apiResultName : "bloompubdownloads" ,
229
+ } ,
230
+ analytics_questionsInBookCount : {
231
+ apiResultName : "numquestionsinbook" ,
232
+ } ,
233
+ analytics_quizzesTakenCount : {
234
+ apiResultName : "numquizzestaken" ,
235
+ } ,
236
+ analytics_meanQuestionsCorrectPct : {
237
+ apiResultName : "meanpctquestionscorrect" ,
238
+ isDecimal : true ,
239
+ } ,
240
+ analytics_medianQuestionsCorrectPct : {
241
+ apiResultName : "medianpctquestionscorrect" ,
242
+ isDecimal : true ,
243
+ } ,
244
+ } ;
203
245
204
246
try {
205
247
const bloomApiUrl = "https://api.bloomlibrary.org/v1" ;
206
248
// "http://127.0.0.1:7071/v1"; // testing with a locally-run api
207
249
208
- //Query the api for per-books stats for all books
250
+ // Query the api for per-book stats for all books.
251
+ // What is going on behind the scenes is actually somewhat convoluted.
252
+ // We give the api the query to run to get the parse books.
253
+ // It sends that list of books to the postgresql database to get the analytics data
254
+ // and returns it to us. It would be more efficient to ask the postgresql database
255
+ // ourselves, but the api endpoint already exists, and I didn't want to provide
256
+ // postgres connection information to the parse server.
209
257
const axios = require ( "axios" ) ;
210
- const results = await axios . post (
258
+ const analyticsResults = await axios . post (
211
259
`${ bloomApiUrl } /stats/reading/per-book` ,
212
260
{
213
261
filter : {
214
262
parseDBQuery : {
215
- url : `${ getConnectionInfo ( ) . url } classes/books` ,
263
+ url : `${
264
+ getCurrentInstanceInfoForApiQuery ( ) . url
265
+ } /classes/books`,
216
266
method : "GET" ,
217
267
options : {
218
- headers : getConnectionInfo ( ) . headers ,
268
+ headers : {
269
+ "X-Parse-Application-Id" : `${
270
+ getCurrentInstanceInfoForApiQuery ( ) . appId
271
+ } `,
272
+ } ,
219
273
params : {
220
- limit : 1000000 ,
274
+ limit : 1000000 , // Default is 100. We want all of them.
221
275
keys : "objectId,bookInstanceId" ,
222
276
} ,
223
277
} ,
224
278
} ,
225
279
} ,
226
280
}
227
281
) ;
282
+ const analyticsSourceData = analyticsResults . data . stats ;
283
+
284
+ // Make a map of bookInstanceId to analytics data for efficiency
285
+ const bookInstanceIdToAnalyticsMap = { } ;
286
+ analyticsSourceData . forEach ( ( bookAnalytics ) => {
287
+ bookInstanceIdToAnalyticsMap [ bookAnalytics . bookinstanceid ] =
288
+ bookAnalytics ;
289
+ } ) ;
228
290
229
- //Loop through all books, updating analytics
291
+ // Get all the books in our parse database.
292
+ // If the analytics values need to be updated, push it into
293
+ // a new array of books to update.
294
+ const booksToUpdate = [ ] ;
230
295
const bookQuery = new Parse . Query ( "books" ) ;
231
296
bookQuery . limit ( 1000000 ) ; // Default is 100. We want all of them.
232
- bookQuery . select ( "bookInstanceId" ) ;
233
- const books = await bookQuery . find ( ) ;
234
- books . forEach ( ( book ) => {
235
- const { bookInstanceId } = book . attributes ;
236
- const bookStats = results . data . stats . find (
237
- ( bookStat ) => bookStat . bookinstanceid === bookInstanceId
238
- ) ;
239
- book . set (
240
- "analytics_finishedCount" ,
241
- getNumberOrZero ( bookStats ?. finished )
242
- ) ;
243
- book . set (
244
- "analytics_shellDownloads" ,
245
- getNumberOrZero ( bookStats ?. shelldownloads )
246
- ) ;
247
- book . set ( "updateSource" , "updateBookAnalytics" ) ;
297
+ bookQuery . select ( "bookInstanceId" , ...Object . keys ( analyticsColumnsMap ) ) ;
298
+
299
+ const allBooks = await bookQuery . find ( ) ;
300
+ allBooks . forEach ( ( book ) => {
301
+ const bookAnalytics =
302
+ bookInstanceIdToAnalyticsMap [ book . get ( "bookInstanceId" ) ] ;
303
+
304
+ let bookNeedsUpdate = false ;
305
+ Object . keys ( analyticsColumnsMap ) . forEach ( ( columnName ) => {
306
+ const newValue = getNumberOrZero (
307
+ bookAnalytics ?. [
308
+ analyticsColumnsMap [ columnName ] . apiResultName
309
+ ] ,
310
+ analyticsColumnsMap [ columnName ] . isDecimal || false
311
+ ) ;
312
+
313
+ if ( book . get ( columnName ) !== newValue ) {
314
+ book . set ( columnName , newValue ) ;
315
+ bookNeedsUpdate = true ;
316
+ }
317
+ } ) ;
318
+ if ( bookNeedsUpdate ) {
319
+ // Important to set updateSource for proper processing in beforeSave (see details there).
320
+ book . set ( "updateSource" , "updateBookAnalytics" ) ;
321
+
322
+ booksToUpdate . push ( book ) ;
323
+ }
248
324
} ) ;
249
325
250
- //Save all books
251
- const successfulUpdates = await Parse . Object . saveAll ( books , {
326
+ request . log . info ( "booksToUpdate" , booksToUpdate ) ;
327
+
328
+ //Save any books with updated analytics.
329
+ const successfulUpdates = await Parse . Object . saveAll ( booksToUpdate , {
252
330
useMasterKey : true ,
253
331
} ) ;
254
332
request . log . info (
255
333
`updateBookAnalytics - Updated analytics for ${ successfulUpdates . length } books.`
256
334
) ;
257
335
} catch ( error ) {
258
336
if ( error . code === Parse . Error . AGGREGATE_ERROR ) {
259
- error . errors . forEach ( ( iError ) => {
337
+ const maxErrors = 20 ; // Don't blow up the log.
338
+ for ( let i = 0 ; i < error . errors . length && i < maxErrors ; i ++ ) {
339
+ const iError = error . errors [ i ] ;
260
340
request . log . error (
261
341
`Couldn't process ${ iError . object . id } due to ${ iError . message } `
262
342
) ;
263
- } ) ;
343
+ }
344
+ if ( error . errors . length > maxErrors ) {
345
+ request . log . error (
346
+ `${
347
+ error . errors . length - maxErrors
348
+ } more errors were suppressed.`
349
+ ) ;
350
+ }
264
351
request . log . error (
265
352
"updateBookAnalytics - Terminated unsuccessfully."
266
353
) ;
@@ -735,8 +822,16 @@ Parse.Cloud.define("setupTables", async () => {
735
822
{ name : "bloomPUBVersion" , type : "Number" } ,
736
823
737
824
// analytics_* fields are populated by the updateBookAnalytics job.
825
+ { name : "analytics_startCount" , type : "Number" } ,
738
826
{ name : "analytics_finishedCount" , type : "Number" } ,
739
827
{ name : "analytics_shellDownloads" , type : "Number" } ,
828
+ { name : "analytics_pdfDownloads" , type : "Number" } ,
829
+ { name : "analytics_epubDownloads" , type : "Number" } ,
830
+ { name : "analytics_bloompubDownloads" , type : "Number" } ,
831
+ { name : "analytics_questionsInBookCount" , type : "Number" } ,
832
+ { name : "analytics_quizzesTakenCount" , type : "Number" } ,
833
+ { name : "analytics_meanQuestionsCorrectPct" , type : "Number" } ,
834
+ { name : "analytics_medianQuestionsCorrectPct" , type : "Number" } ,
740
835
] ,
741
836
} ,
742
837
{
0 commit comments