Skip to content

Commit 4b250b5

Browse files
committed
Add other columns and post-review changes (BL-13994)
1 parent ff77a43 commit 4b250b5

File tree

1 file changed

+134
-39
lines changed

1 file changed

+134
-39
lines changed

cloud/main.js

+134-39
Original file line numberDiff line numberDiff line change
@@ -171,96 +171,183 @@ Parse.Cloud.job("updateLanguageRecords", async (request) => {
171171
request.message("Completed successfully.");
172172
});
173173

174-
// A background job to populate the analytics_* fields in the books table.
174+
// A background job to populate the analytics_* fields in our books table
175+
// from api.bloomlibrary.org/stats. Data comes from our postgresql analytics database populated from Segment.
175176
//
176177
// This is scheduled on Azure under bloom-library-maintenance-{prod|dev}-daily.
177178
// You can also run it manually via REST:
178179
// curl -X POST -H "X-Parse-Application-Id: <app ID>" -H "X-Parse-Master-Key: <master key>" -d "{}" https://bloom-parse-server-develop.azurewebsites.net/parse/jobs/updateBookAnalytics
179180
Parse.Cloud.job("updateBookAnalytics", async (request) => {
180181
request.log.info("updateBookAnalytics - Starting.");
181182

182-
// api.bloomlibrary.org/stats looks up analytics based on a parse server query.
// With that query, we have to provide which parse server we want to get data for (prod|dev|unittest).
// We want to look up books from the instance we are currently running in,
// so we gather our parse server metadata from the environment variables.
//
// Returns { url, appId }:
//   url   - SERVER_URL with any trailing slashes removed (undefined if the variable is unset)
//   appId - APP_ID exactly as found in the environment
//
// When testing locally, you need to explicitly set which environment you want
// to collect analytics data for. You'll need to override using something like
// return {
//     url: "https://dev-server.bloomlibrary.org/parse",
//     appId: "yrXftBF6mbAuVu3fO6LnhCJiHxZPIdE7gl1DUVGR",
// };
function getCurrentInstanceInfoForApiQuery() {
    const serverUrl = process.env.SERVER_URL;
    return {
        // The caller appends "/classes/books" to this value, so a trailing
        // slash here would yield a "//classes/books" path. Normalize it away.
        url:
            typeof serverUrl === "string"
                ? serverUrl.replace(/\/+$/, "")
                : serverUrl,
        appId: process.env.APP_ID,
    };
}
198-
/**
 * Coerce a raw analytics value to a finite number, falling back to 0.
 *
 * @param {*} value - value to convert (presumably a string or number from
 *     the stats api; null/undefined/"" and other falsy values yield 0).
 * @param {boolean} [isDecimal=false] - when true, parse as a float;
 *     otherwise parse as a base-10 integer (any fractional part is dropped).
 * @returns {number} the parsed number, or 0 if the value is falsy or does
 *     not parse to a finite number.
 */
function getNumberOrZero(value, isDecimal = false) {
    if (!value) return 0;

    const number = isDecimal
        ? Number.parseFloat(value)
        : Number.parseInt(value, 10);

    // isFinite (rather than !isNaN) also rejects +/-Infinity, which
    // parseFloat accepts but which JSON serializes as null.
    return Number.isFinite(number) ? number : 0;
}
210+
// Lookup from books-table column name to metadata about the matching field
// in the analytics results:
//   apiResultName - property name to read from a per-book stats row
//   isDecimal     - present (true) only for columns parsed as floats
// Each row below is [columnName, apiResultName, isDecimal?].
const analyticsColumnsMap = Object.fromEntries(
    [
        ["analytics_startedCount", "started"],
        ["analytics_finishedCount", "finished"],
        ["analytics_shellDownloads", "shelldownloads"],
        ["analytics_pdfDownloads", "pdfdownloads"],
        ["analytics_epubDownloads", "epubdownloads"],
        ["analytics_bloompubDownloads", "bloompubdownloads"],
        ["analytics_questionsInBookCount", "numquestionsinbook"],
        ["analytics_quizzesTakenCount", "numquizzestaken"],
        [
            "analytics_meanQuestionsCorrectPct",
            "meanpctquestionscorrect",
            true,
        ],
        [
            "analytics_medianQuestionsCorrectPct",
            "medianpctquestionscorrect",
            true,
        ],
    ].map(([columnName, apiResultName, isDecimal]) => [
        columnName,
        // Only decimal columns carry the isDecimal key at all.
        isDecimal ? { apiResultName, isDecimal } : { apiResultName },
    ])
);
203245

204246
try {
205247
// Base url of the Bloom api. For testing with a locally-run api, use
// "http://127.0.0.1:7071/v1" instead.
const bloomApiUrl = "https://api.bloomlibrary.org/v1";

// Ask the api for per-book stats for every book.
// The flow is roundabout: we hand the api a description of the parse query
// that lists our books; the api runs it, passes that book list to the
// postgresql analytics database, and returns the resulting stats to us.
// Querying postgresql directly would be more efficient, but the endpoint
// already exists and this avoids giving the parse server postgres
// connection information.
const axios = require("axios");
const { url: parseServerUrl, appId: parseAppId } =
    getCurrentInstanceInfoForApiQuery();
const parseDBQuery = {
    url: `${parseServerUrl}/classes/books`,
    method: "GET",
    options: {
        headers: {
            "X-Parse-Application-Id": `${parseAppId}`,
        },
        params: {
            limit: 1000000, // Default is 100. We want all of them.
            keys: "objectId,bookInstanceId",
        },
    },
};
const analyticsResults = await axios.post(
    `${bloomApiUrl}/stats/reading/per-book`,
    { filter: { parseDBQuery } }
);
282+
const analyticsSourceData = analyticsResults.data.stats;

// Index the analytics rows by bookInstanceId so each book below is a
// single keyed lookup rather than a scan of the whole stats array.
const bookInstanceIdToAnalyticsMap = {};
analyticsSourceData.forEach((bookAnalytics) => {
    bookInstanceIdToAnalyticsMap[bookAnalytics.bookinstanceid] =
        bookAnalytics;
});

// The column list does not change per book; compute it once.
const analyticsColumns = Object.entries(analyticsColumnsMap);

// Get all the books in our parse database.
// If a book's analytics values need to be updated, push it into
// a new array of books to update.
const booksToUpdate = [];
const bookQuery = new Parse.Query("books");
bookQuery.limit(1000000); // Default is 100. We want all of them.
bookQuery.select("bookInstanceId", ...Object.keys(analyticsColumnsMap));

const allBooks = await bookQuery.find();
allBooks.forEach((book) => {
    // undefined when the api returned no stats row for this book; every
    // column then resolves to 0 via getNumberOrZero.
    const bookAnalytics =
        bookInstanceIdToAnalyticsMap[book.get("bookInstanceId")];

    let bookNeedsUpdate = false;
    analyticsColumns.forEach(([columnName, { apiResultName, isDecimal }]) => {
        const newValue = getNumberOrZero(
            bookAnalytics?.[apiResultName],
            isDecimal || false
        );

        if (book.get(columnName) !== newValue) {
            book.set(columnName, newValue);
            bookNeedsUpdate = true;
        }
    });
    if (bookNeedsUpdate) {
        // Important to set updateSource for proper processing in beforeSave (see details there).
        book.set("updateSource", "updateBookAnalytics");

        booksToUpdate.push(book);
    }
});

// Log only counts. Passing the booksToUpdate array itself to the logger
// would serialize every changed book object into the log.
request.log.info(
    `updateBookAnalytics - ${booksToUpdate.length} of ${allBooks.length} books have changed analytics.`
);

// Save any books with updated analytics.
const successfulUpdates = await Parse.Object.saveAll(booksToUpdate, {
    useMasterKey: true,
});
request.log.info(
    `updateBookAnalytics - Updated analytics for ${successfulUpdates.length} books.`
);
257335
} catch (error) {
258336
if (error.code === Parse.Error.AGGREGATE_ERROR) {
259-
error.errors.forEach((iError) => {
337+
const maxErrors = 20; // Don't blow up the log.
338+
for (let i = 0; i < error.errors.length && i < maxErrors; i++) {
339+
const iError = error.errors[i];
260340
request.log.error(
261341
`Couldn't process ${iError.object.id} due to ${iError.message}`
262342
);
263-
});
343+
}
344+
if (error.errors.length > maxErrors) {
345+
request.log.error(
346+
`${
347+
error.errors.length - maxErrors
348+
} more errors were suppressed.`
349+
);
350+
}
264351
request.log.error(
265352
"updateBookAnalytics - Terminated unsuccessfully."
266353
);
@@ -735,8 +822,16 @@ Parse.Cloud.define("setupTables", async () => {
735822
{ name: "bloomPUBVersion", type: "Number" },
736823

737824
// analytics_* fields are populated by the updateBookAnalytics job.
825+
{ name: "analytics_startCount", type: "Number" },
738826
{ name: "analytics_finishedCount", type: "Number" },
739827
{ name: "analytics_shellDownloads", type: "Number" },
828+
{ name: "analytics_pdfDownloads", type: "Number" },
829+
{ name: "analytics_epubDownloads", type: "Number" },
830+
{ name: "analytics_bloompubDownloads", type: "Number" },
831+
{ name: "analytics_questionsInBookCount", type: "Number" },
832+
{ name: "analytics_quizzesTakenCount", type: "Number" },
833+
{ name: "analytics_meanQuestionsCorrectPct", type: "Number" },
834+
{ name: "analytics_medianQuestionsCorrectPct", type: "Number" },
740835
],
741836
},
742837
{

0 commit comments

Comments
 (0)