|
13 | 13 | * See the License for the specific language governing permissions and
|
14 | 14 | * limitations under the License.
|
15 | 15 | */
|
16 |
| - |
17 | 16 | require('dotenv').config();
|
18 |
| - |
19 | 17 | const algoliasearch = require('algoliasearch');
|
20 | 18 | const fs = require('fs');
|
21 |
| -const log = require('fancy-log'); |
| 19 | +const {sizeof} = require('sizeof'); |
22 | 20 |
|
23 |
| -const raw = fs.readFileSync('dist/algolia.json', 'utf-8'); |
24 |
| -const indexed = JSON.parse(raw); |
// Algolia's batch API rejects payloads larger than 10 MB, so records are
// uploaded in chunks kept below this byte budget.
const maxChunkSizeInBytes = 10000000; // 10,000,000

/**
 * Chunks an array of AlgoliaCollectionItem into arrays of
 * AlgoliaCollectionItem whose serialized size is each less than 10 MB.
 *
 * @param {AlgoliaCollectionItem[]} arr Items to split into batches.
 * @return {AlgoliaCollectionItem[][]} Batches, each under the size budget.
 */
const chunkAlgolia = (arr) => {
  const chunked = [];
  let tempSizeInBytes = 0;
  let temp = [];
  for (const current of arr) {
    const currentSizeInBytes = sizeof(current);
    if (tempSizeInBytes + currentSizeInBytes < maxChunkSizeInBytes) {
      temp.push(current);
      tempSizeInBytes += currentSizeInBytes;
    } else {
      // Guard against pushing an empty batch when the very first item on its
      // own exceeds the budget (temp would still be empty here).
      if (temp.length > 0) {
        chunked.push(temp);
      }
      temp = [current];
      tempSizeInBytes = currentSizeInBytes;
    }
  }
  // Only keep the trailing batch when it holds records; for an empty input
  // this returns [] rather than [[]], avoiding a pointless empty upload.
  if (temp.length > 0) {
    chunked.push(temp);
  }
  return chunked;
};
30 | 47 |
|
/**
 * Pushes the contents of dist/algolia.json to the Algolia search index,
 * then deletes any records that were not part of this run.
 *
 * Requires ALGOLIA_APP_ID and ALGOLIA_API_KEY in the environment; if either
 * is absent the function warns and returns without doing anything.
 *
 * @return {Promise<void>}
 */
async function index() {
  // Every record uploaded in this run is stamped with this time, so stale
  // records (those with an earlier stamp) can be deleted afterwards.
  const indexedOn = new Date();

  if (!process.env.ALGOLIA_APP_ID || !process.env.ALGOLIA_API_KEY) {
    console.warn('Missing Algolia environment variables, skipping indexing.');
    return;
  }

  const raw = fs.readFileSync('dist/algolia.json', 'utf-8');
  /** @type {AlgoliaCollection} */
  const algoliaData = JSON.parse(raw);

  // Set date of when object is being added to algolia.
  // (forEach, not map: the array is mutated in place and the result is unused.)
  algoliaData.forEach((e) => {
    e.indexedOn = indexedOn.getTime();
  });

  const chunkedAlgoliaData = chunkAlgolia(algoliaData);
  const postsCount = algoliaData.length;

  // @ts-ignore
  const client = algoliasearch(
    process.env.ALGOLIA_APP_ID,
    process.env.ALGOLIA_API_KEY,
  );
  // Named to avoid shadowing this function (`index`).
  const searchIndex = client.initIndex('prod_web_dev');

  console.log(
    `Indexing ${postsCount} articles amongst ${chunkedAlgoliaData.length} chunk(s).`,
  );

  // When indexing data we mark these two fields as fields that can be filtered by.
  await searchIndex.setSettings({
    attributesForFaceting: ['locales', 'tags'],
  });

  // Update algolia index with new data. Chunks are uploaded sequentially to
  // stay under Algolia's per-request payload limit.
  for (const chunk of chunkedAlgoliaData) {
    await searchIndex.saveObjects(chunk, {
      autoGenerateObjectIDIfNotExist: true,
    });
  }

  console.log('Updated algolia data.');

  console.log('Deleting old data no longer in algolia.json.');
  // Anything stamped before this run's indexedOn was absent from algolia.json.
  await searchIndex.deleteBy({
    filters: `indexedOn < ${indexedOn.getTime()}`,
  });
  console.log('Deleted old data.');

  console.log('Done!');
}
|
65 | 102 |
|
// Run the indexer; surface any failure in the logs, then re-throw so the
// process exits with a non-zero status.
const onIndexError = (err) => {
  console.error(err);
  throw err;
};
index().catch(onIndexError);
|
0 commit comments