Skip to content

Commit

Permalink
trailing paths cleanup as bulk operations - not limited to created to…
Browse files Browse the repository at this point in the history
… ECS mapping script created one
  • Loading branch information
dk1844 committed Apr 11, 2024
1 parent de80697 commit 1ce8b3f
Showing 1 changed file with 6 additions and 7 deletions.
13 changes: 6 additions & 7 deletions scripts/mongo/ecs_trailing_slash_cleanup.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,15 @@
* limitations under the License.
*/

// Removes trailing slashes from ECS-mapped paths
// Removes trailing slashes from 's3a://.../' paths (ECS-mapped paths and any paths derived from them)

function stripTrailingSlashOps(collectionName, fieldToStrip, requiredFieldToExist) {
function stripTrailingSlashOps(collectionName, fieldToStrip) {
print(`PrepOps: Stripping trailing / from field ${fieldToStrip} collection in ${collectionName}`);
var count = 0;
var ops = db[collectionName].find(
{
"$and": [
{[requiredFieldToExist]: {$exists: true}},
{[fieldToStrip]: {$regex: "/$"}}
{[fieldToStrip]: {$regex: "s3a://.*/$"}}
]
}
).map(function (doc) {
Expand All @@ -45,12 +44,12 @@ function stripTrailingSlashOps(collectionName, fieldToStrip, requiredFieldToExis
return ops;
}

var ops_d1 = stripTrailingSlashOps("dataset_v1", "hdfsPath", "bakHdfsPath");
var ops_d1 = stripTrailingSlashOps("dataset_v1", "hdfsPath");
db.getCollection('dataset_v1').bulkWrite(ops_d1);

var ops_d2 = stripTrailingSlashOps("dataset_v1", "hdfsPublishPath", "bakHdfsPublishPath");
var ops_d2 = stripTrailingSlashOps("dataset_v1", "hdfsPublishPath");
db.getCollection('dataset_v1').bulkWrite(ops_d2);

var ops_mt1 = stripTrailingSlashOps("mapping_table_v1", "hdfsPath", "bakHdfsPath");
var ops_mt1 = stripTrailingSlashOps("mapping_table_v1", "hdfsPath");
db.getCollection('mapping_table_v1').bulkWrite(ops_mt1);

0 comments on commit 1ce8b3f

Please sign in to comment.