Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(shell-api): Account for orphan documents count in getShardDistribution() helper MONGOSH-1838 #2203

Merged
merged 24 commits into from
Oct 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions packages/shell-api/src/collection.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2276,6 +2276,70 @@ describe('Collection', function () {
ShellApiErrors.NotConnectedToShardedCluster
);
});

describe('with orphan documents', function () {
  // Fixtures: one shard reporting 10 orphan documents of average size 7
  // inside a total storage size of 1000, spread over 2 chunks.
  const chunkCount = 2;
  const collectionConfigDoc = {};
  const shardInfoDoc = { host: 'dummy-host' };
  const shardStatsDoc = {
    shard: 'test-shard',
    storageStats: {
      size: 1000,
      numOrphanDocs: 10,
      avgObjSize: 7,
      count: 15,
    },
  };

  beforeEach(function () {
    // find() and limit() both hand back the same stubbed cursor, so whatever
    // findOne yields is decided purely by the cursor's tryNext stub.
    const findCursor = stubInterface<ServiceProviderCursor>();
    findCursor.limit.returns(findCursor);
    serviceProvider.find.returns(findCursor);

    // tryNext answers follow the order of the findOne calls made by
    // getShardDistribution: collection config info first, shard info second.
    findCursor.tryNext.onCall(0).resolves(collectionConfigDoc);
    findCursor.tryNext.onCall(1).resolves(shardInfoDoc);

    // Chunk count reported for the shard.
    serviceProvider.countDocuments.resolves(chunkCount);

    // The aggregate cursor yields a single shard stats document and then
    // null on the following call.
    const nextShardStats = sinon.stub();
    nextShardStats.onCall(0).resolves(shardStatsDoc);
    nextShardStats.onCall(1).resolves(null);

    // eslint-disable-next-line @typescript-eslint/no-unsafe-argument
    serviceProvider.aggregate.returns({
      tryNext: nextShardStats,
    } as any);
  });

  it('should account for numOrphanDocs when calculating size', async function () {
    // Expected size once the orphan documents' share has been subtracted.
    const { size, numOrphanDocs, avgObjSize } = shardStatsDoc.storageStats;
    const expectedOwnedSize = size - numOrphanDocs * avgObjSize;

    const distribution = await collection.getShardDistribution();

    expect(distribution.type).equals('StatsResult');
    expect(distribution.value.Totals.data).equals(`${expectedOwnedSize}B`);

    // The only key besides 'Totals' is the per-shard entry.
    const shardKey = Object.keys(distribution.value).find(
      (name) => name !== 'Totals'
    ) as `Shard ${string} at ${string}`;
    expect(shardKey).not.undefined;

    const perShard = distribution.value[shardKey];
    expect(perShard['estimated data per chunk']).equals(
      `${expectedOwnedSize / chunkCount}B`
    );
  });
});
});

describe('analyzeShardKey', function () {
Expand Down
49 changes: 40 additions & 9 deletions packages/shell-api/src/collection.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2135,12 +2135,14 @@ export default class Collection extends ShellApiWithMongoClass {
@returnsPromise
@topologies([Topologies.Sharded])
@apiVersions([])
async getShardDistribution(): Promise<CommandResult> {
async getShardDistribution(): Promise<
CommandResult<GetShardDistributionResult>
> {
this._emitCollectionApiCall('getShardDistribution', {});

await getConfigDB(this._database); // Warns if not connected to mongos

const result = {} as Document;
const result = {} as GetShardDistributionResult;
const config = this._mongo.getDB('config');

const collStats = await (
Expand Down Expand Up @@ -2179,17 +2181,24 @@ export default class Collection extends ShellApiWithMongoClass {
.findOne({ _id: extractedShardStats.shard }),
config.getCollection('chunks').countDocuments(countChunksQuery),
]);

// Since 6.0, there can be orphan documents indicated by numOrphanDocs.
// These orphan documents need to be accounted for in the size calculation.
const orphanDocumentsSize =
(extractedShardStats.storageStats.numOrphanDocs ?? 0) *
(extractedShardStats.storageStats.avgObjSize ?? 0);
const ownedSize =
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

extractedShardStats.storageStats.size - orphanDocumentsSize;

const shardStats = {
shardId: shard,
host: host !== null ? host.host : null,
size: extractedShardStats.storageStats.size,
size: ownedSize,
count: extractedShardStats.storageStats.count,
numChunks: numChunks,
avgObjSize: extractedShardStats.storageStats.avgObjSize,
};

const key = `Shard ${shardStats.shardId} at ${shardStats.host}`;

// In sharded timeseries collections we do not have a count
// so we intentionally pass NaN as a result to the client.
const shardStatsCount: number = shardStats.count ?? NaN;
Expand All @@ -2203,15 +2212,15 @@ export default class Collection extends ShellApiWithMongoClass {
? 0
: Math.floor(shardStatsCount / shardStats.numChunks);

result[key] = {
result[`Shard ${shardStats.shardId} at ${shardStats.host}`] = {
data: dataFormat(coerceToJSNumber(shardStats.size)),
docs: shardStatsCount,
chunks: shardStats.numChunks,
'estimated data per chunk': dataFormat(estimatedChunkDataPerChunk),
'estimated docs per chunk': estimatedDocsPerChunk,
};

totals.size += coerceToJSNumber(shardStats.size);
totals.size += coerceToJSNumber(ownedSize);
totals.count += coerceToJSNumber(shardStatsCount);
totals.numChunks += coerceToJSNumber(shardStats.numChunks);

Expand All @@ -2224,7 +2233,7 @@ export default class Collection extends ShellApiWithMongoClass {
data: dataFormat(totals.size),
docs: totals.count,
chunks: totals.numChunks,
} as Document;
} as GetShardDistributionResult['Totals'];

for (const shardStats of conciseShardsStats) {
const estDataPercent =
Expand All @@ -2243,7 +2252,8 @@ export default class Collection extends ShellApiWithMongoClass {
];
}
result.Totals = totalValue;
return new CommandResult('StatsResult', result);

return new CommandResult<GetShardDistributionResult>('StatsResult', result);
}

@serverVersions(['3.1.0', ServerVersions.latest])
Expand Down Expand Up @@ -2467,3 +2477,24 @@ export default class Collection extends ShellApiWithMongoClass {
);
}
}

/** Counters shared by the cluster-wide totals: formatted data size, document count and chunk count. */
type ShardDistributionTotalsCounters = {
  data: string;
  docs: number;
  chunks: number;
};

/** Per-shard share lines listed under `Totals`, in this fixed order. */
type ShardDistributionShareLines = [
  `${number} % data`,
  `${number} % docs in cluster`,
  `${string} avg obj size on shard`
];

/** Statistics reported for one individual shard. */
type ShardDistributionShardEntry = {
  data: string;
  docs: number;
  chunks: number;
  'estimated data per chunk': string;
  'estimated docs per chunk': number;
};

/**
 * Value carried by the `StatsResult` that `getShardDistribution()` returns.
 *
 * - `Totals` holds the aggregated counters plus, for every `Shard <id>` key,
 *   a tuple of share lines for that shard.
 * - Every `Shard <id> at <host>` key holds the statistics of a single shard.
 */
export type GetShardDistributionResult = {
  Totals: ShardDistributionTotalsCounters & {
    [shardShareKey: `Shard ${string}`]: ShardDistributionShareLines;
  };
  [shardEntryKey: `Shard ${string} at ${string}`]: ShardDistributionShardEntry;
};
Loading