Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions PartitionKeyStatistics.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
{
"cells": [
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In order for this to be a standalone, self-explanatory sample, can you add some markdown text that explains at a high level what the notebook does?

{
"cell_type": "code",
"execution_count": null,
"metadata": {
"trusted": true
},
"outputs": [],
"source": "#Configuration section\n#Change parameters below if you want to export results t Excel workbooks in a blob storage account\n\n#Account name of the blob stroage account the workbooks are saved to\nblob_account_name='<YourAccountName>'\n\n#Account key of the blob stroage account the workbooks are saved to\nblob_account_key='<YourAccountKey>'\n\n#Blob container name taht the excel workbooks will be saved to\nblobContainerName='<YourContainerName>'\n\n#By default use the same cosmos account in which the notebook is stored in\nclient = cosmos_client\n#Alternatively you could point to another cosmso account via connection string for example\n#client = CosmosClient.from_connection_string('<YourConnectionString>')"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Follow Python conventions, e.g. partitionQuotaDataFrame -> partition_quota_dataframe

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: typo:

Change parameters below if you want to export results to Excel workbooks in a blob storage account

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In order to make this standalone - can we make this part optional? E.g. we print the statistics in the notebook, and then have an optional cell where they can save it to Excel? Or they can write it to the current directory, and it will show up as a file under Notebooks.

},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"trusted": true
},
"outputs": [],
"source": "#Installing modules needed to export result to Excel workbooks on Blob storage\n!pip install XlsxWriter --user\n!pip install azure.storage --user"
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"trusted": true
},
"outputs": [],
"source": "#Running PartitionKeyStatistics\nimport azure.cosmos\nimport datetime\nimport io\nimport pandas as pd\nfrom azure.cosmos import CosmosClient\nfrom azure.storage.blob import (BlockBlobService)\n\nclass color:\n PURPLE = '\\033[95m'\n CYAN = '\\033[96m'\n DARKCYAN = '\\033[36m'\n BLUE = '\\033[94m'\n GREEN = '\\033[92m'\n YELLOW = '\\033[93m'\n RED = '\\033[91m'\n BOLD = '\\033[1m'\n UNDERLINE = '\\033[4m'\n END = '\\033[0m'\n\nfor dbProperties in client.list_databases():\n database = client.get_database_client(dbProperties['id'])\n \n for collectionProperties in database.list_containers():\n container = database.get_container_client(collectionProperties['id'])\n\n containerProperties = container.read(populate_partition_key_range_statistics = True, populate_quota_info = True)\n\n partitionQuotas = {\n 'PartitionId': [],\n 'DocumentCount': [],\n 'StorageSize': [],\n }\n\n partitionKeyStatistics = {\n 'PartitionId': [],\n 'LogicalPartitionKeyValue': [],\n 'StorageSize': [],\n }\n\n for statisticsRecord in containerProperties['statistics']:\n partitionId = str(statisticsRecord['id'])\n partitionQuotas['PartitionId'].append(partitionId)\n partitionQuotas['DocumentCount'].append(float(statisticsRecord['documentCount']))\n partitionQuotas['StorageSize'].append(float(statisticsRecord['sizeInKB']))\n\n for partitionKeyStatisticsRecord in statisticsRecord['partitionKeys']:\n partitionKeyStatistics['PartitionId'].append(partitionId)\n partitionKeyStatistics['LogicalPartitionKeyValue'].append(partitionKeyStatisticsRecord['partitionKey'])\n partitionKeyStatistics['StorageSize'].append(float(partitionKeyStatisticsRecord['sizeInKB']))\n\n partitionQuotaDataFrame = pd.DataFrame(partitionQuotas) \n partitionKeyStatisticsDataFrame = pd.DataFrame(partitionKeyStatistics) \n \n print()\n print('Database {0:s} - Container {1:s}'.format(dbProperties['id'], collectionProperties['id']))\n print('QUOTA')\n print(partitionQuotaDataFrame)\n \n print()\n print('PARTITION KEY STATISTICS')\n print(partitionKeyStatisticsDataFrame)\n \n #Storing statistics for each ocntainer in an Ecel workbook\n filename = '{0:s}_{1:s}_{2:s}.xlsx'.format(dbProperties['id'], collectionProperties['id'], datetime.datetime.utcnow().strftime('%Y%m%d%H%M%S'))\n output= io.BytesIO()\n writer = pd.ExcelWriter(output, engine='xlsxwriter', options={'in_memory': True})\n \n partitionQuotaDataFrame.to_excel(writer, sheet_name='Quota', index=False)\n partitionKeyStatisticsDataFrame.to_excel(writer, sheet_name='Partition key statistics', index=False)\n writer.save()\n\n output.seek(0)\n blobService = BlockBlobService(account_name=blob_account_name, account_key=blob_account_key)\n \n blobService.create_blob_from_bytes(blob_container_name, filename, output.read())\n print()\n print('Exported to file: {0:s}'.format(filename))"
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}