diff --git a/PartitionKeyStatistics.ipynb b/PartitionKeyStatistics.ipynb new file mode 100644 index 0000000..5fd29bd --- /dev/null +++ b/PartitionKeyStatistics.ipynb @@ -0,0 +1,52 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "trusted": true + }, + "outputs": [], + "source": "#Configuration section\n#Change parameters below if you want to export results t Excel workbooks in a blob storage account\n\n#Account name of the blob stroage account the workbooks are saved to\nblob_account_name=''\n\n#Account key of the blob stroage account the workbooks are saved to\nblob_account_key=''\n\n#Blob container name taht the excel workbooks will be saved to\nblobContainerName=''\n\n#By default use the same cosmos account in which the notebook is stored in\nclient = cosmos_client\n#Alternatively you could point to another cosmso account via connection string for example\n#client = CosmosClient.from_connection_string('')" + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "trusted": true + }, + "outputs": [], + "source": "#Installing modules needed to export result to Excel workbooks on Blob storage\n!pip install XlsxWriter --user\n!pip install azure.storage --user" + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "trusted": true + }, + "outputs": [], + "source": "#Running PartitionKeyStatistics\nimport azure.cosmos\nimport datetime\nimport io\nimport pandas as pd\nfrom azure.cosmos import CosmosClient\nfrom azure.storage.blob import (BlockBlobService)\n\nclass color:\n PURPLE = '\\033[95m'\n CYAN = '\\033[96m'\n DARKCYAN = '\\033[36m'\n BLUE = '\\033[94m'\n GREEN = '\\033[92m'\n YELLOW = '\\033[93m'\n RED = '\\033[91m'\n BOLD = '\\033[1m'\n UNDERLINE = '\\033[4m'\n END = '\\033[0m'\n\nfor dbProperties in client.list_databases():\n database = client.get_database_client(dbProperties['id'])\n \n for collectionProperties in database.list_containers():\n container = database.get_container_client(collectionProperties['id'])\n\n containerProperties = container.read(populate_partition_key_range_statistics = True, populate_quota_info = True)\n\n partitionQuotas = {\n 'PartitionId': [],\n 'DocumentCount': [],\n 'StorageSize': [],\n }\n\n partitionKeyStatistics = {\n 'PartitionId': [],\n 'LogicalPartitionKeyValue': [],\n 'StorageSize': [],\n }\n\n for statisticsRecord in containerProperties['statistics']:\n partitionId = str(statisticsRecord['id'])\n partitionQuotas['PartitionId'].append(partitionId)\n partitionQuotas['DocumentCount'].append(float(statisticsRecord['documentCount']))\n partitionQuotas['StorageSize'].append(float(statisticsRecord['sizeInKB']))\n\n for partitionKeyStatisticsRecord in statisticsRecord['partitionKeys']:\n partitionKeyStatistics['PartitionId'].append(partitionId)\n partitionKeyStatistics['LogicalPartitionKeyValue'].append(partitionKeyStatisticsRecord['partitionKey'])\n partitionKeyStatistics['StorageSize'].append(float(partitionKeyStatisticsRecord['sizeInKB']))\n\n partitionQuotaDataFrame = pd.DataFrame(partitionQuotas) \n partitionKeyStatisticsDataFrame = pd.DataFrame(partitionKeyStatistics) \n \n print()\n print('Database {0:s} - Container {1:s}'.format(dbProperties['id'], collectionProperties['id']))\n print('QUOTA')\n print(partitionQuotaDataFrame)\n \n print()\n print('PARTITION KEY STATISTICS')\n print(partitionKeyStatisticsDataFrame)\n \n #Storing statistics for each ocntainer in an Ecel workbook\n filename = '{0:s}_{1:s}_{2:s}.xlsx'.format(dbProperties['id'], collectionProperties['id'], datetime.datetime.utcnow().strftime('%Y%m%d%H%M%S'))\n output= io.BytesIO()\n writer = pd.ExcelWriter(output, engine='xlsxwriter', options={'in_memory': True})\n \n partitionQuotaDataFrame.to_excel(writer, sheet_name='Quota', index=False)\n partitionKeyStatisticsDataFrame.to_excel(writer, sheet_name='Partition key statistics', index=False)\n writer.save()\n\n output.seek(0)\n blobService = BlockBlobService(account_name=blob_account_name, account_key=blob_account_key)\n \n blobService.create_blob_from_bytes(blob_container_name, filename, output.read())\n print()\n print('Exported to file: {0:s}'.format(filename))" + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file