Azure-Samples · FabianMeiswinkel · Oct 9, 2019 · deborahc · Oct 10, 2019 · deborahc
diff --git a/PartitionKeyStatistics.ipynb b/PartitionKeyStatistics.ipynb
@@ -0,0 +1,52 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "trusted": true
+   },
+   "outputs": [],
+   "source": "#Configuration section\n#Change parameters below if you want to export results t Excel workbooks in a blob storage account\n\n#Account name of the blob stroage account the workbooks are saved to\nblob_account_name='<YourAccountName>'\n\n#Account key of the blob stroage account the workbooks are saved to\nblob_account_key='<YourAccountKey>'\n\n#Blob container name taht the excel workbooks will be saved to\nblobContainerName='<YourContainerName>'\n\n#By default use the same cosmos account in which the notebook is stored in\nclient = cosmos_client\n#Alternatively you could point to another cosmso account via connection string for example\n#client = CosmosClient.from_connection_string('<YourConnectionString>')"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "trusted": true
+   },
+   "outputs": [],
+   "source": "#Installing modules needed to export result to Excel workbooks on Blob storage\n!pip install XlsxWriter --user\n!pip install azure.storage --user"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "trusted": true
+   },
+   "outputs": [],
+   "source": "#Running PartitionKeyStatistics\nimport azure.cosmos\nimport datetime\nimport io\nimport pandas as pd\nfrom azure.cosmos import CosmosClient\nfrom azure.storage.blob import (BlockBlobService)\n\nclass color:\n   PURPLE = '\\033[95m'\n   CYAN = '\\033[96m'\n   DARKCYAN = '\\033[36m'\n   BLUE = '\\033[94m'\n   GREEN = '\\033[92m'\n   YELLOW = '\\033[93m'\n   RED = '\\033[91m'\n   BOLD = '\\033[1m'\n   UNDERLINE = '\\033[4m'\n   END = '\\033[0m'\n\nfor dbProperties in client.list_databases():\n    database = client.get_database_client(dbProperties['id'])\n    \n    for collectionProperties in database.list_containers():\n        container = database.get_container_client(collectionProperties['id'])\n\n        containerProperties = container.read(populate_partition_key_range_statistics = True, populate_quota_info = True)\n\n        partitionQuotas = {\n            'PartitionId': [],\n            'DocumentCount': [],\n            'StorageSize': [],\n        }\n\n        partitionKeyStatistics = {\n            'PartitionId': [],\n            'LogicalPartitionKeyValue': [],\n            'StorageSize': [],\n        }\n\n        for statisticsRecord in containerProperties['statistics']:\n            partitionId = str(statisticsRecord['id'])\n            partitionQuotas['PartitionId'].append(partitionId)\n            partitionQuotas['DocumentCount'].append(float(statisticsRecord['documentCount']))\n            partitionQuotas['StorageSize'].append(float(statisticsRecord['sizeInKB']))\n\n            for partitionKeyStatisticsRecord in statisticsRecord['partitionKeys']:\n                partitionKeyStatistics['PartitionId'].append(partitionId)\n                partitionKeyStatistics['LogicalPartitionKeyValue'].append(partitionKeyStatisticsRecord['partitionKey'])\n                partitionKeyStatistics['StorageSize'].append(float(partitionKeyStatisticsRecord['sizeInKB']))\n\n        partitionQuotaDataFrame = pd.DataFrame(partitionQuotas) \n        partitionKeyStatisticsDataFrame = pd.DataFrame(partitionKeyStatistics) \n        \n        print()\n        print('Database {0:s} - Container {1:s}'.format(dbProperties['id'], collectionProperties['id']))\n        print('QUOTA')\n        print(partitionQuotaDataFrame)\n        \n        print()\n        print('PARTITION KEY STATISTICS')\n        print(partitionKeyStatisticsDataFrame)\n        \n        #Storing statistics for each ocntainer in an Ecel workbook\n        filename = '{0:s}_{1:s}_{2:s}.xlsx'.format(dbProperties['id'], collectionProperties['id'], datetime.datetime.utcnow().strftime('%Y%m%d%H%M%S'))\n        output= io.BytesIO()\n        writer = pd.ExcelWriter(output, engine='xlsxwriter', options={'in_memory': True})\n        \n        partitionQuotaDataFrame.to_excel(writer, sheet_name='Quota', index=False)\n        partitionKeyStatisticsDataFrame.to_excel(writer, sheet_name='Partition key statistics', index=False)\n        writer.save()\n\n        output.seek(0)\n        blobService = BlockBlobService(account_name=blob_account_name, account_key=blob_account_key)\n        \n        blobService.create_blob_from_bytes(blob_container_name, filename, output.read())\n        print()\n        print('Exported to file: {0:s}'.format(filename))"
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}