diff --git a/config/_default/menus/main.en.yaml b/config/_default/menus/main.en.yaml index d0ffa89f272..f327e510ee1 100644 --- a/config/_default/menus/main.en.yaml +++ b/config/_default/menus/main.en.yaml @@ -21,24 +21,27 @@ menu: - name: Application Performance identifier: apm_heading weight: 7000000 + - name: Data Observability + identifier: data_observability_heading + weight: 8000000 - name: Digital Experience identifier: digital_experience_heading - weight: 8000000 + weight: 9000000 - name: Software Delivery identifier: software_delivery_heading - weight: 9000000 + weight: 10000000 - name: Security identifier: security_platform_heading - weight: 10000000 + weight: 11000000 - name: AI Observability identifier: ai_observability_heading - weight: 11000000 + weight: 12000000 - name: Log Management identifier: log_management_heading - weight: 12000000 + weight: 13000000 - name: Administration identifier: administration_heading - weight: 13000000 + weight: 14000000 - name: Getting Started identifier: getting_started url: getting_started/ @@ -5067,23 +5070,114 @@ menu: identifier: data_streams_metrics_and_tags parent: data_streams weight: 5 - - name: Data Jobs Monitoring - url: data_jobs/ - pre: data-jobs-monitoring - identifier: data_jobs - parent: apm_heading - weight: 60000 - - name: Data Observability + - name: Data Observability Overview url: data_observability/ pre: inventories identifier: data_observability - parent: apm_heading + parent: data_observability_heading + weight: 60000 + - name: Quality Monitoring + url: data_observability/quality_monitoring/ + pre: check-light-wui + identifier: quality_monitoring + parent: data_observability_heading weight: 70000 - - name: Datasets - url: data_observability/datasets - identifier: datasets - parent: data_observability + - name: Data Warehouses + url: data_observability/quality_monitoring/data_warehouses + identifier: data_warehouses + parent: quality_monitoring + weight: 200000 + - name: Snowflake + url: data_observability/quality_monitoring/data_warehouses/snowflake + identifier: snowflake + parent: data_warehouses + weight: 1000000 + - name: Databricks + url: data_observability/quality_monitoring/data_warehouses/databricks + identifier: databricks + parent: data_warehouses + weight: 2000000 + - name: BigQuery + url: data_observability/quality_monitoring/data_warehouses/bigquery + identifier: bigquery + parent: data_warehouses + weight: 3000000 + - name: Business Intelligence Integrations + url: data_observability/quality_monitoring/business_intelligence + identifier: business_intelligence_integrations + parent: quality_monitoring + weight: 300000 + - name: Tableau + url: data_observability/quality_monitoring/business_intelligence/tableau + identifier: business_intelligence_integrations_tableau + parent: business_intelligence_integrations + weight: 1000000 + - name: Sigma + url: data_observability/quality_monitoring/business_intelligence/sigma + identifier: business_intelligence_integrations_sigma + parent: business_intelligence_integrations + weight: 2000000 + - name: Metabase + url: data_observability/quality_monitoring/business_intelligence/metabase + identifier: business_intelligence_integrations_metabase + parent: business_intelligence_integrations + weight: 3000000 + - name: Power BI + url: data_observability/quality_monitoring/integrations/business_intelligence/powerbi + identifier: business_intelligence_integrations_powerbi + parent: business_intelligence_integrations + weight: 4000000 + - name: Jobs Monitoring + url: 
data_observability/jobs_monitoring/ + pre: data-jobs-monitoring + identifier: data_jobs + parent: data_observability_heading + weight: 80000 + - name: Databricks + url: data_observability/jobs_monitoring/databricks + identifier: jobs_monitoring_databricks + parent: data_jobs weight: 100000 + - name: Airflow + url: data_observability/jobs_monitoring/airflow + identifier: jobs_monitoring_airflow + parent: data_jobs + weight: 200000 + - name: dbt Core + url: data_observability/jobs_monitoring/dbtcore + identifier: jobs_monitoring_dbtcore + parent: data_jobs + weight: 300000 + - name: dbt Cloud + url: data_observability/jobs_monitoring/dbtcloud + identifier: jobs_monitoring_dbtcloud + parent: data_jobs + weight: 400000 + - name: Spark on Kubernetes + url: data_observability/jobs_monitoring/kubernetes + identifier: jobs_monitoring_kubernetes + parent: data_jobs + weight: 500000 + - name: Spark on Amazon EMR + url: data_observability/jobs_monitoring/emr + identifier: jobs_monitoring_emr + parent: transformation_integrations + weight: 600000 + - name: Spark on Google Dataproc + url: data_observability/jobs_monitoring/dataproc + identifier: jobs_monitoring_dataproc + parent: data_jobs + weight: 700000 + - name: Custom Jobs + url: data_observability/jobs_monitoring/openlineage + identifier: openlineage_integrations + parent: data_jobs + weight: 800000 + - name: Datadog Agent for OpenLineage Proxy + url: data_observability/jobs_monitoring/openlineage/datadog_agent_for_openlineage + identifier: openlineage_datadog_agent_for_openlineage + parent: openlineage_integrations + weight: 1000000 - name: LLM Observability url: llm_observability/ pre: llm-observability diff --git a/content/en/data_observability/_index.md b/content/en/data_observability/_index.md index d2275a57ce4..5a052856178 100644 --- a/content/en/data_observability/_index.md +++ b/content/en/data_observability/_index.md @@ -2,57 +2,48 @@ title: Data Observability description: "Monitor data quality, performance, and cost with Data Observability to detect anomalies, analyze data lineage, and prevent issues affecting downstream systems." further_reading: - - link: '/data_observability/datasets' + - link: '/data_observability/quality_monitoring/' tag: 'Documentation' - text: 'Datasets' - - link: '/data_jobs' + text: 'Quality Monitoring' + - link: '/data_observability/jobs_monitoring' tag: 'Documentation' - text: 'Data Jobs Monitoring' - - link: '/data_streams' - tag: 'Documentation' - text: 'Data Streams Monitoring' - - link: '/database_monitoring' - tag: 'Documentation' - text: 'Database Monitoring' + text: 'Jobs Monitoring' - link: 'https://www.datadoghq.com/about/latest-news/press-releases/datadog-metaplane-aquistion/' tag: 'Blog' - text: 'Datadog Brings Observability to Data Teams by Acquiring Metaplane' + text: 'Datadog Brings Observability to Data teams by Acquiring Metaplane' --- -
Data Observability is in Preview.
- -Data Observability helps data teams detect, resolve, and prevent issues that impact data quality, performance, and cost. It enables teams to monitor anomalies, troubleshoot faster, and maintain trust in the data powering downstream systems. +Data Observability helps data teams detect, resolve, and prevent issues that affect data quality, performance, and cost. It enables teams to monitor anomalies, troubleshoot faster, and maintain trust in the data powering downstream systems. -{{< img src="data_observability/data_observability_overview.png" alt="Lineage graph showing a failed Spark job upstream of a Snowflake table with an alert and four downstream nodes labeled Upstream issue." style="width:100%;" >}} +{{< img src="data_observability/data-obs-overview-1.png" alt="Lineage graph showing a failed application upstream." style="width:100%;" >}} -Datadog makes this possible by monitoring key signals across your data stack, including metrics, metadata, lineage, and logs. These signals help detect issues early and support reliable, high-quality data. +Data Observability consists of two products: -## Key capabilities +- **[Quality Monitoring][3]**: Detect anomalies in your tables, including freshness delays, volume changes, and unexpected column-level metric shifts. +- **[Jobs Monitoring][4]**: Track the performance, reliability, and cost of data processing jobs across platforms like Spark, Databricks, and Airflow. -With Data Observability, you can: +Both products share end-to-end lineage, letting you trace data dependencies and correlate issues across your stack. -- Detect anomalies in volume, freshness, null rates, and distributions -- Analyze lineage to trace data dependencies from source to dashboard -- Integrate with pipelines to correlate issues with job runs, data streams, and infrastructure events +## Quality Monitoring -## Monitor data quality +Quality Monitoring tracks metrics and metadata across your tables to detect issues before they impact downstream systems: -{{< img src="data_observability/data_observability_lineage_quality.png" alt="Lineage graph centered on the quoted_pricing Snowflake table with an alert on a pricing metric and sidebar charts for freshness, row count, and size." style="width:100%;" >}} +- **Data metrics**: Null count, null percentage, uniqueness, mean, and standard deviation +- **Metadata**: Schema, row count, and freshness -Datadog continuously tracks metrics and metadata, including: +Configure static thresholds or use automatic anomaly detection to catch missing updates, unexpected row count changes, and metric outliers. 
-- Data metrics such as null count, null percentage, uniqueness, mean, and standard deviation -- Metadata such as schema, row count, and freshness +## Jobs Monitoring -You can configure static thresholds or rely on automatic anomaly detection to identify unexpected changes, including: +Jobs Monitoring provides visibility into data processing jobs across your accounts and workspaces: -- Missing or delayed updates -- Unexpected row count changes -- Outliers in key metrics +- **Performance**: Track job duration, resource utilization, and identify inefficiencies like high idle CPU +- **Reliability**: Receive alerts when jobs fail or exceed expected completion times +- **Troubleshooting**: Analyze execution details and stack traces, and compare runs to identify issues ## Trace lineage and understand impact -{{< img src="data_observability/data_observability_lineage_trace.png" alt="Lineage graph tracing data flow from Kafka through a failed Spark job to a Snowflake table with an alert and four downstream nodes labeled Upstream issue." style="width:100%;" >}} +{{< img src="data_observability/data-obs-lineage-blurred.png" alt="Lineage graph tracing data flow from Kafka through a failed Spark job to a Snowflake table with an alert and four downstream nodes labeled Upstream issue." style="width:100%;" >}} Data Observability provides end-to-end lineage, helping you: @@ -62,7 +53,7 @@ Data Observability provides end-to-end lineage, helping you: ## Correlate with pipeline and infrastructure activity -{{< img src="data_observability/data_observability_pipeline_infra_correlation.png" alt="Lineage graph showing a failed Spark job with a missing S3 path error, plus a side panel with job run stats and duration trends." style="width:100%;" >}} +{{< img src="data_observability/data-obs-correlate-trace.png" alt="Lineage graph showing a failed Spark job with a missing S3 path error, plus a side panel with job run stats and duration trends." style="width:100%;" >}} Understand how pipeline activity and infrastructure events impact your data. Datadog ingests logs and metadata from pipeline tools and user interactions to provide context for data quality issues, including: @@ -71,6 +62,21 @@ Understand how pipeline activity and infrastructure events impact your dat This operational context helps you trace the source of data incidents and respond faster. +## Required permissions + +Data Observability requires the `integrations_read` permission to read integrations in your account and dynamically render content. Without this permission, you see a permissions screen instead of the app. + +This permission is included in the [Datadog Standard Role][1]. If your current role doesn't include it, add `integrations_read` to your role, then refresh the page. + +## IP allowlists + +If your organization enforces IP allowlists, add the IPs listed under the `webhooks` section of this [webhooks.json][2] file to your allowlist.
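+
+For example, you can list the current webhook IP ranges from the command line (requires `jq`). This is a minimal sketch that assumes the published file exposes the ranges under a `prefixes_ipv4` field inside the `webhooks` section; inspect the file if your copy uses a different layout:
+
+```shell
+# Print the IPv4 prefixes from the webhooks section of Datadog's published IP ranges
+curl -s https://ip-ranges.datadoghq.com/webhooks.json | jq -r '.webhooks.prefixes_ipv4[]'
+```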
+ ## Further reading -{{< partial name="whats-next/whats-next.html" >}} \ No newline at end of file +{{< partial name="whats-next/whats-next.html" >}} + +[1]: /account_management/rbac/?tab=datadogapplication#datadog-default-roles +[2]: https://ip-ranges.datadoghq.com/webhooks.json +[3]: /data_observability/quality_monitoring/ +[4]: /data_observability/jobs_monitoring/ \ No newline at end of file diff --git a/content/en/data_observability/datasets.md b/content/en/data_observability/datasets.md deleted file mode 100644 index 1cd52db949c..00000000000 --- a/content/en/data_observability/datasets.md +++ /dev/null @@ -1,224 +0,0 @@ ---- -title: "Data Observability: Datasets" -description: "Set up Data Observability for datasets to detect freshness delays, unusual data patterns, and column-level metric changes in Snowflake and BigQuery." -further_reading: - - link: '/data_observability' - tag: 'Documentation' - text: 'Data Observability' - - link: '/data_jobs' - tag: 'Documentation' - text: 'Data Jobs Monitoring' - - link: '/data_streams' - tag: 'Documentation' - text: 'Data Streams Monitoring' - - link: '/database_monitoring' - tag: 'Documentation' - text: 'Database Monitoring' ---- - -
Data Observability is in Preview.
- -{{< img src="data_observability/data_quality_tables.png" alt="Datasets page showing a list of tables with columns for query count, storage size, row count, and last data update; two tables are flagged with triggered alerts" style="width:100%;" >}} - -Data Observability for Datasets detects issues such as data freshness delays, unusual data patterns, and changes in column-level metrics before they affect dashboards, machine learning models, or other downstream systems. It alerts you to potential problems and provides the context to trace them back to upstream jobs or sources. - -## Key capabilities - -With Data Observability, you can: - -- Detect delayed updates and unexpected row count behavior in your tables -- Surface changes in column-level metrics such as null counts or uniqueness -- Set up monitors using static thresholds or historical baselines -- Trace quality issues using lineage views that show upstream jobs and downstream impact - -## Supported data sources - -Data Observability supports the following data sources: - -- Snowflake -- BigQuery - -## Setup - -{{< tabs >}} -{{% tab "Snowflake" %}} - -To monitor Snowflake data in Datadog, you must configure both your Snowflake account and the Snowflake integration in Datadog. Before you begin, make sure that: - -- You have access to the `ACCOUNTADMIN` role in Snowflake. -- You have generated an RSA key pair. For more information, see the [Snowflake key-pair authentication docs][1]. - -After you confirm the prerequisites above, complete the following setup steps in Snowflake: - -1. Define the following variables: - ```sql - SET role_name = 'DATADOG_ROLE'; - SET user_name = 'DATADOG_USER'; - SET warehouse_name = 'DATADOG_WH'; - SET database_name = ''; - - ``` -1. Create a role, warehouse, and key-pair-authenticated user. - - ```sql - USE ROLE ACCOUNTADMIN; - - -- Create monitoring role - CREATE ROLE IF NOT EXISTS IDENTIFIER($role_name); - GRANT ROLE IDENTIFIER($role_name) TO ROLE SYSADMIN; - - -- Create an X-SMALL warehouse (auto-suspend after 30s) - CREATE WAREHOUSE IF NOT EXISTS IDENTIFIER($warehouse_name) - WAREHOUSE_SIZE = XSMALL - WAREHOUSE_TYPE = STANDARD - AUTO_SUSPEND = 30 - AUTO_RESUME = TRUE - INITIALLY_SUSPENDED = TRUE; - - -- Create Datadog user—key-pair only (no password) - -- Replace with your RSA public key (PEM, no headers/newlines) - CREATE USER IF NOT EXISTS IDENTIFIER($user_name) - LOGIN_NAME = $user_name - DEFAULT_ROLE = $role_name - DEFAULT_WAREHOUSE = $warehouse_name - RSA_PUBLIC_KEY = ''; - - GRANT ROLE IDENTIFIER($role_name) TO USER IDENTIFIER($user_name); - ``` -1. Grant monitoring privileges to the role. - - ```sql - -- Warehouse usage - GRANT USAGE ON WAREHOUSE IDENTIFIER($warehouse_name) TO ROLE IDENTIFIER($role_name); - - -- Account‐level monitoring (tasks, pipes, query history) - GRANT MONITOR EXECUTION ON ACCOUNT TO ROLE IDENTIFIER($role_name); - - -- Imported privileges on Snowflake's ACCOUNT_USAGE - GRANT IMPORTED PRIVILEGES ON DATABASE SNOWFLAKE TO ROLE IDENTIFIER($role_name); - - -- Imported privileges on any external data shares - -- GRANT IMPORTED PRIVILEGES ON DATABASE IDENTIFIER($database_name) TO ROLE IDENTIFIER($role_name); - - -- Grant the following ACCOUNT_USAGE views to the new role. Do this if you wish to collect Snowflake account usage logs and metrics. 
- GRANT DATABASE ROLE SNOWFLAKE.OBJECT_VIEWER TO ROLE IDENTIFIER($role_name); - GRANT DATABASE ROLE SNOWFLAKE.USAGE_VIEWER TO ROLE IDENTIFIER($role_name); - GRANT DATABASE ROLE SNOWFLAKE.GOVERNANCE_VIEWER TO ROLE IDENTIFIER($role_name); - GRANT DATABASE ROLE SNOWFLAKE.SECURITY_VIEWER TO ROLE IDENTIFIER($role_name); - - -- Grant ORGANIZATION_USAGE_VIEWER to the new role. Do this if you wish to collect Snowflake organization usage metrics. - GRANT DATABASE ROLE SNOWFLAKE.ORGANIZATION_USAGE_VIEWER TO ROLE IDENTIFIER($role_name); - - -- Grant ORGANIZATION_BILLING_VIEWER to the new role. Do this if you wish to collect Snowflake cost data. - GRANT DATABASE ROLE SNOWFLAKE.ORGANIZATION_BILLING_VIEWER TO ROLE IDENTIFIER($role_name); - ``` - -1. Grant read-only access to your data. - - ```sql - USE DATABASE IDENTIFIER($database_name); - - CREATE OR REPLACE PROCEDURE grantFutureAccess(databaseName string, roleName string) - returns string not null - language javascript - as - $$ - var schemaResultSet = snowflake.execute({ sqlText: 'SELECT SCHEMA_NAME FROM ' + '"' + DATABASENAME + '"' + ".INFORMATION_SCHEMA.SCHEMATA WHERE SCHEMA_NAME != 'INFORMATION_SCHEMA';"}); - - var numberOfSchemasGranted = 0; - while (schemaResultSet.next()) { - numberOfSchemasGranted += 1; - var schemaAndRoleSuffix = ' in schema "' + DATABASENAME + '"."' + - schemaResultSet.getColumnValue('SCHEMA_NAME') + '" to role ' + ROLENAME + ';' - - snowflake.execute({ sqlText: 'grant USAGE on schema "' + DATABASENAME + '"."' + - schemaResultSet.getColumnValue('SCHEMA_NAME') + '" to role ' + ROLENAME + ';'}); - snowflake.execute({ sqlText: 'grant SELECT on all tables' + schemaAndRoleSuffix}); - snowflake.execute({ sqlText: 'grant SELECT on all views' + schemaAndRoleSuffix}); - snowflake.execute({ sqlText: 'grant SELECT on all event tables' + schemaAndRoleSuffix}); - snowflake.execute({ sqlText: 'grant SELECT on all external tables' + schemaAndRoleSuffix}); - snowflake.execute({ sqlText: 'grant SELECT on all dynamic tables' + schemaAndRoleSuffix}); - snowflake.execute({ sqlText: 'grant SELECT on future tables' + schemaAndRoleSuffix}); - snowflake.execute({ sqlText: 'grant SELECT on future views' + schemaAndRoleSuffix}); - snowflake.execute({ sqlText: 'grant SELECT on future event tables' + schemaAndRoleSuffix}); - snowflake.execute({ sqlText: 'grant SELECT on future external tables' + schemaAndRoleSuffix}); - snowflake.execute({ sqlText: 'grant SELECT on future dynamic tables' + schemaAndRoleSuffix}); - } - - return 'Granted access to ' + numberOfSchemasGranted + ' schemas'; - $$ - ; - - GRANT USAGE ON DATABASE IDENTIFIER($database_name) TO ROLE IDENTIFIER($role_name); - CALL grantFutureAccess('', ''); - ``` - -1. (Optional) If your organization uses [Snowflake event tables][2], you can grant the Datadog role access to them. - - ```sql - -- Grant usage on the database, schema, and table of the event table - GRANT USAGE ON DATABASE TO ROLE IDENTIFIER($role_name); - GRANT USAGE ON SCHEMA . TO ROLE IDENTIFIER($role_name); - GRANT SELECT ON TABLE .. TO ROLE IDENTIFIER($role_name); - - -- Snowflake-provided application roles for event logs - GRANT APPLICATION ROLE SNOWFLAKE.EVENTS_VIEWER TO ROLE IDENTIFIER($role_name); - GRANT APPLICATION ROLE SNOWFLAKE.EVENTS_ADMIN TO ROLE IDENTIFIER($role_name); - - ``` - -After completing the Snowflake setup, configure the Snowflake integration in Datadog. - -1. On the [Snowflake integration tile][3], click **Add Snowflake account**. -1. Enter your Snowflake account URL. -1. 
Under **Logs**, turn on: - - **Query History Logs** - - **Enable Query Logs with Access History** -1. Under **Data Observability**, turn on: - - **Enable Data Observability for Snowflake tables** -1. Set the **User Name** to `DATADOG_USER`. -1. Under **Configure a key pair authentication**, upload your unencrypted RSA private key. -1. Click **Save**. - -[1]: https://docs.snowflake.com/en/user-guide/key-pair-auth#generate-the-private-key -[2]: https://docs.snowflake.com/en/developer-guide/logging-tracing/event-table-setting-up -[3]: https://app.datadoghq.com/integrations?search=snowflake&integrationId=snowflake-web - -{{% /tab %}} -{{% tab "BigQuery" %}} - -To monitor BigQuery data in Datadog, you must configure permissions in your Google Cloud project and enable the relevant features in the Datadog integration. For detailed instructions, see the [Expanded BigQuery monitoring][1] section of the Datadog Google Cloud Platform documentation. - -[1]: /integrations/google_cloud_platform/?tab=dataflowmethodrecommended#expanded-bigquery-monitoring - -{{% /tab %}} -{{% tab "Databricks" %}} - -To monitor Databricks data in Datadog, you must complete the following steps: - -1. Complete the installation instructions in the [Databricks Integration documentation][1]. -1. Grant read-only access to the data you want to monitor. - - You can grant access to all current and future data within a catalog: - ```sql - GRANT USE_CATALOG ON CATALOG TO ``; - GRANT USE_SCHEMA ON CATALOG TO ``; - GRANT SELECT ON CATALOG TO ``; - ``` - - Or, you can grant access to specific tables: - ```sql - GRANT USE_CATALOG ON CATALOG TO ``; - GRANT USE_SCHEMA ON SCHEMA . TO ``; - GRANT SELECT ON TABLE .. TO ``; - ``` -1. Enable the **Data Observability** toggle in the Configuration pane of the Databricks account you connected in Step 1. - -[1]: https://docs.datadoghq.com/integrations/databricks/?tab=useaserviceprincipalforoauth#setup - -{{% /tab %}} -{{< /tabs >}} - -## Further reading - -{{< partial name="whats-next/whats-next.html" >}} diff --git a/content/en/data_jobs/_index.md b/content/en/data_observability/jobs_monitoring/_index.md similarity index 99% rename from content/en/data_jobs/_index.md rename to content/en/data_observability/jobs_monitoring/_index.md index 14f832a6826..74dbe30f4a8 100644 --- a/content/en/data_jobs/_index.md +++ b/content/en/data_observability/jobs_monitoring/_index.md @@ -1,6 +1,8 @@ --- title: Data Jobs Monitoring description: "Monitor performance, reliability, and cost efficiency of data processing jobs across platforms like EMR, Databricks, Dataproc, and Kubernetes." +aliases: + - /data_jobs/ further_reading: - link: '/data_streams' tag: 'Documentation' diff --git a/content/en/data_jobs/airflow.md b/content/en/data_observability/jobs_monitoring/airflow.md similarity index 99% rename from content/en/data_jobs/airflow.md rename to content/en/data_observability/jobs_monitoring/airflow.md index ca22bf77e02..29650895192 100644 --- a/content/en/data_jobs/airflow.md +++ b/content/en/data_observability/jobs_monitoring/airflow.md @@ -3,6 +3,8 @@ title: Enable Data Jobs Monitoring for Apache Airflow description: "Monitor Apache Airflow DAG workflows with Data Jobs Monitoring using OpenLineage provider across Kubernetes, Amazon MWAA, and other platforms." 
is_beta: true private: true +aliases: + - /data_jobs/airflow further_reading: - link: 'https://www.datadoghq.com/blog/data-jobs-monitoring/' tag: 'blog' @@ -15,7 +17,7 @@ further_reading: text: 'Data Jobs Monitoring' --- -{{< callout url="#" btn_hidden="true" header="Data Jobs Monitoring for Apache Airflow is in Preview" >}} +{{< callout url="#" btn_hidden="true" header="Data Jobs Monitoring for Apache Airflow is in preview" >}} To try the preview for Airflow monitoring, follow the setup instructions below. {{< /callout >}} diff --git a/content/en/data_jobs/databricks.md b/content/en/data_observability/jobs_monitoring/databricks.md similarity index 99% rename from content/en/data_jobs/databricks.md rename to content/en/data_observability/jobs_monitoring/databricks.md index f999862aa1d..0fc67795771 100644 --- a/content/en/data_jobs/databricks.md +++ b/content/en/data_observability/jobs_monitoring/databricks.md @@ -1,6 +1,8 @@ --- title: Enable Data Jobs Monitoring for Databricks description: "Enable Data Jobs Monitoring for Databricks workspaces with OAuth or Personal Access Token authentication and Datadog Agent installation." +aliases: + - /data_jobs/databricks further_reading: - link: '/data_jobs' tag: 'Documentation' diff --git a/content/en/data_jobs/dataproc.md b/content/en/data_observability/jobs_monitoring/dataproc.md similarity index 99% rename from content/en/data_jobs/dataproc.md rename to content/en/data_observability/jobs_monitoring/dataproc.md index e84e090a035..21d3d22ddf8 100644 --- a/content/en/data_jobs/dataproc.md +++ b/content/en/data_observability/jobs_monitoring/dataproc.md @@ -1,6 +1,8 @@ --- title: Enable Data Jobs Monitoring for Spark on Google Cloud Dataproc description: "Set up Data Jobs Monitoring for Apache Spark applications on Google Cloud Dataproc clusters with API key configuration and service tagging." +aliases: + - /data_jobs/dataproc further_reading: - link: '/data_jobs' tag: 'Documentation' diff --git a/content/en/data_observability/jobs_monitoring/dbtcloud.md b/content/en/data_observability/jobs_monitoring/dbtcloud.md new file mode 100644 index 00000000000..cfa6b497933 --- /dev/null +++ b/content/en/data_observability/jobs_monitoring/dbtcloud.md @@ -0,0 +1,50 @@ +--- +title: dbt Cloud +description: "Connect dbt Cloud to Datadog for job run metadata and model lineage." +further_reading: + - link: '/data_observability/' + tag: 'Documentation' + text: 'Learn about Data Observability' +--- + +
The dbt Cloud integration is in Preview.
+ +## Overview + +Datadog can access your dbt Cloud metadata to extract information about job runs, including run durations, models generated by dbt, and lineage relationships between models. Datadog matches tables in your warehouse with dbt models to determine the causality and consequences of table failure. + +Follow the steps below to connect dbt Cloud to Datadog. + +## Generate an API token in dbt Cloud + +Create a service token in dbt Cloud so Datadog can access your account's metadata. + +1. In dbt Cloud, go to **User Profile > API Tokens > Service Tokens**. +2. Click on **+ Create Service Token**. +3. Provide a name for the token. +4. Set the token permissions: + - For dbt Cloud Enterprise plan, ensure that the token has **Developer** permissions. + - For dbt Cloud Team plan, ensure that the token has **Account Admin** permissions. +5. Click **Save** and copy the generated API token. + +## Connect your dbt Cloud account to Datadog + +Use the API token to configure the integration in Datadog. + +1. Navigate to Datadog's [dbt Cloud integration tile][1]. +2. If you have already created a dbt Cloud integration account, make sure you have updated it with the API token with permissions described above. +3. If not, create a new account. Fill in the **Account Name**, **Account Id**, **Account Url**, and **API Token** sections. +4. Click **Save** to save your settings. + +## What's next + +After your next dbt job run, you should start seeing job run and lineage data in [Datadog Data Observability][2], as shown below. + +{{< img src="data_observability/data-obs-dbt-cloud-final.png" alt="Data Observability overview showing dbt job runs as a stacked bar chart over time and a table of connected dbt Cloud accounts with their status." style="width:100%;" >}} + +## Further reading + +{{< partial name="whats-next/whats-next.html" >}} + +[1]: https://app.datadoghq.com/integrations/dbt-cloud +[2]: https://app.datadoghq.com/datasets/catalog?integration=dbt \ No newline at end of file diff --git a/content/en/data_observability/jobs_monitoring/dbtcore.md b/content/en/data_observability/jobs_monitoring/dbtcore.md new file mode 100644 index 00000000000..90f65377f44 --- /dev/null +++ b/content/en/data_observability/jobs_monitoring/dbtcore.md @@ -0,0 +1,73 @@ +--- +title: dbt Core +description: "Connect dbt Core to Datadog for job run metadata and model lineage." +further_reading: + - link: '/data_observability/' + tag: 'Documentation' + text: 'Learn about Data Observability' +--- + +
The dbt Core integration is in Preview.
+ +## Overview + +Datadog can access your dbt Core metadata to extract information about job runs, including run durations, models generated by dbt, and lineage relationships between models. Datadog matches tables in your warehouse with dbt models to determine the causality and consequences of table failure. + +**Note**: If you run dbt Core with an external orchestrator (such as Airflow) and want to correlate orchestrator tasks with dbt runs, follow the [Airflow integration instructions][1] first. + +Follow the steps below to connect dbt Core to Datadog. + +## Retrieve your Datadog API key + +1. [Follow these instructions][2] to create or retrieve a Datadog API key. + +## Install openlineage-dbt + +1. Install the `openlineage-dbt` package. Reference [Using dbt with Amazon MWAA][3] for setting up this package in your virtual environment. + + ```shell + pip3 install openlineage-dbt>=1.39.0 + ``` + +## Set the environment variables + +1. Set the following environment variables. Replace `datadoghq.com` with the relevant [Datadog site][4] for your organization. For more information on predefined Datadog sites, see the [OpenLineage documentation][5]. + + ```shell + export DD_SITE=datadoghq.com + export DD_API_KEY= + export OPENLINEAGE__TRANSPORT__TYPE=datadog + + # OPENLINEAGE_NAMESPACE determines the Datadog tag value for the environment (similar to how the service tag identifies the application). + # Typical values are dev, staging, or prod, but you can over ride it with any custom value. + export OPENLINEAGE_NAMESPACE= + + # Optional, for debugging purposes + export OPENLINEAGE_CLIENT_LOGGING=DEBUG + ``` + +## Update the dbt invocation + +1. Change the dbt invocation to use the OpenLineage wrapper (`dbt-ol`). +2. Add the `--consume-structured-logs` flag to view dbt jobs while the command is still running. + + ```shell + dbt-ol run --consume-structured-logs --openlineage-dbt-job-name + ``` + +## What's next + +After your next dbt job run, you should start seeing job run and lineage data in [Datadog Data Observability][6], as shown below. + +{{< img src="data_observability/data-obs-dbt-cloud-final.png" alt="Data Observability overview showing dbt job runs and model lineage." style="width:100%;" >}} + +## Further reading + +{{< partial name="whats-next/whats-next.html" >}} + +[1]: /data_jobs/airflow/?tab=kubernetes +[2]: /account_management/api-app-keys/#add-an-api-key-or-client-token +[3]: https://docs.aws.amazon.com/mwaa/latest/userguide/samples-dbt.html +[4]: /getting_started/site/#access-the-datadog-site +[5]: https://openlineage.io/docs/client/python/#predefined-datadog-sites +[6]: https://app.datadoghq.com/datasets/catalog?integration=dbt diff --git a/content/en/data_jobs/emr.md b/content/en/data_observability/jobs_monitoring/emr.md similarity index 99% rename from content/en/data_jobs/emr.md rename to content/en/data_observability/jobs_monitoring/emr.md index 9e604fe5892..4209726c057 100644 --- a/content/en/data_jobs/emr.md +++ b/content/en/data_observability/jobs_monitoring/emr.md @@ -1,6 +1,8 @@ --- title: Enable Data Jobs Monitoring for Spark on Amazon EMR description: "Configure Data Jobs Monitoring for Apache Spark applications on Amazon EMR clusters using AWS Secrets Manager and bootstrap actions." 
+aliases: + - /data_jobs/emr further_reading: - link: '/data_jobs' tag: 'Documentation' diff --git a/content/en/data_jobs/kubernetes.md b/content/en/data_observability/jobs_monitoring/kubernetes.md similarity index 99% rename from content/en/data_jobs/kubernetes.md rename to content/en/data_observability/jobs_monitoring/kubernetes.md index caebffd9130..b49bfd4738e 100644 --- a/content/en/data_jobs/kubernetes.md +++ b/content/en/data_observability/jobs_monitoring/kubernetes.md @@ -1,6 +1,8 @@ --- title: Data Jobs Monitoring for Spark on Kubernetes description: "Set up Data Jobs Monitoring for Apache Spark applications on Kubernetes clusters using the Datadog Agent and admission controller." +aliases: + - /data_jobs/kubernetes further_reading: - link: '/data_jobs' tag: 'Documentation' diff --git a/content/en/data_observability/jobs_monitoring/openlineage/_index.md b/content/en/data_observability/jobs_monitoring/openlineage/_index.md new file mode 100644 index 00000000000..e5c22b7fcc9 --- /dev/null +++ b/content/en/data_observability/jobs_monitoring/openlineage/_index.md @@ -0,0 +1,126 @@ +--- +title: Custom Jobs using OpenLineage +description: "Monitor jobs from in-house tools, custom pipelines, and orchestrators that don't have native Datadog integrations." +further_reading: + - link: '/data_observability/' + tag: 'Documentation' + text: 'Data Observability' + - link: '/data_observability/integrations/openlineage/datadog_agent_for_openlineage' + tag: 'Documentation' + text: 'Set up Datadog Agent for OpenLineage Proxy' +--- + +
Custom jobs using OpenLineage is in Preview.
+ +## Overview + +Custom jobs use the [OpenLineage][1] standard to send job and lineage events to Datadog. Use custom jobs when you need to: + +- Capture lineage from systems Datadog doesn't integrate with natively, such as in-house tools or custom ETL scripts +- Emit lineage events for jobs or orchestrators where a native Datadog integration isn't available + +Replace the hostname in the examples with the relevant [Datadog site][2] for your organization. To find your Datadog site, see [Access the Datadog site][3]. This example uses `datadoghq.com`. + +**Note**: To centralize configuration and avoid distributing API keys to every application, you can [set up the Datadog Agent as an OpenLineage proxy][4]. + +You can send [OpenLineage events][1] to Datadog in three ways: +- [Direct HTTP with curl](#option-1-direct-http-with-curl) +- [OpenLineage Python client (HTTP transport)](#option-2-openlineage-python-client-http-transport) +- [OpenLineage Python client (Datadog transport)](#option-3-openlineage-python-client-datadog-transport) + +## Option 1: Direct HTTP with curl + +Send a raw [OpenLineage RunEvent][5] as JSON to Datadog's intake endpoint. + +```shell +curl -X POST "https://data-obs-intake.datadoghq.com/api/v1/lineage" \ + -H "Authorization: Bearer " \ + -H "Content-Type: application/json" \ + -d '{ + "eventTime": "2023-01-01T00:00:00Z", + "eventType": "START", + "run": { "runId": "123e4567-e89b-12d3-a456-426614174000" }, + "job": { "namespace": "default", "name": "test-job" }, + "producer": "your-producer-id" + }' +``` + +## Option 2: OpenLineage Python client (HTTP transport) + +Use the [OpenLineage Python client][6] with a manually specified HTTP transport. + +```python +from datetime import datetime +import uuid +from openlineage.client import OpenLineageClient, OpenLineageClientOptions +from openlineage.client.event_v2 import RunEvent, RunState, Job, Run + +client = OpenLineageClient( + url="https://data-obs-intake.datadoghq.com", + options=OpenLineageClientOptions(api_key="") +) + +event = RunEvent( + eventType=RunState.START, + eventTime=datetime.utcnow().isoformat(), + run=Run(runId=str(uuid.uuid4())), + job=Job(namespace="default", name="test-job"), + producer="your-producer-id" +) + +client.emit(event) +``` + +## Option 3: OpenLineage Python client (Datadog transport) + +In OpenLineage 1.37.0+, use the [Datadog transport][7] for automatic configuration and optimized event delivery. + +```python +from datetime import datetime +import uuid +from openlineage.client import OpenLineageClient +from openlineage.client.event_v2 import RunEvent, RunState, Job, Run +from openlineage.client.transport.datadog import DatadogConfig, DatadogTransport + +config = DatadogConfig( + apiKey="", + site="datadoghq.com" # Change if using a different Datadog site +) + +client = OpenLineageClient(transport=DatadogTransport(config)) + +event = RunEvent( + eventType=RunState.START, + eventTime=datetime.utcnow().isoformat(), + run=Run(runId=str(uuid.uuid4())), + job=Job(namespace="default", name="test-job"), + producer="your-producer-id" +) + +client.emit(event) +``` + +
For Option 3, you can skip DatadogConfig by using environment variables: + +```shell +export DD_API_KEY=your-datadog-api-key +export DD_SITE=datadoghq.com +export OPENLINEAGE__TRANSPORT__TYPE=datadog +``` + +```python +client = OpenLineageClient.from_environment() +``` +
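+
+Whichever option you use, a run is not finished until a terminal event arrives. Per the OpenLineage run cycle, follow the `START` event with a `COMPLETE` (or `FAIL`) event that reuses the same `runId`. The sketch below adapts the curl example from Option 1; `<DD_API_KEY>` and the timestamp are placeholders:
+
+```shell
+# Mark the run started in Option 1 as complete by sending a terminal event with the same runId
+curl -X POST "https://data-obs-intake.datadoghq.com/api/v1/lineage" \
+  -H "Authorization: Bearer <DD_API_KEY>" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "eventTime": "2023-01-01T00:05:00Z",
+    "eventType": "COMPLETE",
+    "run": { "runId": "123e4567-e89b-12d3-a456-426614174000" },
+    "job": { "namespace": "default", "name": "test-job" },
+    "producer": "your-producer-id"
+  }'
+```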
+ +## Further reading + +{{< partial name="whats-next/whats-next.html" >}} + +[1]: https://openlineage.io/ +[2]: https://openlineage.io/docs/client/python/#predefined-datadog-sites +[3]: /getting_started/site/#access-the-datadog-site +[4]: /data_observability/integrations/openlineage/datadog_agent_for_openlineage +[5]: https://openlineage.io/docs/spec/run-cycle/ +[6]: https://openlineage.io/docs/client/python +[7]: https://openlineage.io/docs/client/python#datadog-transport diff --git a/content/en/data_observability/jobs_monitoring/openlineage/datadog_agent_for_openlineage.md b/content/en/data_observability/jobs_monitoring/openlineage/datadog_agent_for_openlineage.md new file mode 100644 index 00000000000..fc33b149dfa --- /dev/null +++ b/content/en/data_observability/jobs_monitoring/openlineage/datadog_agent_for_openlineage.md @@ -0,0 +1,84 @@ +--- +title: Set up Datadog Agent for OpenLineage Proxy +description: Configure the Datadog Agent to proxy OpenLineage events to Datadog Data Observability. +further_reading: + - link: '/data_observability/' + tag: 'Documentation' + text: 'Learn about Data Observability' +--- + +## Overview + +You can configure the Datadog Agent to act as a proxy for [OpenLineage events][1], forwarding them to Datadog. This is useful for centralizing configuration and avoiding the need to distribute API keys to every application. + +Replace the hostname in the examples with the relevant [Datadog site][2] for your organization. To find your Datadog site, see [Access the Datadog site][3]. This example uses `datadoghq.com`. + +## Prerequisites + +Ensure you have the Datadog Agent installed and running on your host. If not, follow the [official Datadog Agent installation instructions][4] for your operating system. + +After installation, locate the `datadog.yaml` configuration file. This file is typically found in: + +- **Linux**: `/etc/datadog-agent/datadog.yaml` +- **macOS**: `/opt/datadog-agent/etc/datadog.yaml` + +## Enable the OpenLineage proxy + +To enable the OpenLineage proxy: + +1. Add the following configuration to your `datadog.yaml` file: + + ```yaml + ol_proxy_config: + enabled: true + ddurl: datadoghq.com # optional - defaults to regular Agent DD_SITE + api_key: *** # optional - defaults to regular Agent DD_API_KEY + ``` + +2. After modifying `datadog.yaml`, restart the Datadog Agent for the changes to take effect: + + ```bash + sudo systemctl restart datadog-agent + # Or + sudo service datadog-agent restart + ``` + +You can verify the Agent is running and the proxy is active by checking the Datadog Agent logs. + +## Configure your application to use the proxy + +After the Datadog Agent is configured as an OpenLineage proxy, direct your applications to send OpenLineage events to the Agent's listen port instead of directly to the OpenLineage collector. + +### Set the OpenLineage environment variables + +Change the `OPENLINEAGE_URL` environment variable in your application's environment to point to the Datadog Agent's proxy address. Assuming the Datadog Agent is running on the same host as your application, and the Agent is listening on default port `8126`: + +```bash +export OPENLINEAGE_URL="http://localhost:8126" +export OPENLINEAGE_ENDPOINT="openlineage/api/v1/lineage" +``` + +If your Datadog Agent is on a different host, replace `localhost` with the Agent's IP address or hostname. 
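+
+To check the proxy end to end before wiring up a client, you can post a sample OpenLineage event straight to the Agent. This sketch assumes the default port and endpoint shown above and reuses the sample RunEvent from the custom jobs page; no `Authorization` header is needed because the Agent attaches its own API key when forwarding:
+
+```shell
+# Send a test RunEvent to the Agent's OpenLineage proxy endpoint
+curl -X POST "http://localhost:8126/openlineage/api/v1/lineage" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "eventTime": "2023-01-01T00:00:00Z",
+    "eventType": "START",
+    "run": { "runId": "123e4567-e89b-12d3-a456-426614174000" },
+    "job": { "namespace": "default", "name": "test-job" },
+    "producer": "your-producer-id"
+  }'
+```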
+ +### Ensure OpenLineage client is configured correctly + +Your application should be using an OpenLineage client library (for example, `openlineage-python` or `openlineage-dbt`). The client library picks up the `OPENLINEAGE_URL` environment variable. There is no need to specify `OPENLINEAGE_API_KEY`, as the Agent uses its own. + +### Run your application + +Execute your application or data job that generates OpenLineage events. These events are sent to the Datadog Agent, which then forwards them to Datadog. + +Example for dbt Core: + +```bash +dbt-ol run --openlineage-dbt-job-name +``` + +## Further reading + +{{< partial name="whats-next/whats-next.html" >}} + +[1]: https://openlineage.io/ +[2]: https://openlineage.io/docs/client/python/#predefined-datadog-sites +[3]: /getting_started/site/#access-the-datadog-site +[4]: /getting_started/agent/ diff --git a/content/en/data_observability/quality_monitoring/_index.md b/content/en/data_observability/quality_monitoring/_index.md new file mode 100644 index 00000000000..48ddafd81e1 --- /dev/null +++ b/content/en/data_observability/quality_monitoring/_index.md @@ -0,0 +1,58 @@ +--- +title: Quality Monitoring +description: "Detect data freshness delays, unusual patterns, and column-level metric changes before they impact downstream systems." +further_reading: + - link: '/data_observability/' + tag: 'Documentation' + text: 'Data Observability' + - link: '/data_observability/jobs_monitoring' + tag: 'Documentation' + text: 'Jobs Monitoring' +--- + +
+ Quality Monitoring is in Preview. +
+ +## Overview + +Quality Monitoring detects issues such as data freshness delays, unusual data patterns, and changes in column-level metrics before they affect dashboards, machine learning models, or other downstream systems. It alerts you to potential problems and provides context to trace them back to upstream jobs or sources. + +## Key capabilities + +With Quality Monitoring, you can: +- Detect delayed updates and unexpected row count behavior in your tables +- Surface changes in column-level metrics such as null counts or uniqueness +- Set up monitors using static thresholds or historical baselines +- Trace quality issues using lineage views that show upstream jobs and downstream impact + +## Supported data sources + +{{< whatsnext desc="Connect to these data warehouses:" >}} + {{< nextlink href="data_observability/quality_monitoring/data_warehouses/snowflake" >}}Snowflake{{< /nextlink >}} + {{< nextlink href="data_observability/quality_monitoring/data_warehouses/databricks" >}}Databricks{{< /nextlink >}} + {{< nextlink href="data_observability/quality_monitoring/data_warehouses/bigquery" >}}BigQuery{{< /nextlink >}} +{{< /whatsnext >}} + +{{< whatsnext desc="Integrate with these transformation and orchestration tools:" >}} + {{< nextlink href="data_observability/jobs_monitoring/databricks" >}}Databricks{{< /nextlink >}} + {{< nextlink href="data_observability/jobs_monitoring/airflow" >}}Airflow{{< /nextlink >}} + {{< nextlink href="data_observability/jobs_monitoring/dbtcore" >}}dbt Core{{< /nextlink >}} + {{< nextlink href="data_observability/jobs_monitoring/dbtcloud" >}}dbt Cloud{{< /nextlink >}} + {{< nextlink href="data_observability/jobs_monitoring/kubernetes" >}}Spark on Kubernetes{{< /nextlink >}} + {{< nextlink href="data_observability/jobs_monitoring/emr" >}}Spark on Amazon EMR{{< /nextlink >}} + {{< nextlink href="data_observability/jobs_monitoring/dataproc" >}}Spark on Google Dataproc{{< /nextlink >}} + {{< nextlink href="data_observability/jobs_monitoring/openlineage" >}}Custom Jobs using OpenLineage{{< /nextlink >}} +{{< /whatsnext >}} + +{{< whatsnext desc="Track downstream impact in these BI tools:" >}} + {{< nextlink href="data_observability/quality_monitoring/business_intelligence/tableau" >}}Tableau{{< /nextlink >}} + {{< nextlink href="data_observability/quality_monitoring/business_intelligence/sigma" >}}Sigma{{< /nextlink >}} + {{< nextlink href="data_observability/quality_monitoring/business_intelligence/metabase" >}}Metabase{{< /nextlink >}} + {{< nextlink href="data_observability/quality_monitoring/business_intelligence/powerbi" >}}Power BI{{< /nextlink >}} +{{< /whatsnext >}} + +## Further reading + +{{< partial name="whats-next/whats-next.html" >}} + diff --git a/content/en/data_observability/quality_monitoring/business_intelligence/_index.md b/content/en/data_observability/quality_monitoring/business_intelligence/_index.md new file mode 100644 index 00000000000..1db9c8401cc --- /dev/null +++ b/content/en/data_observability/quality_monitoring/business_intelligence/_index.md @@ -0,0 +1,30 @@ +--- +title: Business Intelligence Integrations +description: "Connect your business intelligence tools to Datadog to visualize data lineage, monitor data quality, and understand downstream impact."
+further_reading: + - link: '/data_observability/' + tag: 'Documentation' + text: 'Data Observability' +--- +## Overview + +Datadog Data Observability connects directly to your business intelligence tools to help you understand how your data is delivered, transformed, and consumed across dashboards and reports. + +By integrating with tools like Tableau, Sigma, and Metabase, Datadog automatically ingests metadata about dashboards, datasets, and fields, and builds complete end-to-end lineage between your data warehouse and BI layer. This enables data teams to trace issues upstream to their source and understand their downstream business impact. + +Use these integrations to: +- **Visualize lineage** from warehouse tables and columns to BI dashboards, reports, and fields for impact analysis +- **Monitor data quality** and understand when stale or failed transformations affect downstream dashboards +- **Correlate BI performance and reliability issues** with upstream data pipeline incidents +- **Plan schema or transformation changes** with insight into which dashboards will be affected + +{{< whatsnext desc="Connect to these business intelligence tools:" >}} + {{< nextlink href="data_observability/quality_monitoring/business_intelligence/tableau" >}}Tableau{{< /nextlink >}} + {{< nextlink href="data_observability/quality_monitoring/business_intelligence/sigma" >}}Sigma{{< /nextlink >}} + {{< nextlink href="data_observability/quality_monitoring/business_intelligence/metabase" >}}Metabase{{< /nextlink >}} + {{< nextlink href="data_observability/quality_monitoring/business_intelligence/powerbi" >}}Power BI{{< /nextlink >}} +{{< /whatsnext >}} + +## Further reading + +{{< partial name="whats-next/whats-next.html" >}} diff --git a/content/en/data_observability/quality_monitoring/business_intelligence/metabase.md b/content/en/data_observability/quality_monitoring/business_intelligence/metabase.md new file mode 100644 index 00000000000..c405b603c75 --- /dev/null +++ b/content/en/data_observability/quality_monitoring/business_intelligence/metabase.md @@ -0,0 +1,78 @@ +--- +title: Metabase +description: Connect Metabase to Datadog Data Observability to view end-to-end lineage from warehouse tables to dashboards. +further_reading: + - link: '/data_observability/' + tag: 'Documentation' + text: 'Learn about Data Observability' +--- + +## Overview + +Datadog's Metabase integration helps data teams make changes to their data platform without breaking Metabase dashboards, and identify unused cards or dashboards. When Datadog connects, it: + +- Pulls metadata from your Metabase environment, including cards and dashboards. +- Automatically generates lineage from warehouse tables and columns to downstream Metabase cards, as well as from those cards to downstream dashboards. + +## Connect Metabase + +### Prerequisites + +This integration requires a Metabase Pro or Enterprise plan. + +### Generate an API key + +Follow [Metabase's API documentation][1] to generate an API key. + +### Get DNS alias (required for cloud instances only) + +1. Log into your Metabase cloud instance as an administrator. +1. Click on the gear icon in the upper right corner. +1. Select **Admin settings**. +1. Go to the **Settings** tab. +1. Click on the **Cloud** tab from the left menu. +1. Click on **Go to the Metabase Store**. +1. Log into your Metabase Store using Metabase credentials. +1. Go to the **Instances** tab. +1. Click on the DNS alias section to get the DNS alias value. 
+ +### Get self-hosted instance domain (required for self-hosted instances only) + +**Note**: Your self-hosted Metabase instance must be accessible from the internet through HTTPS only. + +1. Log in to your Metabase instance as an administrator. +1. Click on the gear icon in the upper right corner. +1. Select **Admin settings**. +1. Go to the **Settings** tab. +1. Click on the **General** tab from the left menu. +1. Under **SITE URL**, copy the domain portion of the URL. For example, if the URL is `https://example.com`, copy `example.com`. + +### Add the Metabase integration + +1. Navigate to the [Metabase integration tile][2] and enter the following information: + + | Parameter | Description | + |-----------|-------------| + | Account name | Datadog-only account name associated with these credentials. | + | Instance type | The hosting type of your Metabase instance. Valid values are `cloud` or `self-hosted`. Default is `cloud`. | + | DNS alias | The DNS alias of your Metabase cloud instance (required for cloud instances only). Must be at least three characters long and contain only lowercase letters, dashes, and numbers. | + | Self-hosted instance domain | The domain of your self-hosted Metabase instance (required for self-hosted instances only). Must be publicly accessible through HTTPS only (for example, `example.com`). | + | Metabase API key | The API key used to authenticate the API requests. | + +2. After you've entered these credentials, click **Save**. + +## What's next + +When your Metabase account is successfully connected, Datadog syncs and automatically derives lineage from warehouse tables/columns to Metabase cards and dashboards. + +Initial syncs may take up to several hours depending on the size of your Metabase deployment. + +After syncing, you can explore your Metabase assets and their upstream dependencies in the [Data Observability Explorer][3]. + +## Further reading + +{{< partial name="whats-next/whats-next.html" >}} + +[1]: https://www.metabase.com/docs/latest/people-and-groups/api-keys#create-an-api-key +[2]: https://app.datadoghq.com/integrations/metabase +[3]: https://app.datadoghq.com/datasets/catalog diff --git a/content/en/data_observability/quality_monitoring/business_intelligence/powerbi.md b/content/en/data_observability/quality_monitoring/business_intelligence/powerbi.md new file mode 100644 index 00000000000..b1ead3a5571 --- /dev/null +++ b/content/en/data_observability/quality_monitoring/business_intelligence/powerbi.md @@ -0,0 +1,79 @@ +--- +title: Power BI +description: Connect Power BI to Datadog Data Observability to view end-to-end lineage from warehouse tables to dashboards. +further_reading: + - link: '/data_observability/' + tag: 'Documentation' + text: 'Learn about Data Observability' +--- + +## Overview + +Datadog's Power BI integration helps data teams make changes to their data platform without breaking dashboards, and identify unused reports and dashboards to remove. When Datadog connects, it: + +- Pulls metadata from your Power BI account like datasets, reports, and dashboards. +- Automatically generates lineage between warehouse tables with downstream datasets, reports, and dashboards. + +## Connect Power BI + +### Create an app registration and security group + +#### App registration + +1. Sign into Microsoft Azure. +2. Search for **App registrations**. +3. Click **New registration**. +4. Fill in the required fields and register an application for Datadog. +5. Copy the Application (client) ID somewhere safe. +6. 
Go to **Certificates & secrets** in sidebar and click **New client secret**. +7. Add a secret for Datadog. +8. Copy the secret value somewhere safe. + +#### Security group + +1. Search for **Azure Active Directory**. +2. Go to **Groups** in the sidebar and click **New group**. +3. Create a group for the app registration. +4. Click into the newly created group. You may need to refresh the page for it to show up. +5. Go to **Members** in the sidebar and click **Add members**. +6. Find the app registration created earlier and add it as a member. + +### Grant access in Power BI + +#### Enable API and admin API access for security group in Power BI Admin + +1. Go to the Power BI Admin portal. +2. In Tenant settings, go to **Developer settings**. +3. Enable **Allow service principals to use Power BI APIs** for your security group. +4. In Tenant settings, find **Admin API settings**. +5. Enable the following for your security group: + - **Allow service principals to use read-only admin APIs** + - **Enhance admin APIs responses with detailed metadata** + - **Enhance admin APIs responses with DAX and mashup expressions** + +#### Grant access to workspaces + +From the Power BI Admin portal: + +1. From the sidebar, click **Workspaces** to open the Workspaces pane. +2. For each workspace you want Datadog to have access to, open the **Access** panel by clicking the three vertical dots and selecting **Workspace access**. + +### Add the Power BI integration + +1. Navigate to the [Power BI integration tile][1] and enter your tenant ID, and the client ID and secret from earlier. +2. After you've entered these credentials, click **Save**. + +## What's next + +When your Power BI account is successfully connected, Datadog syncs and automatically derives lineage from warehouse tables/columns to Power BI datasets, reports, and dashboards. + +Initial syncs may take up to several hours depending on the size of your Power BI deployment. + +After syncing, you can explore your Power BI assets and their upstream dependencies in the [Data Observability Explorer][2]. + +## Further reading + +{{< partial name="whats-next/whats-next.html" >}} + +[1]: https://app.datadoghq.com/integrations/power-bi +[2]: https://app.datadoghq.com/datasets/catalog diff --git a/content/en/data_observability/quality_monitoring/business_intelligence/sigma.md b/content/en/data_observability/quality_monitoring/business_intelligence/sigma.md new file mode 100644 index 00000000000..320b8cecde6 --- /dev/null +++ b/content/en/data_observability/quality_monitoring/business_intelligence/sigma.md @@ -0,0 +1,49 @@ +--- +title: Sigma +description: Connect Sigma to Datadog Data Observability to view end-to-end lineage from warehouse tables to workbooks. +further_reading: + - link: '/data_observability/' + tag: 'Documentation' + text: 'Learn about Data Observability' +--- + +## Overview + +Datadog's Sigma integration helps data teams make changes to their data platform without breaking Sigma workbooks, and identify unused workbooks or datasets. When Datadog connects, it: + +- Pulls metadata from your Sigma site, including workbooks and queries. +- Automatically generates lineage between warehouse tables and columns and downstream Sigma datasets and workbooks. + +## Connect Sigma + +### Retrieve API keys + +Follow [Sigma's API client instructions][1] to retrieve a Client ID and Client Secret (also called an API token). + +### Add the Sigma integration + +1. 
Navigate to the [Sigma integration tile][2] and enter the following information: + + - Account name + - Client ID + - Client secret + - Cloud provider. If you don't know your cloud provider, you can find it using Sigma's [Supported cloud platforms and regions documentation][3]. + +2. After you've entered these credentials, click **Save**. + +## What's next + +When your Sigma account is successfully connected, Datadog syncs and automatically derives lineage from warehouse tables/columns to Sigma workbooks. + +Initial syncs may take up to several hours depending on the size of your Sigma deployment. + +After syncing, you can explore your Sigma assets and their upstream dependencies in the [Data Observability Explorer][4]. + +## Further reading + +{{< partial name="whats-next/whats-next.html" >}} + +[1]: https://help.sigmacomputing.com/reference/generate-client-credentials +[2]: https://app.datadoghq.com/integrations/sigma-computing +[3]: https://help.sigmacomputing.com/docs/region-warehouse-and-feature-support#supported-cloud-platforms-and-regions +[4]: https://app.datadoghq.com/datasets/catalog diff --git a/content/en/data_observability/quality_monitoring/business_intelligence/tableau.md b/content/en/data_observability/quality_monitoring/business_intelligence/tableau.md new file mode 100644 index 00000000000..2bad8bcfa36 --- /dev/null +++ b/content/en/data_observability/quality_monitoring/business_intelligence/tableau.md @@ -0,0 +1,62 @@ +--- +title: Tableau +description: Connect Tableau to Datadog Data Observability to view end-to-end lineage from warehouse tables to dashboards. +further_reading: + - link: '/data_observability/' + tag: 'Documentation' + text: 'Learn about Data Observability' +--- + +## Overview + +Datadog's Tableau integration helps data teams make changes to their data platform without breaking dashboards, and identify unused workbooks and data sources to remove. When Datadog connects, it: + +- Pulls metadata from your Tableau site like fields, worksheets, dashboards, workbooks, and data sources +- Automatically generates lineage between warehouse tables/columns with downstream Tableau fields, worksheets, dashboards, and workbooks. + +## Connect Tableau + +### Tableau requirements + +In order for Datadog to extract your metadata from Tableau, you must meet all of the [Tableau Metadata GraphQL][1] requirements: + +- Tableau Cloud/Server v2019.3+ +- Tableau REST API must be enabled +- The Metadata API must be [enabled][2] + +### Create a personal access token + +For details on how to create a Personal Access Token (PAT), see the [Tableau Documentation][3]. Scope the PAT to the target site if needed. + +### Add the Tableau integration + +To connect Tableau to Datadog: + +1. Navigate to the [Tableau integration tile][4] and enter the following information: + + - Account name (for use within Datadog only) + - Site name (optional, leave blank to use the default site) + - Server version (example: 2025.2.0). Find this in your Tableau Server or Cloud admin settings. + - Server endpoint (example: https://prod-useast-b.online.tableau.com) + - Token name + - Token value + +2. After you've entered these credentials, click **Save**. + +## What's next + +When your Tableau account is successfully connected, Datadog syncs and automatically derives lineage from warehouse tables/columns to Tableau fields, worksheets, dashboards, workbooks, and data sources. + +Initial syncs may take up to several hours depending on the size of your Tableau deployment. 
+ +After syncing, you can explore your Tableau assets and their upstream dependencies in the [Data Observability Explorer][5]. + +## Further reading + +{{< partial name="whats-next/whats-next.html" >}} + +[1]: https://help.tableau.com/current/api/metadata_api/en-us/index.html#metadata-api-and-graphql +[2]: https://help.tableau.com/current/api/metadata_api/en-us/docs/meta_api_start.html#enable-the-tableau-metadata-api-for-tableau-server +[3]: https://help.tableau.com/current/pro/desktop/en-us/useracct.htm#create-and-revoke-personal-access-tokens +[4]: https://app.datadoghq.com/integrations/tableau +[5]: https://app.datadoghq.com/datasets/catalog \ No newline at end of file diff --git a/content/en/data_observability/quality_monitoring/data_warehouses/_index.md b/content/en/data_observability/quality_monitoring/data_warehouses/_index.md new file mode 100644 index 00000000000..c3f13b5fc41 --- /dev/null +++ b/content/en/data_observability/quality_monitoring/data_warehouses/_index.md @@ -0,0 +1,27 @@ +--- +title: Warehouse Integrations +description: "Connect your data warehouse to Datadog to monitor data quality, track costs, and analyze usage patterns." +further_reading: + - link: '/data_observability/' + tag: 'Documentation' + text: 'Data Observability' +--- +## Overview + +Datadog Data Observability connects directly to your cloud data warehouse to help monitor the health of your data. When connected, Datadog automatically syncs your warehouse's metadata and query history to detect data quality, cost, performance, and usage issues. + +Use these integrations to: +- **Sync your data schema** to keep Datadog in sync with your warehouse structure +- **Create data quality monitors** to catch anomalies before they impact downstream dashboards and models +- **Analyze query and job history** to understand usage patterns, costs, and performance +- **Trace column-level lineage** to map dependencies and pinpoint root causes of data issues + +{{< whatsnext desc="Connect to these data warehouses:" >}} + {{< nextlink href="data_observability/quality_monitoring/data_warehouses/snowflake" >}}Snowflake{{< /nextlink >}} + {{< nextlink href="data_observability/quality_monitoring/data_warehouses/databricks" >}}Databricks{{< /nextlink >}} + {{< nextlink href="data_observability/quality_monitoring/data_warehouses/bigquery" >}}BigQuery{{< /nextlink >}} +{{< /whatsnext >}} + +## Further reading + +{{< partial name="whats-next/whats-next.html" >}} diff --git a/content/en/data_observability/quality_monitoring/data_warehouses/bigquery.md b/content/en/data_observability/quality_monitoring/data_warehouses/bigquery.md new file mode 100644 index 00000000000..ec4d9a14ec1 --- /dev/null +++ b/content/en/data_observability/quality_monitoring/data_warehouses/bigquery.md @@ -0,0 +1,52 @@ +--- +title: BigQuery +description: "Connect BigQuery to Datadog Data Observability to monitor data quality, track usage, and detect issues." +further_reading: + - link: '/data_observability/' + tag: 'Documentation' + text: 'Learn about Data Observability' +--- + +## Overview + +The BigQuery integration connects Datadog to your Google Cloud project to sync metadata, query history, and table-level metrics. Use it to monitor data freshness, detect anomalies, and trace lineage across your data stack. 
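After you complete the setup steps below, you can optionally confirm the service account's access from your own machine by impersonating it and running a lightweight query. This is a hedged sketch, not part of Datadog's setup: it assumes the `google-auth` and `google-cloud-bigquery` packages, that your own credentials hold the Service Account Token Creator role on the service account, and that the service account email and project ID shown are placeholders.

```python
# Hypothetical post-setup check: impersonate the service account created for
# Datadog and confirm it can list datasets and run a query. Placeholders below.
import google.auth
from google.auth import impersonated_credentials
from google.cloud import bigquery

SERVICE_ACCOUNT = "datadog-data-obs@my-project.iam.gserviceaccount.com"  # placeholder
PROJECT_ID = "my-project"  # placeholder

# Your Application Default Credentials mint a short-lived token for the
# service account; this requires Token Creator on that service account.
source_credentials, _ = google.auth.default()
target_credentials = impersonated_credentials.Credentials(
    source_credentials=source_credentials,
    target_principal=SERVICE_ACCOUNT,
    target_scopes=["https://www.googleapis.com/auth/cloud-platform"],
    lifetime=300,
)

# Data Viewer and Job User should be enough to list datasets and run this query.
client = bigquery.Client(project=PROJECT_ID, credentials=target_credentials)
print([d.dataset_id for d in client.list_datasets()])
print(list(client.query("SELECT 1").result()))
```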
+ +## Set up your account in BigQuery + +To set up your account in BigQuery, create a service account with the following permissions: + +- [BigQuery Data Viewer][1] (`roles/bigquery.dataViewer`) + - Provides visibility into datasets +- [BigQuery Resource Viewer][2] (`roles/bigquery.resourceViewer`) + - Provides visibility into jobs +- [Job User][3] (`roles/bigquery.jobUser`) + - Required to run data quality queries +- [Compute Viewer][4] (`roles/compute.viewer`) + - Provides read-only access to get and list Compute Engine resources +- [Service Account Token Creator][5] (`roles/iam.serviceAccountTokenCreator`) + - Required to provision short-lived access token for [service account impersonation][6] - a standard and recommended method for authenticating third party systems. + +## Configure the BigQuery integration in Datadog + +To configure the BigQuery integration in Datadog: + +1. Navigate to [**Datadog Data Observability** > **Settings**][7]. +2. Click the **Configure** button for the BigQuery option. +3. Follow the flow to use an existing Service Account or set up a new one. +4. Turn on the **Enable Data Observability** toggle and click **Add Account**. + +## Next steps + +After you configure the integration, Datadog begins syncing your information schema and query history in the background. Initial syncs can take several hours depending on the size of your BigQuery deployment. + +## Further reading + +{{< partial name="whats-next/whats-next.html" >}} + +[1]: https://docs.cloud.google.com/bigquery/docs/access-control#bigquery.dataViewer +[2]: https://docs.cloud.google.com/bigquery/docs/access-control#bigquery.resourceViewer +[3]: https://docs.cloud.google.com/bigquery/docs/access-control#bigquery.jobUser +[4]: https://docs.cloud.google.com/compute/docs/access/iam#compute.viewer +[5]: https://docs.cloud.google.com/iam/docs/service-account-permissions#token-creator-role +[6]: https://docs.cloud.google.com/docs/authentication/use-service-account-impersonation +[7]: https://app.datadoghq.com/datasets/settings/integrations \ No newline at end of file diff --git a/content/en/data_observability/quality_monitoring/data_warehouses/databricks.md b/content/en/data_observability/quality_monitoring/data_warehouses/databricks.md new file mode 100644 index 00000000000..a9ea0f3db8e --- /dev/null +++ b/content/en/data_observability/quality_monitoring/data_warehouses/databricks.md @@ -0,0 +1,75 @@ +--- +title: Databricks +description: "Connect Databricks to Datadog Data Observability to monitor data quality, track usage, and detect issues." +further_reading: + - link: '/data_observability/' + tag: 'Documentation' + text: 'Data Observability' + +--- + +
The Quality Monitoring integration with Databricks supports only Unity Catalog. If you use Hive Metastore, contact your account representative.
+ +## Overview + +The Databricks integration connects Datadog to your Databricks workspace to sync metadata, query history, and table-level metrics. Use it to monitor data freshness, detect anomalies, and trace lineage across your data stack. + +**Note**: The instructions below are for Quality Monitoring. For Jobs Monitoring, see [Enable Data Jobs Monitoring for Databricks][1]. + +## Set up your account in Databricks + +### Step 1 - Connect the Databricks integration tile + +1. Complete the installation instructions in the [Databricks integration documentation][2] using Datadog's integration tile. Take note of the service principal's application ID and save it somewhere safe, because it will be referenced later. + + **Note**: Workspace Admin permissions are not required for Quality Monitoring. + +2. When configuring the integration, turn on the **Data Observability** toggle. +3. Click **Save Databricks Workspace**. + +### Step 2 - Grant access + +Grant read-only access to the scope of data you want to monitor: + +{{< tabs >}} +{{% tab "Full catalog access" %}} + +Use the full catalog access option for simpler setup. It automatically includes future tables without needing to update permissions. + + +```sql +GRANT USE_CATALOG ON CATALOG TO ``; +GRANT USE_SCHEMA ON CATALOG TO ``; +GRANT SELECT ON CATALOG TO ``; +``` + +{{% /tab %}} +{{% tab "Specific tables" %}} + +Use the specific tables option for least-privilege access or if you only need to monitor a subset of your data. You must update permissions when adding new tables. + +```sql +GRANT USE_CATALOG ON CATALOG TO ``; +GRANT USE_SCHEMA ON SCHEMA . TO ``; +GRANT SELECT ON TABLE .. TO ``; +``` + +{{% /tab %}} +{{< /tabs >}} + +These permissions are needed for the following reasons: + +- `GRANT USE_CATALOG` is required to navigate into the catalog and discover schemas. +- `GRANT USE_SCHEMA` is required to enumerate tables and monitor schema-level health. +- `GRANT SELECT` is required for data quality monitoring, such as custom SQL or distribution checks. + +## Next steps + +After you configure the integration, Datadog begins syncing your metadata and query history in the background. Initial syncs can take several hours depending on the size of your Databricks deployment. + +## Further reading + +{{< partial name="whats-next/whats-next.html" >}} + +[1]: /data_observability/jobs_monitoring/databricks/ +[2]: /integrations/databricks/ \ No newline at end of file diff --git a/content/en/data_observability/quality_monitoring/data_warehouses/snowflake.md b/content/en/data_observability/quality_monitoring/data_warehouses/snowflake.md new file mode 100644 index 00000000000..e183ef36360 --- /dev/null +++ b/content/en/data_observability/quality_monitoring/data_warehouses/snowflake.md @@ -0,0 +1,170 @@ +--- +title: Snowflake +description: "Connect Snowflake to Datadog Data Observability to monitor data quality, track usage, and detect issues." +further_reading: + - link: '/data_observability/' + tag: 'Documentation' + text: 'Data Observability' +--- + +## Overview + +The Snowflake integration connects Datadog to your Snowflake account to sync metadata, query history, and table-level metrics. Use it to monitor data freshness, detect anomalies, and trace lineage across your warehouse and downstream tools. + +## Prerequisites + +Before you begin, make sure you have: + +- Access to the `ACCOUNTADMIN` role in Snowflake. +- An RSA key pair. For more information, see the [Snowflake key-pair authentication docs][1]. 
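Snowflake's linked documentation generates the key pair with OpenSSL. If you prefer Python, the sketch below is an equivalent, unencrypted alternative using the `cryptography` package; the file names are arbitrary, and the printed public key is what you paste into the `RSA_PUBLIC_KEY` property in the setup SQL below. Use an encrypted private key instead if your security policy requires a passphrase.

```python
# Optional alternative to the OpenSSL commands in Snowflake's key-pair docs:
# generate an unencrypted 2048-bit RSA key pair. File names are arbitrary.
from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.primitives.asymmetric import rsa

key = rsa.generate_private_key(public_exponent=65537, key_size=2048)

# PKCS#8 PEM private key; you upload this when configuring the integration in Datadog.
with open("rsa_key.p8", "wb") as f:
    f.write(key.private_bytes(
        encoding=serialization.Encoding.PEM,
        format=serialization.PrivateFormat.PKCS8,
        encryption_algorithm=serialization.NoEncryption(),
    ))

# Public key, with the BEGIN/END lines and newlines stripped, ready to paste
# into the RSA_PUBLIC_KEY property in the SQL that follows.
public_pem = key.public_key().public_bytes(
    encoding=serialization.Encoding.PEM,
    format=serialization.PublicFormat.SubjectPublicKeyInfo,
).decode()
print("".join(line for line in public_pem.splitlines() if "PUBLIC KEY" not in line))
```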
+ +## Set up your account in Snowflake + +To set up your account in Snowflake: + +1. Define the following variables: + + ```sql + SET role_name = 'DATADOG_ROLE'; + SET user_name = 'DATADOG_USER'; + SET warehouse_name = 'DATADOG_WH'; + SET database_name = ''; + ``` + +2. Create a role, warehouse, and key-pair-authenticated user. + + ```sql + USE ROLE ACCOUNTADMIN; + + -- Create monitoring role + CREATE ROLE IF NOT EXISTS IDENTIFIER($role_name); + GRANT ROLE IDENTIFIER($role_name) TO ROLE SYSADMIN; + + -- Create an X-SMALL warehouse (auto-suspend after 30s) + CREATE WAREHOUSE IF NOT EXISTS IDENTIFIER($warehouse_name) + WAREHOUSE_SIZE = XSMALL + WAREHOUSE_TYPE = STANDARD + AUTO_SUSPEND = 30 + AUTO_RESUME = TRUE + INITIALLY_SUSPENDED = TRUE; + + -- Create Datadog user—key-pair only (no password) + -- Replace with your RSA public key (PEM, no headers/newlines) + CREATE USER IF NOT EXISTS IDENTIFIER($user_name) + LOGIN_NAME = $user_name + DEFAULT_ROLE = $role_name + DEFAULT_WAREHOUSE = $warehouse_name + RSA_PUBLIC_KEY = ''; + + GRANT ROLE IDENTIFIER($role_name) TO USER IDENTIFIER($user_name); + ``` + +3. Grant monitoring privileges to the role. + + ```sql + -- Warehouse usage + GRANT USAGE ON WAREHOUSE IDENTIFIER($warehouse_name) TO ROLE IDENTIFIER($role_name); + + -- Account‐level monitoring (tasks, pipes, query history) + GRANT MONITOR EXECUTION ON ACCOUNT TO ROLE IDENTIFIER($role_name); + + -- Imported privileges on Snowflake's ACCOUNT_USAGE + GRANT IMPORTED PRIVILEGES ON DATABASE SNOWFLAKE TO ROLE IDENTIFIER($role_name); + + -- Imported privileges on any external data shares + -- GRANT IMPORTED PRIVILEGES ON DATABASE IDENTIFIER($database_name) TO ROLE IDENTIFIER($role_name); + + -- Grant the following ACCOUNT_USAGE views to the new role. Do this if you wish to collect Snowflake account usage logs and metrics. + GRANT DATABASE ROLE SNOWFLAKE.OBJECT_VIEWER TO ROLE IDENTIFIER($role_name); + GRANT DATABASE ROLE SNOWFLAKE.USAGE_VIEWER TO ROLE IDENTIFIER($role_name); + GRANT DATABASE ROLE SNOWFLAKE.GOVERNANCE_VIEWER TO ROLE IDENTIFIER($role_name); + GRANT DATABASE ROLE SNOWFLAKE.SECURITY_VIEWER TO ROLE IDENTIFIER($role_name); + + -- Grant ORGANIZATION_USAGE_VIEWER to the new role. Do this if you wish to collect Snowflake organization usage metrics. + GRANT DATABASE ROLE SNOWFLAKE.ORGANIZATION_USAGE_VIEWER TO ROLE IDENTIFIER($role_name); + + -- Grant ORGANIZATION_BILLING_VIEWER to the new role. Do this if you wish to collect Snowflake cost data. + GRANT DATABASE ROLE SNOWFLAKE.ORGANIZATION_BILLING_VIEWER TO ROLE IDENTIFIER($role_name); + ``` + +
To avoid missing new tables, use schema-level future grants. Snowflake gives schema-level future grants precedence over database-level future grants, so if Datadog has only database-level grants while other roles hold schema-level future grants on the same schemas, new tables may not appear in Datadog. See Snowflake's future grants documentation for details.
+ +4. Grant read-only access to your data. + + ```sql + USE DATABASE IDENTIFIER($database_name); + + CREATE OR REPLACE PROCEDURE grantFutureAccess(databaseName string, roleName string) + returns string not null + language javascript + as + $$ + var schemaResultSet = snowflake.execute({ sqlText: 'SELECT SCHEMA_NAME FROM ' + '"' + DATABASENAME + '"' + ".INFORMATION_SCHEMA.SCHEMATA WHERE SCHEMA_NAME != 'INFORMATION_SCHEMA';"}); + + var numberOfSchemasGranted = 0; + while (schemaResultSet.next()) { + numberOfSchemasGranted += 1; + var schemaAndRoleSuffix = ' in schema "' + DATABASENAME + '"."' + + schemaResultSet.getColumnValue('SCHEMA_NAME') + '" to role ' + ROLENAME + ';' + + snowflake.execute({ sqlText: 'grant USAGE on schema "' + DATABASENAME + '"."' + + schemaResultSet.getColumnValue('SCHEMA_NAME') + '" to role ' + ROLENAME + ';'}); + snowflake.execute({ sqlText: 'grant SELECT on all tables' + schemaAndRoleSuffix}); + snowflake.execute({ sqlText: 'grant SELECT on all views' + schemaAndRoleSuffix}); + snowflake.execute({ sqlText: 'grant SELECT on all event tables' + schemaAndRoleSuffix}); + snowflake.execute({ sqlText: 'grant SELECT on all external tables' + schemaAndRoleSuffix}); + snowflake.execute({ sqlText: 'grant SELECT on all dynamic tables' + schemaAndRoleSuffix}); + snowflake.execute({ sqlText: 'grant SELECT on future tables' + schemaAndRoleSuffix}); + snowflake.execute({ sqlText: 'grant SELECT on future views' + schemaAndRoleSuffix}); + snowflake.execute({ sqlText: 'grant SELECT on future event tables' + schemaAndRoleSuffix}); + snowflake.execute({ sqlText: 'grant SELECT on future external tables' + schemaAndRoleSuffix}); + snowflake.execute({ sqlText: 'grant SELECT on future dynamic tables' + schemaAndRoleSuffix}); + } + + return 'Granted access to ' + numberOfSchemasGranted + ' schemas'; + $$ + ; + + GRANT USAGE ON DATABASE IDENTIFIER($database_name) TO ROLE IDENTIFIER($role_name); + CALL grantFutureAccess('', ''); + ``` + +5. (Optional) If your organization uses [Snowflake event tables][2], you can grant the Datadog role access to them. + + ```sql + -- Grant usage on the database, schema, and table of the event table + GRANT USAGE ON DATABASE TO ROLE IDENTIFIER($role_name); + GRANT USAGE ON SCHEMA . TO ROLE IDENTIFIER($role_name); + GRANT SELECT ON TABLE .. TO ROLE IDENTIFIER($role_name); + + -- Snowflake-provided application roles for event logs + GRANT APPLICATION ROLE SNOWFLAKE.EVENTS_VIEWER TO ROLE IDENTIFIER($role_name); + GRANT APPLICATION ROLE SNOWFLAKE.EVENTS_ADMIN TO ROLE IDENTIFIER($role_name); + ``` + + After completing the Snowflake setup, configure the Snowflake integration in Datadog. + +## Configure the Snowflake integration in Datadog + +To configure the Snowflake integration in Datadog: + +1. Navigate to [**Datadog Data Observability** > **Settings**][3]. +2. Click the **Configure** button for the Snowflake option. + + {{< img src="data_observability/data-obs-settings-integrations.png" alt="List of Data Observability integrations on the Settings page" style="width:100%;" >}} + +3. Follow the flow to enter your account details and upload a private key. +4. Turn on **Enable Data Observability for Snowflake tables**. +5. Click **Save & Test**. + +## Next steps + +After you save, Datadog begins syncing your information schema and query history in the background. Initial syncs can take up to several hours depending on the size of your Snowflake deployment. 
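If the sync does not appear to start, you can rule out credential and grant issues by connecting as the new user directly. The snippet below is a minimal sketch, assuming the `snowflake-connector-python` and `cryptography` packages and the default names from the setup script above; the account identifier is a placeholder.

```python
# Hypothetical verification: connect as DATADOG_USER with the key pair and
# confirm the role, warehouse, and ACCOUNT_USAGE grants. Placeholders below.
import snowflake.connector
from cryptography.hazmat.primitives import serialization

with open("rsa_key.p8", "rb") as f:
    private_key = serialization.load_pem_private_key(f.read(), password=None)

conn = snowflake.connector.connect(
    account="<account_identifier>",  # placeholder, for example myorg-myaccount
    user="DATADOG_USER",
    role="DATADOG_ROLE",
    warehouse="DATADOG_WH",
    private_key=private_key.private_bytes(
        encoding=serialization.Encoding.DER,
        format=serialization.PrivateFormat.PKCS8,
        encryption_algorithm=serialization.NoEncryption(),
    ),
)
cur = conn.cursor()
cur.execute("SELECT CURRENT_USER(), CURRENT_ROLE(), CURRENT_WAREHOUSE()")
print(cur.fetchone())

# A row count from ACCOUNT_USAGE confirms the IMPORTED PRIVILEGES grant took effect.
cur.execute(
    "SELECT COUNT(*) FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY "
    "WHERE start_time > DATEADD('hour', -1, CURRENT_TIMESTAMP())"
)
print(cur.fetchone())
conn.close()
```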
+ +## Further reading + +{{< partial name="whats-next/whats-next.html" >}} + +[1]: https://docs.snowflake.com/en/user-guide/key-pair-auth#generate-the-private-key +[2]: https://docs.snowflake.com/en/developer-guide/logging-tracing/event-table-setting-up +[3]: https://app.datadoghq.com/datasets/settings/integrations \ No newline at end of file diff --git a/content/en/database_monitoring/guide/_index.md b/content/en/database_monitoring/guide/_index.md index 1bf0a7eca1b..ce468791cf0 100644 --- a/content/en/database_monitoring/guide/_index.md +++ b/content/en/database_monitoring/guide/_index.md @@ -1,6 +1,7 @@ --- title: Database Monitoring Guides - +aliases: +- /database_monitoring/guide private: true disable_toc: true cascade: diff --git a/content/en/database_monitoring/guide/aurora_autodiscovery.md b/content/en/database_monitoring/guide/aurora_autodiscovery.md index ef49afb5876..29728480f95 100644 --- a/content/en/database_monitoring/guide/aurora_autodiscovery.md +++ b/content/en/database_monitoring/guide/aurora_autodiscovery.md @@ -1,6 +1,7 @@ --- title: Configuring Database Monitoring for Amazon Aurora DB Clusters - +aliases: +- /database_monitoring/aurora_autodiscovery --- This guide assumes you have configured Database Monitoring for your Amazon Aurora [Postgres][1] or [MySQL][11] databases. diff --git a/content/en/database_monitoring/guide/database_identifier.md b/content/en/database_monitoring/guide/database_identifier.md index c65023e11a9..02e4473b155 100644 --- a/content/en/database_monitoring/guide/database_identifier.md +++ b/content/en/database_monitoring/guide/database_identifier.md @@ -1,6 +1,8 @@ --- title: Identifying Databases for Database Monitoring description: Understand how to identify your databases and hosts for DBM +aliases: +- /database_monitoring/database_identifier further_reading: - link: "/database_monitoring/" tag: "Documentation" diff --git a/content/en/database_monitoring/guide/managed_authentication.md b/content/en/database_monitoring/guide/managed_authentication.md index 615e477b5bd..4a25ab033bc 100644 --- a/content/en/database_monitoring/guide/managed_authentication.md +++ b/content/en/database_monitoring/guide/managed_authentication.md @@ -1,6 +1,7 @@ --- title: Connecting with Managed Authentication - +aliases: +- /database_monitoring/managed_authentication --- This guide assumes that you have configured [Database Monitoring][1]. 
diff --git a/content/en/database_monitoring/guide/parameterized_queries.md b/content/en/database_monitoring/guide/parameterized_queries.md index 618624dcddd..8cd321e4d91 100644 --- a/content/en/database_monitoring/guide/parameterized_queries.md +++ b/content/en/database_monitoring/guide/parameterized_queries.md @@ -1,5 +1,7 @@ --- title: Capturing SQL Query Parameter Values With Database Monitoring +aliases: +- /database_monitoring/parameterized_queries further_reading: - link: "/database_monitoring/" tag: "Documentation" diff --git a/content/en/database_monitoring/guide/pg15_upgrade.md b/content/en/database_monitoring/guide/pg15_upgrade.md index aee29193256..4dfb562e007 100644 --- a/content/en/database_monitoring/guide/pg15_upgrade.md +++ b/content/en/database_monitoring/guide/pg15_upgrade.md @@ -1,6 +1,7 @@ --- title: Upgrading to PostgreSQL 15 and higher - +aliases: +- /database_monitoring/pg15_upgrade --- Run this command on each database host to enable the additional permission needed for the `datadog` user: diff --git a/content/en/database_monitoring/guide/rds_autodiscovery.md b/content/en/database_monitoring/guide/rds_autodiscovery.md index abd81c39086..cc6a5cebc51 100644 --- a/content/en/database_monitoring/guide/rds_autodiscovery.md +++ b/content/en/database_monitoring/guide/rds_autodiscovery.md @@ -1,6 +1,7 @@ --- title: Configuring Database Monitoring for Amazon RDS DB Instances - +aliases: +- /database_monitoring/rds_autodiscovery --- This guide assumes you have configured Database Monitoring for your Amazon RDS [Postgres][1] or [MySQL][11] databases. diff --git a/content/en/database_monitoring/guide/sql_alwayson.md b/content/en/database_monitoring/guide/sql_alwayson.md index 45f376b7ef8..e5c372c214e 100644 --- a/content/en/database_monitoring/guide/sql_alwayson.md +++ b/content/en/database_monitoring/guide/sql_alwayson.md @@ -1,5 +1,7 @@ --- title: Exploring SQL Server AlwaysOn Availability Groups +aliases: +- /database_monitoring/sql_alwayson further_reading: - link: "/database_monitoring/" tag: "Documentation" diff --git a/content/en/database_monitoring/guide/sql_deadlock.md b/content/en/database_monitoring/guide/sql_deadlock.md index 3daf9a51b07..500480f668a 100644 --- a/content/en/database_monitoring/guide/sql_deadlock.md +++ b/content/en/database_monitoring/guide/sql_deadlock.md @@ -1,5 +1,7 @@ --- title: Configuring Deadlock Monitoring on SQL Server +aliases: +- /database_monitoring/sql_deadlock further_reading: - link: "/database_monitoring/" tag: "Documentation" diff --git a/content/en/database_monitoring/guide/sql_extended_events.md b/content/en/database_monitoring/guide/sql_extended_events.md index d634d05ca92..3c3a90e810a 100644 --- a/content/en/database_monitoring/guide/sql_extended_events.md +++ b/content/en/database_monitoring/guide/sql_extended_events.md @@ -1,5 +1,7 @@ --- title: Configuring Query Completion and Query Error Capture on SQL Server +aliases: +- /database_monitoring/sql_extended_events further_reading: - link: "/database_monitoring/" tag: "Documentation" diff --git a/content/en/database_monitoring/guide/tag_database_statements.md b/content/en/database_monitoring/guide/tag_database_statements.md index d67bc69e1b9..6261c185057 100644 --- a/content/en/database_monitoring/guide/tag_database_statements.md +++ b/content/en/database_monitoring/guide/tag_database_statements.md @@ -1,6 +1,7 @@ --- title: Tagging SQL Statements - +aliases: +- /database_monitoring/tag_database_statements --- This guide assumes that you have configured [Database 
Monitoring][1]. diff --git a/data/partials/home.yaml b/data/partials/home.yaml index d73d7dfd273..e4d77b928fc 100644 --- a/data/partials/home.yaml +++ b/data/partials/home.yaml @@ -99,10 +99,10 @@ nav_sections: link: data_streams/ icon: datastreams-monitoring desc: Track and improve performance of your data streaming pipelines - - title: Data Jobs Monitoring - link: data_jobs/ + - title: Data Observability + link: data_observability/ icon: data-jobs-monitoring - desc: Monitor and optimize your data processing jobs + desc: Monitor data quality, performance, and cost - title: Universal Service Monitoring link: universal_service_monitoring/ icon: usm diff --git a/layouts/partials/data_jobs/setup-platforms.html b/layouts/partials/data_jobs/setup-platforms.html index 51b971eacd0..262292a1738 100644 --- a/layouts/partials/data_jobs/setup-platforms.html +++ b/layouts/partials/data_jobs/setup-platforms.html @@ -1,50 +1,68 @@ {{ $dot := . }}
-
- - - - - - - - - - +
diff --git a/static/images/data_observability/data-obs-correlate-trace.png b/static/images/data_observability/data-obs-correlate-trace.png new file mode 100644 index 00000000000..8c975ae8afc Binary files /dev/null and b/static/images/data_observability/data-obs-correlate-trace.png differ diff --git a/static/images/data_observability/data-obs-dbt-cloud-final.png b/static/images/data_observability/data-obs-dbt-cloud-final.png new file mode 100644 index 00000000000..279db0988c4 Binary files /dev/null and b/static/images/data_observability/data-obs-dbt-cloud-final.png differ diff --git a/static/images/data_observability/data-obs-lineage-blurred.png b/static/images/data_observability/data-obs-lineage-blurred.png new file mode 100644 index 00000000000..81e964007ba Binary files /dev/null and b/static/images/data_observability/data-obs-lineage-blurred.png differ diff --git a/static/images/data_observability/data-obs-overview-1.png b/static/images/data_observability/data-obs-overview-1.png new file mode 100644 index 00000000000..eb53d6e0c71 Binary files /dev/null and b/static/images/data_observability/data-obs-overview-1.png differ diff --git a/static/images/data_observability/data-obs-settings-integrations.png b/static/images/data_observability/data-obs-settings-integrations.png new file mode 100644 index 00000000000..3f605dfe1bb Binary files /dev/null and b/static/images/data_observability/data-obs-settings-integrations.png differ diff --git a/static/images/data_observability/data_observability_lineage_trace-temp.png b/static/images/data_observability/data_observability_lineage_trace-temp.png new file mode 100644 index 00000000000..e60ac7b2fc3 Binary files /dev/null and b/static/images/data_observability/data_observability_lineage_trace-temp.png differ diff --git a/static/images/data_observability/overview-temp.png b/static/images/data_observability/overview-temp.png new file mode 100644 index 00000000000..332d639f723 Binary files /dev/null and b/static/images/data_observability/overview-temp.png differ diff --git a/static/images/integrations_logos/dbt-cloud_avatar.svg b/static/images/integrations_logos/dbt-cloud_avatar.svg new file mode 100644 index 00000000000..c1a1ce82598 --- /dev/null +++ b/static/images/integrations_logos/dbt-cloud_avatar.svg @@ -0,0 +1,3 @@ + + + \ No newline at end of file diff --git a/static/images/integrations_logos/dbt-cloud_large.svg b/static/images/integrations_logos/dbt-cloud_large.svg new file mode 100644 index 00000000000..87cd735e1e0 --- /dev/null +++ b/static/images/integrations_logos/dbt-cloud_large.svg @@ -0,0 +1,7 @@ + + + + + + + \ No newline at end of file