diff --git a/README.md b/README.md index 9b8ba6a6..bf4048ad 100644 --- a/README.md +++ b/README.md @@ -105,157 +105,171 @@ Further Information ### Metrics -| Name | Type | Cardinality | Help | -|-------------------------------------------------------------------|------------|-------------|-----------------------------------------------------------------------------------------------------| -| elasticsearch_breakers_estimated_size_bytes | gauge | 4 | Estimated size in bytes of breaker | -| elasticsearch_breakers_limit_size_bytes | gauge | 4 | Limit size in bytes for breaker | -| elasticsearch_breakers_tripped | counter | 4 | tripped for breaker | -| elasticsearch_cluster_health_active_primary_shards | gauge | 1 | The number of primary shards in your cluster. This is an aggregate total across all indices. | -| elasticsearch_cluster_health_active_shards | gauge | 1 | Aggregate total of all shards across all indices, which includes replica shards. | -| elasticsearch_cluster_health_delayed_unassigned_shards | gauge | 1 | Shards delayed to reduce reallocation overhead | -| elasticsearch_cluster_health_initializing_shards | gauge | 1 | Count of shards that are being freshly created. | -| elasticsearch_cluster_health_number_of_data_nodes | gauge | 1 | Number of data nodes in the cluster. | -| elasticsearch_cluster_health_number_of_in_flight_fetch | gauge | 1 | The number of ongoing shard info requests. | -| elasticsearch_cluster_health_number_of_nodes | gauge | 1 | Number of nodes in the cluster. | -| elasticsearch_cluster_health_number_of_pending_tasks | gauge | 1 | Cluster level changes which have not yet been executed | -| elasticsearch_cluster_health_task_max_waiting_in_queue_millis | gauge | 1 | Max time in millis that a task is waiting in queue. | -| elasticsearch_cluster_health_relocating_shards | gauge | 1 | The number of shards that are currently moving from one node to another node. 
| -| elasticsearch_cluster_health_status | gauge | 3 | Whether all primary and replica shards are allocated. | -| elasticsearch_cluster_health_timed_out | gauge | 1 | Number of cluster health checks timed out | -| elasticsearch_cluster_health_unassigned_shards | gauge | 1 | The number of shards that exist in the cluster state, but cannot be found in the cluster itself. | -| elasticsearch_clustersettings_stats_max_shards_per_node | gauge | 0 | Current maximum number of shards per node setting. | -| elasticsearch_filesystem_data_available_bytes | gauge | 1 | Available space on block device in bytes | -| elasticsearch_filesystem_data_free_bytes | gauge | 1 | Free space on block device in bytes | -| elasticsearch_filesystem_data_size_bytes | gauge | 1 | Size of block device in bytes | -| elasticsearch_filesystem_io_stats_device_operations_count | gauge | 1 | Count of disk operations | -| elasticsearch_filesystem_io_stats_device_read_operations_count | gauge | 1 | Count of disk read operations | -| elasticsearch_filesystem_io_stats_device_write_operations_count | gauge | 1 | Count of disk write operations | -| elasticsearch_filesystem_io_stats_device_read_size_kilobytes_sum | gauge | 1 | Total kilobytes read from disk | -| elasticsearch_filesystem_io_stats_device_write_size_kilobytes_sum | gauge | 1 | Total kilobytes written to disk | -| elasticsearch_indices_active_queries | gauge | 1 | The number of currently active queries | -| elasticsearch_indices_docs | gauge | 1 | Count of documents on this node | -| elasticsearch_indices_docs_deleted | gauge | 1 | Count of deleted documents on this node | -| elasticsearch_indices_deleted_docs_primary | gauge | 1 | Count of deleted documents with only primary shards | -| elasticsearch_indices_docs_primary | gauge | 1 | Count of documents with only primary shards on all nodes | -| elasticsearch_indices_docs_total | gauge | | Count of documents with shards on all nodes | -| elasticsearch_indices_fielddata_evictions | counter | 1 | 
Evictions from field data | -| elasticsearch_indices_fielddata_memory_size_bytes | gauge | 1 | Field data cache memory usage in bytes | -| elasticsearch_indices_filter_cache_evictions | counter | 1 | Evictions from filter cache | -| elasticsearch_indices_filter_cache_memory_size_bytes | gauge | 1 | Filter cache memory usage in bytes | -| elasticsearch_indices_flush_time_seconds | counter | 1 | Cumulative flush time in seconds | -| elasticsearch_indices_flush_total | counter | 1 | Total flushes | -| elasticsearch_indices_get_exists_time_seconds | counter | 1 | Total time get exists in seconds | -| elasticsearch_indices_get_exists_total | counter | 1 | Total get exists operations | -| elasticsearch_indices_get_missing_time_seconds | counter | 1 | Total time of get missing in seconds | -| elasticsearch_indices_get_missing_total | counter | 1 | Total get missing | -| elasticsearch_indices_get_time_seconds | counter | 1 | Total get time in seconds | -| elasticsearch_indices_get_total | counter | 1 | Total get | -| elasticsearch_indices_indexing_delete_time_seconds_total | counter | 1 | Total time indexing delete in seconds | -| elasticsearch_indices_indexing_delete_total | counter | 1 | Total indexing deletes | -| elasticsearch_indices_index_current | gauge | 1 | The number of documents currently being indexed to an index | -| elasticsearch_indices_indexing_index_time_seconds_total | counter | 1 | Cumulative index time in seconds | -| elasticsearch_indices_indexing_index_total | counter | 1 | Total index calls | -| elasticsearch_indices_mappings_stats_fields | gauge | 1 | Count of fields currently mapped by index | -| elasticsearch_indices_mappings_stats_json_parse_failures_total | counter | 0 | Number of errors while parsing JSON | -| elasticsearch_indices_mappings_stats_scrapes_total | counter | 0 | Current total Elasticsearch Indices Mappings scrapes | -| elasticsearch_indices_mappings_stats_up | gauge | 0 | Was the last scrape of the Elasticsearch Indices Mappings 
endpoint successful | -| elasticsearch_indices_merges_docs_total | counter | 1 | Cumulative docs merged | -| elasticsearch_indices_merges_total | counter | 1 | Total merges | -| elasticsearch_indices_merges_total_size_bytes_total | counter | 1 | Total merge size in bytes | -| elasticsearch_indices_merges_total_time_seconds_total | counter | 1 | Total time spent merging in seconds | -| elasticsearch_indices_query_cache_cache_total | counter | 1 | Count of query cache | -| elasticsearch_indices_query_cache_cache_size | gauge | 1 | Size of query cache | -| elasticsearch_indices_query_cache_count | counter | 2 | Count of query cache hit/miss | -| elasticsearch_indices_query_cache_evictions | counter | 1 | Evictions from query cache | -| elasticsearch_indices_query_cache_memory_size_bytes | gauge | 1 | Query cache memory usage in bytes | -| elasticsearch_indices_query_cache_total | counter | 1 | Size of query cache total | -| elasticsearch_indices_refresh_time_seconds_total | counter | 1 | Total time spent refreshing in seconds | -| elasticsearch_indices_refresh_total | counter | 1 | Total refreshes | -| elasticsearch_indices_request_cache_count | counter | 2 | Count of request cache hit/miss | -| elasticsearch_indices_request_cache_evictions | counter | 1 | Evictions from request cache | -| elasticsearch_indices_request_cache_memory_size_bytes | gauge | 1 | Request cache memory usage in bytes | -| elasticsearch_indices_search_fetch_time_seconds | counter | 1 | Total search fetch time in seconds | -| elasticsearch_indices_search_fetch_total | counter | 1 | Total number of fetches | -| elasticsearch_indices_search_query_time_seconds | counter | 1 | Total search query time in seconds | -| elasticsearch_indices_search_query_total | counter | 1 | Total number of queries | -| elasticsearch_indices_segments_count | gauge | 1 | Count of index segments on this node | -| elasticsearch_indices_segments_memory_bytes | gauge | 1 | Current memory size of segments in bytes | -| 
elasticsearch_indices_settings_stats_read_only_indices | gauge | 1 | Count of indices that have read_only_allow_delete=true | -| elasticsearch_indices_settings_total_fields | gauge | | Index setting value for index.mapping.total_fields.limit (total allowable mapped fields in a index) | -| elasticsearch_indices_settings_replicas | gauge | | Index setting value for index.replicas | -| elasticsearch_indices_shards_docs | gauge | 3 | Count of documents on this shard | -| elasticsearch_indices_shards_docs_deleted | gauge | 3 | Count of deleted documents on each shard | -| elasticsearch_indices_store_size_bytes | gauge | 1 | Current size of stored index data in bytes | -| elasticsearch_indices_store_size_bytes_primary | gauge | | Current size of stored index data in bytes with only primary shards on all nodes | -| elasticsearch_indices_store_size_bytes_total | gauge | | Current size of stored index data in bytes with all shards on all nodes | -| elasticsearch_indices_store_throttle_time_seconds_total | counter | 1 | Throttle time for index store in seconds | -| elasticsearch_indices_translog_operations | counter | 1 | Total translog operations | -| elasticsearch_indices_translog_size_in_bytes | counter | 1 | Total translog size in bytes | -| elasticsearch_indices_warmer_time_seconds_total | counter | 1 | Total warmer time in seconds | -| elasticsearch_indices_warmer_total | counter | 1 | Total warmer count | -| elasticsearch_jvm_gc_collection_seconds_count | counter | 2 | Count of JVM GC runs | -| elasticsearch_jvm_gc_collection_seconds_sum | counter | 2 | GC run time in seconds | -| elasticsearch_jvm_memory_committed_bytes | gauge | 2 | JVM memory currently committed by area | -| elasticsearch_jvm_memory_max_bytes | gauge | 1 | JVM memory max | -| elasticsearch_jvm_memory_used_bytes | gauge | 2 | JVM memory currently used by area | -| elasticsearch_jvm_memory_pool_used_bytes | gauge | 3 | JVM memory currently used by pool | -| elasticsearch_jvm_memory_pool_max_bytes | 
counter | 3 | JVM memory max by pool | -| elasticsearch_jvm_memory_pool_peak_used_bytes | counter | 3 | JVM memory peak used by pool | -| elasticsearch_jvm_memory_pool_peak_max_bytes | counter | 3 | JVM memory peak max by pool | -| elasticsearch_os_cpu_percent | gauge | 1 | Percent CPU used by the OS | -| elasticsearch_os_load1 | gauge | 1 | Shortterm load average | -| elasticsearch_os_load5 | gauge | 1 | Midterm load average | -| elasticsearch_os_load15 | gauge | 1 | Longterm load average | -| elasticsearch_process_cpu_percent | gauge | 1 | Percent CPU used by process | -| elasticsearch_process_cpu_seconds_total | counter | 1 | Process CPU time in seconds | -| elasticsearch_process_mem_resident_size_bytes | gauge | 1 | Resident memory in use by process in bytes | -| elasticsearch_process_mem_share_size_bytes | gauge | 1 | Shared memory in use by process in bytes | -| elasticsearch_process_mem_virtual_size_bytes | gauge | 1 | Total virtual memory used in bytes | -| elasticsearch_process_open_files_count | gauge | 1 | Open file descriptors | -| elasticsearch_snapshot_stats_number_of_snapshots | gauge | 1 | Total number of snapshots | -| elasticsearch_snapshot_stats_oldest_snapshot_timestamp | gauge | 1 | Oldest snapshot timestamp | -| elasticsearch_snapshot_stats_snapshot_start_time_timestamp | gauge | 1 | Last snapshot start timestamp | -| elasticsearch_snapshot_stats_latest_snapshot_timestamp_seconds | gauge | 1 | Timestamp of the latest SUCCESS or PARTIAL snapshot | -| elasticsearch_snapshot_stats_snapshot_end_time_timestamp | gauge | 1 | Last snapshot end timestamp | -| elasticsearch_snapshot_stats_snapshot_number_of_failures | gauge | 1 | Last snapshot number of failures | -| elasticsearch_snapshot_stats_snapshot_number_of_indices | gauge | 1 | Last snapshot number of indices | -| elasticsearch_snapshot_stats_snapshot_failed_shards | gauge | 1 | Last snapshot failed shards | -| elasticsearch_snapshot_stats_snapshot_successful_shards | gauge | 1 | Last snapshot 
successful shards | -| elasticsearch_snapshot_stats_snapshot_total_shards | gauge | 1 | Last snapshot total shard | -| elasticsearch_thread_pool_active_count | gauge | 14 | Thread Pool threads active | -| elasticsearch_thread_pool_completed_count | counter | 14 | Thread Pool operations completed | -| elasticsearch_thread_pool_largest_count | gauge | 14 | Thread Pool largest threads count | -| elasticsearch_thread_pool_queue_count | gauge | 14 | Thread Pool operations queued | -| elasticsearch_thread_pool_rejected_count | counter | 14 | Thread Pool operations rejected | -| elasticsearch_thread_pool_threads_count | gauge | 14 | Thread Pool current threads count | -| elasticsearch_transport_rx_packets_total | counter | 1 | Count of packets received | -| elasticsearch_transport_rx_size_bytes_total | counter | 1 | Total number of bytes received | -| elasticsearch_transport_tx_packets_total | counter | 1 | Count of packets sent | -| elasticsearch_transport_tx_size_bytes_total | counter | 1 | Total number of bytes sent | -| elasticsearch_clusterinfo_last_retrieval_success_ts | gauge | 1 | Timestamp of the last successful cluster info retrieval | -| elasticsearch_clusterinfo_up | gauge | 1 | Up metric for the cluster info collector | -| elasticsearch_clusterinfo_version_info | gauge | 6 | Constant metric with ES version information as labels | -| elasticsearch_slm_stats_up | gauge | 0 | Up metric for SLM collector | -| elasticsearch_slm_stats_total_scrapes | counter | 0 | Number of scrapes for SLM collector | -| elasticsearch_slm_stats_json_parse_failures | counter | 0 | JSON parse failures for SLM collector | -| elasticsearch_slm_stats_retention_runs_total | counter | 0 | Total retention runs | -| elasticsearch_slm_stats_retention_failed_total | counter | 0 | Total failed retention runs | -| elasticsearch_slm_stats_retention_timed_out_total | counter | 0 | Total retention run timeouts | -| elasticsearch_slm_stats_retention_deletion_time_seconds | gauge | 0 | Retention run 
deletion time | -| elasticsearch_slm_stats_total_snapshots_taken_total | counter | 0 | Total snapshots taken | -| elasticsearch_slm_stats_total_snapshots_failed_total | counter | 0 | Total snapshots failed | -| elasticsearch_slm_stats_total_snapshots_deleted_total | counter | 0 | Total snapshots deleted | -| elasticsearch_slm_stats_total_snapshots_failed_total | counter | 0 | Total snapshots failed | -| elasticsearch_slm_stats_snapshots_taken_total | counter | 1 | Snapshots taken by policy | -| elasticsearch_slm_stats_snapshots_failed_total | counter | 1 | Snapshots failed by policy | -| elasticsearch_slm_stats_snapshots_deleted_total | counter | 1 | Snapshots deleted by policy | -| elasticsearch_slm_stats_snapshot_deletion_failures_total | counter | 1 | Snapshot deletion failures by policy | -| elasticsearch_slm_stats_operation_mode | gauge | 1 | SLM operation mode (Running, stopping, stopped) | -| elasticsearch_data_stream_stats_up | gauge | 0 | Up metric for Data Stream collection | -| elasticsearch_data_stream_stats_total_scrapes | counter | 0 | Total scrapes for Data Stream stats | -| elasticsearch_data_stream_stats_json_parse_failures | counter | 0 | Number of parsing failures for Data Stream stats | -| elasticsearch_data_stream_backing_indices_total | gauge | 1 | Number of backing indices for Data Stream | -| elasticsearch_data_stream_store_size_bytes | gauge | 1 | Current size of data stream backing indices in bytes | +| Name | Type | Cardinality | Help | +| :--------------------------------------------------------------------------------- | :------ | :---------- | :-------------------------------------------------------------------------------------------------- | +| elasticsearch_breakers_estimated_size_bytes | gauge | 4 | Estimated size in bytes of breaker | +| elasticsearch_breakers_limit_size_bytes | gauge | 4 | Limit size in bytes for breaker | +| elasticsearch_breakers_tripped | counter | 4 | tripped for breaker | +| 
elasticsearch_cluster_health_active_primary_shards | gauge | 1 | The number of primary shards in your cluster. This is an aggregate total across all indices. | +| elasticsearch_cluster_health_active_shards | gauge | 1 | Aggregate total of all shards across all indices, which includes replica shards. | +| elasticsearch_cluster_health_delayed_unassigned_shards | gauge | 1 | Shards delayed to reduce reallocation overhead | +| elasticsearch_cluster_health_initializing_shards | gauge | 1 | Count of shards that are being freshly created. | +| elasticsearch_cluster_health_number_of_data_nodes | gauge | 1 | Number of data nodes in the cluster. | +| elasticsearch_cluster_health_number_of_in_flight_fetch | gauge | 1 | The number of ongoing shard info requests. | +| elasticsearch_cluster_health_number_of_nodes | gauge | 1 | Number of nodes in the cluster. | +| elasticsearch_cluster_health_number_of_pending_tasks | gauge | 1 | Cluster level changes which have not yet been executed | +| elasticsearch_cluster_health_task_max_waiting_in_queue_millis | gauge | 1 | Max time in millis that a task is waiting in queue. | +| elasticsearch_cluster_health_relocating_shards | gauge | 1 | The number of shards that are currently moving from one node to another node. | +| elasticsearch_cluster_health_status | gauge | 3 | Whether all primary and replica shards are allocated. | +| elasticsearch_cluster_health_timed_out | gauge | 1 | Number of cluster health checks timed out | +| elasticsearch_cluster_health_unassigned_shards | gauge | 1 | The number of shards that exist in the cluster state, but cannot be found in the cluster itself. | +| elasticsearch_clustersettings_stats_max_shards_per_node | gauge | 0 | Current maximum number of shards per node setting. 
| +| elasticsearch_filesystem_data_available_bytes | gauge | 1 | Available space on block device in bytes | +| elasticsearch_filesystem_data_free_bytes | gauge | 1 | Free space on block device in bytes | +| elasticsearch_filesystem_data_size_bytes | gauge | 1 | Size of block device in bytes | +| elasticsearch_filesystem_io_stats_device_operations_count | gauge | 1 | Count of disk operations | +| elasticsearch_filesystem_io_stats_device_read_operations_count | gauge | 1 | Count of disk read operations | +| elasticsearch_filesystem_io_stats_device_write_operations_count | gauge | 1 | Count of disk write operations | +| elasticsearch_filesystem_io_stats_device_read_size_kilobytes_sum | gauge | 1 | Total kilobytes read from disk | +| elasticsearch_filesystem_io_stats_device_write_size_kilobytes_sum | gauge | 1 | Total kilobytes written to disk | +| elasticsearch_indices_active_queries | gauge | 1 | The number of currently active queries | +| elasticsearch_indices_docs | gauge | 1 | Count of documents on this node | +| elasticsearch_indices_docs_deleted | gauge | 1 | Count of deleted documents on this node | +| elasticsearch_indices_deleted_docs_primary | gauge | 1 | Count of deleted documents with only primary shards | +| elasticsearch_indices_docs_primary | gauge | 1 | Count of documents with only primary shards on all nodes | +| elasticsearch_indices_docs_total | gauge | | Count of documents with shards on all nodes | +| elasticsearch_indices_fielddata_evictions | counter | 1 | Evictions from field data | +| elasticsearch_indices_fielddata_memory_size_bytes | gauge | 1 | Field data cache memory usage in bytes | +| elasticsearch_indices_filter_cache_evictions | counter | 1 | Evictions from filter cache | +| elasticsearch_indices_filter_cache_memory_size_bytes | gauge | 1 | Filter cache memory usage in bytes | +| elasticsearch_indices_flush_time_seconds | counter | 1 | Cumulative flush time in seconds | +| elasticsearch_indices_flush_total | counter | 1 | Total flushes 
| +| elasticsearch_indices_get_exists_time_seconds | counter | 1 | Total time get exists in seconds | +| elasticsearch_indices_get_exists_total | counter | 1 | Total get exists operations | +| elasticsearch_indices_get_missing_time_seconds | counter | 1 | Total time of get missing in seconds | +| elasticsearch_indices_get_missing_total | counter | 1 | Total get missing | +| elasticsearch_indices_get_time_seconds | counter | 1 | Total get time in seconds | +| elasticsearch_indices_get_total | counter | 1 | Total get | +| elasticsearch_indices_indexing_delete_time_seconds_total | counter | 1 | Total time indexing delete in seconds | +| elasticsearch_indices_indexing_delete_total | counter | 1 | Total indexing deletes | +| elasticsearch_indices_index_current | gauge | 1 | The number of documents currently being indexed to an index | +| elasticsearch_indices_indexing_index_time_seconds_total | counter | 1 | Cumulative index time in seconds | +| elasticsearch_indices_indexing_index_total | counter | 1 | Total index calls | +| elasticsearch_indices_mappings_stats_fields | gauge | 1 | Count of fields currently mapped by index | +| elasticsearch_indices_mappings_stats_json_parse_failures_total | counter | 0 | Number of errors while parsing JSON | +| elasticsearch_indices_mappings_stats_scrapes_total | counter | 0 | Current total Elasticsearch Indices Mappings scrapes | +| elasticsearch_indices_mappings_stats_up | gauge | 0 | Was the last scrape of the Elasticsearch Indices Mappings endpoint successful | +| elasticsearch_indices_merges_docs_total | counter | 1 | Cumulative docs merged | +| elasticsearch_indices_merges_total | counter | 1 | Total merges | +| elasticsearch_indices_merges_total_size_bytes_total | counter | 1 | Total merge size in bytes | +| elasticsearch_indices_merges_total_time_seconds_total | counter | 1 | Total time spent merging in seconds | +| elasticsearch_indices_query_cache_cache_total | counter | 1 | Count of query cache | +| 
elasticsearch_indices_query_cache_cache_size | gauge | 1 | Size of query cache | +| elasticsearch_indices_query_cache_count | counter | 2 | Count of query cache hit/miss | +| elasticsearch_indices_query_cache_evictions | counter | 1 | Evictions from query cache | +| elasticsearch_indices_query_cache_memory_size_bytes | gauge | 1 | Query cache memory usage in bytes | +| elasticsearch_indices_query_cache_total | counter | 1 | Size of query cache total | +| elasticsearch_indices_refresh_time_seconds_total | counter | 1 | Total time spent refreshing in seconds | +| elasticsearch_indices_refresh_total | counter | 1 | Total refreshes | +| elasticsearch_indices_request_cache_count | counter | 2 | Count of request cache hit/miss | +| elasticsearch_indices_request_cache_evictions | counter | 1 | Evictions from request cache | +| elasticsearch_indices_request_cache_memory_size_bytes | gauge | 1 | Request cache memory usage in bytes | +| elasticsearch_indices_search_fetch_time_seconds | counter | 1 | Total search fetch time in seconds | +| elasticsearch_indices_search_fetch_total | counter | 1 | Total number of fetches | +| elasticsearch_indices_search_query_time_seconds | counter | 1 | Total search query time in seconds | +| elasticsearch_indices_search_query_total | counter | 1 | Total number of queries | +| elasticsearch_indices_segments_count | gauge | 1 | Count of index segments on this node | +| elasticsearch_indices_segments_memory_bytes | gauge | 1 | Current memory size of segments in bytes | +| elasticsearch_indices_settings_stats_read_only_indices | gauge | 1 | Count of indices that have read_only_allow_delete=true | +| elasticsearch_indices_settings_total_fields | gauge | | Index setting value for index.mapping.total_fields.limit (total allowable mapped fields in a index) | +| elasticsearch_indices_settings_replicas | gauge | | Index setting value for index.replicas | +| elasticsearch_indices_shards_docs | gauge | 3 | Count of documents on this shard | +| 
elasticsearch_indices_shards_docs_deleted | gauge | 3 | Count of deleted documents on each shard | +| elasticsearch_indices_store_size_bytes | gauge | 1 | Current size of stored index data in bytes | +| elasticsearch_indices_store_size_bytes_primary | gauge | | Current size of stored index data in bytes with only primary shards on all nodes | +| elasticsearch_indices_store_size_bytes_total | gauge | | Current size of stored index data in bytes with all shards on all nodes | +| elasticsearch_indices_store_throttle_time_seconds_total | counter | 1 | Throttle time for index store in seconds | +| elasticsearch_indices_translog_operations | counter | 1 | Total translog operations | +| elasticsearch_indices_translog_size_in_bytes | counter | 1 | Total translog size in bytes | +| elasticsearch_indices_warmer_time_seconds_total | counter | 1 | Total warmer time in seconds | +| elasticsearch_indices_warmer_total | counter | 1 | Total warmer count | +| elasticsearch_jvm_gc_collection_seconds_count | counter | 2 | Count of JVM GC runs | +| elasticsearch_jvm_gc_collection_seconds_sum | counter | 2 | GC run time in seconds | +| elasticsearch_jvm_memory_committed_bytes | gauge | 2 | JVM memory currently committed by area | +| elasticsearch_jvm_memory_max_bytes | gauge | 1 | JVM memory max | +| elasticsearch_jvm_memory_used_bytes | gauge | 2 | JVM memory currently used by area | +| elasticsearch_jvm_memory_pool_used_bytes | gauge | 3 | JVM memory currently used by pool | +| elasticsearch_jvm_memory_pool_max_bytes | counter | 3 | JVM memory max by pool | +| elasticsearch_jvm_memory_pool_peak_used_bytes | counter | 3 | JVM memory peak used by pool | +| elasticsearch_jvm_memory_pool_peak_max_bytes | counter | 3 | JVM memory peak max by pool | +| elasticsearch_os_cpu_percent | gauge | 1 | Percent CPU used by the OS | +| elasticsearch_os_load1 | gauge | 1 | Shortterm load average | +| elasticsearch_os_load5 | gauge | 1 | Midterm load average | +| elasticsearch_os_load15 | gauge | 1 | 
Longterm load average | +| elasticsearch_process_cpu_percent | gauge | 1 | Percent CPU used by process | +| elasticsearch_process_cpu_seconds_total | counter | 1 | Process CPU time in seconds | +| elasticsearch_process_mem_resident_size_bytes | gauge | 1 | Resident memory in use by process in bytes | +| elasticsearch_process_mem_share_size_bytes | gauge | 1 | Shared memory in use by process in bytes | +| elasticsearch_process_mem_virtual_size_bytes | gauge | 1 | Total virtual memory used in bytes | +| elasticsearch_process_open_files_count | gauge | 1 | Open file descriptors | +| elasticsearch_snapshot_stats_number_of_snapshots | gauge | 1 | Total number of snapshots | +| elasticsearch_snapshot_stats_oldest_snapshot_timestamp | gauge | 1 | Oldest snapshot timestamp | +| elasticsearch_snapshot_stats_snapshot_start_time_timestamp | gauge | 1 | Last snapshot start timestamp | +| elasticsearch_snapshot_stats_latest_snapshot_timestamp_seconds | gauge | 1 | Timestamp of the latest SUCCESS or PARTIAL snapshot | +| elasticsearch_snapshot_stats_snapshot_end_time_timestamp | gauge | 1 | Last snapshot end timestamp | +| elasticsearch_snapshot_stats_snapshot_number_of_failures | gauge | 1 | Last snapshot number of failures | +| elasticsearch_snapshot_stats_snapshot_number_of_indices | gauge | 1 | Last snapshot number of indices | +| elasticsearch_snapshot_stats_snapshot_failed_shards | gauge | 1 | Last snapshot failed shards | +| elasticsearch_snapshot_stats_snapshot_successful_shards | gauge | 1 | Last snapshot successful shards | +| elasticsearch_snapshot_stats_snapshot_total_shards | gauge | 1 | Last snapshot total shard | +| elasticsearch_thread_pool_active_count | gauge | 14 | Thread Pool threads active | +| elasticsearch_thread_pool_completed_count | counter | 14 | Thread Pool operations completed | +| elasticsearch_thread_pool_largest_count | gauge | 14 | Thread Pool largest threads count | +| elasticsearch_thread_pool_queue_count | gauge | 14 | Thread Pool operations 
queued | +| elasticsearch_thread_pool_rejected_count | counter | 14 | Thread Pool operations rejected | +| elasticsearch_thread_pool_threads_count | gauge | 14 | Thread Pool current threads count | +| elasticsearch_transport_rx_packets_total | counter | 1 | Count of packets received | +| elasticsearch_transport_rx_size_bytes_total | counter | 1 | Total number of bytes received | +| elasticsearch_transport_tx_packets_total | counter | 1 | Count of packets sent | +| elasticsearch_transport_tx_size_bytes_total | counter | 1 | Total number of bytes sent | +| elasticsearch_clusterinfo_last_retrieval_success_ts | gauge | 1 | Timestamp of the last successful cluster info retrieval | +| elasticsearch_clusterinfo_up | gauge | 1 | Up metric for the cluster info collector | +| elasticsearch_clusterinfo_version_info | gauge | 6 | Constant metric with ES version information as labels | +| elasticsearch_slm_stats_up | gauge | 0 | Up metric for SLM collector | +| elasticsearch_slm_stats_total_scrapes | counter | 0 | Number of scrapes for SLM collector | +| elasticsearch_slm_stats_json_parse_failures | counter | 0 | JSON parse failures for SLM collector | +| elasticsearch_slm_stats_retention_runs_total | counter | 0 | Total retention runs | +| elasticsearch_slm_stats_retention_failed_total | counter | 0 | Total failed retention runs | +| elasticsearch_slm_stats_retention_timed_out_total | counter | 0 | Total retention run timeouts | +| elasticsearch_slm_stats_retention_deletion_time_seconds | gauge | 0 | Retention run deletion time | +| elasticsearch_slm_stats_total_snapshots_taken_total | counter | 0 | Total snapshots taken | +| elasticsearch_slm_stats_total_snapshots_failed_total | counter | 0 | Total snapshots failed | +| elasticsearch_slm_stats_total_snapshots_deleted_total | counter | 0 | Total snapshots deleted | +| elasticsearch_slm_stats_total_snapshots_failed_total | counter | 0 | Total snapshots failed | +| elasticsearch_slm_stats_snapshots_taken_total | counter | 1 | 
Snapshots taken by policy | +| elasticsearch_slm_stats_snapshots_failed_total | counter | 1 | Snapshots failed by policy | +| elasticsearch_slm_stats_snapshots_deleted_total | counter | 1 | Snapshots deleted by policy | +| elasticsearch_slm_stats_snapshot_deletion_failures_total | counter | 1 | Snapshot deletion failures by policy | +| elasticsearch_slm_stats_operation_mode | gauge | 1 | SLM operation mode (Running, stopping, stopped) | +| elasticsearch_data_stream_stats_up | gauge | 0 | Up metric for Data Stream collection | +| elasticsearch_data_stream_stats_total_scrapes | counter | 0 | Total scrapes for Data Stream stats | +| elasticsearch_data_stream_stats_json_parse_failures | counter | 0 | Number of parsing failures for Data Stream stats | +| elasticsearch_data_stream_backing_indices_total | gauge | 1 | Number of backing indices for Data Stream | +| elasticsearch_data_stream_store_size_bytes | gauge | 1 | Current size of data stream backing indices in bytes | +| elasticsearch_indexing_pressure_limit_in_bytes | gauge | 1 | Configured memory limit, in bytes, for indexing requests. | +| elasticsearch_indexing_pressure_current_combined_coordinating_and_primary_in_bytes | gauge | 1 | Memory consumed, in bytes, by indexing requests in the coordinating or primary stage. | +| elasticsearch_indexing_pressure_current_coordinating_in_bytes | gauge | 1 | Memory consumed, in bytes, by indexing requests in the coordinating stage. | +| elasticsearch_indexing_pressure_current_primary_in_bytes | gauge | 1 | Memory consumed, in bytes, by indexing requests in the primary stage. | +| elasticsearch_indexing_pressure_current_replica_in_bytes | gauge | 1 | Memory consumed, in bytes, by indexing requests in the replica stage. 
| +| elasticsearch_indexing_pressure_current_all_in_bytes | gauge | 1 | Memory consumed, in bytes, by indexing requests in the coordinating, primary, or replica stage. | +| elasticsearch_indexing_pressure_total_combined_coordinating_and_primary_in_bytes | counter | 1 | Memory consumed, in bytes, by indexing requests in the coordinating or primary stage. | +| elasticsearch_indexing_pressure_total_coordinating_in_bytes | counter | 1 | Memory consumed, in bytes, by indexing requests in the coordinating stage. | +| elasticsearch_indexing_pressure_total_primary_in_bytes | counter | 1 | Memory consumed, in bytes, by indexing requests in the primary stage. | +| elasticsearch_indexing_pressure_total_replica_in_bytes | counter | 1 | Memory consumed, in bytes, by indexing requests in the replica stage. | +| elasticsearch_indexing_pressure_total_all_in_bytes | counter | 1 | Memory consumed, in bytes, by indexing requests in the coordinating, primary, or replica stage. | +| elasticsearch_indexing_pressure_total_coordinating_rejections | counter | 1 | Number of indexing requests rejected in the coordinating stage. | +| elasticsearch_indexing_pressure_total_primary_rejections | counter | 1 | Number of indexing requests rejected in the primary stage. | +| elasticsearch_indexing_pressure_total_replica_rejections | counter | 1 | Number of indexing requests rejected in the replica stage. 
| ### Alerts & Recording Rules diff --git a/collector/nodes.go b/collector/nodes.go index 7d648f86..c25ff052 100644 --- a/collector/nodes.go +++ b/collector/nodes.go @@ -171,6 +171,13 @@ type filesystemIODeviceMetric struct { Labels func(cluster string, node NodeStatsNodeResponse, device string) []string } +type indexingPressureMetric struct { + Type prometheus.ValueType + Desc *prometheus.Desc + Value func(indexingPressureMem NodestatsIndexingPressureMemoryResponse) float64 + Labels func(cluster string, node NodeStatsNodeResponse) []string +} + // Nodes information struct type Nodes struct { logger log.Logger @@ -188,6 +195,7 @@ type Nodes struct { threadPoolMetrics []*threadPoolMetric filesystemDataMetrics []*filesystemDataMetric filesystemIODeviceMetrics []*filesystemIODeviceMetric + indexingPressureMetrics []*indexingPressureMetric } // NewNodes defines Nodes Prometheus metrics @@ -1781,6 +1789,176 @@ func NewNodes(logger log.Logger, client *http.Client, url *url.URL, all bool, no Labels: defaultFilesystemIODeviceLabelValues, }, }, + indexingPressureMetrics: []*indexingPressureMetric{ + { + Type: prometheus.GaugeValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "indexing_pressure", "current_combined_coordinating_and_primary_in_bytes"), + "Memory consumed, in bytes, by indexing requests in the coordinating or primary stage.", + defaultNodeLabels, nil, + ), + Value: func(indexingPressureMem NodestatsIndexingPressureMemoryResponse) float64 { + return float64(indexingPressureMem.Current.CombinedCoordinatingAndPrimaryInBytes) + }, + Labels: defaultNodeLabelValues, + }, + { + Type: prometheus.GaugeValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "indexing_pressure", "current_coordinating_in_bytes"), + "Memory consumed, in bytes, by indexing requests in the coordinating stage.", + defaultNodeLabels, nil, + ), + Value: func(indexingPressureMem NodestatsIndexingPressureMemoryResponse) float64 { + return 
float64(indexingPressureMem.Current.CoordinatingInBytes) + }, + Labels: defaultNodeLabelValues, + }, + { + Type: prometheus.GaugeValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "indexing_pressure", "current_primary_in_bytes"), + "Memory consumed, in bytes, by indexing requests in the primary stage.", + defaultNodeLabels, nil, + ), + Value: func(indexingPressureMem NodestatsIndexingPressureMemoryResponse) float64 { + return float64(indexingPressureMem.Current.PrimaryInBytes) + }, + Labels: defaultNodeLabelValues, + }, + { + Type: prometheus.GaugeValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "indexing_pressure", "current_replica_in_bytes"), + "Memory consumed, in bytes, by indexing requests in the replica stage.", + defaultNodeLabels, nil, + ), + Value: func(indexingPressureMem NodestatsIndexingPressureMemoryResponse) float64 { + return float64(indexingPressureMem.Current.ReplicaInBytes) + }, + Labels: defaultNodeLabelValues, + }, + { + Type: prometheus.GaugeValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "indexing_pressure", "current_all_in_bytes"), + "Memory consumed, in bytes, by indexing requests in the coordinating, primary, or replica stage.", + defaultNodeLabels, nil, + ), + Value: func(indexingPressureMem NodestatsIndexingPressureMemoryResponse) float64 { + return float64(indexingPressureMem.Current.AllInBytes) + }, + Labels: defaultNodeLabelValues, + }, + { + Type: prometheus.CounterValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "indexing_pressure", "total_combined_coordinating_and_primary_in_bytes"), + "Memory consumed, in bytes, by indexing requests in the coordinating or primary stage.", + defaultNodeLabels, nil, + ), + Value: func(indexingPressureMem NodestatsIndexingPressureMemoryResponse) float64 { + return float64(indexingPressureMem.Total.CombinedCoordinatingAndPrimaryInBytes) + }, + Labels: defaultNodeLabelValues, + }, + { + Type:
prometheus.CounterValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "indexing_pressure", "total_coordinating_in_bytes"), + "Memory consumed, in bytes, by indexing requests in the coordinating stage.", + defaultNodeLabels, nil, + ), + Value: func(indexingPressureMem NodestatsIndexingPressureMemoryResponse) float64 { + return float64(indexingPressureMem.Total.CoordinatingInBytes) + }, + Labels: defaultNodeLabelValues, + }, + { + Type: prometheus.CounterValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "indexing_pressure", "total_primary_in_bytes"), + "Memory consumed, in bytes, by indexing requests in the primary stage.", + defaultNodeLabels, nil, + ), + Value: func(indexingPressureMem NodestatsIndexingPressureMemoryResponse) float64 { + return float64(indexingPressureMem.Total.PrimaryInBytes) + }, + Labels: defaultNodeLabelValues, + }, + { + Type: prometheus.CounterValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "indexing_pressure", "total_replica_in_bytes"), + "Memory consumed, in bytes, by indexing requests in the replica stage.", + defaultNodeLabels, nil, + ), + Value: func(indexingPressureMem NodestatsIndexingPressureMemoryResponse) float64 { + return float64(indexingPressureMem.Total.ReplicaInBytes) + }, + Labels: defaultNodeLabelValues, + }, + { + Type: prometheus.CounterValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "indexing_pressure", "total_all_in_bytes"), + "Memory consumed, in bytes, by indexing requests in the coordinating, primary, or replica stage.", + defaultNodeLabels, nil, + ), + Value: func(indexingPressureMem NodestatsIndexingPressureMemoryResponse) float64 { + return float64(indexingPressureMem.Total.AllInBytes) + }, + Labels: defaultNodeLabelValues, + }, + { + Type: prometheus.CounterValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "indexing_pressure", "total_coordinating_rejections"), + "Number of indexing requests rejected in the 
coordinating stage.", + defaultNodeLabels, nil, + ), + Value: func(indexingPressureMem NodestatsIndexingPressureMemoryResponse) float64 { + return float64(indexingPressureMem.Total.CoordinatingRejections) + }, + Labels: defaultNodeLabelValues, + }, + { + Type: prometheus.CounterValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "indexing_pressure", "total_primary_rejections"), + "Number of indexing requests rejected in the primary stage.", + defaultNodeLabels, nil, + ), + Value: func(indexingPressureMem NodestatsIndexingPressureMemoryResponse) float64 { + return float64(indexingPressureMem.Total.PrimaryRejections) + }, + Labels: defaultNodeLabelValues, + }, + { + Type: prometheus.CounterValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "indexing_pressure", "total_replica_rejections"), + "Number of indexing requests rejected in the replica stage.", + defaultNodeLabels, nil, + ), + Value: func(indexingPressureMem NodestatsIndexingPressureMemoryResponse) float64 { + return float64(indexingPressureMem.Total.ReplicaRejections) + }, + Labels: defaultNodeLabelValues, + }, + { + Type: prometheus.GaugeValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "indexing_pressure", "limit_in_bytes"), + "Configured memory limit, in bytes, for the indexing requests. 
Replica requests have an automatic limit that is 1.5x this value.", + defaultNodeLabels, nil, + ), + Value: func(indexingPressureMem NodestatsIndexingPressureMemoryResponse) float64 { + return float64(indexingPressureMem.LimitInBytes) + }, + Labels: defaultNodeLabelValues, + }, + }, } } @@ -1801,6 +1979,9 @@ func (c *Nodes) Describe(ch chan<- *prometheus.Desc) { for _, metric := range c.filesystemIODeviceMetrics { ch <- metric.Desc } + for _, metric := range c.indexingPressureMetrics { + ch <- metric.Desc + } ch <- c.up.Desc() ch <- c.totalScrapes.Desc() ch <- c.jsonParseFailures.Desc() @@ -1955,5 +2136,16 @@ func (c *Nodes) Collect(ch chan<- prometheus.Metric) { } } + for _, indexingPressureMem := range node.IndexingPressure { + for _, metric := range c.indexingPressureMetrics { + ch <- prometheus.MustNewConstMetric( + metric.Desc, + metric.Type, + metric.Value(indexingPressureMem), + metric.Labels(nodeStatsResp.ClusterName, node)..., + ) + } + } + } } diff --git a/collector/nodes_response.go b/collector/nodes_response.go index 4985add0..6d5a75a4 100644 --- a/collector/nodes_response.go +++ b/collector/nodes_response.go @@ -23,23 +23,24 @@ type nodeStatsResponse struct { // NodeStatsNodeResponse defines node stats information structure for nodes type NodeStatsNodeResponse struct { - Name string `json:"name"` - Host string `json:"host"` - Timestamp int64 `json:"timestamp"` - TransportAddress string `json:"transport_address"` - Hostname string `json:"hostname"` - Roles []string `json:"roles"` - Attributes map[string]string `json:"attributes"` - Indices NodeStatsIndicesResponse `json:"indices"` - OS NodeStatsOSResponse `json:"os"` - Network NodeStatsNetworkResponse `json:"network"` - FS NodeStatsFSResponse `json:"fs"` - ThreadPool map[string]NodeStatsThreadPoolPoolResponse `json:"thread_pool"` - JVM NodeStatsJVMResponse `json:"jvm"` - Breakers map[string]NodeStatsBreakersResponse `json:"breakers"` - HTTP map[string]interface{} `json:"http"` - Transport 
NodeStatsTransportResponse `json:"transport"` - Process NodeStatsProcessResponse `json:"process"` + Name string `json:"name"` + Host string `json:"host"` + Timestamp int64 `json:"timestamp"` + TransportAddress string `json:"transport_address"` + Hostname string `json:"hostname"` + Roles []string `json:"roles"` + Attributes map[string]string `json:"attributes"` + Indices NodeStatsIndicesResponse `json:"indices"` + OS NodeStatsOSResponse `json:"os"` + Network NodeStatsNetworkResponse `json:"network"` + FS NodeStatsFSResponse `json:"fs"` + ThreadPool map[string]NodeStatsThreadPoolPoolResponse `json:"thread_pool"` + JVM NodeStatsJVMResponse `json:"jvm"` + Breakers map[string]NodeStatsBreakersResponse `json:"breakers"` + HTTP map[string]interface{} `json:"http"` + Transport NodeStatsTransportResponse `json:"transport"` + Process NodeStatsProcessResponse `json:"process"` + IndexingPressure map[string]NodestatsIndexingPressureMemoryResponse `json:"indexing_pressure"` } // NodeStatsBreakersResponse is a representation of a statistics about the field data circuit breaker @@ -317,6 +318,34 @@ type NodeStatsProcessResponse struct { Memory NodeStatsProcessMemResponse `json:"mem"` } +// NodestatsIndexingPressureMemoryResponse is a representation of the Elasticsearch indexing pressure +type NodestatsIndexingPressureMemoryResponse struct { + Current NodestatsIndexingPressureMemoryCurrentResponse `json:"current"` + Total NodestatsIndexingPressureMemoryTotalResponse `json:"total"` + LimitInBytes int64 `json:"limit_in_bytes"` +} + +// NodestatsIndexingPressureMemoryCurrentResponse is a representation of the Elasticsearch indexing pressure current memory usage +type NodestatsIndexingPressureMemoryCurrentResponse struct { + CombinedCoordinatingAndPrimaryInBytes int64 `json:"combined_coordinating_and_primary_in_bytes"` + CoordinatingInBytes int64 `json:"coordinating_in_bytes"` + PrimaryInBytes int64 `json:"primary_in_bytes"` + ReplicaInBytes int64 `json:"replica_in_bytes"` + AllInBytes int64
`json:"all_in_bytes"` +} + +// NodestatsIndexingPressureMemoryTotalResponse is a representation of the Elasticsearch indexing pressure total memory usage +type NodestatsIndexingPressureMemoryTotalResponse struct { + CombinedCoordinatingAndPrimaryInBytes int64 `json:"combined_coordinating_and_primary_in_bytes"` + CoordinatingInBytes int64 `json:"coordinating_in_bytes"` + PrimaryInBytes int64 `json:"primary_in_bytes"` + ReplicaInBytes int64 `json:"replica_in_bytes"` + AllInBytes int64 `json:"all_in_bytes"` + CoordinatingRejections int64 `json:"coordinating_rejections"` + PrimaryRejections int64 `json:"primary_rejections"` + ReplicaRejections int64 `json:"replica_rejections"` +} + // NodeStatsProcessMemResponse defines node stats process memory usage structure type NodeStatsProcessMemResponse struct { Resident int64 `json:"resident_in_bytes"`