# receiver/apachesparkreceiver/metadata.yaml

# Component type identifier.
type: apachespark

# Component status: class, per-signal stability, distributions and code owners.
status:
  class: receiver
  stability:
    development:
      - metrics
  distributions:
    - contrib
  codeowners:
    active:
      - djaglowski
      - Caleb-Hurshman
      - mrsillydog

# Resource attributes identifying the Spark application, stage, executor or
# job for which a metric was recorded. Every entry states `enabled` explicitly.
resource_attributes:
  spark.application.id:
    description: The ID of the application for which the metric was recorded.
    type: string
    enabled: true
  spark.application.name:
    description: The name of the application for which the metric was recorded.
    type: string
    enabled: true
  spark.stage.id:
    description: The ID of the application stage for which the metric was recorded.
    type: int
    enabled: true
  spark.stage.attempt.id:
    description: The ID of the stage attempt for which the metric was recorded.
    type: int
    # Off by default, stated explicitly rather than relying on the implicit
    # default used when `enabled` is omitted.
    # NOTE(review): confirm this attribute is intentionally disabled.
    enabled: false
  spark.executor.id:
    description: The ID of the executor for which the metric was recorded.
    type: string
    enabled: true
  spark.job.id:
    description: The ID of the job for which the metric was recorded.
    type: int
    enabled: true

# Metric attribute definitions, referenced by key from the `attributes` list
# of each metric below. Several definitions share one `name_override`
# (e.g. `result`, each with its own enum), so the definition keys stay unique
# while the overridden name repeats.
attributes:
  # Stage status flags (used together by spark.stage.status).
  stage_active:
    name_override: active
    description: Whether the stage for which the metric was recorded is active.
    type: bool
  stage_complete:
    name_override: complete
    description: Whether the stage for which the metric was recorded is complete.
    type: bool
  stage_pending:
    name_override: pending
    description: Whether the stage for which the metric was recorded is pending.
    type: bool
  stage_failed:
    name_override: failed
    description: Whether the stage for which the metric was recorded is failed.
    type: bool
  # Result attributes: same overridden name, different allowed values.
  stage_task_result:
    name_override: result
    description: The result of the stage tasks for which the metric was recorded.
    type: string
    enum:
      - completed
      - failed
      - killed
  executor_task_result:
    name_override: result
    description: The result of the executor tasks for which the metric was recorded.
    type: string
    enum:
      - completed
      - failed
  job_result:
    name_override: result
    description: The result of the job stages or tasks for which the metric was recorded.
    type: string
    enum:
      - completed
      - failed
      - skipped
  # I/O and memory dimensions.
  direction:
    description: Whether the metric is in regards to input or output operations.
    type: string
    enum:
      - in
      - out
  source:
    description: The source from which data was fetched for the metric.
    type: string
    enum:
      - local
      - remote
  location:
    description: The location of the memory for which the metric was recorded.
    type: string
    enum:
      - on_heap
      - off_heap
  state:
    description: The state of the memory for which the metric was recorded.
    type: string
    enum:
      - used
      - free
  # Driver-specific dimensions.
  scheduler_status:
    name_override: status
    description: The status of the DAGScheduler stages for which the metric was recorded.
    type: string
    enum:
      - waiting
      - running
  pool_memory_type:
    name_override: type
    description: The type of pool memory for which the metric was recorded.
    type: string
    enum:
      - direct
      - mapped
  gc_type:
    description: The type of the garbage collection performed for the metric.
    type: string
    enum:
      - major
      - minor

# Metric definitions. Each entry declares a description, unit, default
# enablement, instrument type (sum or gauge) and the attributes it carries.
metrics:
  # stage: status, task counts, executor time, GC time and memory/disk spill
  spark.stage.status:
    description: A one-hot encoding representing the status of this stage.
    unit: "{ status }"
    enabled: true
    sum:
      value_type: int
      monotonic: false
      aggregation_temporality: cumulative
    attributes: [stage_active, stage_complete, stage_pending, stage_failed]
  spark.stage.task.active:
    description: Number of active tasks in this stage.
    unit: "{ task }"
    enabled: true
    sum:
      value_type: int
      monotonic: false
      aggregation_temporality: cumulative
    attributes: []
  spark.stage.task.result:
    description: Number of tasks with a specific result in this stage.
    unit: "{ task }"
    enabled: true
    sum:
      value_type: int
      monotonic: true
      aggregation_temporality: cumulative
    attributes: [stage_task_result]
  spark.stage.executor.run_time:
    description: Amount of time spent by the executor in this stage.
    unit: "ms"
    enabled: true
    sum:
      value_type: int
      monotonic: true
      aggregation_temporality: cumulative
    attributes: []
  spark.stage.executor.cpu_time:
    description: CPU time spent by the executor in this stage.
    unit: "ns"
    enabled: true
    sum:
      value_type: int
      monotonic: true
      aggregation_temporality: cumulative
    attributes: []
  spark.stage.task.result_size:
    description: The amount of data transmitted back to the driver by all the tasks in this stage.
    unit: "bytes"
    enabled: true
    sum:
      value_type: int
      monotonic: true
      aggregation_temporality: cumulative
    attributes: []
  spark.stage.jvm_gc_time:
    description: The amount of time the JVM spent on garbage collection in this stage.
    unit: "ms"
    enabled: true
    sum:
      value_type: int
      monotonic: true
      aggregation_temporality: cumulative
    attributes: []
  spark.stage.memory.spilled:
    description: The amount of memory moved to disk due to size constraints (spilled) in this stage.
    unit: "bytes"
    enabled: true
    sum:
      value_type: int
      monotonic: true
      aggregation_temporality: cumulative
    attributes: []
  spark.stage.disk.spilled:
    description: The amount of disk space used for storing portions of overly large data chunks that couldn't fit in memory in this stage.
    unit: "bytes"
    enabled: true
    sum:
      value_type: int
      monotonic: true
      aggregation_temporality: cumulative
    attributes: []
  spark.stage.memory.peak:
    description: Peak memory used by internal data structures created during shuffles, aggregations and joins in this stage.
    unit: "bytes"
    enabled: true
    sum:
      value_type: int
      monotonic: true
      aggregation_temporality: cumulative
    attributes: []
  # stage: input/output volume, split by the `direction` attribute
  spark.stage.io.size:
    description: Amount of data written and read at this stage.
    unit: "bytes"
    enabled: true
    sum:
      value_type: int
      monotonic: true
      aggregation_temporality: cumulative
    attributes: [direction]
  spark.stage.io.records:
    description: Number of records written and read in this stage.
    unit: "{ record }"
    enabled: true
    sum:
      value_type: int
      monotonic: true
      aggregation_temporality: cumulative
    attributes: [direction]
  # stage: shuffle blocks, bytes, records and wait/write times
  spark.stage.shuffle.blocks_fetched:
    description: Number of blocks fetched in shuffle operations in this stage.
    unit: "{ block }"
    enabled: true
    sum:
      value_type: int
      monotonic: true
      aggregation_temporality: cumulative
    attributes: [source]
  spark.stage.shuffle.fetch_wait_time:
    description: Time spent in this stage waiting for remote shuffle blocks.
    unit: "ms"
    enabled: true
    sum:
      value_type: int
      monotonic: true
      aggregation_temporality: cumulative
    attributes: []
  spark.stage.shuffle.io.disk:
    description: Amount of data read to disk in shuffle operations (sometimes required for large blocks, as opposed to the default behavior of reading into memory).
    unit: "bytes"
    enabled: true
    sum:
      value_type: int
      monotonic: true
      aggregation_temporality: cumulative
    attributes: []
  spark.stage.shuffle.io.read.size:
    description: Amount of data read in shuffle operations in this stage.
    unit: "bytes"
    enabled: true
    sum:
      value_type: int
      monotonic: true
      aggregation_temporality: cumulative
    attributes: [source]
  spark.stage.shuffle.io.write.size:
    description: Amount of data written in shuffle operations in this stage.
    unit: "bytes"
    enabled: true
    sum:
      value_type: int
      monotonic: true
      aggregation_temporality: cumulative
    attributes: []
  spark.stage.shuffle.io.records:
    description: Number of records written or read in shuffle operations in this stage.
    unit: "{ record }"
    enabled: true
    sum:
      value_type: int
      monotonic: true
      aggregation_temporality: cumulative
    attributes: [direction]
  spark.stage.shuffle.write_time:
    description: Time spent blocking on writes to disk or buffer cache in this stage.
    unit: "ns"
    enabled: true
    sum:
      value_type: int
      monotonic: true
      aggregation_temporality: cumulative
    attributes: []
  # executor: memory/disk usage, task counts, run/GC time and I/O volume
  spark.executor.memory.usage:
    description: Storage memory used by this executor.
    unit: "bytes"
    enabled: true
    sum:
      value_type: int
      monotonic: false
      aggregation_temporality: cumulative
    attributes: []
  spark.executor.disk.usage:
    description: Disk space used by this executor for RDD storage.
    unit: "bytes"
    enabled: true
    sum:
      value_type: int
      monotonic: false
      aggregation_temporality: cumulative
    attributes: []
  spark.executor.task.limit:
    description: Maximum number of tasks that can run concurrently in this executor.
    unit: "{ task }"
    enabled: true
    sum:
      value_type: int
      monotonic: false
      aggregation_temporality: cumulative
    attributes: []
  spark.executor.task.active:
    description: Number of tasks currently running in this executor.
    unit: "{ task }"
    enabled: true
    sum:
      value_type: int
      monotonic: false
      aggregation_temporality: cumulative
    attributes: []
  spark.executor.task.result:
    description: Number of tasks with a specific result in this executor.
    unit: "{ task }"
    enabled: true
    sum:
      value_type: int
      monotonic: true
      aggregation_temporality: cumulative
    attributes: [executor_task_result]
  spark.executor.time:
    description: Elapsed time the JVM spent executing tasks in this executor.
    unit: "ms"
    enabled: true
    sum:
      value_type: int
      monotonic: true
      aggregation_temporality: cumulative
    attributes: []
  spark.executor.gc_time:
    description: Elapsed time the JVM spent in garbage collection in this executor.
    unit: "ms"
    enabled: true
    sum:
      value_type: int
      monotonic: true
      aggregation_temporality: cumulative
    attributes: []
  spark.executor.input_size:
    description: Amount of data input for this executor.
    unit: "bytes"
    enabled: true
    sum:
      value_type: int
      monotonic: true
      aggregation_temporality: cumulative
    attributes: []
  spark.executor.shuffle.io.size:
    description: Amount of data written and read during shuffle operations for this executor.
    unit: "bytes"
    enabled: true
    sum:
      value_type: int
      monotonic: true
      aggregation_temporality: cumulative
    attributes: [direction]
  spark.executor.storage_memory.usage:
    description: The executor's storage memory usage.
    unit: "bytes"
    enabled: true
    sum:
      value_type: int
      monotonic: false
      aggregation_temporality: cumulative
    attributes: [location, state]
  # job: active and finished task/stage counts per job.
  # Task metrics are counted in "{ task }"; stage metrics in "{ stage }",
  # the same annotation used by spark.driver.dag_scheduler.stage.*.
  spark.job.task.active:
    description: Number of active tasks in this job.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: false
      value_type: int
    unit: "{ task }"
    attributes: []
  spark.job.task.result:
    description: Number of tasks with a specific result in this job.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: "{ task }"
    attributes: [job_result]
  spark.job.stage.active:
    description: Number of active stages in this job.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: false
      value_type: int
    unit: "{ stage }"
    attributes: []
  spark.job.stage.result:
    description: Number of stages with a specific result in this job.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: "{ stage }"
    attributes: [job_result]
  # driver metrics: BlockManager disk and memory usage
  # NOTE(review): `mb` is not a UCUM unit code (megabytes would be `MBy`);
  # left unchanged here so the emitted unit string does not change — confirm
  # whether it should be migrated.
  spark.driver.block_manager.disk.usage:
    description: Disk space used by the BlockManager.
    unit: "mb"
    enabled: true
    sum:
      value_type: int
      monotonic: false
      aggregation_temporality: cumulative
    attributes: []
  spark.driver.block_manager.memory.usage:
    description: Memory usage for the driver's BlockManager.
    unit: "mb"
    enabled: true
    sum:
      value_type: int
      monotonic: false
      aggregation_temporality: cumulative
    attributes: [location, state]
  # driver: HiveExternalCatalog counters (all monotonic cumulative sums)
  spark.driver.hive_external_catalog.file_cache_hits:
    description: Number of file cache hits on the HiveExternalCatalog.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: "{ hit }"
    attributes: []
  spark.driver.hive_external_catalog.files_discovered:
    description: Number of files discovered while listing the partitions of a table in the Hive metastore.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: "{ file }"
    attributes: []
  spark.driver.hive_external_catalog.hive_client_calls:
    description: Number of calls to the underlying Hive Metastore client made by the Spark application.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: "{ call }"
    attributes: []
  spark.driver.hive_external_catalog.parallel_listing_jobs:
    description: Number of parallel listing jobs initiated by the HiveExternalCatalog when listing partitions of a table.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: "{ listing_job }"
    attributes: []
  spark.driver.hive_external_catalog.partitions_fetched:
    description: Table partitions fetched by the HiveExternalCatalog.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: "{ partition }"
    attributes: []
  # driver: CodeGenerator — operation counts are monotonic sums, the
  # corresponding averages are double-valued gauges
  spark.driver.code_generator.compilation.count:
    description: Number of source code compilation operations performed by the CodeGenerator.
    unit: "{ compilation }"
    enabled: true
    sum:
      value_type: int
      monotonic: true
      aggregation_temporality: cumulative
    attributes: []
  spark.driver.code_generator.compilation.average_time:
    description: Average time spent during CodeGenerator source code compilation operations.
    unit: "ms"
    enabled: true
    gauge:
      value_type: double
    attributes: []
  spark.driver.code_generator.generated_class.count:
    description: Number of classes generated by the CodeGenerator.
    unit: "{ class }"
    enabled: true
    sum:
      value_type: int
      monotonic: true
      aggregation_temporality: cumulative
    attributes: []
  spark.driver.code_generator.generated_class.average_size:
    description: Average class size of the classes generated by the CodeGenerator.
    unit: "bytes"
    enabled: true
    gauge:
      value_type: double
    attributes: []
  spark.driver.code_generator.generated_method.count:
    description: Number of methods generated by the CodeGenerator.
    unit: "{ method }"
    enabled: true
    sum:
      value_type: int
      monotonic: true
      aggregation_temporality: cumulative
    attributes: []
  spark.driver.code_generator.generated_method.average_size:
    description: Average method size of the classes generated by the CodeGenerator.
    unit: "bytes"
    enabled: true
    gauge:
      value_type: double
    attributes: []
  spark.driver.code_generator.source_code.operations:
    description: Number of source code generation operations performed by the CodeGenerator.
    unit: "{ operation }"
    enabled: true
    sum:
      value_type: int
      monotonic: true
      aggregation_temporality: cumulative
    attributes: []
  spark.driver.code_generator.source_code.average_size:
    description: Average size of the source code generated by a CodeGenerator code generation operation.
    unit: "bytes"
    enabled: true
    gauge:
      value_type: double
    attributes: []
  # driver: DAGScheduler job and stage counts
  spark.driver.dag_scheduler.job.active:
    description: Number of active jobs currently being processed by the DAGScheduler.
    unit: "{ job }"
    enabled: true
    sum:
      value_type: int
      monotonic: false
      aggregation_temporality: cumulative
    attributes: []
  spark.driver.dag_scheduler.job.count:
    description: Number of jobs that have been submitted to the DAGScheduler.
    unit: "{ job }"
    enabled: true
    sum:
      value_type: int
      monotonic: true
      aggregation_temporality: cumulative
    attributes: []
  spark.driver.dag_scheduler.stage.failed:
    description: Number of failed stages run by the DAGScheduler.
    unit: "{ stage }"
    enabled: true
    sum:
      value_type: int
      monotonic: true
      aggregation_temporality: cumulative
    attributes: []
  spark.driver.dag_scheduler.stage.count:
    description: Number of stages the DAGScheduler is either running or needs to run.
    unit: "{ stage }"
    enabled: true
    sum:
      value_type: int
      monotonic: false
      aggregation_temporality: cumulative
    attributes: [scheduler_status]
  # driver: LiveListenerBus event counts, queue size and processing time
  spark.driver.live_listener_bus.posted:
    description: Number of events that have been posted on the LiveListenerBus.
    unit: "{ event }"
    enabled: true
    sum:
      value_type: int
      monotonic: true
      aggregation_temporality: cumulative
    attributes: []
  spark.driver.live_listener_bus.processing_time.average:
    description: Average time taken for the LiveListenerBus to process an event posted to it.
    unit: "ms"
    enabled: true
    gauge:
      value_type: double
    attributes: []
  spark.driver.live_listener_bus.dropped:
    description: Number of events that have been dropped by the LiveListenerBus.
    unit: "{ event }"
    enabled: true
    sum:
      value_type: int
      monotonic: true
      aggregation_temporality: cumulative
    attributes: []
  spark.driver.live_listener_bus.queue_size:
    description: Number of events currently waiting to be processed by the LiveListenerBus.
    unit: "{ event }"
    enabled: true
    sum:
      value_type: int
      monotonic: false
      aggregation_temporality: cumulative
    attributes: []
  # driver: JVM CPU time, memory usage by kind, and garbage collection
  spark.driver.jvm_cpu_time:
    description: Current CPU time taken by the Spark driver.
    unit: "ns"
    enabled: true
    sum:
      value_type: int
      monotonic: true
      aggregation_temporality: cumulative
    attributes: []
  spark.driver.executor.memory.jvm:
    description: Amount of memory used by the driver's JVM.
    unit: "bytes"
    enabled: true
    sum:
      value_type: int
      monotonic: false
      aggregation_temporality: cumulative
    attributes: [location]
  spark.driver.executor.memory.execution:
    description: Amount of execution memory currently used by the driver.
    unit: "bytes"
    enabled: true
    sum:
      value_type: int
      monotonic: false
      aggregation_temporality: cumulative
    attributes: [location]
  spark.driver.executor.memory.storage:
    description: Amount of storage memory currently used by the driver.
    unit: "bytes"
    enabled: true
    sum:
      value_type: int
      monotonic: false
      aggregation_temporality: cumulative
    attributes: [location]
  spark.driver.executor.memory.pool:
    description: Amount of pool memory currently used by the driver.
    unit: "bytes"
    enabled: true
    sum:
      value_type: int
      monotonic: false
      aggregation_temporality: cumulative
    attributes: [pool_memory_type]
  spark.driver.executor.gc.operations:
    description: Number of garbage collection operations performed by the driver.
    unit: "{ gc_operation }"
    enabled: true
    sum:
      value_type: int
      monotonic: true
      aggregation_temporality: cumulative
    attributes: [gc_type]
  spark.driver.executor.gc.time:
    description: Total elapsed time during garbage collection operations performed by the driver.
    unit: "ms"
    enabled: true
    sum:
      value_type: int
      monotonic: true
      aggregation_temporality: cumulative
    attributes: [gc_type]