File 3020-patch-out-exporter.patch of Package ceph-ceph-20.2.0+20260219.ea3e6758
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index fee02ce96..4abfadab1 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -664,7 +664,6 @@ endif(NOT WITH_SYSTEM_ROCKSDB)
if(WITH_MGR)
add_subdirectory(mgr)
- add_subdirectory(exporter)
endif()
set(librados_config_srcs
@@ -1093,8 +1092,7 @@ add_custom_target(vstart-base DEPENDS
rados)
foreach(dep
cython_rados
- ceph-mgr
- ceph-exporter)
+ ceph-mgr)
if(TARGET ${dep})
add_dependencies(vstart-base ${dep})
endif()
diff --git a/src/common/options/CMakeLists.txt b/src/common/options/CMakeLists.txt
index 60cdbc3f2..2d39158a6 100644
--- a/src/common/options/CMakeLists.txt
+++ b/src/common/options/CMakeLists.txt
@@ -93,7 +93,6 @@ add_options(osd)
add_options(rbd)
add_options(rbd-mirror)
add_options(immutable-object-cache)
-add_options(ceph-exporter)
# if set to empty string, system default luarocks package location (if exist) will be used
set(rgw_luarocks_location "")
diff --git a/src/common/options/build_options.cc b/src/common/options/build_options.cc
index 867fc2efd..001fac902 100644
--- a/src/common/options/build_options.cc
+++ b/src/common/options/build_options.cc
@@ -18,7 +18,6 @@ std::vector<Option> get_immutable_object_cache_options();
std::vector<Option> get_mds_options();
std::vector<Option> get_mds_client_options();
std::vector<Option> get_cephfs_mirror_options();
-std::vector<Option> get_ceph_exporter_options();
std::vector<Option> build_options()
{
@@ -47,7 +46,6 @@ std::vector<Option> build_options()
ingest(get_mds_options(), "mds");
ingest(get_mds_client_options(), "mds_client");
ingest(get_cephfs_mirror_options(), "cephfs-mirror");
- ingest(get_ceph_exporter_options(), "ceph-exporter");
return result;
}
diff --git a/src/common/options/ceph-exporter.yaml.in b/src/common/options/ceph-exporter.yaml.in
deleted file mode 100644
index c4b24ee43..000000000
--- a/src/common/options/ceph-exporter.yaml.in
+++ /dev/null
@@ -1,68 +0,0 @@
-# -*- mode: YAML -*-
----
-
-options:
-- name: exporter_sock_dir
- type: str
- level: advanced
- desc: The path to ceph daemons socket files dir
- default: /var/run/ceph/
- services:
- - ceph-exporter
- flags:
- - runtime
-- name: exporter_addr
- type: str
- level: advanced
- desc: Host ip address where exporter is deployed
- default: 0.0.0.0
- services:
- - ceph-exporter
-- name: exporter_http_port
- type: int
- level: advanced
- desc: Port to deploy exporter on. Default is 9926
- default: 9926
- services:
- - ceph-exporter
-- name: exporter_cert_file
- type: str
- level: advanced
- desc: Certificate file for TLS.
- default:
- services:
- - ceph-exporter
-- name: exporter_key_file
- type: str
- level: advanced
- desc: Key certificate file for TLS.
- default:
- services:
- - ceph-exporter
-- name: exporter_prio_limit
- type: int
- level: advanced
- desc: Only perf counters greater than or equal to exporter_prio_limit are fetched
- default: 5
- services:
- - ceph-exporter
- flags:
- - runtime
-- name: exporter_stats_period
- type: int
- level: advanced
- desc: Time to wait before sending requests again to exporter server (seconds)
- default: 5
- services:
- - ceph-exporter
- flags:
- - runtime
-- name: exporter_sort_metrics
- type: bool
- level: advanced
- desc: If true it will sort the metrics and group them.
- default: true
- services:
- - ceph-exporter
- flags:
- - runtime
diff --git a/src/common/subsys.h b/src/common/subsys.h
index d756124f1..6f021c50d 100644
--- a/src/common/subsys.h
+++ b/src/common/subsys.h
@@ -107,7 +107,6 @@ SUBSYS(alienstore, 0, 5)
SUBSYS(mclock, 1, 5)
SUBSYS(rgw_dedup, 1, 5)
SUBSYS(cyanstore, 0, 5)
-SUBSYS(ceph_exporter, 1, 5)
SUBSYS(memstore, 1, 5)
SUBSYS(trace, 1, 5)
SUBSYS(ceph_dedup, 0, 5)
diff --git a/src/exporter/CMakeLists.txt b/src/exporter/CMakeLists.txt
deleted file mode 100644
index 0127cc539..000000000
--- a/src/exporter/CMakeLists.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-set(exporter_srcs
- ceph_exporter.cc
- DaemonMetricCollector.cc
- web_server.cc
- util.cc
- )
-add_executable(ceph-exporter ${exporter_srcs})
-target_link_libraries(ceph-exporter
- global-static
- ceph-common
- OpenSSL::SSL)
-install(TARGETS ceph-exporter DESTINATION bin)
diff --git a/src/exporter/DaemonMetricCollector.cc b/src/exporter/DaemonMetricCollector.cc
deleted file mode 100644
index 160c75d4c..000000000
--- a/src/exporter/DaemonMetricCollector.cc
+++ /dev/null
@@ -1,562 +0,0 @@
-#include "DaemonMetricCollector.h"
-
-#include <boost/asio/io_context.hpp>
-#include <boost/json/src.hpp>
-#include <chrono>
-#include <filesystem>
-#include <iostream>
-#include <map>
-#include <memory>
-#include <regex>
-#include <sstream>
-#include <string>
-#include <utility>
-
-#include "common/admin_socket_client.h"
-#include "common/debug.h"
-#include "common/hostname.h"
-#include "common/perf_counters.h"
-#include "common/split.h"
-#include "global/global_context.h"
-#include "global/global_init.h"
-#include "include/common_fwd.h"
-#include "util.h"
-
-#define dout_context g_ceph_context
-#define dout_subsys ceph_subsys_ceph_exporter
-
-using json_object = boost::json::object;
-using json_value = boost::json::value;
-using json_array = boost::json::array;
-
-void DaemonMetricCollector::request_loop() {
- timer.async_wait([this](const boost::system::error_code &e) {
- if (shutdown_flag) {
- dout(1) << "Metric collector request loop cancelled" << dendl;
- return;
- }
-
- if (e) return; // Exit on error or cancellation
-
- dout(10) << "Getting metrics loop..." << dendl;
- update_sockets();
-
- bool sort_metrics = g_conf().get_val<bool>("exporter_sort_metrics");
- auto prio_limit = g_conf().get_val<int64_t>("exporter_prio_limit");
- std::string dump_response;
- std::string schema_response;
- dump_asok_metrics(sort_metrics, prio_limit, true, dump_response, schema_response, true);
- auto stats_period = g_conf().get_val<int64_t>("exporter_stats_period");
- // time to wait before sending requests again
- timer.expires_after(std::chrono::seconds(stats_period));
- request_loop();
- });
-}
-
-void DaemonMetricCollector::main() {
- shutdown_flag = false;
- timer.expires_after(std::chrono::seconds(0));
- request_loop();
- io.run();
-}
-
-void DaemonMetricCollector::shutdown(){
- shutdown_flag = true;
- timer.cancel(); // Explicitly cancel the timer
- dout(1) << "Collector shutdown initiated, timer canceled" << dendl;
- io.stop();
-}
-
-std::string DaemonMetricCollector::get_metrics() {
- const std::lock_guard<std::mutex> lock(metrics_mutex);
- return metrics;
-}
-
-template <class T>
-void add_metric(std::unique_ptr<MetricsBuilder> &builder, T value,
- std::string name, std::string description, std::string mtype,
- labels_t labels) {
- builder->add(std::to_string(value), name, description, mtype, labels);
-}
-
-void add_double_or_int_metric(std::unique_ptr<MetricsBuilder> &builder,
- json_value value, std::string name,
- std::string description, std::string mtype,
- labels_t labels) {
- if (value.is_int64()) {
- int64_t v = value.as_int64();
- add_metric(builder, v, name, description, mtype, labels);
- } else if (value.is_double()) {
- double v = value.as_double();
- add_metric(builder, v, name, description, mtype, labels);
- }
-}
-
-std::string boost_string_to_std(boost::json::string js) {
- std::string res(js.data());
- return res;
-}
-
-std::string quote(std::string value) { return "\"" + value + "\""; }
-
-void DaemonMetricCollector::parse_asok_metrics(
- std::string &counter_dump_response, std::string &counter_schema_response,
- int64_t prio_limit, const std::string &daemon_name) {
- try {
- json_object counter_dump =
- boost::json::parse(counter_dump_response).as_object();
- json_object counter_schema =
- boost::json::parse(counter_schema_response).as_object();
-
- for (auto &perf_group_item : counter_schema) {
- std::string perf_group = {perf_group_item.key().begin(),
- perf_group_item.key().end()};
- json_array perf_group_schema_array = perf_group_item.value().as_array();
- json_array perf_group_dump_array = counter_dump[perf_group].as_array();
- for (auto schema_itr = perf_group_schema_array.begin(),
- dump_itr = perf_group_dump_array.begin();
- schema_itr != perf_group_schema_array.end() &&
- dump_itr != perf_group_dump_array.end();
- ++schema_itr, ++dump_itr) {
- try {
- auto counters = schema_itr->at("counters").as_object();
- auto counters_labels = schema_itr->at("labels").as_object();
- auto counters_values = dump_itr->at("counters").as_object();
- labels_t labels;
-
- for (auto &label : counters_labels) {
- std::string label_key = {label.key().begin(), label.key().end()};
- labels[label_key] = quote(label.value().as_string().c_str());
- }
- for (auto &counter : counters) {
- try {
- json_object counter_group = counter.value().as_object();
- if (counter_group["priority"].as_int64() < prio_limit) {
- continue;
- }
- std::string counter_name_init = {counter.key().begin(),
- counter.key().end()};
- std::string counter_name = perf_group + "_" + counter_name_init;
- promethize(counter_name);
-
- auto extra_labels = get_extra_labels(daemon_name);
- if (extra_labels.empty()) {
- dout(1) << "Unable to parse instance_id from daemon_name: "
- << daemon_name << dendl;
- continue;
- }
- labels.insert(extra_labels.begin(), extra_labels.end());
-
- // For now this is only required for rgw multi-site metrics
- auto multisite_labels_and_name = add_fixed_name_metrics(counter_name);
- if (!multisite_labels_and_name.first.empty()) {
- labels.insert(multisite_labels_and_name.first.begin(),
- multisite_labels_and_name.first.end());
- counter_name = multisite_labels_and_name.second;
- }
- auto perf_values = counters_values.at(counter_name_init);
- dump_asok_metric(counter_group, perf_values, counter_name, labels);
- } catch (const std::exception &e) {
- dout(1) << "Exception in counter processing for " << daemon_name << ": " << e.what() << dendl;
- continue;
- }
- }
- } catch (const std::exception &e) {
- dout(1) << "Exception in schema/dump iteration for " << daemon_name << ": " << e.what() << dendl;
- continue;
- }
- }
- }
- } catch (const std::exception &e) {
- dout(1) << "Exception in parse_asok_metrics for " << daemon_name << ": " << e.what() << dendl;
- return;
- }
-}
-
-/*
-perf_values can be either a int/double or a json_object. Since
- json_value is a wrapper of both we use that class.
- */
-void DaemonMetricCollector::dump_asok_metric(json_object perf_info,
- json_value perf_values,
- std::string name,
- labels_t labels) {
- try {
- if (!perf_info.if_contains("type") ||
- !perf_info.if_contains("metric_type") ||
- !perf_info.if_contains("description")) {
- dout(1) << "Missing required key in perf_info for metric: " << name << dendl;
- return;
- }
- int64_t type = perf_info["type"].as_int64();
-
- if (!perf_info["metric_type"].is_string()) {
- dout(1) << "Missing or invalid 'metric_type' in perf_info for metric: " << name << dendl;
- return;
- }
- std::string metric_type =
- boost_string_to_std(perf_info["metric_type"].as_string());
-
- if (!perf_info["description"].is_string()) {
- dout(1) << "Missing or invalid 'description' in perf_info for metric: " << name << dendl;
- return;
- }
- std::string description =
- boost_string_to_std(perf_info["description"].as_string());
-
- if (type & PERFCOUNTER_LONGRUNAVG) {
- if (!perf_values.is_object()) {
- dout(1) << "perf_values is not an object for metric: " << name << dendl;
- return;
- }
- auto perf_obj = perf_values.as_object();
- if (!perf_obj.if_contains("avgcount")) {
- dout(1) << "Missing 'avgcount' in perf_values for metric: " << name << dendl;
- return;
- }
- if (!perf_obj.if_contains("sum")) {
- dout(1) << "Missing 'sum' in perf_values for metric: " << name << dendl;
- return;
- }
- int64_t count = perf_obj["avgcount"].as_int64();
- add_metric(builder, count, name + "_count", description + " Count", "counter",
- labels);
- json_value sum_value = perf_obj["sum"];
- add_double_or_int_metric(builder, sum_value, name + "_sum", description + " Total",
- metric_type, labels);
- } else {
- add_double_or_int_metric(builder, perf_values, name, description,
- metric_type, labels);
- }
- } catch (const std::exception& e) {
- dout(1) << "Exception in dump_asok_metric for metric: " << name << ": " << e.what() << dendl;
- return;
- }
-}
-
-void DaemonMetricCollector::dump_asok_metrics(bool sort_metrics, int64_t counter_prio,
- bool sockClientsPing, std::string &dump_response,
- std::string &schema_response,
- bool config_show_response) {
- BlockTimer timer(__FILE__, __FUNCTION__);
-
- std::vector<std::pair<std::string, int>> daemon_pids;
-
- int failures = 0;
- if (sort_metrics) {
- builder =
- std::unique_ptr<OrderedMetricsBuilder>(new OrderedMetricsBuilder());
- } else {
- builder =
- std::unique_ptr<UnorderedMetricsBuilder>(new UnorderedMetricsBuilder());
- }
- auto prio_limit = counter_prio;
- for (auto &[daemon_name, sock_client] : clients) {
- if (sockClientsPing) {
- bool ok;
- sock_client.ping(&ok);
- std::string ceph_daemon_socket_up_desc(
- "Reports the health status of a Ceph daemon, as determined by whether it is able to respond via its admin socket (1 = healthy, 0 = unhealthy).");
- labels_t ceph_daemon_socket_up_labels;
- ceph_daemon_socket_up_labels["hostname"] = quote(ceph_get_hostname());
- ceph_daemon_socket_up_labels["ceph_daemon"] = quote(daemon_name);
- add_metric(builder, static_cast<int>(ok), "ceph_daemon_socket_up", ceph_daemon_socket_up_desc,
- "gauge", ceph_daemon_socket_up_labels);
- if (!ok) {
- failures++;
- continue;
- }
- }
- std::string counter_dump_response = dump_response.size() > 0 ? dump_response :
- asok_request(sock_client, "counter dump", daemon_name);
- if (counter_dump_response.size() == 0) {
- failures++;
- continue;
- }
- std::string counter_schema_response = schema_response.size() > 0 ? schema_response :
- asok_request(sock_client, "counter schema", daemon_name);
- if (counter_schema_response.size() == 0) {
- failures++;
- continue;
- }
-
- try {
- parse_asok_metrics(counter_dump_response, counter_schema_response,
- prio_limit, daemon_name);
-
- std::string config_show = !config_show_response ? "" :
- asok_request(sock_client, "config show", daemon_name);
- if (config_show.size() == 0) {
- failures++;
- continue;
- }
- json_object pid_file_json = boost::json::parse(config_show).as_object();
- std::string pid_path =
- boost_string_to_std(pid_file_json["pid_file"].as_string());
- std::string pid_str = read_file_to_string(pid_path);
- if (!pid_path.size()) {
- dout(1) << "pid path is empty; process metrics won't be fetched for: "
- << daemon_name << dendl;
- }
- if (!pid_str.empty()) {
- daemon_pids.push_back({daemon_name, std::stoi(pid_str)});
- }
- } catch (const std::invalid_argument &e) {
- failures++;
- dout(1) << "failed to handle " << daemon_name << ": " << e.what()
- << dendl;
- continue;
- } catch (const std::runtime_error &e) {
- failures++;
- dout(1) << "failed to parse json for " << daemon_name << ": " << e.what()
- << dendl;
- continue;
- }
- }
- dout(10) << "Perf counters retrieved for " << clients.size() - failures << "/"
- << clients.size() << " daemons." << dendl;
- // get time spent on this function
- timer.stop();
- std::string scrap_desc(
- "Time spent scraping and transforming perf counters to metrics");
- labels_t scrap_labels;
- scrap_labels["host"] = quote(ceph_get_hostname());
- scrap_labels["function"] = quote(__FUNCTION__);
- add_metric(builder, timer.get_ms(), "ceph_exporter_scrape_time", scrap_desc,
- "gauge", scrap_labels);
-
- const std::lock_guard<std::mutex> lock(metrics_mutex);
- // only get metrics if there's pid path for some or all daemons isn't empty
- if (daemon_pids.size() != 0) {
- get_process_metrics(daemon_pids);
- }
- metrics = builder->dump();
-}
-
-std::vector<std::string> read_proc_stat_file(std::string path) {
- std::string stat = read_file_to_string(path);
- std::vector<std::string> strings;
- auto parts = ceph::split(stat);
- strings.assign(parts.begin(), parts.end());
- return strings;
-}
-
-struct pstat read_pid_stat(int pid) {
- std::string stat_path("/proc/" + std::to_string(pid) + "/stat");
- std::vector<std::string> stats = read_proc_stat_file(stat_path);
- struct pstat stat;
- stat.minflt = std::stoul(stats[9]);
- stat.majflt = std::stoul(stats[11]);
- stat.utime = std::stoul(stats[13]);
- stat.stime = std::stoul(stats[14]);
- stat.num_threads = std::stoul(stats[19]);
- stat.start_time = std::stoul(stats[21]);
- stat.vm_size = std::stoul(stats[22]);
- stat.resident_size = std::stoi(stats[23]);
- return stat;
-}
-
-void DaemonMetricCollector::get_process_metrics(
- std::vector<std::pair<std::string, int>> daemon_pids) {
- std::string path("/proc");
- std::stringstream ss;
- for (auto &[daemon_name, pid] : daemon_pids) {
- std::vector<std::string> uptimes = read_proc_stat_file("/proc/uptime");
- struct pstat stat = read_pid_stat(pid);
- int clk_tck = sysconf(_SC_CLK_TCK);
- double start_time_seconds = stat.start_time / (double)clk_tck;
- double user_time = stat.utime / (double)clk_tck;
- double kernel_time = stat.stime / (double)clk_tck;
- double total_time_seconds = user_time + kernel_time;
- double uptime = std::stod(uptimes[0]);
- double elapsed_time = uptime - start_time_seconds;
- double idle_time = elapsed_time - total_time_seconds;
- double usage = total_time_seconds * 100 / elapsed_time;
-
- labels_t labels;
- labels["ceph_daemon"] = quote(daemon_name);
- add_metric(builder, stat.minflt, "ceph_exporter_minflt_total",
- "Number of minor page faults of daemon", "counter", labels);
- add_metric(builder, stat.majflt, "ceph_exporter_majflt_total",
- "Number of major page faults of daemon", "counter", labels);
- add_metric(builder, stat.num_threads, "ceph_exporter_num_threads",
- "Number of threads used by daemon", "gauge", labels);
- add_metric(builder, usage, "ceph_exporter_cpu_usage",
- "CPU usage of a daemon", "gauge", labels);
-
- std::string cpu_time_desc = "Process time in kernel/user/idle mode";
- labels_t cpu_total_labels;
- cpu_total_labels["ceph_daemon"] = quote(daemon_name);
- cpu_total_labels["mode"] = quote("kernel");
- add_metric(builder, kernel_time, "ceph_exporter_cpu_total", cpu_time_desc,
- "counter", cpu_total_labels);
- cpu_total_labels["mode"] = quote("user");
- add_metric(builder, user_time, "ceph_exporter_cpu_total", cpu_time_desc,
- "counter", cpu_total_labels);
- cpu_total_labels["mode"] = quote("idle");
- add_metric(builder, idle_time, "ceph_exporter_cpu_total", cpu_time_desc,
- "counter", cpu_total_labels);
- add_metric(builder, stat.vm_size, "ceph_exporter_vm_size",
- "Virtual memory used in a daemon", "gauge", labels);
- add_metric(builder, stat.resident_size, "ceph_exporter_resident_size",
- "Resident memory in a daemon", "gauge", labels);
- }
-}
-
-std::string DaemonMetricCollector::asok_request(AdminSocketClient &asok,
- std::string command,
- std::string daemon_name) {
- std::string request("{\"prefix\": \"" + command + "\"}");
- std::string response;
- std::string err = asok.do_request(request, &response);
- if (err.length() > 0 || response.substr(0, 5) == "ERROR") {
- dout(1) << "command " << command << "failed for daemon " << daemon_name
- << "with error: " << err << dendl;
- return "";
- }
- return response;
-}
-
-labels_t DaemonMetricCollector::get_extra_labels(std::string daemon_name) {
- labels_t labels;
- const std::string ceph_daemon_prefix = "ceph-";
- const std::string ceph_client_prefix = "client.";
- if (daemon_name.rfind(ceph_daemon_prefix, 0) == 0) {
- daemon_name = daemon_name.substr(ceph_daemon_prefix.size());
- }
- if (daemon_name.rfind(ceph_client_prefix, 0) == 0) {
- daemon_name = daemon_name.substr(ceph_client_prefix.size());
- }
- // In vstart cluster socket files for rgw are stored as radosgw.<instance_id>.asok
- if (daemon_name.find("radosgw") != std::string::npos) {
- std::size_t pos = daemon_name.find_last_of('.');
- std::string tmp = daemon_name.substr(pos+1);
- labels["instance_id"] = quote(tmp);
- }
- else if (daemon_name.find("rgw") != std::string::npos) {
- // fetch intance_id for e.g. "hrgsea" from daemon_name=rgw.foo.ceph-node-00.hrgsea.2.94739968030880
- std::vector<std::string> elems;
- std::stringstream ss;
- ss.str(daemon_name);
- std::string item;
- while (std::getline(ss, item, '.')) {
- elems.push_back(item);
- }
- if (elems.size() >= 4) {
- labels["instance_id"] = quote(elems[3]);
- } else {
- return labels_t();
- }
- } else {
- labels.insert({"ceph_daemon", quote(daemon_name)});
- }
- return labels;
-}
-
-// Add fixed name metrics from existing ones that have details in their names
-// that should be in labels (not in name). For backward compatibility,
-// a new fixed name metric is created (instead of replacing)and details are put
-// in new labels. Intended for RGW sync perf. counters but extendable as required.
-// See: https://tracker.ceph.com/issues/45311
-std::pair<labels_t, std::string>
-DaemonMetricCollector::add_fixed_name_metrics(std::string metric_name) {
- std::string new_metric_name;
- labels_t labels;
- new_metric_name = metric_name;
-
- std::regex re("data_sync_from_([^_]*)");
- std::smatch match;
- if (std::regex_search(metric_name, match, re)) {
- new_metric_name = std::regex_replace(metric_name, re, "data_sync_from_zone");
- labels["source_zone"] = quote(match.str(1));
- return {labels, new_metric_name};
- }
-
- return {};
-}
-
-void DaemonMetricCollector::update_sockets() {
- std::string sock_dir = g_conf().get_val<std::string>("exporter_sock_dir");
- clients.clear();
- std::filesystem::path sock_path = sock_dir;
- if (!std::filesystem::is_directory(sock_path.parent_path())) {
- dout(1) << "ERROR: No such directory exist" << sock_dir << dendl;
- return;
- }
- for (const auto &entry : std::filesystem::directory_iterator(sock_dir)) {
- if (entry.path().extension() == ".asok") {
- std::string daemon_socket_name = entry.path().filename().string();
- std::string daemon_name =
- daemon_socket_name.substr(0, daemon_socket_name.size() - 5);
- if (clients.find(daemon_name) == clients.end() &&
- !(daemon_name.find("mgr") != std::string::npos) &&
- !(daemon_name.find("ceph-exporter") != std::string::npos)) {
- AdminSocketClient sock(entry.path().string());
- clients.insert({daemon_name, std::move(sock)});
- }
- }
- }
-}
-
-void OrderedMetricsBuilder::add(std::string value, std::string name,
- std::string description, std::string mtype,
- labels_t labels) {
- if (metrics.find(name) == metrics.end()) {
- Metric metric(name, mtype, description);
- metrics[name] = std::move(metric);
- }
- Metric &metric = metrics[name];
- metric.add(labels, value);
-}
-
-std::string OrderedMetricsBuilder::dump() {
- for (auto &[name, metric] : metrics) {
- out += metric.dump() + "\n";
- }
- return out;
-}
-
-void UnorderedMetricsBuilder::add(std::string value, std::string name,
- std::string description, std::string mtype,
- labels_t labels) {
- Metric metric(name, mtype, description);
- metric.add(labels, value);
- out += metric.dump() + "\n\n";
-}
-
-std::string UnorderedMetricsBuilder::dump() { return out; }
-
-void Metric::add(labels_t labels, std::string value) {
- metric_entry entry;
- entry.labels = labels;
- entry.value = value;
- entries.push_back(entry);
-}
-
-std::string Metric::dump() {
- std::stringstream metric_ss;
- metric_ss << "# HELP " << name << " " << description << "\n";
- metric_ss << "# TYPE " << name << " " << mtype << "\n";
- for (auto &entry : entries) {
- std::stringstream labels_ss;
- size_t i = 0;
- for (auto &[label_name, label_value] : entry.labels) {
- labels_ss << label_name << "=" << label_value;
- if (i < entry.labels.size() - 1) {
- labels_ss << ",";
- }
- i++;
- }
- metric_ss << name << "{" << labels_ss.str() << "} " << entry.value;
- if (&entry != &entries.back()) {
- metric_ss << "\n";
- }
- }
- return metric_ss.str();
-}
-
-DaemonMetricCollector &collector_instance() {
- static DaemonMetricCollector instance;
- return instance;
-}
-
diff --git a/src/exporter/DaemonMetricCollector.h b/src/exporter/DaemonMetricCollector.h
deleted file mode 100644
index 5831a0fa3..000000000
--- a/src/exporter/DaemonMetricCollector.h
+++ /dev/null
@@ -1,119 +0,0 @@
-#pragma once
-
-#include "common/admin_socket_client.h"
-#include <atomic>
-#include <map>
-#include <string>
-#include <vector>
-
-#include <boost/asio/steady_timer.hpp>
-#include <boost/thread.hpp>
-#include <boost/json/object.hpp>
-#include <filesystem>
-#include <map>
-#include <string>
-#include <vector>
-
-
-struct pstat {
- unsigned long utime;
- unsigned long stime;
- unsigned long minflt;
- unsigned long majflt;
- unsigned long start_time;
- int num_threads;
- unsigned long vm_size;
- int resident_size;
-};
-
-class MetricsBuilder;
-class OrderedMetricsBuilder;
-class UnorderedMetricsBuilder;
-class Metric;
-
-typedef std::map<std::string, std::string> labels_t;
-
-class DaemonMetricCollector {
-public:
- void main();
- std::string get_metrics();
- labels_t get_extra_labels(std::string daemon_name);
- void dump_asok_metrics(bool sort_metrics, int64_t counter_prio,
- bool sockClientsPing, std::string &dump_response,
- std::string &schema_response,
- bool config_show_response);
- std::map<std::string, AdminSocketClient> clients;
- std::string metrics;
- std::pair<labels_t, std::string> add_fixed_name_metrics(std::string metric_name);
- void update_sockets();
- void shutdown();
-
-private:
- std::mutex metrics_mutex;
- std::unique_ptr<MetricsBuilder> builder;
- boost::asio::io_context io;
- boost::asio::steady_timer timer{io};
- std::atomic<bool> shutdown_flag{false};
-
- void request_loop();
-
- void dump_asok_metric(boost::json::object perf_info,
- boost::json::value perf_values, std::string name,
- labels_t labels);
- void parse_asok_metrics(std::string &counter_dump_response,
- std::string &counter_schema_response,
- int64_t prio_limit, const std::string &daemon_name);
- void get_process_metrics(std::vector<std::pair<std::string, int>> daemon_pids);
- std::string asok_request(AdminSocketClient &asok, std::string command, std::string daemon_name);
-};
-
-class Metric {
-private:
- struct metric_entry {
- labels_t labels;
- std::string value;
- };
- std::string name;
- std::string mtype;
- std::string description;
- std::vector<metric_entry> entries;
-
-public:
- Metric(std::string name, std::string mtype, std::string description)
- : name(name), mtype(mtype), description(description) {}
- Metric(const Metric &) = default;
- Metric() = default;
- void add(labels_t labels, std::string value);
- std::string dump();
-};
-
-class MetricsBuilder {
-public:
- virtual ~MetricsBuilder() = default;
- virtual std::string dump() = 0;
- virtual void add(std::string value, std::string name, std::string description,
- std::string mtype, labels_t labels) = 0;
-
-protected:
- std::string out;
-};
-
-class OrderedMetricsBuilder : public MetricsBuilder {
-private:
- std::map<std::string, Metric> metrics;
-
-public:
- std::string dump();
- void add(std::string value, std::string name, std::string description,
- std::string mtype, labels_t labels);
-};
-
-class UnorderedMetricsBuilder : public MetricsBuilder {
-public:
- std::string dump();
- void add(std::string value, std::string name, std::string description,
- std::string mtype, labels_t labels);
-};
-
-DaemonMetricCollector &collector_instance();
-
diff --git a/src/exporter/ceph_exporter.cc b/src/exporter/ceph_exporter.cc
deleted file mode 100644
index 2232851c0..000000000
--- a/src/exporter/ceph_exporter.cc
+++ /dev/null
@@ -1,107 +0,0 @@
-#include "common/ceph_argparse.h"
-#include "common/config.h"
-#include "common/debug.h"
-#include "global/global_init.h"
-#include "global/global_context.h"
-#include "global/signal_handler.h"
-#include "exporter/DaemonMetricCollector.h"
-#include "exporter/web_server.h"
-#include <boost/thread/thread.hpp>
-#include <iostream>
-#include <map>
-#include <string>
-#include <atomic>
-#include <chrono>
-#include <thread>
-
-#define dout_context g_ceph_context
-#define dout_subsys ceph_subsys_ceph_exporter
-
-DaemonMetricCollector &collector = collector_instance();
-
-static void handle_signal(int signum)
-{
- ceph_assert(signum == SIGINT || signum == SIGTERM);
- derr << "*** Got signal " << sig_str(signum) << " ***" << dendl;
- // Finish the DaemonMetricCollector
- collector.shutdown();
-}
-
-static void usage() {
- std::cout << "usage: ceph-exporter [options]\n"
- << "options:\n"
- " --sock-dir: The path to Ceph daemon sockets (*.asok)\n"
- " --addrs: Host IP address on which the exporter is to listen\n"
- " --port: TCP Port on which the exporter is to listen. Default is 9926\n"
- " --cert-file: Path to the certificate file when using HTTPS\n"
- " --key-file: Path to the certificate key file when using HTTPS\n"
- " --prio-limit: Only perf counters greater than or equal to prio-limit are fetched. Default: 5\n"
- " --stats-period: Interval between daemon scrapes (seconds). Default: 5s"
- << std::endl;
- generic_server_usage();
-}
-
-int main(int argc, char **argv) {
- auto args = argv_to_vec(argc, argv);
- if (args.empty()) {
- std::cerr << argv[0] << ": -h or --help for usage" << std::endl;
- exit(1);
- }
- if (ceph_argparse_need_usage(args)) {
- usage();
- exit(0);
- }
-
- auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT,
- CODE_ENVIRONMENT_DAEMON, 0);
- std::string val;
- for (auto i = args.begin(); i != args.end();) {
- if (ceph_argparse_double_dash(args, i)) {
- break;
- } else if (ceph_argparse_witharg(args, i, &val, "--sock-dir", (char *)NULL)) {
- cct->_conf.set_val("exporter_sock_dir", val);
- } else if (ceph_argparse_witharg(args, i, &val, "--addrs", (char *)NULL)) {
- cct->_conf.set_val("exporter_addr", val);
- } else if (ceph_argparse_witharg(args, i, &val, "--port", (char *)NULL)) {
- cct->_conf.set_val("exporter_http_port", val);
- } else if (ceph_argparse_witharg(args, i, &val, "--cert-file", (char *)NULL)) {
- cct->_conf.set_val("exporter_cert_file", val);
- } else if (ceph_argparse_witharg(args, i, &val, "--key-file", (char *)NULL)) {
- cct->_conf.set_val("exporter_key_file", val);
- } else if (ceph_argparse_witharg(args, i, &val, "--prio-limit", (char *)NULL)) {
- cct->_conf.set_val("exporter_prio_limit", val);
- } else if (ceph_argparse_witharg(args, i, &val, "--stats-period", (char *)NULL)) {
- cct->_conf.set_val("exporter_stats_period", val);
- } else {
- ++i;
- }
- }
- common_init_finish(g_ceph_context);
-
- // Register signal handlers
- init_async_signal_handler();
- register_async_signal_handler(SIGHUP, sighup_handler);
- register_async_signal_handler_oneshot(SIGINT, handle_signal);
- register_async_signal_handler_oneshot(SIGTERM, handle_signal);
-
- // Start the web server thread
- boost::thread server_thread(web_server_thread_entrypoint);
-
- // Start the DaemonMetricCollector
- collector.main();
-
- // Interrupted. Time to terminate
- unregister_async_signal_handler(SIGHUP, sighup_handler);
- unregister_async_signal_handler(SIGINT, handle_signal);
- unregister_async_signal_handler(SIGTERM, handle_signal);
- shutdown_async_signal_handler();
-
- // Stop the web server thread by interrupting it
- stop_web_server();
- server_thread.interrupt(); // Interrupt the web server thread
- server_thread.join();
-
- dout(1) << "Ceph exporter stopped" << dendl;
-
- return 0;
-}
diff --git a/src/exporter/util.cc b/src/exporter/util.cc
deleted file mode 100644
index 451867be2..000000000
--- a/src/exporter/util.cc
+++ /dev/null
@@ -1,62 +0,0 @@
-#include "util.h"
-
-#include <boost/algorithm/string/classification.hpp>
-#include <boost/algorithm/string/replace.hpp>
-#include <cctype>
-#include <chrono>
-#include <fstream>
-#include <iostream>
-#include <sstream>
-
-#include "common/debug.h"
-
-#define dout_context g_ceph_context
-#define dout_subsys ceph_subsys_ceph_exporter
-
-BlockTimer::BlockTimer(std::string_view file, std::string_view function)
- : file(file),
- function(function) {
- t1 = clock_t::now();
-}
-BlockTimer::~BlockTimer() {
- dout(20) << file << ":" << function << ": " << get_ms() << "ms" << dendl;
-}
-
-// useful with stop
-double BlockTimer::get_ms() const {
- using milliseconds_t = std::chrono::duration<double, std::milli>;
- return std::chrono::duration_cast<milliseconds_t>(t2 - t1).count();
-}
-
-// Manually stop the timer as you might want to get the time
-void BlockTimer::stop() {
- if (!stopped) {
- stopped = true;
- t2 = clock_t::now();
- }
-}
-
-std::string read_file_to_string(std::string path) {
- std::ifstream is(path);
- std::stringstream buffer;
- buffer << is.rdbuf();
- return buffer.str();
-}
-
-// Must be kept in sync with promethize() in src/pybind/mgr/prometheus/module.py
-void promethize(std::string &name) {
- if (name[name.size() - 1] == '-') {
- name[name.size() - 1] = '_';
- name += "minus";
- }
-
- auto should_be_underscore = [](char ch) {
- return ch == '.' || ch == '/' || ch == ' ' || ch == '-';
- };
- std::replace_if(name.begin(), name.end(), should_be_underscore, '_');
-
- boost::replace_all(name, "::", "_");
- boost::replace_all(name, "+", "_plus");
-
- name = "ceph_" + name;
-}
diff --git a/src/exporter/util.h b/src/exporter/util.h
deleted file mode 100644
index cd5448812..000000000
--- a/src/exporter/util.h
+++ /dev/null
@@ -1,22 +0,0 @@
-#include "common/hostname.h"
-#include <chrono>
-#include <string_view>
-
-class BlockTimer {
- public:
- BlockTimer(std::string_view file, std::string_view function);
- ~BlockTimer();
- void stop();
- double get_ms() const;
- private:
- const std::string_view file;
- const std::string_view function;
- bool stopped = false;
- using clock_t = std::chrono::steady_clock;
- clock_t::time_point t1;
- clock_t::time_point t2;
-};
-
-std::string read_file_to_string(std::string path);
-
-void promethize(std::string &name);
diff --git a/src/exporter/web_server.cc b/src/exporter/web_server.cc
deleted file mode 100644
index c01205f26..000000000
--- a/src/exporter/web_server.cc
+++ /dev/null
@@ -1,287 +0,0 @@
-#include "web_server.h"
-#include "common/debug.h"
-#include "common/hostname.h"
-#include "global/global_init.h"
-#include "global/global_context.h"
-#include "exporter/DaemonMetricCollector.h"
-
-#include <boost/asio/ip/tcp.hpp>
-#include <boost/asio/ssl.hpp> // SSL/TLS
-#include <boost/beast/core.hpp>
-#include <boost/beast/http.hpp>
-#include <boost/beast/version.hpp>
-#include <boost/thread/thread.hpp>
-#include <chrono>
-#include <cstdlib>
-#include <ctime>
-#include <iostream>
-#include <map>
-#include <memory>
-#include <string>
-
-#define dout_context g_ceph_context
-#define dout_subsys ceph_subsys_ceph_exporter
-
-namespace beast = boost::beast; // from <boost/beast.hpp>
-namespace http = beast::http; // from <boost/beast/http.hpp>
-namespace net = boost::asio; // from <boost/asio.hpp>
-namespace ssl = boost::asio::ssl; // from <boost/asio/ssl.hpp>
-using tcp = boost::asio::ip::tcp; // from <boost/asio/ip/tcp.hpp>
-
-//common io context for the web servers
-std::shared_ptr<net::io_context> global_ioc;
-
-// Base class for common functionality
-class web_connection {
-public:
- virtual ~web_connection() = default;
- virtual void start() = 0; // Pure virtual function to start the connection
-
-protected:
- beast::flat_buffer buffer_{8192};
- http::request<http::dynamic_body> request_;
- http::response<http::string_body> response_;
- net::steady_timer deadline_;
-
- web_connection(net::any_io_executor executor, std::chrono::seconds timeout)
- : deadline_(executor, timeout) {}
-
- // Common request processing logic
- void process_request() {
- response_.version(request_.version());
- response_.keep_alive(request_.keep_alive());
-
- switch (request_.method()) {
- case http::verb::get:
- response_.result(http::status::ok);
- create_response();
- break;
-
- default:
- response_.result(http::status::method_not_allowed);
- response_.set(http::field::content_type, "text/plain");
- std::string body("Invalid request-method '" + std::string(request_.method_string()) + "'\n");
- response_.body() = body;
- break;
- }
- write_response();
- }
-
- // Construct a response message based on the request target
- void create_response() {
- if (request_.target() == "/") {
- response_.result(http::status::moved_permanently);
- response_.set(http::field::location, "/metrics");
- } else if (request_.target() == "/metrics") {
- response_.set(http::field::content_type, "text/plain; charset=utf-8");
- DaemonMetricCollector &collector = collector_instance();
- std::string metrics = collector.get_metrics();
- response_.body() = metrics;
- } else {
- response_.result(http::status::method_not_allowed);
- response_.set(http::field::content_type, "text/plain");
- response_.body() = "File not found \n";
- }
- }
-
- // Asynchronously transmit the response message
- virtual void write_response() = 0;
-
- // Check whether we have spent enough time on this connection
- void check_deadline(std::shared_ptr<web_connection> self) {
- deadline_.async_wait([self](beast::error_code ec) {
- if (!ec) {
- self->close_connection(ec);
- }
- });
- }
-
- // Bad requests error mgmt (http req->https srv and https req ->http srv)
- void handle_bad_request(beast::error_code ec) {
- response_.version(request_.version());
- response_.keep_alive(request_.keep_alive());
- response_.result(http::status::method_not_allowed);
- response_.set(http::field::content_type, "text/plain");
- std::string body = "Ceph exporter.\nRequest Error: " + ec.message();
- response_.body() = body;
-
- write_response();
- }
-
- virtual void close_connection(beast::error_code& ec) = 0;
-};
-
-// Derived class for HTTP connections
-class http_connection : public web_connection, public std::enable_shared_from_this<http_connection> {
-public:
- explicit http_connection(tcp::socket socket)
- : web_connection(socket.get_executor(), std::chrono::seconds(60)), socket_(std::move(socket)) {}
-
- void start() override {
- read_request(shared_from_this());
- check_deadline(shared_from_this());
- }
-
-private:
- tcp::socket socket_;
-
- void read_request(std::shared_ptr<http_connection> self) {
- http::async_read(socket_, buffer_, request_,
- [self](beast::error_code ec, std::size_t bytes_transferred) {
- boost::ignore_unused(bytes_transferred);
- if (ec) {
- dout(1) << "ERROR: " << ec.message() << dendl;
- self->handle_bad_request(ec);
- return;
- }
- self->process_request();
- });
- }
-
- void write_response() override {
- auto self = shared_from_this();
- response_.prepare_payload();
- http::async_write(socket_, response_,
- [self](beast::error_code ec, std::size_t) {
- self->socket_.shutdown(tcp::socket::shutdown_send, ec);
- self->deadline_.cancel();
- if (ec) {
- dout(1) << "ERROR: " << ec.message() << dendl;
- return;
- }
- });
- }
-
- void close_connection(beast::error_code& ec) override {
- socket_.close(ec);
- }
-};
-
-// Derived class for HTTPS connections
-class https_connection : public web_connection, public std::enable_shared_from_this<https_connection> {
-public:
- explicit https_connection(ssl::stream<tcp::socket> socket)
- : web_connection(socket.get_executor(), std::chrono::seconds(60)), socket_(std::move(socket)) {}
-
- void start() override {
- auto self = shared_from_this();
- socket_.async_handshake(ssl::stream_base::server,
- [self](beast::error_code ec) {
- if (!ec) {
- self->read_request(self);
- } else {
- dout(1) << "ERROR: SSL Handshake failed: " << ec.message() << dendl;
- self->handle_bad_request(ec);
- }
- });
- check_deadline(self);
- }
-
-private:
- ssl::stream<tcp::socket> socket_;
-
- void read_request(std::shared_ptr<https_connection> self) {
- http::async_read(socket_, buffer_, request_,
- [self](beast::error_code ec, std::size_t bytes_transferred) {
- boost::ignore_unused(bytes_transferred);
- if (ec) {
- dout(1) << "ERROR: " << ec.message() << dendl;
- return;
- }
- self->process_request();
- });
- }
-
- void write_response() override {
- auto self = shared_from_this();
- response_.prepare_payload();
- http::async_write(socket_, response_,
- [self](beast::error_code ec, std::size_t) {
- self->socket_.async_shutdown([self](beast::error_code ec) {
- self->deadline_.cancel();
- if (ec) {
- dout(1) << "ERROR: " << ec.message() << dendl;
- }
- });
- });
- }
-
- void close_connection(beast::error_code& ec) override {
- socket_.lowest_layer().close(ec);
- }
-
-};
-
-void http_server(tcp::acceptor &acceptor, tcp::socket &socket) {
- acceptor.async_accept(socket, [&](beast::error_code ec) {
- if (!ec) {
- std::make_shared<http_connection>(std::move(socket))->start();
- }
- http_server(acceptor, socket);
- });
-}
-
-void https_server(tcp::acceptor &acceptor, ssl::context &ssl_ctx) {
- acceptor.async_accept([&](beast::error_code ec, tcp::socket socket) {
- if (!ec) {
- std::make_shared<https_connection>(ssl::stream<tcp::socket>(std::move(socket), ssl_ctx))->start();
- }
- https_server(acceptor, ssl_ctx);
- });
-}
-
-void run_http_server(const std::string& exporter_addr, short unsigned int port) {
- tcp::acceptor acceptor{*global_ioc, {net::ip::make_address(exporter_addr), port}};
- tcp::socket socket{*global_ioc};
-
- http_server(acceptor, socket);
-
- dout(1) << "HTTP server running on " << exporter_addr << ":" << port << dendl;
- global_ioc->run();
-}
-
-void run_https_server(const std::string& exporter_addr, short unsigned int port, const std::string& cert_file, const std::string& key_file) {
- ssl::context ssl_ctx(ssl::context::tlsv13);
-
- ssl_ctx.use_certificate_chain_file(cert_file);
- ssl_ctx.use_private_key_file(key_file, ssl::context::pem);
-
- tcp::acceptor acceptor{*global_ioc, {net::ip::make_address(exporter_addr), port}};
- https_server(acceptor, ssl_ctx);
-
- dout(1) << "HTTPS server running on " << exporter_addr << ":" << port << dendl;
- global_ioc->run();
-}
-
-void stop_web_server() {
- if (global_ioc) {
- global_ioc->stop();
- dout(1) << "Ceph exporter web server stopped" << dendl;
- }
-}
-
-void web_server_thread_entrypoint() {
- try {
- std::string exporter_addr = g_conf().get_val<std::string>("exporter_addr");
- short unsigned int port = g_conf().get_val<int64_t>("exporter_http_port");
- std::string cert_file = g_conf().get_val<std::string>("exporter_cert_file");
- std::string key_file = g_conf().get_val<std::string>("exporter_key_file");
-
- // Initialize global_ioc
- global_ioc = std::make_shared<net::io_context>(1);
-
- if (cert_file.empty() && key_file.empty()) {
- run_http_server(exporter_addr, port);
- } else {
- try {
- run_https_server(exporter_addr, port, cert_file, key_file);
- } catch (const std::exception &e) {
- derr << "Failed to start HTTPS server: " << e.what() << dendl;
- exit(EXIT_FAILURE);
- }
- }
- } catch (std::exception const &e) {
- derr << "Error: " << e.what() << dendl;
- exit(EXIT_FAILURE);
- }
-}
diff --git a/src/exporter/web_server.h b/src/exporter/web_server.h
deleted file mode 100644
index c6d4c54ec..000000000
--- a/src/exporter/web_server.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#pragma once
-
-#include <string>
-
-void web_server_thread_entrypoint();
-void stop_web_server();
diff --git a/systemd/CMakeLists.txt b/systemd/CMakeLists.txt
index 366bab419..ad75ce61c 100644
--- a/systemd/CMakeLists.txt
+++ b/systemd/CMakeLists.txt
@@ -14,7 +14,6 @@ set(CEPH_SYSTEMD_ENV_DIR "/etc/sysconfig"
set(SYSTEMD_ENV_FILE "${CEPH_SYSTEMD_ENV_DIR}/ceph")
foreach(service
ceph-crash
- ceph-exporter
ceph-fuse@
ceph-mds@
ceph-mgr@
diff --git a/systemd/ceph-exporter.service.in b/systemd/ceph-exporter.service.in
deleted file mode 100644
index ce5b3b0a9..000000000
--- a/systemd/ceph-exporter.service.in
+++ /dev/null
@@ -1,29 +0,0 @@
-[Unit]
-Description=Ceph cluster exporter daemon
-PartOf=ceph.target
-After=network-online.target local-fs.target
-Before=ceph.target
-Wants=network-online.target local-fs.target ceph.target ceph-mon.target
-
-[Service]
-ExecReload=/bin/kill -HUP $MAINPID
-ExecStart=@CMAKE_INSTALL_PREFIX@/bin/ceph-exporter -f --id %i --setuser ceph --setgroup ceph
-LockPersonality=true
-NoNewPrivileges=true
-PrivateDevices=yes
-PrivateTmp=true
-ProtectControlGroups=true
-ProtectHome=true
-ProtectHostname=true
-ProtectKernelLogs=true
-ProtectKernelModules=true
-ProtectKernelTunables=true
-ProtectSystem=full
-Restart=on-failure
-RestartSec=10
-RestrictSUIDSGID=true
-StartLimitBurst=3
-StartLimitInterval=30min
-
-[Install]
-WantedBy=multi-user.target ceph.target