├── .gitignore ├── .npmrc ├── .streerc ├── .prettierrc.cjs ├── .template-lintrc.cjs ├── stylelint.config.mjs ├── eslint.config.mjs ├── lib ├── internal_metric │ ├── job.rb │ ├── custom.rb │ ├── base.rb │ ├── process.rb │ ├── web.rb │ └── global.rb ├── collector_demon.rb ├── global_reporter_demon.rb ├── reporter │ ├── web.rb │ ├── global.rb │ └── process.rb ├── job_metric_initializer.rb ├── middleware │ └── metrics.rb └── collector.rb ├── spec ├── system │ └── core_features_spec.rb ├── support │ └── null_metric.rb ├── lib │ ├── internal_metric │ │ ├── custom_spec.rb │ │ ├── base_spec.rb │ │ ├── web_spec.rb │ │ └── global_spec.rb │ ├── reporter │ │ ├── global_spec.rb │ │ └── process_spec.rb │ └── collector_spec.rb ├── job_metric_initializer_spec.rb └── middleware │ └── metrics_spec.rb ├── .github └── workflows │ └── discourse-plugin.yml ├── Gemfile ├── .rubocop.yml ├── .discourse-compatibility ├── package.json ├── README.md ├── LICENSE ├── app └── jobs │ └── scheduled │ └── update_stats.rb ├── bin └── collector ├── Gemfile.lock └── plugin.rb /.gitignore: -------------------------------------------------------------------------------- 1 | gems/* 2 | *.swn 3 | /node_modules 4 | -------------------------------------------------------------------------------- /.npmrc: -------------------------------------------------------------------------------- 1 | engine-strict = true 2 | auto-install-peers = false 3 | -------------------------------------------------------------------------------- /.streerc: -------------------------------------------------------------------------------- 1 | --print-width=100 2 | --plugins=plugin/trailing_comma 3 | -------------------------------------------------------------------------------- /.prettierrc.cjs: -------------------------------------------------------------------------------- 1 | module.exports = require("@discourse/lint-configs/prettier"); 2 | -------------------------------------------------------------------------------- /.template-lintrc.cjs: -------------------------------------------------------------------------------- 1 | module.exports = require("@discourse/lint-configs/template-lint"); 2 | -------------------------------------------------------------------------------- /stylelint.config.mjs: -------------------------------------------------------------------------------- 1 | export default { 2 | extends: ["@discourse/lint-configs/stylelint"], 3 | }; 4 | -------------------------------------------------------------------------------- /eslint.config.mjs: -------------------------------------------------------------------------------- 1 | import DiscourseRecommended from "@discourse/lint-configs/eslint"; 2 | 3 | export default [...DiscourseRecommended]; 4 | -------------------------------------------------------------------------------- /lib/internal_metric/job.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module DiscoursePrometheus::InternalMetric 4 | class Job < Base 5 | attribute :job_name, :scheduled, :duration, :count, :success 6 | end 7 | end 8 | -------------------------------------------------------------------------------- /spec/system/core_features_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe "Core features", type: :system do 4 | before { enable_current_plugin } 5 | 6 | it_behaves_like "having working core features" 7 | end 8 | 
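`lib/internal_metric/job.rb` above only declares the Job metric's attributes; the serialization logic lives in `InternalMetric::Base`, which appears later in this dump. As a quick orientation, here is a minimal, hedged sketch of how `plugin.rb` (also further down) populates and ships one of these metrics. It assumes the plugin is loaded (for example in a Rails console on a site running discourse-prometheus), and the job name and numbers are purely illustrative.

```ruby
# Build a Job metric by hand, mirroring plugin.rb's on(:scheduled_job_ran) handler.
metric = DiscoursePrometheus::InternalMetric::Job.new
metric.scheduled = true             # ran via MiniScheduler rather than the regular Sidekiq queue
metric.job_name = "Jobs::Heartbeat" # illustrative job name
metric.duration = 0.25              # seconds
metric.count = 1
metric.success = true

# InternalMetric::Base#to_h adds a :_type key ("Job" here) so the collector
# process can rebuild the correct class via InternalMetric::Base.from_h.
payload = metric.to_h

# plugin.rb forwards the hash to the collector demon through the
# prometheus_exporter client held in $prometheus_client.
$prometheus_client.send_json(payload)
```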
-------------------------------------------------------------------------------- /.github/workflows/discourse-plugin.yml: -------------------------------------------------------------------------------- 1 | name: Discourse Plugin 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | 9 | jobs: 10 | ci: 11 | uses: discourse/.github/.github/workflows/discourse-plugin.yml@v1 12 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | source "https://rubygems.org" 4 | 5 | group :development do 6 | gem "translations-manager", git: "https://github.com/discourse/translations-manager.git" 7 | gem "rubocop-discourse" 8 | gem "syntax_tree" 9 | end 10 | -------------------------------------------------------------------------------- /.rubocop.yml: -------------------------------------------------------------------------------- 1 | inherit_gem: 2 | rubocop-discourse: stree-compat.yml 3 | 4 | inherit_mode: 5 | merge: 6 | - Exclude 7 | 8 | AllCops: 9 | Exclude: 10 | - gems/**/* 11 | 12 | Style/GlobalVars: 13 | AllowedVariables: [$prometheus_client, $parent_pid, $port, $pid_file] 14 | 15 | Discourse/Plugins/NamespaceMethods: 16 | Exclude: 17 | - bin/**/* 18 | -------------------------------------------------------------------------------- /spec/support/null_metric.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module DiscoursePrometheus 4 | class NullMetric < InternalMetric::Custom 5 | attribute :name, :labels, :description, :value, :type 6 | 7 | def initialize 8 | @name = "null_metric" 9 | @description = "Testing" 10 | @type = "Gauge" 11 | end 12 | 13 | def collect 14 | @value = 1 15 | end 16 | end 17 | end 18 | -------------------------------------------------------------------------------- /.discourse-compatibility: -------------------------------------------------------------------------------- 1 | < 3.6.0.beta1-dev: a1e0ba671e13ceb9541a4d62d3ff7d206393d438 2 | < 3.5.0.beta1-dev: f46906e1d555f6838d74ea38d5037264cc1020b0 3 | < 3.4.0.beta1-dev: b1b899ca995783ef5eba90c35dbfc120a2949c38 4 | < 3.3.0.beta1-dev: 831dba15659055361966e0c42e6b517b3d7b133b 5 | 3.1.999: 8a7a46a80cc65aa0839bc5e3c3b6f8ef6544089f 6 | 2.9.2.beta3: 72fff206ba18ad5ca3112fed2f5f0ce6a17ca6f8 7 | 2.7.0.beta6: b3f511e1d2bcb7cedb0bf6c582efbda0a5488bd3 8 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "private": true, 3 | "devDependencies": { 4 | "@discourse/lint-configs": "2.32.0", 5 | "ember-template-lint": "7.9.1", 6 | "eslint": "9.37.0", 7 | "prettier": "3.6.2", 8 | "stylelint": "16.25.0" 9 | }, 10 | "engines": { 11 | "node": ">= 22", 12 | "npm": "please-use-pnpm", 13 | "yarn": "please-use-pnpm", 14 | "pnpm": "9.x" 15 | }, 16 | "packageManager": "pnpm@9.15.5" 17 | } 18 | -------------------------------------------------------------------------------- /lib/internal_metric/custom.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module DiscoursePrometheus::InternalMetric 4 | class Custom < Base 5 | attribute :name, :labels, :description, :value, :type 6 | 7 | def self.create_gauge_hash(name, description, value) 8 | metric = DiscoursePrometheus::InternalMetric::Custom.new 9 | 
metric.type = "Gauge" 10 | metric.name = name 11 | metric.description = description 12 | metric.value = value 13 | metric.to_h 14 | end 15 | end 16 | end 17 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Official Prometheus Exporter Plugin for Discourse 2 | 3 | The Discourse Prometheus plugin collects key metrics from Discourse and exposes them at the `/metrics` path so Prometheus can scrape them. 4 | 5 | ## Adding custom global collectors 6 | 7 | The global reporter can pick up custom metrics added by other Discourse plugins. The metric needs to define a `collect` method and the `name`, `labels`, `description`, `value`, and `type` attributes. See an example [here](https://github.com/discourse/discourse-antivirus/pull/15); the `NullMetric` class in `spec/support/null_metric.rb` shows the expected shape. 8 | 9 | For more information, please see: https://meta.discourse.org/t/prometheus-exporter-plugin-for-discourse/72666 10 | -------------------------------------------------------------------------------- /lib/collector_demon.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | # 3 | require_dependency "demon/base" 4 | 5 | class DiscoursePrometheus::CollectorDemon < ::Demon::Base 6 | def self.prefix 7 | "prometheus-collector" 8 | end 9 | 10 | def run 11 | @pid = 12 | fork do 13 | collector = File.expand_path("../../bin/collector", __FILE__) 14 | 15 | ENV["RUBY_GLOBAL_METHOD_CACHE_SIZE"] = "2048" 16 | ENV["RUBY_GC_HEAP_INIT_SLOTS"] = "10000" 17 | ENV["PROMETHEUS_EXPORTER_VERSION"] = PrometheusExporter::VERSION 18 | 19 | exec collector, 20 | GlobalSetting.prometheus_collector_port.to_s, 21 | GlobalSetting.prometheus_webserver_bind, 22 | parent_pid.to_s, 23 | pid_file 24 | end 25 | 26 | Process.detach(@pid) 27 | 28 | write_pid_file 29 | end 30 | end 31 | -------------------------------------------------------------------------------- /lib/global_reporter_demon.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | # 3 | require_dependency "demon/base" 4 | 5 | class DiscoursePrometheus::GlobalReporterDemon < ::Demon::Base 6 | def self.prefix 7 | "prometheus-global-reporter" 8 | end 9 | 10 | def suppress_stdout 11 | false 12 | end 13 | 14 | def suppress_stderr 15 | false 16 | end 17 | 18 | def after_fork 19 | @logger.info("Starting Prometheus global reporter pid: #{Process.pid}") 20 | t = DiscoursePrometheus::Reporter::Global.start($prometheus_client) 21 | 22 | trap("INT") { DiscoursePrometheus::Reporter::Global.stop } 23 | trap("TERM") { DiscoursePrometheus::Reporter::Global.stop } 24 | trap("QUIT") { DiscoursePrometheus::Reporter::Global.stop } 25 | 26 | t.join 27 | @logger.info("Stopping Prometheus global reporter pid: #{Process.pid}") 28 | exit 0 29 | end 30 | end 31 | -------------------------------------------------------------------------------- /spec/lib/internal_metric/custom_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe DiscoursePrometheus::InternalMetric::Custom do 4 | let(:result_hash) do 5 | { 6 | name: "post_count", 7 | labels: nil, 8 | description: "Total number of posts", 9 | type: "Gauge", 10 | value: 120, 11 | _type: "Custom", 12 | } 13 | end 14 | 15 | it "creates hash for Custom metric" do 16 | metric = described_class.new 17 | metric.name = "post_count" 18 | metric.description = "Total number 
of posts" 19 | metric.type = "Gauge" 20 | metric.value = 120 21 | 22 | expect(metric.to_h).to eq(result_hash) 23 | end 24 | 25 | it "creates hash for Custom gauge type metric using class method" do 26 | hash = described_class.create_gauge_hash("post_count", "Total number of posts", 120) 27 | expect(hash).to eq(result_hash) 28 | end 29 | end 30 | -------------------------------------------------------------------------------- /lib/reporter/web.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | # 3 | require "middleware/request_tracker" 4 | 5 | class DiscoursePrometheus::Reporter::Web 6 | attr_reader :client 7 | 8 | def self.start(client) 9 | instance = self.new(client) 10 | Middleware::RequestTracker.register_detailed_request_logger( 11 | lambda { |env, data| instance.report(env, data) }, 12 | ) 13 | end 14 | 15 | def initialize(client) 16 | @client = client 17 | end 18 | 19 | def report(env, data) 20 | # CAREFUL, we don't want to hoist env into Scheduler::Defer 21 | # hence the extra method call 22 | host = RailsMultisite::ConnectionManagement.host(env) 23 | log_prom_later(::DiscoursePrometheus::InternalMetric::Web.from_env_data(env, data, host)) 24 | end 25 | 26 | def log_prom_later(metric) 27 | Scheduler::Defer.later("Prom stats", _db = nil) { @client.send_json metric } 28 | end 29 | end 30 | -------------------------------------------------------------------------------- /spec/lib/internal_metric/base_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe DiscoursePrometheus::InternalMetric::Base do 4 | it "allows #to_h on internal metrics" do 5 | job = DiscoursePrometheus::InternalMetric::Job.new 6 | job.job_name = "bob" 7 | job.scheduled = true 8 | job.duration = 100.1 9 | job.count = 1 10 | job.success = false 11 | 12 | expect(job.to_h).to eq( 13 | job_name: "bob", 14 | scheduled: true, 15 | duration: 100.1, 16 | count: 1, 17 | _type: "Job", 18 | success: false, 19 | ) 20 | end 21 | 22 | it "implements #from_h on internal metrics" do 23 | obj = { job_name: "bill", _type: "Job" } 24 | 25 | job = described_class.from_h(obj) 26 | expect(job.class).to eq(DiscoursePrometheus::InternalMetric::Job) 27 | expect(job.job_name).to eq("bill") 28 | end 29 | 30 | it "implements #from_h with string keys" do 31 | obj = { "job_name" => "bill", "_type" => "Job" } 32 | 33 | job = described_class.from_h(obj) 34 | expect(job.class).to eq(DiscoursePrometheus::InternalMetric::Job) 35 | expect(job.job_name).to eq("bill") 36 | end 37 | end 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) Civilized Discourse Construction Kit, Inc. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /spec/job_metric_initializer_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe DiscoursePrometheus::JobMetricInitializer do 4 | it "enumerates regular jobs" do 5 | metrics = [] 6 | DiscoursePrometheus::JobMetricInitializer.each_regular_job_metric { |m| metrics << m } 7 | expect(metrics.all? { |m| m.count == 0 }).to eq(true) 8 | expect(metrics.all? { |m| m.duration == 0 }).to eq(true) 9 | expect(metrics.map(&:job_name)).to include("Jobs::RunHeartbeat") 10 | expect(metrics.map(&:job_name)).not_to include("Jobs::Heartbeat") 11 | end 12 | 13 | it "enumerates scheduled jobs" do 14 | # ensure class is loaded (in prod, classes are eager-loaded) 15 | expect(Jobs::Heartbeat).to be_present 16 | 17 | metrics = [] 18 | DiscoursePrometheus::JobMetricInitializer.each_scheduled_job_metric { |m| metrics << m } 19 | expect(metrics.all? { |m| m.count == 0 }).to eq(true) 20 | expect(metrics.all? { |m| m.duration == 0 }).to eq(true) 21 | expect(metrics.all? { |m| m.scheduled == true }).to eq(true) 22 | expect(metrics.map(&:job_name)).to include("Jobs::Heartbeat") 23 | expect(metrics.map(&:job_name)).not_to include("Jobs::RunHeartbeat") 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /lib/internal_metric/base.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module DiscoursePrometheus::InternalMetric 4 | class Base 5 | def self.attribute(*names) 6 | (@attrs ||= []).concat(names) 7 | attr_accessor(*names) 8 | end 9 | 10 | def self.attributes 11 | @attrs 12 | end 13 | 14 | def self.from_h(hash) 15 | klass = 16 | case (hash["_type"] || hash[:_type]) 17 | when "Job" 18 | Job 19 | when "Global" 20 | Global 21 | when "Web" 22 | Web 23 | when "Process" 24 | Process 25 | when "Custom" 26 | Custom 27 | else 28 | raise "class deserialization not implemented" 29 | end 30 | instance = klass.new 31 | instance.from_h(hash) 32 | instance 33 | end 34 | 35 | def from_h(hash) 36 | hash.each do |k, v| 37 | next if k == "_type" || k == :_type 38 | self.send "#{k}=", v 39 | end 40 | self 41 | end 42 | 43 | def to_json(*ignore) 44 | Oj.dump(to_h, mode: :object) 45 | end 46 | 47 | def to_h 48 | hash = Hash[*self.class.attributes.map { |a| [a, send(a)] }.flatten] 49 | 50 | # for perf, this is called alot 51 | type = 52 | case self 53 | when Job 54 | "Job" 55 | when Global 56 | "Global" 57 | when Web 58 | "Web" 59 | when Process 60 | "Process" 61 | when Custom 62 | "Custom" 63 | else 64 | raise "not implemented" 65 | end 66 | 67 | hash[:_type] = type 68 | hash 69 | end 70 | end 71 | end 72 | -------------------------------------------------------------------------------- /app/jobs/scheduled/update_stats.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | 
module Jobs 4 | class UpdateStats < ::Jobs::Scheduled 5 | every 1.hour 6 | 7 | def execute(args = {}) 8 | postgres_highest_sequence = DB.query_single(<<~SQL)[0] 9 | WITH columns AS MATERIALIZED ( 10 | SELECT table_name, 11 | column_name, 12 | data_type column_type, 13 | REPLACE(REPLACE(column_default, 'nextval(''', ''), '''::regclass)', '') sequence_name 14 | FROM information_schema.columns 15 | WHERE table_schema = 'public' AND column_default LIKE '%nextval(''%' 16 | ), sequences AS MATERIALIZED ( 17 | SELECT sequencename sequence_name, 18 | data_type::text sequence_type, 19 | COALESCE(last_value, 0) last_value 20 | FROM pg_sequences 21 | ) 22 | SELECT MAX(last_value) 23 | FROM columns 24 | JOIN sequences ON sequences.sequence_name = columns.sequence_name 25 | WHERE columns.column_type = 'integer' OR 26 | -- The column and sequence types should match, but this is just an extra check. 27 | sequences.sequence_type = 'integer' OR 28 | -- The `id` column of these tables is a `bigint`, but the foreign key columns are usually integers. 29 | -- These columns will be migrated in the future. 30 | -- See https://github.com/discourse/discourse/blob/6e1aeb1f504f469ceed189c24d43a7a99b8970c7/spec/rails_helper.rb#L480-L490 31 | table_name IN ('reviewables', 'flags', 'sidebar_sections') 32 | SQL 33 | 34 | Discourse.stats.set("postgres_highest_sequence", postgres_highest_sequence) 35 | end 36 | end 37 | end 38 | -------------------------------------------------------------------------------- /lib/job_metric_initializer.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | class ::DiscoursePrometheus::JobMetricInitializer 4 | def self.initialize_scheduled_job_metrics 5 | each_scheduled_job_metric { |metric| $prometheus_client.send_json metric.to_h } 6 | end 7 | 8 | def self.initialize_regular_job_metrics 9 | each_regular_job_metric { |metric| $prometheus_client.send_json metric.to_h } 10 | end 11 | 12 | def self.each_regular_job_metric 13 | # Enumerate all regular jobs and send a count=0 metric to the collector. This is not perfect - technically 14 | # any class can be passed to Jobs.enqueue. 
Discourse tends to pass a string to `Jobs.enqueue`, which is then 15 | # looked up from Jobs.constants, so this should cover the vast majority of cases 16 | ::Jobs.constants.each do |const| 17 | job_klass = ::Jobs.const_get(const) 18 | next if job_klass.class != Class 19 | next if job_klass == ::Jobs::Base 20 | 21 | ancestors = job_klass.ancestors 22 | 23 | if ancestors.include?(::Jobs::Base) && !ancestors.include?(::Jobs::Scheduled) && 24 | !ancestors.include?(::Jobs::Onceoff) 25 | metric = DiscoursePrometheus::InternalMetric::Job.new 26 | metric.scheduled = false 27 | metric.duration = 0 28 | metric.count = 0 29 | metric.job_name = job_klass.name 30 | yield metric 31 | end 32 | end 33 | end 34 | 35 | def self.each_scheduled_job_metric 36 | # Enumerate all scheduled jobs and send a count=0 metric to the collector 37 | # to initialize the metric 38 | ::MiniScheduler::Manager.discover_schedules.each do |job_klass| 39 | metric = DiscoursePrometheus::InternalMetric::Job.new 40 | metric.scheduled = true 41 | metric.duration = 0 42 | metric.count = 0 43 | metric.job_name = job_klass.name 44 | yield metric 45 | end 46 | end 47 | end 48 | -------------------------------------------------------------------------------- /spec/lib/reporter/global_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require_relative "../../support/null_metric" 4 | 5 | RSpec.describe DiscoursePrometheus::Reporter::Global do 6 | it "collects gc stats" do 7 | collector = described_class.new(recycle_every: 2) 8 | metric = collector.collect.first 9 | 10 | expect(metric.redis_slave_available[{ type: "main" }]).to eq(0) 11 | 12 | id = metric.object_id 13 | 14 | # test recycling 15 | expect(collector.collect.first.object_id).to eq(id) 16 | expect(collector.collect.first.object_id).not_to eq(id) 17 | ensure 18 | metric.reset! 19 | end 20 | 21 | describe "with readonly mode cleanup" do 22 | after do 23 | Discourse.disable_readonly_mode(Discourse::PG_FORCE_READONLY_MODE_KEY) 24 | Discourse.clear_readonly! 25 | end 26 | 27 | it "collects readonly data from the redis keys" do 28 | metric = described_class.new.collect.first 29 | 30 | Discourse::READONLY_KEYS.each { |k| expect(metric.readonly_sites[key: k]).to eq(0) } 31 | 32 | Discourse.enable_readonly_mode(Discourse::PG_FORCE_READONLY_MODE_KEY) 33 | 34 | metric = described_class.new.collect.first 35 | Discourse::READONLY_KEYS.each do |k| 36 | expect(metric.readonly_sites[key: k]).to eq( 37 | k == Discourse::PG_FORCE_READONLY_MODE_KEY ? 1 : 0, 38 | ) 39 | end 40 | ensure 41 | metric.reset! 42 | end 43 | end 44 | 45 | describe "adding custom collectors" do 46 | after { DiscoursePluginRegistry.reset_register!(:global_collectors) } 47 | 48 | it "collects custom metrics added to the global_collectors registry" do 49 | null_metric_klass = DiscoursePrometheus::NullMetric 50 | DiscoursePluginRegistry.register_global_collector(null_metric_klass, Plugin::Instance.new) 51 | 52 | metric = described_class.new.collect.last 53 | 54 | expect(metric.name).to eq(null_metric_klass.new.name) 55 | end 56 | end 57 | end 58 | -------------------------------------------------------------------------------- /lib/reporter/global.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module DiscoursePrometheus::Reporter 4 | class Global 5 | def self.clear_connections! 6 | ActiveRecord::Base.connection_handler.clear_active_connections! 
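# clear_active_connections! checks any ActiveRecord connections held by this thread back into the pool; iteration below calls it before and after every collection pass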
7 | end 8 | 9 | def self.iteration(global_collector, client) 10 | clear_connections! 11 | metrics = global_collector.collect 12 | metrics.each { |metric| client.send_json(metric) } 13 | clear_connections! 14 | rescue => e 15 | begin 16 | Discourse.warn_exception(e, message: "Prometheus Discourse Failed To Collect Global Stats") 17 | rescue => e1 18 | # never crash an iteration 19 | begin 20 | STDERR.puts "ERR failed to log warning: #{e1}" 21 | rescue StandardError 22 | nil 23 | end 24 | end 25 | end 26 | 27 | def self.sleep_unless_interrupted(seconds) 28 | IO.select([@r], nil, nil, seconds) 29 | end 30 | 31 | def self.start(client) 32 | @r, @w = IO.pipe 33 | global_collector = new 34 | 35 | Thread.new do 36 | while !@stopping 37 | iteration(global_collector, client) 38 | sleep_unless_interrupted 5 39 | end 40 | end 41 | end 42 | 43 | def self.stop 44 | @stopping = true 45 | @w.close 46 | end 47 | 48 | def initialize(recycle_every: 6) 49 | @recycle_every = recycle_every 50 | @collections = 0 51 | fetch_metrics 52 | end 53 | 54 | def collect 55 | if @collections >= @recycle_every 56 | fetch_metrics 57 | @collections = 0 58 | else 59 | @collections += 1 60 | end 61 | 62 | @metrics.each(&:collect) 63 | @metrics 64 | end 65 | 66 | private 67 | 68 | def fetch_metrics 69 | metrics = [::DiscoursePrometheus::InternalMetric::Global] 70 | metrics = metrics.concat(DiscoursePluginRegistry.global_collectors) 71 | 72 | @metrics = metrics.map(&:new) 73 | end 74 | end 75 | end 76 | -------------------------------------------------------------------------------- /bin/collector: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | # frozen_string_literal: true 3 | 4 | Process.setproctitle("discourse prometheus-collector") 5 | 6 | spec_file = 7 | File.expand_path( 8 | "../../gems/#{RUBY_VERSION}/specifications/#{"prometheus_exporter-#{ENV["PROMETHEUS_EXPORTER_VERSION"]}"}.gemspec", 9 | __FILE__, 10 | ) 11 | 12 | spec = Gem::Specification.load(spec_file) 13 | spec.activate 14 | 15 | require "oj" 16 | require "prometheus_exporter" 17 | require "prometheus_exporter/server" 18 | require "rbtrace" if ENV["RBTRACE"] == "1" 19 | require "webrick" 20 | 21 | module DiscoursePrometheus 22 | end 23 | 24 | require_relative "../lib/internal_metric/base" 25 | require_relative "../lib/internal_metric/global" 26 | require_relative "../lib/internal_metric/job" 27 | require_relative "../lib/internal_metric/process" 28 | require_relative "../lib/internal_metric/web" 29 | require_relative "../lib/internal_metric/custom" 30 | require_relative "../lib/collector" 31 | 32 | $port = ARGV[0].to_i 33 | bind = ARGV[1] 34 | $parent_pid = ARGV[2].to_i 35 | $pid_file = ARGV[3] 36 | 37 | STDERR.puts "#{Time.now}: Starting Prometheus Collector pid: #{Process.pid} port: #{$port}" 38 | 39 | if $parent_pid > 0 40 | STDERR.puts "#{Time.now}: Prometheus Collector is monitoring #{$parent_pid}" 41 | Thread.new do 42 | def alive?(pid) 43 | Process.kill(0, pid) 44 | File.read($pid_file).to_i == Process.pid 45 | rescue StandardError 46 | false 47 | end 48 | 49 | while true 50 | begin 51 | unless alive?($parent_pid) 52 | STDERR.puts "Parent was terminated!" 
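# The parent process is gone (or the pid file no longer points at this collector):
# ask this process to shut down cleanly, and force-kill it if it is still alive 10 seconds later.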
53 | Process.kill "TERM", Process.pid 54 | sleep 10 55 | Process.kill "KILL", Process.pid 56 | end 57 | rescue => e 58 | STDERR.puts "URGENT monitoring thread had an exception #{e}" 59 | end 60 | sleep 5 61 | end 62 | end 63 | end 64 | 65 | PrometheusExporter::Metric::Base.default_prefix = "discourse_" 66 | 67 | collector = DiscoursePrometheus::Collector.new 68 | server = PrometheusExporter::Server::WebServer.new port: $port, bind: bind, collector: collector 69 | 70 | server.start 71 | 72 | sleep 73 | -------------------------------------------------------------------------------- /lib/middleware/metrics.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require "ipaddr" 4 | 5 | module DiscoursePrometheus 6 | module Middleware 7 | end 8 | class Middleware::Metrics 9 | def initialize(app, settings = {}) 10 | @app = app 11 | end 12 | 13 | def call(env) 14 | intercept?(env) ? metrics(env) : @app.call(env) 15 | end 16 | 17 | private 18 | 19 | def is_private_ip?(env) 20 | request = Rack::Request.new(env) 21 | ip = 22 | begin 23 | IPAddr.new(request.ip) 24 | rescue StandardError 25 | nil 26 | end 27 | !!(ip && (ip.private? || ip.loopback?)) 28 | end 29 | 30 | def is_trusted_ip?(env) 31 | return false if GlobalSetting.prometheus_trusted_ip_allowlist_regex.empty? 32 | begin 33 | trusted_ip_regex = Regexp.new GlobalSetting.prometheus_trusted_ip_allowlist_regex 34 | request = Rack::Request.new(env) 35 | ip = IPAddr.new(request.ip) 36 | rescue => e 37 | # failed to parse regex 38 | Discourse.warn_exception( 39 | e, 40 | message: "Error parsing prometheus trusted ip whitelist", 41 | env: env, 42 | ) 43 | end 44 | !!(trusted_ip_regex && ip && ip.to_s =~ trusted_ip_regex) 45 | end 46 | 47 | def is_admin?(env) 48 | host = RailsMultisite::ConnectionManagement.host(env) 49 | result = false 50 | RailsMultisite::ConnectionManagement.with_hostname(host) do 51 | result = RailsMultisite::ConnectionManagement.current_db == "default" 52 | result &&= !!CurrentUser.lookup_from_env(env)&.admin 53 | end 54 | result 55 | end 56 | 57 | def intercept?(env) 58 | if env["PATH_INFO"] == "/metrics" 59 | return is_private_ip?(env) || is_trusted_ip?(env) || is_admin?(env) 60 | end 61 | false 62 | end 63 | 64 | def metrics(env) 65 | data = 66 | Net::HTTP.get(URI("http://localhost:#{GlobalSetting.prometheus_collector_port}/metrics")) 67 | [ 68 | 200, 69 | { "Content-Type" => "text/plain; charset=utf-8", "Content-Length" => data.bytesize.to_s }, 70 | [data], 71 | ] 72 | end 73 | end 74 | end 75 | -------------------------------------------------------------------------------- /spec/lib/reporter/process_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe DiscoursePrometheus::Reporter::Process do 4 | it "collects gc stats" do 5 | ctx = MiniRacer::Context.new 6 | ctx.eval("") 7 | 8 | GC.expects(:latest_gc_info).with(:major_by).returns(:nofree) 9 | 10 | metric = described_class.new(:web).collect 11 | 12 | expect(metric.type).to eq("web") 13 | expect(metric.gc_major_by).to eq({ { reason: "nofree" } => 1 }) 14 | 15 | expect(metric.heap_live_slots).to be > 0 16 | expect(metric.heap_free_slots).to be > 0 17 | expect(metric.major_gc_count).to be > 0 18 | expect(metric.minor_gc_count).to be > 0 19 | expect(metric.total_allocated_objects).to be > 0 20 | expect(metric.v8_heap_size).to be > 0 21 | expect(metric.v8_heap_count).to be > 0 22 | 
expect(metric.v8_physical_size).to be > 0 23 | expect(metric.pid).to be > 0 24 | expect(metric.thread_count).to be > 0 25 | expect(metric.process_cpu_seconds_total).to be > 0 26 | 27 | # macos does not support these metrics 28 | expect(metric.rss).to be > 0 unless RbConfig::CONFIG["arch"] =~ /darwin/ 29 | end 30 | 31 | describe "job_exception_stats" do 32 | before { Discourse.reset_job_exception_stats! } 33 | after { Discourse.reset_job_exception_stats! } 34 | 35 | it "collects job_exception_stats" do 36 | # see MiniScheduler Manager which reports it like this 37 | # https://github.com/discourse/mini_scheduler/blob/2b2c1c56b6e76f51108c2a305775469e24cf2b65/lib/mini_scheduler/manager.rb#L95 38 | exception_context = { 39 | message: "Running a scheduled job", 40 | job: { 41 | "class" => Jobs::ReindexSearch, 42 | }, 43 | } 44 | 45 | 2.times do 46 | expect { 47 | Discourse.handle_job_exception(StandardError.new, exception_context) 48 | }.to raise_error(StandardError) 49 | end 50 | 51 | metric = described_class.new(:web).collect 52 | expect(metric.job_failures).to eq( 53 | { { "job" => "Jobs::ReindexSearch", "family" => "scheduled" } => 2 }, 54 | ) 55 | end 56 | end 57 | 58 | it "collects failover data" do 59 | metric = described_class.new(:web).collect 60 | 61 | expect(metric.active_record_failover_count).to eq(0) 62 | expect(metric.redis_failover_count).to eq(0) 63 | end 64 | end 65 | -------------------------------------------------------------------------------- /lib/internal_metric/process.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module DiscoursePrometheus::InternalMetric 4 | class Process < Base 5 | GAUGES = { 6 | heap_free_slots: "Free ruby heap slots", 7 | heap_live_slots: "Used ruby heap slots", 8 | v8_heap_size: "Total JavaScript V8 heap size (bytes)", 9 | v8_used_heap_size: "Total used JavaScript V8 heap size (bytes)", 10 | v8_physical_size: "Physical size consumed by V8 heaps", 11 | v8_heap_count: "Number of V8 contexts running", 12 | rss: "Total RSS used by process", 13 | thread_count: "Total number of active threads per process", 14 | deferred_jobs_queued: "Number of jobs queued in the deferred job queue", 15 | active_record_connections_count: 16 | "Total number of connections in ActiveRecord's connection pools", 17 | active_record_failover_count: "Count of ActiveRecord databases in a failover state", 18 | redis_failover_count: "Count of Redis servers in a failover state", 19 | } 20 | 21 | COUNTERS = { 22 | gc_major_by: "Reason the last major GC was triggered", 23 | major_gc_count: "Major GC operations by process", 24 | minor_gc_count: "Minor GC operations by process", 25 | total_allocated_objects: "Total number of allocated objects by process", 26 | job_failures: "Number of scheduled and regular jobs that failed in a process", 27 | process_cpu_seconds_total: "Total CPU time used by the process", 28 | } 29 | 30 | attribute :type, 31 | :gc_major_by, 32 | :heap_free_slots, 33 | :heap_live_slots, 34 | :major_gc_count, 35 | :minor_gc_count, 36 | :total_allocated_objects, 37 | :rss, 38 | :thread_count, 39 | :v8_heap_size, 40 | :v8_used_heap_size, 41 | :v8_physical_size, 42 | :v8_heap_count, 43 | :pid, 44 | :created_at, 45 | :deferred_jobs_queued, 46 | :active_record_connections_count, 47 | :active_record_failover_count, 48 | :redis_failover_count, 49 | :job_failures, 50 | :process_cpu_seconds_total 51 | 52 | def initialize 53 | @active_record_connections_count = {} 54 | @gc_major_by = {} 55 | end 56 | 
end 57 | end 58 | -------------------------------------------------------------------------------- /Gemfile.lock: -------------------------------------------------------------------------------- 1 | GIT 2 | remote: https://github.com/discourse/translations-manager.git 3 | revision: a8b225f7fabd3250ba88a4a2eff797693df51192 4 | specs: 5 | translations-manager (0.6) 6 | 7 | GEM 8 | remote: https://rubygems.org/ 9 | specs: 10 | activesupport (8.0.3) 11 | base64 12 | benchmark (>= 0.3) 13 | bigdecimal 14 | concurrent-ruby (~> 1.0, >= 1.3.1) 15 | connection_pool (>= 2.2.5) 16 | drb 17 | i18n (>= 1.6, < 2) 18 | logger (>= 1.4.2) 19 | minitest (>= 5.1) 20 | securerandom (>= 0.3) 21 | tzinfo (~> 2.0, >= 2.0.5) 22 | uri (>= 0.13.1) 23 | ast (2.4.3) 24 | base64 (0.3.0) 25 | benchmark (0.4.1) 26 | bigdecimal (3.3.0) 27 | concurrent-ruby (1.3.5) 28 | connection_pool (2.5.4) 29 | drb (2.2.3) 30 | i18n (1.14.7) 31 | concurrent-ruby (~> 1.0) 32 | json (2.15.1) 33 | language_server-protocol (3.17.0.5) 34 | lint_roller (1.1.0) 35 | logger (1.7.0) 36 | minitest (5.26.0) 37 | parallel (1.27.0) 38 | parser (3.3.9.0) 39 | ast (~> 2.4.1) 40 | racc 41 | prettier_print (1.2.1) 42 | prism (1.5.1) 43 | racc (1.8.1) 44 | rack (3.2.3) 45 | rainbow (3.1.1) 46 | regexp_parser (2.11.3) 47 | rubocop (1.81.1) 48 | json (~> 2.3) 49 | language_server-protocol (~> 3.17.0.2) 50 | lint_roller (~> 1.1.0) 51 | parallel (~> 1.10) 52 | parser (>= 3.3.0.2) 53 | rainbow (>= 2.2.2, < 4.0) 54 | regexp_parser (>= 2.9.3, < 3.0) 55 | rubocop-ast (>= 1.47.1, < 2.0) 56 | ruby-progressbar (~> 1.7) 57 | unicode-display_width (>= 2.4.0, < 4.0) 58 | rubocop-ast (1.47.1) 59 | parser (>= 3.3.7.2) 60 | prism (~> 1.4) 61 | rubocop-capybara (2.22.1) 62 | lint_roller (~> 1.1) 63 | rubocop (~> 1.72, >= 1.72.1) 64 | rubocop-discourse (3.13.3) 65 | activesupport (>= 6.1) 66 | lint_roller (>= 1.1.0) 67 | rubocop (>= 1.73.2) 68 | rubocop-capybara (>= 2.22.0) 69 | rubocop-factory_bot (>= 2.27.0) 70 | rubocop-rails (>= 2.30.3) 71 | rubocop-rspec (>= 3.0.1) 72 | rubocop-rspec_rails (>= 2.31.0) 73 | rubocop-factory_bot (2.27.1) 74 | lint_roller (~> 1.1) 75 | rubocop (~> 1.72, >= 1.72.1) 76 | rubocop-rails (2.33.4) 77 | activesupport (>= 4.2.0) 78 | lint_roller (~> 1.1) 79 | rack (>= 1.1) 80 | rubocop (>= 1.75.0, < 2.0) 81 | rubocop-ast (>= 1.44.0, < 2.0) 82 | rubocop-rspec (3.7.0) 83 | lint_roller (~> 1.1) 84 | rubocop (~> 1.72, >= 1.72.1) 85 | rubocop-rspec_rails (2.31.0) 86 | lint_roller (~> 1.1) 87 | rubocop (~> 1.72, >= 1.72.1) 88 | rubocop-rspec (~> 3.5) 89 | ruby-progressbar (1.13.0) 90 | securerandom (0.4.1) 91 | syntax_tree (6.3.0) 92 | prettier_print (>= 1.2.0) 93 | tzinfo (2.0.6) 94 | concurrent-ruby (~> 1.0) 95 | unicode-display_width (3.2.0) 96 | unicode-emoji (~> 4.1) 97 | unicode-emoji (4.1.0) 98 | uri (1.0.4) 99 | 100 | PLATFORMS 101 | ruby 102 | 103 | DEPENDENCIES 104 | rubocop-discourse 105 | syntax_tree 106 | translations-manager! 107 | 108 | BUNDLED WITH 109 | 2.7.2 110 | -------------------------------------------------------------------------------- /plugin.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | # name: discourse-prometheus 4 | # about: Collects key metrics from Discourse for Prometheus data analytics. 
5 | # meta_topic_id: 72666 6 | # version: 0.1 7 | # authors: Sam Saffron 8 | # url: https://github.com/discourse/discourse-prometheus 9 | 10 | module ::DiscoursePrometheus 11 | end 12 | 13 | gem "prometheus_exporter", "2.2.0" 14 | 15 | require "prometheus_exporter/client" 16 | 17 | require_relative("lib/internal_metric/base") 18 | require_relative("lib/internal_metric/global") 19 | require_relative("lib/internal_metric/job") 20 | require_relative("lib/internal_metric/process") 21 | require_relative("lib/internal_metric/web") 22 | require_relative("lib/internal_metric/custom") 23 | 24 | require_relative("lib/reporter/process") 25 | require_relative("lib/reporter/global") 26 | require_relative("lib/reporter/web") 27 | 28 | require_relative("lib/collector_demon") 29 | require_relative("lib/global_reporter_demon") 30 | 31 | require_relative("lib/middleware/metrics") 32 | 33 | require_relative("lib/job_metric_initializer") 34 | 35 | GlobalSetting.add_default :prometheus_collector_port, 9405 36 | GlobalSetting.add_default :prometheus_webserver_bind, "localhost" 37 | GlobalSetting.add_default :prometheus_trusted_ip_allowlist_regex, "" 38 | DiscoursePluginRegistry.define_filtered_register :global_collectors 39 | 40 | Rails.configuration.middleware.unshift DiscoursePrometheus::Middleware::Metrics 41 | 42 | after_initialize do 43 | require_relative("app/jobs/scheduled/update_stats") 44 | 45 | $prometheus_client = 46 | PrometheusExporter::Client.new(host: "localhost", port: GlobalSetting.prometheus_collector_port) 47 | 48 | # creates no new threads, this simply adds the instruments 49 | DiscoursePrometheus::Reporter::Web.start($prometheus_client) unless Rails.env.test? 50 | 51 | register_demon_process(DiscoursePrometheus::CollectorDemon) 52 | register_demon_process(DiscoursePrometheus::GlobalReporterDemon) 53 | 54 | on(:sidekiq_fork_started) do 55 | DiscoursePrometheus::Reporter::Process.start($prometheus_client, :sidekiq) 56 | DiscoursePrometheus::JobMetricInitializer.initialize_regular_job_metrics 57 | DiscoursePrometheus::JobMetricInitializer.initialize_scheduled_job_metrics 58 | end 59 | 60 | on(:web_fork_started) { DiscoursePrometheus::Reporter::Process.start($prometheus_client, :web) } 61 | 62 | on(:scheduled_job_ran) do |stat| 63 | metric = DiscoursePrometheus::InternalMetric::Job.new 64 | metric.scheduled = true 65 | metric.job_name = stat.name 66 | metric.duration = stat.duration_ms * 0.001 67 | metric.count = 1 68 | metric.success = stat.success 69 | $prometheus_client.send_json metric.to_h unless Rails.env.test? 70 | end 71 | 72 | on(:sidekiq_job_ran) do |worker, _msg, _queue, duration| 73 | metric = DiscoursePrometheus::InternalMetric::Job.new 74 | metric.scheduled = false 75 | metric.duration = duration 76 | metric.count = 1 77 | metric.job_name = worker.class.to_s 78 | metric.success = true 79 | $prometheus_client.send_json metric.to_h unless Rails.env.test? 80 | end 81 | 82 | on(:sidekiq_job_error) do |worker, _msg, _queue, duration| 83 | metric = DiscoursePrometheus::InternalMetric::Job.new 84 | metric.scheduled = false 85 | metric.duration = duration 86 | metric.count = 1 87 | metric.job_name = worker.class.to_s 88 | metric.success = false 89 | $prometheus_client.send_json metric.to_h unless Rails.env.test? 
90 | end 91 | end 92 | -------------------------------------------------------------------------------- /lib/internal_metric/web.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module DiscoursePrometheus::InternalMetric 4 | class Web < Base 5 | FLOAT_ATTRS = %w[duration sql_duration net_duration redis_duration queue_duration gc_duration] 6 | 7 | INT_ATTRS = %w[sql_calls redis_calls net_calls status_code gc_major_count gc_minor_count] 8 | 9 | BOOL_ATTRS = %w[ 10 | ajax 11 | background 12 | logged_in 13 | crawler 14 | mobile 15 | tracked 16 | json 17 | html 18 | admin_api 19 | user_api 20 | forced_anon 21 | ] 22 | 23 | STRING_ATTRS = %w[background_type verb controller action host db cache] 24 | 25 | (FLOAT_ATTRS + INT_ATTRS + BOOL_ATTRS + STRING_ATTRS).each { |attr| attribute attr } 26 | 27 | ALLOWED_REQUEST_METHODS = Set["HEAD", "GET", "PUT", "POST", "DELETE"] 28 | 29 | def self.get(hash) 30 | metric = new 31 | hash.each { |k, v| metric.send "#{k}=", v } 32 | metric 33 | end 34 | 35 | def self.multisite? 36 | @multisite ||= Rails.configuration.multisite 37 | end 38 | 39 | def self.from_env_data(env, data, host) 40 | metric = self.new 41 | 42 | data ||= {} 43 | 44 | if multisite? 45 | spec = RailsMultisite::ConnectionManagement.connection_spec(host: host) 46 | metric.db = spec.config[:database] if spec 47 | else 48 | metric.db = nil 49 | end 50 | 51 | if queue_seconds = data[:queue_seconds] 52 | metric.queue_duration = queue_seconds 53 | else 54 | metric.queue_duration = 0.0 55 | end 56 | 57 | metric.admin_api = !!env["_DISCOURSE_API"] 58 | metric.user_api = !!env["_DISCOURSE_USER_API"] 59 | 60 | metric.verb = env["REQUEST_METHOD"] 61 | metric.verb = "OTHER" if !ALLOWED_REQUEST_METHODS.include?(metric.verb) 62 | 63 | if ad_params = env["action_dispatch.request.parameters"] 64 | metric.controller = ad_params["controller"] 65 | metric.action = ad_params["action"] 66 | end 67 | 68 | if timing = data[:timing] 69 | metric.duration = timing[:total_duration] 70 | 71 | if sql = timing[:sql] 72 | metric.sql_duration = sql[:duration] 73 | metric.sql_calls = sql[:calls] 74 | end 75 | 76 | if redis = timing[:redis] 77 | metric.redis_duration = redis[:duration] 78 | metric.redis_calls = redis[:calls] 79 | end 80 | 81 | if net = timing[:net] 82 | metric.net_duration = net[:duration] 83 | metric.net_calls = net[:calls] 84 | end 85 | 86 | if gc = timing[:gc] 87 | metric.gc_duration = gc[:time] 88 | metric.gc_major_count = gc[:major_count] 89 | metric.gc_minor_count = gc[:minor_count] 90 | end 91 | end 92 | 93 | metric.status_code = data[:status].to_i 94 | metric.crawler = !!data[:is_crawler] 95 | metric.logged_in = !!data[:has_auth_cookie] 96 | metric.background = !!data[:is_background] 97 | metric.background_type = data[:background_type] 98 | metric.mobile = !!data[:is_mobile] 99 | metric.tracked = !!data[:track_view] 100 | metric.cache = data[:cache] 101 | metric.host = host 102 | 103 | metric.json = 104 | env["PATH_INFO"].to_s.ends_with?(".json") || 105 | env["HTTP_ACCEPT"].to_s.include?("application/json") 106 | 107 | metric.html = 108 | env["PATH_INFO"].to_s.ends_with?(".html") || env["HTTP_ACCEPT"].to_s.include?("text/html") 109 | 110 | metric.ajax = env["HTTP_X_REQUESTED_WITH"] == "XMLHttpRequest" 111 | metric.forced_anon = !!env["DISCOURSE_FORCE_ANON"] 112 | 113 | metric 114 | end 115 | end 116 | end 117 | -------------------------------------------------------------------------------- /spec/middleware/metrics_spec.rb: 
-------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe DiscoursePrometheus::Middleware::Metrics do 4 | let(:middleware) do 5 | app = lambda { |env| [404, {}, ["not found"]] } 6 | DiscoursePrometheus::Middleware::Metrics.new(app) 7 | end 8 | 9 | it "404s for unauthenticated if prometheus_trusted_ip_allowlist_regex is unset" do 10 | status, = 11 | middleware.call( 12 | "PATH_INFO" => "/metrics", 13 | "REMOTE_ADDR" => "200.0.1.1", 14 | "rack.input" => StringIO.new, 15 | ) 16 | expect(status).to eq(404) 17 | end 18 | 19 | it "404s for unauthenticated" do 20 | status, = 21 | middleware.call( 22 | "PATH_INFO" => "/metrics", 23 | "REMOTE_ADDR" => "200.0.1.1", 24 | "rack.input" => StringIO.new, 25 | ) 26 | expect(status).to eq(404) 27 | end 28 | 29 | it "404s for unauthenticated and invalid regex" do 30 | global_setting :prometheus_trusted_ip_allowlist_regex, "unbalanced bracket[" 31 | status, = 32 | middleware.call( 33 | "PATH_INFO" => "/metrics", 34 | "REMOTE_ADDR" => "200.0.1.1", 35 | "rack.input" => StringIO.new, 36 | ) 37 | expect(status).to eq(404) 38 | end 39 | 40 | it "404s for unauthenticated empty regex" do 41 | status, = 42 | middleware.call( 43 | "PATH_INFO" => "/metrics", 44 | "REMOTE_ADDR" => "200.0.1.1", 45 | "rack.input" => StringIO.new, 46 | ) 47 | expect(status).to eq(404) 48 | end 49 | 50 | it "404s for public IP addresses" do 51 | addresses = %w[ 52 | 62.127.0.1 53 | 62.192.168.1 54 | 62.10.0.0 55 | 62.172.16.0 56 | 62.172.21.0 57 | 62.172.31.0 58 | 2001:fc00:ffff:ffff:ffff:ffff:ffff:ffff 59 | ] 60 | addresses.each do |ip| 61 | status, = 62 | middleware.call( 63 | "PATH_INFO" => "/metrics", 64 | "REMOTE_ADDR" => ip, 65 | "rack.input" => StringIO.new, 66 | ) 67 | expect(status).to eq(404) 68 | end 69 | end 70 | 71 | it "proxies the dedicated port for private IP addresses" do 72 | stub_request( 73 | :get, 74 | "http://localhost:#{GlobalSetting.prometheus_collector_port}/metrics", 75 | ).to_return(status: 200, body: "hello world", headers: {}) 76 | 77 | addresses = %w[ 78 | 127.1.2.3 79 | 192.168.1.2 80 | 10.0.1.2 81 | 172.16.9.8 82 | 172.19.1.2 83 | 172.20.9.8 84 | 172.29.1.2 85 | 172.30.9.8 86 | 172.31.1.2 87 | ] 88 | addresses.each do |ip| 89 | status, headers, body = middleware.call("PATH_INFO" => "/metrics", "REMOTE_ADDR" => ip) 90 | body = body.join 91 | 92 | expect(status).to eq(200) 93 | expect(headers["Content-Type"]).to eq("text/plain; charset=utf-8") 94 | expect(body).to include("hello world") 95 | end 96 | end 97 | 98 | it "proxies the dedicated port even with invalid regex" do 99 | global_setting :prometheus_trusted_ip_allowlist_regex, "unbalanced bracket[" 100 | stub_request( 101 | :get, 102 | "http://localhost:#{GlobalSetting.prometheus_collector_port}/metrics", 103 | ).to_return(status: 200, body: "hello world", headers: {}) 104 | 105 | status, headers, body = 106 | middleware.call("PATH_INFO" => "/metrics", "REMOTE_ADDR" => "192.168.1.1") 107 | body = body.join 108 | 109 | expect(status).to eq(200) 110 | expect(headers["Content-Type"]).to eq("text/plain; charset=utf-8") 111 | expect(body).to include("hello world") 112 | end 113 | 114 | it "proxies the dedicated port on trusted IP" do 115 | global_setting :prometheus_trusted_ip_allowlist_regex, "(200\.0)" 116 | stub_request( 117 | :get, 118 | "http://localhost:#{GlobalSetting.prometheus_collector_port}/metrics", 119 | ).to_return(status: 200, body: "hello world", headers: {}) 120 | 121 | status, headers, body = 
middleware.call("PATH_INFO" => "/metrics", "REMOTE_ADDR" => "200.0.0.1") 122 | body = body.join 123 | 124 | expect(status).to eq(200) 125 | expect(headers["Content-Type"]).to eq("text/plain; charset=utf-8") 126 | expect(body).to include("hello world") 127 | end 128 | end 129 | -------------------------------------------------------------------------------- /spec/lib/internal_metric/web_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe DiscoursePrometheus::InternalMetric::Web do 4 | it "round trips host" do 5 | metric = described_class.get(tracked: true, status_code: 200, host: "bob") 6 | metric = DiscoursePrometheus::InternalMetric::Base.from_h(metric.to_h) 7 | expect(metric.host).to eq("bob") 8 | end 9 | 10 | it "round trips to a hash" do 11 | metric = described_class.new 12 | 13 | metric.duration = 0.00074 14 | metric.sql_duration = 0.00015 15 | metric.redis_duration = 0.00014 16 | 17 | metric.redis_calls = 2 18 | metric.sql_calls = 3 19 | 20 | metric.controller = "controller" 21 | metric.action = "action" 22 | 23 | metric.crawler = true 24 | 25 | metric = DiscoursePrometheus::InternalMetric::Base.from_h(metric.to_h) 26 | 27 | expect(metric.duration).to eq(0.00074) 28 | expect(metric.sql_duration).to eq(0.00015) 29 | expect(metric.redis_duration).to eq(0.00014) 30 | 31 | expect(metric.redis_calls).to eq(2) 32 | expect(metric.sql_calls).to eq(3) 33 | 34 | expect(metric.controller).to eq("controller") 35 | expect(metric.action).to eq("action") 36 | 37 | expect(metric.crawler).to eq(true) 38 | expect(metric.tracked).to eq(nil) 39 | end 40 | 41 | describe "from_env_data" do 42 | it "gets controller/action" do 43 | env = { "action_dispatch.request.parameters" => { "controller" => "con", "action" => "act" } } 44 | 45 | metric = described_class.from_env_data(env, {}, "") 46 | 47 | expect(metric.controller).to eq("con") 48 | expect(metric.action).to eq("act") 49 | end 50 | 51 | it "fishes out logged data from discourse" do 52 | data = { 53 | status: 201, 54 | is_crawler: false, 55 | has_auth_cookie: true, 56 | is_background: nil, 57 | is_mobile: true, 58 | track_view: true, 59 | } 60 | 61 | metric = described_class.from_env_data({}, data, "test") 62 | 63 | expect(metric.status_code).to eq(201) 64 | expect(metric.crawler).to eq(false) 65 | expect(metric.logged_in).to eq(true) 66 | expect(metric.background).to eq(false) 67 | expect(metric.mobile).to eq(true) 68 | expect(metric.tracked).to eq(true) 69 | expect(metric.host).to eq("test") 70 | end 71 | 72 | it "figures out if it is an ajax call" do 73 | env = { "HTTP_X_REQUESTED_WITH" => "XMLHttpRequest" } 74 | 75 | metric = described_class.from_env_data(env, {}, "") 76 | 77 | expect(metric.ajax).to eq(true) 78 | end 79 | 80 | it "detects json requests" do 81 | env = { "PATH_INFO" => "/test.json" } 82 | 83 | metric = described_class.from_env_data(env, {}, "") 84 | 85 | expect(metric.json).to eq(true) 86 | end 87 | 88 | it "detects json requests from header" do 89 | env = { "HTTP_ACCEPT" => "application/json, text/javascript, */*; q=0.01" } 90 | 91 | metric = described_class.from_env_data(env, {}, "") 92 | 93 | expect(metric.json).to eq(true) 94 | end 95 | 96 | it "detects request method" do 97 | env = { "REQUEST_METHOD" => "GET" } 98 | 99 | metric = described_class.from_env_data(env, {}, "") 100 | 101 | expect(metric.verb).to eq("GET") 102 | 103 | env = { "REQUEST_METHOD" => "TEST" } 104 | 105 | metric = described_class.from_env_data(env, {}, "") 106 | 
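# Anything outside InternalMetric::Web::ALLOWED_REQUEST_METHODS is reported with the verb "OTHER" (see lib/internal_metric/web.rb).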
107 | expect(metric.verb).to eq("OTHER") 108 | end 109 | 110 | it "fishes out timings if available" do 111 | data = { 112 | timing: { 113 | total_duration: 0.1, 114 | sql: { 115 | duration: 0.2, 116 | calls: 5, 117 | }, 118 | redis: { 119 | duration: 0.3, 120 | calls: 6, 121 | }, 122 | gc: { 123 | time: 0.4, 124 | major_count: 7, 125 | minor_count: 8, 126 | }, 127 | }, 128 | } 129 | 130 | metric = described_class.from_env_data({}, data, "") 131 | 132 | expect(metric.duration).to eq(0.1) 133 | expect(metric.sql_duration).to eq(0.2) 134 | expect(metric.redis_duration).to eq(0.3) 135 | 136 | expect(metric.sql_calls).to eq(5) 137 | expect(metric.redis_calls).to eq(6) 138 | 139 | expect(metric.gc_duration).to eq(0.4) 140 | expect(metric.gc_major_count).to eq(7) 141 | expect(metric.gc_minor_count).to eq(8) 142 | end 143 | end 144 | end 145 | -------------------------------------------------------------------------------- /lib/reporter/process.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | # collects stats from currently running process 4 | module DiscoursePrometheus::Reporter 5 | class Process 6 | def self.start(client, type, frequency = 30) 7 | process_collector = new(type) 8 | 9 | Thread.new do 10 | while true 11 | begin 12 | metric = process_collector.collect 13 | client.send_json metric 14 | rescue => e 15 | Rails.logger.warn( 16 | "Prometheus Discourse Failed To Collect Process Stats #{e.class} #{e}\n#{e.backtrace.join("\n")}", 17 | ) 18 | ensure 19 | sleep frequency 20 | end 21 | end 22 | end 23 | end 24 | 25 | def initialize(type) 26 | @type = type.to_s 27 | end 28 | 29 | def collect 30 | metric = ::DiscoursePrometheus::InternalMetric::Process.new 31 | metric.type = @type 32 | collect_gc_stats(metric) 33 | collect_v8_stats(metric) 34 | collect_process_stats(metric) 35 | collect_scheduler_stats(metric) 36 | collect_active_record_connections_stat(metric) 37 | collect_failover_stats(metric) 38 | metric 39 | end 40 | 41 | def pid 42 | @pid = ::Process.pid 43 | end 44 | 45 | def rss 46 | @pagesize ||= 47 | begin 48 | `getconf PAGESIZE`.to_i 49 | rescue StandardError 50 | 4096 51 | end 52 | begin 53 | File.read("/proc/#{pid}/statm").split(" ")[1].to_i * @pagesize 54 | rescue StandardError 55 | 0 56 | end 57 | end 58 | 59 | def process_cpu_seconds_total 60 | ::Process.clock_gettime(::Process::CLOCK_PROCESS_CPUTIME_ID) 61 | end 62 | 63 | def collect_scheduler_stats(metric) 64 | metric.deferred_jobs_queued = Scheduler::Defer.length 65 | 66 | metric.job_failures = {} 67 | 68 | if Discourse.respond_to?(:job_exception_stats) 69 | Discourse.job_exception_stats.each do |klass, count| 70 | key = { "job" => klass.to_s } 71 | if klass.class == Class && klass < ::Jobs::Scheduled 72 | key["family"] = "scheduled" 73 | else 74 | # this is a guess, but regular jobs simply inherit off 75 | # Jobs::Base, so there is no easy way of finding out 76 | key["family"] = "regular" 77 | end 78 | metric.job_failures[key] = count 79 | end 80 | end 81 | end 82 | 83 | def collect_process_stats(metric) 84 | metric.pid = pid 85 | metric.rss = rss 86 | metric.thread_count = Thread.list.count 87 | metric.process_cpu_seconds_total = process_cpu_seconds_total 88 | end 89 | 90 | def collect_gc_stats(metric) 91 | stat = GC.stat 92 | metric.heap_live_slots = stat[:heap_live_slots] 93 | metric.heap_free_slots = stat[:heap_free_slots] 94 | metric.major_gc_count = stat[:major_gc_count] 95 | metric.minor_gc_count = stat[:minor_gc_count] 96 | 97 | if major_by 
= GC.latest_gc_info(:major_by) 98 | key = { reason: major_by.to_s } 99 | metric.gc_major_by[key] ||= 0 100 | metric.gc_major_by[key] += 1 101 | end 102 | 103 | metric.total_allocated_objects = stat[:total_allocated_objects] 104 | end 105 | 106 | def collect_v8_stats(metric) 107 | metric.v8_heap_count = metric.v8_heap_size = 0 108 | metric.v8_heap_size = metric.v8_physical_size = 0 109 | metric.v8_used_heap_size = 0 110 | 111 | ObjectSpace.each_object(MiniRacer::Context) do |context| 112 | stats = context.heap_stats 113 | if stats 114 | metric.v8_heap_count += 1 115 | metric.v8_heap_size += stats[:total_heap_size].to_i 116 | metric.v8_used_heap_size += stats[:used_heap_size].to_i 117 | metric.v8_physical_size += stats[:total_physical_size].to_i 118 | end 119 | end 120 | end 121 | 122 | def collect_active_record_connections_stat(metric) 123 | ObjectSpace.each_object(ActiveRecord::ConnectionAdapters::ConnectionPool) do |pool| 124 | if !pool.connections.nil? 125 | stat = pool.stat 126 | 127 | %i[busy dead idle waiting].each do |status| 128 | key = { status: status.to_s } 129 | metric.active_record_connections_count[key] ||= 0 130 | metric.active_record_connections_count[key] += stat[status] 131 | end 132 | end 133 | end 134 | end 135 | 136 | def collect_failover_stats(metric) 137 | if defined?(RailsFailover::ActiveRecord) && 138 | RailsFailover::ActiveRecord::Handler.instance.respond_to?(:primaries_down_count) 139 | metric.active_record_failover_count = 140 | RailsFailover::ActiveRecord::Handler.instance.primaries_down_count 141 | end 142 | 143 | if defined?(RailsFailover::Redis) && 144 | RailsFailover::Redis::Handler.instance.respond_to?(:primaries_down_count) 145 | metric.redis_failover_count = RailsFailover::Redis::Handler.instance.primaries_down_count 146 | end 147 | end 148 | end 149 | end 150 | -------------------------------------------------------------------------------- /spec/lib/internal_metric/global_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe DiscoursePrometheus::InternalMetric::Global do 4 | let(:db) { RailsMultisite::ConnectionManagement.current_db } 5 | let(:metric) { described_class.new } 6 | 7 | after { metric.reset! 
} 8 | 9 | it "collects global metrics" do 10 | metric.collect 11 | 12 | expect(metric.sidekiq_processes).not_to eq(nil) 13 | expect(metric.postgres_master_available).to eq(1) 14 | expect(metric.postgres_replica_available).to eq(nil) 15 | expect(metric.redis_primary_available).to eq({ { type: "main" } => 1 }) 16 | expect(metric.redis_replica_available).to eq({ { type: "main" } => 0 }) 17 | expect(metric.tmp_dir_available_bytes).to be > 0 18 | end 19 | 20 | it "collects the version_info metric" do 21 | metric.collect 22 | 23 | expect(metric.version_info.count).to eq(1) 24 | labels = metric.version_info.keys.first 25 | value = metric.version_info.values.first 26 | 27 | expect(labels[:revision]).to match(/\A[0-9a-f]{40}\z/) 28 | expect(labels[:version]).to eq(Discourse::VERSION::STRING) 29 | expect(value).to eq(1) 30 | end 31 | 32 | if SiteSetting.respond_to?("s3_inventory_bucket") 33 | describe "missing_s3_uploads metric" do 34 | before { SiteSetting.s3_inventory_bucket = "some-bucket/some/prefix" } 35 | 36 | it "collects the missing upload metrics" do 37 | Discourse.stats.set("missing_s3_uploads", 2) 38 | 39 | metric.collect 40 | 41 | expect(metric.missing_s3_uploads).to eq({ db: db } => 2) 42 | end 43 | 44 | it "throttles the collection of missing upload metrics" do 45 | Discourse.stats.set("missing_s3_uploads", 2) 46 | 47 | metric.collect 48 | 49 | expect(metric.missing_s3_uploads).to eq({ db: db } => 2) 50 | 51 | Discourse.stats.set("missing_s3_uploads", 0) 52 | metric.collect 53 | 54 | expect(metric.missing_s3_uploads).to eq({ db: db } => 2) 55 | 56 | metric.reset! 57 | metric.collect 58 | 59 | expect(metric.missing_s3_uploads).to eq({ db: db } => 0) 60 | end 61 | 62 | context "when `s3_inventory_bucket` has not been set for the site" do 63 | before { SiteSetting.s3_inventory_bucket = nil } 64 | 65 | it "does not expose the metric" do 66 | Discourse.stats.set("missing_s3_uploads", 2) 67 | 68 | metric.collect 69 | 70 | expect(metric.missing_s3_uploads).to eq({}) 71 | end 72 | end 73 | end 74 | end 75 | 76 | describe "sidekiq paused" do 77 | after { Sidekiq.unpause_all! } 78 | 79 | it "collects the right metrics" do 80 | metric.collect 81 | 82 | expect(metric.sidekiq_paused).to eq({ db: db } => nil) 83 | 84 | Sidekiq.pause! 
85 | metric.collect 86 | 87 | expect(metric.sidekiq_paused).to eq({ db: db } => 1) 88 | end 89 | end 90 | 91 | describe "#sidekiq_queue_latency_seconds" do 92 | it "collects the right metrics" do 93 | fake_queue = Sidekiq::Queue.new("default") 94 | 95 | Sidekiq::Queue.expects(:all).returns([fake_queue]) 96 | 97 | metric.collect 98 | 99 | expect(metric.sidekiq_queue_latency_seconds).to eq({ queue: "default" } => 0) 100 | end 101 | end 102 | 103 | describe "#find_stuck_sidekiq_jobs" do 104 | before do 105 | allow(Sidekiq::Workers).to receive(:new).and_return( 106 | [ 107 | [ 108 | "#{::PrometheusExporter.hostname}:25417:c2801d1a17b5", 109 | "5hhh", 110 | Sidekiq::Work.new( 111 | "#{::PrometheusExporter.hostname}:25417:c2801d1a17b5", 112 | "5hhh", 113 | { 114 | "queue" => "default", 115 | "payload" => { 116 | "retry" => true, 117 | "queue" => "default", 118 | "class" => "Jobs::Foo", 119 | "args" => [{ "current_site_id" => "default" }], 120 | "jid" => "294d7a9766a1c2ef237c1452", 121 | "created_at" => 1234567890.123456, 122 | "enqueued_at" => 1234567890.123456, 123 | }.to_json, 124 | "run_at" => 1_234_567_890, 125 | }, 126 | ), 127 | ], 128 | ], 129 | ) 130 | end 131 | 132 | it "collects the right metrics" do 133 | metric.collect 134 | 135 | expect(metric.sidekiq_jobs_stuck).to eq({ { job_name: "Jobs::Foo" } => 1 }) 136 | end 137 | end 138 | 139 | describe "when a replica has been configured" do 140 | before do 141 | config = ActiveRecord::Base.connection_db_config.configuration_hash.dup 142 | 143 | config.merge!(replica_host: "localhost", replica_port: 1111) 144 | ActiveRecord::Base.connection.disconnect! 145 | ActiveRecord::Base.establish_connection(config) 146 | end 147 | 148 | it "collects the right metrics" do 149 | metric.collect 150 | 151 | expect(metric.postgres_master_available).to eq(1) 152 | expect(metric.postgres_replica_available).to eq(0) 153 | end 154 | end 155 | 156 | it "collects postgres_highest_sequence metric" do 157 | Jobs::UpdateStats.new.execute 158 | 159 | metric.collect 160 | 161 | expect(metric.postgres_highest_sequence).to be_a_kind_of(Hash) 162 | expect(metric.postgres_highest_sequence[{ db: "default" }]).to be_present 163 | end 164 | 165 | describe "Redis" do 166 | context "when a replica has been configured for main Redis" do 167 | before do 168 | allow(GlobalSetting).to receive_messages( 169 | get_redis_replica_host: GlobalSetting.redis_host, 170 | get_redis_replica_port: GlobalSetting.redis_port, 171 | ) 172 | GlobalSetting.reset_redis_config! 173 | metric.collect 174 | end 175 | 176 | after { GlobalSetting.reset_redis_config! } 177 | 178 | it "collects the right metrics" do 179 | expect(metric).to have_attributes( 180 | redis_primary_available: { 181 | { type: "main" } => 1, 182 | }, 183 | redis_master_available: { 184 | { type: "main" } => 1, 185 | }, 186 | redis_slave_available: { 187 | { type: "main" } => 1, 188 | }, 189 | redis_replica_available: { 190 | { type: "main" } => 1, 191 | }, 192 | ) 193 | end 194 | end 195 | 196 | context "when a replica has been configured for MessageBus Redis" do 197 | before do 198 | allow(GlobalSetting).to receive_messages( 199 | message_bus_redis_enabled: true, 200 | message_bus_redis_host: GlobalSetting.redis_host, 201 | message_bus_redis_port: GlobalSetting.redis_port, 202 | get_message_bus_redis_replica_host: GlobalSetting.redis_host, 203 | get_message_bus_redis_replica_port: GlobalSetting.redis_port, 204 | ) 205 | GlobalSetting.reset_redis_config! 
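# Only the message-bus replica settings are stubbed above, so the expectations
# below report 0 for the "main" replica and 1 for the "message-bus" replica.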
206 | metric.collect 207 | end 208 | 209 | after { GlobalSetting.reset_redis_config! } 210 | 211 | it "collects the right metrics" do 212 | expect(metric).to have_attributes( 213 | redis_primary_available: { 214 | { type: "main" } => 1, 215 | { type: "message-bus" } => 1, 216 | }, 217 | redis_master_available: { 218 | { type: "main" } => 1, 219 | { type: "message-bus" } => 1, 220 | }, 221 | redis_slave_available: { 222 | { type: "main" } => 0, 223 | { type: "message-bus" } => 1, 224 | }, 225 | redis_replica_available: { 226 | { type: "main" } => 0, 227 | { type: "message-bus" } => 1, 228 | }, 229 | ) 230 | end 231 | end 232 | end 233 | end 234 | -------------------------------------------------------------------------------- /lib/internal_metric/global.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require "raindrops" 4 | require "sidekiq/api" 5 | require "open3" 6 | 7 | module DiscoursePrometheus::InternalMetric 8 | class Global < Base 9 | STUCK_SIDEKIQ_JOB_MINUTES = 120 10 | 11 | attribute :postgres_readonly_mode, 12 | :redis_master_available, 13 | :redis_slave_available, 14 | :redis_primary_available, 15 | :redis_replica_available, 16 | :postgres_master_available, 17 | :postgres_primary_available, 18 | :postgres_replica_available, 19 | :active_app_reqs, 20 | :queued_app_reqs, 21 | :sidekiq_jobs_enqueued, 22 | :sidekiq_processes, 23 | :sidekiq_paused, 24 | :sidekiq_workers, 25 | :sidekiq_jobs_stuck, 26 | :sidekiq_queue_latency_seconds, 27 | :scheduled_jobs_stuck, 28 | :missing_s3_uploads, 29 | :version_info, 30 | :readonly_sites, 31 | :postgres_highest_sequence, 32 | :tmp_dir_available_bytes 33 | 34 | def initialize 35 | @active_app_reqs = 0 36 | @queued_app_reqs = 0 37 | @fault_logged = {} 38 | 39 | begin 40 | @@version = nil 41 | 42 | out, error, status = Open3.capture3("git rev-parse HEAD") 43 | 44 | if status.success? 
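# On success, keep the core revision reported by `git rev-parse HEAD`; collect
# uses it as the `revision` label of the version_info metric.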
45 | @@version ||= out.chomp 46 | else 47 | raise error 48 | end 49 | rescue => e 50 | if defined?(::Discourse) 51 | Discourse.warn_exception(e, message: "Failed to calculate discourse_version_info metric") 52 | else 53 | STDERR.puts "Failed to calculate discourse_version_info metric: #{e}\n#{e.backtrace.join("\n")}" 54 | end 55 | 56 | @@retries ||= 10 57 | @@retries -= 1 58 | @@version = -1 if @@retries < 0 59 | end 60 | end 61 | 62 | def collect 63 | @version_info ||= { { revision: @@version, version: Discourse::VERSION::STRING } => 1 } 64 | 65 | redis_primary_running = {} 66 | redis_replica_running = {} 67 | 68 | redis_config = GlobalSetting.redis_config 69 | redis_primary_running[{ type: "main" }] = test_redis( 70 | :master, 71 | **redis_config.slice(:host, :port, :password, :ssl), 72 | ) 73 | redis_replica_running[{ type: "main" }] = 0 74 | 75 | if redis_config[:custom] || redis_config[:replica_host] 76 | redis_replica_running[{ type: "main" }] = test_redis( 77 | :slave, 78 | host: redis_config.dig(:custom, :replica_host) || redis_config[:replica_host], 79 | port: redis_config.dig(:custom, :replica_port) || redis_config[:replica_port], 80 | password: redis_config[:password], 81 | ssl: redis_config[:ssl], 82 | ) 83 | end 84 | 85 | if GlobalSetting.message_bus_redis_enabled 86 | mb_redis_config = GlobalSetting.message_bus_redis_config 87 | redis_primary_running[{ type: "message-bus" }] = test_redis( 88 | :master, 89 | **mb_redis_config.slice(:host, :port, :password, :ssl), 90 | ) 91 | redis_replica_running[{ type: "message-bus" }] = 0 92 | 93 | if mb_redis_config[:custom] || mb_redis_config[:replica_host] 94 | redis_replica_running[{ type: "message-bus" }] = test_redis( 95 | :slave, 96 | host: mb_redis_config.dig(:custom, :replica_host) || mb_redis_config[:replica_host], 97 | port: mb_redis_config.dig(:custom, :replica_port) || mb_redis_config[:replica_port], 98 | password: mb_redis_config[:password], 99 | ssl: mb_redis_config[:ssl], 100 | ) 101 | end 102 | end 103 | 104 | postgres_primary_running = test_postgres(primary: true) 105 | postgres_replica_running = test_postgres(primary: false) 106 | 107 | net_stats = nil 108 | 109 | if RbConfig::CONFIG["arch"] !~ /darwin/ 110 | if listener = ENV["UNICORN_LISTENER"] 111 | net_stats = Raindrops::Linux.unix_listener_stats([listener])[listener] 112 | else 113 | net_stats = Raindrops::Linux.tcp_listener_stats("0.0.0.0:3000")["0.0.0.0:3000"] 114 | end 115 | end 116 | 117 | @postgres_readonly_mode = primary_site_readonly? 
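# Redis and PostgreSQL availability is published under both the current
# (primary/replica) and the deprecated (master/slave) attribute names where
# both exist; the collector exports the deprecated gauges with "DEPRECATED"
# descriptions.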
118 | @redis_primary_available = @redis_master_available = redis_primary_running 119 | @redis_replica_available = @redis_slave_available = redis_replica_running 120 | @postgres_primary_available = @postgres_master_available = postgres_primary_running 121 | @postgres_replica_available = postgres_replica_running 122 | 123 | # active and queued are special metrics that track max 124 | @active_app_reqs = [@active_app_reqs, net_stats.active].max if net_stats 125 | @queued_app_reqs = [@queued_app_reqs, net_stats.queued].max if net_stats 126 | 127 | @sidekiq_jobs_enqueued = 128 | begin 129 | stats = {} 130 | 131 | Sidekiq::Stats.new.queues.each do |queue_name, queue_count| 132 | stats[{ queue: queue_name }] = queue_count 133 | end 134 | 135 | stats 136 | end 137 | 138 | @sidekiq_queue_latency_seconds = 139 | begin 140 | stats = {} 141 | Sidekiq::Queue.all.each { |queue| stats[{ queue: queue.name }] = queue.latency } 142 | stats 143 | end 144 | 145 | hostname = ::PrometheusExporter.hostname 146 | 147 | @sidekiq_processes = 0 148 | @sidekiq_workers = 149 | Sidekiq::ProcessSet 150 | .new(false) 151 | .sum do |process| 152 | if process["hostname"] == hostname 153 | @sidekiq_processes += 1 154 | process["concurrency"] 155 | else 156 | 0 157 | end 158 | end 159 | 160 | @sidekiq_jobs_stuck = find_stuck_sidekiq_jobs 161 | @scheduled_jobs_stuck = find_stuck_scheduled_jobs 162 | 163 | @sidekiq_paused = sidekiq_paused_states 164 | 165 | @missing_s3_uploads = missing_uploads("s3") 166 | 167 | @readonly_sites = collect_readonly_sites 168 | 169 | @postgres_highest_sequence = calc_postgres_highest_sequence 170 | 171 | @tmp_dir_available_bytes = collect_dir_stats("/tmp") 172 | end 173 | 174 | # For testing purposes 175 | def reset! 176 | @@missing_uploads = nil 177 | end 178 | 179 | private 180 | 181 | def collect_readonly_sites 182 | dbs = RailsMultisite::ConnectionManagement.all_dbs 183 | result = {} 184 | 185 | Discourse::READONLY_KEYS.each do |key| 186 | redis_keys = dbs.map { |db| "#{db}:#{key}" } 187 | count = Discourse.redis.without_namespace.exists(*redis_keys) 188 | result[{ key: key }] = count 189 | end 190 | 191 | result 192 | end 193 | 194 | def collect_dir_stats(dir) 195 | stdout, status = Open3.capture2("df", "-B1", dir) 196 | return nil if !status.success? 197 | 198 | begin 199 | dirstat = stdout.lines[-1].split() 200 | dirstat[3].to_i 201 | rescue Exception => e 202 | Discourse.warn_exception( 203 | e, 204 | message: "Failed to read disk usage for tmp_dir_available_bytes metric", 205 | ) 206 | end 207 | end 208 | 209 | def primary_site_readonly? 210 | return 1 if !defined?(Discourse::PG_READONLY_MODE_KEY) 211 | Discourse.redis.without_namespace.get("default:#{Discourse::PG_READONLY_MODE_KEY}") ? 1 : 0 212 | rescue StandardError 213 | 0 214 | end 215 | 216 | def test_postgres(primary: true) 217 | config = ActiveRecord::Base.connection_db_config.configuration_hash 218 | 219 | unless primary 220 | if config[:replica_host] 221 | config = config.dup.merge(host: config[:replica_host], port: config[:replica_port]) 222 | else 223 | return nil 224 | end 225 | end 226 | 227 | connection = ActiveRecord::ConnectionAdapters::PostgreSQLAdapter.new(config) 228 | connection.tap(&:verify!).active? ? 1 : 0 229 | rescue StandardError 230 | 0 231 | ensure 232 | connection&.disconnect! 
233 | end 234 | 235 | def test_redis(role, **config) 236 | test_connection = Redis.new(**config) 237 | if test_connection.ping == "PONG" 238 | 1 239 | else 240 | 0 241 | end 242 | rescue StandardError 243 | 0 244 | ensure 245 | test_connection&.close 246 | end 247 | 248 | def sidekiq_paused_states 249 | paused = {} 250 | 251 | begin 252 | RailsMultisite::ConnectionManagement.each_connection do |db| 253 | paused[{ db: db }] = Sidekiq.paused? ? 1 : nil 254 | end 255 | rescue => e 256 | Discourse.warn_exception(e, message: "Failed to connect to redis to collect paused status") 257 | end 258 | 259 | paused 260 | end 261 | 262 | MISSING_UPLOADS_CHECK_SECONDS = 60 263 | 264 | def missing_uploads(type) 265 | @@missing_uploads ||= {} 266 | @@missing_uploads[type] ||= {} 267 | @@missing_uploads[type][:stats] ||= {} 268 | last_check = @@missing_uploads[type][:last_check] 269 | 270 | if Discourse.respond_to?(:stats) && 271 | (!last_check || (Time.now.to_i - last_check > MISSING_UPLOADS_CHECK_SECONDS)) 272 | begin 273 | RailsMultisite::ConnectionManagement.each_connection do |db| 274 | if SiteSetting.respond_to?(:s3_inventory_bucket) 275 | next if SiteSetting.s3_inventory_bucket.blank? 276 | end 277 | 278 | # For backward compatibility 279 | if SiteSetting.respond_to?(:enable_s3_inventory) 280 | next if !SiteSetting.enable_s3_inventory 281 | end 282 | 283 | @@missing_uploads[type][:stats][{ db: db }] = Discourse.stats.get( 284 | "missing_#{type}_uploads", 285 | ) 286 | end 287 | 288 | @@missing_uploads[type][:last_check] = Time.now.to_i 289 | rescue => e 290 | if @postgres_master_available == 1 291 | Discourse.warn_exception( 292 | e, 293 | message: "Failed to connect to database to collect upload stats", 294 | ) 295 | else 296 | # TODO: Be smarter and connect to the replica. For now, just disable 297 | # the noise when we failover. 
298 | end 299 | end 300 | end 301 | 302 | @@missing_uploads[type][:stats] 303 | end 304 | 305 | def find_stuck_scheduled_jobs 306 | hostname = ::PrometheusExporter.hostname 307 | stats = {} 308 | MiniScheduler::Manager.discover_schedules.each do |klass| 309 | info_key = 310 | ( 311 | if klass.is_per_host 312 | MiniScheduler::Manager.schedule_key(klass, hostname) 313 | else 314 | MiniScheduler::Manager.schedule_key(klass) 315 | end 316 | ) 317 | schedule_info = Discourse.redis.get(info_key) 318 | schedule_info = 319 | begin 320 | JSON.parse(schedule_info, symbolize_names: true) 321 | rescue StandardError 322 | nil 323 | end 324 | 325 | next if !schedule_info 326 | next if schedule_info[:prev_result] != "RUNNING" # Only look at jobs which are running 327 | next if !schedule_info[:current_owner]&.start_with?("_scheduler_#{hostname}") # Only look at jobs on this host 328 | 329 | job_frequency = klass.every || 1.day 330 | started_at = Time.at(schedule_info[:prev_run]) 331 | 332 | allowed_duration = 333 | begin 334 | if job_frequency < 30.minutes 335 | 30.minutes 336 | elsif job_frequency < 2.hours 337 | job_frequency 338 | else 339 | 2.hours 340 | end 341 | end 342 | 343 | next if started_at >= (Time.now - allowed_duration) 344 | 345 | labels = { job_name: klass.name } 346 | stats[labels] ||= 0 347 | stats[labels] += 1 348 | end 349 | stats 350 | rescue => e 351 | Discourse.warn_exception( 352 | e, 353 | message: "Failed to connect to redis to collect scheduled job status", 354 | ) 355 | end 356 | 357 | def find_stuck_sidekiq_jobs 358 | hostname = ::PrometheusExporter.hostname 359 | stats = {} 360 | Sidekiq::Workers.new.each do |queue, tid, work| 361 | next unless queue.start_with?(hostname) 362 | next if Time.at(work.run_at) >= (Time.now - 60 * STUCK_SIDEKIQ_JOB_MINUTES) 363 | labels = { job_name: work.job["class"] } 364 | stats[labels] ||= 0 365 | stats[labels] += 1 366 | end 367 | stats 368 | end 369 | 370 | def calc_postgres_highest_sequence 371 | result = {} 372 | 373 | RailsMultisite::ConnectionManagement.each_connection do |db| 374 | result[{ db: db }] = Discourse.stats.get("postgres_highest_sequence") 375 | end 376 | 377 | result 378 | end 379 | end 380 | end 381 | -------------------------------------------------------------------------------- /spec/lib/collector_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require "prometheus_exporter/server" 4 | require_relative "../../lib/collector" 5 | 6 | RSpec.describe DiscoursePrometheus::Collector do 7 | subject(:collector) { described_class.new } 8 | 9 | it "processes custom metrics" do 10 | collector.process(<<~METRIC) 11 | { 12 | "_type": "Custom", 13 | "name": "counter", 14 | "description": "some description", 15 | "value": 2, 16 | "type": "Counter" 17 | } 18 | METRIC 19 | 20 | collector.process(<<~METRIC) 21 | { 22 | "_type": "Custom", 23 | "name": "counter", 24 | "description": "some description", 25 | "type": "Counter" 26 | } 27 | METRIC 28 | 29 | collector.process(<<~METRIC) 30 | { 31 | "_type": "Custom", 32 | "name": "gauge", 33 | "labels": { "test": "super" }, 34 | "description": "some description", 35 | "value": 122.1, 36 | "type": "Gauge" 37 | } 38 | METRIC 39 | 40 | metrics = collector.prometheus_metrics 41 | 42 | counter = metrics.find { |m| m.name == "counter" } 43 | gauge = metrics.find { |m| m.name == "gauge" } 44 | 45 | expect(gauge.data).to eq({ "test" => "super" } => 122.1) 46 | expect(counter.data).to eq(nil => 3) 47 | end 48 | 49 | it 
"processes custom summary and histogram metrics" do 50 | collector.process(<<~METRIC) 51 | { 52 | "_type": "Custom", 53 | "name": "summary_metric", 54 | "description": "summary description", 55 | "labels": { "feature": "foo" }, 56 | "value": 1.5, 57 | "type": "Summary" 58 | } 59 | METRIC 60 | 61 | collector.process(<<~METRIC) 62 | { 63 | "_type": "Custom", 64 | "name": "summary_metric", 65 | "description": "summary description", 66 | "labels": { "feature": "foo" }, 67 | "value": 2.5, 68 | "type": "Summary" 69 | } 70 | METRIC 71 | 72 | collector.process(<<~METRIC) 73 | { 74 | "_type": "Custom", 75 | "name": "histogram_metric", 76 | "description": "histogram description", 77 | "labels": { "feature": "foo" }, 78 | "value": 0.5, 79 | "type": "Histogram" 80 | } 81 | METRIC 82 | 83 | collector.process(<<~METRIC) 84 | { 85 | "_type": "Custom", 86 | "name": "histogram_metric", 87 | "description": "histogram description", 88 | "labels": { "feature": "foo" }, 89 | "value": 1.5, 90 | "type": "Histogram" 91 | } 92 | METRIC 93 | 94 | metrics = collector.prometheus_metrics 95 | 96 | summary = metrics.find { |m| m.name == "summary_metric" } 97 | histogram = metrics.find { |m| m.name == "histogram_metric" } 98 | 99 | expect(summary.type).to eq("summary") 100 | expect(summary.to_h).to eq({ { "feature" => "foo" } => { "count" => 2, "sum" => 4.0 } }) 101 | 102 | expect(histogram.type).to eq("histogram") 103 | expect(histogram.to_h).to eq({ { "feature" => "foo" } => { "count" => 2, "sum" => 2.0 } }) 104 | end 105 | 106 | it "logs failures when metric processing raises" do 107 | expect do 108 | expect do collector.process(<<~METRIC) end.to output( 109 | { 110 | "_type": "Custom", 111 | "name": "mystery_metric", 112 | "description": "broken metric", 113 | "type": "Mystery" 114 | } 115 | METRIC 116 | /Prometheus collector failed to process metric mystery_metric/, 117 | ).to_stderr 118 | end.to raise_error( 119 | DiscoursePrometheus::Collector::UnknownMetricTypeError, 120 | "Unknown metric type Mystery", 121 | ) 122 | end 123 | 124 | it "handles sidekiq job metrics" do 125 | metric_1 = DiscoursePrometheus::InternalMetric::Job.new 126 | metric_1.scheduled = false 127 | metric_1.job_name = "Bob" 128 | metric_1.duration = 1.778 129 | metric_1.count = 1 130 | metric_1.success = true 131 | 132 | collector.process(metric_1.to_json) 133 | metrics = collector.prometheus_metrics 134 | 135 | metric_2 = DiscoursePrometheus::InternalMetric::Job.new 136 | metric_2.scheduled = false 137 | metric_2.job_name = "Bob" 138 | metric_2.duration = 0.5 139 | metric_2.count = 1 140 | metric_2.success = false 141 | collector.process(metric_2.to_json) 142 | 143 | metric_3 = DiscoursePrometheus::InternalMetric::Job.new 144 | metric_3.scheduled = false 145 | metric_3.job_name = "Bob" 146 | metric_3.duration = 1.5 147 | metric_3.count = 1 148 | metric_3.success = false 149 | collector.process(metric_3.to_json) 150 | 151 | duration = metrics.find { |m| m.name == "sidekiq_job_duration_seconds" } 152 | sidekiq_job_count = metrics.find { |m| m.name == "sidekiq_job_count" } 153 | 154 | expect(duration.data).to eq( 155 | { job_name: "Bob", success: true } => metric_1.duration, 156 | { job_name: "Bob", success: false } => metric_2.duration + metric_3.duration, 157 | ) 158 | 159 | expect(sidekiq_job_count.data).to eq( 160 | { job_name: "Bob", success: false } => 2, 161 | { job_name: "Bob", success: true } => 1, 162 | ) 163 | end 164 | 165 | it "handles scheduled job metrics" do 166 | metric_1 = DiscoursePrometheus::InternalMetric::Job.new 167 | 
metric_1.scheduled = true 168 | metric_1.job_name = "Bob" 169 | metric_1.duration = 1.778 170 | metric_1.success = true 171 | metric_1.count = 1 172 | collector.process(metric_1.to_json) 173 | 174 | metric_2 = DiscoursePrometheus::InternalMetric::Job.new 175 | metric_2.scheduled = true 176 | metric_2.job_name = "Bob" 177 | metric_2.duration = 1.123123 178 | metric_2.success = false 179 | metric_2.count = 1 180 | collector.process(metric_2.to_json) 181 | 182 | metrics = collector.prometheus_metrics 183 | 184 | duration = metrics.find { |m| m.name == "scheduled_job_duration_seconds" } 185 | count = metrics.find { |m| m.name == "scheduled_job_count" } 186 | 187 | expect(duration.data).to eq( 188 | { job_name: "Bob", success: true } => metric_1.duration, 189 | { job_name: "Bob", success: false } => metric_2.duration, 190 | ) 191 | 192 | expect(count.data).to eq( 193 | { job_name: "Bob", success: true } => 1, 194 | { job_name: "Bob", success: false } => 1, 195 | ) 196 | end 197 | 198 | it "handles job initialization metrics" do 199 | metric = DiscoursePrometheus::InternalMetric::Job.new 200 | 201 | metric.scheduled = true 202 | metric.job_name = "Bob" 203 | metric.count = 0 204 | metric.duration = 0 205 | metric.success = true 206 | 207 | collector.process(metric.to_json) 208 | metrics = collector.prometheus_metrics 209 | 210 | duration = metrics.find { |m| m.name == "scheduled_job_duration_seconds" } 211 | count = metrics.find { |m| m.name == "scheduled_job_count" } 212 | 213 | expect(duration.data).to eq({ job_name: "Bob", success: true } => 0) 214 | expect(count.data).to eq({ job_name: "Bob", success: true } => 0) 215 | end 216 | 217 | it "handles process metrics" do 218 | skip("skipped because /proc does not exist on macOS") if RbConfig::CONFIG["arch"] =~ /darwin/ 219 | 220 | reporter = DiscoursePrometheus::Reporter::Process.new(:web) 221 | collector.process(reporter.collect.to_json) 222 | 223 | metrics = collector.prometheus_metrics 224 | rss = metrics.find { |m| m.name == "rss" } 225 | 226 | expect(rss.data[type: "web", pid: Process.pid]).to be > 0 227 | 228 | ar = metrics.find { |metric| metric.name == "active_record_connections_count" } 229 | 230 | expect(ar.data[type: "web", pid: Process.pid, status: "busy"]).to be > 0 231 | end 232 | 233 | describe "job_exception_stats" do 234 | before { Discourse.reset_job_exception_stats! } 235 | after { Discourse.reset_job_exception_stats! 
} 236 | 237 | it "collects job_exception_stats" do 238 | # see MiniScheduler Manager which reports it like this 239 | # https://github.com/discourse/mini_scheduler/blob/2b2c1c56b6e76f51108c2a305775469e24cf2b65/lib/mini_scheduler/manager.rb#L95 240 | exception_context = { 241 | message: "Running a scheduled job", 242 | job: { 243 | "class" => Jobs::ReindexSearch, 244 | }, 245 | } 246 | 247 | 2.times do 248 | expect { 249 | Discourse.handle_job_exception(StandardError.new, exception_context) 250 | }.to raise_error(StandardError) 251 | end 252 | 253 | metric = DiscoursePrometheus::Reporter::Process.new(:web).collect 254 | 255 | collector.process(metric.to_json) 256 | 257 | metric = collector.prometheus_metrics.find { |m| m.name == "job_failures" } 258 | 259 | expect(metric.data).to eq( 260 | { 261 | { 262 | "family" => "scheduled", 263 | :type => "web", 264 | :pid => Process.pid, 265 | "job" => "Jobs::ReindexSearch", 266 | } => 267 | 2, 268 | }, 269 | ) 270 | end 271 | end 272 | 273 | it "expires old metrics" do 274 | old_metric = DiscoursePrometheus::InternalMetric::Process.new 275 | old_metric.pid = 100 276 | old_metric.rss = 100 277 | old_metric.major_gc_count = old_metric.minor_gc_count = old_metric.total_allocated_objects = 0 278 | 279 | collector.process(old_metric.to_json) 280 | 281 | # travel forward in time 282 | now = Process.clock_gettime(Process::CLOCK_MONOTONIC) 283 | later = now + 61 284 | Process.stubs(:clock_gettime).returns(later) 285 | 286 | new_metric = DiscoursePrometheus::InternalMetric::Process.new 287 | new_metric.pid = 200 288 | new_metric.rss = 20 289 | new_metric.major_gc_count = new_metric.minor_gc_count = new_metric.total_allocated_objects = 0 290 | 291 | collector.process(new_metric.to_json) 292 | 293 | metrics = collector.prometheus_metrics 294 | rss = metrics.find { |m| m.name == "rss" } 295 | 296 | expect(rss.data[type: nil, pid: 200]).to be > 0 297 | expect(rss.data.length).to eq(1) 298 | end 299 | 300 | it "counts metrics correctly" do 301 | metrics = [] 302 | metrics << DiscoursePrometheus::InternalMetric::Web.get( 303 | tracked: true, 304 | verb: "GET", 305 | status_code: 200, 306 | db: "bob", 307 | ) 308 | metrics << DiscoursePrometheus::InternalMetric::Web.get( 309 | tracked: true, 310 | verb: "GET", 311 | status_code: 200, 312 | db: "bob", 313 | ) 314 | metrics << DiscoursePrometheus::InternalMetric::Web.get( 315 | tracked: true, 316 | verb: "GET", 317 | logged_in: true, 318 | status_code: 200, 319 | db: "bill", 320 | ) 321 | metrics << DiscoursePrometheus::InternalMetric::Web.get( 322 | tracked: true, 323 | verb: "GET", 324 | mobile: true, 325 | status_code: 200, 326 | db: "jake", 327 | ) 328 | metrics << DiscoursePrometheus::InternalMetric::Web.get( 329 | tracked: false, 330 | verb: "GET", 331 | status_code: 200, 332 | db: "bob", 333 | user_api: true, 334 | ) 335 | metrics << DiscoursePrometheus::InternalMetric::Web.get( 336 | tracked: false, 337 | verb: "GET", 338 | status_code: 300, 339 | db: "bob", 340 | admin_api: true, 341 | ) 342 | metrics << DiscoursePrometheus::InternalMetric::Web.get( 343 | tracked: false, 344 | verb: "GET", 345 | background: true, 346 | status_code: 418, 347 | db: "bob", 348 | ) 349 | metrics << DiscoursePrometheus::InternalMetric::Web.get( 350 | tracked: false, 351 | verb: "GET", 352 | background: true, 353 | status_code: 200, 354 | db: "bob", 355 | ) 356 | 357 | metrics.each { |metric| collector.process(metric.to_json) } 358 | 359 | exported = collector.prometheus_metrics 360 | 361 | page_views = exported.find { |m| m.name 
== "page_views" } 362 | 363 | expected = { 364 | { db: "bob", type: "anon", device: "desktop" } => 2, 365 | { db: "bill", type: "logged_in", device: "desktop" } => 1, 366 | { db: "jake", type: "anon", device: "mobile" } => 1, 367 | } 368 | 369 | expect(page_views.data).to eq(expected) 370 | 371 | http_requests = exported.find { |m| m.name == "http_requests" } 372 | expected = { 373 | { db: "bob", api: "web", verb: "GET", type: "regular", status: 200 } => 2, 374 | { db: "bill", api: "web", verb: "GET", type: "regular", status: 200 } => 1, 375 | { db: "jake", api: "web", verb: "GET", type: "regular", status: 200 } => 1, 376 | { db: "bob", api: "user", verb: "GET", type: "regular", status: 200 } => 1, 377 | { db: "bob", api: "admin", verb: "GET", type: "regular", status: 300 } => 1, 378 | { db: "bob", api: "web", verb: "GET", type: "background", status: "-1" } => 1, 379 | { db: "bob", api: "web", verb: "GET", type: "background", status: 200 } => 1, 380 | } 381 | expect(http_requests.data).to eq(expected) 382 | end 383 | 384 | it "processes timing attributes in web metrics correctly" do 385 | metrics = [] 386 | 387 | metrics << DiscoursePrometheus::InternalMetric::Web.get( 388 | status_code: 200, 389 | duration: 14, 390 | sql_duration: 1, 391 | redis_duration: 2, 392 | net_duration: 3, 393 | gc_duration: 4, 394 | gc_major_count: 5, 395 | gc_minor_count: 6, 396 | queue_duration: 7, 397 | json: true, 398 | controller: "list", 399 | action: "latest", 400 | logged_in: true, 401 | ) 402 | 403 | metrics << DiscoursePrometheus::InternalMetric::Web.get( 404 | status_code: 302, 405 | duration: 14, 406 | sql_duration: 1, 407 | redis_duration: 2, 408 | net_duration: 3, 409 | gc_duration: 4, 410 | gc_major_count: 5, 411 | gc_minor_count: 6, 412 | queue_duration: 7, 413 | controller: "list", 414 | action: "latest", 415 | logged_in: false, 416 | html: true, 417 | ) 418 | 419 | metrics.each { |metric| collector.process(metric.to_json) } 420 | 421 | exported = collector.prometheus_metrics 422 | 423 | assert_metric = ->(metric_name, sum, metric_type) do 424 | metric = exported.find { |m| m.name == metric_name } 425 | 426 | expect(metric.type).to eq(metric_type) 427 | 428 | expect(metric.to_h).to eq( 429 | { 430 | controller: "list", 431 | action: "latest", 432 | success: true, 433 | logged_in: true, 434 | content_type: "json", 435 | } => { 436 | "count" => 1, 437 | "sum" => sum, 438 | }, 439 | { 440 | controller: "list", 441 | action: "latest", 442 | success: false, 443 | logged_in: false, 444 | content_type: "html", 445 | } => { 446 | "count" => 1, 447 | "sum" => sum, 448 | }, 449 | ) 450 | end 451 | 452 | [ 453 | ["http_duration_seconds", 14.0, "summary"], 454 | ["http_application_duration_seconds", 4.0, "summary"], 455 | ["http_sql_duration_seconds", 1.0, "summary"], 456 | ["http_redis_duration_seconds", 2.0, "summary"], 457 | ["http_net_duration_seconds", 3.0, "summary"], 458 | ["http_gc_duration_seconds", 4.0, "summary"], 459 | ["http_requests_duration_seconds", 14.0, "histogram"], 460 | ["http_requests_application_duration_seconds", 4.0, "histogram"], 461 | ["http_requests_sql_duration_seconds", 1.0, "histogram"], 462 | ["http_requests_redis_duration_seconds", 2.0, "histogram"], 463 | ["http_requests_net_duration_seconds", 3.0, "histogram"], 464 | ["http_requests_gc_duration_seconds", 4.0, "histogram"], 465 | ].each { |args| assert_metric.call(*args) } 466 | 467 | expect( 468 | exported.find { |metric| metric.name == "http_requests_queue_duration_seconds" }.to_h, 469 | ).to eq({} => { "count" => 2, "sum" 
=> 14.0 }) 470 | 471 | expect(exported.find { |metric| metric.name == "http_gc_major_count" }.to_h).to eq( 472 | { 473 | controller: "list", 474 | action: "latest", 475 | success: true, 476 | logged_in: true, 477 | content_type: "json", 478 | } => 479 | 5, 480 | { 481 | controller: "list", 482 | action: "latest", 483 | success: false, 484 | logged_in: false, 485 | content_type: "html", 486 | } => 487 | 5, 488 | ) 489 | 490 | expect(exported.find { |metric| metric.name == "http_gc_minor_count" }.to_h).to eq( 491 | { 492 | controller: "list", 493 | action: "latest", 494 | success: true, 495 | logged_in: true, 496 | content_type: "json", 497 | } => 498 | 6, 499 | { 500 | controller: "list", 501 | action: "latest", 502 | success: false, 503 | logged_in: false, 504 | content_type: "html", 505 | } => 506 | 6, 507 | ) 508 | end 509 | end 510 | -------------------------------------------------------------------------------- /lib/collector.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module ::DiscoursePrometheus 4 | class Collector < ::PrometheusExporter::Server::CollectorBase 5 | MAX_PROCESS_METRIC_AGE = 60 6 | 7 | # convenience shortcuts 8 | Gauge = ::PrometheusExporter::Metric::Gauge 9 | Counter = ::PrometheusExporter::Metric::Counter 10 | Summary = ::PrometheusExporter::Metric::Summary 11 | Histogram = ::PrometheusExporter::Metric::Histogram 12 | 13 | class UnknownMetricTypeError < StandardError 14 | end 15 | 16 | def initialize 17 | @page_views = nil 18 | @http_requests = nil 19 | @http_duration_seconds = nil 20 | @http_application_duration_seconds = nil 21 | @http_redis_duration_seconds = nil 22 | @http_sql_duration_seconds = nil 23 | @http_net_duration_seconds = nil 24 | @http_queue_duration_seconds = nil 25 | @http_gc_duration_seconds = nil 26 | 27 | @http_requests_duration_seconds = nil 28 | @http_requests_application_duration_seconds = nil 29 | @http_requests_redis_duration_seconds = nil 30 | @http_requests_sql_duration_seconds = nil 31 | @http_requests_net_duration_seconds = nil 32 | @http_requests_queue_duration_seconds = nil 33 | @http_requests_gc_duration_seconds = nil 34 | 35 | @http_gc_major_count = nil 36 | @http_gc_minor_count = nil 37 | @http_forced_anon_count = nil 38 | 39 | @scheduled_job_duration_seconds = nil 40 | @scheduled_job_count = nil 41 | @sidekiq_job_duration_seconds = nil 42 | @sidekiq_job_count = nil 43 | 44 | @missing_s3_uploads = nil 45 | 46 | @process_metrics = [] 47 | @global_metrics = [] 48 | 49 | @custom_metrics = nil 50 | end 51 | 52 | def process(str) 53 | obj = nil 54 | metric = nil 55 | 56 | obj = Oj.load(str, mode: :object) 57 | metric = DiscoursePrometheus::InternalMetric::Base.from_h(obj) 58 | 59 | if InternalMetric::Process === metric 60 | process_process(metric) 61 | elsif InternalMetric::Web === metric 62 | process_web(metric) 63 | elsif InternalMetric::Job === metric 64 | process_job(metric) 65 | elsif InternalMetric::Global === metric 66 | process_global(metric) 67 | elsif InternalMetric::Custom === metric 68 | process_custom(metric) 69 | end 70 | rescue => e 71 | metric_name = 72 | if metric && metric.respond_to?(:name) 73 | metric.name 74 | elsif obj 75 | obj["name"] || obj[:name] 76 | end 77 | 78 | STDERR.puts( 79 | "#{Time.now}: Prometheus collector failed to process metric #{metric_name || "unknown"} " \ 80 | "(#{e.class}): #{e.message}", 81 | ) 82 | STDERR.puts(e.backtrace.join("\n")) if e.backtrace 83 | 84 | raise 85 | end 86 | 87 | def prometheus_metrics_text 
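# Renders every metric collected so far in the Prometheus text exposition
# format, one block per metric.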
88 | prometheus_metrics.map(&:to_prometheus_text).join("\n") 89 | end 90 | 91 | def process_custom(metric) 92 | obj = ensure_custom_metric(metric) 93 | value = metric.value 94 | 95 | case obj 96 | when Counter 97 | obj.observe(value || 1, metric.labels) 98 | when Gauge 99 | obj.observe(value, metric.labels) if !value.nil? 100 | when Summary, Histogram 101 | obj.observe(value, metric.labels || {}) if !value.nil? 102 | end 103 | end 104 | 105 | def ensure_custom_metric(metric) 106 | @custom_metrics ||= {} 107 | if !(obj = @custom_metrics[metric.name]) 108 | case metric.type.to_s.downcase 109 | when "counter" 110 | obj = Counter.new(metric.name, metric.description) 111 | when "gauge" 112 | obj = Gauge.new(metric.name, metric.description) 113 | when "summary" 114 | obj = Summary.new(metric.name, metric.description) 115 | when "histogram" 116 | obj = Histogram.new(metric.name, metric.description) 117 | else 118 | raise UnknownMetricTypeError, "Unknown metric type #{metric.type}" 119 | end 120 | @custom_metrics[metric.name] = obj 121 | end 122 | 123 | obj 124 | end 125 | 126 | def process_global(metric) 127 | ensure_global_metrics 128 | @global_metrics.each do |gauge| 129 | values = metric.send(gauge.name) 130 | # global metrics "reset" each time they are called 131 | # this will delete labels we don't need anymore 132 | gauge.reset! 133 | 134 | if values.is_a?(Hash) 135 | values.each { |labels, value| gauge.observe(value, labels) } 136 | else 137 | gauge.observe(values) 138 | end 139 | end 140 | end 141 | 142 | def ensure_global_metrics 143 | return if @global_metrics.length > 0 144 | 145 | global_metrics = [] 146 | 147 | global_metrics << Gauge.new( 148 | "postgres_readonly_mode", 149 | "Indicates whether site is in readonly mode due to PostgreSQL failover", 150 | ) 151 | 152 | global_metrics << Gauge.new( 153 | "redis_master_available", 154 | "DEPRECATED: see redis_primary_available", 155 | ) 156 | 157 | global_metrics << Gauge.new( 158 | "redis_primary_available", 159 | "Whether or not we have an active connection to the primary Redis", 160 | ) 161 | 162 | global_metrics << Gauge.new( 163 | "redis_slave_available", 164 | "DEPRECATED: see redis_replica_available", 165 | ) 166 | 167 | global_metrics << Gauge.new( 168 | "redis_replica_available", 169 | "Whether or not we have an active connection to the replica Redis", 170 | ) 171 | 172 | global_metrics << Gauge.new( 173 | "postgres_master_available", 174 | "DEPRECATED: See postgres_primary_available", 175 | ) 176 | 177 | global_metrics << Gauge.new( 178 | "postgres_primary_available", 179 | "Whether or not we have an active connection to the primary PostgreSQL", 180 | ) 181 | 182 | global_metrics << Gauge.new( 183 | "postgres_replica_available", 184 | "Whether or not we have an active connection to the replica PostgreSQL", 185 | ) 186 | 187 | global_metrics << Gauge.new("active_app_reqs", "Number of active web requests in progress") 188 | 189 | global_metrics << Gauge.new("queued_app_reqs", "Number of queued web requests") 190 | 191 | global_metrics << Gauge.new( 192 | "sidekiq_jobs_enqueued", 193 | "Number of jobs queued in the Sidekiq worker processes", 194 | ) 195 | 196 | global_metrics << Gauge.new("sidekiq_processes", "Number of Sidekiq job processes") 197 | 198 | global_metrics << Gauge.new("sidekiq_paused", "Whether or not Sidekiq is paused") 199 | 200 | global_metrics << Gauge.new("sidekiq_workers", "Total number of active sidekiq workers") 201 | 202 | global_metrics << Gauge.new( 203 | "sidekiq_queue_latency_seconds", 204 | "Latency 
in seconds for each Sidekiq queue", 205 | ) 206 | 207 | global_metrics << Gauge.new( 208 | "sidekiq_jobs_stuck", 209 | "Number of sidekiq jobs which have been running for more than #{InternalMetric::Global::STUCK_SIDEKIQ_JOB_MINUTES} minutes", 210 | ) 211 | 212 | global_metrics << Gauge.new( 213 | "scheduled_jobs_stuck", 214 | "Number of scheduled jobs which have been running for more than their expected duration", 215 | ) 216 | 217 | global_metrics << Gauge.new("missing_s3_uploads", "Number of missing uploads in S3") 218 | 219 | global_metrics << Gauge.new( 220 | "version_info", 221 | "Labelled with `revision` (current core commit hash), and `version` (Discourse::VERSION::STRING)", 222 | ) 223 | 224 | global_metrics << Gauge.new( 225 | "readonly_sites", 226 | "Count of sites currently in readonly mode, grouped by the relevant key from Discourse::READONLY_KEYS", 227 | ) 228 | 229 | global_metrics << Gauge.new( 230 | "postgres_highest_sequence", 231 | "The highest last_value from the pg_sequences table", 232 | ) 233 | 234 | global_metrics << Gauge.new( 235 | "tmp_dir_available_bytes", 236 | "Available space in /tmp directory (bytes)", 237 | ) 238 | 239 | @global_metrics = global_metrics 240 | end 241 | 242 | def process_job(metric) 243 | ensure_job_metrics 244 | hash = { job_name: metric.job_name, success: metric.success } 245 | 246 | if metric.scheduled 247 | @scheduled_job_duration_seconds.observe(metric.duration, hash) 248 | @scheduled_job_count.observe(metric.count, hash) 249 | else 250 | @sidekiq_job_duration_seconds.observe(metric.duration, hash) 251 | @sidekiq_job_count.observe(metric.count, hash) 252 | end 253 | end 254 | 255 | def ensure_job_metrics 256 | unless @scheduled_job_count 257 | @scheduled_job_duration_seconds = 258 | Counter.new("scheduled_job_duration_seconds", "Total time spent in scheduled jobs") 259 | 260 | @scheduled_job_count = 261 | Counter.new("scheduled_job_count", "Total number of scheduled jobs executed") 262 | 263 | @sidekiq_job_duration_seconds = 264 | Counter.new("sidekiq_job_duration_seconds", "Total time spent in sidekiq jobs") 265 | 266 | @sidekiq_job_count = 267 | Counter.new("sidekiq_job_count", "Total number of sidekiq jobs executed") 268 | end 269 | end 270 | 271 | def process_process(metric) 272 | now = Process.clock_gettime(Process::CLOCK_MONOTONIC) 273 | # process clock monotonic is used here so keep collector process time 274 | metric.created_at = now 275 | @process_metrics.delete_if do |current| 276 | metric.pid == current.pid || (current.created_at + MAX_PROCESS_METRIC_AGE < now) 277 | end 278 | @process_metrics << metric 279 | end 280 | 281 | HTTP_DURATION_HISTOGRAM_BUCKETS = [0.01, 0.05, 0.1, 0.2, 0.4, 0.8, 1, 15, 30] 282 | 283 | def ensure_web_metrics 284 | unless @page_views 285 | @page_views = Counter.new("page_views", "Page views reported by admin dashboard") 286 | @http_requests = Counter.new("http_requests", "Total HTTP requests from web app") 287 | @http_forced_anon_count = 288 | Counter.new( 289 | "http_forced_anon_count", 290 | "Total count of logged in requests forced into anonymous mode", 291 | ) 292 | 293 | @http_requests_duration_seconds = 294 | Histogram.new( 295 | "http_requests_duration_seconds", 296 | "Time spent in HTTP reqs in seconds", 297 | buckets: HTTP_DURATION_HISTOGRAM_BUCKETS, 298 | ) 299 | 300 | @http_requests_application_duration_seconds = 301 | Histogram.new( 302 | "http_requests_application_duration_seconds", 303 | "Time spent in application code within HTTP reqs in seconds", 304 | buckets: 
HTTP_DURATION_HISTOGRAM_BUCKETS, 305 | ) 306 | 307 | @http_requests_redis_duration_seconds = 308 | Histogram.new( 309 | "http_requests_redis_duration_seconds", 310 | "Time spent in Redis within HTTP reqs redis seconds", 311 | buckets: HTTP_DURATION_HISTOGRAM_BUCKETS, 312 | ) 313 | 314 | @http_requests_sql_duration_seconds = 315 | Histogram.new( 316 | "http_requests_sql_duration_seconds", 317 | "Time spent in SQL within HTTP reqs in seconds", 318 | buckets: HTTP_DURATION_HISTOGRAM_BUCKETS, 319 | ) 320 | @http_requests_net_duration_seconds = 321 | Histogram.new( 322 | "http_requests_net_duration_seconds", 323 | "Time spent in external network requests within HTTP reqs in seconds", 324 | buckets: HTTP_DURATION_HISTOGRAM_BUCKETS, 325 | ) 326 | 327 | @http_requests_queue_duration_seconds = 328 | Histogram.new( 329 | "http_requests_queue_duration_seconds", 330 | "Time spent queueing requests between NGINX and Ruby in seconds", 331 | buckets: [0, 0.005, 0.01, 0.025, 0.05, 0.075, 0.1, 0.25, 0.5, 1, 2.5, 5, 10], 332 | ) 333 | 334 | @http_requests_gc_duration_seconds = 335 | Histogram.new( 336 | "http_requests_gc_duration_seconds", 337 | "Time spent in garbage collection within HTTP reqs in seconds", 338 | buckets: HTTP_DURATION_HISTOGRAM_BUCKETS, 339 | ) 340 | 341 | @http_duration_seconds = 342 | Summary.new("http_duration_seconds", "Time spent in HTTP reqs in seconds") 343 | 344 | @http_application_duration_seconds = 345 | Summary.new( 346 | "http_application_duration_seconds", 347 | "Time spent in application code within HTTP reqs in seconds", 348 | ) 349 | 350 | @http_redis_duration_seconds = 351 | Summary.new( 352 | "http_redis_duration_seconds", 353 | "Time spent in Redis within HTTP reqs redis seconds", 354 | ) 355 | 356 | @http_sql_duration_seconds = 357 | Summary.new("http_sql_duration_seconds", "Time spent in SQL within HTTP reqs in seconds") 358 | 359 | @http_net_duration_seconds = 360 | Summary.new( 361 | "http_net_duration_seconds", 362 | "Time spent in external network requests within HTTP reqs in seconds", 363 | ) 364 | 365 | @http_queue_duration_seconds = 366 | Summary.new( 367 | "http_queue_duration_seconds", 368 | "Time spent queueing requests between NGINX and Ruby in seconds", 369 | ) 370 | 371 | @http_gc_duration_seconds = 372 | Summary.new( 373 | "http_gc_duration_seconds", 374 | "Time spent in garbage collection within HTTP reqs in seconds", 375 | ) 376 | 377 | @http_gc_major_count = 378 | Gauge.new("http_gc_major_count", "Number of major GC runs per request") 379 | 380 | @http_gc_minor_count = 381 | Gauge.new("http_gc_minor_count", "Number of minor GC runs per request") 382 | 383 | @http_sql_calls_per_request = 384 | Gauge.new("http_sql_calls_per_request", "How many SQL statements ran per request") 385 | 386 | @http_anon_cache_store = 387 | Counter.new( 388 | "http_anon_cache_store", 389 | "How many a payload is stored in redis for anonymous cache", 390 | ) 391 | 392 | @http_anon_cache_hit = 393 | Counter.new( 394 | "http_anon_cache_hit", 395 | "How many a payload from redis is used for anonymous cache", 396 | ) 397 | end 398 | end 399 | 400 | def process_web(metric) 401 | ensure_web_metrics 402 | 403 | labels = { 404 | success: (200..299).include?(metric.status_code), 405 | content_type: web_metric_content_type(metric), 406 | logged_in: metric.logged_in, 407 | } 408 | 409 | if observe_timings?(metric) 410 | labels[:controller] = metric.controller 411 | labels[:action] = metric.action 412 | else 413 | labels[:controller] = "other" 414 | labels[:action] = "other" 415 | end 416 
| 417 | duration = metric.duration 418 | 419 | @http_duration_seconds.observe(duration, labels) 420 | @http_requests_duration_seconds.observe(duration, labels) 421 | 422 | if duration 423 | application_duration = duration.dup 424 | application_duration -= metric.sql_duration if metric.sql_duration 425 | application_duration -= metric.redis_duration if metric.redis_duration 426 | application_duration -= metric.net_duration if metric.net_duration 427 | application_duration -= metric.gc_duration if metric.gc_duration 428 | @http_application_duration_seconds.observe(application_duration, labels) 429 | @http_requests_application_duration_seconds.observe(application_duration, labels) 430 | end 431 | 432 | @http_sql_duration_seconds.observe(metric.sql_duration, labels) 433 | @http_requests_sql_duration_seconds.observe(metric.sql_duration, labels) 434 | 435 | @http_redis_duration_seconds.observe(metric.redis_duration, labels) 436 | @http_requests_redis_duration_seconds.observe(metric.redis_duration, labels) 437 | 438 | @http_net_duration_seconds.observe(metric.net_duration, labels) 439 | @http_requests_net_duration_seconds.observe(metric.net_duration, labels) 440 | 441 | @http_queue_duration_seconds.observe(metric.queue_duration) 442 | @http_requests_queue_duration_seconds.observe(metric.queue_duration) 443 | 444 | @http_sql_calls_per_request.observe(metric.sql_calls, labels) 445 | 446 | if metric.gc_duration 447 | @http_gc_duration_seconds.observe(metric.gc_duration, labels) 448 | @http_requests_gc_duration_seconds.observe(metric.gc_duration, labels) 449 | end 450 | 451 | @http_gc_major_count.observe(metric.gc_major_count, labels) if metric.gc_major_count 452 | @http_gc_minor_count.observe(metric.gc_minor_count, labels) if metric.gc_minor_count 453 | 454 | if cache = metric.cache 455 | if cache == "store" 456 | @http_anon_cache_store.observe(1, labels) 457 | elsif cache == "true" 458 | @http_anon_cache_hit.observe(1, labels) 459 | end 460 | end 461 | 462 | db = metric.db || "default" 463 | 464 | if metric.tracked 465 | hash = { db: db } 466 | 467 | if metric.crawler 468 | hash[:type] = "crawler" 469 | hash[:device] = "crawler" 470 | else 471 | hash[:type] = metric.logged_in ? "logged_in" : "anon" 472 | hash[:device] = metric.mobile ? 
"mobile" : "desktop" 473 | end 474 | @page_views.observe(1, hash) 475 | end 476 | 477 | api_type = 478 | if metric.user_api 479 | "user" 480 | elsif metric.admin_api 481 | "admin" 482 | else 483 | "web" 484 | end 485 | 486 | hash = { db: db, api: api_type, verb: metric.verb } 487 | if metric.background 488 | hash[:type] = "background" 489 | hash[:background_type] = metric.background_type if metric.background_type 490 | # hijacked but never got the actual status, message bus 491 | if metric.status_code == 418 492 | hash[:status] = "-1" 493 | else 494 | hash[:status] = metric.status_code 495 | end 496 | else 497 | hash[:type] = "regular" 498 | hash[:status] = metric.status_code 499 | end 500 | 501 | @http_forced_anon_count.observe(1, hash) if metric.forced_anon 502 | @http_requests.observe(1, hash) 503 | end 504 | 505 | def prometheus_metrics 506 | metrics = web_metrics + process_metrics + job_metrics + @global_metrics 507 | metrics += @custom_metrics.values if @custom_metrics 508 | metrics 509 | end 510 | 511 | private 512 | 513 | def job_metrics 514 | if @scheduled_job_duration_seconds 515 | [ 516 | @scheduled_job_duration_seconds, 517 | @scheduled_job_count, 518 | @sidekiq_job_duration_seconds, 519 | @sidekiq_job_count, 520 | ] 521 | else 522 | [] 523 | end 524 | end 525 | 526 | def report_metric(instrument, metric, key) 527 | values = metric.send(key) 528 | return if values.nil? || values == {} 529 | default_labels = { type: metric.type, pid: metric.pid } 530 | 531 | if values.is_a?(Hash) 532 | values.each { |labels, value| instrument.observe(value, default_labels.merge(labels)) } 533 | else 534 | instrument.observe(values, default_labels) 535 | end 536 | end 537 | 538 | def process_metrics 539 | # this are only calculated when we ask for them on the fly 540 | return [] if @process_metrics.length == 0 541 | metrics = [] 542 | InternalMetric::Process::GAUGES.each do |key, name| 543 | gauge = Gauge.new(key.to_s, name) 544 | metrics << gauge 545 | @process_metrics.each { |metric| report_metric(gauge, metric, key) } 546 | end 547 | InternalMetric::Process::COUNTERS.each do |key, name| 548 | counter = Counter.new(key.to_s, name) 549 | metrics << counter 550 | @process_metrics.each { |metric| report_metric(counter, metric, key) } 551 | end 552 | metrics 553 | end 554 | 555 | def web_metrics 556 | if @page_views 557 | [ 558 | @page_views, 559 | @http_requests, 560 | @http_requests_duration_seconds, 561 | @http_requests_application_duration_seconds, 562 | @http_requests_redis_duration_seconds, 563 | @http_requests_sql_duration_seconds, 564 | @http_requests_net_duration_seconds, 565 | @http_requests_queue_duration_seconds, 566 | @http_requests_gc_duration_seconds, 567 | @http_duration_seconds, 568 | @http_application_duration_seconds, 569 | @http_redis_duration_seconds, 570 | @http_sql_duration_seconds, 571 | @http_net_duration_seconds, 572 | @http_queue_duration_seconds, 573 | @http_gc_duration_seconds, 574 | @http_gc_major_count, 575 | @http_gc_minor_count, 576 | @http_forced_anon_count, 577 | @http_sql_calls_per_request, 578 | @http_anon_cache_store, 579 | @http_anon_cache_hit, 580 | ] 581 | else 582 | [] 583 | end 584 | end 585 | 586 | def observe_timings?(metric) 587 | (metric.controller == "list" && metric.action == "latest") || 588 | (metric.controller == "list" && metric.action == "top") || 589 | (metric.controller == "topics" && metric.action == "show") || 590 | (metric.controller == "users" && metric.action == "show") || 591 | (metric.controller == "categories" && metric.action == 
"categories_and_latest") 592 | end 593 | 594 | def web_metric_content_type(metric) 595 | if metric.json 596 | "json" 597 | elsif metric.html 598 | "html" 599 | else 600 | "other" 601 | end 602 | end 603 | end 604 | end 605 | --------------------------------------------------------------------------------