├── .gitignore ├── .travis.yml ├── DEBIAN ├── conffiles ├── control └── md5sums ├── LICENSE ├── README.md ├── display.go ├── docs ├── ordered-by-diskspace.png ├── ordered-by-read-latency.png └── ordered-by-write-rate.png ├── handlers.go ├── main.go ├── metricscollectormx4j.go ├── sorting.go └── types ├── cassandra.go ├── messages.go └── xml.go /.gitignore: -------------------------------------------------------------------------------- 1 | ctop 2 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | 3 | go: 4 | - 1.4 5 | - tip -------------------------------------------------------------------------------- /DEBIAN/conffiles: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrusty/ctop/8c6719d7470131489a5bb134a28d9755d5aede3d/DEBIAN/conffiles -------------------------------------------------------------------------------- /DEBIAN/control: -------------------------------------------------------------------------------- 1 | Package: ctop 2 | Version: 1.5 3 | Architecture: amd64 4 | Essential: no 5 | Section: Applications 6 | Priority: optional 7 | Depends: libmx4j-java 8 | Maintainer: devops@hailocab.com 9 | Installed-Size: 1000 10 | Description: C-TOP (Top for Cassandra) 11 | -------------------------------------------------------------------------------- /DEBIAN/md5sums: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrusty/ctop/8c6719d7470131489a5bb134a28d9755d5aede3d/DEBIAN/md5sums -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Hailo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a 
copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CTOP ("Top for Cassandra") 2 | 3 | [![Build Status](https://img.shields.io/travis/hailocab/ctop/master.svg "Build Status")](https://travis-ci.org/hailocab/ctop) 4 | 5 | CTOP is a tool which allows you to quickly find out what's happening on a machine running Cassandra. It is particularly useful on a cluster with multiple-tenants, multiple-applications, and large numbers of tables. If you suspect that the performance is not good, then you can use this to figure out which table is giving you trouble. 
6 | 7 | ## Screenshots: 8 | ![Ordered by read-latency](docs/ordered-by-read-latency.png "Ordered by read-latency") 9 | ![Ordered by disk-space](docs/ordered-by-diskspace.png "Ordered by disk-space") 10 | ![Ordered by write-rate](docs/ordered-by-write-rate.png "Ordered by write-rate") 11 | 12 | ## Installation (deb): 13 | * Download a [DEB file](https://github.com/hailocab/ctop/releases/download/1.3/ctop_1.3_amd64.deb "CTOP 1.3") from the [releases](https://github.com/hailocab/ctop/releases "releases") section 14 | * Install mx4j "apt-get install libmx4j-java" 15 | * Install CTOP "dpkg -i ctop_1.3_amd64.deb" 16 | * Find out what's killing your Cassandra cluster 17 | 18 | ## Installation (binary): 19 | * Download a binary (or compile one yourself) 20 | * Install mx4j jars in the java/cassandra classpath ([download from here](http://sourceforge.net/projects/mx4j/files/MX4J%20Binary/)) 21 | * Re-start cassandra 22 | * Run CTOP 23 | 24 | ## How to use it: 25 | * Run the binary 26 | * Allow CTOP some time to collect metrics, then press SPACE to refresh the display. You will see your tables listed by Reads/s (in descending order). 27 | * You can press SPACE at any time to refresh the display 28 | * The numbers 1 through 5 change the sorting order: 29 | 1: Order by Reads/s 30 | 2: Order by Writes/s 31 | 3: Order by the amount of disk-space used (in Bytes) 32 | 4: Order by read-latency (in milliseconds) 33 | 5: Order by write-latency (in milliseconds) 34 | * Pressing "Q" will quit 35 | * Metrics are for one node only (not cluster-wide) 36 | 37 | ## Notes 38 | * CTOP should run on anything. It was originally developed on Ubuntu, and 39 | it's reported to run on CentOS and MacOS X.
40 | 41 | ## Pre-requisites: 42 | * CTOP uses "libmx4j-java" (an HTTP -> JMX) to retrieve JMX metrics from Cassandra (there was no easy way to query JMX directly), so this needs to be in the class-path where Cassandra can find it upon startup (with Ubuntu it is enough to do "apt-get install libmx4j-java", then to re-start Cassandra). 43 | * Cassandra needs to be told how to run MX4J (the default is to listen to the interface that Cassandra listens on): 44 | ``` 45 | MX4J_ADDRESS="-Dmx4jaddress=127.0.0.1" 46 | MX4J_PORT="-Dmx4jport=8081" 47 | JVM_OPTS="$JVM_OPTS $MX4J_ADDRESS" 48 | JVM_OPTS="$JVM_OPTS $MX4J_PORT" 49 | ``` 50 | 51 | -------------------------------------------------------------------------------- /display.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "github.com/nsf/termbox-go" 6 | "time" 7 | ) 8 | 9 | // Reads log-messages out of the logMessage chan and displays them to screen: 10 | func showLogs() { 11 | termbox.Clear(termbox.ColorDefault, termbox.ColorDefault) 12 | 13 | printfTb(2, 1, messageForeGroundColour, termbox.ColorBlack, " |") 14 | printfTb(2, 1, messageForeGroundColour|termbox.AttrBold, termbox.ColorBlack, "Severity") 15 | printfTb(13, 1, messageForeGroundColour|termbox.AttrBold, termbox.ColorBlack, "Message") 16 | 17 | for y := 2; y < termHeight; y++ { 18 | select { 19 | // attempt to receive from channel: 20 | case logMessage := <-messageChannel: 21 | printfTb(2, y, messageForeGroundColour, termbox.ColorBlack, "%s", logMessage.Severity) 22 | printfTb(13, y, messageForeGroundColour, termbox.ColorBlack, "%s", logMessage.Message) 23 | default: 24 | printfTb(2, y, messageForeGroundColour, termbox.ColorBlack, "No more logs") 25 | return 26 | } 27 | } 28 | } 29 | 30 | // Draws the stats table: a header row, then one row per stats entry in sorted order, stopping at the bottom of the terminal: 31 | func showStats() { 32 | termbox.Clear(termbox.ColorDefault, termbox.ColorDefault) 33 | 34 | // Positions: 2 22 42 52 63 79 94 35 | printfTb(2, 1, 
messageForeGroundColour|termbox.AttrBold, termbox.ColorBlack, "KeySpace ColumnFamily Reads/s Writes/s LiveSpace(B) R-Latency(ms) W-Latency(ms)") 36 | printfTb(20, 1, messageForeGroundColour, termbox.ColorBlack, "|") 37 | printfTb(40, 1, messageForeGroundColour, termbox.ColorBlack, "|") 38 | printfTb(50, 1, messageForeGroundColour, termbox.ColorBlack, "|") 39 | printfTb(61, 1, messageForeGroundColour, termbox.ColorBlack, "|") 40 | printfTb(76, 1, messageForeGroundColour, termbox.ColorBlack, "|") 41 | printfTb(92, 1, messageForeGroundColour, termbox.ColorBlack, "|") 42 | 43 | y := 2 44 | 45 | // Hold the stats lock until we return: the rows below read the shared stats map, which handleMetrics() writes from another goroutine: 46 | statsMutex.Lock() 47 | defer statsMutex.Unlock() 48 | sortedStats := sortedKeys(stats) 49 | 50 | for _, cfStatsKey := range sortedStats { 51 | if y < termHeight { 52 | // printfTb(2, y, messageForeGroundColour, termbox.ColorBlack, "(%s:%s) r:%d, w:%d", cfStats.KeySpace, cfStats.ColumnFamily, cfStats.ReadCount, cfStats.WriteCount) 53 | printfTb(2, y, messageForeGroundColour, termbox.ColorBlack, "%s", stats[cfStatsKey].KeySpace) 54 | printfTb(20, y, messageForeGroundColour, termbox.ColorBlack, " %s", stats[cfStatsKey].ColumnFamily) 55 | printfTb(40, y, messageForeGroundColour, termbox.ColorBlack, " %f", stats[cfStatsKey].ReadRate) 56 | printfTb(50, y, messageForeGroundColour, termbox.ColorBlack, " %f", stats[cfStatsKey].WriteRate) 57 | printfTb(61, y, messageForeGroundColour, termbox.ColorBlack, " %d", stats[cfStatsKey].LiveDiskSpaceUsed) 58 | printfTb(76, y, messageForeGroundColour, termbox.ColorBlack, " %f", stats[cfStatsKey].ReadLatency) 59 | printfTb(92, y, messageForeGroundColour, termbox.ColorBlack, " %f", stats[cfStatsKey].WriteLatency) 60 | y++ 61 | } 62 | } 63 | } 64 | 65 | // Refreshes the on-screen data: 66 | func refreshScreen() { 67 | for { 68 | 69 | if dataDisplayed == "Metrics" { 70 | showStats() 71 | } 72 | 73 | if dataDisplayed == "Logs" { 74 | showLogs() 75 | } 76 | 77 | // Sleep: 78 | 
time.Sleep(refreshTime) 79 | } 80 | } 81 | 82 | // Print function for TermBox: 83 | func printTb(x, y int, fg, bg termbox.Attribute, msg string) { 84 | for _, c := range msg { 85 | termbox.SetCell(x, y, c, fg, bg) 86 | x++ 87 | } 88 | } 89 | 90 | // PrintF function for TermBox: 91 | func printfTb(x, y int, fg, bg termbox.Attribute, format string, args ...interface{}) { 92 | s := fmt.Sprintf(format, args...) 93 | printTb(x, y, fg, bg, s) 94 | } 95 | 96 | // Draw the border around the edge of the screen, plus the title, the key menu, the highlighted sort mode and the current display mode: 97 | func drawBorder(width int, height int) { 98 | // Top and bottom edges: 99 | for x := 0; x < width; x++ { 100 | termbox.SetCell(x, 0, '-', defaultForeGroundColour, defaultBackGroundColour) 101 | termbox.SetCell(x, height-1, '-', defaultForeGroundColour, defaultBackGroundColour) 102 | } 103 | 104 | // Left and right sides: 105 | for y := 0; y < height; y++ { 106 | termbox.SetCell(0, y, '|', defaultForeGroundColour, defaultBackGroundColour) 107 | termbox.SetCell(width-1, y, '|', defaultForeGroundColour, defaultBackGroundColour) 108 | } 109 | 110 | // Corners: 111 | termbox.SetCell(0, 0, '+', defaultForeGroundColour, defaultBackGroundColour) 112 | termbox.SetCell(width-1, 0, '+', defaultForeGroundColour, defaultBackGroundColour) 113 | termbox.SetCell(0, height-1, '+', defaultForeGroundColour, defaultBackGroundColour) 114 | termbox.SetCell(width-1, height-1, '+', defaultForeGroundColour, defaultBackGroundColour) 115 | 116 | // Title: 117 | printTb(1, 0, termbox.ColorBlue|termbox.AttrBold, defaultBackGroundColour, " C-top ") 118 | printTb(8, 0, termbox.ColorBlue, defaultBackGroundColour, "(top for Cassandra) connected to ") 119 | printTb(41, 0, termbox.ColorBlue|termbox.AttrBold, defaultBackGroundColour, *cassandraHost) 120 | 121 | // Menu: 122 | // Positions: 2 15 28 42 58 76 94 105 113 123 | printTb(1, height-1, termbox.ColorBlue, defaultBackGroundColour, " Organise by (1)Reads/s / (2)Writes/s / (3)Space-used / (4)Read-latency / (5)Write-latency, (M)etrics, (L)ogs, (Q)uit ") 124 | 
printTb(15, height-1, termbox.ColorBlue|termbox.AttrBold, defaultBackGroundColour, "1") 125 | printTb(28, height-1, termbox.ColorBlue|termbox.AttrBold, defaultBackGroundColour, "2") 126 | printTb(42, height-1, termbox.ColorBlue|termbox.AttrBold, defaultBackGroundColour, "3") 127 | printTb(58, height-1, termbox.ColorBlue|termbox.AttrBold, defaultBackGroundColour, "4") 128 | printTb(76, height-1, termbox.ColorBlue|termbox.AttrBold, defaultBackGroundColour, "5") 129 | printTb(94, height-1, termbox.ColorBlue|termbox.AttrBold, defaultBackGroundColour, "M") 130 | printTb(105, height-1, termbox.ColorBlue|termbox.AttrBold, defaultBackGroundColour, "L") 131 | printTb(113, height-1, termbox.ColorBlue|termbox.AttrBold, defaultBackGroundColour, "Q") 132 | 133 | // Highlight the sorting mode: 134 | if dataSortedBy == "Reads" { 135 | printTb(15, height-1, termbox.ColorWhite|termbox.AttrBold, defaultBackGroundColour, "1") 136 | } 137 | if dataSortedBy == "Writes" { 138 | printTb(28, height-1, termbox.ColorWhite|termbox.AttrBold, defaultBackGroundColour, "2") 139 | } 140 | if dataSortedBy == "Space" { 141 | printTb(42, height-1, termbox.ColorWhite|termbox.AttrBold, defaultBackGroundColour, "3") 142 | } 143 | if dataSortedBy == "ReadLatency" { 144 | printTb(58, height-1, termbox.ColorWhite|termbox.AttrBold, defaultBackGroundColour, "4") 145 | } 146 | if dataSortedBy == "WriteLatency" { 147 | printTb(76, height-1, termbox.ColorWhite|termbox.AttrBold, defaultBackGroundColour, "5") 148 | } 149 | 150 | // Show what mode we're in: 151 | if dataDisplayed == "Metrics" { 152 | printfTb(termWidth-10, 0, termbox.ColorBlue|termbox.AttrBold, termbox.ColorBlack, " Metrics ") 153 | } 154 | if dataDisplayed == "Logs" { 155 | printfTb(termWidth-7, 0, termbox.ColorBlue|termbox.AttrBold, termbox.ColorBlack, " Logs ") 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /docs/ordered-by-diskspace.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrusty/ctop/8c6719d7470131489a5bb134a28d9755d5aede3d/docs/ordered-by-diskspace.png -------------------------------------------------------------------------------- /docs/ordered-by-read-latency.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrusty/ctop/8c6719d7470131489a5bb134a28d9755d5aede3d/docs/ordered-by-read-latency.png -------------------------------------------------------------------------------- /docs/ordered-by-write-rate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrusty/ctop/8c6719d7470131489a5bb134a28d9755d5aede3d/docs/ordered-by-write-rate.png -------------------------------------------------------------------------------- /handlers.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "github.com/hailocab/ctop/types" 5 | "fmt" 6 | "github.com/nsf/termbox-go" 7 | ) 8 | 9 | // Logging to a channel (from anywhere): 10 | func logToChannel(severity string, message string) { 11 | // Make a new LogMessage struct: 12 | logMessage := types.LogMessage{ 13 | Severity: severity, 14 | Message: message, 15 | } 16 | 17 | // Put it in the messages channel: 18 | select { 19 | case messageChannel <- logMessage: 20 | 21 | default: 22 | 23 | } 24 | } 25 | 26 | // Takes metrics off the channel and folds each one into the shared stats map (loops forever, never returns): 27 | func handleMetrics() { 28 | 29 | var cfStats types.CFStats 30 | 31 | for { 32 | // Get a metric from the channel: 33 | cfMetric := <-metricsChannel 34 | logToChannel("debug", fmt.Sprintf("Received a metric! 
%s", cfMetric.MetricName)) 35 | 36 | // Build the key: 37 | statName := cfMetric.KeySpace + ":" + cfMetric.ColumnFamily 38 | 39 | statsMutex.Lock() 40 | // No defer here: this function never returns, so a deferred Unlock would never run and one would pile up per metric. The lock is released at the bottom of the loop. 41 | 42 | // See if we already have a stats-entry: 43 | if _, ok := stats[statName]; ok { 44 | // Use the existing stats-entry: 45 | logToChannel("debug", fmt.Sprintf("Updating existing stat (%s)", statName)) 46 | cfStats = stats[statName] 47 | } else { 48 | // Add a new entry to the map: 49 | logToChannel("debug", fmt.Sprintf("Adding new stat (%s)", statName)) 50 | cfStats = types.CFStats{ 51 | ReadCount: 0, 52 | ReadCountTS: 0, 53 | ReadLatency: 0.0, 54 | ReadRate: 0.0, 55 | WriteCount: 0, 56 | WriteCountTS: 0, 57 | WriteLatency: 0.0, 58 | WriteRate: 0.0, 59 | KeySpace: cfMetric.KeySpace, 60 | ColumnFamily: cfMetric.ColumnFamily, 61 | } 62 | } 63 | 64 | // Figure out which metric we need to update: 65 | if cfMetric.MetricName == "ReadCount" { 66 | // Total read count: 67 | interval := cfMetric.MetricTimeStamp - cfStats.ReadCountTS 68 | if cfStats.ReadCountTS == 0 { 69 | cfStats.ReadRate = 0.0 70 | } else { 71 | cfStats.ReadRate = float64(cfMetric.MetricIntValue-cfStats.ReadCount) / float64(interval) 72 | } 73 | cfStats.ReadCount = cfMetric.MetricIntValue 74 | cfStats.ReadCountTS = cfMetric.MetricTimeStamp 75 | stats[statName] = cfStats 76 | 77 | } else if cfMetric.MetricName == "WriteCount" { 78 | // Total write count: 79 | interval := cfMetric.MetricTimeStamp - cfStats.WriteCountTS 80 | if cfStats.WriteCountTS == 0 { 81 | cfStats.WriteRate = 0.0 82 | } else { 83 | cfStats.WriteRate = float64(cfMetric.MetricIntValue-cfStats.WriteCount) / float64(interval) 84 | } 85 | cfStats.WriteCount = cfMetric.MetricIntValue 86 | cfStats.WriteCountTS = cfMetric.MetricTimeStamp 87 | stats[statName] = cfStats 88 | 89 | } else if cfMetric.MetricName == "LiveDiskSpaceUsed" { 90 | // Total disk space used(k): 91 | cfStats.LiveDiskSpaceUsed = cfMetric.MetricIntValue 92 | stats[statName] = cfStats 93 | 94 | 
} else if cfMetric.MetricName == "RecentReadLatencyMicros" { 95 | // ReadLatency (MicroSeconds): 96 | if cfMetric.MetricFloatValue > 0 { 97 | cfStats.ReadLatency = cfMetric.MetricFloatValue / 1000 98 | stats[statName] = cfStats 99 | } 100 | 101 | } else if cfMetric.MetricName == "RecentWriteLatencyMicros" { 102 | // WriteLatency (MicroSeconds): 103 | if cfMetric.MetricFloatValue > 0 { 104 | cfStats.WriteLatency = cfMetric.MetricFloatValue / 1000 105 | stats[statName] = cfStats 106 | } 107 | } 108 | // Release the lock taken at the top of this iteration: 109 | statsMutex.Unlock() 110 | 111 | } 112 | 113 | } 114 | 115 | // Logs a key-press that main() did not handle (%q because ev.Ch is a rune, not a string): 116 | func handleKeypress(ev *termbox.Event) { 117 | logToChannel("debug", fmt.Sprintf("Key pressed: %q", ev.Ch)) 118 | } 119 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "github.com/hailocab/ctop/types" 5 | "flag" 6 | "fmt" 7 | "github.com/nsf/termbox-go" 8 | "os" 9 | "sync" 10 | "time" 11 | ) 12 | 13 | var metricsChannel = make(chan types.CFMetric, 100) 14 | var messageChannel = make(chan types.LogMessage, 100) 15 | var stats = make(map[string]types.CFStats) 16 | var statsMutex sync.Mutex 17 | var dataDisplayed = "Metrics" 18 | var dataSortedBy = "Reads" 19 | var termWidth = 80 20 | var termHeight = 25 21 | var refreshTime = 1 * time.Second 22 | var localHostName, _ = os.Hostname() 23 | var printVersion = flag.Bool("version", false, "Print version number and exit") 24 | var cassandraHost = flag.String("host", localHostName, "IP address of the Cassandra host to run against") 25 | var cassandraMX4jPort = flag.String("port", "8081", "TCP port of MX4J on the Cassandra host") 26 | 27 | const ( 28 | defaultForeGroundColour = termbox.ColorWhite 29 | defaultBackGroundColour = termbox.ColorBlack 30 | messageForeGroundColour = termbox.ColorMagenta 31 | releaseVersion = 1.5 32 | ) 33 | 34 | func init() { 35 | // Set the vars 
from the command-line args: 36 | flag.Parse() 37 | 38 | // Print the version and quit (if we've been asked to): 39 | if *printVersion == true { 40 | fmt.Printf("CTOP version %v\n", releaseVersion) 41 | os.Exit(0) 42 | } 43 | } 44 | 45 | // Do all the things: check the MX4J connection (falling back to localhost), set up termbox, start the collector/handler/refresh goroutines, then process terminal events until "q" is pressed: 46 | func main() { 47 | 48 | // Check our connection to MX4J: 49 | if checkConnection(*cassandraHost, *cassandraMX4jPort) != nil { 50 | fmt.Printf("Can't connect to stats-provider (%s)! Trying localhost before bailing...\n", *cassandraHost) 51 | if checkConnection("localhost", *cassandraMX4jPort) != nil { 52 | fmt.Println("Can't even connect to localhost! Check your destination host and port and try again.") 53 | os.Exit(2) 54 | } else { 55 | fmt.Println("Proceeding with localhost..") 56 | *cassandraHost = "localhost" 57 | } 58 | } 59 | 60 | // Initialise "termbox" (console interface): 61 | err := termbox.Init() 62 | if err != nil { 63 | panic(err) 64 | } 65 | defer termbox.Close() 66 | 67 | // Get the initial window-size: 68 | termWidth, termHeight = termbox.Size() 69 | 70 | // Get the display running in the right mode: 71 | termbox.SetInputMode(termbox.InputEsc | termbox.InputMouse) 72 | 73 | // Render the initial "UI": 74 | termbox.Clear(termbox.ColorDefault, termbox.ColorDefault) 75 | drawBorder(termWidth, termHeight) 76 | termbox.Flush() 77 | 78 | // Run the metrics-collector: 79 | go MetricsCollector() 80 | go handleMetrics() 81 | go refreshScreen() 82 | 83 | loop: 84 | for { 85 | switch ev := termbox.PollEvent(); ev.Type { 86 | // Key pressed: 87 | case termbox.EventKey: 88 | 89 | // Handle keypresses: 90 | if ev.Ch == 113 { 91 | // "q" (quit) - ev.Ch is a rune, so it is printed with %c: 92 | printfTb(2, 1, messageForeGroundColour, termbox.ColorBlack, "Goodbye!: %c", ev.Ch) 93 | break loop 94 | } else if ev.Ch == 0 { // "Space-bar (refresh)" 95 | showStats() 96 | } else if ev.Ch == 109 { // "M" 97 | dataDisplayed = "Metrics" 98 | showStats() 99 | } else if ev.Ch == 108 { // "L" 100 | dataDisplayed = "Logs" 101 | } else if ev.Ch == 49 { // "1" 102 
| dataSortedBy = "Reads" 103 | } else if ev.Ch == 50 { // "2" 104 | dataSortedBy = "Writes" 105 | } else if ev.Ch == 51 { // "3" 106 | dataSortedBy = "Space" 107 | } else if ev.Ch == 52 { // "4" 108 | dataSortedBy = "ReadLatency" 109 | } else if ev.Ch == 53 { // "5" 110 | dataSortedBy = "WriteLatency" 111 | } else { 112 | // Anything else: 113 | handleKeypress(&ev) 114 | } 115 | 116 | // Redraw the display: 117 | drawBorder(termWidth, termHeight) 118 | termbox.Flush() 119 | 120 | // Window is re-sized: 121 | case termbox.EventResize: 122 | // Remember the new sizes: 123 | termWidth = ev.Width 124 | termHeight = ev.Height 125 | 126 | // Redraw the screen: 127 | drawBorder(termWidth, termHeight) 128 | termbox.Flush() 129 | 130 | // Error: 131 | case termbox.EventError: 132 | panic(ev.Err) 133 | 134 | default: 135 | } 136 | } 137 | } 138 | -------------------------------------------------------------------------------- /metricscollectormx4j.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "github.com/hailocab/ctop/types" 5 | "encoding/xml" 6 | "fmt" 7 | "io/ioutil" 8 | "net/http" 9 | "strconv" 10 | "strings" 11 | "time" 12 | ) 13 | 14 | var ( 15 | // Define a list of metrics to collect (sticking to 5 important ones to save on HTTP calls): 16 | cfMetrics = []string{"ReadCount", "WriteCount", "LiveDiskSpaceUsed", "RecentReadLatencyMicros", "RecentWriteLatencyMicros"} 17 | 18 | // "ReadCount" - The number of reads to a CF 19 | // "WriteCount" - The number of writes to a CF 20 | // "LiveDiskSpaceUsed" - Disk space used 21 | // "MeanRowSize" - Mean row-size 22 | // "MaxRowSize" - Max row-size 23 | // "RecentReadLatencyMicros" - Read latency 24 | // "RecentWriteLatencyMicros" - Write latency 25 | ) 26 | 27 | // Checks the connection to MX4J: 28 | func checkConnection(cassandraAddress string, cassandraMX4jPort string) error { 29 | // Request the root URL: 30 | URL := fmt.Sprintf("http://%s:%s/", 
cassandraAddress, cassandraMX4jPort) 31 | httpResponse, err := http.Get(URL) 32 | if err == nil { httpResponse.Body.Close() } // only the error matters here, but the body must still be closed 33 | return err 34 | } 35 | 36 | // Return a list of keySpaces and columnFamilies from MX4J. If the HTTP request itself fails, an empty Cluster and the error are returned: 37 | func getCluster(cassandraIP string, cassandraMX4jPort string) (types.Cluster, error) { 38 | 39 | logToChannel("info", fmt.Sprintf("Getting list of KeySpaces and ColumnFamilies from (%s:%s)", cassandraIP, cassandraMX4jPort)) 40 | 41 | // Create a new MX4JCFList{} to unmarshal the XML into: 42 | columnFamilyList := types.MX4JCFList{} 43 | 44 | // Build the request URL: 45 | URL := fmt.Sprintf("http://%s:%s/server?instanceof=org.apache.cassandra.db.ColumnFamilyStore&template=identity", cassandraIP, cassandraMX4jPort) 46 | 47 | // Request the data from MX4J (on failure there is no response to read, so bail out instead of dereferencing nil): 48 | httpResponse, err := http.Get(URL) 49 | if err != nil { 50 | logToChannel("error", fmt.Sprintf("Trouble talking to MX4J (%s)\n%s", URL, err)) 51 | return types.Cluster{}, err 52 | } 53 | defer httpResponse.Body.Close() 54 | logToChannel("debug", fmt.Sprintf("Got HTTP response code (%d)", httpResponse.StatusCode)) 55 | // Read the response: 56 | xmlResponse, err := ioutil.ReadAll(httpResponse.Body) 57 | if err != nil { 58 | logToChannel("error", fmt.Sprintf("Couldn't get response body!\n%s", err)) 59 | } 60 | 61 | // UnMarshal the XML response: 62 | err = xml.Unmarshal([]byte(xmlResponse), &columnFamilyList) 63 | if err != nil { 64 | logToChannel("error", fmt.Sprintf("Couldn't unmarshal the response!\n%s", err)) 65 | } else { 66 | logToChannel("debug", fmt.Sprintf("Got a ColumnFamily list - great success!")) 67 | //log.Debugf("- %s", columnFamilyList) 68 | } 69 | 70 | // Create a new types.Cluster{}: 71 | cluster := types.Cluster{ 72 | Name: "cruft", 73 | KeySpaces: make(map[string]types.KeySpace), 74 | } 75 | 76 | // Populate the Cluster{} with the results returned from MX4J: 77 | for i := range columnFamilyList.CFList { 78 | // Split up the comma-delimited metadata string: 79 | columnFamilyMetaData := strings.Split(columnFamilyList.CFList[i].ColmnFamily, ",") 80 | 81 | // Now split these values 
up by "=" to get the metadata we're after: 82 | keySpaceName := strings.Split(columnFamilyMetaData[1], "=") 83 | 84 | // Create a new KeySpace{}: 85 | cluster.KeySpaces[keySpaceName[1]] = types.KeySpace{ 86 | ColumnFamilies: make(map[string]types.ColumnFamily), 87 | } 88 | } 89 | 90 | for i := range columnFamilyList.CFList { 91 | // Split up the comma-delimited metadata string: 92 | columnFamilyMetaData := strings.Split(columnFamilyList.CFList[i].ColmnFamily, ",") 93 | 94 | // Now split these values up by "=" to get the metadata we're after: 95 | columnFamilyType := strings.Split(columnFamilyMetaData[0], "=") 96 | keySpaceName := strings.Split(columnFamilyMetaData[1], "=") 97 | columnFamilyName := strings.Split(columnFamilyMetaData[2], "=") 98 | 99 | // Create a new ColumnFamily{}: 100 | if columnFamilyType[1] == "ColumnFamilies" { 101 | logToChannel("debug", fmt.Sprintf("Found KS:CF - %s:%s (%s)", keySpaceName[1], columnFamilyName[1], columnFamilyType[1])) 102 | cluster.KeySpaces[keySpaceName[1]].ColumnFamilies[columnFamilyName[1]] = types.ColumnFamily{} 103 | } 104 | } 105 | 106 | return cluster, nil 107 | } 108 | 109 | // Retrieve metrics from MX4J: 110 | func getCFMetrics(cluster types.Cluster, cassandraIP string, cassandraPort string) (types.Cluster, error) { 111 | 112 | logToChannel("debug", fmt.Sprintf("Getting metrics from (%s:%s)", cassandraIP, cassandraPort)) 113 | 114 | // Iterate through our Cluster{}: 115 | for name, keySpace := range cluster.KeySpaces { 116 | for columnFamily := range keySpace.ColumnFamilies { 117 | 118 | // Get the CFMetrics: 119 | for i := range cfMetrics { 120 | 121 | // Create a new MX4JCassandraCFLongData{} to unmarshal the XML into: 122 | metric := types.MX4JCassandraCFLongData{} 123 | 124 | logToChannel("info", fmt.Sprintf("Getting %s:%s:%s", name, columnFamily, cfMetrics[i])) 125 | 126 | // Build the request URL: 127 | URL := 
fmt.Sprintf("http://%s:%s/getattribute?objectname=org.apache.cassandra.db:type=ColumnFamilies,keyspace=%s,columnfamily=%s&attribute=%s&format=long&template=identity", cassandraIP, cassandraPort, name, columnFamily, cfMetrics[i]) 128 | 129 | // Request the data from MX4J (if the request fails there is no response to read, so skip this metric): 130 | httpResponse, err := http.Get(URL) 131 | if err != nil { 132 | logToChannel("error", fmt.Sprintf("Trouble talking to MX4J (%s)\n%s", URL, err)) 133 | continue 134 | } 135 | logToChannel("debug", fmt.Sprintf("Got HTTP response code (%d)", httpResponse.StatusCode)) 136 | 137 | // Read the response, then close the body straight away (a defer inside this loop would not run until the function returns): 138 | xmlResponse, err := ioutil.ReadAll(httpResponse.Body) 139 | httpResponse.Body.Close() 140 | if err != nil { 141 | logToChannel("error", fmt.Sprintf("Couldn't get response body!\n%s", err)) 142 | } 143 | // UnMarshal the XML response: 144 | err = xml.Unmarshal([]byte(xmlResponse), &metric) 145 | if err != nil { 146 | logToChannel("error", fmt.Sprintf("Couldn't unmarshal the response!\n%s", err)) 147 | } else { 148 | logToChannel("debug", fmt.Sprintf("Got a metric - GREAT SUCCESS!")) 149 | 150 | // Make an int64: 151 | metricIntValue, _ := strconv.ParseInt(metric.CFLongData.Value, 0, 64) 152 | metricFloatValue, _ := strconv.ParseFloat(metric.CFLongData.Value, 64) 153 | 154 | // Make a new Metric struct: 155 | cfMetric := types.CFMetric{ 156 | KeySpace: name, 157 | ColumnFamily: columnFamily, 158 | MetricName: metric.CFLongData.Name, 159 | MetricIntValue: metricIntValue, 160 | MetricFloatValue: metricFloatValue, 161 | MetricTimeStamp: time.Now().Unix(), 162 | } 163 | 164 | // Put it in the metrics channel: 165 | select { 166 | case metricsChannel <- cfMetric: 167 | logToChannel("debug", fmt.Sprintf("Sent a metric.")) 168 | default: 169 | logToChannel("info", fmt.Sprintf("Couldn't send metric!")) 170 | } 171 | } 172 | } 173 | } 174 | } 175 | 176 | return cluster, nil 177 | } 178 | 179 | // Collects actual metrics 180 | func MetricsCollector() { 181 | 182 | // Get a list of cluster KeySpaces and ColumnFamilies from MX4J: 183 
| cluster, err := getCluster(*cassandraHost, *cassandraMX4jPort) 184 | if err != nil { 185 | logToChannel("error", fmt.Sprintf("Couldn't get cluster schema!\n%s", err)) 186 | } 187 | 188 | for { 189 | // Get metrics for each ColumnFamily from MX4J: 190 | cluster, err = getCFMetrics(cluster, *cassandraHost, *cassandraMX4jPort) 191 | if err != nil { 192 | logToChannel("error", fmt.Sprintf("Couldn't get metrics!\n%s", err)) 193 | } 194 | time.Sleep(5 * time.Second) 195 | } 196 | 197 | } 198 | -------------------------------------------------------------------------------- /sorting.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "github.com/hailocab/ctop/types" 5 | "sort" 6 | ) 7 | 8 | // sortedMap pairs the stats map (m) with a slice of its keys (s); sorting reorders s only, m is just looked up: 9 | type sortedMap struct { 10 | m map[string]types.CFStats 11 | s []string 12 | } 13 | 14 | // Len returns the number of entries (sortedKeys builds s with exactly len(m) keys, so len(m) equals len(s)): 15 | func (sm *sortedMap) Len() int { 16 | return len(sm.m) 17 | } 18 | 19 | // Less orders keys by the attribute named in dataSortedBy, largest value first (note the ">" comparisons): 20 | func (sm *sortedMap) Less(i, j int) bool { 21 | if dataSortedBy == "Reads" { 22 | return sm.m[sm.s[i]].ReadRate > sm.m[sm.s[j]].ReadRate 23 | } 24 | if dataSortedBy == "Writes" { 25 | return sm.m[sm.s[i]].WriteRate > sm.m[sm.s[j]].WriteRate 26 | } 27 | if dataSortedBy == "Space" { 28 | return sm.m[sm.s[i]].LiveDiskSpaceUsed > sm.m[sm.s[j]].LiveDiskSpaceUsed 29 | } 30 | if dataSortedBy == "ReadLatency" { 31 | return sm.m[sm.s[i]].ReadLatency > sm.m[sm.s[j]].ReadLatency 32 | } 33 | if dataSortedBy == "WriteLatency" { 34 | return sm.m[sm.s[i]].WriteLatency > sm.m[sm.s[j]].WriteLatency 35 | } 36 | // Default to "Reads": 37 | return sm.m[sm.s[i]].ReadRate > sm.m[sm.s[j]].ReadRate 38 | } 39 | 40 | // Swap exchanges two keys in the key slice: 41 | func (sm *sortedMap) Swap(i, j int) { 42 | sm.s[i], sm.s[j] = sm.s[j], sm.s[i] 43 | } 44 | 45 | // sortedKeys returns the keys of m ordered by the current sort mode (see Less): 46 | func sortedKeys(m map[string]types.CFStats) 
[]string { 47 | sm := new(sortedMap) 48 | sm.m = m 49 | sm.s = make([]string, len(m)) 50 | i := 0 51 | for key := range m { 52 | sm.s[i] = key 53 | i++ 54 | } 55 | sort.Sort(sm) 56 | return sm.s 57 | } 58 | -------------------------------------------------------------------------------- /types/cassandra.go: -------------------------------------------------------------------------------- 1 | package types 2 | 3 | type ( 4 | ColumnFamily struct { 5 | ReadCount int64 6 | WriteCount int64 7 | LiveDiskSpaceUsed int64 8 | MeanRowSize int64 9 | MaxRowSize int64 10 | RecentSSTablesPerReadHistogram map[int]int 11 | RecentReadLatencyHistogramMicros map[int]int 12 | RecentWriteLatencyHistogramMicros map[int]int 13 | EstimatedColumnCountHistogram map[int]int 14 | EstimatedRowSizeHistogram map[int]int 15 | } 16 | 17 | KeySpace struct { 18 | ColumnFamilies map[string]ColumnFamily 19 | } 20 | 21 | Cluster struct { 22 | Name string 23 | KeySpaces map[string]KeySpace 24 | } 25 | ) 26 | -------------------------------------------------------------------------------- /types/messages.go: -------------------------------------------------------------------------------- 1 | package types 2 | 3 | // import "time" 4 | 5 | type ( 6 | LogMessage struct { 7 | Severity string 8 | Message string 9 | } 10 | 11 | CFStats struct { 12 | KeySpace string 13 | ColumnFamily string 14 | ReadCount int64 15 | ReadCountTS int64 16 | ReadLatency float64 17 | ReadRate float64 18 | WriteCount int64 19 | WriteCountTS int64 20 | WriteLatency float64 21 | WriteRate float64 22 | LiveDiskSpaceUsed int64 23 | MeanRowSize int64 24 | MaxRowSize int64 25 | } 26 | 27 | CFMetric struct { 28 | KeySpace string 29 | ColumnFamily string 30 | MetricName string 31 | MetricIntValue int64 32 | MetricFloatValue float64 33 | MetricTimeStamp int64 34 | } 35 | ) 36 | -------------------------------------------------------------------------------- /types/xml.go: 
-------------------------------------------------------------------------------- 1 | package types 2 | 3 | type ( 4 | // This is a response from a query for a CFHistogram array: 5 | // ("http://%s:8081/getattribute?objectname=org.apache.cassandra.db:type=ColumnFamilies,keyspace=%s,columnfamily=%s&attribute=%s&format=array&template=viewarray&template=identity", cassandraIP, name, columnFamily, cfHistograms[i]) 6 | MX4JCassandraCFHistogram struct { 7 | //XMLName xml.Name `xml:"MBean"` 8 | CFHistogram []MX4JCassandraCFHistogramElement `xml:"Attribute>Array>Element"` 9 | } 10 | 11 | // This is one of the array elements: 12 | MX4JCassandraCFHistogramElement struct { 13 | Index string `xml:"index,attr"` 14 | Value string `xml:"element,attr"` 15 | } 16 | 17 | // This is a response from a query for an individual bit of data: 18 | // ("http://%s:8081/getattribute?objectname=org.apache.cassandra.db:type=ColumnFamilies,keyspace=%s,columnfamily=%s&attribute=%s&format=long&template=identity", cassandraIP, name, columnFamily, cfMetrics[i]) 19 | MX4JCassandraCFLongData struct { 20 | //XMLName xml.Name `xml:"MBean"` 21 | CFLongData MX4JCassandraCFLongDataAttribute `xml:"Attribute"` 22 | } 23 | 24 | // This is the bit of data itself (Value is kept as a string; callers parse it): 25 | MX4JCassandraCFLongDataAttribute struct { 26 | Name string `xml:"name,attr"` 27 | Value string `xml:"value,attr"` 28 | } 29 | 30 | // This is the response from a query for the list of ColumnFamilies: 31 | // ("http://%s:8081/server?instanceof=org.apache.cassandra.db.ColumnFamilyStore&template=identity", cassandraIP) 32 | MX4JCFList struct { 33 | CFList []MX4JCFListColumnFamily `xml:"MBean"` 34 | } 35 | // One MBean entry from that list. ColmnFamily (sic - the spelling is part of the exported API) holds the raw "objectname" attribute: 36 | MX4JCFListColumnFamily struct { 37 | ColmnFamily string `xml:"objectname,attr"` 38 | } 39 | ) 40 | --------------------------------------------------------------------------------