├── .gitignore ├── README.md ├── config.sample.yaml ├── config └── config.go ├── go.mod ├── go.sum └── main.go /.gitignore: -------------------------------------------------------------------------------- 1 | matrix-synchrotron-balancer 2 | config.yaml 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [Support Chat](https://matrix.to/#/#matrix-synchrotron-balancer:sorunome.de) [![donate](https://liberapay.com/assets/widgets/donate.svg)](https://liberapay.com/Sorunome/donate) 2 | 3 | # matrix-synchrotron-balancer 4 | This is a load balancer for synapse synchrotron workers. As specified in the [docs](https://github.com/matrix-org/synapse/blob/master/docs/workers.rst#synapseappsynchrotron) it is best if all requests of a given user are handled by the same synchrotron. To achieve that, this load balancer extracts the access token from each incoming request, looks up the user it belongs to and always routes that user to the same synchrotron. In addition, it relocates users to other synchrotrons if the load of their current one gets too high. 5 | 6 | **IMPORTANT** This does not do any authentication at all, so be sure that only `localhost` has access to it! 7 | 8 | ## Installation 9 | ```bash 10 | git clone https://github.com/Sorunome/matrix-synchrotron-balancer 11 | cd matrix-synchrotron-balancer 12 | go build 13 | ``` 14 | ## Running 15 | ```bash 16 | ./matrix-synchrotron-balancer 17 | ``` 18 | ## Configuration 19 | An example file is provided as `config.sample.yaml`; copy it to `config.yaml` and edit it to your needs. 
Here are all the keys: 20 | 21 | - `homeserver_url`: (string) URL of your homeserver 22 | - `listener`: (string) address the load balancer listens on (`host:port`) 23 | - `synchrotrons`: (array) the defined synchrotrons 24 | - `address`: (string) address the synchrotron listens on (`host:port`, WITHOUT `http://`) 25 | - `pid_file`: (string) the full path of the PID file of the synchrotron 26 | - `balancer`: Balancer configs 27 | - `interval`: (int) interval, in seconds, at which the balancer re-evaluates the load of the synchrotrons 28 | - `relocate_min_cpu`: (float) only relocate users if the synchrotron has a CPU usage above this value 29 | - `relocate_threshold`: (float) if the load of a synchrotron is at least this many times the load of the least loaded one, start relocating 30 | - `relocate_counter_threshold`: (float) to avoid relocating users needlessly during short load bursts, relocate only after a synchrotron has been overloaded for this many balancer ticks 31 | - `relocate_cooldown`: (float) how much the relocate counter is decreased per relocated user 32 | -------------------------------------------------------------------------------- /config.sample.yaml: -------------------------------------------------------------------------------- 1 | homeserver_url: http://localhost:8008 # homeserver URL for the whoami request 2 | listener: localhost:8083 # host:port to listen on 3 | synchrotrons: # array of synchrotrons to handle 4 | - address: localhost:8085 # host:port the synchrotron listens on 5 | pid_file: /tmp/synapse-synchrotron.pid # pid file of the synchrotron 6 | - address: localhost:8086 7 | pid_file: /tmp/synapse-synchrotron2.pid 8 | balancer: 9 | interval: 2 # interval, in seconds, at which it re-checks the load of the synchrotrons 10 | -------------------------------------------------------------------------------- /config/config.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "io/ioutil" 5 | "log" 6 | 7 | "gopkg.in/yaml.v2" 8 
| ) 9 | 10 | type SynchrotronConfig struct { 11 | Address string `yaml:"address"` 12 | PIDFile string `yaml:"pid_file"` 13 | } 14 | 15 | type BalancerConfig struct { 16 | RelocateThreshold float64 `yaml:"relocate_threshold"` 17 | RelocateCounterThreshold float64 `yaml:"relocate_counter_threshold"` 18 | RelocateMinCPU float64 `yaml:"relocate_min_cpu"` 19 | RelocateCooldown float64 `yaml:"relocate_cooldown"` 20 | Interval int `yaml:"interval"` 21 | } 22 | 23 | type Config struct { 24 | HomeserverURL string `yaml:"homeserver_url"` 25 | Listener string `yaml:"listener"` 26 | Synchrotrons []*SynchrotronConfig `yaml:"synchrotrons"` 27 | Balancer *BalancerConfig `yaml:"balancer"` 28 | } 29 | 30 | var instance *Config = nil 31 | var Path = "config.yaml" 32 | 33 | func defaultConfig() *Config { 34 | return &Config{ 35 | HomeserverURL: "http://localhost:8008", 36 | Listener: "localhost:8083", 37 | Synchrotrons: nil, 38 | Balancer: &BalancerConfig{ 39 | RelocateThreshold: 3.0, 40 | RelocateCounterThreshold: 4.5, 41 | RelocateCooldown: 0.2, 42 | Interval: 2, 43 | RelocateMinCPU: 10.0, 44 | }, 45 | } 46 | } 47 | 48 | func loadConfig() error { 49 | c := defaultConfig() 50 | 51 | buffer, err := ioutil.ReadFile(Path) 52 | if err != nil { 53 | return err 54 | } 55 | err = yaml.Unmarshal(buffer, &c) 56 | if err != nil { 57 | return err 58 | } 59 | instance = c 60 | return nil 61 | } 62 | 63 | func Get() *Config { 64 | if instance != nil { 65 | return instance 66 | } 67 | err := loadConfig() 68 | if err != nil { 69 | log.Panic("Error loading config: ", err) 70 | } 71 | return instance 72 | } 73 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/Sorunome/matrix-synchrotron-balancer 2 | 3 | go 1.14 4 | 5 | require ( 6 | github.com/struCoder/pidusage v0.1.3 7 | gopkg.in/yaml.v2 v2.2.8 8 | ) 9 | 
-------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/struCoder/pidusage v0.1.3 h1:pZcSa6asBE38TJtW0Nui6GeCjLTpaT/jAnNP7dUTLSQ= 2 | github.com/struCoder/pidusage v0.1.3/go.mod h1:pWBlW3YuSwRl6h7R5KbvA4N8oOqe9LjaKW5CwT1SPjI= 3 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 4 | gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10= 5 | gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 6 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "io" 7 | "io/ioutil" 8 | "log" 9 | "net" 10 | "net/http" 11 | "regexp" 12 | "strconv" 13 | "sync" 14 | "time" 15 | 16 | "github.com/struCoder/pidusage" 17 | 18 | "github.com/Sorunome/matrix-synchrotron-balancer/config" 19 | ) 20 | 21 | type Synchrotron struct { 22 | Address string 23 | PIDFile string 24 | Load float64 25 | Users int 26 | RelocateCounter float64 27 | } 28 | 29 | var rMatchToken = regexp.MustCompile(`(?i)(?:Authorization:\s*Bearer\s*(\S*)|[&?]access_token=([^&?\s]+))`) 30 | var tokenMxidCache = make(map[string]string) 31 | var synchrotronCache = make(map[string]int) 32 | var synchrotrons []*Synchrotron 33 | var totalUsers = 0 34 | 35 | func getSynchrotron(mxid string) *Synchrotron { 36 | if cachedIndex, ok := synchrotronCache[mxid]; ok { 37 | if synchrotrons[cachedIndex].RelocateCounter < config.Get().Balancer.RelocateCounterThreshold || synchrotrons[cachedIndex].Users < 2 { 38 | return synchrotrons[cachedIndex] 39 | } 40 | // we need to relocate the user to another synchrotron 41 | synchrotrons[cachedIndex].Users-- 42 | // estimate to how good our relocating is 43 | 
synchrotrons[cachedIndex].RelocateCounter -= config.Get().Balancer.RelocateCooldown 44 | } 45 | minLoad := 1000.0 46 | chosenIndex := 0 47 | for i, synch := range synchrotrons { 48 | if synch.Load < minLoad { 49 | minLoad = synch.Load 50 | chosenIndex = i 51 | } 52 | } 53 | synchrotronCache[mxid] = chosenIndex 54 | synchrotrons[chosenIndex].Users++ 55 | return synchrotrons[chosenIndex] 56 | } 57 | 58 | type WhoamiResponse struct { 59 | UserID string `json:"user_id"` 60 | } 61 | 62 | func getMXID(token string) string { 63 | if val, ok := tokenMxidCache[token]; ok { 64 | return val 65 | } 66 | log.Println("New first authorization token") 67 | req, err := http.NewRequest("GET", config.Get().HomeserverURL+"/_matrix/client/r0/account/whoami", nil) 68 | if err != nil { 69 | log.Println("Error creating request to fetch user ID from homeserver:", err) 70 | return "" 71 | } 72 | req.Header.Add("Authorization", "Bearer "+token) 73 | resp, err := http.DefaultClient.Do(req) 74 | if err != nil { 75 | log.Println("Error fetching user ID from homeserver:", err) 76 | tokenMxidCache[token] = "" 77 | return "" 78 | } 79 | defer resp.Body.Close() 80 | var whoami WhoamiResponse 81 | err = json.NewDecoder(resp.Body).Decode(&whoami) 82 | if err != nil { 83 | log.Println("JSON decode error:", err) 84 | tokenMxidCache[token] = "" 85 | return "" 86 | } 87 | log.Println("New user ID:", whoami.UserID) 88 | tokenMxidCache[token] = whoami.UserID 89 | return whoami.UserID 90 | } 91 | 92 | func pipe(src net.Conn, dst net.Conn, wg *sync.WaitGroup) { 93 | buff := make([]byte, 65535) 94 | _, _ = io.CopyBuffer(dst, src, buff) 95 | src.Close() 96 | dst.Close() 97 | wg.Done() 98 | } 99 | 100 | func handleConnection(conn net.Conn) { 101 | defer conn.Close() 102 | // read out the first chunk to determine where to route to 103 | buff := make([]byte, 65535) 104 | n, err := conn.Read(buff) 105 | if err != nil { 106 | return 107 | } 108 | 109 | firstChunk := buff[:n] 110 | var mxid string 111 | if match := 
rMatchToken.FindSubmatch(firstChunk); len(match) > 1 { 112 | token := match[1] 113 | if len(token) == 0 { 114 | token = match[2] 115 | } 116 | mxid = getMXID(string(token)) 117 | } 118 | 119 | rconn, err := net.Dial("tcp", getSynchrotron(mxid).Address) 120 | if err != nil { 121 | log.Println("Failed to connect to remote") 122 | return 123 | } 124 | defer rconn.Close() 125 | 126 | // don't forget to send the first chunk! 127 | _, err = rconn.Write(firstChunk) 128 | if err != nil { 129 | return 130 | } 131 | 132 | var wg sync.WaitGroup 133 | wg.Add(2) 134 | totalUsers++ 135 | 136 | go pipe(conn, rconn, &wg) 137 | go pipe(rconn, conn, &wg) 138 | 139 | wg.Wait() 140 | totalUsers-- 141 | } 142 | 143 | func updateLoads() { 144 | minLoad := 1000.0 145 | maxLoad := 0.0 146 | for i, synch := range synchrotrons { 147 | f, err := ioutil.ReadFile(synch.PIDFile) 148 | if err != nil { 149 | log.Println("Error fetching PID file:", err) 150 | continue 151 | } 152 | pid, err := strconv.Atoi(string(bytes.TrimSpace(f))) 153 | if err != nil { 154 | log.Println("Malformed PID file:", err) 155 | continue 156 | } 157 | sysInfo, err := pidusage.GetStat(pid) 158 | if err != nil { 159 | log.Println("Error fetching synchrotron stats:", err) 160 | } 161 | synch.Load = sysInfo.CPU 162 | if synch.Load > maxLoad { 163 | maxLoad = synch.Load 164 | } 165 | if synch.Load < minLoad { 166 | minLoad = synch.Load 167 | } 168 | log.Println("Synchrotron", i, "Users:", synch.Users, "Load:", synch.Load) 169 | } 170 | relocateLoad := minLoad * config.Get().Balancer.RelocateThreshold 171 | for _, synch := range synchrotrons { 172 | if synch.Load >= relocateLoad && synch.Users > 1 && synch.Load > config.Get().Balancer.RelocateMinCPU { 173 | synch.RelocateCounter++ 174 | } else if synch.RelocateCounter > 0 { 175 | synch.RelocateCounter-- 176 | } 177 | if synch.RelocateCounter < 0 { 178 | synch.RelocateCounter = 0 179 | } 180 | } 181 | } 182 | 183 | func startUpdateLoads() { 184 | for { 185 | 
time.Sleep(time.Duration(config.Get().Balancer.Interval) * time.Second) 186 | go updateLoads() 187 | } 188 | } 189 | 190 | func main() { 191 | if len(config.Get().Synchrotrons) == 0 { 192 | log.Panic("Please configure at least one synchrotron") 193 | } 194 | 195 | synchrotrons = make([]*Synchrotron, len(config.Get().Synchrotrons)) 196 | for i, synch := range config.Get().Synchrotrons { 197 | synchrotrons[i] = &Synchrotron{ 198 | Address: synch.Address, 199 | PIDFile: synch.PIDFile, 200 | Load: 0, 201 | Users: 0, 202 | RelocateCounter: 0, 203 | } 204 | } 205 | log.Print("Configured synchrotrons: ", len(synchrotrons)) 206 | 207 | go startUpdateLoads() 208 | updateLoads() 209 | 210 | ln, err := net.Listen("tcp", config.Get().Listener) 211 | if err != nil { 212 | log.Panic("Error starting up:", err) 213 | } 214 | for { 215 | conn, err := ln.Accept() 216 | if err != nil { 217 | log.Print("Error accepting connection") 218 | continue 219 | } 220 | go handleConnection(conn) 221 | } 222 | } 223 | --------------------------------------------------------------------------------