├── .gitignore ├── README.md ├── config-steps-brief.sh ├── private └── config │ ├── aws-cloudwatch-agent │ ├── amazon-cloudwatch-agent.json │ └── amazon-cloudwatch-agent.toml │ ├── aws-instances │ └── livepeer-transcoder-ec2-config.json │ ├── geth │ ├── geth-config.toml │ └── systemd │ │ └── geth.service │ └── livepeer │ └── systemd │ └── livepeer-transcoder.service └── utils └── monitor_reward_call.go /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.pyc 3 | *#*# 4 | *.swp 5 | *.DS_Store 6 | *.project 7 | *.log 8 | 9 | # Test binary, build with `go test -c` 10 | *.test 11 | 12 | # Output of the go coverage tool, specifically when used with LiteIDE 13 | *.out 14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Running a LivePeer transcoder 2 | The purpose of this project is to document my own approach to running a LivePeer transcoder in production. The goal is to run robust infrastructure for the LivePeer transcoding network and to share any supporting code or processes to help the community do the same. These are the early steps in building a robust operating framework in which to run a transcoder network. 3 | 4 | This is a long document and it would be easy to get the impression that running LivePeer is complicated and difficult. This is not the case. Running LivePeer itself is very straightforward and accessible to a wide audience. All the complexity comes from the fact that running any service in a highly-available, scalable way gets complicated. 5 | 6 | If you just want to see the config steps with minimal commentary, check out [config-steps-brief.sh](/config-steps-brief.sh). For a full write-up of the approach, details, reasons for key decisions, and areas for future improvement, read on. 7 | 8 | Some of the operational characteristics I'm working toward include: 9 | * Availability (including fast recovery) 10 | * Security 11 | * Flexibility / composability 12 | * Repeatability 13 | * Capacity understanding not the same as performance 14 | * Configuration / Config is code 15 | 16 | **Note:** This work is all very specific to AWS and Ubuntu. I haven't done the work to generalize for Amazon Linux, RHEL, CentOS, etc. 17 | 18 | ## Key Decisions 19 | Some key decisions I made and why. 20 | - **Platform** - AWS, Linux, Ubuntu - addressable via API, flexibility, elastic capacity. 21 | - **Hardware Resources** - I want to be sure this transcoder can perform, so for the initial phase I've overprovisioned the resources of CPU, RAM, disk performance, and bandwidth (details below). This means this specific configuration is expensive - [$300+/month](https://www.ec2instances.info/?filter=c4.2xlarge&cost_duration=monthly) so feel free to choose lower-resource instance types. 
22 | The instructions below will spin up an instance with the following characteristics:
23 | 
24 | | | |
25 | | --- | --- |
26 | | Instance type | [c4.2xlarge](https://www.ec2instances.info/?filter=c4.2xlarge&cost_duration=monthly) |
27 | | CPU | 8 vCPUs |
28 | | Network | High |
29 | | EBS Optimized | [YES](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/EBSOptimized.html) |
30 | | OS | ami-85f9b8fa [Ubuntu 18.04 LTS HVM AMI](https://cloud-images.ubuntu.com/locator/ec2/) |
31 | | Root disk | EBS-backed, 32GB [gp2 SSD](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/EBSVolumeTypes.html#EBSVolumeTypes_gp2) |
32 | | EBS Vol 1 | 100GB gp2 SSD for LivePeer data |
33 | | EBS Vol 2 | 500GB gp2 SSD for dedicated local geth node |
34 | - **Addressability** - fixed IP address (AWS Elastic IP Address) for stable addressability, plus the ability to move the IP between instances for better flexibility.
35 | - **Storage performance** - gp2 SSD for decent, consistent performance.
36 | - **Storage flexibility** - EBS volumes - concentrate data and config on dedicated volumes separate from the root disk, with deliberate filesystem location choices (not in default home directories), for flexibility: easily expandable, easily transferred to a new instance (speed of recovery), and easy to back up (EBS snapshots, easily automated).
37 | - **Process supervision** - systemd (ugh). Not a huge fan of systemd, but given that it's the default now, there's a lot of value in not fighting the native system.
38 | - **Timekeeping** is always crucial. In Ubuntu 18.04, the base system uses [systemd-timesyncd](https://www.freedesktop.org/software/systemd/man/timedatectl.html), which looks OK, but you may want to consider using [Chrony](https://chrony.tuxfamily.org/) for more fine-grained control of accuracy and syncing. See the [FAQ](https://chrony.tuxfamily.org/faq.html), this [Ubuntu help article on time sync](https://help.ubuntu.com/lts/serverguide/NTP.html), and a [basic overview of configuring chrony](https://blog.ubuntu.com/2018/04/09/ubuntu-bionic-using-chrony-to-configure-ntp).
39 | - **Ethereum network access** - we run a local (light) geth node
40 | * The official docs [recommend running geth](https://livepeer.readthedocs.io/en/latest/node.html) but other info, such as [this forum post](https://forum.livepeer.org/t/how-to-run-livepeer-with-geth/143), says it's not necessary. That's correct, it's not strictly required, but I think it's clearly beneficial - as mentioned in this [FAQ](https://livepeer.readthedocs.io/en/latest/transcoding.html#faq), a flaky connection to the Ethereum network can lead to errors, calls to `reward()` failing, failure to transcode incoming jobs, etc. The best way to ensure a solid, fast connection to the Ethereum network is to run a local geth / parity node.
41 | * See this post for args to run a local geth instance: https://forum.livepeer.org/t/transcoder-tip-geth-light-client/247/7
42 | * Need a full copy of the ETH blockchain? It seems a fast sync is sufficient.
43 | * My preference is to run it on a dedicated local node (not the transcoder).
44 | - **Security**
45 | This is not meant to be an exhaustive review of security practices, just a quick overview of some considerations that are top of mind and decisions I made.
46 | * Securing Ethereum keys is one of the most important considerations. One of the first decisions is whether to protect the local Ethereum key with a passphrase, which seems like an obvious first step, but there are trade-offs. 
Having automated, non-interactive startup of the LivePeer transcoder is an important operational goal in order to achieve any kind of scale and systems automation. There are ways to provide the passphrase automatically at startup time to the livepeer binary, but the passphrase would still have to live in a file somewhere on disk, and it would not be truly secure from anyone with access to the instance. LivePeer relies on [geth's Ethereum account funcs](https://github.com/livepeer/go-livepeer/blob/master/eth/accountmanager.go) to unlock accounts, and geth doesn't seem to be able to request private keys over the network, for example, so they must be stored locally and unlocked via an interactive prompt (as far as I can tell). If you want non-interactive startup paired with a passphrase, you'll have to store the passphrase locally on the machine. My decision is to secure the instance to the best of my ability, optimize for operational efficiency at scale, and not use a passphrase on the local Eth private key. I just supplied a blank password the first time and then it doesn't ask for a password on startup in the future.
47 | * The implications are that you have to strongly limit access to the instance and to the data directories - anyone with access and sufficient permissions can access the private key. This also means that backups of the data directory will contain the unprotected private key, so backups should be encrypted and appropriate controls should be in place around decryption keys.
48 | * You could supply the passphrase via the command line, but I don't really want it to be visible in the process table.
49 | * Supplying the passphrase via config file would be slightly better than no passphrase.
50 | * Ports - all ports on the transcoder are locked down, closed to the world and to the local network except 4433, as required by the [upcoming networking upgrades](https://forum.livepeer.org/t/upcoming-networking-upgrades/298), which I think has to be open to the world (see the example security group commands at the end of this section).
51 | * Note that ssh on the transcoder node is also closed to the world and, in our setup, only accessible through an ssh bastion host in our AWS network which runs ssh on a non-standard port and only allows access from specific, known IPs.
52 | * Other considerations:
53 | * No root logins are permitted
54 | * Auth is via ssh keys only
55 | * Logins are only via named user accounts (e.g. "sabrina") for auditability, not via anonymous system accounts (e.g. "ubuntu"), although I use "ubuntu" in these examples as a placeholder.
56 | * Keep system security patches up-to-date
57 | * Review all running procs and open ports and shut down (permanently) all unnecessary ones
58 | * Make sure you have 2FA enabled for your AWS account
59 | * Back up regularly and automatically.
60 | * Check your backups for validity and restorability.
61 | * Be aware of the security implications of backups - is sensitive data in your backups? Encrypt them.
62 | * Monitor your boxes (metrics, health, etc).
63 | * Regularly audit and rotate authorized ssh keys and accounts
64 | * Use AWS IAM permissions for fine-grained access control
65 | * Limit access to sudo
66 | * Don't encourage access via root AWS ssh keys, only user account keys
67 | 
68 | * Future Architecture Directions - see the [OPs TODO](#ops-todo).
69 | There is much room for improvement. See below for some specific areas of known technical debt and future work. 
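As a concrete illustration of the port lockdown described above, the rules can be expressed with the AWS CLI roughly as follows. This is only a sketch: the `notation` profile and the `livepeer-inbound` / `ssh-bastion` group names are placeholders for this setup (the instance config in this repo references a `livepeer-inbound` group), and in a non-default VPC you would reference security group IDs rather than names.
```
# allow only the LivePeer networking port (4433) from the world
aws --profile notation ec2 create-security-group \
  --group-name livepeer-inbound --description "LivePeer transcoder inbound"
aws --profile notation ec2 authorize-security-group-ingress \
  --group-name livepeer-inbound --protocol tcp --port 4433 --cidr 0.0.0.0/0

# ssh is reachable only from the bastion host's security group, not from the world
aws --profile notation ec2 authorize-security-group-ingress \
  --group-name livepeer-inbound --protocol tcp --port 22 --source-group ssh-bastion
```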
70 | 
71 | ## Instance launch
72 | You can use the [AWS Command Line Interface](https://docs.aws.amazon.com/cli/latest/userguide/installing.html) to launch instances with these characteristics using [this configuration file](https://github.com/alexlines/livepeer-transcoder-ops/blob/master/private/config/aws-instances/livepeer-transcoder-ec2-config.json) as follows:
73 | **Note** This command line won't work for you as-is because the named profile "notation" won't exist on your system. You can [create your own named profile config](https://docs.aws.amazon.com/cli/latest/userguide/cli-multiple-profiles.html) and reference that. This config also references named security groups which you won't have (which just allow ssh from certain sources) and a private key with a different name, so adjust accordingly.
74 | ```
75 | aws --profile notation ec2 run-instances \
76 |   --cli-input-json file://livepeer-transcoder-ec2-config.json
77 | ```
78 | 
79 | 
80 | **Allocate an Elastic IP for a stable public address**
81 | ```
82 | aws --profile notation ec2 allocate-address
83 | aws --profile notation ec2 associate-address --instance-id --public-ip
84 | ```
85 | 
86 | 
87 | **Get the most recent LivePeer release**
88 | You can build from scratch if you want - but why? I won't go into that here; read more about it in the [official README](https://github.com/livepeer/go-livepeer/blob/master/README.md).
89 | Download the latest mainnet-targeted livepeer and livepeer_cli from https://github.com/livepeer/go-livepeer/releases.
90 | ```
91 | cd
92 | curl -s -L https://github.com/livepeer/go-livepeer/releases/download/0.2.4/livepeer_linux.tar.gz > livepeer_linux.tar.gz
93 | gzip -d -c livepeer_linux.tar.gz | tar xvf -
94 | ```
95 | 
96 | 
97 | **System Ops**
98 | **Prepare the LivePeer volume**
99 | If the LivePeer EBS volume was not created at instance instantiation, create and attach it now.
100 | 100GB gp2 disk for LivePeer storage / operations.
101 | Adjust the availability zone to match the instance's AZ.
102 | ```
103 | aws --profile notation ec2 create-volume --size 100 --region us-east-1 --availability-zone us-east-1a --volume-type gp2
104 | aws --profile notation ec2 attach-volume --device /dev/sdg --instance-id --volume-id
105 | ```
106 | Then log in to the instance and create the filesystem and mount point, and add the volume to fstab (device names may vary):
107 | ```
108 | # ssh to instance and run locally on the box:
109 | sudo mkfs.ext4 /dev/xvdg
110 | sudo mkdir /d1
111 | echo "UUID= /d1 ext4 defaults 0 2" | sudo tee -a /etc/fstab
112 | sudo mount /d1
113 | ```
114 | 
115 | **Prepare the geth volume**
116 | If the geth EBS volume was not created at instance instantiation, create and attach it now. 
117 | 500GB gp2 disk for geth storage / operations 118 | Adjust the availability zone to match the instance's az 119 | ``` 120 | aws --profile notation ec2 create-volume --size 500 --region us-east-1 --availability-zone us-east-1a --volume-type gp2 121 | aws --profile notation ec2 attach-volume --device /dev/sdh --instance-id --volume-id 122 | ``` 123 | Then login to the instance and create filesystem, mount point, and add volume to fstab (device names may vary): 124 | ``` 125 | # ssh to instance and run locally on the box: 126 | sudo mkfs.ext4 /dev/xvdh 127 | sudo mkdir /d2 128 | echo "UUID= /d2 ext4 defaults 0 2" | sudo tee -a /etc/fstab 129 | sudo mount /dev/xvdh /d2 130 | ``` 131 | 132 | **Set the hostname** 133 | ``` 134 | sudo hostname tc001.mydomain.com 135 | # add FQDN to /etc/hosts 136 | # And replace contents of /etc/hostname (with only hostname, not FQDN) 137 | ``` 138 | 139 | 140 | **Filesystem operations** 141 | For this setup, All LivePeer-specific files (binaries, logs, ethereum accounts, keys, etc) live on a dedicated EBS volume under /d1. The EBS volumes can be backed-up via EBS snapshots and easily attached to a new instance if necessary. 142 | ``` 143 | sudo mkdir -p /d1/livepeer/logs 144 | sudo mv -i ~/livepeer_linux /d1/livepeer/bin 145 | sudo chown -R ubuntu:ubuntu /d1/livepeer 146 | cd /d1 147 | # check out repo 148 | git clone git@github.com:alexlines/livepeer-transcoder-ops.git 149 | ``` 150 | 151 | **Raise open filehandle limits** 152 | As noted in this [LivePeer FAQ](https://livepeer.readthedocs.io/en/latest/transcoding.html#faq), you can encounter the "too many open files" error when running a transcoder. As Eric notes in [this forum post](https://forum.livepeer.org/t/increase-file-limit-as-a-transcoder/170), raising the open file handle limit via pam will address this, but only for cases where you are running the livepeer node manually from an interactive session (e.g., you logged in via ssh): 153 | from https://bugs.launchpad.net/ubuntu/+source/upstart/+bug/938669 154 | > PAM is intended as a user oriented library, and daemons are by definition 155 | not users. In man limits.conf, it is clearly stated: 156 | > 157 | > Also, please note that all limit settings are set per login. They 158 | > are not global, nor are they permanent; existing only for the 159 | > duration of the session. 160 | See also the responses to this question about the same https://askubuntu.com/a/288534 161 | If you're running the LivePeer binary through non-interactive processes (upstart, systemd, etc) as we are here, you need to raise the limit via a different approach (see our systemd config below). We'll go ahead and raise the limits for interactive sessions in case you want to run manually to debug, etc. 162 | ``` 163 | echo "ubuntu soft nofile 50000" | sudo tee -a /etc/security/limits.conf 164 | echo "ubuntu hard nofile 50000" | sudo tee -a /etc/security/limits.conf 165 | ``` 166 | And edit `/etc/pam.d/login` and add or uncomment the line: 167 | ``` 168 | session required /lib/security/pam_limits.so 169 | ``` 170 | You don't have to restart the system, just log out and log back in, start some long-running or background process, note its PID and then look at: 171 | ``` 172 | cat /proc//limits 173 | ``` 174 | to confirm the limit has been raised. 
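If you want a quick way to confirm both limits, a minimal check might look like the following (assuming the livepeer daemon is already running, which happens later in this walkthrough - the `pgrep` pattern is just an example, and any long-running test process works the same way):
```
# interactive session limit (log out and back in first)
ulimit -n

# kernel-enforced limit for a running process, e.g. the livepeer daemon
grep "Max open files" /proc/$(pgrep -x livepeer)/limits
```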
175 | 176 | **Install geth and run in light mode** 177 | ``` 178 | sudo apt-get install -y software-properties-common 179 | sudo add-apt-repository -y ppa:ethereum/ethereum 180 | sudo apt-get update 181 | sudo apt-get install -y ethereum 182 | ``` 183 | In this configuration, geth's data, logs, and any keys (but not binaries, which get installed in default locations via apt-get install) all live on a dedicated EBS volume under /d2/ for easy backups via snapshots and to easily attach to a new instance. 184 | Setup geth data directories on the attached EBS volume. We're running geth under systemd and passing geth's options via a [toml config file](/private/config/geth/geth-config.toml). Copy the config file and systemd unit file into place: 185 | ``` 186 | sudo mkdir /d2/geth-data 187 | sudo chown -R ubuntu:ubuntu /d2/geth-data 188 | sudo cp /d1/livepeer-transcoder-ops/private/config/geth/systemd/geth.service /etc/systemd/system/ 189 | sudo cp /d1/livepeer-transcoder-ops/private/config/geth/geth-config.toml /d2/geth-data/ 190 | ``` 191 | If you plan to use existing .ethereum files or keys, copy them into place now in `/d2/geth-data/.ethereum` 192 | start geth via systemd and watch the logs: 193 | ``` 194 | sudo systemctl enable geth [or 'reenable' if you're overwriting existing config file] 195 | sudo systemctl start|stop|restart geth 196 | 197 | # check the status and logs 198 | sudo systemctl status geth 199 | sudo journalctl -u geth.service -f 200 | ``` 201 | 202 | Wait a few minutes and make sure geth is grabbing latest blocks. Sometimes you have to wait 15 minutes, kill it, and restart it before it begins syncing them. 203 | 204 | 205 | **Install systemd config for LivePeer** 206 | If you are going to use existing LivePeer account data, go ahead and copy it into place now in `/d1/livepeer/.lpData/` 207 | ``` 208 | sudo cp /d1/livepeer-transcoder-ops/private/config/livepeer/systemd/livepeer-transcoder.service /etc/systemd/system/ 209 | sudo systemctl enable livepeer-transcoder [or reenable if copying updated config] 210 | ``` 211 | 212 | Run LivePeer manually for the initial run to make sure it can: 213 | * Connect to the local geth instance 214 | * Detect your existing Ethereum account / keys if they are in place **OR** 215 | * Create a new Ethereum account if necessary. For my installation, I created this initial account *without* a passphrase for operational reasons, taking into account all the security considerations discussed elsewhere in this document. Your mileage may vary and my recommendation is to keep security as the top priority while adjusting for your own operational environment. 216 | 217 | **Running live on the Ethereum mainnet** 218 | * Transfer some ETH and LPT to your node - a small amount at first to confirm addresses and process. 219 | * Run the livepeer binary by hand as an initial test. I am running LivePeer with the following params (as seen in the [systemd unit config](https://github.com/alexlines/livepeer-transcoder-ops/blob/master/private/config/livepeer/systemd/livepeer-transcoder.service)): 220 | **Note:** This will run as a transcoder on **mainnet**, this is basically running live in production. 
221 | ```
222 | /d1/livepeer/bin/livepeer -datadir /d1/livepeer/.lpData -ipfsPath /d1/livepeer -log_dir /d1/livepeer/logs -ethUrl ws://127.0.0.1:8546 -v 6 -initializeRound -transcoder -publicIP -gasPrice 0
223 | ```
224 | 
225 | Now kill that process and start livepeer using systemd and watch the logs:
226 | ```
227 | kill $(pgrep livepeer)
228 | sudo systemctl start livepeer-transcoder
229 | # check status and watch the logs
230 | sudo systemctl status livepeer-transcoder
231 | sudo journalctl -u livepeer-transcoder.service -f
232 | ```
233 | 
234 | Now use the livepeer command line utility to enroll as a transcoder and set the transcoder config:
235 | ```
236 | /d1/livepeer/bin/livepeer_cli
237 | ```
238 | Choose `13. Invoke multi-step "become a transcoder"`
239 | 
240 | **Initial monitoring and notification**
241 | Any monitoring is better than no monitoring. If you can't integrate the LivePeer transcoder into your existing monitoring and alerting infrastructure for any reason, you can get basic monitoring and alerting functionality up and running quickly using AWS CloudWatch. I may include more detail on how to do this all quickly from the command line, but for now see the following docs:
242 | * [Install the CloudWatch Agent](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/Install-CloudWatch-Agent.html) on your instance
243 | * [Set up alarms](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/AlarmThatSendsEmail.html) on basic health and activity metrics such as disk space, swap activity, CPU, etc. Ideally you also want to alarm if LivePeer or geth isn't running or if `reward()` hasn't been called (more about that elsewhere in this document).
244 | * I added [my AWS CloudWatch agent config](private/config/aws-cloudwatch-agent) to this repo, but it's very basic.
245 | 
246 | 
247 | **Operational Notes**
248 | * Running with `-initializeRound` is a nice thing to do - the round can't start until somebody initializes it, and `reward()` cannot be called until the round has been started. Running with `-initializeRound` can get expensive when gas is high (I've seen ~$40).
249 | * Making sure `reward()` gets called every day is the most important thing right now, after making sure everything is up and running. This generally succeeds, but, in the absence of rock-solid monitoring and alerting on this event, you should manually check it every day. Go set a reminder in your calendar to check it every day at 4pm. While you're there, set another reminder at 9pm. If the call hasn't succeeded for the day, use the command line interface to call `reward()` manually. Some reasons I've seen that can cause it to fail:
250 | * You don't have enough ETH in your transcoder's account. You should monitor this and replenish as necessary.
251 | * If gas prices spike, this can cause slowness and cause transactions to fail, especially if you don't have enough funds (see above).
252 | * Unable to communicate with the geth node - I've seen the local geth node appear to run fine, continue to stay sync'd to the latest blocks, and log that it's submitting transactions (such as calls to reward), but they fail silently and no errors or warnings are produced. LivePeer [issue #455](https://github.com/livepeer/go-livepeer/issues/455) documents a problem similar to this. In such cases, I've first restarted the geth node, waited for it to sync (a couple minutes at most), and then restarted the livepeer node. This is annoying enough to consider restarting geth automatically on a nightly (!) basis. 
253 | * What is the best way to back up the account / credentials tied to the node? For this setup, just create a snapshot of the EBS volume.
254 | * What livepeer / ipfs / etc logs need to be rotated? It looks like the only logs that livepeer currently writes are ipfs logs which, in this configuration, are written to /d1/livepeer/logs/ and are automatically rotated every 10MB, though not automatically compressed.
255 | * Keep an eye on the LivePeer [Releases](https://github.com/livepeer/go-livepeer/releases) page for updates to the software, as well as the [discord](https://discord.gg/cBfD23u) and [forum](https://forum.livepeer.org/) discussions.
256 | 
257 | 
258 | **LivePeer questions I had but was able to answer**
259 | * **Note** Don't forget the [upcoming networking upgrades](https://forum.livepeer.org/t/upcoming-networking-upgrades/298)!
260 | * Is it ok to call `reward()` more than once per round? Yes, I think it will just say "reward already called for this round."
261 | * Is it worth setting up a dedicated ipfs node in the local network? Doesn't look like it's necessary at this time.
262 | * GPU - Is it worth it to run with a GPU? How much does it help? What specifically leverages the GPU - ffmpeg? Short answer: not yet.
263 | * Adding GPU acceleration to transcoding is still an [open issue](https://github.com/livepeer/lpms/issues/33).
264 | * GPU transcoding is not currently supported. According to Doug, "Currently we support deterministic CPU transcoding, but we're working on what you read in the above proposal to enable GPU transcoding in a way that will not disrupt GPU mining operations"
265 | * In [issue #51 Transcoder Design](https://github.com/livepeer/lpms/issues/51#issuecomment-362502511), j0sh goes into a bit more depth on which areas may benefit from a GPU:
266 | > There are some workloads in the transcoding pipeline that might benefit from GPU (such as colorspace conversion), but encoding generally benefits more from SIMD (AVX) or fixed function hardware (QuickSync). That being said, FFMpeg already supports the Intel MediaSync SDK which I believe is able to run certain operations on the (Intel?) GPU natively. I'm hoping that enabling MediaSync support is as simple as installing the library and setting the ffmpeg configure flag. We'd likely need run-time hardware detection as well.
267 | > GPU's might help more with verification, but it'd depend on the method we choose.
268 | * See also the [Transcoder Design doc](https://github.com/livepeer/lpms/wiki/Transcoder-Design)
269 | * There is a [GPU transcoding verification proposal](https://github.com/livepeer/research/issues/12) in [research projects](https://github.com/livepeer/research/projects/1#card-9975184)
270 | * When GPUs can be meaningfully helpful, [P2 GPU instances](https://aws.amazon.com/ec2/instance-types/p2/) are one (very expensive) option, or [Elastic GPUs](https://aws.amazon.com/ec2/elastic-gpus/details/), which can be attached to certain instance types.
271 | * What do you need to do to transfer your transcoder identity to a new box, e.g. if you need to migrate hardware for some reason? The identity of the transcoder node is just the eth address of the account, so as long as you migrate that to a new machine it should be fine - back up and restore your livepeer .lpData directory (but it's very sensitive and contains your private key, so be very careful how and where you back it up, encrypt it, limit access, and make sure it's not on a publicly accessible machine, storage account, etc.). 
272 | * Explain difference between reward, stake, pending stake, etc. 273 | 274 | 275 | **LivePeer open questions** 276 | * How to know if you've been slashed? 277 | * Specifying `-log_dir` on the command line only moved where the ipfs log file got written, `livepeer` still wrote its log to stderr. 278 | * Is it possible to transfer LPT from a transcoder to another account without `unbonding()` the entire stake? Is this done via CLI option #11 "transfer" or can you unbond (a certain number) and then call "withdraw stake" on just that portion? 279 | * What ports should be open to internal network? Open to the world? 280 | * How much ETH should you keep in your transcoder account? 281 | * Capacity planning - how to estimate transcoding rate (how long to transcode each second of output video) based on machine resources? 282 | * How can you run multiple transcoder instances, behind a load balancer, for example, but have them all use the same identity? Because you just register as a single transcoder id, right? Pretty sure it's not yet possible. 283 | * How to monitor demand in different regions? Would be great to know that there is more demand than capacity in Asia/Pacific, for example, and to spin up capacity there. 284 | 285 | 286 | 287 | **Reference** 288 | * Master reference docs and info is aggregated in this thread - [Transcoder Megathread - Start here to learn about playing the role of transcoder on Livepeer](https://forum.livepeer.org/t/transcoder-megathread-start-here-to-learn-about-playing-the-role-of-transcoder-on-livepeer/190) 289 | 290 | ## OPs TODO 291 | - Configuration 292 | - Use actual config management 293 | - Testing 294 | - Automated deployment 295 | - Docker? 296 | - Traffic management 297 | - Load balancing 298 | - Automatic failover 299 | - Regional routing / responding to regional demand 300 | - Auto-scaling 301 | - Monitoring, Alerting, Metrics Collection 302 | - Better health checks of LivePeer instance and processes 303 | - Go and systemd both support watchdog for process health monitoring http://0pointer.de/blog/projects/watchdog.html 304 | - It would be nice if the livepeer internal webserver supported a call to `/health` for example, which could be checked by a nagios/etc plugin, as well as by a load balancer which was monitoring the health of a pool of transcoders. You could sortof fake this today by using a call to `http://:8935/nodeID` and make sure it returns the expected nodeID, but a proper `/health` function could better check a few vital signs. 305 | - Monitor and alert if reward() doesn't get called. A few ways to monitor this: 306 | - I started a very basic checker script, mostly based on code in [livepeer_cli](https://github.com/livepeer/go-livepeer/tree/master/cmd/livepeer_cli) that queries the local livepeer internal webserver for LastRewardRound and, if it doesn't match currentRound, could send an alert. 
There's not much there, but you can [see the code here](utils/monitor_reward_call.go).
307 | - Query the local livepeer node via http:
308 | ```
309 | $ curl http://127.0.0.1:8935/transcoderInfo
310 | {"Address":"0x50d69f8253685999b4c74a67ccb3d240e2a56ed6","LastRewardRound":1018,"RewardCut":30000,"FeeShare":300000,"PricePerSegment":150000000000,"PendingRewardCut":30000,"PendingFeeShare":300000,"PendingPricePerSegment":150000000000,"DelegatedStake":6454553077282307328907,"Active":true,"Status":"Registered"}
311 | ```
312 | and if `LastRewardRound` doesn't match the current round (which you have to get via another call), then `reward()` has not yet been called. This would be straightforward to monitor automatically and you could alert on this after a certain time of day.
313 | - Basic first-pass alerting could be done through the checker script sending SMS via the Twilio API, or I wonder if you could publish custom events to AWS CloudWatch and set alarms there to get more sophisticated logic and thresholds.
314 | - You also really need to know where in the current round you are to set reasonable alert thresholds - what is the current round length, the start block for the current round, and the number of blocks to wait before the next round, as calculated in the LivePeer [roundservice](https://github.com/livepeer/go-livepeer/blob/4589a1364fa9d29e9d196d259f1f235116d45953/eth/eventservices/roundservice.go#L137) and [wizard_stats](https://github.com/livepeer/go-livepeer/blob/ba011c60094edc2595020a11200fdcebb03da937/cmd/livepeer_cli/wizard_stats.go#L67). Ideally you could leverage the LivePeer codebase to do this, rather than duplicating the work.
315 | - The highest-certainty check would be to query the Ethereum blockchain for the tx where your node called `reward()`
316 | - Could do this by querying the local geth node via the [JSON-RPC api](https://github.com/ethereum/wiki/wiki/JSON-RPC), e.g. using [eth_getTransactionByHash](https://github.com/ethereum/wiki/wiki/JSON-RPC#eth_gettransactionbyhash):
317 | ```
318 | curl -H "Content-Type: application/json" -X POST --data '{"jsonrpc":"2.0","method":"eth_getTransactionByHash","params":["0xcde8ec889fa7ed433d2a55c5f34f1be98f4dad97791a27c258d18eb1bad17d0f"],"id":1}' http://localhost:8545
319 | ```
320 | but there's not an easy way to list recent transactions for an account or contract ... it looks like filters/logs are the way to do this? 
https://github.com/ethereum/go-ethereum/issues/1897 or here https://github.com/ethereum/go-ethereum/issues/2104 321 | - Use the [etherscan API](https://etherscan.io/apis), the `input` value below indicates a call to `reward()`: 322 | ``` 323 | $ curl 'http://api.etherscan.io/api?module=account&action=txlist&address=&startblock=5858336&endblock=5858337&sort=desc&apikey=' 324 | { 325 | "status":"1", 326 | "message":"OK", 327 | "result":[ 328 | { 329 | "blockNumber":"5944652", 330 | "blockHash":"0x58d1fcc7ec68dd001a3c166572b3a2d308f4b0598a92faf46e509abb70118de3", 331 | "timeStamp":"1531311107", 332 | "hash":"0x5960471df2da7cc405a4bcb5a195c73a711b37ada828a56672b4ef40c891b918", 333 | "nonce":"203", 334 | "transactionIndex":"136", 335 | "from":"0x345551571c5ef20111c6168b9a498dfb836e7c09", 336 | "to":"0x511bc4556d823ae99630ae8de28b9b80df90ea2e", 337 | "value":"0", 338 | "gas":"262287", 339 | "gasPrice":"8000000000", 340 | "input":"0x228cb733", 341 | "contractAddress":"", 342 | "cumulativeGasUsed":"3474053", 343 | "txreceipt_status":"1", 344 | "gasUsed":"213639", 345 | "confirmations":"1143", 346 | "isError":"0" 347 | }] 348 | } 349 | ``` 350 | - Monitor amount of ETH in transcoder's account and alert if below certain threshold. 351 | - Metrics collection - one idea is [publish metrics to AWS CloudWatch](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/publishingMetrics.html), possibly with [custom events](https://aws.amazon.com/blogs/security/how-to-use-amazon-cloudwatch-events-to-monitor-application-health/)? Though I haven't researched if feasible in this case. 352 | - Security 353 | - Better management of Ethereum private keys 354 | - Possibly using Hashicorp's Vault for private keys or AWS KMS 355 | - Could store your private key in [AWS Parameter Store](https://aws.amazon.com/systems-manager/features/#Parameter_Store) in AWS [Key Management Service](https://aws.amazon.com/kms/) and write a wrapper script which can retrieve the private key 356 | - [Getting started with AWS Parameter store](https://docs.aws.amazon.com/systems-manager/latest/userguide/systems-manager-paramstore.html) see also How AWS [Systems Manager Parameter Store Uses AWS KMS](https://docs.aws.amazon.com/kms/latest/developerguide/services-parameter-store.html?shortFooter=true) 357 | - EBS Volumes 358 | - Automate EBS snapshots 359 | - Encrypt EBS Volumes by default? 360 | - Add GPU's, optionally 361 | - Local geth node 362 | - Would it benefit from being a fast-sync node or a full node? 363 | - Should probably move geth to a dedicated instance that multiple local transcoder nodes can connect to 364 | - Should probably run a local geth cluster in each region you plan to run transcoders 365 | - geth unfortunately seems to get stalled / stuck and appears to benefit from periodically stopping and restarting it. 366 | - Log rotation for LivePeer and geth logs 367 | - Helpful to give the instance an DNS and/or ENS name? 368 | - Better documentation of AWS Security groups, IAM users and permissions, ssh gateway host, etc 369 | 370 | 371 | -------------------------------------------------------------------------------- /config-steps-brief.sh: -------------------------------------------------------------------------------- 1 | # brief summary of commands for my approach to running LivePeer transcoder node 2 | 3 | # this is just an overview and this script is NOT meant to be executed, 4 | # it will not work, and not all commands are meant to be run on the same instance. 
5 | 
6 | # instance launch
7 | aws --profile notation ec2 run-instances \
8 |   --cli-input-json file://livepeer-transcoder-ec2-config.json
9 | 
10 | # allocate elastic ip
11 | aws --profile notation ec2 allocate-address
12 | aws --profile notation ec2 associate-address --instance-id --public-ip
13 | 
14 | # get most recent LivePeer release
15 | cd ~
16 | curl -s -L https://github.com/livepeer/go-livepeer/releases/download/0.2.4/livepeer_linux.tar.gz > livepeer_linux.tar.gz
17 | gzip -d -c livepeer_linux.tar.gz | tar xvf -
18 | 
19 | # prepare LivePeer volume
20 | # if the LivePeer EBS volume was not created at instance instantiation, create and attach now
21 | aws --profile notation ec2 create-volume --size 100 --region us-east-1 --availability-zone us-east-1a --volume-type gp2
22 | aws --profile notation ec2 attach-volume --device /dev/sdg --instance-id --volume-id
23 | 
24 | # log in to the instance and create filesystem, mount point, and add volume to fstab (device names may vary):
25 | sudo mkfs.ext4 /dev/xvdg
26 | sudo mkdir /d1
27 | echo "UUID= /d1 ext4 defaults 0 2" | sudo tee -a /etc/fstab
28 | sudo mount /d1
29 | 
30 | # prepare geth volume
31 | # If the geth EBS volume was not created at instance instantiation, create and attach now.
32 | aws --profile notation ec2 create-volume --size 500 --region us-east-1 --availability-zone us-east-1a --volume-type gp2
33 | aws --profile notation ec2 attach-volume --device /dev/sdh --instance-id --volume-id
34 | 
35 | # log in to the instance and create filesystem, mount point, and add volume to fstab (device names may vary):
36 | sudo mkfs.ext4 /dev/xvdh
37 | sudo mkdir /d2
38 | echo "UUID= /d2 ext4 defaults 0 2" | sudo tee -a /etc/fstab
39 | sudo mount /dev/xvdh /d2
40 | 
41 | # set hostname
42 | sudo hostname tc001.mydomain.com
43 | # add fqdn to /etc/hosts
44 | # and replace contents of /etc/hostname (with only hostname, not FQDN)
45 | 
46 | # set up directories
47 | sudo mkdir -p /d1/livepeer/logs
48 | sudo mv -i ~/livepeer_linux /d1/livepeer/bin
49 | sudo chown -R ubuntu:ubuntu /d1/livepeer
50 | cd /d1
51 | 
52 | # check out repo
53 | git clone git@github.com:alexlines/livepeer-transcoder-ops.git
54 | 
55 | # raise open filehandle limits
56 | echo "ubuntu soft nofile 50000" | sudo tee -a /etc/security/limits.conf
57 | echo "ubuntu hard nofile 50000" | sudo tee -a /etc/security/limits.conf
58 | 
59 | # edit /etc/pam.d/login and add or uncomment the line:
60 | session required /lib/security/pam_limits.so
61 | 
62 | # install geth
63 | sudo apt-get install -y software-properties-common
64 | sudo add-apt-repository -y ppa:ethereum/ethereum
65 | sudo apt-get update
66 | sudo apt-get install -y ethereum
67 | 
68 | # set up geth directories and copy config files into place
69 | sudo mkdir /d2/geth-data
70 | sudo chown -R ubuntu:ubuntu /d2/geth-data
71 | sudo cp /d1/livepeer-transcoder-ops/private/config/geth/systemd/geth.service /etc/systemd/system/
72 | sudo cp /d1/livepeer-transcoder-ops/private/config/geth/geth-config.toml /d2/geth-data/
73 | 
74 | # copy any existing .ethereum files or keys into place now in /d2/geth-data/.ethereum
75 | 
76 | # enable geth under systemd and start geth
77 | sudo systemctl enable geth
78 | sudo systemctl start geth
79 | 
80 | # check the status and logs
81 | sudo systemctl status geth
82 | sudo journalctl -u geth.service -f
83 | 
84 | 
85 | # If you are going to use existing LivePeer account data, go ahead and copy it into place now in /d1/livepeer/.lpData/
86 | 
87 | # copy systemd unit file for LivePeer into place
88 | sudo cp 
/d1/livepeer-transcoder-ops/private/config/livepeer/systemd/livepeer-transcoder.service /etc/systemd/system/ 89 | sudo systemctl enable livepeer-transcoder 90 | 91 | # start LivePeer using systemd 92 | sudo systemctl start livepeer-transcoder 93 | 94 | # check status and watch the logs 95 | sudo systemctl status livepeer-transcoder 96 | sudo journalctl -u livepeer-transcoder.service -f 97 | 98 | # now use the livepeer command line utility to enroll as a transcoder and set transcoder config: 99 | # Choose 13. Invoke multi-step "become a transcoder" 100 | 101 | /d1/livepeer/bin/livepeer_cli 102 | 103 | 104 | -------------------------------------------------------------------------------- /private/config/aws-cloudwatch-agent/amazon-cloudwatch-agent.json: -------------------------------------------------------------------------------- 1 | { 2 | "logs": { 3 | "logs_collected": { 4 | "files": { 5 | "collect_list": [ 6 | { 7 | "file_path": "/d1/livepeer/logs/ipfs.log", 8 | "log_group_name": "ipfs.log" 9 | }, 10 | { 11 | "file_path": "/var/log/syslog", 12 | "log_group_name": "syslog" 13 | } 14 | ] 15 | } 16 | } 17 | }, 18 | "metrics": { 19 | "append_dimensions": { 20 | "AutoScalingGroupName": "${aws:AutoScalingGroupName}", 21 | "ImageId": "${aws:ImageId}", 22 | "InstanceId": "${aws:InstanceId}", 23 | "InstanceType": "${aws:InstanceType}" 24 | }, 25 | "metrics_collected": { 26 | "cpu": { 27 | "measurement": [ 28 | "cpu_usage_idle", 29 | "cpu_usage_iowait", 30 | "cpu_usage_user", 31 | "cpu_usage_system" 32 | ], 33 | "metrics_collection_interval": 60, 34 | "resources": [ 35 | "*" 36 | ], 37 | "totalcpu": false 38 | }, 39 | "disk": { 40 | "measurement": [ 41 | "used_percent", 42 | "inodes_free" 43 | ], 44 | "metrics_collection_interval": 60, 45 | "resources": [ 46 | "*" 47 | ] 48 | }, 49 | "diskio": { 50 | "measurement": [ 51 | "io_time", 52 | "write_bytes", 53 | "read_bytes", 54 | "writes", 55 | "reads" 56 | ], 57 | "metrics_collection_interval": 60, 58 | "resources": [ 59 | "*" 60 | ] 61 | }, 62 | "mem": { 63 | "measurement": [ 64 | "mem_used_percent" 65 | ], 66 | "metrics_collection_interval": 60 67 | }, 68 | "netstat": { 69 | "measurement": [ 70 | "tcp_established", 71 | "tcp_time_wait" 72 | ], 73 | "metrics_collection_interval": 60 74 | }, 75 | "swap": { 76 | "measurement": [ 77 | "swap_used_percent" 78 | ], 79 | "metrics_collection_interval": 60 80 | } 81 | } 82 | } 83 | } -------------------------------------------------------------------------------- /private/config/aws-cloudwatch-agent/amazon-cloudwatch-agent.toml: -------------------------------------------------------------------------------- 1 | [agent] 2 | collection_jitter = "0s" 3 | debug = false 4 | flush_interval = "1s" 5 | flush_jitter = "0s" 6 | hostname = "" 7 | interval = "60s" 8 | logfile = "/opt/aws/amazon-cloudwatch-agent/logs/amazon-cloudwatch-agent.log" 9 | metric_batch_size = 1000 10 | metric_buffer_limit = 10000 11 | omit_hostname = false 12 | precision = "" 13 | quiet = false 14 | round_interval = false 15 | 16 | [inputs] 17 | 18 | [[inputs.cpu]] 19 | fieldpass = ["usage_idle", "usage_iowait", "usage_user", "usage_system"] 20 | interval = "60s" 21 | percpu = true 22 | totalcpu = false 23 | 24 | [[inputs.disk]] 25 | fieldpass = ["used_percent", "inodes_free"] 26 | interval = "60s" 27 | 28 | [[inputs.diskio]] 29 | fieldpass = ["io_time", "write_bytes", "read_bytes", "writes", "reads"] 30 | interval = "60s" 31 | report_deltas = true 32 | 33 | [[inputs.mem]] 34 | fieldpass = ["used_percent"] 35 | interval = "60s" 36 | 37 | 
[[inputs.netstat]] 38 | fieldpass = ["tcp_established", "tcp_time_wait"] 39 | interval = "60s" 40 | 41 | [[inputs.swap]] 42 | fieldpass = ["used_percent"] 43 | interval = "60s" 44 | 45 | [[inputs.tail]] 46 | data_format = "value" 47 | data_type = "string" 48 | file_state_folder = "/opt/aws/amazon-cloudwatch-agent/logs/state" 49 | name_override = "raw_log_line" 50 | 51 | [[inputs.tail.file_config]] 52 | file_path = "/d1/livepeer/logs/ipfs.log" 53 | from_beginning = true 54 | log_group_name = "ipfs.log" 55 | pipe = false 56 | 57 | [[inputs.tail.file_config]] 58 | file_path = "/var/log/syslog" 59 | from_beginning = true 60 | log_group_name = "syslog" 61 | pipe = false 62 | 63 | [outputs] 64 | 65 | [[outputs.cloudwatch]] 66 | force_flush_interval = "60s" 67 | namespace = "CWAgent" 68 | region = "us-east-1" 69 | tagexclude = ["host"] 70 | [outputs.cloudwatch.tagdrop] 71 | log_group_name = ["*"] 72 | 73 | [[outputs.cloudwatchlogs]] 74 | file_name_field_key = "file_name" 75 | file_state_folder = "/opt/aws/amazon-cloudwatch-agent/logs/state" 76 | log_entry_field_key = "value" 77 | log_group_name_tag_key = "log_group_name" 78 | log_stream_name = "i-07f7578cdaedb4c0f" 79 | log_stream_name_tag_key = "log_stream_name" 80 | log_timestamp_field_key = "log_timestamp" 81 | multi_line_start_field_key = "multi_line_start" 82 | offset_field_key = "offset" 83 | region = "us-east-1" 84 | 85 | [processors] 86 | 87 | [[processors.ec2tagger]] 88 | ec2_instance_tag_keys = ["aws:autoscaling:groupName"] 89 | ec2_metadata_tags = ["ImageId", "InstanceId", "InstanceType"] 90 | [processors.ec2tagger.tagdrop] 91 | log_group_name = ["*"] 92 | -------------------------------------------------------------------------------- /private/config/aws-instances/livepeer-transcoder-ec2-config.json: -------------------------------------------------------------------------------- 1 | { 2 | "BlockDeviceMappings": [ 3 | { 4 | "DeviceName": "/dev/sda1", 5 | "Ebs": { 6 | "DeleteOnTermination": true, 7 | "VolumeSize": 32, 8 | "VolumeType": "gp2" 9 | } 10 | }, 11 | { 12 | "DeviceName": "/dev/sdg", 13 | "Ebs": { 14 | "DeleteOnTermination": false, 15 | "VolumeSize": 100, 16 | "VolumeType": "gp2" 17 | } 18 | }, 19 | { 20 | "DeviceName": "/dev/sdh", 21 | "Ebs": { 22 | "DeleteOnTermination": false, 23 | "VolumeSize": 500, 24 | "VolumeType": "gp2" 25 | } 26 | } 27 | ], 28 | "ImageId": "ami-0b425589c7bb7663d", 29 | "InstanceType": "c4.4xlarge", 30 | "KeyName": "form72", 31 | "Monitoring": { 32 | "Enabled": true 33 | }, 34 | "SecurityGroups": [ 35 | "livepeer-inbound" 36 | ], 37 | "DryRun": false, 38 | "EbsOptimized": true, 39 | "InstanceInitiatedShutdownBehavior": "stop" 40 | } 41 | -------------------------------------------------------------------------------- /private/config/geth/geth-config.toml: -------------------------------------------------------------------------------- 1 | # this file was generated by running the following: 2 | # geth --datadir "/d2/geth-data/.ethereum" --cache 512 --maxpeers 25 \ 3 | # --syncmode light --rpc --rpcapi db,eth,net,web3 --ws \ 4 | # --wsorigins "*" dumpconfig > config.toml 5 | # 6 | # and should be equivalent to running: 7 | # geth --datadir "/d2/geth-data/.ethereum" --cache 512 --maxpeers 25 \ 8 | # --syncmode light --rpc --rpcapi db,eth,net,web3 --ws --wsorigins "*" 9 | # 10 | [Eth] 11 | NetworkId = 1 12 | SyncMode = "light" 13 | NoPruning = false 14 | LightPeers = 100 15 | DatabaseCache = 256 16 | TrieCleanCache = 128 17 | TrieDirtyCache = 128 18 | TrieTimeout = 3600000000000 19 | MinerGasFloor = 
8000000 20 | MinerGasCeil = 8000000 21 | MinerGasPrice = 1000000000 22 | MinerRecommit = 3000000000 23 | MinerNoverify = false 24 | EnablePreimageRecording = false 25 | EWASMInterpreter = "" 26 | EVMInterpreter = "" 27 | 28 | [Eth.Ethash] 29 | CacheDir = "ethash" 30 | CachesInMem = 2 31 | CachesOnDisk = 3 32 | DatasetDir = "/d2/geth-data/.ethash" 33 | DatasetsInMem = 1 34 | DatasetsOnDisk = 2 35 | PowMode = 0 36 | 37 | [Eth.TxPool] 38 | Locals = [] 39 | NoLocals = false 40 | Journal = "transactions.rlp" 41 | Rejournal = 3600000000000 42 | PriceLimit = 1 43 | PriceBump = 10 44 | AccountSlots = 16 45 | GlobalSlots = 4096 46 | AccountQueue = 64 47 | GlobalQueue = 1024 48 | Lifetime = 10800000000000 49 | 50 | [Eth.GPO] 51 | Blocks = 20 52 | Percentile = 60 53 | 54 | [Shh] 55 | MaxMessageSize = 1048576 56 | MinimumAcceptedPOW = 2e-01 57 | RestrictConnectionBetweenLightClients = true 58 | 59 | [Node] 60 | DataDir = "/d2/geth-data/.ethereum" 61 | IPCPath = "geth.ipc" 62 | HTTPHost = "127.0.0.1" 63 | HTTPPort = 8545 64 | HTTPVirtualHosts = ["localhost"] 65 | HTTPModules = ["db", "eth", "net", "web3"] 66 | WSHost = "127.0.0.1" 67 | WSPort = 8546 68 | WSOrigins = ["*"] 69 | WSModules = ["net", "web3", "eth", "shh"] 70 | 71 | [Node.P2P] 72 | MaxPeers = 25 73 | NoDiscovery = true 74 | DiscoveryV5 = true 75 | BootstrapNodes = ["enode://a979fb575495b8d6db44f750317d0f4622bf4c2aa3365d6af7c284339968eef29b69ad0dce72a4d8db5ebb4968de0e3bec910127f134779fbcb0cb6d3331163c@52.16.188.185:30303", "enode://3f1d12044546b76342d59d4a05532c14b85aa669704bfe1f864fe079415aa2c02d743e03218e57a33fb94523adb54032871a6c51b2cc5514cb7c7e35b3ed0a99@13.93.211.84:30303", "enode://78de8a0916848093c73790ead81d1928bec737d565119932b98c6b100d944b7a95e94f847f689fc723399d2e31129d182f7ef3863f2b4c820abbf3ab2722344d@191.235.84.50:30303", "enode://158f8aab45f6d19c6cbf4a089c2670541a8da11978a2f90dbf6a502a4a3bab80d288afdbeb7ec0ef6d92de563767f3b1ea9e8e334ca711e9f8e2df5a0385e8e6@13.75.154.138:30303", "enode://1118980bf48b0a3640bdba04e0fe78b1add18e1cd99bf22d53daac1fd9972ad650df52176e7c7d89d1114cfef2bc23a2959aa54998a46afcf7d91809f0855082@52.74.57.123:30303", "enode://979b7fa28feeb35a4741660a16076f1943202cb72b6af70d327f053e248bab9ba81760f39d0701ef1d8f89cc1fbd2cacba0710a12cd5314d5e0c9021aa3637f9@5.1.83.226:30303"] 76 | BootstrapNodesV5 = ["enode://06051a5573c81934c9554ef2898eb13b33a34b94cf36b202b69fde139ca17a85051979867720d4bdae4323d4943ddf9aeeb6643633aa656e0be843659795007a@35.177.226.168:30303", "enode://0cc5f5ffb5d9098c8b8c62325f3797f56509bff942704687b6530992ac706e2cb946b90a34f1f19548cd3c7baccbcaea354531e5983c7d1bc0dee16ce4b6440b@40.118.3.223:30304", "enode://1c7a64d76c0334b0418c004af2f67c50e36a3be60b5e4790bdac0439d21603469a85fad36f2473c9a80eb043ae60936df905fa28f1ff614c3e5dc34f15dcd2dc@40.118.3.223:30306", "enode://85c85d7143ae8bb96924f2b54f1b3e70d8c4d367af305325d30a61385a432f247d2c75c45c6b4a60335060d072d7f5b35dd1d4c45f76941f62a4f83b6e75daaf@40.118.3.223:30307"] 77 | StaticNodes = [] 78 | TrustedNodes = [] 79 | ListenAddr = ":30303" 80 | EnableMsgEvents = false 81 | 82 | [Node.HTTPTimeouts] 83 | ReadTimeout = 30000000000 84 | WriteTimeout = 30000000000 85 | IdleTimeout = 120000000000 86 | 87 | [Dashboard] 88 | Host = "localhost" 89 | Port = 8080 90 | Refresh = 5000000000 91 | -------------------------------------------------------------------------------- /private/config/geth/systemd/geth.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Ethereum go client 3 | After=syslog.target 
network.target 4 | 5 | StartLimitIntervalSec=0 6 | 7 | [Service] 8 | User=ubuntu 9 | Group=ubuntu 10 | Environment=HOME=/d2/geth-data 11 | Type=simple 12 | SyslogIdentifier=geth-client 13 | LimitNOFILE=50000 14 | WorkingDirectory=/d2/geth-data 15 | ExecStart=/usr/bin/geth --config /d2/geth-data/geth-config.toml 16 | KillMode=process 17 | KillSignal=SIGINT 18 | TimeoutStopSec=180 19 | SendSIGKILL=no 20 | Restart=always 21 | RestartSec=20 22 | 23 | [Install] 24 | WantedBy=multi-user.target 25 | -------------------------------------------------------------------------------- /private/config/livepeer/systemd/livepeer-transcoder.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=LivePeer Transcoder Service 3 | After=geth.service 4 | 5 | # disable start rate limiting, which can result in systemd abandoning 6 | # a service if it fails to start too many times within the interval. 7 | # There are good reasons to limit restarts, but we want to disable 8 | # that for now and possible revisit in the future 9 | StartLimitIntervalSec=0 10 | 11 | 12 | [Service] 13 | User=ubuntu 14 | Group=ubuntu 15 | SyslogIdentifier=livepeer-transcoder 16 | 17 | # raise the open filehandle limit 18 | LimitNOFILE=50000 19 | 20 | # location of livepeer binary 21 | WorkingDirectory=/d1/livepeer/bin 22 | 23 | # LivePeer runtime args: 24 | # -datadir, -ipfsPath, -log_dir: use attached vol for all LivePeer data 25 | # -ethUrl : connect to our own geth instance 26 | # -initializeRound : try to initialize the round if necessary 27 | # -gasPrice 0 : rely on the gas oracle to automatically set the price 28 | # -transcoder : run as a transcoder 29 | # -monitor : send metrics to monitoring endpoint http://metrics-mainnet.livepeer.org/api/events 30 | # -serviceAddr : public IP:port of transcoder that broadcasters connect to, must match on-chain Service URI 31 | # -v 6 : run with verbose logging for now 32 | 33 | ExecStart=/d1/livepeer/bin/livepeer -datadir /d1/livepeer/.lpData -ipfsPath /d1/livepeer -log_dir /d1/livepeer/logs -ethUrl ws://127.0.0.1:8546 -initializeRound -transcoder -monitor -serviceAddr tx.form72.com:8443 -gasPrice 0 -v 6 34 | 35 | # always attempt to restart the service 36 | Restart=always 37 | 38 | # seeking balance between default of attempting restart every 100ms 39 | # which is probably overkill here, and possibly not being available 40 | # to serve requests if wait too long between attempts 41 | RestartSec=3 42 | 43 | [Install] 44 | WantedBy=multi-user.target 45 | -------------------------------------------------------------------------------- /utils/monitor_reward_call.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "io/ioutil" 7 | "math/big" 8 | "net/http" 9 | "strconv" 10 | 11 | "github.com/golang/glog" 12 | "github.com/ethereum/go-ethereum/common" 13 | 14 | ) 15 | 16 | // these types and funcs are copied or based on files from go-livepeer, mostly 17 | // from files in github.com/livepeer/go-livepeer/cmd/livepeer_cli 18 | // This is a quick proof of concept, they should more properly be imported 19 | // and used as a base to build on. 20 | 21 | // The initial logic is that if the currentRound and LastRewardRound are not 22 | // the same, then reward() has not yet been called for this round. 
23 | // A better way to check this: 24 | // - has current round been initialized 25 | // - what is current round length 26 | // - what is start block for current round 27 | // - how many blocks to wait until next round 28 | // - alert based on configurable threshold if reward() has not been 29 | // called and there are less than n blocks until next round 30 | 31 | // Think also about what action a responding operator can take in this case, 32 | // is it worth paging someone at 3am? What can the script attempt to 33 | // remedy the situation? 34 | 35 | // TODO's also include: 36 | // - accept command lines args (e.g. where does livepeer server live?) 37 | // - should alert if reward() has not been called, subject to thresholds, etc 38 | 39 | 40 | type Transcoder struct { 41 | Address common.Address 42 | // tech debt: 43 | // changed type of LastRewardRound 44 | // from big.Int to int to simplify 45 | // comparison to currentRound 46 | // but should be changed back to 47 | // big.Int at some point 48 | //LastRewardRound *big.Int 49 | LastRewardRound *int 50 | RewardCut *big.Int 51 | FeeShare *big.Int 52 | PricePerSegment *big.Int 53 | PendingRewardCut *big.Int 54 | PendingFeeShare *big.Int 55 | PendingPricePerSegment *big.Int 56 | DelegatedStake *big.Int 57 | Active bool 58 | Status string 59 | } 60 | 61 | type wizard struct { 62 | endpoint string // Local livepeer node 63 | httpPort string 64 | host string 65 | } 66 | 67 | 68 | func main() { 69 | // tech debt: hard-coding these values: 70 | lp_host := "localhost" 71 | lp_port := "8935" 72 | 73 | w := &wizard{ 74 | endpoint: fmt.Sprintf("http://%v:%v/status", lp_host, lp_port), 75 | httpPort: lp_port, 76 | host: lp_host, 77 | } 78 | w.run() 79 | } 80 | 81 | func (w *wizard) run() { 82 | // Make sure there is a local node running 83 | _, err := http.Get(w.endpoint) 84 | if err != nil { 85 | glog.Errorf("Cannot find local node. Is your node running on http:%v?", w.httpPort) 86 | return 87 | } 88 | 89 | nodeid := w.getNodeID() 90 | currentRound := w.currentRound() 91 | t, err := w.getTranscoderInfo() 92 | if err != nil { 93 | glog.Errorf("Error getting transcoder info: %v", err) 94 | return 95 | } 96 | 97 | // if the currentRound and LastRewardRound are not the same, then 98 | // reward() has not yet been called for this round. 
99 | 	// treat parse failures and missing data as "not called" for this first pass
100 | 	if currentRoundNum, err := strconv.Atoi(currentRound); err != nil || t.LastRewardRound == nil || currentRoundNum != *t.LastRewardRound {
101 | 		fmt.Printf("reward has not been called for current round %v\n", currentRound)
102 | 		// possibly alert here, based on configurable thresholds
103 | 	}
104 | 	// another option is to return true or false
105 | 
106 | 	// don't be so chatty in the future, but debugging for now:
107 | 	fmt.Printf("current round : %v\n", currentRound)
108 | 	fmt.Printf("nodeid : %v\n", nodeid)
109 | 	fmt.Printf("Status : %v\n", t.Status)
110 | 	fmt.Printf("Active : %v\n", t.Active)
111 | 	if t.LastRewardRound != nil { fmt.Printf("Last Reward Round: %v\n", *t.LastRewardRound) }
112 | 
113 | }
114 | 
115 | func (w *wizard) getNodeID() string {
116 | 	return httpGet(fmt.Sprintf("http://%v:%v/nodeID", w.host, w.httpPort))
117 | }
118 | 
119 | func httpGet(url string) string {
120 | 	resp, err := http.Get(url)
121 | 	if err != nil {
122 | 		glog.Errorf("Error sending HTTP GET: %v", err)
123 | 		return ""
124 | 	}
125 | 
126 | 	defer resp.Body.Close()
127 | 	result, err := ioutil.ReadAll(resp.Body)
128 | 	if err != nil || string(result) == "" {
129 | 		return ""
130 | 	}
131 | 	return string(result)
132 | 
133 | }
134 | 
135 | func (w *wizard) currentRound() string {
136 | 	return httpGet(fmt.Sprintf("http://%v:%v/currentRound", w.host, w.httpPort))
137 | }
138 | 
139 | func (w *wizard) getTranscoderInfo() (Transcoder, error) {
140 | 	resp, err := http.Get(fmt.Sprintf("http://%v:%v/transcoderInfo", w.host, w.httpPort))
141 | 	if err != nil {
142 | 		return Transcoder{}, err
143 | 	}
144 | 
145 | 	defer resp.Body.Close()
146 | 
147 | 	result, err := ioutil.ReadAll(resp.Body)
148 | 	if err != nil {
149 | 		return Transcoder{}, err
150 | 	}
151 | 
152 | 	var tInfo Transcoder
153 | 	err = json.Unmarshal(result, &tInfo)
154 | 	if err != nil {
155 | 		return Transcoder{}, err
156 | 	}
157 | 
158 | 	return tInfo, nil
159 | }
160 | 
--------------------------------------------------------------------------------