├── .gitignore ├── README.md ├── config-steps-brief.sh ├── private └── config │ ├── aws-cloudwatch-agent │ ├── amazon-cloudwatch-agent.json │ └── amazon-cloudwatch-agent.toml │ ├── aws-instances │ └── livepeer-transcoder-ec2-config.json │ ├── geth │ ├── geth-config.toml │ └── systemd │ │ └── geth.service │ └── livepeer │ └── systemd │ └── livepeer-transcoder.service └── utils └── monitor_reward_call.go /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.pyc 3 | *#*# 4 | *.swp 5 | *.DS_Store 6 | *.project 7 | *.log 8 | 9 | # Test binary, build with `go test -c` 10 | *.test 11 | 12 | # Output of the go coverage tool, specifically when used with LiteIDE 13 | *.out 14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Running a LivePeer transcoder 2 | The purpose of this project is to document my own approach to running a LivePeer transcoder in production. The goal is to run robust infrastructure for the LivePeer transcoding network and to share any supporting code or processes to help the community do the same. These are the early steps in building a robust operating framework in which to run a transcoder network. 3 | 4 | This is a long document and it would be easy to get the impression that running LivePeer is complicated and difficult. This is not the case. Running LivePeer itself is very straightforward and accessible to a wide audience. All the complexity comes from the fact that running any service in a highly-available, scalable way gets complicated. 5 | 6 | If you just want to see the config steps with minimal commentary, check out [config-steps-brief.sh](/config-steps-brief.sh). For a full write-up of the approach, details, reasons for key decisions, and areas for future improvement, read on. 7 | 8 | Some of the operational characteristics I'm working toward include: 9 | * Availability (including fast recovery) 10 | * Security 11 | * Flexibility / composability 12 | * Repeatability 13 | * Capacity understanding not the same as performance 14 | * Configuration / Config is code 15 | 16 | **Note:** This work is all very specific to AWS and Ubuntu. I haven't done the work to generalize for Amazon Linux, RHEL, CentOS, etc. 17 | 18 | ## Key Decisions 19 | Some key decisions I made and why. 20 | - **Platform** - AWS, Linux, Ubuntu - addressable via API, flexibility, elastic capacity. 21 | - **Hardware Resources** - I want to be sure this transcoder can perform, so for the initial phase I've overprovisioned the resources of CPU, RAM, disk performance, and bandwidth (details below). This means this specific configuration is expensive - [$300+/month](https://www.ec2instances.info/?filter=c4.2xlarge&cost_duration=monthly) so feel free to choose lower-resource instance types. 
22 | The instructions below will spin up an instance with the following characteristics:
23 | 
24 | | | |
25 | | --- | --- |
26 | | Instance type | [c4.2xlarge](https://www.ec2instances.info/?filter=c4.2xlarge&cost_duration=monthly) |
27 | | CPU | 8 vCPUs |
28 | | Network | High |
29 | | EBS Optimized | [YES](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/EBSOptimized.html) |
30 | | OS | ami-85f9b8fa [Ubuntu 18.04 LTS HVM AMI](https://cloud-images.ubuntu.com/locator/ec2/) |
31 | | Root disk | EBS-backed, 32GB [gp2 SSD](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/EBSVolumeTypes.html#EBSVolumeTypes_gp2) |
32 | | EBS Vol 1 | 100GB gp2 SSD for LivePeer data |
33 | | EBS Vol 2 | 500GB gp2 SSD for dedicated local geth node |
34 | - **Addressability** - fixed IP address (AWS Elastic IP Address) for stable addressability, plus the ability to move the IP between instances for better flexibility.
35 | - **Storage performance** - gp2 SSD for decent, consistent performance.
36 | - **Storage flexibility** - EBS volumes - concentrate data and config on dedicated volumes separate from the root disk, with deliberate filesystem location choices (not in default home directories), for flexibility: easily expandable, easily transferred to a new instance (speed of recovery), and easy to back up (EBS snapshots, easily automated).
37 | - **Process supervision** - systemd (ugh). Not a huge fan of systemd, but given that it's the default now, there's a lot of value in not fighting the native system.
38 | - **Timekeeping** is always crucial. In Ubuntu 18.04, the base system uses [systemd-timesyncd](https://www.freedesktop.org/software/systemd/man/timedatectl.html), which looks OK, but you may want to consider using [Chrony](https://chrony.tuxfamily.org/) for more fine-grained control of accuracy and syncing. See the [FAQ](https://chrony.tuxfamily.org/faq.html), this [Ubuntu help article on time sync](https://help.ubuntu.com/lts/serverguide/NTP.html), and a [basic overview of configuring chrony](https://blog.ubuntu.com/2018/04/09/ubuntu-bionic-using-chrony-to-configure-ntp).
39 | - **Ethereum network access** - we run a local (light) geth node
40 | * The official docs [recommend running geth](https://livepeer.readthedocs.io/en/latest/node.html) but other info, such as [this forum post](https://forum.livepeer.org/t/how-to-run-livepeer-with-geth/143), says it's not necessary. That's correct, it's not strictly required, but I think it's clearly beneficial - as mentioned in this [FAQ](https://livepeer.readthedocs.io/en/latest/transcoding.html#faq), a flaky connection to the Ethereum network can lead to errors, calls to `reward()` failing, failure to transcode incoming jobs, etc. The best way to ensure a solid, fast connection to the Ethereum network is to run a local geth / parity node.
41 | * See this post for args to run a local geth instance: https://forum.livepeer.org/t/transcoder-tip-geth-light-client/247/7
42 | * Need a full copy of the ETH blockchain? It seems a fast sync is sufficient.
43 | * My preference is to run it on a dedicated local node (not the transcoder).
44 | - **Security**
45 | This is not meant to be an exhaustive review of security practices, just a quick overview of some considerations that are top of mind and decisions I made.
46 | * Securing Ethereum keys is one of the most important considerations. One of the first decisions is whether to protect the local Ethereum key with a passphrase, which seems like an obvious first step, but there are trade-offs. 
Having automated, non-interactive startup of the LivePeer transcoder is an important operational goal in order to achieve any kind of scale and systems automation. There are ways to provide the passphrase automatically at startup time to the livepeer binary, but the passphrase would still have to live in a file somewhere on disk, and it would not be truly secure from anyone with access to the instance. LivePeer relies on [geth's Ethereum account funcs](https://github.com/livepeer/go-livepeer/blob/master/eth/accountmanager.go) to unlock accounts, and geth doesn't seem to be able to request private keys over the network, for example, so they must be stored locally and unlocked via an interactive prompt (as far as I can tell). If you want non-interactive startup paired with a passphrase, you'll have to store the passphrase locally on the machine. My decision is to secure the instance to the best of my ability, optimize for operational efficiency at scale, and not use a passphrase on the local Eth private key. I just supplied a blank password the first time and then it doesn't ask for a password on startup in the future.
47 | * The implications are that you have to strongly limit access to the instance and to the data directories - anyone with access and sufficient permissions can access the private key. This also means that backups of the data directory will contain the unprotected private key, so backups should be encrypted and appropriate controls should be in place around decryption keys.
48 | * You could supply the passphrase via the command line, but I don't really want it to be visible in the process table.
49 | * Supplying the passphrase via config file would be slightly better than no passphrase.
50 | * Ports - all ports on the transcoder are locked down, closed to the world and to the local network except 4433, as required by the [upcoming networking upgrades](https://forum.livepeer.org/t/upcoming-networking-upgrades/298), which I think has to be open to the world (see the example security group commands at the end of this section).
51 | * Note that ssh on the transcoder node is also closed to the world and, in our setup, only accessible through an ssh bastion host in our AWS network which runs ssh on a non-standard port and only allows access from specific, known IPs.
52 | * Other considerations:
53 | * No root logins are permitted
54 | * Auth is via ssh keys only
55 | * Logins are only via named user accounts (e.g. "sabrina") for auditability, not via anonymous system accounts (e.g. "ubuntu"), although I use "ubuntu" in these examples as a placeholder.
56 | * Keep system security patches up-to-date
57 | * Review all running procs and open ports and shut down (permanently) all unnecessary ones
58 | * Make sure you have 2FA enabled for your AWS account
59 | * Back up regularly and automatically.
60 | * Check your backups for validity and restorability.
61 | * Be aware of the security implications of backups - is sensitive data in your backups? Encrypt them.
62 | * Monitor your boxes (metrics, health, etc).
63 | * Regularly audit and rotate authorized ssh keys and accounts
64 | * Use AWS IAM permissions for fine-grained access control
65 | * Limit access to sudo
66 | * Don't encourage access via root AWS ssh keys, only user account keys
67 | 
68 | * Future Architecture Directions - see the [OPs TODO](#ops-todo).
69 | There is much room for improvement. See below for some specific areas of known technical debt and future work. 
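As a concrete illustration of the port lockdown described above, the rules can be expressed with the AWS CLI roughly as follows. This is only a sketch: the `notation` profile and the `livepeer-inbound` / `ssh-bastion` group names are placeholders for this setup (the instance config in this repo references a `livepeer-inbound` group), and in a non-default VPC you would reference security group IDs rather than names.
```
# allow only the LivePeer networking port (4433) from the world
aws --profile notation ec2 create-security-group \
  --group-name livepeer-inbound --description "LivePeer transcoder inbound"
aws --profile notation ec2 authorize-security-group-ingress \
  --group-name livepeer-inbound --protocol tcp --port 4433 --cidr 0.0.0.0/0

# ssh is reachable only from the bastion host's security group, not from the world
aws --profile notation ec2 authorize-security-group-ingress \
  --group-name livepeer-inbound --protocol tcp --port 22 --source-group ssh-bastion
```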
70 | 
71 | ## Instance launch
72 | You can use the [AWS Command Line Interface](https://docs.aws.amazon.com/cli/latest/userguide/installing.html) to launch instances with these characteristics using [this configuration file](https://github.com/alexlines/livepeer-transcoder-ops/blob/master/private/config/aws-instances/livepeer-transcoder-ec2-config.json) as follows:
73 | **Note** This command line won't work for you as-is because the named profile "notation" won't exist on your system. You can [create your own named profile config](https://docs.aws.amazon.com/cli/latest/userguide/cli-multiple-profiles.html) and reference that. This config also references named security groups which you won't have (which just allow ssh from certain sources) and a private key with a different name, so adjust accordingly.
74 | ```
75 | aws --profile notation ec2 run-instances \
76 |   --cli-input-json file://livepeer-transcoder-ec2-config.json
77 | ```
78 | 
79 | 
80 | **Allocate an Elastic IP for a stable public address**
81 | ```
82 | aws --profile notation ec2 allocate-address
83 | aws --profile notation ec2 associate-address --instance-id --public-ip
84 | ```
85 | 
86 | 
87 | **Get the most recent LivePeer release**
88 | You can build from scratch if you want - but why? I won't go into that here; read more about it in the [official README](https://github.com/livepeer/go-livepeer/blob/master/README.md).
89 | Download the latest mainnet-targeted livepeer and livepeer_cli from https://github.com/livepeer/go-livepeer/releases.
90 | ```
91 | cd
92 | curl -s -L https://github.com/livepeer/go-livepeer/releases/download/0.2.4/livepeer_linux.tar.gz > livepeer_linux.tar.gz
93 | gzip -d -c livepeer_linux.tar.gz | tar xvf -
94 | ```
95 | 
96 | 
97 | **System Ops**
98 | **Prepare the LivePeer volume**
99 | If the LivePeer EBS volume was not created at instance instantiation, create and attach it now.
100 | 100GB gp2 disk for LivePeer storage / operations.
101 | Adjust the availability zone to match the instance's AZ.
102 | ```
103 | aws --profile notation ec2 create-volume --size 100 --region us-east-1 --availability-zone us-east-1a --volume-type gp2
104 | aws --profile notation ec2 attach-volume --device /dev/sdg --instance-id --volume-id
105 | ```
106 | Then log in to the instance and create the filesystem and mount point, and add the volume to fstab (device names may vary):
107 | ```
108 | # ssh to instance and run locally on the box:
109 | sudo mkfs.ext4 /dev/xvdg
110 | sudo mkdir /d1
111 | echo "UUID= /d1 ext4 defaults 0 2" | sudo tee -a /etc/fstab
112 | sudo mount /d1
113 | ```
114 | 
115 | **Prepare the geth volume**
116 | If the geth EBS volume was not created at instance instantiation, create and attach it now. 
117 | 500GB gp2 disk for geth storage / operations 118 | Adjust the availability zone to match the instance's az 119 | ``` 120 | aws --profile notation ec2 create-volume --size 500 --region us-east-1 --availability-zone us-east-1a --volume-type gp2 121 | aws --profile notation ec2 attach-volume --device /dev/sdh --instance-id --volume-id 122 | ``` 123 | Then login to the instance and create filesystem, mount point, and add volume to fstab (device names may vary): 124 | ``` 125 | # ssh to instance and run locally on the box: 126 | sudo mkfs.ext4 /dev/xvdh 127 | sudo mkdir /d2 128 | echo "UUID= /d2 ext4 defaults 0 2" | sudo tee -a /etc/fstab 129 | sudo mount /dev/xvdh /d2 130 | ``` 131 | 132 | **Set the hostname** 133 | ``` 134 | sudo hostname tc001.mydomain.com 135 | # add FQDN to /etc/hosts 136 | # And replace contents of /etc/hostname (with only hostname, not FQDN) 137 | ``` 138 | 139 | 140 | **Filesystem operations** 141 | For this setup, All LivePeer-specific files (binaries, logs, ethereum accounts, keys, etc) live on a dedicated EBS volume under /d1. The EBS volumes can be backed-up via EBS snapshots and easily attached to a new instance if necessary. 142 | ``` 143 | sudo mkdir -p /d1/livepeer/logs 144 | sudo mv -i ~/livepeer_linux /d1/livepeer/bin 145 | sudo chown -R ubuntu:ubuntu /d1/livepeer 146 | cd /d1 147 | # check out repo 148 | git clone git@github.com:alexlines/livepeer-transcoder-ops.git 149 | ``` 150 | 151 | **Raise open filehandle limits** 152 | As noted in this [LivePeer FAQ](https://livepeer.readthedocs.io/en/latest/transcoding.html#faq), you can encounter the "too many open files" error when running a transcoder. As Eric notes in [this forum post](https://forum.livepeer.org/t/increase-file-limit-as-a-transcoder/170), raising the open file handle limit via pam will address this, but only for cases where you are running the livepeer node manually from an interactive session (e.g., you logged in via ssh): 153 | from https://bugs.launchpad.net/ubuntu/+source/upstart/+bug/938669 154 | > PAM is intended as a user oriented library, and daemons are by definition 155 | not users. In man limits.conf, it is clearly stated: 156 | > 157 | > Also, please note that all limit settings are set per login. They 158 | > are not global, nor are they permanent; existing only for the 159 | > duration of the session. 160 | See also the responses to this question about the same https://askubuntu.com/a/288534 161 | If you're running the LivePeer binary through non-interactive processes (upstart, systemd, etc) as we are here, you need to raise the limit via a different approach (see our systemd config below). We'll go ahead and raise the limits for interactive sessions in case you want to run manually to debug, etc. 162 | ``` 163 | echo "ubuntu soft nofile 50000" | sudo tee -a /etc/security/limits.conf 164 | echo "ubuntu hard nofile 50000" | sudo tee -a /etc/security/limits.conf 165 | ``` 166 | And edit `/etc/pam.d/login` and add or uncomment the line: 167 | ``` 168 | session required /lib/security/pam_limits.so 169 | ``` 170 | You don't have to restart the system, just log out and log back in, start some long-running or background process, note its PID and then look at: 171 | ``` 172 | cat /proc//limits 173 | ``` 174 | to confirm the limit has been raised. 
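If you want a quick way to confirm both limits, a minimal check might look like the following (assuming the livepeer daemon is already running, which happens later in this walkthrough - the `pgrep` pattern is just an example, and any long-running test process works the same way):
```
# interactive session limit (log out and back in first)
ulimit -n

# kernel-enforced limit for a running process, e.g. the livepeer daemon
grep "Max open files" /proc/$(pgrep -x livepeer)/limits
```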
175 | 176 | **Install geth and run in light mode** 177 | ``` 178 | sudo apt-get install -y software-properties-common 179 | sudo add-apt-repository -y ppa:ethereum/ethereum 180 | sudo apt-get update 181 | sudo apt-get install -y ethereum 182 | ``` 183 | In this configuration, geth's data, logs, and any keys (but not binaries, which get installed in default locations via apt-get install) all live on a dedicated EBS volume under /d2/ for easy backups via snapshots and to easily attach to a new instance. 184 | Setup geth data directories on the attached EBS volume. We're running geth under systemd and passing geth's options via a [toml config file](/private/config/geth/geth-config.toml). Copy the config file and systemd unit file into place: 185 | ``` 186 | sudo mkdir /d2/geth-data 187 | sudo chown -R ubuntu:ubuntu /d2/geth-data 188 | sudo cp /d1/livepeer-transcoder-ops/private/config/geth/systemd/geth.service /etc/systemd/system/ 189 | sudo cp /d1/livepeer-transcoder-ops/private/config/geth/geth-config.toml /d2/geth-data/ 190 | ``` 191 | If you plan to use existing .ethereum files or keys, copy them into place now in `/d2/geth-data/.ethereum` 192 | start geth via systemd and watch the logs: 193 | ``` 194 | sudo systemctl enable geth [or 'reenable' if you're overwriting existing config file] 195 | sudo systemctl start|stop|restart geth 196 | 197 | # check the status and logs 198 | sudo systemctl status geth 199 | sudo journalctl -u geth.service -f 200 | ``` 201 | 202 | Wait a few minutes and make sure geth is grabbing latest blocks. Sometimes you have to wait 15 minutes, kill it, and restart it before it begins syncing them. 203 | 204 | 205 | **Install systemd config for LivePeer** 206 | If you are going to use existing LivePeer account data, go ahead and copy it into place now in `/d1/livepeer/.lpData/` 207 | ``` 208 | sudo cp /d1/livepeer-transcoder-ops/private/config/livepeer/systemd/livepeer-transcoder.service /etc/systemd/system/ 209 | sudo systemctl enable livepeer-transcoder [or reenable if copying updated config] 210 | ``` 211 | 212 | Run LivePeer manually for the initial run to make sure it can: 213 | * Connect to the local geth instance 214 | * Detect your existing Ethereum account / keys if they are in place **OR** 215 | * Create a new Ethereum account if necessary. For my installation, I created this initial account *without* a passphrase for operational reasons, taking into account all the security considerations discussed elsewhere in this document. Your mileage may vary and my recommendation is to keep security as the top priority while adjusting for your own operational environment. 216 | 217 | **Running live on the Ethereum mainnet** 218 | * Transfer some ETH and LPT to your node - a small amount at first to confirm addresses and process. 219 | * Run the livepeer binary by hand as an initial test. I am running LivePeer with the following params (as seen in the [systemd unit config](https://github.com/alexlines/livepeer-transcoder-ops/blob/master/private/config/livepeer/systemd/livepeer-transcoder.service)): 220 | **Note:** This will run as a transcoder on **mainnet**, this is basically running live in production. 
221 | ```
222 | /d1/livepeer/bin/livepeer -datadir /d1/livepeer/.lpData -ipfsPath /d1/livepeer -log_dir /d1/livepeer/logs -ethUrl ws://127.0.0.1:8546 -v 6 -initializeRound -transcoder -publicIP -gasPrice 0
223 | ```
224 | 
225 | Now kill that process and start livepeer using systemd and watch the logs:
226 | ```
227 | kill $(pgrep livepeer)
228 | sudo systemctl start livepeer-transcoder
229 | # check status and watch the logs
230 | sudo systemctl status livepeer-transcoder
231 | sudo journalctl -u livepeer-transcoder.service -f
232 | ```
233 | 
234 | Now use the livepeer command line utility to enroll as a transcoder and set the transcoder config:
235 | ```
236 | /d1/livepeer/bin/livepeer_cli
237 | ```
238 | Choose `13. Invoke multi-step "become a transcoder"`
239 | 
240 | **Initial monitoring and notification**
241 | Any monitoring is better than no monitoring. If you can't integrate the LivePeer transcoder into your existing monitoring and alerting infrastructure for any reason, you can get basic monitoring and alerting functionality up and running quickly using AWS CloudWatch. I may include more detail on how to do this all quickly from the command line, but for now see the following docs:
242 | * [Install the CloudWatch Agent](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/Install-CloudWatch-Agent.html) on your instance
243 | * [Set up alarms](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/AlarmThatSendsEmail.html) on basic health and activity metrics such as disk space, swap activity, CPU, etc. Ideally you also want to alarm if LivePeer or geth isn't running or if `reward()` hasn't been called (more about that elsewhere in this document).
244 | * I added [my AWS CloudWatch agent config](private/config/aws-cloudwatch-agent) to this repo, but it's very basic.
245 | 
246 | 
247 | **Operational Notes**
248 | * Running with `-initializeRound` is a nice thing to do - the round can't start until somebody initializes it, and `reward()` cannot be called until the round has been started. Running with `-initializeRound` can get expensive when gas is high (I've seen ~$40).
249 | * Making sure `reward()` gets called every day is the most important thing right now, after making sure everything is up and running. This generally succeeds, but, in the absence of rock-solid monitoring and alerting on this event, you should manually check it every day. Go set a reminder in your calendar to check it every day at 4pm. While you're there, set another reminder at 9pm. If the call hasn't succeeded for the day, use the command line interface to call `reward()` manually. Some reasons I've seen that can cause it to fail:
250 | * You don't have enough ETH in your transcoder's account. You should monitor this and replenish as necessary.
251 | * If gas prices spike, this can cause slowness and cause transactions to fail, especially if you don't have enough funds (see above).
252 | * Unable to communicate with the geth node - I've seen the local geth node appear to run fine, continue to stay sync'd to the latest blocks, and log that it's submitting transactions (such as calls to reward), but they fail silently and no errors or warnings are produced. LivePeer [issue #455](https://github.com/livepeer/go-livepeer/issues/455) documents a problem similar to this. In such cases, I've first restarted the geth node, waited for it to sync (a couple minutes at most), and then restarted the livepeer node. This is annoying enough to consider restarting geth automatically on a nightly (!) basis. 
253 | * What is the best way to back up the account / credentials tied to the node? For this setup, just create a snapshot of the EBS volume.
254 | * What livepeer / ipfs / etc logs need to be rotated? It looks like the only logs that livepeer currently writes are ipfs logs which, in this configuration, are written to /d1/livepeer/logs/ and are automatically rotated every 10MB, though not automatically compressed.
255 | * Keep an eye on the LivePeer [Releases](https://github.com/livepeer/go-livepeer/releases) page for updates to the software, as well as the [discord](https://discord.gg/cBfD23u) and [forum](https://forum.livepeer.org/) discussions.
256 | 
257 | 
258 | **LivePeer questions I had but was able to answer**
259 | * **Note** Don't forget the [upcoming networking upgrades](https://forum.livepeer.org/t/upcoming-networking-upgrades/298)!
260 | * Is it ok to call `reward()` more than once per round? Yes, I think it will just say "reward already called for this round."
261 | * Is it worth setting up a dedicated ipfs node in the local network? Doesn't look like it's necessary at this time.
262 | * GPU - Is it worth it to run with a GPU? How much does it help? What specifically leverages the GPU - ffmpeg? Short answer: not yet.
263 | * Adding GPU acceleration to transcoding is still an [open issue](https://github.com/livepeer/lpms/issues/33).
264 | * GPU transcoding is not currently supported. According to Doug, "Currently we support deterministic CPU transcoding, but we're working on what you read in the above proposal to enable GPU transcoding in a way that will not disrupt GPU mining operations"
265 | * In [issue #51 Transcoder Design](https://github.com/livepeer/lpms/issues/51#issuecomment-362502511), j0sh goes into a bit more depth on which areas may benefit from a GPU:
266 | > There are some workloads in the transcoding pipeline that might benefit from GPU (such as colorspace conversion), but encoding generally benefits more from SIMD (AVX) or fixed function hardware (QuickSync). That being said, FFMpeg already supports the Intel MediaSync SDK which I believe is able to run certain operations on the (Intel?) GPU natively. I'm hoping that enabling MediaSync support is as simple as installing the library and setting the ffmpeg configure flag. We'd likely need run-time hardware detection as well.
267 | > GPU's might help more with verification, but it'd depend on the method we choose.
268 | * See also the [Transcoder Design doc](https://github.com/livepeer/lpms/wiki/Transcoder-Design)
269 | * There is a [GPU transcoding verification proposal](https://github.com/livepeer/research/issues/12) in [research projects](https://github.com/livepeer/research/projects/1#card-9975184)
270 | * When GPUs can be meaningfully helpful, [P2 GPU instances](https://aws.amazon.com/ec2/instance-types/p2/) are one (very expensive) option, or [Elastic GPUs](https://aws.amazon.com/ec2/elastic-gpus/details/), which can be attached to certain instance types.
271 | * What do you need to do to transfer your transcoder identity to a new box, e.g. if you need to migrate hardware for some reason? The identity of the transcoder node is just the eth address of the account, so as long as you migrate that to a new machine it should be fine - back up and restore your livepeer .lpData directory (but it's very sensitive and contains your private key, so be very careful how and where you back it up, encrypt it, limit access, and make sure it's not on a publicly accessible machine, storage account, etc.). 
272 | * Explain difference between reward, stake, pending stake, etc. 273 | 274 | 275 | **LivePeer open questions** 276 | * How to know if you've been slashed? 277 | * Specifying `-log_dir` on the command line only moved where the ipfs log file got written, `livepeer` still wrote its log to stderr. 278 | * Is it possible to transfer LPT from a transcoder to another account without `unbonding()` the entire stake? Is this done via CLI option #11 "transfer" or can you unbond (a certain number) and then call "withdraw stake" on just that portion? 279 | * What ports should be open to internal network? Open to the world? 280 | * How much ETH should you keep in your transcoder account? 281 | * Capacity planning - how to estimate transcoding rate (how long to transcode each second of output video) based on machine resources? 282 | * How can you run multiple transcoder instances, behind a load balancer, for example, but have them all use the same identity? Because you just register as a single transcoder id, right? Pretty sure it's not yet possible. 283 | * How to monitor demand in different regions? Would be great to know that there is more demand than capacity in Asia/Pacific, for example, and to spin up capacity there. 284 | 285 | 286 | 287 | **Reference** 288 | * Master reference docs and info is aggregated in this thread - [Transcoder Megathread - Start here to learn about playing the role of transcoder on Livepeer](https://forum.livepeer.org/t/transcoder-megathread-start-here-to-learn-about-playing-the-role-of-transcoder-on-livepeer/190) 289 | 290 | ## OPs TODO 291 | - Configuration 292 | - Use actual config management 293 | - Testing 294 | - Automated deployment 295 | - Docker? 296 | - Traffic management 297 | - Load balancing 298 | - Automatic failover 299 | - Regional routing / responding to regional demand 300 | - Auto-scaling 301 | - Monitoring, Alerting, Metrics Collection 302 | - Better health checks of LivePeer instance and processes 303 | - Go and systemd both support watchdog for process health monitoring http://0pointer.de/blog/projects/watchdog.html 304 | - It would be nice if the livepeer internal webserver supported a call to `/health` for example, which could be checked by a nagios/etc plugin, as well as by a load balancer which was monitoring the health of a pool of transcoders. You could sortof fake this today by using a call to `http://:8935/nodeID` and make sure it returns the expected nodeID, but a proper `/health` function could better check a few vital signs. 305 | - Monitor and alert if reward() doesn't get called. A few ways to monitor this: 306 | - I started a very basic checker script, mostly based on code in [livepeer_cli](https://github.com/livepeer/go-livepeer/tree/master/cmd/livepeer_cli) that queries the local livepeer internal webserver for LastRewardRound and, if it doesn't match currentRound, could send an alert. 
There's not much there, but you can [see the code here](utils/monitor_reward_call.go).
307 | - Query the local livepeer node via http:
308 | ```
309 | $ curl http://127.0.0.1:8935/transcoderInfo
310 | {"Address":"0x50d69f8253685999b4c74a67ccb3d240e2a56ed6","LastRewardRound":1018,"RewardCut":30000,"FeeShare":300000,"PricePerSegment":150000000000,"PendingRewardCut":30000,"PendingFeeShare":300000,"PendingPricePerSegment":150000000000,"DelegatedStake":6454553077282307328907,"Active":true,"Status":"Registered"}
311 | ```
312 | and if `LastRewardRound` doesn't match the current round (which you have to get via another call), then `reward()` has not yet been called. This would be straightforward to monitor automatically and you could alert on this after a certain time of day.
313 | - Basic first-pass alerting could be done through the checker script sending SMS via the Twilio API, or I wonder if you could publish custom events to AWS CloudWatch and set alarms there to get more sophisticated logic and thresholds.
314 | - You also really need to know where in the current round you are to set reasonable alert thresholds - what is the current round length, the start block for the current round, and the number of blocks to wait before the next round, as calculated in the LivePeer [roundservice](https://github.com/livepeer/go-livepeer/blob/4589a1364fa9d29e9d196d259f1f235116d45953/eth/eventservices/roundservice.go#L137) and [wizard_stats](https://github.com/livepeer/go-livepeer/blob/ba011c60094edc2595020a11200fdcebb03da937/cmd/livepeer_cli/wizard_stats.go#L67). Ideally you could leverage the LivePeer codebase to do this, rather than duplicating the work.
315 | - The highest-certainty check would be to query the Ethereum blockchain for the tx where your node called `reward()`
316 | - Could do this by querying the local geth node via the [JSON-RPC api](https://github.com/ethereum/wiki/wiki/JSON-RPC), e.g. using [eth_getTransactionByHash](https://github.com/ethereum/wiki/wiki/JSON-RPC#eth_gettransactionbyhash):
317 | ```
318 | curl -H "Content-Type: application/json" -X POST --data '{"jsonrpc":"2.0","method":"eth_getTransactionByHash","params":["0xcde8ec889fa7ed433d2a55c5f34f1be98f4dad97791a27c258d18eb1bad17d0f"],"id":1}' http://localhost:8545
319 | ```
320 | but there's not an easy way to list recent transactions for an account or contract ... it looks like filters/logs are the way to do this? 
https://github.com/ethereum/go-ethereum/issues/1897 or here https://github.com/ethereum/go-ethereum/issues/2104 321 | - Use the [etherscan API](https://etherscan.io/apis), the `input` value below indicates a call to `reward()`: 322 | ``` 323 | $ curl 'http://api.etherscan.io/api?module=account&action=txlist&address=&startblock=5858336&endblock=5858337&sort=desc&apikey=' 324 | { 325 | "status":"1", 326 | "message":"OK", 327 | "result":[ 328 | { 329 | "blockNumber":"5944652", 330 | "blockHash":"0x58d1fcc7ec68dd001a3c166572b3a2d308f4b0598a92faf46e509abb70118de3", 331 | "timeStamp":"1531311107", 332 | "hash":"0x5960471df2da7cc405a4bcb5a195c73a711b37ada828a56672b4ef40c891b918", 333 | "nonce":"203", 334 | "transactionIndex":"136", 335 | "from":"0x345551571c5ef20111c6168b9a498dfb836e7c09", 336 | "to":"0x511bc4556d823ae99630ae8de28b9b80df90ea2e", 337 | "value":"0", 338 | "gas":"262287", 339 | "gasPrice":"8000000000", 340 | "input":"0x228cb733", 341 | "contractAddress":"", 342 | "cumulativeGasUsed":"3474053", 343 | "txreceipt_status":"1", 344 | "gasUsed":"213639", 345 | "confirmations":"1143", 346 | "isError":"0" 347 | }] 348 | } 349 | ``` 350 | - Monitor amount of ETH in transcoder's account and alert if below certain threshold. 351 | - Metrics collection - one idea is [publish metrics to AWS CloudWatch](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/publishingMetrics.html), possibly with [custom events](https://aws.amazon.com/blogs/security/how-to-use-amazon-cloudwatch-events-to-monitor-application-health/)? Though I haven't researched if feasible in this case. 352 | - Security 353 | - Better management of Ethereum private keys 354 | - Possibly using Hashicorp's Vault for private keys or AWS KMS 355 | - Could store your private key in [AWS Parameter Store](https://aws.amazon.com/systems-manager/features/#Parameter_Store) in AWS [Key Management Service](https://aws.amazon.com/kms/) and write a wrapper script which can retrieve the private key 356 | - [Getting started with AWS Parameter store](https://docs.aws.amazon.com/systems-manager/latest/userguide/systems-manager-paramstore.html) see also How AWS [Systems Manager Parameter Store Uses AWS KMS](https://docs.aws.amazon.com/kms/latest/developerguide/services-parameter-store.html?shortFooter=true) 357 | - EBS Volumes 358 | - Automate EBS snapshots 359 | - Encrypt EBS Volumes by default? 360 | - Add GPU's, optionally 361 | - Local geth node 362 | - Would it benefit from being a fast-sync node or a full node? 363 | - Should probably move geth to a dedicated instance that multiple local transcoder nodes can connect to 364 | - Should probably run a local geth cluster in each region you plan to run transcoders 365 | - geth unfortunately seems to get stalled / stuck and appears to benefit from periodically stopping and restarting it. 366 | - Log rotation for LivePeer and geth logs 367 | - Helpful to give the instance an DNS and/or ENS name? 368 | - Better documentation of AWS Security groups, IAM users and permissions, ssh gateway host, etc 369 | 370 | 371 | -------------------------------------------------------------------------------- /config-steps-brief.sh: -------------------------------------------------------------------------------- 1 | # brief summary of commands for my approach to running LivePeer transcoder node 2 | 3 | # this is just an overview and this script is NOT meant to be executed, 4 | # it will not work, and not all commands are meant to be run on the same instance. 
5 | 
6 | # instance launch
7 | aws --profile notation ec2 run-instances \
8 |   --cli-input-json file://livepeer-transcoder-ec2-config.json
9 | 
10 | # allocate elastic ip
11 | aws --profile notation ec2 allocate-address
12 | aws --profile notation ec2 associate-address --instance-id --public-ip
13 | 
14 | # get most recent LivePeer release
15 | cd ~
16 | curl -s -L https://github.com/livepeer/go-livepeer/releases/download/0.2.4/livepeer_linux.tar.gz > livepeer_linux.tar.gz
17 | gzip -d -c livepeer_linux.tar.gz | tar xvf -
18 | 
19 | # prepare LivePeer volume
20 | # if the LivePeer EBS volume was not created at instance instantiation, create and attach now
21 | aws --profile notation ec2 create-volume --size 100 --region us-east-1 --availability-zone us-east-1a --volume-type gp2
22 | aws --profile notation ec2 attach-volume --device /dev/sdg --instance-id --volume-id
23 | 
24 | # log in to the instance and create filesystem, mount point, and add volume to fstab (device names may vary):
25 | sudo mkfs.ext4 /dev/xvdg
26 | sudo mkdir /d1
27 | echo "UUID= /d1 ext4 defaults 0 2" | sudo tee -a /etc/fstab
28 | sudo mount /d1
29 | 
30 | # prepare geth volume
31 | # If the geth EBS volume was not created at instance instantiation, create and attach now.
32 | aws --profile notation ec2 create-volume --size 500 --region us-east-1 --availability-zone us-east-1a --volume-type gp2
33 | aws --profile notation ec2 attach-volume --device /dev/sdh --instance-id --volume-id
34 | 
35 | # log in to the instance and create filesystem, mount point, and add volume to fstab (device names may vary):
36 | sudo mkfs.ext4 /dev/xvdh
37 | sudo mkdir /d2
38 | echo "UUID= /d2 ext4 defaults 0 2" | sudo tee -a /etc/fstab
39 | sudo mount /dev/xvdh /d2
40 | 
41 | # set hostname
42 | sudo hostname tc001.mydomain.com
43 | # add fqdn to /etc/hosts
44 | # and replace contents of /etc/hostname (with only hostname, not FQDN)
45 | 
46 | # set up directories
47 | sudo mkdir -p /d1/livepeer/logs
48 | sudo mv -i ~/livepeer_linux /d1/livepeer/bin
49 | sudo chown -R ubuntu:ubuntu /d1/livepeer
50 | cd /d1
51 | 
52 | # check out repo
53 | git clone git@github.com:alexlines/livepeer-transcoder-ops.git
54 | 
55 | # raise open filehandle limits
56 | echo "ubuntu soft nofile 50000" | sudo tee -a /etc/security/limits.conf
57 | echo "ubuntu hard nofile 50000" | sudo tee -a /etc/security/limits.conf
58 | 
59 | # edit /etc/pam.d/login and add or uncomment the line:
60 | session required /lib/security/pam_limits.so
61 | 
62 | # install geth
63 | sudo apt-get install -y software-properties-common
64 | sudo add-apt-repository -y ppa:ethereum/ethereum
65 | sudo apt-get update
66 | sudo apt-get install -y ethereum
67 | 
68 | # set up geth directories and copy config files into place
69 | sudo mkdir /d2/geth-data
70 | sudo chown -R ubuntu:ubuntu /d2/geth-data
71 | sudo cp /d1/livepeer-transcoder-ops/private/config/geth/systemd/geth.service /etc/systemd/system/
72 | sudo cp /d1/livepeer-transcoder-ops/private/config/geth/geth-config.toml /d2/geth-data/
73 | 
74 | # copy any existing .ethereum files or keys into place now in /d2/geth-data/.ethereum
75 | 
76 | # enable geth under systemd and start geth
77 | sudo systemctl enable geth
78 | sudo systemctl start geth
79 | 
80 | # check the status and logs
81 | sudo systemctl status geth
82 | sudo journalctl -u geth.service -f
83 | 
84 | 
85 | # If you are going to use existing LivePeer account data, go ahead and copy it into place now in /d1/livepeer/.lpData/
86 | 
87 | # copy systemd unit file for LivePeer into place
88 | sudo cp 
/d1/livepeer-transcoder-ops/private/config/livepeer/systemd/livepeer-transcoder.service /etc/systemd/system/ 89 | sudo systemctl enable livepeer-transcoder 90 | 91 | # start LivePeer using systemd 92 | sudo systemctl start livepeer-transcoder 93 | 94 | # check status and watch the logs 95 | sudo systemctl status livepeer-transcoder 96 | sudo journalctl -u livepeer-transcoder.service -f 97 | 98 | # now use the livepeer command line utility to enroll as a transcoder and set transcoder config: 99 | # Choose 13. Invoke multi-step "become a transcoder" 100 | 101 | /d1/livepeer/bin/livepeer_cli 102 | 103 | 104 | -------------------------------------------------------------------------------- /private/config/aws-cloudwatch-agent/amazon-cloudwatch-agent.json: -------------------------------------------------------------------------------- 1 | { 2 | "logs": { 3 | "logs_collected": { 4 | "files": { 5 | "collect_list": [ 6 | { 7 | "file_path": "/d1/livepeer/logs/ipfs.log", 8 | "log_group_name": "ipfs.log" 9 | }, 10 | { 11 | "file_path": "/var/log/syslog", 12 | "log_group_name": "syslog" 13 | } 14 | ] 15 | } 16 | } 17 | }, 18 | "metrics": { 19 | "append_dimensions": { 20 | "AutoScalingGroupName": "${aws:AutoScalingGroupName}", 21 | "ImageId": "${aws:ImageId}", 22 | "InstanceId": "${aws:InstanceId}", 23 | "InstanceType": "${aws:InstanceType}" 24 | }, 25 | "metrics_collected": { 26 | "cpu": { 27 | "measurement": [ 28 | "cpu_usage_idle", 29 | "cpu_usage_iowait", 30 | "cpu_usage_user", 31 | "cpu_usage_system" 32 | ], 33 | "metrics_collection_interval": 60, 34 | "resources": [ 35 | "*" 36 | ], 37 | "totalcpu": false 38 | }, 39 | "disk": { 40 | "measurement": [ 41 | "used_percent", 42 | "inodes_free" 43 | ], 44 | "metrics_collection_interval": 60, 45 | "resources": [ 46 | "*" 47 | ] 48 | }, 49 | "diskio": { 50 | "measurement": [ 51 | "io_time", 52 | "write_bytes", 53 | "read_bytes", 54 | "writes", 55 | "reads" 56 | ], 57 | "metrics_collection_interval": 60, 58 | "resources": [ 59 | "*" 60 | ] 61 | }, 62 | "mem": { 63 | "measurement": [ 64 | "mem_used_percent" 65 | ], 66 | "metrics_collection_interval": 60 67 | }, 68 | "netstat": { 69 | "measurement": [ 70 | "tcp_established", 71 | "tcp_time_wait" 72 | ], 73 | "metrics_collection_interval": 60 74 | }, 75 | "swap": { 76 | "measurement": [ 77 | "swap_used_percent" 78 | ], 79 | "metrics_collection_interval": 60 80 | } 81 | } 82 | } 83 | } -------------------------------------------------------------------------------- /private/config/aws-cloudwatch-agent/amazon-cloudwatch-agent.toml: -------------------------------------------------------------------------------- 1 | [agent] 2 | collection_jitter = "0s" 3 | debug = false 4 | flush_interval = "1s" 5 | flush_jitter = "0s" 6 | hostname = "" 7 | interval = "60s" 8 | logfile = "/opt/aws/amazon-cloudwatch-agent/logs/amazon-cloudwatch-agent.log" 9 | metric_batch_size = 1000 10 | metric_buffer_limit = 10000 11 | omit_hostname = false 12 | precision = "" 13 | quiet = false 14 | round_interval = false 15 | 16 | [inputs] 17 | 18 | [[inputs.cpu]] 19 | fieldpass = ["usage_idle", "usage_iowait", "usage_user", "usage_system"] 20 | interval = "60s" 21 | percpu = true 22 | totalcpu = false 23 | 24 | [[inputs.disk]] 25 | fieldpass = ["used_percent", "inodes_free"] 26 | interval = "60s" 27 | 28 | [[inputs.diskio]] 29 | fieldpass = ["io_time", "write_bytes", "read_bytes", "writes", "reads"] 30 | interval = "60s" 31 | report_deltas = true 32 | 33 | [[inputs.mem]] 34 | fieldpass = ["used_percent"] 35 | interval = "60s" 36 | 37 | 
[[inputs.netstat]] 38 | fieldpass = ["tcp_established", "tcp_time_wait"] 39 | interval = "60s" 40 | 41 | [[inputs.swap]] 42 | fieldpass = ["used_percent"] 43 | interval = "60s" 44 | 45 | [[inputs.tail]] 46 | data_format = "value" 47 | data_type = "string" 48 | file_state_folder = "/opt/aws/amazon-cloudwatch-agent/logs/state" 49 | name_override = "raw_log_line" 50 | 51 | [[inputs.tail.file_config]] 52 | file_path = "/d1/livepeer/logs/ipfs.log" 53 | from_beginning = true 54 | log_group_name = "ipfs.log" 55 | pipe = false 56 | 57 | [[inputs.tail.file_config]] 58 | file_path = "/var/log/syslog" 59 | from_beginning = true 60 | log_group_name = "syslog" 61 | pipe = false 62 | 63 | [outputs] 64 | 65 | [[outputs.cloudwatch]] 66 | force_flush_interval = "60s" 67 | namespace = "CWAgent" 68 | region = "us-east-1" 69 | tagexclude = ["host"] 70 | [outputs.cloudwatch.tagdrop] 71 | log_group_name = ["*"] 72 | 73 | [[outputs.cloudwatchlogs]] 74 | file_name_field_key = "file_name" 75 | file_state_folder = "/opt/aws/amazon-cloudwatch-agent/logs/state" 76 | log_entry_field_key = "value" 77 | log_group_name_tag_key = "log_group_name" 78 | log_stream_name = "i-07f7578cdaedb4c0f" 79 | log_stream_name_tag_key = "log_stream_name" 80 | log_timestamp_field_key = "log_timestamp" 81 | multi_line_start_field_key = "multi_line_start" 82 | offset_field_key = "offset" 83 | region = "us-east-1" 84 | 85 | [processors] 86 | 87 | [[processors.ec2tagger]] 88 | ec2_instance_tag_keys = ["aws:autoscaling:groupName"] 89 | ec2_metadata_tags = ["ImageId", "InstanceId", "InstanceType"] 90 | [processors.ec2tagger.tagdrop] 91 | log_group_name = ["*"] 92 | -------------------------------------------------------------------------------- /private/config/aws-instances/livepeer-transcoder-ec2-config.json: -------------------------------------------------------------------------------- 1 | { 2 | "BlockDeviceMappings": [ 3 | { 4 | "DeviceName": "/dev/sda1", 5 | "Ebs": { 6 | "DeleteOnTermination": true, 7 | "VolumeSize": 32, 8 | "VolumeType": "gp2" 9 | } 10 | }, 11 | { 12 | "DeviceName": "/dev/sdg", 13 | "Ebs": { 14 | "DeleteOnTermination": false, 15 | "VolumeSize": 100, 16 | "VolumeType": "gp2" 17 | } 18 | }, 19 | { 20 | "DeviceName": "/dev/sdh", 21 | "Ebs": { 22 | "DeleteOnTermination": false, 23 | "VolumeSize": 500, 24 | "VolumeType": "gp2" 25 | } 26 | } 27 | ], 28 | "ImageId": "ami-0b425589c7bb7663d", 29 | "InstanceType": "c4.4xlarge", 30 | "KeyName": "form72", 31 | "Monitoring": { 32 | "Enabled": true 33 | }, 34 | "SecurityGroups": [ 35 | "livepeer-inbound" 36 | ], 37 | "DryRun": false, 38 | "EbsOptimized": true, 39 | "InstanceInitiatedShutdownBehavior": "stop" 40 | } 41 | -------------------------------------------------------------------------------- /private/config/geth/geth-config.toml: -------------------------------------------------------------------------------- 1 | # this file was generated by running the following: 2 | # geth --datadir "/d2/geth-data/.ethereum" --cache 512 --maxpeers 25 \ 3 | # --syncmode light --rpc --rpcapi db,eth,net,web3 --ws \ 4 | # --wsorigins "*" dumpconfig > config.toml 5 | # 6 | # and should be equivalent to running: 7 | # geth --datadir "/d2/geth-data/.ethereum" --cache 512 --maxpeers 25 \ 8 | # --syncmode light --rpc --rpcapi db,eth,net,web3 --ws --wsorigins "*" 9 | # 10 | [Eth] 11 | NetworkId = 1 12 | SyncMode = "light" 13 | NoPruning = false 14 | LightPeers = 100 15 | DatabaseCache = 256 16 | TrieCleanCache = 128 17 | TrieDirtyCache = 128 18 | TrieTimeout = 3600000000000 19 | MinerGasFloor = 
8000000 20 | MinerGasCeil = 8000000 21 | MinerGasPrice = 1000000000 22 | MinerRecommit = 3000000000 23 | MinerNoverify = false 24 | EnablePreimageRecording = false 25 | EWASMInterpreter = "" 26 | EVMInterpreter = "" 27 | 28 | [Eth.Ethash] 29 | CacheDir = "ethash" 30 | CachesInMem = 2 31 | CachesOnDisk = 3 32 | DatasetDir = "/d2/geth-data/.ethash" 33 | DatasetsInMem = 1 34 | DatasetsOnDisk = 2 35 | PowMode = 0 36 | 37 | [Eth.TxPool] 38 | Locals = [] 39 | NoLocals = false 40 | Journal = "transactions.rlp" 41 | Rejournal = 3600000000000 42 | PriceLimit = 1 43 | PriceBump = 10 44 | AccountSlots = 16 45 | GlobalSlots = 4096 46 | AccountQueue = 64 47 | GlobalQueue = 1024 48 | Lifetime = 10800000000000 49 | 50 | [Eth.GPO] 51 | Blocks = 20 52 | Percentile = 60 53 | 54 | [Shh] 55 | MaxMessageSize = 1048576 56 | MinimumAcceptedPOW = 2e-01 57 | RestrictConnectionBetweenLightClients = true 58 | 59 | [Node] 60 | DataDir = "/d2/geth-data/.ethereum" 61 | IPCPath = "geth.ipc" 62 | HTTPHost = "127.0.0.1" 63 | HTTPPort = 8545 64 | HTTPVirtualHosts = ["localhost"] 65 | HTTPModules = ["db", "eth", "net", "web3"] 66 | WSHost = "127.0.0.1" 67 | WSPort = 8546 68 | WSOrigins = ["*"] 69 | WSModules = ["net", "web3", "eth", "shh"] 70 | 71 | [Node.P2P] 72 | MaxPeers = 25 73 | NoDiscovery = true 74 | DiscoveryV5 = true 75 | BootstrapNodes = ["enode://a979fb575495b8d6db44f750317d0f4622bf4c2aa3365d6af7c284339968eef29b69ad0dce72a4d8db5ebb4968de0e3bec910127f134779fbcb0cb6d3331163c@52.16.188.185:30303", "enode://3f1d12044546b76342d59d4a05532c14b85aa669704bfe1f864fe079415aa2c02d743e03218e57a33fb94523adb54032871a6c51b2cc5514cb7c7e35b3ed0a99@13.93.211.84:30303", "enode://78de8a0916848093c73790ead81d1928bec737d565119932b98c6b100d944b7a95e94f847f689fc723399d2e31129d182f7ef3863f2b4c820abbf3ab2722344d@191.235.84.50:30303", "enode://158f8aab45f6d19c6cbf4a089c2670541a8da11978a2f90dbf6a502a4a3bab80d288afdbeb7ec0ef6d92de563767f3b1ea9e8e334ca711e9f8e2df5a0385e8e6@13.75.154.138:30303", "enode://1118980bf48b0a3640bdba04e0fe78b1add18e1cd99bf22d53daac1fd9972ad650df52176e7c7d89d1114cfef2bc23a2959aa54998a46afcf7d91809f0855082@52.74.57.123:30303", "enode://979b7fa28feeb35a4741660a16076f1943202cb72b6af70d327f053e248bab9ba81760f39d0701ef1d8f89cc1fbd2cacba0710a12cd5314d5e0c9021aa3637f9@5.1.83.226:30303"] 76 | BootstrapNodesV5 = ["enode://06051a5573c81934c9554ef2898eb13b33a34b94cf36b202b69fde139ca17a85051979867720d4bdae4323d4943ddf9aeeb6643633aa656e0be843659795007a@35.177.226.168:30303", "enode://0cc5f5ffb5d9098c8b8c62325f3797f56509bff942704687b6530992ac706e2cb946b90a34f1f19548cd3c7baccbcaea354531e5983c7d1bc0dee16ce4b6440b@40.118.3.223:30304", "enode://1c7a64d76c0334b0418c004af2f67c50e36a3be60b5e4790bdac0439d21603469a85fad36f2473c9a80eb043ae60936df905fa28f1ff614c3e5dc34f15dcd2dc@40.118.3.223:30306", "enode://85c85d7143ae8bb96924f2b54f1b3e70d8c4d367af305325d30a61385a432f247d2c75c45c6b4a60335060d072d7f5b35dd1d4c45f76941f62a4f83b6e75daaf@40.118.3.223:30307"] 77 | StaticNodes = [] 78 | TrustedNodes = [] 79 | ListenAddr = ":30303" 80 | EnableMsgEvents = false 81 | 82 | [Node.HTTPTimeouts] 83 | ReadTimeout = 30000000000 84 | WriteTimeout = 30000000000 85 | IdleTimeout = 120000000000 86 | 87 | [Dashboard] 88 | Host = "localhost" 89 | Port = 8080 90 | Refresh = 5000000000 91 | -------------------------------------------------------------------------------- /private/config/geth/systemd/geth.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Ethereum go client 3 | After=syslog.target 
network.target 4 | 5 | StartLimitIntervalSec=0 6 | 7 | [Service] 8 | User=ubuntu 9 | Group=ubuntu 10 | Environment=HOME=/d2/geth-data 11 | Type=simple 12 | SyslogIdentifier=geth-client 13 | LimitNOFILE=50000 14 | WorkingDirectory=/d2/geth-data 15 | ExecStart=/usr/bin/geth --config /d2/geth-data/geth-config.toml 16 | KillMode=process 17 | KillSignal=SIGINT 18 | TimeoutStopSec=180 19 | SendSIGKILL=no 20 | Restart=always 21 | RestartSec=20 22 | 23 | [Install] 24 | WantedBy=multi-user.target 25 | -------------------------------------------------------------------------------- /private/config/livepeer/systemd/livepeer-transcoder.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=LivePeer Transcoder Service 3 | After=geth.service 4 | 5 | # disable start rate limiting, which can result in systemd abandoning 6 | # a service if it fails to start too many times within the interval. 7 | # There are good reasons to limit restarts, but we want to disable 8 | # that for now and possible revisit in the future 9 | StartLimitIntervalSec=0 10 | 11 | 12 | [Service] 13 | User=ubuntu 14 | Group=ubuntu 15 | SyslogIdentifier=livepeer-transcoder 16 | 17 | # raise the open filehandle limit 18 | LimitNOFILE=50000 19 | 20 | # location of livepeer binary 21 | WorkingDirectory=/d1/livepeer/bin 22 | 23 | # LivePeer runtime args: 24 | # -datadir, -ipfsPath, -log_dir: use attached vol for all LivePeer data 25 | # -ethUrl : connect to our own geth instance 26 | # -initializeRound : try to initialize the round if necessary 27 | # -gasPrice 0 : rely on the gas oracle to automatically set the price 28 | # -transcoder : run as a transcoder 29 | # -monitor : send metrics to monitoring endpoint http://metrics-mainnet.livepeer.org/api/events 30 | # -serviceAddr : public IP:port of transcoder that broadcasters connect to, must match on-chain Service URI 31 | # -v 6 : run with verbose logging for now 32 | 33 | ExecStart=/d1/livepeer/bin/livepeer -datadir /d1/livepeer/.lpData -ipfsPath /d1/livepeer -log_dir /d1/livepeer/logs -ethUrl ws://127.0.0.1:8546 -initializeRound -transcoder -monitor -serviceAddr tx.form72.com:8443 -gasPrice 0 -v 6 34 | 35 | # always attempt to restart the service 36 | Restart=always 37 | 38 | # seeking balance between default of attempting restart every 100ms 39 | # which is probably overkill here, and possibly not being available 40 | # to serve requests if wait too long between attempts 41 | RestartSec=3 42 | 43 | [Install] 44 | WantedBy=multi-user.target 45 | -------------------------------------------------------------------------------- /utils/monitor_reward_call.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "io/ioutil" 7 | "math/big" 8 | "net/http" 9 | "strconv" 10 | 11 | "github.com/golang/glog" 12 | "github.com/ethereum/go-ethereum/common" 13 | 14 | ) 15 | 16 | // these types and funcs are copied or based on files from go-livepeer, mostly 17 | // from files in github.com/livepeer/go-livepeer/cmd/livepeer_cli 18 | // This is a quick proof of concept, they should more properly be imported 19 | // and used as a base to build on. 20 | 21 | // The initial logic is that if the currentRound and LastRewardRound are not 22 | // the same, then reward() has not yet been called for this round. 
23 | // A better way to check this: 24 | // - has current round been initialized 25 | // - what is current round length 26 | // - what is start block for current round 27 | // - how many blocks to wait until next round 28 | // - alert based on configurable threshold if reward() has not been 29 | // called and there are less than n blocks until next round 30 | 31 | // Think also about what action a responding operator can take in this case, 32 | // is it worth paging someone at 3am? What can the script attempt to 33 | // remedy the situation? 34 | 35 | // TODO's also include: 36 | // - accept command lines args (e.g. where does livepeer server live?) 37 | // - should alert if reward() has not been called, subject to thresholds, etc 38 | 39 | 40 | type Transcoder struct { 41 | Address common.Address 42 | // tech debt: 43 | // changed type of LastRewardRound 44 | // from big.Int to int to simplify 45 | // comparison to currentRound 46 | // but should be changed back to 47 | // big.Int at some point 48 | //LastRewardRound *big.Int 49 | LastRewardRound *int 50 | RewardCut *big.Int 51 | FeeShare *big.Int 52 | PricePerSegment *big.Int 53 | PendingRewardCut *big.Int 54 | PendingFeeShare *big.Int 55 | PendingPricePerSegment *big.Int 56 | DelegatedStake *big.Int 57 | Active bool 58 | Status string 59 | } 60 | 61 | type wizard struct { 62 | endpoint string // Local livepeer node 63 | httpPort string 64 | host string 65 | } 66 | 67 | 68 | func main() { 69 | // tech debt: hard-coding these values: 70 | lp_host := "localhost" 71 | lp_port := "8935" 72 | 73 | w := &wizard{ 74 | endpoint: fmt.Sprintf("http://%v:%v/status", lp_host, lp_port), 75 | httpPort: lp_port, 76 | host: lp_host, 77 | } 78 | w.run() 79 | } 80 | 81 | func (w *wizard) run() { 82 | // Make sure there is a local node running 83 | _, err := http.Get(w.endpoint) 84 | if err != nil { 85 | glog.Errorf("Cannot find local node. Is your node running on http:%v?", w.httpPort) 86 | return 87 | } 88 | 89 | nodeid := w.getNodeID() 90 | currentRound := w.currentRound() 91 | t, err := w.getTranscoderInfo() 92 | if err != nil { 93 | glog.Errorf("Error getting transcoder info: %v", err) 94 | return 95 | } 96 | 97 | // if the currentRound and LastRewardRound are not the same, then 98 | // reward() has not yet been called for this round. 
99 | 	// treat parse failures and missing data as "not called" for this first pass
100 | 	if currentRoundNum, err := strconv.Atoi(currentRound); err != nil || t.LastRewardRound == nil || currentRoundNum != *t.LastRewardRound {
101 | 		fmt.Printf("reward has not been called for current round %v\n", currentRound)
102 | 		// possibly alert here, based on configurable thresholds
103 | 	}
104 | 	// another option is to return true or false
105 | 
106 | 	// don't be so chatty in the future, but debugging for now:
107 | 	fmt.Printf("current round : %v\n", currentRound)
108 | 	fmt.Printf("nodeid : %v\n", nodeid)
109 | 	fmt.Printf("Status : %v\n", t.Status)
110 | 	fmt.Printf("Active : %v\n", t.Active)
111 | 	if t.LastRewardRound != nil { fmt.Printf("Last Reward Round: %v\n", *t.LastRewardRound) }
112 | 
113 | }
114 | 
115 | func (w *wizard) getNodeID() string {
116 | 	return httpGet(fmt.Sprintf("http://%v:%v/nodeID", w.host, w.httpPort))
117 | }
118 | 
119 | func httpGet(url string) string {
120 | 	resp, err := http.Get(url)
121 | 	if err != nil {
122 | 		glog.Errorf("Error sending HTTP GET: %v", err)
123 | 		return ""
124 | 	}
125 | 
126 | 	defer resp.Body.Close()
127 | 	result, err := ioutil.ReadAll(resp.Body)
128 | 	if err != nil || string(result) == "" {
129 | 		return ""
130 | 	}
131 | 	return string(result)
132 | 
133 | }
134 | 
135 | func (w *wizard) currentRound() string {
136 | 	return httpGet(fmt.Sprintf("http://%v:%v/currentRound", w.host, w.httpPort))
137 | }
138 | 
139 | func (w *wizard) getTranscoderInfo() (Transcoder, error) {
140 | 	resp, err := http.Get(fmt.Sprintf("http://%v:%v/transcoderInfo", w.host, w.httpPort))
141 | 	if err != nil {
142 | 		return Transcoder{}, err
143 | 	}
144 | 
145 | 	defer resp.Body.Close()
146 | 
147 | 	result, err := ioutil.ReadAll(resp.Body)
148 | 	if err != nil {
149 | 		return Transcoder{}, err
150 | 	}
151 | 
152 | 	var tInfo Transcoder
153 | 	err = json.Unmarshal(result, &tInfo)
154 | 	if err != nil {
155 | 		return Transcoder{}, err
156 | 	}
157 | 
158 | 	return tInfo, nil
159 | }
160 | 
--------------------------------------------------------------------------------