├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── diagrams ├── fack_contract.png └── pipecat_flow.png ├── logo.png └── pipecat.go /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | services: 3 | - rabbitmq 4 | env: 5 | - AMQP_URI=amqp://guest:guest@localhost:5672/ 6 | go: 7 | - 1.8 8 | - 1.9 9 | - tip 10 | install: 11 | - go get github.com/codegangsta/cli 12 | - go get github.com/streadway/amqp 13 | script: 14 | - go install 15 | - seq 1 10 | pipecat publish numbers 16 | - pipecat consume numbers --non-blocking --autoack | xargs -n 1 expr 10 '*' | pipecat publish results 17 | - pipecat consume results --autoack --non-blocking 18 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Lukas Martinelli 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pipecat [![Build Status](https://travis-ci.org/lukasmartinelli/pipecat.svg?branch=master)](https://travis-ci.org/lukasmartinelli/pipecat) ![MIT licensed](https://img.shields.io/badge/license-MIT-blue.svg) 2 | 3 | pipecat 4 | 5 | Pipecat allows you to scale any program supporting the [FACK contract](#fack-contract) 6 | using traditional UNIX pipes and [AMQP](https://www.amqp.org/). 7 | Think of it as [netcat](http://nc110.sourceforge.net/) 8 | but with message acknowledgments. 
9 | It is the successor of [redis-pipe](http://github.com/lukasmartinelli/redis-pipe). 10 | 11 | ```bash 12 | # Publish sequence of numbers to a job queue. 13 | seq 1 1000 | pipecat publish numbers 14 | 15 | # Multiply each number with 10 and store results in a different queue. 16 | pipecat consume numbers --autoack | xargs -n 1 expr 10 '*' | pipecat publish results 17 | 18 | # Aggregate the results and calculate the sum 19 | pipecat consume results --autoack --non-blocking \ 20 | | python -cu 'import sys; print(sum(map(int, sys.stdin)))' 21 | ``` 22 | 23 | > If you are into streams and UNIX pipes [checkout my Haskell based awk and sed alternative ](https://github.com/lukasmartinelli/hwk) 24 | 25 | ## Support 26 | 27 | Pipecat supports a local mode and all AMQP 0.9.1 message brokers. 28 | 29 | - [ActiveMQ](http://activemq.apache.org/) 30 | - [RabbitMQ](https://www.rabbitmq.com/) 31 | - [Azure Service Bus](https://azure.microsoft.com/en-us/services/service-bus/) 32 | 33 | ## Install 34 | 35 | You can download a single binary for Linux, OSX or Windows. 36 | 37 | **OSX** 38 | 39 | ```bash 40 | wget -O pipecat https://github.com/lukasmartinelli/pipecat/releases/download/v0.3/pipecat_darwin_amd64 41 | chmod +x pipecat 42 | 43 | ./pipecat --help 44 | ``` 45 | 46 | **Linux** 47 | 48 | ```bash 49 | wget -O pipecat https://github.com/lukasmartinelli/pipecat/releases/download/v0.3/pipecat_linux_amd64 50 | chmod +x pipecat 51 | 52 | ./pipecat --help 53 | ``` 54 | 55 | 56 | **Install from Source** 57 | 58 | ``` 59 | go get github.com/lukasmartinelli/pipecat 60 | ``` 61 | 62 | If you are using Windows or 32-bit architectures you need to [download the appropriate binary 63 | yourself](https://github.com/lukasmartinelli/pipecat/releases/latest). 64 | 65 | ## Using pipecat 66 | 67 | `pipecat` connects message queues and UNIX pipes. 
68 | The need arose when I started building messaging support into 69 | utilities in order to make them scalable but still wanted to leave my programs the way they are without heavy dependencies and still be able to scale the process **reliably**. 70 | 71 | In this example we will calculate the sum of a sequence of numbers. 72 | 73 | ### Connect the broker 74 | 75 | Specify the `AMQP_URI` env var to connect to the message broker. 76 | 77 | ``` 78 | export AMQP_URI=amqp://user:pass@host:5672/vhost 79 | ``` 80 | 81 | ### Create the queue 82 | 83 | Let's create a new queue `numbers` and publish a sequence of numbers from 1 to 1000. 84 | 85 | ```bash 86 | seq 1 1000 | pipecat publish numbers 87 | ``` 88 | 89 | ### Process input 90 | 91 | Multiply the input sequence with factor `10` and publish the results to an additional `results` queue. 92 | This step can be run on multiple hosts. 93 | We want to acknowledge all received messages automatically with `--autoack`. 94 | 95 | ```bash 96 | pipecat consume numbers --autoack | xargs -n 1 expr 10 '*' | pipecat publish results 97 | ``` 98 | 99 | ### Aggregate results 100 | 101 | Now let's sum up all the numbers. Because we want to end after receiving all numbers we specify the `--non-blocking` mode which will close the connection if no messages have been received after a timeout. 102 | 103 | ```bash 104 | pipecat consume results --autoack --non-blocking | python -cu 'import sys; print(sum(map(int, sys.stdin)))' 105 | ``` 106 | 107 | ### Local RabbitMQ with Docker 108 | 109 | If you do not have an existing AMQP broker at hand you can run 110 | RabbitMQ in a docker container, expose the ports and connect to it. 111 | 112 | ```bash 113 | docker run -d -p 5672:5672 --hostname pipecat-rabbit --name pipecat-rabbit rabbitmq:3 114 | ``` 115 | 116 | Now connect to localhost with the default `guest` login. 
117 | 118 | ```bash 119 | export AMQP_URI=amqp://guest:guest@localhost:5672/ 120 | ``` 121 | 122 | ### Publish messages to Exchange 123 | 124 | If you are using existing message queue infrastructure you can also publish messages to an exchange, with the first parameter used as the routing key. 125 | Thanks to @kennon for the implementation. 126 | 127 | ```bash 128 | seq 1 1000 | pipecat publish --exchange "my_exchange" --no-create-queue my.routing.key 129 | ``` 130 | 131 | The AMQP_EXCHANGE environment variable can also be used: 132 | 133 | ```bash 134 | export AMQP_EXCHANGE=my_exchange 135 | ``` 136 | 137 | ## Make it failsafe 138 | 139 | We have already written a small, concise and very 140 | scalable set of programs. We can now run the `multiply.py` 141 | step on many servers. 142 | 143 | However, if the server dies while `multiply.py` is 144 | running **the input lines already processed are lost**. 145 | 146 | If your program needs that ability you need to implement 147 | the [FACK contract](#fack-contract), demonstrated for the `multiply.py` sample. 148 | 149 | ## FACK Contract 150 | 151 | > Any program that accepts input from `stdin` and writes to `stdout` 152 | should accept an environment variable `FACK` containing a file name. 153 | If a single operation performed on a line from `stdin` was successful, 154 | that line should be written to `FACK`. 155 | 156 | ![FACK contract Flow](diagrams/fack_contract.png) 157 | 158 | ### Implement the contract 159 | 160 | Implementing the contract is straightforward. 161 | 162 | 1. Support the optional `FACK` environment variable containing a file name 163 | 2. Write the received input into this file handle if we 164 | performed the operation successfully on it 165 | 166 | #### Python Example 167 | 168 | Below is a Python example `multiply.py` which multiplies the sequence of numbers as above 169 | but writes the input line to `stdack` if successfully processed. 
170 | 171 | 172 | ```python 173 | import sys 174 | import os 175 | 176 | with open(os.getenv('FACK', os.devnull), 'w') as stdack: # Works even if FACK is not set 177 | for line in sys.stdin: 178 | num = int(line.strip()) 179 | result = num * 10 180 | sys.stdout.write('{}\n'.format(result)) 181 | stdack.write(line) # Ack the processed line 182 | stdack.flush() # Make sure line does not get lost in the buffer 183 | ``` 184 | 185 | ### Use named pipes for ACKs 186 | 187 | Now your program can no longer lose messages with `pipecat` because 188 | you can feed the `FACK` output back into `pipecat` 189 | using [named pipes](http://thorstenball.com/blog/2013/08/11/named-pipes/) 190 | which will only then acknowledge the messages from the message queue. 191 | 192 | ![Pipecat Flow Diagram](diagrams/pipecat_flow.png) 193 | 194 | Fill the queue again. 195 | 196 | ```bash 197 | seq 1 1000 | pipecat publish numbers 198 | ``` 199 | 200 | And use a named pipe to funnel the acknowledged input lines back into 201 | pipecat. 202 | 203 | ```bash 204 | mkfifo ack 205 | cat ack | pipecat consume numbers \ 206 | | FACK=ack python -u multiply.py \ 207 | | pipecat publish results 208 | rm ack 209 | ``` 210 | 211 | Consume all messages to reduce a result. 212 | In the reduce operation we need to autoack all received messages 213 | because we can't possibly hold the entire result set in memory until the 214 | operation has completed. 215 | 216 | ```bash 217 | pipecat consume results --autoack --non-blocking | python -cu 'import sys; print(sum(map(int, sys.stdin)))' 218 | ``` 219 | 220 | With a few additional lines of code only depending on the standard library 221 | you can now make any program in any language scalable using message queues. 222 | Without any dependencies and without changing the behavior one bit. 
223 | 224 | ## Usage Examples 225 | 226 | ### Create local Queue Backup 227 | 228 | ```bash 229 | pipecat consume results --autoack --non-blocking > results_backup.json 230 | cat results_backup.json | pipecat publish results 231 | ``` 232 | 233 | ## Cross Compile Release 234 | 235 | We use [gox](https://github.com/mitchellh/gox) to create distributable 236 | binaries for Windows, OSX and Linux. 237 | 238 | ```bash 239 | docker run --rm -v "$(pwd)":/usr/src/pipecat -w /usr/src/pipecat tcnksm/gox:1.4.2-light 240 | ``` 241 | -------------------------------------------------------------------------------- /diagrams/fack_contract.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lukasmartinelli/pipecat/4e97f5e1a0a3bf46ab2c4549481fc940a9722112/diagrams/fack_contract.png -------------------------------------------------------------------------------- /diagrams/pipecat_flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lukasmartinelli/pipecat/4e97f5e1a0a3bf46ab2c4549481fc940a9722112/diagrams/pipecat_flow.png -------------------------------------------------------------------------------- /logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lukasmartinelli/pipecat/4e97f5e1a0a3bf46ab2c4549481fc940a9722112/logo.png -------------------------------------------------------------------------------- /pipecat.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "log" 7 | "os" 8 | "sync" 9 | "time" 10 | 11 | "encoding/base64" 12 | 13 | "github.com/codegangsta/cli" 14 | "github.com/streadway/amqp" 15 | ) 16 | 17 | func failOnError(err error, msg string) { 18 | if err != nil { 19 | log.Fatalf("%s: %s", msg, err) 20 | panic(fmt.Sprintf("%s: %s", msg, err)) 21 | } 22 | } 23 | 24 | 
func prepare(amqpURI string, queueName string, createQueue bool) (*amqp.Connection, *amqp.Channel) { 25 | conn, err := amqp.Dial(amqpURI) 26 | failOnError(err, "Failed to connect to AMQP broker") 27 | 28 | channel, err := conn.Channel() 29 | failOnError(err, "Failed to open a channel") 30 | 31 | if createQueue == true { 32 | _, err = channel.QueueDeclare( 33 | queueName, // name 34 | true, // durable 35 | false, // delete when unused 36 | false, // exclusive 37 | false, // no-wait 38 | nil, // arguments 39 | ) 40 | failOnError(err, "Failed to declare queue") 41 | } 42 | 43 | return conn, channel 44 | } 45 | 46 | func publish(c *cli.Context) { 47 | queueName := c.Args().First() 48 | if queueName == "" { 49 | fmt.Println("Please provide name of the queue") 50 | os.Exit(1) 51 | } 52 | conn, channel := prepare(c.String("amqpuri"), queueName, !c.Bool("no-create-queue")) 53 | 54 | defer conn.Close() 55 | defer channel.Close() 56 | 57 | // It is better to have a durable delivery mode and let user disable it 58 | // even though it is not the RabbitMQ default 59 | deliveryMode := uint8(2) 60 | if c.Bool("transient") { 61 | deliveryMode = 1 62 | } 63 | 64 | scanner := bufio.NewScanner(os.Stdin) 65 | for scanner.Scan() { 66 | line := scanner.Text() 67 | 68 | msgBody := []byte(line) 69 | if c.Bool("base64") { 70 | var encodeErr error 71 | msgBody, encodeErr = base64.StdEncoding.DecodeString(line) 72 | failOnError(encodeErr, "Fail to decode base64") 73 | } 74 | 75 | err := channel.Publish( 76 | c.String("exchange"), // exchange 77 | queueName, // routing key 78 | false, // mandatory 79 | false, // immediate 80 | amqp.Publishing{ 81 | ContentType: "text/plain", 82 | Body: msgBody, 83 | DeliveryMode: deliveryMode, 84 | }) 85 | 86 | failOnError(err, "Failed to publish a message") 87 | fmt.Println(line) 88 | } 89 | err := scanner.Err() 90 | failOnError(err, "Failed to read from stdin") 91 | } 92 | 93 | func consume(c *cli.Context) { 94 | queueName := c.Args().First() 95 | if 
queueName == "" { 96 | fmt.Println("Please provide name of the queue") 97 | os.Exit(1) 98 | } 99 | 100 | conn, channel := prepare(c.String("amqpuri"), queueName, !c.Bool("no-create-queue")) 101 | defer conn.Close() 102 | defer channel.Close() 103 | 104 | var mutex sync.Mutex 105 | unackedMessages := make([]amqp.Delivery, 100) 106 | 107 | msgs, err := channel.Consume( 108 | queueName, // queue 109 | "", // consumer 110 | c.Bool("autoack"), // auto-ack 111 | false, // exclusive 112 | false, // no-local 113 | false, // no-wait 114 | nil, // args 115 | ) 116 | failOnError(err, "Failed to register consumer") 117 | 118 | ackMessages := func() { 119 | scanner := bufio.NewScanner(os.Stdin) 120 | for scanner.Scan() { 121 | ackedLine := scanner.Text() 122 | 123 | // O(n²) complexity for the win! 124 | mutex.Lock() // use channels some day 125 | for i, msg := range unackedMessages { 126 | unackedLine := fmt.Sprintf("%s", msg.Body) 127 | if c.Bool("base64") { 128 | unackedLine = base64.StdEncoding.EncodeToString([]byte(unackedLine)) 129 | } 130 | if unackedLine == ackedLine { 131 | msg.Ack(false) 132 | 133 | // discard message 134 | unackedMessages = append(unackedMessages[:i], unackedMessages[i+1:]...) 
135 | break 136 | } 137 | } 138 | mutex.Unlock() 139 | 140 | } 141 | err := scanner.Err() 142 | failOnError(err, "Failed to read from stdin") 143 | } 144 | 145 | forever := make(chan bool) 146 | 147 | consumeMessages := func() { 148 | timeout := time.Second * time.Duration(c.Int("timeout")) 149 | for { 150 | select { 151 | case msg := <-msgs: 152 | if !c.Bool("autoack") { 153 | mutex.Lock() 154 | unackedMessages = append(unackedMessages, msg) 155 | mutex.Unlock() 156 | } 157 | line := fmt.Sprintf("%s", msg.Body) 158 | if c.Bool("base64") { 159 | line = base64.StdEncoding.EncodeToString([]byte(line)) 160 | } 161 | fmt.Println(line) 162 | case <-time.After(timeout): 163 | if c.Bool("non-blocking") { 164 | forever <- false 165 | return 166 | } 167 | } 168 | } 169 | } 170 | 171 | if c.Bool("autoack") { 172 | go consumeMessages() 173 | } else { 174 | go ackMessages() 175 | go consumeMessages() 176 | } 177 | <-forever 178 | } 179 | 180 | func main() { 181 | app := cli.NewApp() 182 | app.Name = "pipecat" 183 | app.Usage = "Connect unix pipes and message queues" 184 | app.Version = "0.3.1" 185 | 186 | globalFlags := []cli.Flag{ 187 | cli.StringFlag{ 188 | Name: "amqpuri", 189 | Value: "amqp://guest:guest@localhost:5672/", 190 | Usage: "AMQP URI", 191 | EnvVar: "AMQP_URI", 192 | }, 193 | cli.StringFlag{ 194 | Name: "exchange", 195 | Value: "", 196 | Usage: "AMQP Exchange to publish to (default: \"\")", 197 | EnvVar: "AMQP_EXCHANGE", 198 | }, 199 | cli.BoolFlag{ 200 | Name: "no-create-queue", 201 | Usage: "Don't create queue", 202 | }, 203 | cli.BoolFlag{ 204 | Name: "autoack", 205 | Usage: "Ack all received messages directly", 206 | }, 207 | cli.BoolFlag{ 208 | Name: "non-blocking", 209 | Usage: "Stop consumer after timeout", 210 | }, 211 | cli.BoolFlag{ 212 | Name: "transient", 213 | Usage: "Publish messages with transient delivery mode", 214 | }, 215 | cli.IntFlag{ 216 | Name: "timeout", 217 | Value: 1, 218 | Usage: "Timeout to wait for messages", 219 | }, 220 | 
cli.BoolFlag{ 221 | Name: "base64", 222 | Usage: "Encode to Base64 string in consumer mode. Decode from Base64 string in publish mode", 223 | }, 224 | } 225 | 226 | app.Commands = []cli.Command{ 227 | { 228 | Name: "publish", 229 | Aliases: []string{"p"}, 230 | Usage: "Publish messages to queue", 231 | Flags: globalFlags, 232 | Action: publish, 233 | }, 234 | { 235 | Name: "consume", 236 | Flags: globalFlags, 237 | Aliases: []string{"c"}, 238 | Usage: "Consume messages from queue", 239 | Action: consume, 240 | }, 241 | } 242 | 243 | app.Run(os.Args) 244 | } 245 | --------------------------------------------------------------------------------