├── .gitignore ├── CHANGELOG.md ├── project.clj ├── .circleci └── config.yml ├── README.md ├── test └── bencode │ └── core_test.clj ├── LICENSE └── src └── bencode └── core.clj /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | pom.xml 5 | pom.xml.asc 6 | *.jar 7 | *.class 8 | /.lein-* 9 | /.nrepl-port 10 | .hgignore 11 | .hg/ 12 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 | ## 1.2.0 (2024-06-12) 4 | 5 | ### Changes 6 | 7 | * [#16](https://github.com/nrepl/bencode/pull/16): Rewrite bencode implementation to be more performant and use Clojure 1.7 features. This basically matches the changes that were made in nREPL 1.2. 8 | 9 | ## 1.1.0 (2020-04-13) 10 | 11 | ### Changes 12 | 13 | * [#3](https://github.com/nrepl/bencode/issues/3): Allow maps with keyword or symbol keys. 14 | 15 | ## 1.0.1 (2020-04-01) 16 | 17 | ### Bugs fixed 18 | 19 | * [#4](https://github.com/nrepl/bencode/issues/4): Make compatible with GraalVM. 20 | 21 | ## 1.0.0 (2018-06-17) 22 | 23 | Initial version. Direct extraction of the `nrepl.bencode` namespace into a separate library. 24 | -------------------------------------------------------------------------------- /project.clj: -------------------------------------------------------------------------------- 1 | (defproject nrepl/bencode "1.2.0" 2 | :description "A netstring and bencode implementation for Clojure." 
3 | :url "https://github.com/nrepl/bencode" 4 | :license {:name "Eclipse Public License" 5 | :url "http://www.eclipse.org/legal/epl-v10.html"} 6 | :dependencies [[org.clojure/clojure "1.8.0"]] 7 | 8 | :aliases {"bump-version" ["change" "version" "leiningen.release/bump-version"]} 9 | 10 | :release-tasks [["vcs" "assert-committed"] 11 | ["bump-version" "release"] 12 | ["vcs" "commit" "Release %s"] 13 | ["vcs" "tag" "v" "--no-sign"] 14 | ["bump-version"] 15 | ["vcs" "commit" "Begin %s"]] 16 | 17 | :deploy-repositories [["clojars" {:url "https://clojars.org/repo" 18 | :username :env/clojars_username 19 | :password :env/clojars_password 20 | :sign-releases false}]] 21 | 22 | :profiles {:test {:dependencies [[org.clojure/test.check "1.1.1"]]}}) 23 | -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | 3 | defaults: &defaults 4 | working_directory: ~/bencode 5 | 6 | executors: 7 | jdk8: 8 | docker: 9 | - image: clojure:temurin-8-lein-2.11.2-jammy 10 | <<: *defaults 11 | jdk11: 12 | docker: 13 | - image: clojure:temurin-11-lein-2.11.2-jammy 14 | <<: *defaults 15 | jdk17: 16 | docker: 17 | - image: clojure:temurin-17-lein-2.11.2-jammy 18 | <<: *defaults 19 | jdk21: 20 | docker: 21 | - image: clojure:temurin-21-lein-2.11.2-jammy 22 | <<: *defaults 23 | 24 | jobs: 25 | test: 26 | parameters: 27 | jdk_version: 28 | description: "JDK version" 29 | type: string 30 | executor: << parameters.jdk_version >> 31 | steps: 32 | - checkout 33 | - restore_cache: 34 | key: bencode-{{ checksum "project.clj" }} 35 | - run: 36 | name: Run Tests 37 | command: lein test 38 | - save_cache: 39 | paths: 40 | - $HOME/.m2 41 | - $HOME/.lein 42 | key: bencode-{{ checksum "project.clj" }} 43 | 44 | workflows: 45 | test-with-matrix: 46 | jobs: 47 | - test: 48 | matrix: 49 | parameters: 50 | jdk_version: ["jdk8", "jdk11", "jdk17", "jdk21"] 51 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![CircleCI](https://circleci.com/gh/nrepl/bencode/tree/master.svg?style=svg)](https://circleci.com/gh/nrepl/bencode/tree/master) 2 | [![Clojars Project](https://img.shields.io/clojars/v/nrepl/bencode.svg)](https://clojars.org/nrepl/bencode) 3 | [![cljdoc badge](https://cljdoc.org/badge/nrepl/bencode)](https://cljdoc.org/d/nrepl/bencode/CURRENT) 4 | [![downloads badge](https://versions.deps.co/nrepl/bencode/downloads.svg)](https://clojars.org/nrepl/bencode) 5 | 6 | # bencode 7 | 8 | A netstring and bencode implementation for Clojure. 9 | 10 | This particular implementation was extracted from [nREPL][], so it 11 | could be reused by other applications. 12 | 13 | [nREPL]: https://nrepl.org 14 | 15 | There are other netstring/bencode libraries for Clojure out there, but this one has 16 | the distinct advantage that it's certainly going to work well with nREPL. ;-) 17 | 18 | **P.S.** It's also very fast and very memory efficient, but who's counting! :D 19 | 20 | ## Motivation 21 | 22 | In each and every application, which contacts peer processes via some 23 | communication channel, the handling of the communication channel is 24 | obviously a central part of the application. Unfortunately, the 25 | handling of buffers of varying sizes often introduces bugs in the form of 26 | buffer overflows and similar. 27 | 28 | A strong factor in this situation is of course the protocol which goes 29 | over the wire. Depending on its design it might be difficult to estimate 30 | the size of the input up front. This introduces more handling of message 31 | buffers to accommodate inputs of varying sizes. This is particularly 32 | difficult in languages like C, where there is no bounds checking of array 33 | accesses and where errors might go unnoticed for a considerable amount of 34 | time. 
 35 | 36 | To address these issues D. Bernstein developed the so-called 37 | [netstrings][net]. They are especially designed to allow easy construction 38 | of the message buffers, easy and robust parsing. 39 | 40 | BitTorrent extended this to the [bencode][bc] protocol which also 41 | includes ways to encode numbers and collections like lists or maps. 42 | 43 | *wire* is based on these ideas. 44 | 45 | [net]: http://cr.yp.to/proto/netstrings.txt 46 | [bc]: http://wiki.theory.org/BitTorrentSpecification#Bencoding 47 | 48 | ## Usage 49 | 50 | Just add `bencode` as a dependency to your project and start hacking. 51 | 52 | ```clojure 53 | [nrepl/bencode "1.2.0"] 54 | ``` 55 | 56 | The API is documented in great detail [here](https://github.com/nrepl/bencode/blob/master/src/bencode/core.clj). 57 | 58 | The main functions in the API are: 59 | 60 | * `read-netstring` 61 | * `write-netstring` 62 | * `read-bencode` 63 | * `write-bencode` 64 | 65 | Here are some usage examples for each of the functions available: 66 | 67 | - `read-netstring` 68 | 69 | ```clojure 70 | (-> (.getBytes "13:Hello, World!," "UTF-8") 71 | ByteArrayInputStream. 72 | read-netstring 73 | (String. "UTF-8")) 74 | 75 | => Hello, World! 76 | ``` 77 | 78 | - `write-netstring` 79 | 80 | ```clojure 81 | (-> (doto (ByteArrayOutputStream.) 82 | (write-netstring (.getBytes "Hello, World!" "UTF-8"))) 83 | .toString) 84 | 85 | => "13:Hello, World!," 86 | ``` 87 | 88 | - `read-bencode` 89 | 90 | ```clojure 91 | (vec 92 | (map 93 | #(String. % "UTF-8") 94 | (-> (.getBytes "l5:nrepl2:is7:awesomee" "UTF-8") 95 | ByteArrayInputStream. 96 | PushbackInputStream. 97 | read-bencode))) 98 | 99 | => ["nrepl" "is" "awesome"] 100 | ``` 101 | 102 | - `write-bencode` 103 | 104 | ```clojure 105 | (-> (doto (ByteArrayOutputStream.) 
106 | (write-bencode {:foo "bar"})) 107 | .toString) 108 | 109 | => "d3:foo3:bare" 110 | ``` 111 | 112 | Additionally, you can check this [document](https://github.com/nrepl/nrepl/blob/master/test/clojure/nrepl/bencode_test.clj) to learn more about its usage. 113 | 114 | ## Use Cases 115 | 116 | Obviously you can use this library whenever you need to deal with netstrings or 117 | bencode, but I assume that in practice most people will end up using it for 118 | building alternative Clojure nREPL clients or 119 | servers. [babashka.nrepl](https://github.com/babashka/babashka.nrepl) is one 120 | notable user of the library. 121 | 122 | There's also the potential to have the library support ClojureScript and 123 | ClojureCLR down the road, so it could be leveraged in even more 124 | contexts. Sky is the limit! 125 | 126 | ## License 127 | 128 | Copyright © 2018-2024 Meikel Brandmeyer, Oleksandr Yakushev, Bozhidar Batsov and nREPL contributors 129 | 130 | Distributed under the Eclipse Public License either version 1.0 or (at 131 | your option) any later version. 132 | -------------------------------------------------------------------------------- /test/bencode/core_test.clj: -------------------------------------------------------------------------------- 1 | ;; Copyright (c) Meikel Brandmeyer. All rights reserved. 2 | ;; The use and distribution terms for this software are covered by the 3 | ;; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) 4 | ;; which can be found in the file epl-v10.html at the root of this distribution. 5 | ;; By using this software in any fashion, you are agreeing to be bound by 6 | ;; the terms of this license. 7 | ;; You must not remove this notice, or any other, from this software. 
8 | 9 | 10 | (ns bencode.core-test 11 | (:require [clojure.test :refer [are deftest is testing]] 12 | [clojure.test.check.generators :as gen] 13 | [clojure.test.check.properties :as tc.prop] 14 | [clojure.test.check.clojure-test :refer [defspec]] 15 | [nrepl.bencode :as bencode :refer [read-bencode 16 | read-netstring 17 | write-bencode 18 | write-netstring]]) 19 | (:import clojure.lang.RT 20 | [java.io ByteArrayInputStream ByteArrayOutputStream PushbackInputStream])) 21 | 22 | (defn #^{:private true} >bytes 23 | [#^String input] 24 | (.getBytes input "UTF-8")) 25 | 26 | (defmulti #^{:private true} > input 43 | (map (fn [[k v]] [k ( bytes 49 | ByteArrayInputStream. 50 | PushbackInputStream. 51 | reader)) 52 | 53 | (defn- >input 54 | [^String input & args] 55 | (-> input 56 | (.getBytes "UTF-8") 57 | (#(apply decode % args)) 58 | input x :reader read-netstring) y) 62 | "0:," "" 63 | "13:Hello, World!," "Hello, World!" 64 | "16:Hällö, Würld!," "Hällö, Würld!" 65 | "25:Здравей, Свят!," "Здравей, Свят!")) 66 | 67 | (deftest test-string-reading 68 | (are [x y] (= (>input x :reader read-bencode) y) 69 | "0:" "" 70 | "13:Hello, World!" "Hello, World!" 71 | "16:Hällö, Würld!" "Hällö, Würld!" 72 | "25:Здравей, Свят!" 
"Здравей, Свят!")) 73 | 74 | (deftest test-integer-reading 75 | (are [x y] (= (>input x :reader read-bencode) y) 76 | "i0e" 0 77 | "i42e" 42 78 | "i-42e" -42)) 79 | 80 | (deftest test-list-reading 81 | (are [x y] (= (>input x :reader read-bencode) y) 82 | "le" [] 83 | "l6:cheesee" ["cheese"] 84 | "l6:cheese3:ham4:eggse" ["cheese" "ham" "eggs"])) 85 | 86 | (deftest test-map-reading 87 | (are [x y] (= (>input x :reader read-bencode) y) 88 | "de" {} 89 | "d3:ham4:eggse" {"ham" "eggs"})) 90 | 91 | (deftest test-nested-reading 92 | (are [x y] (= (>input x :reader read-bencode) y) 93 | "l6:cheesei42ed3:ham4:eggsee" ["cheese" 42 {"ham" "eggs"}] 94 | "d6:cheesei42e3:haml4:eggsee" {"cheese" 42 "ham" ["eggs"]})) 95 | 96 | (defn- >stream 97 | ^ByteArrayOutputStream 98 | [thing & {:keys [writer]}] 99 | (doto (ByteArrayOutputStream.) 100 | (writer thing))) 101 | 102 | (defn- >output 103 | [& args] 104 | (-> >stream 105 | ^ByteArrayOutputStream (apply args) 106 | (.toString "UTF-8"))) 107 | 108 | (deftest test-netstring-writing 109 | (are [x y] (= (>output (>bytes x) :writer write-netstring) y) 110 | "" "0:," 111 | "Hello, World!" "13:Hello, World!," 112 | "Hällö, Würld!" "16:Hällö, Würld!," 113 | "Здравей, Свят!" "25:Здравей, Свят!,")) 114 | 115 | (deftest test-byte-array-writing 116 | (are [x y] (= (>output (>bytes x) :writer write-bencode) y) 117 | "" "0:" 118 | "Hello, World!" "13:Hello, World!" 119 | "Hällö, Würld!" "16:Hällö, Würld!" 120 | "Здравей, Свят!" "25:Здравей, Свят!")) 121 | 122 | (deftest test-string-writing 123 | (are [x y] (= (>output x :writer write-bencode) y) 124 | "" "0:" 125 | "Hello, World!" "13:Hello, World!" 126 | "Hällö, Würld!" "16:Hällö, Würld!" 127 | "Здравей, Свят!" "25:Здравей, Свят!")) 128 | 129 | (deftest test-input-stream-writing 130 | (are [x y] (= (>output (ByteArrayInputStream. (>bytes x)) 131 | :writer write-bencode) y) 132 | "" "0:" 133 | "Hello, World!" "13:Hello, World!" 134 | "Hällö, Würld!" "16:Hällö, Würld!" 135 | "Здравей, Свят!" 
"25:Здравей, Свят!")) 136 | 137 | (deftest test-integer-writing 138 | (are [x y] (= (>output x :writer write-bencode) y) 139 | 0 "i0e" 140 | 42 "i42e" 141 | -42 "i-42e" 142 | 143 | ;; Works for all integral types. 144 | ;; Note: BigInts (42N) not tested, since they are not 145 | ;; supported in 1.2. 146 | (Byte/parseByte "42" 10) "i42e" 147 | (Short/parseShort "42" 10) "i42e" 148 | (Integer/parseInt "42" 10) "i42e" 149 | (Long/parseLong "42" 10) "i42e")) 150 | 151 | (deftest test-named-writing 152 | (are [x y] (= (>output x :writer write-bencode) y) 153 | :foo "3:foo" 154 | :foo/bar "7:foo/bar" 155 | 'foo "3:foo" 156 | 'foo/bar "7:foo/bar")) 157 | 158 | (deftest test-list-writing 159 | (are [x y] (= (>output x :writer write-bencode) y) 160 | nil "le" 161 | [] "le" 162 | ["cheese"] "l6:cheesee" 163 | ["cheese" "ham" "eggs"] "l6:cheese3:ham4:eggse")) 164 | 165 | (deftest test-map-writing 166 | (are [x y] (= (>output x :writer write-bencode) y) 167 | {} "de" 168 | {"ham" "eggs"} "d3:ham4:eggse" 169 | {:ham "eggs"} "d3:ham4:eggse" 170 | {'ham "eggs"} "d3:ham4:eggse" 171 | {:h/am "eggs"} "d4:h/am4:eggse")) 172 | 173 | (deftest test-nested-writing 174 | (are [x y] (= (>output x :writer write-bencode) y) 175 | ["cheese" 42 {"ham" "eggs"}] "l6:cheesei42ed3:ham4:eggsee" 176 | {"cheese" 42 "ham" ["eggs"]} "d6:cheesei42e3:haml4:eggsee")) 177 | 178 | (deftest test-lexicographic-sorting 179 | (let [source (zipmap ["ham" "eggs" "hamburger" "hamburg" "cheese"] (range))] 180 | (is (= "d6:cheesei4e4:eggsi1e3:hami0e7:hamburgi3e9:hamburgeri2ee" 181 | (>output source :writer write-bencode))))) 182 | 183 | (deftest unencoded-values 184 | ;; just some PNG data that won't round-trip cleanly through UTF-8 encoding, so 185 | ;; any default encoding in the bencode implementation will be caught immediately 186 | (let [binary-data (->> [-119 80 78 71 13 10 26 10 0 0 0 13 73 72 68 82 0 0 0 187 | 100 0 0 0 100 8 6 0 0 0 112 -30 -107 84 0 0 3 -16 105 188 | 67 67 80 73 67 67 32 80 114 111 102 105 
108 101 0 0 40 189 | -111 -115 85 -35 111 -37 84 20 63 -119 111 92 -92 22 63 190 | -96 -79 -114 14 21 -117 -81 85 83 91 -71 27 26 -83 -58 6 191 | 73 -109 -91 -23 66 26 -71 -51 -40 42 -92 -55 117 110] 192 | (map byte) 193 | (into-array Byte/TYPE))] 194 | (is (= (seq binary-data) 195 | (-> {"data" binary-data} 196 | (>stream :writer write-bencode) 197 | .toByteArray 198 | (decode :reader read-bencode) 199 | (get "data") 200 | seq))))) 201 | 202 | (deftest unwritable-values 203 | (testing "write-bencode writes eagerly" 204 | (let [out (ByteArrayOutputStream.)] 205 | (is (thrown? IllegalArgumentException 206 | (write-bencode out {"obj" (Object.)}))) 207 | (is (= "d3:obj" (String. (.toByteArray out))))))) 208 | 209 | ;; ## Generative testing. 210 | ;; 211 | ;; Verify that any valid value remains the same after encode-decode roundtrip. 212 | 213 | (def valid-bencode-input-generator 214 | "This is a recursive test.check generator that can generate a string, a number, 215 | or a list or a map that contains any other valid value." 216 | (gen/recursive-gen #(gen/one-of [% 217 | (gen/vector % 0 20) 218 | (gen/map gen/string % {:min-elements 0 219 | :max-elements 20})]) 220 | (gen/one-of [gen/string gen/large-integer]))) 221 | 222 | (defspec generative-roundtrip-test {:num-tests 100} 223 | (tc.prop/for-all 224 | [value valid-bencode-input-generator] 225 | (= value (>input (>output value :writer write-bencode) :reader read-bencode)))) 226 | 227 | ;; ## Performance testing 228 | ;; 229 | ;; This code is to be run manually, so it is in a comment block. Evaluate the 230 | ;; performance of bencode encoding and decoding when making changes to bencode 231 | ;; implementation, and compare it to the previous version. 232 | 233 | (comment 234 | (def data (gen/generate valid-bencode-input-generator 5000 4)) 235 | (count (str data)) ;; Sanity-check data size. 236 | 237 | ;; Writing benchmark 238 | (time+ (write-bencode (ByteArrayOutputStream.) 
data)) 239 | 240 | ;; Reading benchmark 241 | (let [baos (doto (ByteArrayOutputStream.) 242 | (write-bencode data)) 243 | arr (.toByteArray baos)] 244 | (time+ 245 | (nrepl.bencode/read-bencode (PushbackInputStream. (java.io.ByteArrayInputStream. arr))) 246 | nil))) 247 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC 2 | LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM 3 | CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. 4 | 5 | 1. DEFINITIONS 6 | 7 | "Contribution" means: 8 | 9 | a) in the case of the initial Contributor, the initial code and 10 | documentation distributed under this Agreement, and 11 | 12 | b) in the case of each subsequent Contributor: 13 | 14 | i) changes to the Program, and 15 | 16 | ii) additions to the Program; 17 | 18 | where such changes and/or additions to the Program originate from and are 19 | distributed by that particular Contributor. A Contribution 'originates' from 20 | a Contributor if it was added to the Program by such Contributor itself or 21 | anyone acting on such Contributor's behalf. Contributions do not include 22 | additions to the Program which: (i) are separate modules of software 23 | distributed in conjunction with the Program under their own license 24 | agreement, and (ii) are not derivative works of the Program. 25 | 26 | "Contributor" means any person or entity that distributes the Program. 27 | 28 | "Licensed Patents" mean patent claims licensable by a Contributor which are 29 | necessarily infringed by the use or sale of its Contribution alone or when 30 | combined with the Program. 31 | 32 | "Program" means the Contributions distributed in accordance with this 33 | Agreement. 
34 | 35 | "Recipient" means anyone who receives the Program under this Agreement, 36 | including all Contributors. 37 | 38 | 2. GRANT OF RIGHTS 39 | 40 | a) Subject to the terms of this Agreement, each Contributor hereby grants 41 | Recipient a non-exclusive, worldwide, royalty-free copyright license to 42 | reproduce, prepare derivative works of, publicly display, publicly perform, 43 | distribute and sublicense the Contribution of such Contributor, if any, and 44 | such derivative works, in source code and object code form. 45 | 46 | b) Subject to the terms of this Agreement, each Contributor hereby grants 47 | Recipient a non-exclusive, worldwide, royalty-free patent license under 48 | Licensed Patents to make, use, sell, offer to sell, import and otherwise 49 | transfer the Contribution of such Contributor, if any, in source code and 50 | object code form. This patent license shall apply to the combination of the 51 | Contribution and the Program if, at the time the Contribution is added by the 52 | Contributor, such addition of the Contribution causes such combination to be 53 | covered by the Licensed Patents. The patent license shall not apply to any 54 | other combinations which include the Contribution. No hardware per se is 55 | licensed hereunder. 56 | 57 | c) Recipient understands that although each Contributor grants the licenses 58 | to its Contributions set forth herein, no assurances are provided by any 59 | Contributor that the Program does not infringe the patent or other 60 | intellectual property rights of any other entity. Each Contributor disclaims 61 | any liability to Recipient for claims brought by any other entity based on 62 | infringement of intellectual property rights or otherwise. As a condition to 63 | exercising the rights and licenses granted hereunder, each Recipient hereby 64 | assumes sole responsibility to secure any other intellectual property rights 65 | needed, if any. 
For example, if a third party patent license is required to 66 | allow Recipient to distribute the Program, it is Recipient's responsibility 67 | to acquire that license before distributing the Program. 68 | 69 | d) Each Contributor represents that to its knowledge it has sufficient 70 | copyright rights in its Contribution, if any, to grant the copyright license 71 | set forth in this Agreement. 72 | 73 | 3. REQUIREMENTS 74 | 75 | A Contributor may choose to distribute the Program in object code form under 76 | its own license agreement, provided that: 77 | 78 | a) it complies with the terms and conditions of this Agreement; and 79 | 80 | b) its license agreement: 81 | 82 | i) effectively disclaims on behalf of all Contributors all warranties and 83 | conditions, express and implied, including warranties or conditions of title 84 | and non-infringement, and implied warranties or conditions of merchantability 85 | and fitness for a particular purpose; 86 | 87 | ii) effectively excludes on behalf of all Contributors all liability for 88 | damages, including direct, indirect, special, incidental and consequential 89 | damages, such as lost profits; 90 | 91 | iii) states that any provisions which differ from this Agreement are offered 92 | by that Contributor alone and not by any other party; and 93 | 94 | iv) states that source code for the Program is available from such 95 | Contributor, and informs licensees how to obtain it in a reasonable manner on 96 | or through a medium customarily used for software exchange. 97 | 98 | When the Program is made available in source code form: 99 | 100 | a) it must be made available under this Agreement; and 101 | 102 | b) a copy of this Agreement must be included with each copy of the Program. 103 | 104 | Contributors may not remove or alter any copyright notices contained within 105 | the Program. 
106 | 107 | Each Contributor must identify itself as the originator of its Contribution, 108 | if any, in a manner that reasonably allows subsequent Recipients to identify 109 | the originator of the Contribution. 110 | 111 | 4. COMMERCIAL DISTRIBUTION 112 | 113 | Commercial distributors of software may accept certain responsibilities with 114 | respect to end users, business partners and the like. While this license is 115 | intended to facilitate the commercial use of the Program, the Contributor who 116 | includes the Program in a commercial product offering should do so in a 117 | manner which does not create potential liability for other Contributors. 118 | Therefore, if a Contributor includes the Program in a commercial product 119 | offering, such Contributor ("Commercial Contributor") hereby agrees to defend 120 | and indemnify every other Contributor ("Indemnified Contributor") against any 121 | losses, damages and costs (collectively "Losses") arising from claims, 122 | lawsuits and other legal actions brought by a third party against the 123 | Indemnified Contributor to the extent caused by the acts or omissions of such 124 | Commercial Contributor in connection with its distribution of the Program in 125 | a commercial product offering. The obligations in this section do not apply 126 | to any claims or Losses relating to any actual or alleged intellectual 127 | property infringement. In order to qualify, an Indemnified Contributor must: 128 | a) promptly notify the Commercial Contributor in writing of such claim, and 129 | b) allow the Commercial Contributor to control, and cooperate with the 130 | Commercial Contributor in, the defense and any related settlement 131 | negotiations. The Indemnified Contributor may participate in any such claim 132 | at its own expense. 133 | 134 | For example, a Contributor might include the Program in a commercial product 135 | offering, Product X. That Contributor is then a Commercial Contributor. 
If 136 | that Commercial Contributor then makes performance claims, or offers 137 | warranties related to Product X, those performance claims and warranties are 138 | such Commercial Contributor's responsibility alone. Under this section, the 139 | Commercial Contributor would have to defend claims against the other 140 | Contributors related to those performance claims and warranties, and if a 141 | court requires any other Contributor to pay any damages as a result, the 142 | Commercial Contributor must pay those damages. 143 | 144 | 5. NO WARRANTY 145 | 146 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON 147 | AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER 148 | EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR 149 | CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A 150 | PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the 151 | appropriateness of using and distributing the Program and assumes all risks 152 | associated with its exercise of rights under this Agreement , including but 153 | not limited to the risks and costs of program errors, compliance with 154 | applicable laws, damage to or loss of data, programs or equipment, and 155 | unavailability or interruption of operations. 156 | 157 | 6. DISCLAIMER OF LIABILITY 158 | 159 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY 160 | CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, 161 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION 162 | LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 163 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 164 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE 165 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY 166 | OF SUCH DAMAGES. 167 | 168 | 7. 
GENERAL 169 | 170 | If any provision of this Agreement is invalid or unenforceable under 171 | applicable law, it shall not affect the validity or enforceability of the 172 | remainder of the terms of this Agreement, and without further action by the 173 | parties hereto, such provision shall be reformed to the minimum extent 174 | necessary to make such provision valid and enforceable. 175 | 176 | If Recipient institutes patent litigation against any entity (including a 177 | cross-claim or counterclaim in a lawsuit) alleging that the Program itself 178 | (excluding combinations of the Program with other software or hardware) 179 | infringes such Recipient's patent(s), then such Recipient's rights granted 180 | under Section 2(b) shall terminate as of the date such litigation is filed. 181 | 182 | All Recipient's rights under this Agreement shall terminate if it fails to 183 | comply with any of the material terms or conditions of this Agreement and 184 | does not cure such failure in a reasonable period of time after becoming 185 | aware of such noncompliance. If all Recipient's rights under this Agreement 186 | terminate, Recipient agrees to cease use and distribution of the Program as 187 | soon as reasonably practicable. However, Recipient's obligations under this 188 | Agreement and any licenses granted by Recipient relating to the Program shall 189 | continue and survive. 190 | 191 | Everyone is permitted to copy and distribute copies of this Agreement, but in 192 | order to avoid inconsistency the Agreement is copyrighted and may only be 193 | modified in the following manner. The Agreement Steward reserves the right to 194 | publish new versions (including revisions) of this Agreement from time to 195 | time. No one other than the Agreement Steward has the right to modify this 196 | Agreement. The Eclipse Foundation is the initial Agreement Steward. 
The 197 | Eclipse Foundation may assign the responsibility to serve as the Agreement 198 | Steward to a suitable separate entity. Each new version of the Agreement will 199 | be given a distinguishing version number. The Program (including 200 | Contributions) may always be distributed subject to the version of the 201 | Agreement under which it was received. In addition, after a new version of 202 | the Agreement is published, Contributor may elect to distribute the Program 203 | (including its Contributions) under the new version. Except as expressly 204 | stated in Sections 2(a) and 2(b) above, Recipient receives no rights or 205 | licenses to the intellectual property of any Contributor under this 206 | Agreement, whether expressly, by implication, estoppel or otherwise. All 207 | rights in the Program not expressly granted under this Agreement are 208 | reserved. 209 | 210 | This Agreement is governed by the laws of the State of New York and the 211 | intellectual property laws of the United States of America. No party to this 212 | Agreement will bring a legal action under this Agreement more than one year 213 | after the cause of action arose. Each party waives its rights to a jury trial 214 | in any resulting litigation. 215 | -------------------------------------------------------------------------------- /src/bencode/core.clj: -------------------------------------------------------------------------------- 1 | ;; Copyright (c) Meikel Brandmeyer, Oleksandr Yakushev. All rights reserved. 2 | ;; The use and distribution terms for this software are covered by the 3 | ;; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) 4 | ;; which can be found in the file epl-v10.html at the root of this distribution. 5 | ;; By using this software in any fashion, you are agreeing to be bound by 6 | ;; the terms of this license. 7 | ;; You must not remove this notice, or any other, from this software. 
 8 | 9 | (ns bencode.core 10 | "A netstring and bencode implementation for Clojure." 11 | {:author "Meikel Brandmeyer"} 12 | (:require 13 | [clojure.java.io :as io]) 14 | (:import 15 | (clojure.lang IPersistentCollection IPersistentMap Named PersistentVector) 16 | (java.io ByteArrayOutputStream EOFException InputStream IOException 17 | OutputStream PushbackInputStream) 18 | (java.nio.charset StandardCharsets) 19 | (java.util Arrays))) 20 | 21 | ;; # Motivation 22 | ;; 23 | ;; In each and every application, which contacts peer processes via some 24 | ;; communication channel, the handling of the communication channel is 25 | ;; obviously a central part of the application. Unfortunately, the 26 | ;; handling of buffers of varying sizes often introduces bugs in the form of 27 | ;; buffer overflows and similar. 28 | ;; 29 | ;; A strong factor in this situation is of course the protocol which goes 30 | ;; over the wire. Depending on its design it might be difficult to estimate 31 | ;; the size of the input up front. This introduces more handling of message 32 | ;; buffers to accommodate inputs of varying sizes. This is particularly 33 | ;; difficult in languages like C, where there is no bounds checking of array 34 | ;; accesses and where errors might go unnoticed for a considerable amount of 35 | ;; time. 36 | ;; 37 | ;; To address these issues D. Bernstein developed the so-called 38 | ;; [netstrings][net]. They are especially designed to allow easy construction 39 | ;; of the message buffers, easy and robust parsing. 40 | ;; 41 | ;; BitTorrent extended this to the [bencode][bc] protocol which also 42 | ;; includes ways to encode numbers and collections like lists or maps. 43 | ;; 44 | ;; *wire* is based on these ideas. 45 | ;; 46 | ;; [net]: http://cr.yp.to/proto/netstrings.txt 47 | ;; [bc]: http://wiki.theory.org/BitTorrentSpecification#Bencoding 48 | ;; 49 | ;; # Netstrings 50 | ;; 51 | ;; Now let's start with the basic netstrings. 
;; They consist of a byte count,
;; followed by a colon and the binary data and a trailing comma. Examples:
;;
;;     13:Hello, World!,
;;     10:Guten Tag!,
;;     0:,
;;
;; The initial byte count allows to efficiently allocate a sufficiently
;; sized message buffer. The trailing comma serves as a hint to detect
;; incorrect netstrings.
;;
;; ## Low-level reading
;;
;; We will need some low-level reading helpers to read the bytes from
;; the input stream. These are `read-byte` as well as `read-bytes`. They
;; are split out, because doing such a simple task as reading a byte is
;; a mild catastrophe in Java. So it would add some clutter to the algorithm
;; `read-netstring`.
;;
;; On the other hand they might be also useful elsewhere.
;;
;; To remove some magic numbers from the code below.

(def ^:const i     105)
(def ^:const l     108)
(def ^:const d     100)
(def ^:const e     101)
(def ^:const comma 44)
(def ^:const minus 45)
(def ^:const colon 58)

(defn- throw-eof
  "Throws the EOFException used to signal that the input ended before a
  complete token could be read."
  []
  (throw (EOFException. "Invalid netstring. Unexpected end of input.")))

;; This function is inline because it is used in a hot loop inside `read-long`.

(definline ^:private read-byte [input]
  ;; There is a quirk here. `.read` returns -1 on end of input, and otherwise
  ;; the byte as an *unsigned* value in the range 0-255, whereas a Java `byte`
  ;; is signed (-128 to 127). Everything below 128 stands for itself, but the
  ;; values 128-255 are really the negative byte values shifted up by 256. The
  ;; narrowing cast to byte translates them back to their signed form.
  `(let [c# (.read ~(with-meta input {:tag 'InputStream}))]
     (when (neg? c#) (throw-eof))
     ;; Cast back to int to avoid boxing and/or redundant casts for consumers.
     (unchecked-int (unchecked-byte c#))))

(defn- read-bytes
  "Reads exactly `n` bytes from `input` and returns them as a fresh byte array.
  A single `.read` call may deliver fewer bytes than requested, so we keep
  reading until the array is full. Throws EOFException if the stream ends
  before `n` bytes were read."
  ^bytes [^InputStream input, ^long n]
  (let [content (byte-array n)]
    (loop [offset 0, len n]
      (let [result (.read input content (unchecked-int offset)
                          (unchecked-int len))]
        (when (neg? result)
          (throw
           (EOFException.
            "Invalid netstring. Less data available than expected.")))
        ;; Short read: advance the offset and ask for the remainder.
        (when-not (= result len)
          (recur (unchecked-add offset result) (unchecked-subtract len result)))))
    content))

;; `read-long` is used for reading integers from the stream as well
;; as the byte count prefixes of byte strings. The delimiter is \:
;; for byte count prefixes and \e for integers.

(defn- read-long
  "Reads a decimal number from `input` up to and including the byte `delim`
  and returns it as a long. A single leading minus sign is accepted.

  Throws EOFException if the input ends before the delimiter is seen and
  IOException if a byte that is neither an ASCII digit nor the delimiter is
  encountered. Previously such bytes were silently folded into the result,
  producing a garbage number."
  ^long [^PushbackInputStream input, ^long delim]
  (let [first-byte (read-byte input)
        ;; Only the first byte can be a minus.
        negate? (if (= first-byte minus)
                  true
                  (do (.unread input first-byte)
                      false))]
    (loop [n 0]
      ;; We read repeatedly a byte from the input...
      (let [b (read-byte input)]
        (cond
          ;; ...and stop at the delimiter.
          (= b delim)
          (if negate? (- n) n)

          ;; ASCII \0 (48) through \9 (57).
          (and (<= 48 b) (<= b 57))
          (recur (+ (* n 10) (- b 48)))

          :else
          (throw (IOException.
                  "Invalid number. Digit or delimiter expected.")))))))

;; ## Reading a netstring
;;
;; Let's dive straight into reading a netstring from an `InputStream`.
;;
;; For convenience we split the function into two subfunctions. The
;; public `read-netstring` is the normal entry point, which also checks
;; for the trailing comma after reading the payload data with the
;; private `read-netstring*`.
;;
;; The reason we need the less strict `read-netstring*` is that with
;; bencode we don't have a trailing comma. So a check would not be
;; beneficial here.
;;
;; However the consumer doesn't have to care. `read-netstring` as
;; well as `read-bencode` provide the public entry points, which do
;; the right thing.
Although they both may reference the `read-netstring*` 147 | ;; underneath. 148 | ;; 149 | ;; With this in mind we define the inner helper function first. 150 | 151 | (defn- read-netstring* [input] 152 | (read-bytes input (read-long input colon))) 153 | 154 | ;; And the public facing API: `read-netstring`. 155 | 156 | (defn read-netstring 157 | "Reads a classic netstring from input—a PushbackInputStream. Returns the 158 | contained binary data as byte array." 159 | ^bytes [input] 160 | (let [content (read-netstring* input)] 161 | (when-not (= (read-byte input) comma) 162 | (throw (IOException. "Invalid netstring. ',' expected."))) 163 | content)) 164 | 165 | ;; Similarly the `string>payload` and `stringpayload ^bytes [^String s] 170 | (.getBytes s StandardCharsets/UTF_8)) 171 | 172 | (defn- stringpayload (str (alength content)))) 189 | (.write (unchecked-int colon)) 190 | (.write content))) 191 | 192 | (defn write-netstring 193 | "Write the given binary data to the output stream in the form of a classic 194 | netstring." 195 | [^OutputStream output, content] 196 | (doto output 197 | (write-netstring* content) 198 | (.write (unchecked-int comma)))) 199 | 200 | ;; # Bencode 201 | ;; 202 | ;; However most of the time we don't want to send simple blobs of data 203 | ;; back and forth. The data sent between the communication peers usually 204 | ;; has some structure, which has to be carried along the way to the 205 | ;; other side. Here [bencode][bc] comes into play. 206 | ;; 207 | ;; In addition to netstrings, bencode defines easily parseable structures 208 | ;; for lists, maps and numbers. It allows communicating information 209 | ;; about the data structure to the peer on the other side. 210 | ;; 211 | ;; ## Tokens 212 | ;; 213 | ;; The data is encoded in tokens in bencode. There are several types of 214 | ;; tokens: 215 | ;; 216 | ;; * A netstring without trailing comma for string data. 217 | ;; * A tag specifying the type of the following tokens.
218 | ;; The tag may be one of these: 219 | ;; * `\i` to encode integers. 220 | ;; * `\l` to encode lists of items. 221 | ;; * `\d` to encode maps of item pairs. 222 | ;; * `\e` to end a previously started tag. 223 | ;; 224 | ;; ## Reading bencode 225 | ;; 226 | ;; Reading bencode encoded data is basically parsing a stream of values from the 227 | ;; input. To read the bencode encoded data we walk along the sequence of values 228 | ;; and act according to the found tags. 229 | 230 | (declare read-bencode) 231 | 232 | ;; Integers consist of a sequence of decimal digits. 233 | 234 | (defn- read-integer [input] 235 | (read-long input e)) 236 | 237 | ;; Lists are just a sequence of other tokens. 238 | 239 | (defn- read-list [input] 240 | (loop [res (transient [])] 241 | (if-some [val (read-bencode input)] 242 | (recur (conj! res val)) 243 | (persistent! res)))) 244 | 245 | ;; Maps are sequences of key/value pairs. The keys are always 246 | ;; decoded into strings. The values are kept as is. 247 | 248 | (defn- read-map [input] 249 | (loop [m (transient {})] 250 | (if-some [key (read-bencode input)] 251 | (if-some [val (read-bencode input)] 252 | (recur (assoc! m (stringstring [named] 305 | (cond (string? named) named 306 | (symbol? named) (str named) 307 | (keyword? named) (str (.sym ^clojure.lang.Keyword named)) 308 | :else (throw-illegal-value named "map key"))) 309 | 310 | (defn- write-bencode-map [^IPersistentMap m, ^OutputStream output] 311 | ;; The implementation here is quite unidiomatic for performance reasons. We 312 | ;; need to transform the keys of a map to strings, then sort the kvs 313 | ;; lexicographically by key, then write to output. To avoid creating redundant 314 | ;; data structures, we use an array as the transitional structure that keeps 315 | ;; stringified kvs that we sort and in the end efficiently foreach. 316 | (let [n (.count m) ;; Because `clojure.core/count` is quite slow.
317 | arr (object-array n)] 318 | ;; Using reduce-kv as an efficient map iterator for side effects. 319 | (reduce-kv (fn [i k v] 320 | (aset arr i [(named>string k) v]) 321 | (inc i)) 322 | 0 m) 323 | 324 | (Arrays/sort arr (fn [^PersistentVector x, ^PersistentVector y] 325 | (compare (.nth x 0) (.nth y 0)))) 326 | 327 | (.write output (unchecked-int d)) 328 | (dotimes [i n] 329 | (let [^PersistentVector kv (aget arr i)] 330 | (write-netstring* output (string>payload (.nth kv 0))) 331 | (write-bencode output (.nth kv 1)))) 332 | (.write output (unchecked-int e)))) 333 | 334 | (extend-protocol BencodeSerializable 335 | nil 336 | (write-bencode* [_ output] 337 | ;; Treat nil as an empty list. 338 | (write-bencode* [] output)) 339 | 340 | InputStream 341 | (write-bencode* [stream output] 342 | ;; Streaming does not really work, since we need to know the number of bytes 343 | ;; to write upfront. So we read in everything for InputStreams and pass on 344 | ;; the byte array. 345 | (let [bytes (ByteArrayOutputStream.)] 346 | (io/copy stream bytes) 347 | (write-netstring* output (.toByteArray bytes)))) 348 | 349 | IPersistentMap 350 | (write-bencode* [m output] (write-bencode-map m output)) 351 | 352 | IPersistentCollection 353 | (write-bencode* [coll output] (write-bencode-list coll output)) 354 | 355 | Number 356 | (write-bencode* [n, ^OutputStream output] 357 | (if (integer? n) 358 | (doto output 359 | (.write (unchecked-int i)) 360 | (.write (string>payload (str n))) 361 | (.write (unchecked-int e))) 362 | (throw-illegal-value n "value"))) 363 | 364 | String 365 | (write-bencode* [s output] 366 | ;; For strings we simply write the string as a netstring without trailing 367 | ;; comma after encoding the string as UTF-8 bytes. 368 | (write-netstring* output (string>payload s))) 369 | 370 | ;; Symbols and keywords are converted to a string of the form 'namespace/name' 371 | ;; or just 'name' in case it's not qualified.
We do not add colons for keywords 372 | ;; since the other side might not have the notion of keywords. 373 | 374 | Named 375 | (write-bencode* [named output] (write-bencode* (named>string named) output)) 376 | 377 | Object 378 | (write-bencode* [o output] 379 | ;; In the catch-all, check a few more conditions that are not easy to 380 | ;; declare as separate types. 381 | (cond 382 | ;; The easiest case is of course when we already have a byte array. 383 | ;; We can simply pass it on to the underlying machinery. 384 | (-> o class .getComponentType (= Byte/TYPE)) 385 | (write-netstring* output o) 386 | 387 | ;; Treat other arrays as lists. 388 | (.isArray (class o)) 389 | (write-bencode-list o output) 390 | 391 | :else (throw-illegal-value o "value")))) 392 | --------------------------------------------------------------------------------