├── LICENSE ├── README.md ├── cmd └── dataurl │ └── main.go ├── dataurl.go ├── dataurl_test.go ├── doc.go ├── lex.go ├── rfc2396.go ├── rfc2396_test.go └── wercker.yml /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Vincent Petithory 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Data URL Schemes for Go [![wercker status](https://app.wercker.com/status/6f9a2e144dfcc59e862c52459b452928/s "wercker status")](https://app.wercker.com/project/bykey/6f9a2e144dfcc59e862c52459b452928) [![GoDoc](https://godoc.org/github.com/vincent-petithory/dataurl?status.png)](https://godoc.org/github.com/vincent-petithory/dataurl) 2 | 3 | This package parses and generates Data URL Schemes for the Go language, according to [RFC 2397](http://tools.ietf.org/html/rfc2397). 4 | 5 | Data URLs are small chunks of data commonly used in browsers to display inline data, 6 | typically like small images, or when you use the FileReader API of the browser. 7 | 8 | Common use-cases: 9 | 10 | * generate a data URL out of a `string`, `[]byte`, `io.Reader` for inclusion in HTML templates, 11 | * parse a data URL sent by a browser in a http.Handler, and do something with the data (save to disk, etc.) 12 | * ... 
13 | 14 | Install the package with: 15 | ~~~ 16 | go get github.com/vincent-petithory/dataurl 17 | ~~~ 18 | 19 | ## Usage 20 | 21 | ~~~ go 22 | package main 23 | 24 | import ( 25 | "github.com/vincent-petithory/dataurl" 26 | "fmt" 27 | ) 28 | 29 | func main() { 30 | dataURL, err := dataurl.DecodeString(`data:text/plain;charset=utf-8;base64,aGV5YQ==`) 31 | if err != nil { 32 | fmt.Println(err) 33 | return 34 | } 35 | fmt.Printf("content type: %s, data: %s\n", dataURL.MediaType.ContentType(), string(dataURL.Data)) 36 | // Output: content type: text/plain, data: heya 37 | } 38 | ~~~ 39 | 40 | From a `http.Handler`: 41 | 42 | ~~~ go 43 | func handleDataURLUpload(w http.ResponseWriter, r *http.Request) { 44 | dataURL, err := dataurl.Decode(r.Body) 45 | defer r.Body.Close() 46 | if err != nil { 47 | http.Error(w, err.Error(), http.StatusBadRequest) 48 | return 49 | } 50 | if dataURL.ContentType() == "image/png" { 51 | ioutil.WriteFile("image.png", dataURL.Data, 0644) 52 | } else { 53 | http.Error(w, "not a png", http.StatusBadRequest) 54 | } 55 | } 56 | ~~~ 57 | 58 | ## Command 59 | 60 | For convenience, a `dataurl` command is provided to encode/decode dataurl streams. 61 | 62 | ~~~ 63 | dataurl - Encode or decode dataurl data and print to standard output 64 | 65 | Usage: dataurl [OPTION]... [FILE] 66 | 67 | dataurl encodes or decodes FILE or standard input if FILE is - or omitted, and prints to standard output. 68 | Unless -mimetype is used, when FILE is specified, dataurl will attempt to detect its mimetype using Go's mime.TypeByExtension (http://golang.org/pkg/mime/#TypeByExtension). If this fails or data is read from STDIN, the mimetype will default to application/octet-stream. 
69 | 70 | Options: 71 | -a=false: encode data using ascii instead of base64 72 | -ascii=false: encode data using ascii instead of base64 73 | -d=false: decode data instead of encoding 74 | -decode=false: decode data instead of encoding 75 | -m="": force the mimetype of the data to encode to this value 76 | -mimetype="": force the mimetype of the data to encode to this value 77 | ~~~ 78 | 79 | ## Contributing 80 | 81 | Feel free to file an issue/make a pull request if you find any bug, or want to suggest enhancements. 82 | -------------------------------------------------------------------------------- /cmd/dataurl/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "io" 7 | "io/ioutil" 8 | "log" 9 | "mime" 10 | "os" 11 | "path" 12 | 13 | "github.com/vincent-petithory/dataurl" 14 | ) 15 | 16 | var ( 17 | performDecode bool 18 | asciiEncoding bool 19 | mimetype string 20 | ) 21 | 22 | func init() { 23 | const decodeUsage = "decode data instead of encoding" 24 | flag.BoolVar(&performDecode, "decode", false, decodeUsage) 25 | flag.BoolVar(&performDecode, "d", false, decodeUsage) 26 | 27 | const mimetypeUsage = "force the mimetype of the data to encode to this value" 28 | flag.StringVar(&mimetype, "mimetype", "", mimetypeUsage) 29 | flag.StringVar(&mimetype, "m", "", mimetypeUsage) 30 | 31 | const asciiUsage = "encode data using ascii instead of base64" 32 | flag.BoolVar(&asciiEncoding, "ascii", false, asciiUsage) 33 | flag.BoolVar(&asciiEncoding, "a", false, asciiUsage) 34 | 35 | flag.Usage = func() { 36 | fmt.Fprint(os.Stderr, 37 | `dataurl - Encode or decode dataurl data and print to standard output 38 | 39 | Usage: dataurl [OPTION]... [FILE] 40 | 41 | dataurl encodes or decodes FILE or standard input if FILE is - or omitted, and prints to standard output. 
42 | Unless -mimetype is used, when FILE is specified, dataurl will attempt to detect its mimetype using Go's mime.TypeByExtension (http://golang.org/pkg/mime/#TypeByExtension). If this fails or data is read from STDIN, the mimetype will default to application/octet-stream. 43 | 44 | Options: 45 | `) 46 | flag.PrintDefaults() 47 | } 48 | } 49 | 50 | func main() { 51 | log.SetFlags(0) 52 | flag.Parse() 53 | 54 | var ( 55 | in io.Reader 56 | out = os.Stdout 57 | encoding = dataurl.EncodingBase64 58 | detectedMimetype string 59 | ) 60 | switch n := flag.NArg(); n { 61 | case 0: 62 | in = os.Stdin 63 | case 1: 64 | if flag.Arg(0) == "-" { 65 | in = os.Stdin 66 | return 67 | } 68 | if f, err := os.Open(flag.Arg(0)); err != nil { 69 | log.Fatal(err) 70 | } else { 71 | in = f 72 | defer f.Close() 73 | } 74 | ext := path.Ext(flag.Arg(0)) 75 | detectedMimetype = mime.TypeByExtension(ext) 76 | } 77 | 78 | switch { 79 | case mimetype == "" && detectedMimetype == "": 80 | mimetype = "application/octet-stream" 81 | case mimetype == "" && detectedMimetype != "": 82 | mimetype = detectedMimetype 83 | } 84 | 85 | if performDecode { 86 | if err := decode(in, out); err != nil { 87 | log.Fatal(err) 88 | } 89 | } else { 90 | if asciiEncoding { 91 | encoding = dataurl.EncodingASCII 92 | } 93 | if err := encode(in, out, encoding, mimetype); err != nil { 94 | log.Fatal(err) 95 | } 96 | } 97 | } 98 | 99 | func decode(in io.Reader, out io.Writer) (err error) { 100 | defer func() { 101 | if e := recover(); e != nil { 102 | err = e.(error) 103 | } 104 | }() 105 | 106 | du, err := dataurl.Decode(in) 107 | if err != nil { 108 | return 109 | } 110 | 111 | _, err = out.Write(du.Data) 112 | if err != nil { 113 | return 114 | } 115 | return 116 | } 117 | 118 | func encode(in io.Reader, out io.Writer, encoding string, mediatype string) (err error) { 119 | defer func() { 120 | if e := recover(); e != nil { 121 | var ok bool 122 | err, ok = e.(error) 123 | if !ok { 124 | err = fmt.Errorf("%v", e) 125 
| } 126 | return 127 | } 128 | }() 129 | b, err := ioutil.ReadAll(in) 130 | if err != nil { 131 | return 132 | } 133 | 134 | du := dataurl.New(b, mediatype) 135 | du.Encoding = encoding 136 | 137 | _, err = du.WriteTo(out) 138 | if err != nil { 139 | return 140 | } 141 | return 142 | } 143 | -------------------------------------------------------------------------------- /dataurl.go: -------------------------------------------------------------------------------- 1 | package dataurl 2 | 3 | import ( 4 | "bytes" 5 | "encoding/base64" 6 | "errors" 7 | "fmt" 8 | "io" 9 | "io/ioutil" 10 | "net/http" 11 | "sort" 12 | "strconv" 13 | "strings" 14 | ) 15 | 16 | const ( 17 | // EncodingBase64 is base64 encoding for the data url 18 | EncodingBase64 = "base64" 19 | // EncodingASCII is ascii encoding for the data url 20 | EncodingASCII = "ascii" 21 | ) 22 | 23 | func defaultMediaType() MediaType { 24 | return MediaType{ 25 | "text", 26 | "plain", 27 | map[string]string{"charset": "US-ASCII"}, 28 | } 29 | } 30 | 31 | // MediaType is the combination of a media type, a media subtype 32 | // and optional parameters. 33 | type MediaType struct { 34 | Type string 35 | Subtype string 36 | Params map[string]string 37 | } 38 | 39 | // ContentType returns the content type of the dataurl's data, in the form type/subtype. 40 | func (mt *MediaType) ContentType() string { 41 | return fmt.Sprintf("%s/%s", mt.Type, mt.Subtype) 42 | } 43 | 44 | // String implements the Stringer interface. 45 | // 46 | // Params values are escaped with the Escape function, rather than in a quoted string. 
47 | func (mt *MediaType) String() string { 48 | var ( 49 | buf bytes.Buffer 50 | keys = make([]string, len(mt.Params)) 51 | i int 52 | ) 53 | for k := range mt.Params { 54 | keys[i] = k 55 | i++ 56 | } 57 | sort.Strings(keys) 58 | for _, k := range keys { 59 | v := mt.Params[k] 60 | fmt.Fprintf(&buf, ";%s=%s", k, EscapeString(v)) 61 | } 62 | return mt.ContentType() + (&buf).String() 63 | } 64 | 65 | // DataURL is the combination of a MediaType describing the type of its Data. 66 | type DataURL struct { 67 | MediaType 68 | Encoding string 69 | Data []byte 70 | } 71 | 72 | // New returns a new DataURL initialized with data and 73 | // a MediaType parsed from mediatype and paramPairs. 74 | // mediatype must be of the form "type/subtype" or it will panic. 75 | // paramPairs must have an even number of elements or it will panic. 76 | // For more complex DataURL, initialize a DataURL struct. 77 | // The DataURL is initialized with base64 encoding. 78 | func New(data []byte, mediatype string, paramPairs ...string) *DataURL { 79 | parts := strings.Split(mediatype, "/") 80 | if len(parts) != 2 { 81 | panic("dataurl: invalid mediatype") 82 | } 83 | 84 | nParams := len(paramPairs) 85 | if nParams%2 != 0 { 86 | panic("dataurl: requires an even number of param pairs") 87 | } 88 | params := make(map[string]string) 89 | for i := 0; i < nParams; i += 2 { 90 | params[paramPairs[i]] = paramPairs[i+1] 91 | } 92 | 93 | mt := MediaType{ 94 | parts[0], 95 | parts[1], 96 | params, 97 | } 98 | return &DataURL{ 99 | MediaType: mt, 100 | Encoding: EncodingBase64, 101 | Data: data, 102 | } 103 | } 104 | 105 | // String implements the Stringer interface. 106 | // 107 | // Note: it doesn't guarantee the returned string is equal to 108 | // the initial source string that was used to create this DataURL. 
109 | // The reasons for that are: 110 | // * Insertion of default values for MediaType that were maybe not in the initial string, 111 | // * Various ways to encode the MediaType parameters (quoted string or url encoded string, the latter is used), 112 | func (du *DataURL) String() string { 113 | var buf bytes.Buffer 114 | du.WriteTo(&buf) 115 | return (&buf).String() 116 | } 117 | 118 | // WriteTo implements the WriterTo interface. 119 | // See the note about String(). 120 | func (du *DataURL) WriteTo(w io.Writer) (n int64, err error) { 121 | var ni int 122 | ni, _ = fmt.Fprint(w, "data:") 123 | n += int64(ni) 124 | 125 | ni, _ = fmt.Fprint(w, du.MediaType.String()) 126 | n += int64(ni) 127 | 128 | if du.Encoding == EncodingBase64 { 129 | ni, _ = fmt.Fprint(w, ";base64") 130 | n += int64(ni) 131 | } 132 | 133 | ni, _ = fmt.Fprint(w, ",") 134 | n += int64(ni) 135 | 136 | if du.Encoding == EncodingBase64 { 137 | encoder := base64.NewEncoder(base64.StdEncoding, w) 138 | ni, err = encoder.Write(du.Data) 139 | if err != nil { 140 | return 141 | } 142 | encoder.Close() 143 | } else if du.Encoding == EncodingASCII { 144 | ni, _ = fmt.Fprint(w, Escape(du.Data)) 145 | n += int64(ni) 146 | } else { 147 | err = fmt.Errorf("dataurl: invalid encoding %s", du.Encoding) 148 | return 149 | } 150 | 151 | return 152 | } 153 | 154 | // UnmarshalText decodes a Data URL string and sets it to *du 155 | func (du *DataURL) UnmarshalText(text []byte) error { 156 | decoded, err := DecodeString(string(text)) 157 | if err != nil { 158 | return err 159 | } 160 | *du = *decoded 161 | return nil 162 | } 163 | 164 | // MarshalText writes du as a Data URL 165 | func (du *DataURL) MarshalText() ([]byte, error) { 166 | buf := bytes.NewBuffer(nil) 167 | if _, err := du.WriteTo(buf); err != nil { 168 | return nil, err 169 | } 170 | return buf.Bytes(), nil 171 | } 172 | 173 | type encodedDataReader func(string) ([]byte, error) 174 | 175 | var asciiDataReader encodedDataReader = func(s string) ([]byte, 
error) { 176 | us, err := Unescape(s) 177 | if err != nil { 178 | return nil, err 179 | } 180 | return []byte(us), nil 181 | } 182 | 183 | var base64DataReader encodedDataReader = func(s string) ([]byte, error) { 184 | data, err := base64.StdEncoding.DecodeString(s) 185 | if err != nil { 186 | return nil, err 187 | } 188 | return []byte(data), nil 189 | } 190 | 191 | type parser struct { 192 | du *DataURL 193 | l *lexer 194 | currentAttr string 195 | unquoteParamVal bool 196 | encodedDataReaderFn encodedDataReader 197 | } 198 | 199 | func (p *parser) parse() error { 200 | for item := range p.l.items { 201 | switch item.t { 202 | case itemError: 203 | return errors.New(item.String()) 204 | case itemMediaType: 205 | p.du.MediaType.Type = item.val 206 | // Should we clear the default 207 | // "charset" parameter at this point? 208 | delete(p.du.MediaType.Params, "charset") 209 | case itemMediaSubType: 210 | p.du.MediaType.Subtype = item.val 211 | case itemParamAttr: 212 | p.currentAttr = item.val 213 | case itemLeftStringQuote: 214 | p.unquoteParamVal = true 215 | case itemParamVal: 216 | val := item.val 217 | if p.unquoteParamVal { 218 | p.unquoteParamVal = false 219 | us, err := strconv.Unquote("\"" + val + "\"") 220 | if err != nil { 221 | return err 222 | } 223 | val = us 224 | } else { 225 | us, err := UnescapeToString(val) 226 | if err != nil { 227 | return err 228 | } 229 | val = us 230 | } 231 | p.du.MediaType.Params[p.currentAttr] = val 232 | case itemBase64Enc: 233 | p.du.Encoding = EncodingBase64 234 | p.encodedDataReaderFn = base64DataReader 235 | case itemDataComma: 236 | if p.encodedDataReaderFn == nil { 237 | p.encodedDataReaderFn = asciiDataReader 238 | } 239 | case itemData: 240 | reader, err := p.encodedDataReaderFn(item.val) 241 | if err != nil { 242 | return err 243 | } 244 | p.du.Data = reader 245 | case itemEOF: 246 | if p.du.Data == nil { 247 | p.du.Data = []byte("") 248 | } 249 | return nil 250 | } 251 | } 252 | panic("EOF not found") 253 | } 
254 | 255 | // DecodeString decodes a Data URL scheme string. 256 | func DecodeString(s string) (*DataURL, error) { 257 | du := &DataURL{ 258 | MediaType: defaultMediaType(), 259 | Encoding: EncodingASCII, 260 | } 261 | 262 | parser := &parser{ 263 | du: du, 264 | l: lex(s), 265 | } 266 | if err := parser.parse(); err != nil { 267 | return nil, err 268 | } 269 | return du, nil 270 | } 271 | 272 | // Decode decodes a Data URL scheme from a io.Reader. 273 | func Decode(r io.Reader) (*DataURL, error) { 274 | data, err := ioutil.ReadAll(r) 275 | if err != nil { 276 | return nil, err 277 | } 278 | return DecodeString(string(data)) 279 | } 280 | 281 | // EncodeBytes encodes the data bytes into a Data URL string, using base 64 encoding. 282 | // 283 | // The media type of data is detected using http.DetectContentType. 284 | func EncodeBytes(data []byte) string { 285 | mt := http.DetectContentType(data) 286 | // http.DetectContentType may add spurious spaces between ; and a parameter. 287 | // The canonical way is to not have them. 
288 | cleanedMt := strings.Replace(mt, "; ", ";", -1) 289 | 290 | return New(data, cleanedMt).String() 291 | } 292 | -------------------------------------------------------------------------------- /dataurl_test.go: -------------------------------------------------------------------------------- 1 | package dataurl 2 | 3 | import ( 4 | "bytes" 5 | "encoding/base64" 6 | "fmt" 7 | "net/http" 8 | "net/http/httptest" 9 | "reflect" 10 | "regexp" 11 | "strings" 12 | "testing" 13 | ) 14 | 15 | type dataURLTest struct { 16 | InputRawDataURL string 17 | ExpectedItems []item 18 | ExpectedDataURL DataURL 19 | } 20 | 21 | func genTestTable() []dataURLTest { 22 | return []dataURLTest{ 23 | dataURLTest{ 24 | `data:;base64,aGV5YQ==`, 25 | []item{ 26 | item{itemDataPrefix, dataPrefix}, 27 | item{itemParamSemicolon, ";"}, 28 | item{itemBase64Enc, "base64"}, 29 | item{itemDataComma, ","}, 30 | item{itemData, "aGV5YQ=="}, 31 | item{itemEOF, ""}, 32 | }, 33 | DataURL{ 34 | defaultMediaType(), 35 | EncodingBase64, 36 | []byte("heya"), 37 | }, 38 | }, 39 | dataURLTest{ 40 | `data:text/plain;base64,aGV5YQ==`, 41 | []item{ 42 | item{itemDataPrefix, dataPrefix}, 43 | item{itemMediaType, "text"}, 44 | item{itemMediaSep, "/"}, 45 | item{itemMediaSubType, "plain"}, 46 | item{itemParamSemicolon, ";"}, 47 | item{itemBase64Enc, "base64"}, 48 | item{itemDataComma, ","}, 49 | item{itemData, "aGV5YQ=="}, 50 | item{itemEOF, ""}, 51 | }, 52 | DataURL{ 53 | MediaType{ 54 | "text", 55 | "plain", 56 | map[string]string{}, 57 | }, 58 | EncodingBase64, 59 | []byte("heya"), 60 | }, 61 | }, 62 | dataURLTest{ 63 | `data:text/plain;charset=utf-8;base64,aGV5YQ==`, 64 | []item{ 65 | item{itemDataPrefix, dataPrefix}, 66 | item{itemMediaType, "text"}, 67 | item{itemMediaSep, "/"}, 68 | item{itemMediaSubType, "plain"}, 69 | item{itemParamSemicolon, ";"}, 70 | item{itemParamAttr, "charset"}, 71 | item{itemParamEqual, "="}, 72 | item{itemParamVal, "utf-8"}, 73 | item{itemParamSemicolon, ";"}, 74 | 
item{itemBase64Enc, "base64"}, 75 | item{itemDataComma, ","}, 76 | item{itemData, "aGV5YQ=="}, 77 | item{itemEOF, ""}, 78 | }, 79 | DataURL{ 80 | MediaType{ 81 | "text", 82 | "plain", 83 | map[string]string{ 84 | "charset": "utf-8", 85 | }, 86 | }, 87 | EncodingBase64, 88 | []byte("heya"), 89 | }, 90 | }, 91 | dataURLTest{ 92 | `data:text/plain;charset=utf-8;foo=bar;base64,aGV5YQ==`, 93 | []item{ 94 | item{itemDataPrefix, dataPrefix}, 95 | item{itemMediaType, "text"}, 96 | item{itemMediaSep, "/"}, 97 | item{itemMediaSubType, "plain"}, 98 | item{itemParamSemicolon, ";"}, 99 | item{itemParamAttr, "charset"}, 100 | item{itemParamEqual, "="}, 101 | item{itemParamVal, "utf-8"}, 102 | item{itemParamSemicolon, ";"}, 103 | item{itemParamAttr, "foo"}, 104 | item{itemParamEqual, "="}, 105 | item{itemParamVal, "bar"}, 106 | item{itemParamSemicolon, ";"}, 107 | item{itemBase64Enc, "base64"}, 108 | item{itemDataComma, ","}, 109 | item{itemData, "aGV5YQ=="}, 110 | item{itemEOF, ""}, 111 | }, 112 | DataURL{ 113 | MediaType{ 114 | "text", 115 | "plain", 116 | map[string]string{ 117 | "charset": "utf-8", 118 | "foo": "bar", 119 | }, 120 | }, 121 | EncodingBase64, 122 | []byte("heya"), 123 | }, 124 | }, 125 | dataURLTest{ 126 | `data:application/json;charset=utf-8;foo="b\"<@>\"r";style=unformatted%20json;base64,eyJtc2ciOiAiaGV5YSJ9`, 127 | []item{ 128 | item{itemDataPrefix, dataPrefix}, 129 | item{itemMediaType, "application"}, 130 | item{itemMediaSep, "/"}, 131 | item{itemMediaSubType, "json"}, 132 | item{itemParamSemicolon, ";"}, 133 | item{itemParamAttr, "charset"}, 134 | item{itemParamEqual, "="}, 135 | item{itemParamVal, "utf-8"}, 136 | item{itemParamSemicolon, ";"}, 137 | item{itemParamAttr, "foo"}, 138 | item{itemParamEqual, "="}, 139 | item{itemLeftStringQuote, "\""}, 140 | item{itemParamVal, `b\"<@>\"r`}, 141 | item{itemRightStringQuote, "\""}, 142 | item{itemParamSemicolon, ";"}, 143 | item{itemParamAttr, "style"}, 144 | item{itemParamEqual, "="}, 145 | item{itemParamVal, 
"unformatted%20json"}, 146 | item{itemParamSemicolon, ";"}, 147 | item{itemBase64Enc, "base64"}, 148 | item{itemDataComma, ","}, 149 | item{itemData, "eyJtc2ciOiAiaGV5YSJ9"}, 150 | item{itemEOF, ""}, 151 | }, 152 | DataURL{ 153 | MediaType{ 154 | "application", 155 | "json", 156 | map[string]string{ 157 | "charset": "utf-8", 158 | "foo": `b"<@>"r`, 159 | "style": "unformatted json", 160 | }, 161 | }, 162 | EncodingBase64, 163 | []byte(`{"msg": "heya"}`), 164 | }, 165 | }, 166 | dataURLTest{ 167 | `data:xxx;base64,aGV5YQ==`, 168 | []item{ 169 | item{itemDataPrefix, dataPrefix}, 170 | item{itemError, "invalid character for media type"}, 171 | }, 172 | DataURL{}, 173 | }, 174 | dataURLTest{ 175 | `data:,`, 176 | []item{ 177 | item{itemDataPrefix, dataPrefix}, 178 | item{itemDataComma, ","}, 179 | item{itemEOF, ""}, 180 | }, 181 | DataURL{ 182 | defaultMediaType(), 183 | EncodingASCII, 184 | []byte(""), 185 | }, 186 | }, 187 | dataURLTest{ 188 | `data:,A%20brief%20note`, 189 | []item{ 190 | item{itemDataPrefix, dataPrefix}, 191 | item{itemDataComma, ","}, 192 | item{itemData, "A%20brief%20note"}, 193 | item{itemEOF, ""}, 194 | }, 195 | DataURL{ 196 | defaultMediaType(), 197 | EncodingASCII, 198 | []byte("A brief note"), 199 | }, 200 | }, 201 | dataURLTest{ 202 | `data:image/svg+xml-im.a.fake;base64,cGllLXN0b2NrX1RoaXJ0eQ==`, 203 | []item{ 204 | item{itemDataPrefix, dataPrefix}, 205 | item{itemMediaType, "image"}, 206 | item{itemMediaSep, "/"}, 207 | item{itemMediaSubType, "svg+xml-im.a.fake"}, 208 | item{itemParamSemicolon, ";"}, 209 | item{itemBase64Enc, "base64"}, 210 | item{itemDataComma, ","}, 211 | item{itemData, "cGllLXN0b2NrX1RoaXJ0eQ=="}, 212 | item{itemEOF, ""}, 213 | }, 214 | DataURL{ 215 | MediaType{ 216 | "image", 217 | "svg+xml-im.a.fake", 218 | map[string]string{}, 219 | }, 220 | EncodingBase64, 221 | []byte("pie-stock_Thirty"), 222 | }, 223 | }, 224 | } 225 | } 226 | 227 | func expectItems(expected, actual []item) bool { 228 | if len(expected) != 
len(actual) { 229 | return false 230 | } 231 | for i := range expected { 232 | if expected[i].t != actual[i].t { 233 | return false 234 | } 235 | if expected[i].val != actual[i].val { 236 | return false 237 | } 238 | } 239 | return true 240 | } 241 | 242 | func equal(du1, du2 *DataURL) (bool, error) { 243 | if !reflect.DeepEqual(du1.MediaType, du2.MediaType) { 244 | return false, nil 245 | } 246 | if du1.Encoding != du2.Encoding { 247 | return false, nil 248 | } 249 | 250 | if du1.Data == nil || du2.Data == nil { 251 | return false, fmt.Errorf("nil Data") 252 | } 253 | 254 | if !bytes.Equal(du1.Data, du2.Data) { 255 | return false, nil 256 | } 257 | return true, nil 258 | } 259 | 260 | func TestLexDataURLs(t *testing.T) { 261 | for _, test := range genTestTable() { 262 | l := lex(test.InputRawDataURL) 263 | var items []item 264 | for item := range l.items { 265 | items = append(items, item) 266 | } 267 | if !expectItems(test.ExpectedItems, items) { 268 | t.Errorf("Expected %v, got %v", test.ExpectedItems, items) 269 | } 270 | } 271 | } 272 | 273 | func testDataURLs(t *testing.T, factory func(string) (*DataURL, error)) { 274 | for _, test := range genTestTable() { 275 | var expectedItemError string 276 | for _, item := range test.ExpectedItems { 277 | if item.t == itemError { 278 | expectedItemError = item.String() 279 | break 280 | } 281 | } 282 | dataURL, err := factory(test.InputRawDataURL) 283 | if expectedItemError == "" && err != nil { 284 | t.Error(err) 285 | continue 286 | } else if expectedItemError != "" && err == nil { 287 | t.Errorf("Expected error \"%s\", got nil", expectedItemError) 288 | continue 289 | } else if expectedItemError != "" && err != nil { 290 | if err.Error() != expectedItemError { 291 | t.Errorf("Expected error \"%s\", got \"%s\"", expectedItemError, err.Error()) 292 | } 293 | continue 294 | } 295 | 296 | if ok, err := equal(dataURL, &test.ExpectedDataURL); err != nil { 297 | t.Error(err) 298 | } else if !ok { 299 | t.Errorf("Expected 
%v, got %v", test.ExpectedDataURL, *dataURL) 300 | } 301 | } 302 | } 303 | 304 | func TestDataURLsWithDecode(t *testing.T) { 305 | testDataURLs(t, func(s string) (*DataURL, error) { 306 | return Decode(strings.NewReader(s)) 307 | }) 308 | } 309 | 310 | func TestDataURLsWithDecodeString(t *testing.T) { 311 | testDataURLs(t, func(s string) (*DataURL, error) { 312 | return DecodeString(s) 313 | }) 314 | } 315 | 316 | func TestDataURLsWithUnmarshalText(t *testing.T) { 317 | testDataURLs(t, func(s string) (*DataURL, error) { 318 | d := &DataURL{} 319 | err := d.UnmarshalText([]byte(s)) 320 | return d, err 321 | }) 322 | } 323 | 324 | func TestRoundTrip(t *testing.T) { 325 | tests := []struct { 326 | s string 327 | roundTripOk bool 328 | }{ 329 | {`data:text/plain;charset=utf-8;foo=bar;base64,aGV5YQ==`, true}, 330 | {`data:;charset=utf-8;foo=bar;base64,aGV5YQ==`, false}, 331 | {`data:text/plain;charset=utf-8;foo="bar";base64,aGV5YQ==`, false}, 332 | {`data:text/plain;charset=utf-8;foo="bar",A%20brief%20note`, false}, 333 | {`data:text/plain;charset=utf-8;foo=bar,A%20brief%20note`, true}, 334 | } 335 | for _, test := range tests { 336 | dataURL, err := DecodeString(test.s) 337 | if err != nil { 338 | t.Error(err) 339 | continue 340 | } 341 | dus := dataURL.String() 342 | if test.roundTripOk && dus != test.s { 343 | t.Errorf("Expected %s, got %s", test.s, dus) 344 | } else if !test.roundTripOk && dus == test.s { 345 | t.Errorf("Found %s, expected something else", test.s) 346 | } 347 | 348 | txt, err := dataURL.MarshalText() 349 | if err != nil { 350 | t.Error(err) 351 | continue 352 | } 353 | if test.roundTripOk && string(txt) != test.s { 354 | t.Errorf("MarshalText roundtrip: got '%s', want '%s'", txt, test.s) 355 | } else if !test.roundTripOk && string(txt) == test.s { 356 | t.Errorf("MarshalText roundtrip: got '%s', want something else", txt) 357 | } 358 | } 359 | } 360 | 361 | func TestNew(t *testing.T) { 362 | tests := []struct { 363 | Data []byte 364 | MediaType 
string 365 | ParamPairs []string 366 | WillPanic bool 367 | ExpectedDataURL *DataURL 368 | }{ 369 | { 370 | []byte(`{"msg": "heya"}`), 371 | "application/json", 372 | []string{}, 373 | false, 374 | &DataURL{ 375 | MediaType{ 376 | "application", 377 | "json", 378 | map[string]string{}, 379 | }, 380 | EncodingBase64, 381 | []byte(`{"msg": "heya"}`), 382 | }, 383 | }, 384 | { 385 | []byte(``), 386 | "application//json", 387 | []string{}, 388 | true, 389 | nil, 390 | }, 391 | { 392 | []byte(``), 393 | "", 394 | []string{}, 395 | true, 396 | nil, 397 | }, 398 | { 399 | []byte(`{"msg": "heya"}`), 400 | "text/plain", 401 | []string{"charset", "utf-8"}, 402 | false, 403 | &DataURL{ 404 | MediaType{ 405 | "text", 406 | "plain", 407 | map[string]string{ 408 | "charset": "utf-8", 409 | }, 410 | }, 411 | EncodingBase64, 412 | []byte(`{"msg": "heya"}`), 413 | }, 414 | }, 415 | { 416 | []byte(`{"msg": "heya"}`), 417 | "text/plain", 418 | []string{"charset", "utf-8", "name"}, 419 | true, 420 | nil, 421 | }, 422 | } 423 | for _, test := range tests { 424 | var dataURL *DataURL 425 | func() { 426 | defer func() { 427 | if test.WillPanic { 428 | if e := recover(); e == nil { 429 | t.Error("Expected panic didn't happen") 430 | } 431 | } else { 432 | if e := recover(); e != nil { 433 | t.Errorf("Unexpected panic: %v", e) 434 | } 435 | } 436 | }() 437 | dataURL = New(test.Data, test.MediaType, test.ParamPairs...) 
438 | }() 439 | if test.WillPanic { 440 | if dataURL != nil { 441 | t.Error("Expected nil DataURL") 442 | } 443 | } else { 444 | if ok, err := equal(dataURL, test.ExpectedDataURL); err != nil { 445 | t.Error(err) 446 | } else if !ok { 447 | t.Errorf("Expected %v, got %v", test.ExpectedDataURL, *dataURL) 448 | } 449 | } 450 | } 451 | } 452 | 453 | var golangFavicon = strings.Replace(`AAABAAEAEBAAAAEAIABoBAAAFgAAACgAAAAQAAAAIAAAAAEAIAAAAAAAAAAAAAAAAAAAAAAAAAAA 454 | AAAAAAD///8AVE44//7hdv/+4Xb//uF2//7hdv/+4Xb//uF2//7hdv/+4Xb//uF2//7hdv/+4Xb/ 455 | /uF2/1ROOP////8A////AFROOP/+4Xb//uF2//7hdv/+4Xb//uF2//7hdv/+4Xb//uF2//7hdv/+ 456 | 4Xb//uF2//7hdv9UTjj/////AP///wBUTjj//uF2//7hdv/+4Xb//uF2//7hdv/+4Xb//uF2//7h 457 | dv/+4Xb//uF2//7hdv/+4Xb/VE44/////wD///8AVE44//7hdv/+4Xb//uF2//7hdv/+4Xb//uF2 458 | //7hdv/+4Xb//uF2//7hdv/+4Xb//uF2/1ROOP////8A////AFROOP/+4Xb//uF2//7hdv/+4Xb/ 459 | /uF2//7hdv/+4Xb//uF2//7hdv/+4Xb//uF2//7hdv9UTjj/////AP///wBUTjj//uF2//7hdv/+ 460 | 4Xb//uF2//7hdv/+4Xb//uF2//7hdv/+4Xb//uF2//7hdv/+4Xb/VE44/////wD///8AVE44//7h 461 | dv/+4Xb//uF2//7hdv/+4Xb/z7t5/8Kyev/+4Xb//993///dd///3Xf//uF2/1ROOP////8A//// 462 | AFROOP/+4Xb//uF2//7hdv//4Hn/dIzD//v8///7/P//dIzD//7hdv//3Xf//913//7hdv9UTjj/ 463 | ////AP///wBUTjj//uF2///fd//+4Xb//uF2/6ajif90jMP/dIzD/46Zpv/+4Xb//+F1///feP/+ 464 | 4Xb/VE44/////wD///8AVE44//7hdv/z1XT////////////Is3L/HyAj/x8gI//Is3L///////// 465 | ///z1XT//uF2/1ROOP////8A19nd/1ROOP/+4Xb/5+HS//v+//8RExf/Liwn//7hdv/+4Xb/5+HS 466 | //v8//8RExf/Liwn//7hdv9UTjj/19nd/1ROOP94aDT/yKdO/+fh0v//////ERMX/y4sJ//+4Xb/ 467 | /uF2/+fh0v//////ERMX/y4sJ//Ip07/dWU3/1ROOP9UTjj/yKdO/6qSSP/Is3L/9fb7//f6///I 468 | s3L//uF2//7hdv/Is3L////////////Is3L/qpJI/8inTv9UTjj/19nd/1ROOP97c07/qpJI/8in 469 | Tv/Ip07//uF2//7hdv/+4Xb//uF2/8zBlv/Kv4//pZJU/3tzTv9UTjj/19nd/////wD///8A4eLl 470 | /6CcjP97c07/e3NO/1dOMf9BOiX/TkUn/2VXLf97c07/e3NO/6CcjP/h4uX/////AP///wD///8A 471 | ////AP///wD///8A////AP///wDq6/H/3N/j/9fZ3f/q6/H/////AP///wD///8A////AP///wD/ 472 | 
//8AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 473 | AAAAAAAAAAAAAA==`, "\n", "", -1) 474 | 475 | func TestEncodeBytes(t *testing.T) { 476 | mustDecode := func(s string) []byte { 477 | data, err := base64.StdEncoding.DecodeString(s) 478 | if err != nil { 479 | panic(err) 480 | } 481 | return data 482 | } 483 | tests := []struct { 484 | Data []byte 485 | ExpectedString string 486 | }{ 487 | { 488 | []byte(`A brief note`), 489 | "data:text/plain;charset=utf-8;base64,QSBicmllZiBub3Rl", 490 | }, 491 | { 492 | []byte{0xA, 0xFF, 0x99, 0x34, 0x56, 0x34, 0x00}, 493 | `data:application/octet-stream;base64,Cv+ZNFY0AA==`, 494 | }, 495 | { 496 | mustDecode(golangFavicon), 497 | `data:image/x-icon;base64,` + golangFavicon, 498 | }, 499 | } 500 | for _, test := range tests { 501 | str := EncodeBytes(test.Data) 502 | if str != test.ExpectedString { 503 | t.Errorf("Expected %s, got %s", test.ExpectedString, str) 504 | } 505 | } 506 | } 507 | 508 | func BenchmarkLex(b *testing.B) { 509 | for i := 0; i < b.N; i++ { 510 | for _, test := range genTestTable() { 511 | l := lex(test.InputRawDataURL) 512 | for _ = range l.items { 513 | } 514 | } 515 | } 516 | } 517 | 518 | const rep = `^data:(?P\w+/[\w\+\-\.]+)?(?P(?:;[\w\-]+="?[\w\-\\<>@,";:%]*"?)+)?(?P;base64)?,(?P.*)$` 519 | 520 | func TestRegexp(t *testing.T) { 521 | re, err := regexp.Compile(rep) 522 | if err != nil { 523 | t.Fatal(err) 524 | } 525 | for _, test := range genTestTable() { 526 | shouldMatch := true 527 | for _, item := range test.ExpectedItems { 528 | if item.t == itemError { 529 | shouldMatch = false 530 | break 531 | } 532 | } 533 | // just test it matches, do not parse 534 | if re.MatchString(test.InputRawDataURL) && !shouldMatch { 535 | t.Error("doesn't match", test.InputRawDataURL) 536 | } else if !re.MatchString(test.InputRawDataURL) && shouldMatch { 537 | t.Error("match", test.InputRawDataURL) 538 | } 539 | } 540 | } 541 | 542 | func BenchmarkRegexp(b *testing.B) { 543 | re, err := 
regexp.Compile(rep) 544 | if err != nil { 545 | b.Fatal(err) 546 | } 547 | for i := 0; i < b.N; i++ { 548 | for _, test := range genTestTable() { 549 | _ = re.FindStringSubmatch(test.InputRawDataURL) 550 | } 551 | } 552 | } 553 | 554 | func ExampleDecodeString() { 555 | dataURL, err := DecodeString(`data:text/plain;charset=utf-8;base64,aGV5YQ==`) 556 | if err != nil { 557 | fmt.Println(err) 558 | return 559 | } 560 | fmt.Printf("%s, %s", dataURL.MediaType.ContentType(), string(dataURL.Data)) 561 | // Output: text/plain, heya 562 | } 563 | 564 | func ExampleDecode() { 565 | r, err := http.NewRequest( 566 | "POST", "/", 567 | strings.NewReader(`data:image/vnd.microsoft.icon;name=golang%20favicon;base64,`+golangFavicon), 568 | ) 569 | if err != nil { 570 | fmt.Println(err) 571 | return 572 | } 573 | 574 | var dataURL *DataURL 575 | h := func(w http.ResponseWriter, r *http.Request) { 576 | var err error 577 | dataURL, err = Decode(r.Body) 578 | defer r.Body.Close() 579 | if err != nil { 580 | fmt.Println(err) 581 | } 582 | } 583 | w := httptest.NewRecorder() 584 | h(w, r) 585 | fmt.Printf("%s: %s", dataURL.Params["name"], dataURL.ContentType()) 586 | // Output: golang favicon: image/vnd.microsoft.icon 587 | } 588 | -------------------------------------------------------------------------------- /doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | Package dataurl parses Data URL Schemes 3 | according to RFC 2397 4 | (http://tools.ietf.org/html/rfc2397). 5 | 6 | Data URLs are small chunks of data commonly used in browsers to display inline data, 7 | typically like small images, or when you use the FileReader API of the browser. 
// item is one lexical token: its type and the input text it spans.
// For itemError, val carries the error message instead of input text.
type item struct {
	t   itemType
	val string
}

// String formats the item for diagnostics. EOF and error items are printed
// as-is; any other value longer than 10 bytes is truncated.
func (i item) String() string {
	switch i.t {
	case itemEOF:
		return "EOF"
	case itemError:
		return i.val
	}
	if len(i.val) > 10 {
		return fmt.Sprintf("%.10q...", i.val)
	}
	return fmt.Sprintf("%q", i.val)
}

// itemType identifies the kind of token emitted by the lexer.
type itemType int

const (
	// itemError signals a lexing failure; it is always the last item sent.
	itemError itemType = iota
	// itemEOF signals that the whole input was lexed successfully.
	itemEOF

	itemDataPrefix

	itemMediaType
	itemMediaSep
	itemMediaSubType
	itemParamSemicolon
	itemParamAttr
	itemParamEqual
	itemLeftStringQuote
	itemRightStringQuote
	itemParamVal

	itemBase64Enc

	itemDataComma
	itemData
)

// eof is the sentinel rune returned by lexer.next once the input is exhausted.
const eof rune = -1

// isTokenRune reports whether r is an ASCII rune that is neither a control
// character, a space, nor a tspecial.
// Note that eof (-1) also satisfies this predicate, so callers must test for
// eof before calling it.
func isTokenRune(r rune) bool {
	return r <= unicode.MaxASCII &&
		!unicode.IsControl(r) &&
		!unicode.IsSpace(r) &&
		!isTSpecialRune(r)
}

// isTSpecialRune reports whether r is one of the separator characters that
// may not appear in a token: ()<>@,;:\"/[]?=
func isTSpecialRune(r rune) bool {
	switch r {
	case '(', ')', '<', '>', '@', ',', ';', ':', '\\', '"', '/', '[', ']', '?', '=':
		return true
	}
	return false
}

// isDiscreteType reports whether s starts with one of the discrete media
// types (text, image, audio, video, application).
//
// See http://tools.ietf.org/html/rfc2045
// This doesn't include the extension-token case,
// as it's handled separately.
func isDiscreteType(s string) bool {
	return strings.HasPrefix(s, "text") ||
		strings.HasPrefix(s, "image") ||
		strings.HasPrefix(s, "audio") ||
		strings.HasPrefix(s, "video") ||
		strings.HasPrefix(s, "application")
}

// isCompositeType reports whether s starts with one of the composite media
// types (message, multipart).
//
// See http://tools.ietf.org/html/rfc2045
// This doesn't include the extension-token case,
// as it's handled separately.
func isCompositeType(s string) bool {
	return strings.HasPrefix(s, "message") ||
		strings.HasPrefix(s, "multipart")
}

// isURLCharRune reports whether r is acceptable in the non-base64 data part:
// ASCII, excluding control characters, delims and "unwise" characters.
// Callers must test for eof first, since eof (-1) passes every check here.
func isURLCharRune(r rune) bool {
	// We're a bit permissive here,
	// by not including '%' in delims
	// This is okay, since url unescaping will validate
	// that later in the parser.
	return r <= unicode.MaxASCII &&
		!(r >= 0x00 && r <= 0x1F) && r != 0x7F && /* control */
		// delims
		r != ' ' &&
		r != '<' &&
		r != '>' &&
		r != '#' &&
		r != '"' &&
		// unwise
		r != '{' &&
		r != '}' &&
		r != '|' &&
		r != '\\' &&
		r != '^' &&
		r != '[' &&
		r != ']' &&
		r != '`'
}

// isBase64Rune reports whether r may appear in base64 data: the standard
// alphabet, the '=' padding character, or a newline.
func isBase64Rune(r rune) bool {
	return (r >= 'a' && r <= 'z') ||
		(r >= 'A' && r <= 'Z') ||
		(r >= '0' && r <= '9') ||
		r == '+' ||
		r == '/' ||
		r == '=' ||
		r == '\n'
}

// stateFn is one state of the lexer; it returns the next state, or nil to stop.
type stateFn func(*lexer) stateFn

// lexer lexes the data URL scheme input string.
// The implementation is modelled on the text/template/parse package.
type lexer struct {
	input          string    // the string being scanned
	start          int       // start offset of the pending item
	pos            int       // current offset in input
	width          int       // byte width of the last rune read by next
	seenBase64Item bool      // whether ";base64" was emitted
	items          chan item // channel of scanned items
}

// run drives the state machine until a state returns nil, then closes items.
func (l *lexer) run() {
	for state := lexBeforeDataPrefix; state != nil; {
		state = state(l)
	}
	close(l.items)
}

// emit sends input[start:pos] as an item of type t and starts a new item.
func (l *lexer) emit(t itemType) {
	l.items <- item{t, l.input[l.start:l.pos]}
	l.start = l.pos
}

// next consumes and returns the next rune, or eof (with width 0) at the end
// of input.
func (l *lexer) next() (r rune) {
	if l.pos >= len(l.input) {
		l.width = 0
		return eof
	}
	r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
	l.pos += l.width
	return r
}

// backup steps back over the rune returned by the last call to next.
// It is only meaningful once per call to next.
func (l *lexer) backup() {
	l.pos -= l.width
}

// ignore drops the pending input before pos.
func (l *lexer) ignore() {
	l.start = l.pos
}

// errorf sends an itemError carrying the formatted message and returns nil,
// which terminates the state machine.
func (l *lexer) errorf(format string, args ...interface{}) stateFn {
	l.items <- item{itemError, fmt.Sprintf(format, args...)}
	return nil
}

// lex starts lexing input in a new goroutine and returns the lexer.
// Items are delivered on the unbuffered l.items channel, which is closed
// after an itemEOF or itemError; the caller must keep receiving until then,
// otherwise the goroutine stays blocked on its send.
func lex(input string) *lexer {
	l := &lexer{
		input: input,
		items: make(chan item),
	}
	go l.run() // Concurrently run state machine.
	return l
}

const (
	dataPrefix     = "data:"
	mediaSep       = '/'
	paramSemicolon = ';'
	paramEqual     = '='
	dataComma      = ','
)

// start lexing by detecting data prefix
func lexBeforeDataPrefix(l *lexer) stateFn {
	if strings.HasPrefix(l.input[l.pos:], dataPrefix) {
		return lexDataPrefix
	}
	return l.errorf("missing data prefix")
}

// lex data prefix
func lexDataPrefix(l *lexer) stateFn {
	l.pos += len(dataPrefix)
	l.emit(itemDataPrefix)
	return lexAfterDataPrefix
}

// lex what's after data prefix.
// it can be the start of the media type, a parameter semicolon
// (possibly introducing the base64 encoding), or the comma preceding the data
func lexAfterDataPrefix(l *lexer) stateFn {
	switch r := l.next(); {
	case r == paramSemicolon:
		l.backup()
		return lexParamSemicolon
	case r == dataComma:
		l.backup()
		return lexDataComma
	case r == eof:
		return l.errorf("missing comma before data")
	case r == 'x' || r == 'X':
		// "x-" introduces an extension token, which is not checked
		// against the list of known media types.
		if l.next() == '-' {
			return lexXTokenMediaType
		}
		return lexInDiscreteMediaType
	case isTokenRune(r):
		return lexInDiscreteMediaType
	default:
		return l.errorf("invalid character after data prefix")
	}
}

// lex an extension-token ("x-...") media type up to the slash
func lexXTokenMediaType(l *lexer) stateFn {
	for {
		switch r := l.next(); {
		case r == mediaSep:
			l.backup()
			return lexMediaType
		case r == eof:
			return l.errorf("missing media type slash")
		case isTokenRune(r):
		default:
			return l.errorf("invalid character for media type")
		}
	}
}

// lex a discrete or composite media type up to the slash
func lexInDiscreteMediaType(l *lexer) stateFn {
	for {
		switch r := l.next(); {
		case r == mediaSep:
			l.backup()
			// check it's valid discrete type
			if !isDiscreteType(l.input[l.start:l.pos]) &&
				!isCompositeType(l.input[l.start:l.pos]) {
				return l.errorf("invalid media type")
			}
			return lexMediaType
		case r == eof:
			return l.errorf("missing media type slash")
		case isTokenRune(r):
		default:
			return l.errorf("invalid character for media type")
		}
	}
}

// emit the pending media type, if any
func lexMediaType(l *lexer) stateFn {
	if l.pos > l.start {
		l.emit(itemMediaType)
	}
	return lexMediaSep
}

// lex the slash between media type and subtype
func lexMediaSep(l *lexer) stateFn {
	l.next()
	l.emit(itemMediaSep)
	return lexAfterMediaSep
}

// lex the media subtype, up to a semicolon or the data comma
func lexAfterMediaSep(l *lexer) stateFn {
	for {
		switch r := l.next(); {
		case r == paramSemicolon || r == dataComma:
			l.backup()
			return lexMediaSubType
		case r == eof:
			return l.errorf("incomplete media type")
		case isTokenRune(r):
		default:
			return l.errorf("invalid character for media subtype")
		}
	}
}

// emit the pending media subtype, if any
func lexMediaSubType(l *lexer) stateFn {
	if l.pos > l.start {
		l.emit(itemMediaSubType)
	}
	return lexAfterMediaSubType
}

// after the subtype, expect a parameter semicolon or the data comma
func lexAfterMediaSubType(l *lexer) stateFn {
	switch r := l.next(); {
	case r == paramSemicolon:
		l.backup()
		return lexParamSemicolon
	case r == dataComma:
		l.backup()
		return lexDataComma
	case r == eof:
		return l.errorf("missing comma before data")
	default:
		return l.errorf("expected semicolon or comma")
	}
}

// lex the semicolon introducing a parameter
func lexParamSemicolon(l *lexer) stateFn {
	l.next()
	l.emit(itemParamSemicolon)
	return lexAfterParamSemicolon
}

// after a semicolon, expect the first rune of a parameter attribute
func lexAfterParamSemicolon(l *lexer) stateFn {
	switch r := l.next(); {
	case r == eof:
		return l.errorf("unterminated parameter sequence")
	case r == paramEqual || r == dataComma:
		return l.errorf("unterminated parameter sequence")
	case isTokenRune(r):
		l.backup()
		return lexInParamAttr
	default:
		return l.errorf("invalid character for parameter attribute")
	}
}

// a valueless attribute right before the comma must be "base64"
func lexBase64Enc(l *lexer) stateFn {
	if l.pos > l.start {
		if v := l.input[l.start:l.pos]; v != "base64" {
			return l.errorf("expected base64, got %s", v)
		}
		l.seenBase64Item = true
		l.emit(itemBase64Enc)
	}
	return lexDataComma
}

// lex a parameter attribute; it ends at '=' (regular parameter)
// or at the data comma (base64 marker)
func lexInParamAttr(l *lexer) stateFn {
	for {
		switch r := l.next(); {
		case r == paramEqual:
			l.backup()
			return lexParamAttr
		case r == dataComma:
			l.backup()
			return lexBase64Enc
		case r == eof:
			return l.errorf("unterminated parameter sequence")
		case isTokenRune(r):
		default:
			return l.errorf("invalid character for parameter attribute")
		}
	}
}

// emit the pending parameter attribute, if any
func lexParamAttr(l *lexer) stateFn {
	if l.pos > l.start {
		l.emit(itemParamAttr)
	}
	return lexParamEqual
}

// lex the equal sign between attribute and value
func lexParamEqual(l *lexer) stateFn {
	l.next()
	l.emit(itemParamEqual)
	return lexAfterParamEqual
}

// after '=', the value is either a quoted string or a token
func lexAfterParamEqual(l *lexer) stateFn {
	switch r := l.next(); {
	case r == '"':
		l.emit(itemLeftStringQuote)
		return lexInQuotedStringParamVal
	case r == eof:
		return l.errorf("missing comma before data")
	case isTokenRune(r):
		return lexInParamVal
	default:
		return l.errorf("invalid character for parameter value")
	}
}

// lex the inside of a quoted parameter value, up to the closing quote
func lexInQuotedStringParamVal(l *lexer) stateFn {
	for {
		switch r := l.next(); {
		case r == eof:
			return l.errorf("unclosed quoted string")
		case r == '\\':
			return lexEscapedChar
		case r == '"':
			l.backup()
			return lexQuotedStringParamVal
		case r <= unicode.MaxASCII:
		default:
			return l.errorf("invalid character for parameter value")
		}
	}
}

// lex the rune following a backslash inside a quoted value
func lexEscapedChar(l *lexer) stateFn {
	switch r := l.next(); {
	// eof is -1 and would satisfy the ASCII check below,
	// so it has to be tested first.
	case r == eof:
		return l.errorf("unexpected eof")
	case r <= unicode.MaxASCII:
		return lexInQuotedStringParamVal
	default:
		return l.errorf("invalid escaped character")
	}
}

// lex an unquoted parameter value, up to a semicolon or the data comma
func lexInParamVal(l *lexer) stateFn {
	for {
		switch r := l.next(); {
		case r == paramSemicolon || r == dataComma:
			l.backup()
			return lexParamVal
		case r == eof:
			return l.errorf("missing comma before data")
		case isTokenRune(r):
		default:
			return l.errorf("invalid character for parameter value")
		}
	}
}

// emit the pending quoted value (if any), then the closing quote
func lexQuotedStringParamVal(l *lexer) stateFn {
	if l.pos > l.start {
		l.emit(itemParamVal)
	}
	l.next()
	l.emit(itemRightStringQuote)
	return lexAfterParamVal
}

// emit the pending unquoted value, if any
func lexParamVal(l *lexer) stateFn {
	if l.pos > l.start {
		l.emit(itemParamVal)
	}
	return lexAfterParamVal
}

// after a value, expect another parameter or the data comma
func lexAfterParamVal(l *lexer) stateFn {
	switch r := l.next(); {
	case r == paramSemicolon:
		l.backup()
		return lexParamSemicolon
	case r == dataComma:
		l.backup()
		return lexDataComma
	case r == eof:
		return l.errorf("missing comma before data")
	default:
		return l.errorf("expected semicolon or comma")
	}
}

// lex the comma, then pick the data state matching the encoding seen
func lexDataComma(l *lexer) stateFn {
	l.next()
	l.emit(itemDataComma)
	if l.seenBase64Item {
		return lexBase64Data
	}
	return lexData
}

// lexDataRunes consumes the rest of the input, requiring every rune to
// satisfy valid. It emits the data (when non-empty) followed by itemEOF.
func lexDataRunes(l *lexer, valid func(rune) bool) stateFn {
	for {
		r := l.next()
		if r == eof {
			break
		}
		if !valid(r) {
			return l.errorf("invalid data character")
		}
	}
	if l.pos > l.start {
		l.emit(itemData)
	}
	l.emit(itemEOF)
	return nil
}

// lex URL-escaped data up to the end of input
func lexData(l *lexer) stateFn {
	return lexDataRunes(l, isURLCharRune)
}

// lex base64 data up to the end of input
func lexBase64Data(l *lexer) stateFn {
	return lexDataRunes(l, isBase64Rune)
}
// Escape implements the URL escaping used by RFC 2397 data URLs
// (http://tools.ietf.org/html/rfc2397).
// It differs a bit from net/url's QueryEscape and QueryUnescape, e.g how spaces are treated (+ instead of %20):
//
// Every byte that is not an unreserved char is escaped to its %XX form
// (uppercase hex). Unreserved chars are [a-z], [A-Z], [0-9], and -_.!~*'().
func Escape(data []byte) string {
	const upperHex = "0123456789ABCDEF"

	var buf bytes.Buffer
	buf.Grow(len(data)) // lower bound: one output byte per input byte
	for _, b := range data {
		if isUnreserved(b) {
			buf.WriteByte(b)
			continue
		}
		buf.WriteByte('%')
		buf.WriteByte(upperHex[b>>4])
		buf.WriteByte(upperHex[b&0x0F])
	}
	return buf.String()
}

// EscapeString is like Escape, but taking
// a string as argument.
func EscapeString(s string) string {
	return Escape([]byte(s))
}

// isUnreserved return true
// if the byte c is an unreserved char,
// as defined in RFC 2396.
func isUnreserved(c byte) bool {
	switch {
	case c >= 'a' && c <= 'z', c >= 'A' && c <= 'Z', c >= '0' && c <= '9':
		return true
	}
	switch c {
	case '-', '_', '.', '!', '~', '*', '\'', '(', ')':
		return true
	}
	return false
}

// isHex reports whether c is an ASCII hexadecimal digit.
func isHex(c byte) bool {
	switch {
	case c >= 'a' && c <= 'f':
		return true
	case c >= 'A' && c <= 'F':
		return true
	case c >= '0' && c <= '9':
		return true
	}
	return false
}

// unhex returns the value of the hexadecimal digit c,
// or 0 if c is not a hexadecimal digit.
// borrowed from net/url/url.go
func unhex(c byte) byte {
	switch {
	case '0' <= c && c <= '9':
		return c - '0'
	case 'a' <= c && c <= 'f':
		return c - 'a' + 10
	case 'A' <= c && c <= 'F':
		return c - 'A' + 10
	}
	return 0
}

// readHexDigit reads one byte of a %XX sequence from r and returns its
// numeric value. It fails if the input ends or the byte is not a hex digit.
func readHexDigit(r *strings.Reader) (byte, error) {
	c, err := r.ReadByte()
	if err == io.EOF {
		return 0, fmt.Errorf("rfc2396: unexpected end of unescape sequence")
	}
	if err != nil {
		return 0, err
	}
	if !isHex(c) {
		// Report the offending byte, not the '%' that opened the sequence.
		return 0, fmt.Errorf("rfc2396: invalid char 0x%x in unescape sequence", c)
	}
	return unhex(c), nil
}

// Unescape unescapes a character sequence
// escaped with Escape(String?).
//
// It returns an error if s contains a non-ASCII character (including bytes
// that are not valid UTF-8), or a '%' that is not followed by two
// hexadecimal digits.
func Unescape(s string) ([]byte, error) {
	var buf bytes.Buffer
	reader := strings.NewReader(s)

	for {
		r, _, err := reader.ReadRune()
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, err
		}
		// Multi-byte runes and utf8.RuneError (returned with size 1 for
		// invalid bytes) are both above the ASCII range.
		if r > 0x7F {
			return nil, fmt.Errorf("rfc2396: non-ASCII char detected")
		}
		if r != '%' {
			buf.WriteByte(byte(r))
			continue
		}

		hi, err := readHexDigit(reader)
		if err != nil {
			return nil, err
		}
		lo, err := readHexDigit(reader)
		if err != nil {
			return nil, err
		}
		buf.WriteByte(hi<<4 | lo)
	}
	return buf.Bytes(), nil
}
127 | func UnescapeToString(s string) (string, error) { 128 | b, err := Unescape(s) 129 | return string(b), err 130 | } 131 | -------------------------------------------------------------------------------- /rfc2396_test.go: -------------------------------------------------------------------------------- 1 | package dataurl 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "testing" 7 | ) 8 | 9 | var tests = []struct { 10 | escaped string 11 | unescaped []byte 12 | }{ 13 | {"A%20brief%20note%0A", []byte("A brief note\n")}, 14 | {"%7B%5B%5Dbyte(%22A%2520brief%2520note%22)%2C%20%5B%5Dbyte(%22A%20brief%20note%22)%7D", []byte(`{[]byte("A%20brief%20note"), []byte("A brief note")}`)}, 15 | } 16 | 17 | func TestEscape(t *testing.T) { 18 | for _, test := range tests { 19 | escaped := Escape(test.unescaped) 20 | if string(escaped) != test.escaped { 21 | t.Errorf("Expected %s, got %s", test.escaped, string(escaped)) 22 | } 23 | } 24 | } 25 | 26 | func TestUnescape(t *testing.T) { 27 | for _, test := range tests { 28 | unescaped, err := Unescape(test.escaped) 29 | if err != nil { 30 | t.Error(err) 31 | continue 32 | } 33 | if !bytes.Equal(unescaped, test.unescaped) { 34 | t.Errorf("Expected %s, got %s", test.unescaped, unescaped) 35 | } 36 | } 37 | } 38 | 39 | func ExampleEscapeString() { 40 | fmt.Println(EscapeString("A brief note")) 41 | // Output: A%20brief%20note 42 | } 43 | 44 | func ExampleEscape() { 45 | fmt.Println(Escape([]byte("A brief note"))) 46 | // Output: A%20brief%20note 47 | } 48 | 49 | func ExampleUnescape() { 50 | data, err := Unescape("A%20brief%20note") 51 | if err != nil { 52 | // can fail e.g if incorrect escaped sequence 53 | fmt.Println(err) 54 | return 55 | } 56 | fmt.Println(string(data)) 57 | // Output: A brief note 58 | } 59 | 60 | func ExampleUnescapeToString() { 61 | s, err := UnescapeToString("A%20brief%20note") 62 | if err != nil { 63 | // can fail e.g if incorrect escaped sequence 64 | fmt.Println(err) 65 | return 66 | } 67 | fmt.Println(s) 68 | // 
Output: A brief note 69 | } 70 | -------------------------------------------------------------------------------- /wercker.yml: -------------------------------------------------------------------------------- 1 | box: wercker/default --------------------------------------------------------------------------------