├── .gitignore ├── LICENSE ├── README.md ├── box.go ├── css.go ├── display.go ├── hello.jpg ├── html.go ├── main.go ├── node.go └── style.go /.gitignore: -------------------------------------------------------------------------------- 1 | *.pdf 2 | .idea 3 | htmlPDF 4 | *swp 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Ivan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Package for creating PDF files from HTML 2 | 3 | An implementation of the article series [Let's build a toy browser engine!](https://limpet.net/mbrubeck/2014/08/08/toy-layout-engine-1.html) 4 | 5 | Install: 6 | 7 | go get github.com/janczer/htmlPDF 8 | go get github.com/jung-kurt/gofpdf 9 | 10 | Example: 11 | First you need to create 3 files: one with HTML tags, one with CSS styles, and one with Go code: 12 | 13 | `first.html`: 14 | 15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 | 30 | `style.css`: 31 | 32 | * { display: block; padding: 5px; } 33 | .a { background: #ff0000; } 34 | .b { background: #ffa500; } 35 | .c { background: #ffff00; } 36 | .d { background: #008000; } 37 | .e { background: #0000ff; } 38 | .f { background: #4b0082; } 39 | .g { background: #800080; } 40 | 41 | 42 | `main.go`: 43 | 44 | package main 45 | 46 | import "github.com/janczer/htmlPDF" 47 | 48 | func main() { 49 | htmlPDF.Generate("first.html", "style.css", "hello.pdf") 50 | } 51 | 52 | ![render](hello.jpg) 53 | 54 | ### Todo: 55 |
56 | 57 | - [ ] Add support for anonymous blocks 58 | - [ ] Add support for inline blocks 59 | - [ ] Add support for CSS from the `<style>` tag 60 | 61 |
62 | -------------------------------------------------------------------------------- /box.go: -------------------------------------------------------------------------------- 1 | package htmlPDF 2 | 3 | import "fmt" 4 | 5 | type Dimensions struct { 6 | content Rect 7 | 8 | padding EdgeSizes 9 | border EdgeSizes 10 | margin EdgeSizes 11 | } 12 | 13 | type Rect struct { 14 | x float64 15 | y float64 16 | 17 | width float64 18 | height float64 19 | } 20 | 21 | type EdgeSizes struct { 22 | left float64 23 | right float64 24 | top float64 25 | bottom float64 26 | } 27 | 28 | type LayoutBox struct { 29 | dimensions Dimensions 30 | box_type interface{} //box_type can be a block node, an inline node, or an anonymous block box 31 | children map[int]*LayoutBox 32 | style StyleNode 33 | } 34 | 35 | func (l LayoutBox) getStyleNode() StyleNode { 36 | switch l.box_type.(type) { 37 | case BlockNode, InlineNode: 38 | return l.style 39 | case AnonymousBlock: 40 | panic("Anonymous block box has no style node") 41 | default: 42 | panic("Type must be BlockNode or InlineNode") 43 | } 44 | } 45 | 46 | type BlockNode struct{} 47 | 48 | type InlineNode struct{} 49 | 50 | type AnonymousBlock struct{} 51 | 52 | func (r Rect) expandedBy(edge EdgeSizes) Rect { 53 | return Rect{ 54 | x: r.x - edge.left, 55 | y: r.y - edge.top, 56 | width: r.width + edge.left + edge.right, 57 | height: r.height + edge.top + edge.bottom, 58 | } 59 | } 60 | 61 | func (d Dimensions) paddingBox() Rect { 62 | return d.content.expandedBy(d.padding) 63 | } 64 | 65 | func (d Dimensions) borderBox() Rect { 66 | return d.paddingBox().expandedBy(d.border) 67 | } 68 | 69 | func (d Dimensions) marginBox() Rect { 70 | return d.borderBox().expandedBy(d.margin) 71 | } 72 | 73 | func (d Dimensions) textBox() Rect { 74 | return Rect{ 75 | x: d.content.x + d.margin.left + d.padding.left + d.border.left, 76 | y: d.content.y + d.margin.top + d.padding.top + d.border.top, 77 | } 78 | } 79 | 80 | func (s StyleNode) value(name 
string) Value { 81 | val, ok := s.specified_values[name] 82 | if ok { 83 | return val 84 | } 85 | //Return white color and transparent color 86 | return Value{color: Color{0, 0, 0, 0}} 87 | } 88 | 89 | func (s StyleNode) lookup(first string, second string, end Value) Value { 90 | f, ok := s.specified_values[first] 91 | if ok { 92 | return f 93 | } 94 | f, ok = s.specified_values[second] 95 | if ok { 96 | return f 97 | } 98 | return end 99 | } 100 | 101 | func (s StyleNode) display() string { 102 | val, ok := s.specified_values["display"] 103 | if ok { 104 | return val.keyword 105 | } 106 | return "inline" 107 | } 108 | 109 | func NewLayoutBox(boxType interface{}, style StyleNode) *LayoutBox { 110 | return &LayoutBox{ 111 | dimensions: Dimensions{}, 112 | box_type: boxType, 113 | children: map[int]*LayoutBox{}, 114 | style: style, 115 | } 116 | } 117 | 118 | func (s LayoutBox) print(l int) { 119 | tab(l) 120 | fmt.Printf("dimensions %+v\n", s.dimensions) 121 | tab(l) 122 | fmt.Printf("box type %#v\n", s.box_type) 123 | //tab(l) 124 | //fmt.Printf("style %v\n", s.style) 125 | //tab(l) 126 | //fmt.Printf("childrens: \n") 127 | l++ 128 | for i := 0; i < len(s.children); i++ { 129 | s.children[i].print(l + 1) 130 | } 131 | } 132 | 133 | func buildLayoutTree(styleNode StyleNode) *LayoutBox { 134 | 135 | display := styleNode.display() 136 | fmt.Println(display) 137 | var boxType interface{} 138 | switch display { 139 | case "block": 140 | boxType = BlockNode{} 141 | case "inline": 142 | boxType = InlineNode{} 143 | default: 144 | panic("Root node has display: none.") 145 | } 146 | fmt.Println(boxType) 147 | 148 | l := LayoutBox{ 149 | box_type: boxType, 150 | children: map[int]*LayoutBox{}, 151 | style: styleNode, 152 | } 153 | 154 | for i := 0; i < len(styleNode.children); i++ { 155 | child := styleNode.children[i] 156 | display = child.display() 157 | switch display { 158 | case "block": 159 | childLayoutTree := buildLayoutTree(child) 160 | l.children[len(l.children)] = 
childLayoutTree 161 | case "inline": 162 | lastContainer := l.getLastContainer() 163 | boxT := lastContainer.box_type 164 | fmt.Printf("last container %#v\n", boxT) 165 | //add anonymous box 166 | switch boxT.(type) { 167 | case AnonymousBlock, InlineNode: 168 | childLayoutTree := buildLayoutTree(child) 169 | 170 | lastContainer.children[len(lastContainer.children)] = childLayoutTree 171 | case BlockNode: 172 | //create AnonymousBlock 173 | anonymous := LayoutBox{ 174 | box_type: AnonymousBlock{}, 175 | children: map[int]*LayoutBox{}, 176 | } 177 | //buildLayoutTree 178 | childLayoutTree := buildLayoutTree(child) 179 | //add to AnonymousBlock 180 | anonymous.children[len(anonymous.children)] = childLayoutTree 181 | //add anonymousBox to child 182 | l.children[len(l.children)] = &anonymous 183 | } 184 | } 185 | } 186 | 187 | return &l 188 | } 189 | 190 | func (l *LayoutBox) getLastContainer() *LayoutBox { 191 | if len(l.children) == 0 { 192 | return l 193 | } 194 | 195 | if len(l.children) == 1 { 196 | return l.children[0] 197 | } 198 | 199 | return l.children[len(l.children)-1] 200 | } 201 | 202 | func (l *LayoutBox) getInlineContainer() *LayoutBox { 203 | boxT := l.box_type 204 | switch boxT.(type) { 205 | case AnonymousBlock, InlineNode: 206 | return l 207 | case BlockNode: 208 | return NewLayoutBox(AnonymousBlock{}, StyleNode{}) 209 | default: 210 | return l 211 | } 212 | } 213 | 214 | func layoutTree(node StyleNode, containBlock Dimensions) *LayoutBox { 215 | containBlock.content.height = 0 216 | 217 | rootBox := buildLayoutTree(node) 218 | rootBox.layout(&containBlock) 219 | 220 | return rootBox 221 | } 222 | 223 | func (l *LayoutBox) layout(containBlock *Dimensions) { 224 | switch l.box_type.(type) { 225 | case BlockNode: 226 | l.layoutBox(containBlock) 227 | case InlineNode: 228 | fmt.Println("layout inlinenode") 229 | l.inlineBox(containBlock) 230 | case AnonymousBlock: 231 | fmt.Println("layout anonymous") 232 | l.anonymousBox(containBlock) 233 | default: 
// layoutBox lays out a block-level box and its descendants inside
// containBlock. The four steps below are order-dependent: width and position
// are resolved top-down before the children are laid out, and the height is
// resolved bottom-up afterwards.
func (l *LayoutBox) layoutBox(containBlock *Dimensions) {
	//Child width can depend on parent width, so we need to calculate
	//this box's width before laying out its children.
	l.calculateBlockWidth(containBlock)

	//Determine where the box is located within its container.
	l.calculateBlockPosition(containBlock)

	//Recursively lay out the children of this box
	l.layoutBlockChildren()

	//Parent height can depend on child height, so calculateBlockHeight
	//must be called after the children are laid out.
	l.calculateBlockHeight(containBlock)
}
286 | d.content.height = d.content.height + child.dimensions.marginBox().height 287 | } 288 | } 289 | 290 | //Calculate the width of a block-level non-replaced element in normal flow 291 | //http://www.w3.org/TR/CSS2/visudet.html#blockwidth 292 | //Sets the horizontal margin/padding/border dimesions, and the 'width' 293 | func (l *LayoutBox) calculateBlockWidth(containBlock *Dimensions) { 294 | style := l.getStyleNode() 295 | 296 | //width has initial value auto 297 | width, ok := style.specified_values["width"] 298 | if !ok { 299 | width = Value{ 300 | keyword: "auto", 301 | } 302 | } 303 | 304 | //margin, border, and padding have initial value 0 305 | zero := Value{ 306 | length: Length{0.0, "px"}, 307 | } 308 | 309 | marginLeft := style.lookup("margin-left", "margin", zero) 310 | marginRight := style.lookup("margin-right", "margin", zero) 311 | 312 | borderLeft := style.lookup("border-left-width", "border-width", zero) 313 | borderRight := style.lookup("border-rigth-width", "border-width", zero) 314 | 315 | paddingLeft := style.lookup("padding-left", "padding", zero) 316 | paddingRight := style.lookup("padding-right", "padding", zero) 317 | 318 | total := GetTotalFrom(marginLeft, marginRight, borderLeft, borderRight, paddingLeft, paddingRight) 319 | 320 | if width.keyword != "auto" && total > containBlock.content.width { 321 | if marginLeft.keyword == "auto" { 322 | marginLeft = Value{length: Length{0, "Px"}} 323 | } 324 | if marginRight.keyword == "auto" { 325 | marginRight = Value{length: Length{0, "Px"}} 326 | } 327 | } 328 | 329 | underflow := containBlock.content.width - total 330 | 331 | widthAuto := width.keyword == "auto" 332 | marginLeftAuto := style.value("margin-left").keyword == "auto" 333 | marginRightAuto := style.value("margin-right").keyword == "auto" 334 | widthLength := width 335 | 336 | //If the values are overconstrained, calculate margin_rigth 337 | if !widthAuto && !marginLeftAuto && !marginRightAuto { 338 | marginRight = Value{length: 
Length{marginRight.length.value + underflow, "Px"}} 339 | } 340 | 341 | //If execly one size is auto, its used value fallows from the equality 342 | if !widthAuto && !marginLeftAuto && marginRightAuto { 343 | marginRight.length = Length{value: underflow} 344 | } 345 | 346 | if !widthAuto && marginLeftAuto && !marginRightAuto { 347 | marginLeft.length = Length{value: underflow} 348 | } 349 | 350 | if widthAuto { 351 | if marginLeftAuto { 352 | marginLeft = Value{} 353 | } 354 | if marginRightAuto { 355 | marginRight = Value{} 356 | } 357 | 358 | if underflow >= 0 { 359 | //Expand width to fill the underflow 360 | widthLength = Value{length: Length{value: underflow}} 361 | } else { 362 | //Width can't be negative.Adjust the right margin instead 363 | widthLength = Value{} 364 | marginRight = Value{length: Length{marginRight.length.value + underflow, "Px"}} 365 | } 366 | } 367 | //If margin-left and margin-right are both auto, their used values are equal 368 | if !widthAuto && marginLeftAuto && marginRightAuto { 369 | marginLeft.length = Length{value: underflow / 2} 370 | marginRight.length = Length{value: underflow / 2} 371 | } 372 | l.dimensions.content.width = widthLength.length.value 373 | 374 | l.dimensions.padding.left = paddingLeft.length.value 375 | l.dimensions.padding.right = paddingRight.length.value 376 | 377 | l.dimensions.border.left = borderLeft.length.value 378 | l.dimensions.border.right = borderRight.length.value 379 | 380 | l.dimensions.margin.left = marginLeft.length.value 381 | l.dimensions.margin.right = marginRight.length.value 382 | } 383 | 384 | //Finish calculating the block's edge sizes, and position it within its containing block 385 | // http://www.w3.org/TR/CSS2/visudet.html#normal-block 386 | //Sets the vertical margin/padding/border dimensions, and the 'x', 'y' values 387 | func (l *LayoutBox) calculateBlockPosition(containBlock *Dimensions) { 388 | style := l.getStyleNode() 389 | 390 | zero := Value{ 391 | length: Length{0.0, "px"}, 
392 | } 393 | 394 | l.dimensions.margin.top = style.lookup("margin-top", "margin", zero).length.value 395 | l.dimensions.margin.bottom = style.lookup("margin-bottom", "margin", zero).length.value 396 | 397 | l.dimensions.border.top = style.lookup("border-top-width", "border-width", zero).length.value 398 | l.dimensions.border.bottom = style.lookup("border-bottom-width", "border-width", zero).length.value 399 | 400 | l.dimensions.padding.top = style.lookup("padding-top", "padding", zero).length.value 401 | l.dimensions.padding.bottom = style.lookup("padding-bottom", "padding", zero).length.value 402 | 403 | l.dimensions.content.x = containBlock.content.x + l.dimensions.margin.left + l.dimensions.border.left + l.dimensions.padding.left 404 | 405 | l.dimensions.content.y = containBlock.content.height + containBlock.content.y + l.dimensions.margin.top + l.dimensions.border.top + l.dimensions.padding.top 406 | 407 | } 408 | 409 | //Height of a block-level non-replaced element in normal flow with overflow visible 410 | func (l *LayoutBox) calculateBlockHeight(containBlock *Dimensions) { 411 | //If the height is set to an explicit length, use that exact lenght 412 | //Otherwise, just keep the value set by 'layoutBlockChildren' 413 | height := l.getStyleNode().value("height") 414 | if height.length.value != 0 { 415 | l.dimensions.content.height = height.length.value 416 | } 417 | } 418 | 419 | func GetTotalFrom(ml, mr, bl, br, pl, pr Value) float64 { 420 | return ml.length.value + mr.length.value + bl.length.value + br.length.value + pl.length.value + pr.length.value 421 | } 422 | -------------------------------------------------------------------------------- /css.go: -------------------------------------------------------------------------------- 1 | package htmlPDF 2 | 3 | import ( 4 | "regexp" 5 | "strconv" 6 | ) 7 | 8 | type Stylesheet struct { 9 | rules map[int]*Rule 10 | } 11 | 12 | type Rule struct { 13 | selectors map[int]SimpleSelector 14 | declaration 
map[int]Declaration 15 | } 16 | 17 | type SimpleSelector struct { 18 | tag_name string 19 | id string 20 | class map[int]string 21 | } 22 | 23 | type Specificity struct { 24 | a int 25 | b int 26 | c int 27 | } 28 | 29 | //Calculate specificity 30 | //https://www.w3.org/TR/selectors/#specificity 31 | //cahnge algorithms 32 | func (s SimpleSelector) specificity() Specificity { 33 | var a, b, c int 34 | if len(s.id) > 0 { 35 | a++ 36 | } 37 | if len(s.class) > 0 { 38 | b = len(s.class) 39 | } 40 | if len(s.tag_name) > 0 { 41 | c++ 42 | } 43 | 44 | return Specificity{a, b, c} 45 | } 46 | 47 | type Declaration struct { 48 | name string 49 | value Value 50 | } 51 | 52 | type Value struct { 53 | keyword string 54 | length Length 55 | color Color 56 | } 57 | 58 | type Length struct { 59 | value float64 60 | unit string //only px 61 | } 62 | 63 | type Color struct { 64 | r uint 65 | g uint 66 | b uint 67 | a uint 68 | } 69 | 70 | type Parser struct { 71 | pos int 72 | input string 73 | } 74 | 75 | func validLengthChar(c string) bool { 76 | var valid = regexp.MustCompile("[0-9.]") 77 | return valid.MatchString(c) 78 | } 79 | 80 | //Parse a whole CSS stylesheet 81 | func CssParser(source string) *Parser { 82 | return &Parser{ 83 | pos: 0, 84 | input: source, 85 | } 86 | } 87 | 88 | //Parse a list of rule sets, separated by optional whitespace 89 | func (p *Parser) parseRules() Stylesheet { 90 | rules := map[int]*Rule{} 91 | 92 | for { 93 | p.consumeWhitespace() 94 | if p.eof() { 95 | break 96 | } 97 | rules[len(rules)] = p.parseRule() 98 | } 99 | 100 | return Stylesheet{rules} 101 | } 102 | 103 | //Parse a rule: 'selectors { declarations }' 104 | //declarations it's pair of 'property: value;' 105 | func (p *Parser) parseRule() *Rule { 106 | return &Rule{ 107 | selectors: p.parseSelectors(), 108 | declaration: p.parseDeclarations(), 109 | } 110 | } 111 | 112 | //Parse a list of declarations enclosed in '{ ... 
}' 113 | func (p *Parser) parseDeclarations() map[int]Declaration { 114 | p.consumeChar() 115 | decl := map[int]Declaration{} 116 | for { 117 | p.consumeWhitespace() 118 | if p.nextChar() == "}" { 119 | p.consumeChar() 120 | break 121 | } 122 | decl[len(decl)] = p.parseDeclaration() 123 | } 124 | 125 | return decl 126 | } 127 | 128 | // Parse one declaration pair: 'property: value;' 129 | func (p *Parser) parseDeclaration() Declaration { 130 | name := p.parseIdentifier() 131 | p.consumeWhitespace() 132 | for p.consumeChar() != ":" { 133 | } 134 | p.consumeWhitespace() 135 | value := p.parseValue() 136 | p.consumeWhitespace() 137 | for p.consumeChar() != ";" { 138 | } 139 | 140 | return Declaration{name, value} 141 | } 142 | 143 | //Parse value 144 | func (p *Parser) parseValue() Value { 145 | var valid = regexp.MustCompile("[0-9]") 146 | 147 | switch { 148 | case p.nextChar() == "#": 149 | p.consumeChar() 150 | return p.parseColor() 151 | case valid.MatchString(string(p.nextChar())): 152 | return p.parseLength() 153 | default: 154 | return Value{keyword: p.parseIdentifier()} 155 | } 156 | } 157 | 158 | func (p *Parser) parseLength() Value { 159 | return Value{length: p.parseFloat()} 160 | } 161 | 162 | //Parse value 000px, support only px 163 | func (p *Parser) parseFloat() Length { 164 | var result string 165 | for !p.eof() && validLengthChar(p.nextChar()) { 166 | result += string(p.consumeChar()) 167 | } 168 | r, e := strconv.ParseFloat(result, 64) 169 | if e != nil { 170 | return Length{0, "px"} 171 | } 172 | return Length{r, "px"} 173 | } 174 | 175 | //Parse color #000000 176 | func (p *Parser) parseColor() Value { 177 | return Value{ 178 | color: Color{ 179 | r: p.parseHexPair(), 180 | g: p.parseHexPair(), 181 | b: p.parseHexPair(), 182 | a: 255, 183 | }, 184 | } 185 | } 186 | 187 | //Parse two hexadecimal digits 188 | func (p *Parser) parseHexPair() uint { 189 | s := p.input[p.pos : p.pos+2] 190 | p.pos += 2 191 | r, e := strconv.ParseUint(s, 16, 64) 192 | if 
e != nil { 193 | return 0 194 | } 195 | return uint(r) 196 | } 197 | 198 | //Parse a comma-separated list of selectors 199 | func (p *Parser) parseSelectors() map[int]SimpleSelector { 200 | s := map[int]SimpleSelector{} 201 | Loopsels: 202 | for { 203 | s[len(s)] = p.parseSelector() 204 | p.consumeWhitespace() 205 | switch p.nextChar() { 206 | case ",": 207 | p.consumeChar() 208 | p.consumeWhitespace() 209 | case "{": 210 | break Loopsels 211 | default: 212 | panic("Unexpected character") 213 | } 214 | } 215 | 216 | return s 217 | } 218 | 219 | //Parse one simple selector, e.g.: '#id, class1, class2, class3' 220 | func (p *Parser) parseSelector() SimpleSelector { 221 | m := SimpleSelector{class: map[int]string{}} 222 | Loopsel: 223 | for !p.eof() { 224 | c := p.nextChar() 225 | switch { 226 | case c == "#": 227 | p.consumeChar() 228 | m.id = p.parseIdentifier() 229 | case c == ".": 230 | p.consumeChar() 231 | m.class[len(m.class)] = p.parseIdentifier() 232 | case c == "*": 233 | // universal selector 234 | p.consumeChar() 235 | case validIdentifierChar(c): 236 | m.tag_name = p.parseIdentifier() 237 | default: 238 | break Loopsel 239 | } 240 | p.consumeWhitespace() 241 | } 242 | return m 243 | } 244 | 245 | func validIdentifierChar(c string) bool { 246 | var valid = regexp.MustCompile("[a-zA-Z0-9-_]") 247 | return valid.MatchString(c) 248 | } 249 | 250 | //Parse a property name or keyword 251 | func (p *Parser) parseIdentifier() string { 252 | var valid = regexp.MustCompile("[a-zA-Z0-9-_]") 253 | return p.consumeWhile(func(char string) bool { 254 | return valid.MatchString(char) 255 | }) 256 | } 257 | -------------------------------------------------------------------------------- /display.go: -------------------------------------------------------------------------------- 1 | package htmlPDF 2 | 3 | import ( 4 | "fmt" 5 | "github.com/jung-kurt/gofpdf" 6 | ) 7 | 8 | type DisplayCommand struct { 9 | command interface{} 10 | } 11 | 12 | type SolidColor struct { 13 | 
color Color 14 | rect Rect 15 | } 16 | 17 | type Text struct { 18 | color Color 19 | text string 20 | rect Rect 21 | } 22 | 23 | func (d DisplayCommand) draw(pdf *gofpdf.Fpdf) { 24 | switch command := d.command.(type) { 25 | case SolidColor: 26 | r := command.rect 27 | c := command.color 28 | pdf.SetFillColor(int(c.r), int(c.g), int(c.b)) 29 | pdf.Rect(r.x, r.y, r.width, r.height, "F") 30 | case Text: 31 | r := command.rect 32 | c := command.color 33 | t := command.text 34 | pdf.SetTextColor(int(c.r), int(c.g), int(c.b)) 35 | pdf.Text(r.x, r.y, t) 36 | } 37 | } 38 | 39 | func buildDisplayList(layoutRoot *LayoutBox) map[int]DisplayCommand { 40 | list := map[int]DisplayCommand{} 41 | renderLayoutBox(layoutRoot, list) 42 | return list 43 | } 44 | 45 | func renderLayoutBox(layoutBox *LayoutBox, list map[int]DisplayCommand) { 46 | //renderBackground 47 | renderBackground(layoutBox, list) 48 | 49 | //renderBorders 50 | renderBorders(layoutBox, list) 51 | 52 | //renderText 53 | renderText(layoutBox, list) 54 | 55 | //Render child 56 | for _, child := range layoutBox.children { 57 | renderLayoutBox(child, list) 58 | } 59 | } 60 | 61 | func renderText(layoutBox *LayoutBox, list map[int]DisplayCommand) { 62 | colorText := getColor(layoutBox, "color") 63 | if colorText == nil { 64 | return 65 | } 66 | text := layoutBox.style.node.node_type.text 67 | if len(text) == 0 { 68 | return 69 | } 70 | fmt.Println(text) 71 | 72 | list[len(list)] = DisplayCommand{ 73 | command: Text{ 74 | color: *colorText, 75 | text: text, 76 | rect: layoutBox.dimensions.textBox(), 77 | }, 78 | } 79 | } 80 | 81 | func renderBackground(layoutBox *LayoutBox, list map[int]DisplayCommand) { 82 | colorBackrgound := getColor(layoutBox, "background") 83 | if colorBackrgound == nil { 84 | return 85 | } 86 | list[len(list)] = DisplayCommand{ 87 | command: SolidColor{ 88 | color: *colorBackrgound, 89 | rect: layoutBox.dimensions.borderBox(), 90 | }, 91 | } 92 | } 93 | 94 | func renderBorders(layoutBox 
*LayoutBox, list map[int]DisplayCommand) { 95 | colorBorder := getColor(layoutBox, "border-color") 96 | if colorBorder == nil { 97 | return 98 | } 99 | //Return if white 100 | //TODO change create Color with nil 101 | if colorBorder.r == 255 && colorBorder.g == 255 && colorBorder.b == 255 { 102 | return 103 | } 104 | 105 | d := layoutBox.dimensions 106 | 107 | borderBox := d.borderBox() 108 | 109 | // Left border 110 | list[len(list)] = DisplayCommand{ 111 | command: SolidColor{ 112 | color: *colorBorder, 113 | rect: Rect{ 114 | x: borderBox.x, 115 | y: borderBox.y, 116 | width: d.border.left, 117 | height: borderBox.height, 118 | }, 119 | }, 120 | } 121 | 122 | // Right border 123 | list[len(list)] = DisplayCommand{ 124 | command: SolidColor{ 125 | color: *colorBorder, 126 | rect: Rect{ 127 | x: borderBox.x + borderBox.width - d.border.right, 128 | y: borderBox.y, 129 | width: d.border.right, 130 | height: borderBox.height, 131 | }, 132 | }, 133 | } 134 | 135 | // Top border 136 | list[len(list)] = DisplayCommand{ 137 | command: SolidColor{ 138 | color: *colorBorder, 139 | rect: Rect{ 140 | x: borderBox.x, 141 | y: borderBox.y, 142 | width: borderBox.width, 143 | height: d.border.top, 144 | }, 145 | }, 146 | } 147 | 148 | // Bottom border 149 | list[len(list)] = DisplayCommand{ 150 | command: SolidColor{ 151 | color: *colorBorder, 152 | rect: Rect{ 153 | x: borderBox.x, 154 | y: borderBox.y + borderBox.height - d.border.bottom, 155 | width: borderBox.width, 156 | height: d.border.bottom, 157 | }, 158 | }, 159 | } 160 | } 161 | 162 | //Return the specified color for CSS property name 163 | func getColor(layoutBox *LayoutBox, name string) *Color { 164 | switch layoutBox.box_type.(type) { 165 | case BlockNode, InlineNode: 166 | color := layoutBox.style.value(name).color 167 | return &color 168 | case AnonymousBlock: 169 | return nil 170 | default: 171 | return nil 172 | } 173 | } 174 | -------------------------------------------------------------------------------- 
/hello.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/janczer/htmlPDF/c2b2b548f7c378bfee96f32528f33986030ad9ac/hello.jpg -------------------------------------------------------------------------------- /html.go: -------------------------------------------------------------------------------- 1 | package htmlPDF 2 | 3 | import ( 4 | "fmt" 5 | "regexp" 6 | ) 7 | 8 | // Parse an HTML document and return the *Node 9 | func ParseHtml(source string) *Node { 10 | p := new(Parser) 11 | p.input = source 12 | p.pos = 0 13 | nodes := p.parseNodes() 14 | if len(nodes) > 1 { 15 | panic("Not one root tag") 16 | } 17 | 18 | //return first node 19 | return nodes[0] 20 | } 21 | 22 | func (p *Parser) parseNodes() map[int]*Node { 23 | nodes := map[int]*Node{} 24 | for { 25 | p.consumeWhitespace() 26 | if p.eof() || p.startWith("" { 56 | panic(fmt.Sprintf("%v was not a closing tag <", end)) 57 | } 58 | 59 | //Parse children 60 | children := p.parseNodes() 61 | 62 | //Closing tag 63 | start = p.consumeChar() 64 | if start != "<" { 65 | panic(fmt.Sprintf("%v was not an openig tag <", start)) 66 | } 67 | 68 | slash := p.consumeChar() 69 | if slash != "/" { 70 | panic(fmt.Sprintf("%v was not a tag /", slash)) 71 | } 72 | 73 | closeName := p.parseTagName() 74 | if closeName != tagName { 75 | panic(fmt.Sprintf("open tag %v and close tag %v don't equal ", tagName, closeName)) 76 | } 77 | 78 | end = p.consumeChar() 79 | if end != ">" { 80 | panic(fmt.Sprintf("%v was not a closing tag <", end)) 81 | } 82 | 83 | return elem(tagName, attrs, children) 84 | } 85 | 86 | //Parse a text node 87 | func (p *Parser) parseText() *Node { 88 | return text(p.consumeWhile(func(char string) bool { 89 | return char != "<" 90 | })) 91 | } 92 | 93 | //Parse a tag or attribute name 94 | func (p *Parser) parseTagName() string { 95 | reg := regexp.MustCompile("[a-zA-Z0-9]") 96 | f := func(char string) bool { 97 | return reg.MatchString(char) 
98 | } 99 | return p.consumeWhile(f) 100 | } 101 | 102 | //Parse a list of name="value" pairs 103 | func (p *Parser) parseAttributes() map[string]string { 104 | attr := map[string]string{} 105 | 106 | for { 107 | p.consumeWhitespace() 108 | if p.nextChar() == ">" { 109 | break 110 | } 111 | name, value := p.parseAttribute() 112 | attr[name] = value 113 | } 114 | 115 | return attr 116 | } 117 | 118 | //Parse a single name="value" pair 119 | func (p *Parser) parseAttribute() (string, string) { 120 | name := p.parseTagName() 121 | delimiter := p.consumeChar() 122 | 123 | if delimiter != "=" { 124 | panic(fmt.Sprintf("%v was not =", delimiter)) 125 | } 126 | value := p.parseAttributeValue() 127 | return name, value 128 | } 129 | 130 | //Parse a quoted value 131 | func (p *Parser) parseAttributeValue() string { 132 | q := p.consumeChar() 133 | if q != "\"" && q != "'" { 134 | panic(fmt.Sprintf("%v was not \" or '", q)) 135 | } 136 | 137 | value := p.consumeWhile(func(char string) bool { 138 | return char != q 139 | }) 140 | 141 | cq := p.consumeChar() 142 | if cq != q { 143 | panic(fmt.Sprintf("%v was not %v", cq, q)) 144 | } 145 | 146 | return value 147 | } 148 | 149 | //Return true if current input start with the given string 150 | func (p *Parser) startWith(test string) bool { 151 | start := true 152 | for i := 0; i < len(test); i++ { 153 | if p.input[p.pos+i] != test[i] { 154 | start = false 155 | } 156 | } 157 | 158 | return start 159 | } 160 | 161 | //Consume characters until function 'test' returns false 162 | func (p *Parser) consumeWhile(test func(char string) bool) string { 163 | var result string 164 | for { 165 | if p.eof() || !test(p.nextChar()) { 166 | break 167 | } 168 | result += p.consumeChar() 169 | } 170 | 171 | return result 172 | } 173 | 174 | //Consume and discard zero or more whitespace characters 175 | func (p *Parser) consumeWhitespace() { 176 | reg := regexp.MustCompile("[\\s]") 177 | f := func(char string) bool { 178 | return 
reg.MatchString(char) 179 | } 180 | 181 | p.consumeWhile(f) 182 | } 183 | 184 | //Return the current character with consuming it 185 | func (p *Parser) consumeChar() string { 186 | char := p.input[p.pos] 187 | p.pos++ 188 | return string(char) 189 | } 190 | 191 | //Read the current character without consuming it 192 | func (p *Parser) nextChar() string { 193 | return string(p.input[p.pos]) 194 | } 195 | 196 | //Return true if all input is consumed 197 | func (p *Parser) eof() bool { 198 | return p.pos >= len(p.input) 199 | } 200 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package htmlPDF 2 | 3 | import ( 4 | "fmt" 5 | "github.com/jung-kurt/gofpdf" 6 | "io/ioutil" 7 | ) 8 | 9 | //global pointer to pdf 10 | var pdf *gofpdf.Fpdf 11 | 12 | func Generate(html string, css string, out string) { 13 | xmlFile, err := ioutil.ReadFile(html) 14 | if err != nil { 15 | return 16 | } 17 | 18 | //parse html to Node tree 19 | n := ParseHtml(string(xmlFile)) 20 | fmt.Println("\x1b[41m\x1b[1mprint Node\x1b[0m") 21 | n.print(0) 22 | fmt.Println("\x1b[41m\x1b[1mend print Node\x1b[0m\n") 23 | 24 | cssFile, err := ioutil.ReadFile(css) 25 | if err != nil { 26 | panic(err) 27 | return 28 | } 29 | cssStyle := string(cssFile) 30 | p2 := CssParser(cssStyle) 31 | stylesheet := p2.parseRules() 32 | 33 | styletree := styleTree(n, &stylesheet) 34 | fmt.Println("\x1b[41m\x1b[1mprint StyleTree\x1b[0m") 35 | styletree.print(0) 36 | fmt.Println("\x1b[41m\x1b[1mend print StyleTree\x1b[0m\n") 37 | 38 | viewport := Dimensions{} 39 | viewport.content.width = 210 40 | viewport.content.height = 600 41 | 42 | layoutTree := layoutTree(styletree, viewport) 43 | fmt.Println("\n\x1b[41m\x1b[1mprint LayoutTree\x1b[0m") 44 | layoutTree.print(0) 45 | fmt.Println("\x1b[41m\x1b[1mend print LayoutTree\x1b[0m") 46 | list := buildDisplayList(layoutTree) 47 | fmt.Println(layoutTree) 48 | 
49 | pdf := gofpdf.New("P", "mm", "A4", "") 50 | pdf.AddPage() 51 | pdf.SetFont("Arial", "", 16) 52 | for i := 0; i < len(list); i++ { 53 | list[i].draw(pdf) 54 | } 55 | err = pdf.OutputFileAndClose(out) 56 | if err != nil { 57 | fmt.Println("Error pdf", err) 58 | } 59 | pdf.Close() 60 | } 61 | 62 | func GenerateFromString(html string, css string, out string) { 63 | //parse html to Node tree 64 | n := ParseHtml(string(html)) 65 | fmt.Println("\x1b[41m\x1b[1mprint Node\x1b[0m") 66 | n.print(0) 67 | fmt.Println("\x1b[41m\x1b[1mend print Node\x1b[0m\n") 68 | 69 | cssStyle := string(css) 70 | p2 := CssParser(cssStyle) 71 | stylesheet := p2.parseRules() 72 | 73 | styletree := styleTree(n, &stylesheet) 74 | fmt.Println("\x1b[41m\x1b[1mprint StyleTree\x1b[0m") 75 | styletree.print(0) 76 | fmt.Println("\x1b[41m\x1b[1mend print StyleTree\x1b[0m\n") 77 | 78 | viewport := Dimensions{} 79 | viewport.content.width = 210 80 | viewport.content.height = 600 81 | 82 | layoutTree := layoutTree(styletree, viewport) 83 | fmt.Println("\n\x1b[41m\x1b[1mprint LayoutTree\x1b[0m") 84 | layoutTree.print(0) 85 | fmt.Println("\x1b[41m\x1b[1mend print LayoutTree\x1b[0m") 86 | list := buildDisplayList(layoutTree) 87 | fmt.Println(layoutTree) 88 | 89 | pdf := gofpdf.New("P", "mm", "A4", "") 90 | pdf.AddPage() 91 | pdf.SetFont("Arial", "", 16) 92 | for i := 0; i < len(list); i++ { 93 | list[i].draw(pdf) 94 | } 95 | err := pdf.OutputFileAndClose(out) 96 | if err != nil { 97 | fmt.Println("Error pdf", err) 98 | } 99 | pdf.Close() 100 | } 101 | -------------------------------------------------------------------------------- /node.go: -------------------------------------------------------------------------------- 1 | package htmlPDF 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | ) 7 | 8 | //Struct for Node tree 9 | type Node struct { 10 | children map[int]*Node 11 | node_type NodeType 12 | } 13 | 14 | type NodeType struct { 15 | element ElementData 16 | text string 17 | } 18 | 19 | type ElementData 
struct { 20 | tag_name string 21 | attr map[string]string 22 | } 23 | 24 | func (e ElementData) id() string { 25 | return e.attr["id"] 26 | } 27 | 28 | func (e ElementData) classes() []string { 29 | class, ok := e.attr["class"] 30 | 31 | if ok { 32 | return strings.Split(class, " ") 33 | } 34 | return []string{} 35 | } 36 | 37 | func tab(i int) { 38 | for j := 0; j < i; j++ { 39 | fmt.Printf(" ") 40 | } 41 | } 42 | 43 | func (n *Node) print(l int) { 44 | tab(l) 45 | l++ 46 | fmt.Printf("%s text: %s\n", n.node_type.element.tag_name, n.node_type.text) 47 | for i := 0; i < len(n.children); i++ { 48 | n.children[i].print(l + 1) 49 | } 50 | } 51 | 52 | func text(data string) *Node { 53 | return &Node{ 54 | children: map[int]*Node{}, 55 | node_type: NodeType{ 56 | element: ElementData{attr: map[string]string{}}, 57 | text: data, 58 | }, 59 | } 60 | } 61 | 62 | func elem(name string, attrs map[string]string, children map[int]*Node) *Node { 63 | return &Node{ 64 | children: children, 65 | node_type: NodeType{ 66 | element: ElementData{ 67 | tag_name: name, 68 | attr: attrs, 69 | }, 70 | }, 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /style.go: -------------------------------------------------------------------------------- 1 | package htmlPDF 2 | 3 | import ( 4 | "fmt" 5 | "sort" 6 | ) 7 | 8 | type StyleNode struct { 9 | node *Node 10 | specified_values map[string]Value 11 | children map[int]StyleNode 12 | } 13 | 14 | type MatchedRule struct { 15 | spec Specificity 16 | rule *Rule 17 | } 18 | 19 | //If rule match elem, return a MatchedRule 20 | func matchRule(elem *ElementData, rule *Rule) MatchedRule { 21 | for _, selector := range rule.selectors { 22 | if matchesSelector(elem, selector) { 23 | //Fine the first (highest-specificity) matching selector 24 | mr := MatchedRule{ 25 | selector.specificity(), 26 | rule, 27 | } 28 | return mr 29 | } 30 | } 31 | 32 | return MatchedRule{} 33 | } 34 | 35 | func 
matchesSelector(elem *ElementData, selector SimpleSelector) bool { 36 | //Check type selector 37 | if selector.tag_name != "" && selector.tag_name != elem.tag_name { 38 | return false 39 | } 40 | 41 | //Check id 42 | if selector.id != "" && selector.id != elem.id() { 43 | return false 44 | } 45 | 46 | // Check class selectors 47 | if !elem.classContains(selector.class) { 48 | return false 49 | } 50 | 51 | return true 52 | } 53 | 54 | //Find all CSS rules that match the given element 55 | func matchingRules(elem *ElementData, stylesheet *Stylesheet) map[int]MatchedRule { 56 | matched := map[int]MatchedRule{} 57 | 58 | for i, rule := range stylesheet.rules { 59 | mr := matchRule(elem, rule) 60 | if mr.rule != nil { 61 | matched[i] = mr 62 | } 63 | } 64 | return matched 65 | } 66 | 67 | type SortBySpec map[int]MatchedRule 68 | 69 | func (a SortBySpec) Len() int { return len(a) } 70 | func (a SortBySpec) Swap(i, j int) { a[i], a[j] = a[j], a[i] } 71 | func (a SortBySpec) Less(i, j int) bool { return a[i].spec.a < a[j].spec.a } 72 | 73 | func specifiedValues(elem *ElementData, stylesheet *Stylesheet) map[string]Value { 74 | values := map[string]Value{} 75 | rules := matchingRules(elem, stylesheet) 76 | 77 | //add sort rules 78 | sort.Sort(SortBySpec(rules)) 79 | 80 | for _, matchedRule := range rules { 81 | for _, declaration := range matchedRule.rule.declaration { 82 | values[declaration.name] = declaration.value 83 | } 84 | } 85 | 86 | return values 87 | } 88 | 89 | func styleTree(root *Node, stylesheet *Stylesheet) StyleNode { 90 | children := map[int]StyleNode{} 91 | for i, child := range root.children { 92 | children[i] = styleTree(child, stylesheet) 93 | } 94 | 95 | specifiedValue := map[string]Value{} 96 | if root.node_type.element.tag_name != "" { 97 | specifiedValue = specifiedValues(&root.node_type.element, stylesheet) 98 | } 99 | 100 | return StyleNode{ 101 | node: root, 102 | specified_values: specifiedValue, 103 | children: children, 104 | } 105 | } 106 | 
107 | func (s StyleNode) print(l int) { 108 | tab(l) 109 | fmt.Printf("node %v\n", s.node) 110 | tab(l) 111 | fmt.Printf("specified_values len %d\n", len(s.specified_values)) 112 | tab(l) 113 | fmt.Printf("childrens: \n") 114 | l++ 115 | for i := 0; i < len(s.children); i++ { 116 | s.children[i].print(l + 1) 117 | } 118 | } 119 | 120 | //Return true if ElementData contain one or more class 121 | func (e ElementData) classContains(class map[int]string) bool { 122 | if len(class) == 0 { 123 | return true 124 | } 125 | for _, class := range class { 126 | for _, eclass := range e.classes() { 127 | if class == eclass { 128 | return true 129 | } 130 | } 131 | } 132 | 133 | return false 134 | } 135 | --------------------------------------------------------------------------------