├── .gitignore
├── LICENSE
├── README.md
├── box.go
├── css.go
├── display.go
├── hello.jpg
├── html.go
├── main.go
├── node.go
└── style.go
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pdf
2 | .idea
3 | htmlPDF
4 | *swp
5 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 Ivan
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Package for creating PDF files from HTML
2 |
3 | An implementation of the article series [Let's build a toy browser engine!](https://limpet.net/mbrubeck/2014/08/08/toy-layout-engine-1.html)
4 |
5 | Install:
6 |
7 | go get github.com/janczer/htmlPDF
8 | go get github.com/jung-kurt/gofpdf
9 |
10 | Example:
11 | First you need to create 3 files: one with HTML tags, one with CSS styles, and one with Go code:
12 |
13 | `first.html`:
14 |
15 |
29 |
30 | `style.css`:
31 |
32 | * { display: block; padding: 5px; }
33 | .a { background: #ff0000; }
34 | .b { background: #ffa500; }
35 | .c { background: #ffff00; }
36 | .d { background: #008000; }
37 | .e { background: #0000ff; }
38 | .f { background: #4b0082; }
39 | .g { background: #800080; }
40 |
41 |
42 | `main.go`:
43 |
44 | package main
45 |
46 | import "github.com/janczer/htmlPDF"
47 |
48 | func main() {
49 | htmlPDF.Generate("first.html", "style.css", "hello.pdf")
50 | }
51 |
52 | 
53 |
54 | ### Todo:
55 |
56 |
57 | - [ ] Add support for anonymous blocks
58 | - [ ] Add support for inline blocks
59 | - [ ] Add support for CSS from the `<style>` tag
60 |
61 |
62 |
--------------------------------------------------------------------------------
/box.go:
--------------------------------------------------------------------------------
1 | package htmlPDF
2 |
3 | import "fmt"
4 |
5 | type Dimensions struct {
6 | content Rect
7 |
8 | padding EdgeSizes
9 | border EdgeSizes
10 | margin EdgeSizes
11 | }
12 |
// Rect is an axis-aligned rectangle given by its origin and its size.
type Rect struct {
	// x and y are the coordinates of the rectangle's origin.
	x, y float64

	// width and height are the rectangle's extents.
	width, height float64
}
20 |
// EdgeSizes holds the thickness of the four sides of a padding, border or
// margin area.
type EdgeSizes struct {
	left, right, top, bottom float64
}
27 |
// LayoutBox is one node of the layout tree.
type LayoutBox struct {
	// dimensions is the box geometry filled in by the layout pass.
	dimensions Dimensions
	// box_type holds a BlockNode, InlineNode or AnonymousBlock value; the
	// layout and rendering code type-switches on it.
	box_type interface{}
	// children are keyed by insertion index (0, 1, 2, ...); new entries are
	// always stored at len(children).
	children map[int]*LayoutBox
	// style is the style node this box was built from. Anonymous boxes are
	// created without one.
	style StyleNode
}
34 |
35 | func (l LayoutBox) getStyleNode() StyleNode {
36 | switch l.box_type.(type) {
37 | case BlockNode, InlineNode:
38 | return l.style
39 | case AnonymousBlock:
40 | panic("Anonymous block box has no style node")
41 | default:
42 | panic("Type must be BlockNode or InlineNode")
43 | }
44 | }
45 |
// Marker types stored in LayoutBox.box_type to tell the kinds of box apart.
type (
	// BlockNode marks a block-level box.
	BlockNode struct{}

	// InlineNode marks an inline box.
	InlineNode struct{}

	// AnonymousBlock marks an anonymous block box, which has no style node.
	AnonymousBlock struct{}
)
51 |
52 | func (r Rect) expandedBy(edge EdgeSizes) Rect {
53 | return Rect{
54 | x: r.x - edge.left,
55 | y: r.y - edge.top,
56 | width: r.width + edge.left + edge.right,
57 | height: r.height + edge.top + edge.bottom,
58 | }
59 | }
60 |
61 | func (d Dimensions) paddingBox() Rect {
62 | return d.content.expandedBy(d.padding)
63 | }
64 |
65 | func (d Dimensions) borderBox() Rect {
66 | return d.paddingBox().expandedBy(d.border)
67 | }
68 |
69 | func (d Dimensions) marginBox() Rect {
70 | return d.borderBox().expandedBy(d.margin)
71 | }
72 |
73 | func (d Dimensions) textBox() Rect {
74 | return Rect{
75 | x: d.content.x + d.margin.left + d.padding.left + d.border.left,
76 | y: d.content.y + d.margin.top + d.padding.top + d.border.top,
77 | }
78 | }
79 |
80 | func (s StyleNode) value(name string) Value {
81 | val, ok := s.specified_values[name]
82 | if ok {
83 | return val
84 | }
85 | //Return white color and transparent color
86 | return Value{color: Color{0, 0, 0, 0}}
87 | }
88 |
89 | func (s StyleNode) lookup(first string, second string, end Value) Value {
90 | f, ok := s.specified_values[first]
91 | if ok {
92 | return f
93 | }
94 | f, ok = s.specified_values[second]
95 | if ok {
96 | return f
97 | }
98 | return end
99 | }
100 |
101 | func (s StyleNode) display() string {
102 | val, ok := s.specified_values["display"]
103 | if ok {
104 | return val.keyword
105 | }
106 | return "inline"
107 | }
108 |
109 | func NewLayoutBox(boxType interface{}, style StyleNode) *LayoutBox {
110 | return &LayoutBox{
111 | dimensions: Dimensions{},
112 | box_type: boxType,
113 | children: map[int]*LayoutBox{},
114 | style: style,
115 | }
116 | }
117 |
118 | func (s LayoutBox) print(l int) {
119 | tab(l)
120 | fmt.Printf("dimensions %+v\n", s.dimensions)
121 | tab(l)
122 | fmt.Printf("box type %#v\n", s.box_type)
123 | //tab(l)
124 | //fmt.Printf("style %v\n", s.style)
125 | //tab(l)
126 | //fmt.Printf("childrens: \n")
127 | l++
128 | for i := 0; i < len(s.children); i++ {
129 | s.children[i].print(l + 1)
130 | }
131 | }
132 |
133 | func buildLayoutTree(styleNode StyleNode) *LayoutBox {
134 |
135 | display := styleNode.display()
136 | fmt.Println(display)
137 | var boxType interface{}
138 | switch display {
139 | case "block":
140 | boxType = BlockNode{}
141 | case "inline":
142 | boxType = InlineNode{}
143 | default:
144 | panic("Root node has display: none.")
145 | }
146 | fmt.Println(boxType)
147 |
148 | l := LayoutBox{
149 | box_type: boxType,
150 | children: map[int]*LayoutBox{},
151 | style: styleNode,
152 | }
153 |
154 | for i := 0; i < len(styleNode.children); i++ {
155 | child := styleNode.children[i]
156 | display = child.display()
157 | switch display {
158 | case "block":
159 | childLayoutTree := buildLayoutTree(child)
160 | l.children[len(l.children)] = childLayoutTree
161 | case "inline":
162 | lastContainer := l.getLastContainer()
163 | boxT := lastContainer.box_type
164 | fmt.Printf("last container %#v\n", boxT)
165 | //add anonymous box
166 | switch boxT.(type) {
167 | case AnonymousBlock, InlineNode:
168 | childLayoutTree := buildLayoutTree(child)
169 |
170 | lastContainer.children[len(lastContainer.children)] = childLayoutTree
171 | case BlockNode:
172 | //create AnonymousBlock
173 | anonymous := LayoutBox{
174 | box_type: AnonymousBlock{},
175 | children: map[int]*LayoutBox{},
176 | }
177 | //buildLayoutTree
178 | childLayoutTree := buildLayoutTree(child)
179 | //add to AnonymousBlock
180 | anonymous.children[len(anonymous.children)] = childLayoutTree
181 | //add anonymousBox to child
182 | l.children[len(l.children)] = &anonymous
183 | }
184 | }
185 | }
186 |
187 | return &l
188 | }
189 |
190 | func (l *LayoutBox) getLastContainer() *LayoutBox {
191 | if len(l.children) == 0 {
192 | return l
193 | }
194 |
195 | if len(l.children) == 1 {
196 | return l.children[0]
197 | }
198 |
199 | return l.children[len(l.children)-1]
200 | }
201 |
202 | func (l *LayoutBox) getInlineContainer() *LayoutBox {
203 | boxT := l.box_type
204 | switch boxT.(type) {
205 | case AnonymousBlock, InlineNode:
206 | return l
207 | case BlockNode:
208 | return NewLayoutBox(AnonymousBlock{}, StyleNode{})
209 | default:
210 | return l
211 | }
212 | }
213 |
214 | func layoutTree(node StyleNode, containBlock Dimensions) *LayoutBox {
215 | containBlock.content.height = 0
216 |
217 | rootBox := buildLayoutTree(node)
218 | rootBox.layout(&containBlock)
219 |
220 | return rootBox
221 | }
222 |
223 | func (l *LayoutBox) layout(containBlock *Dimensions) {
224 | switch l.box_type.(type) {
225 | case BlockNode:
226 | l.layoutBox(containBlock)
227 | case InlineNode:
228 | fmt.Println("layout inlinenode")
229 | l.inlineBox(containBlock)
230 | case AnonymousBlock:
231 | fmt.Println("layout anonymous")
232 | l.anonymousBox(containBlock)
233 | default:
234 | }
235 | }
236 |
237 | func (l *LayoutBox) inlineBox(containBlock *Dimensions) {
238 | fmt.Printf("%+v", containBlock.content)
239 |
240 | l.dimensions.content.x = containBlock.content.x
241 | l.dimensions.content.y = containBlock.content.y
242 | //l.dimensions.content.y = containBlock.content.height
243 | d := &l.dimensions
244 |
245 | //calculate box width
246 |
247 | for _, child := range l.children {
248 | child.layout(d)
249 | }
250 | }
251 |
252 | func (l *LayoutBox) anonymousBox(containBlock *Dimensions) {
253 | fmt.Printf("%+v", containBlock.content)
254 | //block position is the same previous
255 | l.dimensions.content.x = containBlock.content.x
256 | l.dimensions.content.y = containBlock.content.y
257 | l.dimensions.content.height = containBlock.content.height
258 | l.dimensions.content.width = containBlock.content.width
259 |
260 | //Recursibely layout the children of this box
261 | l.layoutBlockChildren()
262 | }
263 |
// layoutBox lays out a block box and its descendants inside containBlock.
// The four steps below depend on each other and must run in this order.
func (l *LayoutBox) layoutBox(containBlock *Dimensions) {
	// Child width can depend on parent width, so this box's width is
	// calculated before its children are laid out.
	l.calculateBlockWidth(containBlock)

	// Determine where the box is located within its container.
	l.calculateBlockPosition(containBlock)

	// Recursively lay out the children of this box.
	l.layoutBlockChildren()

	// Parent height can depend on child height, so the height is calculated
	// after the children are laid out.
	l.calculateBlockHeight(containBlock)
}
279 |
280 | func (l *LayoutBox) layoutBlockChildren() {
281 | d := &l.dimensions
282 |
283 | for _, child := range l.children {
284 | child.layout(d)
285 | // Track the height so each child is laid out below the previous content.
286 | d.content.height = d.content.height + child.dimensions.marginBox().height
287 | }
288 | }
289 |
290 | //Calculate the width of a block-level non-replaced element in normal flow
291 | //http://www.w3.org/TR/CSS2/visudet.html#blockwidth
292 | //Sets the horizontal margin/padding/border dimesions, and the 'width'
293 | func (l *LayoutBox) calculateBlockWidth(containBlock *Dimensions) {
294 | style := l.getStyleNode()
295 |
296 | //width has initial value auto
297 | width, ok := style.specified_values["width"]
298 | if !ok {
299 | width = Value{
300 | keyword: "auto",
301 | }
302 | }
303 |
304 | //margin, border, and padding have initial value 0
305 | zero := Value{
306 | length: Length{0.0, "px"},
307 | }
308 |
309 | marginLeft := style.lookup("margin-left", "margin", zero)
310 | marginRight := style.lookup("margin-right", "margin", zero)
311 |
312 | borderLeft := style.lookup("border-left-width", "border-width", zero)
313 | borderRight := style.lookup("border-rigth-width", "border-width", zero)
314 |
315 | paddingLeft := style.lookup("padding-left", "padding", zero)
316 | paddingRight := style.lookup("padding-right", "padding", zero)
317 |
318 | total := GetTotalFrom(marginLeft, marginRight, borderLeft, borderRight, paddingLeft, paddingRight)
319 |
320 | if width.keyword != "auto" && total > containBlock.content.width {
321 | if marginLeft.keyword == "auto" {
322 | marginLeft = Value{length: Length{0, "Px"}}
323 | }
324 | if marginRight.keyword == "auto" {
325 | marginRight = Value{length: Length{0, "Px"}}
326 | }
327 | }
328 |
329 | underflow := containBlock.content.width - total
330 |
331 | widthAuto := width.keyword == "auto"
332 | marginLeftAuto := style.value("margin-left").keyword == "auto"
333 | marginRightAuto := style.value("margin-right").keyword == "auto"
334 | widthLength := width
335 |
336 | //If the values are overconstrained, calculate margin_rigth
337 | if !widthAuto && !marginLeftAuto && !marginRightAuto {
338 | marginRight = Value{length: Length{marginRight.length.value + underflow, "Px"}}
339 | }
340 |
341 | //If execly one size is auto, its used value fallows from the equality
342 | if !widthAuto && !marginLeftAuto && marginRightAuto {
343 | marginRight.length = Length{value: underflow}
344 | }
345 |
346 | if !widthAuto && marginLeftAuto && !marginRightAuto {
347 | marginLeft.length = Length{value: underflow}
348 | }
349 |
350 | if widthAuto {
351 | if marginLeftAuto {
352 | marginLeft = Value{}
353 | }
354 | if marginRightAuto {
355 | marginRight = Value{}
356 | }
357 |
358 | if underflow >= 0 {
359 | //Expand width to fill the underflow
360 | widthLength = Value{length: Length{value: underflow}}
361 | } else {
362 | //Width can't be negative.Adjust the right margin instead
363 | widthLength = Value{}
364 | marginRight = Value{length: Length{marginRight.length.value + underflow, "Px"}}
365 | }
366 | }
367 | //If margin-left and margin-right are both auto, their used values are equal
368 | if !widthAuto && marginLeftAuto && marginRightAuto {
369 | marginLeft.length = Length{value: underflow / 2}
370 | marginRight.length = Length{value: underflow / 2}
371 | }
372 | l.dimensions.content.width = widthLength.length.value
373 |
374 | l.dimensions.padding.left = paddingLeft.length.value
375 | l.dimensions.padding.right = paddingRight.length.value
376 |
377 | l.dimensions.border.left = borderLeft.length.value
378 | l.dimensions.border.right = borderRight.length.value
379 |
380 | l.dimensions.margin.left = marginLeft.length.value
381 | l.dimensions.margin.right = marginRight.length.value
382 | }
383 |
384 | //Finish calculating the block's edge sizes, and position it within its containing block
385 | // http://www.w3.org/TR/CSS2/visudet.html#normal-block
386 | //Sets the vertical margin/padding/border dimensions, and the 'x', 'y' values
387 | func (l *LayoutBox) calculateBlockPosition(containBlock *Dimensions) {
388 | style := l.getStyleNode()
389 |
390 | zero := Value{
391 | length: Length{0.0, "px"},
392 | }
393 |
394 | l.dimensions.margin.top = style.lookup("margin-top", "margin", zero).length.value
395 | l.dimensions.margin.bottom = style.lookup("margin-bottom", "margin", zero).length.value
396 |
397 | l.dimensions.border.top = style.lookup("border-top-width", "border-width", zero).length.value
398 | l.dimensions.border.bottom = style.lookup("border-bottom-width", "border-width", zero).length.value
399 |
400 | l.dimensions.padding.top = style.lookup("padding-top", "padding", zero).length.value
401 | l.dimensions.padding.bottom = style.lookup("padding-bottom", "padding", zero).length.value
402 |
403 | l.dimensions.content.x = containBlock.content.x + l.dimensions.margin.left + l.dimensions.border.left + l.dimensions.padding.left
404 |
405 | l.dimensions.content.y = containBlock.content.height + containBlock.content.y + l.dimensions.margin.top + l.dimensions.border.top + l.dimensions.padding.top
406 |
407 | }
408 |
409 | //Height of a block-level non-replaced element in normal flow with overflow visible
410 | func (l *LayoutBox) calculateBlockHeight(containBlock *Dimensions) {
411 | //If the height is set to an explicit length, use that exact lenght
412 | //Otherwise, just keep the value set by 'layoutBlockChildren'
413 | height := l.getStyleNode().value("height")
414 | if height.length.value != 0 {
415 | l.dimensions.content.height = height.length.value
416 | }
417 | }
418 |
419 | func GetTotalFrom(ml, mr, bl, br, pl, pr Value) float64 {
420 | return ml.length.value + mr.length.value + bl.length.value + br.length.value + pl.length.value + pr.length.value
421 | }
422 |
--------------------------------------------------------------------------------
/css.go:
--------------------------------------------------------------------------------
1 | package htmlPDF
2 |
3 | import (
4 | "regexp"
5 | "strconv"
6 | )
7 |
// Stylesheet is a parsed CSS stylesheet.
type Stylesheet struct {
	// rules are keyed by their position in the source (0, 1, 2, ...).
	rules map[int]*Rule
}
11 |
// Rule is one CSS rule: 'selectors { declarations }'.
type Rule struct {
	// selectors is the comma-separated selector list, keyed by position.
	selectors map[int]SimpleSelector
	// declaration holds the 'property: value;' pairs, keyed by position.
	declaration map[int]Declaration
}
16 |
// SimpleSelector is a selector made of an optional tag name, an optional id
// and any number of class names.
type SimpleSelector struct {
	// tag_name and id are empty when the selector does not constrain them.
	tag_name, id string
	// class holds the class names, keyed by position (0, 1, 2, ...).
	class map[int]string
}
22 |
// Specificity is the (a, b, c) triple produced by SimpleSelector.specificity:
// a counts the id, b the classes and c the tag name.
type Specificity struct {
	a, b, c int
}
28 |
29 | //Calculate specificity
30 | //https://www.w3.org/TR/selectors/#specificity
31 | //cahnge algorithms
32 | func (s SimpleSelector) specificity() Specificity {
33 | var a, b, c int
34 | if len(s.id) > 0 {
35 | a++
36 | }
37 | if len(s.class) > 0 {
38 | b = len(s.class)
39 | }
40 | if len(s.tag_name) > 0 {
41 | c++
42 | }
43 |
44 | return Specificity{a, b, c}
45 | }
46 |
// Declaration is one 'property: value;' pair of a rule.
type Declaration struct {
	// name is the property name.
	name string
	// value is the parsed property value.
	value Value
}
51 |
// Value is a parsed CSS value. parseValue fills exactly one of the fields
// and leaves the others at their zero value.
type Value struct {
	// keyword is set for identifier values such as "auto" or "block".
	keyword string
	// length is set for numeric values.
	length Length
	// color is set for '#rrggbb' values.
	color Color
}
57 |
// Length is a numeric CSS length.
type Length struct {
	// value is the numeric part of the length.
	value float64
	// unit is the unit of the length; only px is supported.
	unit string
}
62 |
// Color is a colour with red, green, blue and alpha channels.
// parseColor produces channel values in the range 0-255.
type Color struct {
	r, g, b, a uint
}
69 |
// Parser is a cursor over an input string. It is shared by the CSS parser
// and the HTML parser.
type Parser struct {
	// pos is the byte index of the next character to read.
	pos int
	// input is the text being parsed.
	input string
}
74 |
// lengthCharPattern matches the characters that may appear in a number.
// It is compiled once instead of on every call to validLengthChar.
var lengthCharPattern = regexp.MustCompile("[0-9.]")

// validLengthChar reports whether c contains a digit or a dot, i.e. whether
// a single-character c can be part of a numeric length.
func validLengthChar(c string) bool {
	return lengthCharPattern.MatchString(c)
}
79 |
80 | //Parse a whole CSS stylesheet
81 | func CssParser(source string) *Parser {
82 | return &Parser{
83 | pos: 0,
84 | input: source,
85 | }
86 | }
87 |
88 | //Parse a list of rule sets, separated by optional whitespace
89 | func (p *Parser) parseRules() Stylesheet {
90 | rules := map[int]*Rule{}
91 |
92 | for {
93 | p.consumeWhitespace()
94 | if p.eof() {
95 | break
96 | }
97 | rules[len(rules)] = p.parseRule()
98 | }
99 |
100 | return Stylesheet{rules}
101 | }
102 |
103 | //Parse a rule: 'selectors { declarations }'
104 | //declarations it's pair of 'property: value;'
105 | func (p *Parser) parseRule() *Rule {
106 | return &Rule{
107 | selectors: p.parseSelectors(),
108 | declaration: p.parseDeclarations(),
109 | }
110 | }
111 |
112 | //Parse a list of declarations enclosed in '{ ... }'
113 | func (p *Parser) parseDeclarations() map[int]Declaration {
114 | p.consumeChar()
115 | decl := map[int]Declaration{}
116 | for {
117 | p.consumeWhitespace()
118 | if p.nextChar() == "}" {
119 | p.consumeChar()
120 | break
121 | }
122 | decl[len(decl)] = p.parseDeclaration()
123 | }
124 |
125 | return decl
126 | }
127 |
128 | // Parse one declaration pair: 'property: value;'
129 | func (p *Parser) parseDeclaration() Declaration {
130 | name := p.parseIdentifier()
131 | p.consumeWhitespace()
132 | for p.consumeChar() != ":" {
133 | }
134 | p.consumeWhitespace()
135 | value := p.parseValue()
136 | p.consumeWhitespace()
137 | for p.consumeChar() != ";" {
138 | }
139 |
140 | return Declaration{name, value}
141 | }
142 |
143 | //Parse value
144 | func (p *Parser) parseValue() Value {
145 | var valid = regexp.MustCompile("[0-9]")
146 |
147 | switch {
148 | case p.nextChar() == "#":
149 | p.consumeChar()
150 | return p.parseColor()
151 | case valid.MatchString(string(p.nextChar())):
152 | return p.parseLength()
153 | default:
154 | return Value{keyword: p.parseIdentifier()}
155 | }
156 | }
157 |
158 | func (p *Parser) parseLength() Value {
159 | return Value{length: p.parseFloat()}
160 | }
161 |
162 | //Parse value 000px, support only px
163 | func (p *Parser) parseFloat() Length {
164 | var result string
165 | for !p.eof() && validLengthChar(p.nextChar()) {
166 | result += string(p.consumeChar())
167 | }
168 | r, e := strconv.ParseFloat(result, 64)
169 | if e != nil {
170 | return Length{0, "px"}
171 | }
172 | return Length{r, "px"}
173 | }
174 |
175 | //Parse color #000000
176 | func (p *Parser) parseColor() Value {
177 | return Value{
178 | color: Color{
179 | r: p.parseHexPair(),
180 | g: p.parseHexPair(),
181 | b: p.parseHexPair(),
182 | a: 255,
183 | },
184 | }
185 | }
186 |
187 | //Parse two hexadecimal digits
188 | func (p *Parser) parseHexPair() uint {
189 | s := p.input[p.pos : p.pos+2]
190 | p.pos += 2
191 | r, e := strconv.ParseUint(s, 16, 64)
192 | if e != nil {
193 | return 0
194 | }
195 | return uint(r)
196 | }
197 |
198 | //Parse a comma-separated list of selectors
199 | func (p *Parser) parseSelectors() map[int]SimpleSelector {
200 | s := map[int]SimpleSelector{}
201 | Loopsels:
202 | for {
203 | s[len(s)] = p.parseSelector()
204 | p.consumeWhitespace()
205 | switch p.nextChar() {
206 | case ",":
207 | p.consumeChar()
208 | p.consumeWhitespace()
209 | case "{":
210 | break Loopsels
211 | default:
212 | panic("Unexpected character")
213 | }
214 | }
215 |
216 | return s
217 | }
218 |
219 | //Parse one simple selector, e.g.: '#id, class1, class2, class3'
220 | func (p *Parser) parseSelector() SimpleSelector {
221 | m := SimpleSelector{class: map[int]string{}}
222 | Loopsel:
223 | for !p.eof() {
224 | c := p.nextChar()
225 | switch {
226 | case c == "#":
227 | p.consumeChar()
228 | m.id = p.parseIdentifier()
229 | case c == ".":
230 | p.consumeChar()
231 | m.class[len(m.class)] = p.parseIdentifier()
232 | case c == "*":
233 | // universal selector
234 | p.consumeChar()
235 | case validIdentifierChar(c):
236 | m.tag_name = p.parseIdentifier()
237 | default:
238 | break Loopsel
239 | }
240 | p.consumeWhitespace()
241 | }
242 | return m
243 | }
244 |
// identCharPattern matches the characters allowed in an identifier.
// It is compiled once instead of on every call to validIdentifierChar.
var identCharPattern = regexp.MustCompile("[a-zA-Z0-9-_]")

// validIdentifierChar reports whether c contains a letter, digit, '-' or
// '_', i.e. whether a single-character c can be part of an identifier.
func validIdentifierChar(c string) bool {
	return identCharPattern.MatchString(c)
}
249 |
250 | //Parse a property name or keyword
251 | func (p *Parser) parseIdentifier() string {
252 | var valid = regexp.MustCompile("[a-zA-Z0-9-_]")
253 | return p.consumeWhile(func(char string) bool {
254 | return valid.MatchString(char)
255 | })
256 | }
257 |
--------------------------------------------------------------------------------
/display.go:
--------------------------------------------------------------------------------
1 | package htmlPDF
2 |
3 | import (
4 | "fmt"
5 | "github.com/jung-kurt/gofpdf"
6 | )
7 |
// DisplayCommand is one entry of the display list.
type DisplayCommand struct {
	// command holds a SolidColor or a Text value; draw type-switches on it.
	command interface{}
}
11 |
// SolidColor is a display command that fills a rectangle with one colour.
type SolidColor struct {
	// color is the fill colour.
	color Color
	// rect is the area to fill.
	rect Rect
}
16 |
// Text is a display command that draws a string.
type Text struct {
	// color is the text colour.
	color Color
	// text is the string to draw.
	text string
	// rect gives the position of the text; draw only reads its x and y.
	rect Rect
}
22 |
23 | func (d DisplayCommand) draw(pdf *gofpdf.Fpdf) {
24 | switch command := d.command.(type) {
25 | case SolidColor:
26 | r := command.rect
27 | c := command.color
28 | pdf.SetFillColor(int(c.r), int(c.g), int(c.b))
29 | pdf.Rect(r.x, r.y, r.width, r.height, "F")
30 | case Text:
31 | r := command.rect
32 | c := command.color
33 | t := command.text
34 | pdf.SetTextColor(int(c.r), int(c.g), int(c.b))
35 | pdf.Text(r.x, r.y, t)
36 | }
37 | }
38 |
39 | func buildDisplayList(layoutRoot *LayoutBox) map[int]DisplayCommand {
40 | list := map[int]DisplayCommand{}
41 | renderLayoutBox(layoutRoot, list)
42 | return list
43 | }
44 |
45 | func renderLayoutBox(layoutBox *LayoutBox, list map[int]DisplayCommand) {
46 | //renderBackground
47 | renderBackground(layoutBox, list)
48 |
49 | //renderBorders
50 | renderBorders(layoutBox, list)
51 |
52 | //renderText
53 | renderText(layoutBox, list)
54 |
55 | //Render child
56 | for _, child := range layoutBox.children {
57 | renderLayoutBox(child, list)
58 | }
59 | }
60 |
61 | func renderText(layoutBox *LayoutBox, list map[int]DisplayCommand) {
62 | colorText := getColor(layoutBox, "color")
63 | if colorText == nil {
64 | return
65 | }
66 | text := layoutBox.style.node.node_type.text
67 | if len(text) == 0 {
68 | return
69 | }
70 | fmt.Println(text)
71 |
72 | list[len(list)] = DisplayCommand{
73 | command: Text{
74 | color: *colorText,
75 | text: text,
76 | rect: layoutBox.dimensions.textBox(),
77 | },
78 | }
79 | }
80 |
81 | func renderBackground(layoutBox *LayoutBox, list map[int]DisplayCommand) {
82 | colorBackrgound := getColor(layoutBox, "background")
83 | if colorBackrgound == nil {
84 | return
85 | }
86 | list[len(list)] = DisplayCommand{
87 | command: SolidColor{
88 | color: *colorBackrgound,
89 | rect: layoutBox.dimensions.borderBox(),
90 | },
91 | }
92 | }
93 |
94 | func renderBorders(layoutBox *LayoutBox, list map[int]DisplayCommand) {
95 | colorBorder := getColor(layoutBox, "border-color")
96 | if colorBorder == nil {
97 | return
98 | }
99 | //Return if white
100 | //TODO change create Color with nil
101 | if colorBorder.r == 255 && colorBorder.g == 255 && colorBorder.b == 255 {
102 | return
103 | }
104 |
105 | d := layoutBox.dimensions
106 |
107 | borderBox := d.borderBox()
108 |
109 | // Left border
110 | list[len(list)] = DisplayCommand{
111 | command: SolidColor{
112 | color: *colorBorder,
113 | rect: Rect{
114 | x: borderBox.x,
115 | y: borderBox.y,
116 | width: d.border.left,
117 | height: borderBox.height,
118 | },
119 | },
120 | }
121 |
122 | // Right border
123 | list[len(list)] = DisplayCommand{
124 | command: SolidColor{
125 | color: *colorBorder,
126 | rect: Rect{
127 | x: borderBox.x + borderBox.width - d.border.right,
128 | y: borderBox.y,
129 | width: d.border.right,
130 | height: borderBox.height,
131 | },
132 | },
133 | }
134 |
135 | // Top border
136 | list[len(list)] = DisplayCommand{
137 | command: SolidColor{
138 | color: *colorBorder,
139 | rect: Rect{
140 | x: borderBox.x,
141 | y: borderBox.y,
142 | width: borderBox.width,
143 | height: d.border.top,
144 | },
145 | },
146 | }
147 |
148 | // Bottom border
149 | list[len(list)] = DisplayCommand{
150 | command: SolidColor{
151 | color: *colorBorder,
152 | rect: Rect{
153 | x: borderBox.x,
154 | y: borderBox.y + borderBox.height - d.border.bottom,
155 | width: borderBox.width,
156 | height: d.border.bottom,
157 | },
158 | },
159 | }
160 | }
161 |
162 | //Return the specified color for CSS property name
163 | func getColor(layoutBox *LayoutBox, name string) *Color {
164 | switch layoutBox.box_type.(type) {
165 | case BlockNode, InlineNode:
166 | color := layoutBox.style.value(name).color
167 | return &color
168 | case AnonymousBlock:
169 | return nil
170 | default:
171 | return nil
172 | }
173 | }
174 |
--------------------------------------------------------------------------------
/hello.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/janczer/htmlPDF/c2b2b548f7c378bfee96f32528f33986030ad9ac/hello.jpg
--------------------------------------------------------------------------------
/html.go:
--------------------------------------------------------------------------------
1 | package htmlPDF
2 |
3 | import (
4 | "fmt"
5 | "regexp"
6 | )
7 |
8 | // Parse an HTML document and return the *Node
9 | func ParseHtml(source string) *Node {
10 | p := new(Parser)
11 | p.input = source
12 | p.pos = 0
13 | nodes := p.parseNodes()
14 | if len(nodes) > 1 {
15 | panic("Not one root tag")
16 | }
17 |
18 | //return first node
19 | return nodes[0]
20 | }
21 |
22 | func (p *Parser) parseNodes() map[int]*Node {
23 | nodes := map[int]*Node{}
24 | for {
25 | p.consumeWhitespace()
26 | if p.eof() || p.startWith("") {
27 | break
28 | }
29 | nodes[len(nodes)] = p.parseNode()
30 | }
31 | return nodes
32 | }
33 |
34 | //Parse a single node
35 | func (p *Parser) parseNode() *Node {
36 | if p.nextChar() == "<" {
37 | return p.parseElement()
38 | } else {
39 | return p.parseText()
40 | }
41 | }
42 |
43 | //Parse a single element, including contents(and childrens if exist)
44 | func (p *Parser) parseElement() *Node {
45 | //Opening tag
46 | start := p.consumeChar()
47 | if start != "<" {
48 | panic(fmt.Sprintf("%v was not an openig tag <", start))
49 | }
50 |
51 | tagName := p.parseTagName()
52 | attrs := p.parseAttributes()
53 |
54 | end := p.consumeChar()
55 | if end != ">" {
56 | panic(fmt.Sprintf("%v was not a closing tag <", end))
57 | }
58 |
59 | //Parse children
60 | children := p.parseNodes()
61 |
62 | //Closing tag
63 | start = p.consumeChar()
64 | if start != "<" {
65 | panic(fmt.Sprintf("%v was not an openig tag <", start))
66 | }
67 |
68 | slash := p.consumeChar()
69 | if slash != "/" {
70 | panic(fmt.Sprintf("%v was not a tag /", slash))
71 | }
72 |
73 | closeName := p.parseTagName()
74 | if closeName != tagName {
75 | panic(fmt.Sprintf("open tag %v and close tag %v don't equal ", tagName, closeName))
76 | }
77 |
78 | end = p.consumeChar()
79 | if end != ">" {
80 | panic(fmt.Sprintf("%v was not a closing tag <", end))
81 | }
82 |
83 | return elem(tagName, attrs, children)
84 | }
85 |
86 | //Parse a text node
87 | func (p *Parser) parseText() *Node {
88 | return text(p.consumeWhile(func(char string) bool {
89 | return char != "<"
90 | }))
91 | }
92 |
93 | //Parse a tag or attribute name
94 | func (p *Parser) parseTagName() string {
95 | reg := regexp.MustCompile("[a-zA-Z0-9]")
96 | f := func(char string) bool {
97 | return reg.MatchString(char)
98 | }
99 | return p.consumeWhile(f)
100 | }
101 |
102 | //Parse a list of name="value" pairs
103 | func (p *Parser) parseAttributes() map[string]string {
104 | attr := map[string]string{}
105 |
106 | for {
107 | p.consumeWhitespace()
108 | if p.nextChar() == ">" {
109 | break
110 | }
111 | name, value := p.parseAttribute()
112 | attr[name] = value
113 | }
114 |
115 | return attr
116 | }
117 |
118 | //Parse a single name="value" pair
119 | func (p *Parser) parseAttribute() (string, string) {
120 | name := p.parseTagName()
121 | delimiter := p.consumeChar()
122 |
123 | if delimiter != "=" {
124 | panic(fmt.Sprintf("%v was not =", delimiter))
125 | }
126 | value := p.parseAttributeValue()
127 | return name, value
128 | }
129 |
130 | //Parse a quoted value
131 | func (p *Parser) parseAttributeValue() string {
132 | q := p.consumeChar()
133 | if q != "\"" && q != "'" {
134 | panic(fmt.Sprintf("%v was not \" or '", q))
135 | }
136 |
137 | value := p.consumeWhile(func(char string) bool {
138 | return char != q
139 | })
140 |
141 | cq := p.consumeChar()
142 | if cq != q {
143 | panic(fmt.Sprintf("%v was not %v", cq, q))
144 | }
145 |
146 | return value
147 | }
148 |
149 | //Return true if current input start with the given string
150 | func (p *Parser) startWith(test string) bool {
151 | start := true
152 | for i := 0; i < len(test); i++ {
153 | if p.input[p.pos+i] != test[i] {
154 | start = false
155 | }
156 | }
157 |
158 | return start
159 | }
160 |
161 | //Consume characters until function 'test' returns false
162 | func (p *Parser) consumeWhile(test func(char string) bool) string {
163 | var result string
164 | for {
165 | if p.eof() || !test(p.nextChar()) {
166 | break
167 | }
168 | result += p.consumeChar()
169 | }
170 |
171 | return result
172 | }
173 |
174 | //Consume and discard zero or more whitespace characters
175 | func (p *Parser) consumeWhitespace() {
176 | reg := regexp.MustCompile("[\\s]")
177 | f := func(char string) bool {
178 | return reg.MatchString(char)
179 | }
180 |
181 | p.consumeWhile(f)
182 | }
183 |
184 | //Return the current character with consuming it
185 | func (p *Parser) consumeChar() string {
186 | char := p.input[p.pos]
187 | p.pos++
188 | return string(char)
189 | }
190 |
191 | //Read the current character without consuming it
192 | func (p *Parser) nextChar() string {
193 | return string(p.input[p.pos])
194 | }
195 |
196 | //Return true if all input is consumed
197 | func (p *Parser) eof() bool {
198 | return p.pos >= len(p.input)
199 | }
200 |
--------------------------------------------------------------------------------
/main.go:
--------------------------------------------------------------------------------
1 | package htmlPDF
2 |
3 | import (
4 | "fmt"
5 | "github.com/jung-kurt/gofpdf"
6 | "io/ioutil"
7 | )
8 |
// pdf is a package-level pointer to a gofpdf document.
// NOTE(review): nothing in the visible code assigns to this variable —
// confirm whether it is still needed.
var pdf *gofpdf.Fpdf
11 |
12 | func Generate(html string, css string, out string) {
13 | xmlFile, err := ioutil.ReadFile(html)
14 | if err != nil {
15 | return
16 | }
17 |
18 | //parse html to Node tree
19 | n := ParseHtml(string(xmlFile))
20 | fmt.Println("\x1b[41m\x1b[1mprint Node\x1b[0m")
21 | n.print(0)
22 | fmt.Println("\x1b[41m\x1b[1mend print Node\x1b[0m\n")
23 |
24 | cssFile, err := ioutil.ReadFile(css)
25 | if err != nil {
26 | panic(err)
27 | return
28 | }
29 | cssStyle := string(cssFile)
30 | p2 := CssParser(cssStyle)
31 | stylesheet := p2.parseRules()
32 |
33 | styletree := styleTree(n, &stylesheet)
34 | fmt.Println("\x1b[41m\x1b[1mprint StyleTree\x1b[0m")
35 | styletree.print(0)
36 | fmt.Println("\x1b[41m\x1b[1mend print StyleTree\x1b[0m\n")
37 |
38 | viewport := Dimensions{}
39 | viewport.content.width = 210
40 | viewport.content.height = 600
41 |
42 | layoutTree := layoutTree(styletree, viewport)
43 | fmt.Println("\n\x1b[41m\x1b[1mprint LayoutTree\x1b[0m")
44 | layoutTree.print(0)
45 | fmt.Println("\x1b[41m\x1b[1mend print LayoutTree\x1b[0m")
46 | list := buildDisplayList(layoutTree)
47 | fmt.Println(layoutTree)
48 |
49 | pdf := gofpdf.New("P", "mm", "A4", "")
50 | pdf.AddPage()
51 | pdf.SetFont("Arial", "", 16)
52 | for i := 0; i < len(list); i++ {
53 | list[i].draw(pdf)
54 | }
55 | err = pdf.OutputFileAndClose(out)
56 | if err != nil {
57 | fmt.Println("Error pdf", err)
58 | }
59 | pdf.Close()
60 | }
61 |
62 | func GenerateFromString(html string, css string, out string) {
63 | //parse html to Node tree
64 | n := ParseHtml(string(html))
65 | fmt.Println("\x1b[41m\x1b[1mprint Node\x1b[0m")
66 | n.print(0)
67 | fmt.Println("\x1b[41m\x1b[1mend print Node\x1b[0m\n")
68 |
69 | cssStyle := string(css)
70 | p2 := CssParser(cssStyle)
71 | stylesheet := p2.parseRules()
72 |
73 | styletree := styleTree(n, &stylesheet)
74 | fmt.Println("\x1b[41m\x1b[1mprint StyleTree\x1b[0m")
75 | styletree.print(0)
76 | fmt.Println("\x1b[41m\x1b[1mend print StyleTree\x1b[0m\n")
77 |
78 | viewport := Dimensions{}
79 | viewport.content.width = 210
80 | viewport.content.height = 600
81 |
82 | layoutTree := layoutTree(styletree, viewport)
83 | fmt.Println("\n\x1b[41m\x1b[1mprint LayoutTree\x1b[0m")
84 | layoutTree.print(0)
85 | fmt.Println("\x1b[41m\x1b[1mend print LayoutTree\x1b[0m")
86 | list := buildDisplayList(layoutTree)
87 | fmt.Println(layoutTree)
88 |
89 | pdf := gofpdf.New("P", "mm", "A4", "")
90 | pdf.AddPage()
91 | pdf.SetFont("Arial", "", 16)
92 | for i := 0; i < len(list); i++ {
93 | list[i].draw(pdf)
94 | }
95 | err := pdf.OutputFileAndClose(out)
96 | if err != nil {
97 | fmt.Println("Error pdf", err)
98 | }
99 | pdf.Close()
100 | }
101 |
--------------------------------------------------------------------------------
/node.go:
--------------------------------------------------------------------------------
1 | package htmlPDF
2 |
3 | import (
4 | "fmt"
5 | "strings"
6 | )
7 |
8 | // Node is one vertex of the parsed document tree: its children plus the element/text payload in node_type.
9 | type Node struct {
10 | children map[int]*Node // child nodes keyed by int; print walks keys 0..len-1
11 | node_type NodeType // element data and/or text carried by this node
12 | }
13 |
14 | type NodeType struct { // payload of a Node
15 | element ElementData // tag name and attributes; nodes built by text() leave tag_name empty
16 | text string // text content; nodes built by elem() leave it empty
17 | }
18 |
19 | type ElementData struct { // tag name and attributes of an element
20 | tag_name string // element name; matchesSelector compares it with selector.tag_name
21 | attr map[string]string // attribute name -> value; id() and classes() read the "id" and "class" entries
22 | }
23 |
24 | func (e ElementData) id() string {
25 | return e.attr["id"]
26 | }
27 |
28 | func (e ElementData) classes() []string {
29 | class, ok := e.attr["class"]
30 |
31 | if ok {
32 | return strings.Split(class, " ")
33 | }
34 | return []string{}
35 | }
36 |
// tab writes one space to standard output i times; it writes nothing when i
// is zero or negative.
func tab(i int) {
	for remaining := i; remaining > 0; remaining-- {
		fmt.Printf(" ")
	}
}
42 |
43 | func (n *Node) print(l int) {
44 | tab(l)
45 | l++
46 | fmt.Printf("%s text: %s\n", n.node_type.element.tag_name, n.node_type.text)
47 | for i := 0; i < len(n.children); i++ {
48 | n.children[i].print(l + 1)
49 | }
50 | }
51 |
52 | func text(data string) *Node {
53 | return &Node{
54 | children: map[int]*Node{},
55 | node_type: NodeType{
56 | element: ElementData{attr: map[string]string{}},
57 | text: data,
58 | },
59 | }
60 | }
61 |
62 | func elem(name string, attrs map[string]string, children map[int]*Node) *Node {
63 | return &Node{
64 | children: children,
65 | node_type: NodeType{
66 | element: ElementData{
67 | tag_name: name,
68 | attr: attrs,
69 | },
70 | },
71 | }
72 | }
73 |
--------------------------------------------------------------------------------
/style.go:
--------------------------------------------------------------------------------
1 | package htmlPDF
2 |
3 | import (
4 | "fmt"
5 | "sort"
6 | )
7 |
8 | type StyleNode struct { // a Node paired with the CSS values styleTree computed for it
9 | node *Node // the document node being styled
10 | specified_values map[string]Value // property name -> value; left empty by styleTree when the node has no tag name
11 | children map[int]StyleNode // styled children, stored under the same keys as node.children
12 | }
13 |
14 | type MatchedRule struct { // a stylesheet rule that matched an element
15 | spec Specificity // specificity of the selector that matched
16 | rule *Rule // the matched rule; nil in the zero value matchRule returns when nothing matched
17 | }
18 |
19 | //If rule match elem, return a MatchedRule
20 | func matchRule(elem *ElementData, rule *Rule) MatchedRule {
21 | for _, selector := range rule.selectors {
22 | if matchesSelector(elem, selector) {
23 | //Fine the first (highest-specificity) matching selector
24 | mr := MatchedRule{
25 | selector.specificity(),
26 | rule,
27 | }
28 | return mr
29 | }
30 | }
31 |
32 | return MatchedRule{}
33 | }
34 |
35 | func matchesSelector(elem *ElementData, selector SimpleSelector) bool {
36 | //Check type selector
37 | if selector.tag_name != "" && selector.tag_name != elem.tag_name {
38 | return false
39 | }
40 |
41 | //Check id
42 | if selector.id != "" && selector.id != elem.id() {
43 | return false
44 | }
45 |
46 | // Check class selectors
47 | if !elem.classContains(selector.class) {
48 | return false
49 | }
50 |
51 | return true
52 | }
53 |
54 | //Find all CSS rules that match the given element
55 | func matchingRules(elem *ElementData, stylesheet *Stylesheet) map[int]MatchedRule {
56 | matched := map[int]MatchedRule{}
57 |
58 | for i, rule := range stylesheet.rules {
59 | mr := matchRule(elem, rule)
60 | if mr.rule != nil {
61 | matched[i] = mr
62 | }
63 | }
64 | return matched
65 | }
66 |
67 | type SortBySpec map[int]MatchedRule
68 |
69 | func (a SortBySpec) Len() int { return len(a) }
70 | func (a SortBySpec) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
71 | func (a SortBySpec) Less(i, j int) bool { return a[i].spec.a < a[j].spec.a }
72 |
73 | func specifiedValues(elem *ElementData, stylesheet *Stylesheet) map[string]Value {
74 | values := map[string]Value{}
75 | rules := matchingRules(elem, stylesheet)
76 |
77 | //add sort rules
78 | sort.Sort(SortBySpec(rules))
79 |
80 | for _, matchedRule := range rules {
81 | for _, declaration := range matchedRule.rule.declaration {
82 | values[declaration.name] = declaration.value
83 | }
84 | }
85 |
86 | return values
87 | }
88 |
89 | func styleTree(root *Node, stylesheet *Stylesheet) StyleNode {
90 | children := map[int]StyleNode{}
91 | for i, child := range root.children {
92 | children[i] = styleTree(child, stylesheet)
93 | }
94 |
95 | specifiedValue := map[string]Value{}
96 | if root.node_type.element.tag_name != "" {
97 | specifiedValue = specifiedValues(&root.node_type.element, stylesheet)
98 | }
99 |
100 | return StyleNode{
101 | node: root,
102 | specified_values: specifiedValue,
103 | children: children,
104 | }
105 | }
106 |
107 | func (s StyleNode) print(l int) {
108 | tab(l)
109 | fmt.Printf("node %v\n", s.node)
110 | tab(l)
111 | fmt.Printf("specified_values len %d\n", len(s.specified_values))
112 | tab(l)
113 | fmt.Printf("childrens: \n")
114 | l++
115 | for i := 0; i < len(s.children); i++ {
116 | s.children[i].print(l + 1)
117 | }
118 | }
119 |
120 | //Return true if ElementData contain one or more class
121 | func (e ElementData) classContains(class map[int]string) bool {
122 | if len(class) == 0 {
123 | return true
124 | }
125 | for _, class := range class {
126 | for _, eclass := range e.classes() {
127 | if class == eclass {
128 | return true
129 | }
130 | }
131 | }
132 |
133 | return false
134 | }
135 |
--------------------------------------------------------------------------------