├── tmp └── main ├── go.mod ├── LICENSE ├── go.sum └── gqpp.go /tmp/main: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Phillip-England/gqpp/main/tmp/main -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/phillip-england/gqpp 2 | 3 | go 1.23.3 4 | 5 | require ( 6 | github.com/PuerkitoBio/goquery v1.10.0 7 | github.com/phillip-england/purse v1.0.1 8 | ) 9 | 10 | require ( 11 | github.com/andybalholm/cascadia v1.3.2 // indirect 12 | golang.org/x/net v0.29.0 // indirect 13 | ) 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Phillip England 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/PuerkitoBio/goquery v1.10.0 h1:6fiXdLuUvYs2OJSvNRqlNPoBm6YABE226xrbavY5Wv4= 2 | github.com/PuerkitoBio/goquery v1.10.0/go.mod h1:TjZZl68Q3eGHNBA8CWaxAN7rOU1EbDz3CWuolcO5Yu4= 3 | github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss= 4 | github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU= 5 | github.com/phillip-england/purse v1.0.1 h1:YQvG/1Mj7UiyQcgc5sMhh22U2wDmDb8+Fai9YZSYeLk= 6 | github.com/phillip-england/purse v1.0.1/go.mod h1:MeBsQJohgOVLuiEMXd2CBHuS3yFWGoaxVDKYCKL3uCE= 7 | github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= 8 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 9 | golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= 10 | golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= 11 | golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= 12 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 13 | golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= 14 | golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= 15 | golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= 16 | golang.org/x/net v0.9.0/go.mod 
h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns= 17 | golang.org/x/net v0.29.0 h1:5ORfpBpCs4HzDYoodCDBbwHzdR5UrLBZ3sOnUJmFoHo= 18 | golang.org/x/net v0.29.0/go.mod h1:gLkgy8jTGERgjzMic6DS9+SP0ajcu6Xu3Orq/SpETg0= 19 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 20 | golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 21 | golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 22 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 23 | golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 24 | golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 25 | golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 26 | golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 27 | golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 28 | golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 29 | golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= 30 | golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= 31 | golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= 32 | golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY= 33 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 34 | golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 35 | golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= 36 | golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= 37 | 
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= 38 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 39 | golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 40 | golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= 41 | golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= 42 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 43 | -------------------------------------------------------------------------------- /gqpp.go: -------------------------------------------------------------------------------- 1 | package gqpp 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "strings" 7 | 8 | "github.com/PuerkitoBio/goquery" 9 | "github.com/phillip-england/purse" 10 | ) 11 | 12 | func NewSelectionFromFilePath(path string) (*goquery.Selection, error) { 13 | f, err := os.ReadFile(path) 14 | if err != nil { 15 | return nil, err 16 | } 17 | fStr := string(f) 18 | doc, err := goquery.NewDocumentFromReader(strings.NewReader(fStr)) 19 | if err != nil { 20 | return nil, err 21 | } 22 | body := doc.Find("body") 23 | return body, nil 24 | } 25 | 26 | func NewSelectionFromStr(htmlStr string) (*goquery.Selection, error) { 27 | sq := strings.ReplaceAll(htmlStr, " ", "") 28 | isHtmlElm := false 29 | if strings.HasPrefix(sq, "%s", tagName, htmlStr, tagName) 52 | } else { 53 | out = fmt.Sprintf("<%s %s>%s", tagName, attrStr, htmlStr, tagName) 54 | } 55 | newSel, err := NewSelectionFromStr(out) 56 | if err != nil { 57 | return nil, err 58 | } 59 | return newSel, nil 60 | } 61 | 62 | func GetAttrStr(selection *goquery.Selection, filter ...string) string { 63 | var attrs []string 64 | filterMap := make(map[string]struct{}) 65 | for _, f := range filter { 66 | filterMap[f] = struct{}{} 67 | } 68 | selection.Each(func(i int, sel 
*goquery.Selection) {
		for _, attr := range sel.Nodes[0].Attr {
			if _, found := filterMap[attr.Key]; !found {
				attrs = append(attrs, fmt.Sprintf(`%s="%s"`, attr.Key, attr.Val))
			}
		}
	})
	return strings.Join(attrs, " ")
}

// NewHtmlFromSelection renders the outer HTML of s with goquery.OuterHtml and
// returns that markup after passing it through purse.Flatten.
//
// NOTE(review): purse.Flatten is presumed to normalize the markup onto a
// single line; confirm against the purse package.
func NewHtmlFromSelection(s *goquery.Selection) (string, error) {
	htmlStr, err := goquery.OuterHtml(s)
	if err != nil {
		return "", err
	}
	return purse.Flatten(htmlStr), nil
}

// ClimbTreeUntil walks upward from s, handing each successive parent selection
// to cond until cond returns true, at which point it returns nil.
//
// When the walk runs out of ancestors, cond is still called once with the
// empty parent selection so that callers can react to it. If cond rejects that
// empty selection as well, an error is returned; the previous implementation
// kept recursing on the empty selection and never terminated.
func ClimbTreeUntil(s *goquery.Selection, cond func(parent *goquery.Selection) bool) error {
	for parent := s.Parent(); ; parent = parent.Parent() {
		if cond(parent) {
			return nil
		}
		if parent.Length() == 0 {
			return fmt.Errorf("no ancestor satisfied the condition")
		}
	}
}

// AttrFromStr parses str with NewSelectionFromStr and looks up attrName on the
// resulting selection. It returns the attribute value, whether the attribute
// exists, and any parse error.
func AttrFromStr(str string, attrName string) (string, bool, error) {
	s, err := NewSelectionFromStr(str)
	if err != nil {
		return "", false, err
	}
	out, exists := s.Attr(attrName)
	return out, exists, nil
}

// htmlPreview returns at most limit bytes of s for use in error messages.
// The cut is moved back to a UTF-8 rune boundary so the preview stays valid.
func htmlPreview(s string, limit int) string {
	if limit < 0 {
		limit = 0
	}
	if len(s) <= limit {
		return s
	}
	// 0b10xxxxxx marks a UTF-8 continuation byte; never cut in front of one.
	for limit > 0 && s[limit]&0xC0 == 0x80 {
		limit--
	}
	return s[:limit]
}

// CalculateNodeDepth reports how many ancestors separate child from root.
//
// The child is located by searching root for elements with the same node name
// and comparing their flattened outer HTML with the child's. From the first
// such match the function climbs parent by parent, counting every ancestor
// whose flattened HTML differs from root's; a direct child of root therefore
// has depth 0. If no element matches, the result is 0 with a nil error.
//
// It returns -1 and an error when rendering fails or when the climb reaches
// the top of the tree without meeting root.
func CalculateNodeDepth(root *goquery.Selection, child *goquery.Selection) (int, error) {
	childHtml, err := NewHtmlFromSelection(child)
	if err != nil {
		return -1, err
	}
	rootHtml, err := NewHtmlFromSelection(root)
	if err != nil {
		return -1, err
	}

	depth := 0
	var potErr error
	// EachWithBreak stops at the first match so depth is not accumulated over
	// several elements that happen to render to the same HTML.
	root.Find(goquery.NodeName(child)).EachWithBreak(func(i int, search *goquery.Selection) bool {
		searchHtml, err := NewHtmlFromSelection(search)
		if err != nil {
			potErr = err
			return false
		}
		if searchHtml != childHtml {
			return true
		}
		for parent := search.Parent(); ; parent = parent.Parent() {
			if parent.Length() == 0 {
				// Ran off the top of the tree; previews are length-safe.
				potErr = fmt.Errorf("child node: %s not found within parent node: %s", htmlPreview(childHtml, 30), htmlPreview(rootHtml, 30))
				return false
			}
			parentHtml, err := NewHtmlFromSelection(parent)
			if err != nil {
				potErr = err
				return false
			}
			if parentHtml == rootHtml {
				return false
			}
			depth++
		}
	})
	if potErr != nil {
		return -1, potErr
	}
	return depth, nil
}

// CountMatchingParentTags locates child inside root (same node name and
// identical flattened outer HTML, first match wins) and counts how many of its
// ancestors have a node name listed in tagNames.
//
// The climb continues until there is no parent left, so ancestors above root
// are counted as well. It returns -1 and an error if rendering fails or if no
// element inside root matches child.
func CountMatchingParentTags(root, child *goquery.Selection, tagNames ...string) (int, error) {
	tagSet := make(map[string]struct{}, len(tagNames))
	for _, tag := range tagNames {
		tagSet[tag] = struct{}{}
	}
	childHtml, err := NewHtmlFromSelection(child)
	if err != nil {
		return -1, err
	}

	count := 0
	found := false
	var potentialErr error
	root.Find(goquery.NodeName(child)).EachWithBreak(func(i int, search *goquery.Selection) bool {
		searchHtml, err := NewHtmlFromSelection(search)
		if err != nil {
			potentialErr = err
			return false
		}
		if searchHtml != childHtml {
			return true
		}
		found = true
		for current := search.Parent(); current.Length() > 0; current = current.Parent() {
			if _, exists := tagSet[goquery.NodeName(current)]; exists {
				count++
			}
		}
		return false
	})
	if potentialErr != nil {
		return -1, potentialErr
	}
	if !found {
		return -1, fmt.Errorf("child node not found within the root")
	}
	return count, nil
}

// NewHtmlFromSelectionWithNewTag wraps the inner HTML of s in a new element
// named newTagName, optionally carrying the raw attribute string
// newTagAttrStr, then round-trips the markup through NewSelectionFromStr and
// NewHtmlFromSelection and returns the result.
func NewHtmlFromSelectionWithNewTag(s *goquery.Selection, newTagName string, newTagAttrStr string) (string, error) {
	htmlStr, err := s.Html()
	if err != nil {
		return "", err
	}
	openTag := fmt.Sprintf("<%s>", newTagName)
	if newTagAttrStr != "" {
		openTag = fmt.Sprintf("<%s %s>", newTagName, newTagAttrStr)
	}
	// The closing tag previously used an empty format string, which dropped
	// the tag and appended a fmt "EXTRA" diagnostic to the markup instead.
	closeTag := fmt.Sprintf("</%s>", newTagName)
	newSel, err := NewSelectionFromStr(openTag + htmlStr + closeTag)
	if err != nil {
		return "", err
	}
	return NewHtmlFromSelection(newSel)
}

// FindDeepestMatchingSelection runs every selector against selection and
// returns the single matched element with the greatest CalculateNodeDepth.
// On ties the element encountered first is kept. The boolean is false when
// nothing matched or when a depth calculation failed.
func FindDeepestMatchingSelection(selection *goquery.Selection, selectors ...string) (*goquery.Selection, bool) {
	var deepestSelection *goquery.Selection
	maxDepth := -1
	for _, selector := range selectors {
		found := selection.Find(selector)
		for i := 0; i < found.Length(); i++ {
			node := found.Eq(i)
			depth, err := CalculateNodeDepth(selection, node)
			if err != nil {
				return nil, false
			}
			if depth > maxDepth {
				maxDepth, deepestSelection = depth, node
			}
		}
	}
	if deepestSelection == nil {
		return nil, false
	}
	return deepestSelection, true
}

// HasMatchingElements reports whether at least one of the selectors matches a
// descendant of selection.
func HasMatchingElements(selection *goquery.Selection, selectors ...string) bool {
	for _, selector := range selectors {
		if selection.Find(selector).Length() > 0 {
			return true
		}
	}
	return false
}

// GetFirstMatchingAttr returns the name (not the value) of the first attribute
// in attrs that exists on selection, or "" when none of them is present.
func GetFirstMatchingAttr(selection *goquery.Selection, attrs ...string) string {
	for _, attr := range attrs {
		if _, exists := selection.Attr(attr); exists {
			return attr
		}
	}
	return ""
}

// GetAttrPart splits the value of attrName on single spaces and returns the
// piece at zero-based index part.
//
// It returns an error when the attribute is missing, when part is negative, or
// when the value has fewer than part+1 pieces. The previous bounds check let
// part == len(parts) through and panicked on the index expression.
func GetAttrPart(selection *goquery.Selection, attrName string, part int) (string, error) {
	attr, exists := selection.Attr(attrName)
	if !exists {
		return "", fmt.Errorf("attr: '%s' does not exist when it should", attrName)
	}
	if part < 0 {
		return "", fmt.Errorf("attr: '%s' part index %d must not be negative", attrName, part)
	}
	parts := strings.Split(attr, " ")
	if part >= len(parts) {
		return "", fmt.Errorf("attr: '%s' must contain %d parts", attrName, part+1)
	}
	return parts[part], nil
}

// GetAttr returns the value of attrName on selection, or an error when the
// attribute is not present.
func GetAttr(selection *goquery.Selection, attrName string) (string, error) {
	attr, exists := selection.Attr(attrName)
	if !exists {
		return "", fmt.Errorf("attr: '%s' does not exist", attrName)
	}
	return attr, nil
}

// HasAttr reports whether selection carries at least one of the named
// attributes.
func HasAttr(selection *goquery.Selection, attrs ...string) bool {
	for _, attr := range attrs {
		if _, exists := selection.Attr(attr); exists {
			return true
		}
	}
	return false
}

// attrNameSet builds a lookup set from a list of attribute names.
func attrNameSet(attrs []string) map[string]struct{} {
	set := make(map[string]struct{}, len(attrs))
	for _, attr := range attrs {
		set[attr] = struct{}{}
	}
	return set
}

// hasAnyAttr reports whether any node in sel has an attribute whose key is in
// attrSet.
func hasAnyAttr(sel *goquery.Selection, attrSet map[string]struct{}) bool {
	for _, node := range sel.Nodes {
		for _, attr := range node.Attr {
			if _, found := attrSet[attr.Key]; found {
				return true
			}
		}
	}
	return false
}

// HasParentWithAttrs reports whether any ancestor of sel carries one of the
// named attributes. The climb starts at sel's parent and ends, without
// inspecting that ancestor, as soon as current.IsSelection(stopAt) is true or
// there are no parents left.
func HasParentWithAttrs(sel *goquery.Selection, stopAt *goquery.Selection, attrs ...string) bool {
	attrSet := attrNameSet(attrs)
	for current := sel.Parent(); current.Length() > 0; current = current.Parent() {
		if current.IsSelection(stopAt) {
			break
		}
		if hasAnyAttr(current, attrSet) {
			return true
		}
	}
	return false
}

// HasParentWithAttrsIncludingStopAt reports whether any ancestor of sel, up to
// and including stopAt, carries one of the named attributes.
//
// Each ancestor is inspected before it is compared with stopAt, so stopAt's
// own attributes are taken into account; earlier the loop broke out first and
// stopAt was never checked. A nil stopAt means "climb to the top".
func HasParentWithAttrsIncludingStopAt(sel *goquery.Selection, stopAt *goquery.Selection, attrs ...string) bool {
	attrSet := attrNameSet(attrs)
	for current := sel.Parent(); current.Length() > 0; current = current.Parent() {
		if hasAnyAttr(current, attrSet) {
			return true
		}
		if stopAt != nil && sameNodes(current, stopAt) {
			break
		}
	}
	return false
}

// Helper function to check if two
selections have the same underlying nodes 347 | func sameNodes(a, b *goquery.Selection) bool { 348 | if a.Length() != b.Length() { 349 | return false 350 | } 351 | for i := 0; i < a.Length(); i++ { 352 | if a.Get(i) != b.Get(i) { 353 | return false 354 | } 355 | } 356 | return true 357 | } 358 | 359 | func ForceElementAttr(sel *goquery.Selection, attrToCheck string) (string, error) { 360 | htmlStr, err := NewHtmlFromSelection(sel) 361 | if err != nil { 362 | return "", err 363 | } 364 | attr, exists := sel.Attr(attrToCheck) 365 | if !exists { 366 | return "", fmt.Errorf("element is required to have the '%s' attribute: %s", attrToCheck, htmlStr) 367 | } 368 | return attr, nil 369 | } 370 | 371 | func ForceElementAttrParts(sel *goquery.Selection, attrToCheck string, partsExpected int) ([]string, error) { 372 | htmlStr, err := NewHtmlFromSelection(sel) 373 | if err != nil { 374 | return make([]string, 0), err 375 | } 376 | attr, err := ForceElementAttr(sel, attrToCheck) 377 | if err != nil { 378 | return make([]string, 0), nil 379 | } 380 | parts := strings.Split(attr, " ") 381 | if len(parts) != partsExpected { 382 | return make([]string, 0), fmt.Errorf("attribute '%s' expects %d distinct parts in element: %s", attrToCheck, partsExpected, htmlStr) 383 | } 384 | return parts, nil 385 | } 386 | --------------------------------------------------------------------------------