├── .gitignore
├── LICENSE
├── README.md
├── examples
└── awesome-movies_2018-11-29_112814.gif
├── movie.go
├── pup
├── display.go
├── parse.go
├── pup.go
└── selector.go
├── run
└── tamilmv.go
/.gitignore:
--------------------------------------------------------------------------------
1 | # Binaries for programs and plugins
2 | *.exe
3 | *.exe~
4 | *.dll
5 | *.so
6 | *.dylib
7 |
8 | # Test binary, build with `go test -c`
9 | *.test
10 |
11 | # Output of the go coverage tool, specifically when used with LiteIDE
12 | *.out
13 |
14 |
15 | dist/*
16 | workspace/*
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Anbarasan
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # awesome-movies
2 |
3 | awesome-movies is a command-line tool for searching movies online and getting their torrent links. It reads your
4 | selection from stdin and prints the links to stdout.
5 |
6 | ## Install
7 |
8 | Direct downloads are available through the [releases page](https://github.com/anbuksv/awesome-movies/releases/latest).
9 |
10 | ## Quick start
11 |
12 | ```bash
13 | $ movies the dark knight
14 | ```
15 |
16 | ## Basic Usage
17 |
18 | ```bash
19 | $ movies [flags] '[movie name]'
20 | ```
21 |
22 | ## Examples
23 |
24 | ![awesome-movies demo](examples/awesome-movies_2018-11-29_112814.gif)
25 |
26 | ### Tamil Movies
27 |
28 | ```bash
29 | $ movies -l tamil '[movie name]'
30 | ```
31 |
32 | ## Roadmap
33 |
34 | - [x] yts
35 | - [x] tamilmv
36 |
37 | ## Flags
38 |
39 | Run `movies --help` for a list of further options
40 |
--------------------------------------------------------------------------------
/examples/awesome-movies_2018-11-29_112814.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anbuksv/awesome-movies/55c7aa82553618dbf5ea566e0671c7102f1d5b78/examples/awesome-movies_2018-11-29_112814.gif
--------------------------------------------------------------------------------
/movie.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "os"
6 | "strings"
7 | "net/http"
8 | "net/url"
9 | "io/ioutil"
10 | "encoding/json"
11 | "strconv"
12 | "io"
13 | "time"
14 |
15 | "./pup"
16 | "github.com/fatih/color"
17 | )
18 |
// Yts groups everything this tool decodes while talking to yts: the search
// response and the download links extracted from a movie page.
type Yts struct {
	// SearchResults is the target searchMovie decodes the search response into.
	SearchResults struct {
		Status string `json:"status"`
		// Movies holds one entry per hit, decoded from the "data" key.
		Movies []struct {
			Url   string `json:"url"`
			Img   string `json:"img"`
			Title string `json:"title"`
			Year  string `json:"year"`
		} `json:"data"`
	}

	// DownloadResults is the target main decodes the pup.Run output into.
	DownloadResults []struct {
		Href  string `json:"href"`
		Text  string `json:"text"`
		Title string `json:"Title"`
	}
}
36 |
37 | var (
38 | timeout = time.Duration(5 * time.Second)
39 | HttpClient = http.Client{
40 | Timeout: timeout,
41 | }
42 | YTS = Yts{}
43 | MOVIES_VERSION string = "1.0.1"
44 | language string
45 | )
46 |
// UrlEncoded percent-encodes str so that it can be appended to a URL as a
// single query-parameter value.
//
// The previous implementation round-tripped str through url.Parse, which
// treats the text as a URL rather than as data: a title such as
// "2001: a space odyssey" was rejected (yielding ""), "mission: impossible"
// was read as a URL scheme, and characters like '&', '?' and '#' were left
// unescaped and corrupted the query string.
//
// Spaces are emitted as "%20", the form this function has always produced.
func UrlEncoded(str string) string {
	// QueryEscape encodes a space as "+" and a literal plus as "%2B", so every
	// "+" in its output stands for a space and can be rewritten safely.
	return strings.Replace(url.QueryEscape(str), "+", "%20", -1)
}
54 |
55 | func moviesResultCheck(length int){
56 | if length <= 0 {
57 | color.Set(color.FgMagenta, color.Bold)
58 | fmt.Println("No movies found.")
59 | color.Unset()
60 | os.Exit(0)
61 | }
62 | }
63 |
64 | func main(){
65 | search := ParseFlages()
66 | if strings.ToLower(language) == "tamil"{
67 | SearchTamilMovies(search)
68 | return
69 | }
70 | searchMovie("https://yts.am/ajax/search?query=",search)
71 | moviesResultCheck(len(YTS.SearchResults.Movies))
72 | fmt.Printf("%s",listMovies())
73 | downloadIndex := getConformation(len(YTS.SearchResults.Movies))
74 | torrentHtml := downloadHTML(YTS.SearchResults.Movies[downloadIndex].Url)
75 | os.Args = []string{
76 | "anbuksv",
77 | "#movie-info > p a json{}",
78 | }
79 | nodes := pup.Run(torrentHtml)
80 | json.Unmarshal(nodes,&YTS.DownloadResults)
81 | color.Set(color.FgYellow, color.Bold)
82 | for _,result:= range(YTS.DownloadResults){
83 | color.New(color.FgYellow,color.Bold).Print("("+ result.Text +") ")
84 | color.White(result.Href + "\n")
85 | }
86 | // fmt.Println(YTS.DownloadResults[len(YTS.DownloadResults)-1].Href) //By default high resolution torrent link will be printed
87 | color.Unset()
88 | }
89 |
90 | func ParseFlages() string {
91 | cmds := os.Args[1:]
92 | nonFlagCmds := make([]string, len(cmds))
93 | n := 0
94 | for i := 0; i < len(cmds); i++ {
95 | cmd := cmds[i]
96 | switch cmd {
97 | case "-h","--help":
98 | PrintMoviesHelp(os.Stdout, 0)
99 | case "--version":
100 | fmt.Println(MOVIES_VERSION)
101 | os.Exit(0)
102 | case "-l","--language":
103 | language = cmds[i+1]
104 | i++
105 | default:
106 | nonFlagCmds[n] = cmds[i]
107 | n++
108 | }
109 | }
110 | return strings.Join(nonFlagCmds," ")
111 | }
112 |
113 | func PrintMoviesHelp(w io.Writer, exitCode int) {
114 | helpString := `Usage
115 | movies Name [flags]
116 | Version
117 | %s
118 | Flags
119 | -h --help display this help
120 | --version display version
121 | -l --language movie language
122 | `
123 | fmt.Fprintf(w, helpString, MOVIES_VERSION)
124 | os.Exit(exitCode)
125 | }
126 |
127 | func getConformation(limit int) int {
128 | var downloadIndex int
129 | color.Set(color.FgWhite, color.Bold)
130 | fmt.Print("awesome-movie> ")
131 | color.Unset()
132 | fmt.Scan(&downloadIndex)
133 | downloadIndex = downloadIndex - 1
134 | if downloadIndex < 0 || downloadIndex >= limit{
135 | fmt.Println("awesome-movie> Please enter valid number.")
136 | return getConformation(limit)
137 | }
138 | return downloadIndex
139 | }
140 |
141 | func onHttpError(err error){
142 | color.Set(color.FgRed, color.Bold)
143 | fmt.Println(err)
144 | color.Unset()
145 | os.Exit(0)
146 | }
147 |
148 | func searchMovie(url string, query string) {
149 | resp,err := HttpClient.Get(url+UrlEncoded(query))
150 | if err != nil{
151 | onHttpError(err)
152 | }
153 | defer resp.Body.Close()
154 | body, err := ioutil.ReadAll(resp.Body)
155 | json.Unmarshal([]byte(body),&YTS.SearchResults)
156 | }
157 |
158 | func downloadHTML(url string) io.ReadCloser {
159 | resp,err := HttpClient.Get(url)
160 | if err != nil {
161 | onHttpError(err)
162 | }
163 | return resp.Body
164 | }
165 |
166 | func listMovies() string {
167 | var _movies string = ""
168 | for index,movie := range(YTS.SearchResults.Movies){
169 | _movies = _movies + strconv.Itoa(index+1) + ". " + movie.Title + " (" + movie.Year + ")\n"
170 | }
171 | return _movies
172 | }
173 |
--------------------------------------------------------------------------------
/pup/display.go:
--------------------------------------------------------------------------------
1 | package pup
2 |
3 | import (
4 | "encoding/json"
5 | "fmt"
6 | "regexp"
7 | "strings"
8 |
9 | "github.com/fatih/color"
10 | colorable "github.com/mattn/go-colorable"
11 | "golang.org/x/net/html"
12 | "golang.org/x/net/html/atom"
13 | )
14 |
15 | func init() {
16 | color.Output = colorable.NewColorableStdout()
17 | }
18 |
19 | type Displayer interface {
20 | Display([]*html.Node)
21 | }
22 |
23 | func ParseDisplayer(cmd string) error {
24 | attrRe := regexp.MustCompile(`attr\{([a-zA-Z\-]+)\}`)
25 | if cmd == "text{}" {
26 | pupDisplayer = TextDisplayer{}
27 | } else if cmd == "json{}" {
28 | pupDisplayer = JSONDisplayer{}
29 | } else if match := attrRe.FindAllStringSubmatch(cmd, -1); len(match) == 1 {
30 | pupDisplayer = AttrDisplayer{
31 | Attr: match[0][1],
32 | }
33 | } else {
34 | return fmt.Errorf("Unknown displayer")
35 | }
36 | return nil
37 | }
38 |
39 | // Is this node a tag with no end tag such as or
?
40 | // http://www.w3.org/TR/html-markup/syntax.html#syntax-elements
41 | func isVoidElement(n *html.Node) bool {
42 | switch n.DataAtom {
43 | case atom.Area, atom.Base, atom.Br, atom.Col, atom.Command, atom.Embed,
44 | atom.Hr, atom.Img, atom.Input, atom.Keygen, atom.Link,
45 | atom.Meta, atom.Param, atom.Source, atom.Track, atom.Wbr:
46 | return true
47 | }
48 | return false
49 | }
50 |
51 | var (
52 | // Colors
53 | tagColor *color.Color = color.New(color.FgCyan)
54 | tokenColor = color.New(color.FgCyan)
55 | attrKeyColor = color.New(color.FgMagenta)
56 | quoteColor = color.New(color.FgBlue)
57 | commentColor = color.New(color.FgYellow)
58 | )
59 |
60 | type TreeDisplayer struct {
61 | }
62 |
63 | func (t TreeDisplayer) Display(nodes []*html.Node) {
64 | for _, node := range nodes {
65 | t.printNode(node, 0)
66 | }
67 | }
68 |
69 | // The
tag indicates that the text within it should always be formatted 70 | // as is. See https://github.com/ericchiang/pup/issues/33 71 | func (t TreeDisplayer) printPre(n *html.Node) { 72 | switch n.Type { 73 | case html.TextNode: 74 | s := n.Data 75 | if pupEscapeHTML { 76 | // don't escape javascript 77 | if n.Parent == nil || n.Parent.DataAtom != atom.Script { 78 | s = html.EscapeString(s) 79 | } 80 | } 81 | fmt.Print(s) 82 | for c := n.FirstChild; c != nil; c = c.NextSibling { 83 | t.printPre(c) 84 | } 85 | case html.ElementNode: 86 | fmt.Printf("<%s", n.Data) 87 | for _, a := range n.Attr { 88 | val := a.Val 89 | if pupEscapeHTML { 90 | val = html.EscapeString(val) 91 | } 92 | fmt.Printf(` %s="%s"`, a.Key, val) 93 | } 94 | fmt.Print(">") 95 | if !isVoidElement(n) { 96 | for c := n.FirstChild; c != nil; c = c.NextSibling { 97 | t.printPre(c) 98 | } 99 | fmt.Printf("%s>", n.Data) 100 | } 101 | case html.CommentNode: 102 | data := n.Data 103 | if pupEscapeHTML { 104 | data = html.EscapeString(data) 105 | } 106 | fmt.Printf("\n", data) 107 | for c := n.FirstChild; c != nil; c = c.NextSibling { 108 | t.printPre(c) 109 | } 110 | case html.DoctypeNode, html.DocumentNode: 111 | for c := n.FirstChild; c != nil; c = c.NextSibling { 112 | t.printPre(c) 113 | } 114 | } 115 | } 116 | 117 | // Print a node and all of it's children to `maxlevel`. 
118 | func (t TreeDisplayer) printNode(n *html.Node, level int) { 119 | switch n.Type { 120 | case html.TextNode: 121 | s := n.Data 122 | if pupEscapeHTML { 123 | // don't escape javascript 124 | if n.Parent == nil || n.Parent.DataAtom != atom.Script { 125 | s = html.EscapeString(s) 126 | } 127 | } 128 | s = strings.TrimSpace(s) 129 | if s != "" { 130 | t.printIndent(level) 131 | fmt.Println(s) 132 | } 133 | case html.ElementNode: 134 | t.printIndent(level) 135 | // TODO: allow pre with color 136 | if n.DataAtom == atom.Pre && !pupPrintColor && pupPreformatted { 137 | t.printPre(n) 138 | fmt.Println() 139 | return 140 | } 141 | if pupPrintColor { 142 | tokenColor.Print("<") 143 | tagColor.Printf("%s", n.Data) 144 | } else { 145 | fmt.Printf("<%s", n.Data) 146 | } 147 | for _, a := range n.Attr { 148 | val := a.Val 149 | if pupEscapeHTML { 150 | val = html.EscapeString(val) 151 | } 152 | if pupPrintColor { 153 | fmt.Print(" ") 154 | attrKeyColor.Printf("%s", a.Key) 155 | tokenColor.Print("=") 156 | quoteColor.Printf(`"%s"`, val) 157 | } else { 158 | fmt.Printf(` %s="%s"`, a.Key, val) 159 | } 160 | } 161 | if pupPrintColor { 162 | tokenColor.Println(">") 163 | } else { 164 | fmt.Println(">") 165 | } 166 | if !isVoidElement(n) { 167 | t.printChildren(n, level+1) 168 | t.printIndent(level) 169 | if pupPrintColor { 170 | tokenColor.Print("") 171 | tagColor.Printf("%s", n.Data) 172 | tokenColor.Println(">") 173 | } else { 174 | fmt.Printf("%s>\n", n.Data) 175 | } 176 | } 177 | case html.CommentNode: 178 | t.printIndent(level) 179 | data := n.Data 180 | if pupEscapeHTML { 181 | data = html.EscapeString(data) 182 | } 183 | if pupPrintColor { 184 | commentColor.Printf("\n", data) 185 | } else { 186 | fmt.Printf("\n", data) 187 | } 188 | t.printChildren(n, level) 189 | case html.DoctypeNode, html.DocumentNode: 190 | t.printChildren(n, level) 191 | } 192 | } 193 | 194 | func (t TreeDisplayer) printChildren(n *html.Node, level int) { 195 | if pupMaxPrintLevel > -1 { 196 | if 
level >= pupMaxPrintLevel { 197 | t.printIndent(level) 198 | fmt.Println("...") 199 | return 200 | } 201 | } 202 | child := n.FirstChild 203 | for child != nil { 204 | t.printNode(child, level) 205 | child = child.NextSibling 206 | } 207 | } 208 | 209 | func (t TreeDisplayer) printIndent(level int) { 210 | for ; level > 0; level-- { 211 | fmt.Print(pupIndentString) 212 | } 213 | } 214 | 215 | // Print the text of a node 216 | type TextDisplayer struct{} 217 | 218 | func (t TextDisplayer) Display(nodes []*html.Node) { 219 | for _, node := range nodes { 220 | if node.Type == html.TextNode { 221 | data := node.Data 222 | if pupEscapeHTML { 223 | // don't escape javascript 224 | if node.Parent == nil || node.Parent.DataAtom != atom.Script { 225 | data = html.EscapeString(data) 226 | } 227 | } 228 | fmt.Println(data) 229 | } 230 | children := []*html.Node{} 231 | child := node.FirstChild 232 | for child != nil { 233 | children = append(children, child) 234 | child = child.NextSibling 235 | } 236 | t.Display(children) 237 | } 238 | } 239 | 240 | // Print the attribute of a node 241 | type AttrDisplayer struct { 242 | Attr string 243 | } 244 | 245 | func (a AttrDisplayer) Display(nodes []*html.Node) { 246 | for _, node := range nodes { 247 | attributes := node.Attr 248 | for _, attr := range attributes { 249 | if attr.Key == a.Attr { 250 | val := attr.Val 251 | if pupEscapeHTML { 252 | val = html.EscapeString(val) 253 | } 254 | fmt.Printf("%s\n", val) 255 | } 256 | } 257 | } 258 | } 259 | 260 | // Print nodes as a JSON list 261 | type JSONDisplayer struct{} 262 | 263 | // returns a jsonifiable struct 264 | func jsonify(node *html.Node) map[string]interface{} { 265 | vals := map[string]interface{}{} 266 | if len(node.Attr) > 0 { 267 | for _, attr := range node.Attr { 268 | if pupEscapeHTML { 269 | vals[attr.Key] = html.EscapeString(attr.Val) 270 | } else { 271 | vals[attr.Key] = attr.Val 272 | } 273 | } 274 | } 275 | vals["tag"] = node.DataAtom.String() 276 | children := 
[]interface{}{} 277 | for child := node.FirstChild; child != nil; child = child.NextSibling { 278 | switch child.Type { 279 | case html.ElementNode: 280 | children = append(children, jsonify(child)) 281 | case html.TextNode: 282 | text := strings.TrimSpace(child.Data) 283 | if text != "" { 284 | if pupEscapeHTML { 285 | // don't escape javascript 286 | if node.DataAtom != atom.Script { 287 | text = html.EscapeString(text) 288 | } 289 | } 290 | // if there is already text we'll append it 291 | currText, ok := vals["text"] 292 | if ok { 293 | text = fmt.Sprintf("%s %s", currText, text) 294 | } 295 | vals["text"] = text 296 | } 297 | case html.CommentNode: 298 | comment := strings.TrimSpace(child.Data) 299 | if pupEscapeHTML { 300 | comment = html.EscapeString(comment) 301 | } 302 | currComment, ok := vals["comment"] 303 | if ok { 304 | comment = fmt.Sprintf("%s %s", currComment, comment) 305 | } 306 | vals["comment"] = comment 307 | } 308 | } 309 | if len(children) > 0 { 310 | vals["children"] = children 311 | } 312 | return vals 313 | } 314 | 315 | func (j JSONDisplayer) Display(nodes []*html.Node) { 316 | var data []byte 317 | var err error 318 | jsonNodes := []map[string]interface{}{} 319 | for _, node := range nodes { 320 | jsonNodes = append(jsonNodes, jsonify(node)) 321 | } 322 | data, err = json.MarshalIndent(&jsonNodes, "", pupIndentString) 323 | if err != nil { 324 | panic("Could not jsonify nodes") 325 | } 326 | fmt.Printf("%s\n", data) 327 | } 328 | 329 | func GetJSONByte(nodes []*html.Node) []byte{ 330 | var data []byte 331 | var err error 332 | jsonNodes := []map[string]interface{}{} 333 | for _, node := range nodes { 334 | jsonNodes = append(jsonNodes, jsonify(node)) 335 | } 336 | data, err = json.MarshalIndent(&jsonNodes, "", pupIndentString) 337 | if err != nil { 338 | panic("Could not jsonify nodes") 339 | } 340 | // fmt.Printf("%s\n", data) 341 | return data 342 | } 343 | 344 | // Print the number of features returned 345 | type NumDisplayer 
struct{} 346 | 347 | func (d NumDisplayer) Display(nodes []*html.Node) { 348 | fmt.Println(len(nodes)) 349 | } 350 | -------------------------------------------------------------------------------- /pup/parse.go: -------------------------------------------------------------------------------- 1 | package pup 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "os" 7 | "strconv" 8 | "strings" 9 | 10 | "golang.org/x/net/html" 11 | "golang.org/x/net/html/charset" 12 | "golang.org/x/text/transform" 13 | ) 14 | 15 | var ( 16 | pupIn io.ReadCloser = os.Stdin 17 | pupCharset string = "" 18 | pupMaxPrintLevel int = -1 19 | pupPreformatted bool = false 20 | pupPrintColor bool = false 21 | pupEscapeHTML bool = true 22 | pupIndentString string = " " 23 | pupDisplayer Displayer = TreeDisplayer{} 24 | ) 25 | 26 | // Parse the html while handling the charset 27 | func ParseHTML(r io.Reader, cs string) (*html.Node, error) { 28 | var err error 29 | if cs == "" { 30 | // attempt to guess the charset of the HTML document 31 | r, err = charset.NewReader(r, "") 32 | if err != nil { 33 | return nil, err 34 | } 35 | } else { 36 | // let the user specify the charset 37 | e, name := charset.Lookup(cs) 38 | if name == "" { 39 | return nil, fmt.Errorf("'%s' is not a valid charset", cs) 40 | } 41 | r = transform.NewReader(r, e.NewDecoder()) 42 | } 43 | return html.Parse(r) 44 | } 45 | 46 | func PrintHelp(w io.Writer, exitCode int) { 47 | helpString := `Usage 48 | pup [flags] [selectors] [optional display function] 49 | Version 50 | %s 51 | Flags 52 | -c --color print result with color 53 | -f --file file to read from 54 | -h --help display this help 55 | -i --indent number of spaces to use for indent or character 56 | -n --number print number of elements selected 57 | -l --limit restrict number of levels printed 58 | -p --plain don't escape html 59 | --pre preserve preformatted text 60 | --charset specify the charset for pup to use 61 | --version display version 62 | ` 63 | fmt.Fprintf(w, helpString, 
VERSION) 64 | os.Exit(exitCode) 65 | } 66 | 67 | func ParseArgs() ([]string, error) { 68 | cmds, err := ProcessFlags(os.Args[1:]) 69 | if err != nil { 70 | return []string{}, err 71 | } 72 | return ParseCommands(strings.Join(cmds, " ")) 73 | } 74 | 75 | // Process command arguments and return all non-flags. 76 | func ProcessFlags(cmds []string) (nonFlagCmds []string, err error) { 77 | var i int 78 | defer func() { 79 | if r := recover(); r != nil { 80 | err = fmt.Errorf("Option '%s' requires an argument", cmds[i]) 81 | } 82 | }() 83 | nonFlagCmds = make([]string, len(cmds)) 84 | n := 0 85 | for i = 0; i < len(cmds); i++ { 86 | cmd := cmds[i] 87 | switch cmd { 88 | case "-c", "--color": 89 | pupPrintColor = true 90 | case "-p", "--plain": 91 | pupEscapeHTML = false 92 | case "--pre": 93 | pupPreformatted = true 94 | case "-f", "--file": 95 | filename := cmds[i+1] 96 | pupIn, err = os.Open(filename) 97 | if err != nil { 98 | fmt.Fprintf(os.Stderr, "%s\n", err.Error()) 99 | os.Exit(2) 100 | } 101 | i++ 102 | case "-h", "--help": 103 | PrintHelp(os.Stdout, 0) 104 | case "-i", "--indent": 105 | indentLevel, err := strconv.Atoi(cmds[i+1]) 106 | if err == nil { 107 | pupIndentString = strings.Repeat(" ", indentLevel) 108 | } else { 109 | pupIndentString = cmds[i+1] 110 | } 111 | i++ 112 | case "-l", "--limit": 113 | pupMaxPrintLevel, err = strconv.Atoi(cmds[i+1]) 114 | if err != nil { 115 | return []string{}, fmt.Errorf("Argument for '%s' must be numeric", cmd) 116 | } 117 | i++ 118 | case "--charset": 119 | pupCharset = cmds[i+1] 120 | i++ 121 | case "--version": 122 | fmt.Println(VERSION) 123 | os.Exit(0) 124 | case "-n", "--number": 125 | pupDisplayer = NumDisplayer{} 126 | default: 127 | if cmd[0] == '-' { 128 | return []string{}, fmt.Errorf("Unrecognized flag '%s'", cmd) 129 | } 130 | nonFlagCmds[n] = cmds[i] 131 | n++ 132 | } 133 | } 134 | return nonFlagCmds[:n], nil 135 | } 136 | 137 | // Split a string with awareness for quoted text and commas 138 | func 
ParseCommands(cmdString string) ([]string, error) { 139 | cmds := []string{} 140 | last, next, max := 0, 0, len(cmdString) 141 | for { 142 | // if we're at the end of the string, return 143 | if next == max { 144 | if next > last { 145 | cmds = append(cmds, cmdString[last:next]) 146 | } 147 | return cmds, nil 148 | } 149 | // evaluate a rune 150 | c := cmdString[next] 151 | switch c { 152 | case ' ': 153 | if next > last { 154 | cmds = append(cmds, cmdString[last:next]) 155 | } 156 | last = next + 1 157 | case ',': 158 | if next > last { 159 | cmds = append(cmds, cmdString[last:next]) 160 | } 161 | cmds = append(cmds, ",") 162 | last = next + 1 163 | case '\'', '"': 164 | // for quotes, consume runes until the quote has ended 165 | quoteChar := c 166 | for { 167 | next++ 168 | if next == max { 169 | return []string{}, fmt.Errorf("Unmatched open quote (%c)", quoteChar) 170 | } 171 | if cmdString[next] == '\\' { 172 | next++ 173 | if next == max { 174 | return []string{}, fmt.Errorf("Unmatched open quote (%c)", quoteChar) 175 | } 176 | } else if cmdString[next] == quoteChar { 177 | break 178 | } 179 | } 180 | } 181 | next++ 182 | } 183 | } 184 | -------------------------------------------------------------------------------- /pup/pup.go: -------------------------------------------------------------------------------- 1 | package pup 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "io" 7 | "golang.org/x/net/html" 8 | ) 9 | 10 | // _=,_ 11 | // o_/6 /#\ 12 | // \__ |##/ 13 | // ='|--\ 14 | // / #'-. 15 | // \#|_ _'-. 
/ 16 | // |/ \_( # |" 17 | // C/ ,--___/ 18 | 19 | var VERSION string = "0.4.0" 20 | 21 | func Run(_pupIn io.ReadCloser) []byte { 22 | // process flags and arguments 23 | cmds, err := ParseArgs() 24 | if err != nil { 25 | fmt.Fprintf(os.Stderr, "%s\n", err.Error()) 26 | os.Exit(2) 27 | } 28 | 29 | pupIn = _pupIn 30 | // Parse the input and get the root node 31 | root, err := ParseHTML(pupIn, pupCharset) 32 | if err != nil { 33 | fmt.Fprintf(os.Stderr, "%s\n", err.Error()) 34 | os.Exit(2) 35 | } 36 | pupIn.Close() 37 | 38 | // Parse the selectors 39 | selectorFuncs := []SelectorFunc{} 40 | funcGenerator := Select 41 | var cmd string 42 | for len(cmds) > 0 { 43 | cmd, cmds = cmds[0], cmds[1:] 44 | if len(cmds) == 0 { 45 | if err := ParseDisplayer(cmd); err == nil { 46 | continue 47 | } 48 | } 49 | switch cmd { 50 | case "*": // select all 51 | continue 52 | case ">": 53 | funcGenerator = SelectFromChildren 54 | case "+": 55 | funcGenerator = SelectNextSibling 56 | case ",": // nil will signify a comma 57 | selectorFuncs = append(selectorFuncs, nil) 58 | default: 59 | selector, err := ParseSelector(cmd) 60 | if err != nil { 61 | fmt.Fprintf(os.Stderr, "Selector parsing error: %s\n", err.Error()) 62 | os.Exit(2) 63 | } 64 | selectorFuncs = append(selectorFuncs, funcGenerator(selector)) 65 | funcGenerator = Select 66 | } 67 | } 68 | 69 | selectedNodes := []*html.Node{} 70 | currNodes := []*html.Node{root} 71 | for _, selectorFunc := range selectorFuncs { 72 | if selectorFunc == nil { // hit a comma 73 | selectedNodes = append(selectedNodes, currNodes...) 74 | currNodes = []*html.Node{root} 75 | } else { 76 | currNodes = selectorFunc(currNodes) 77 | } 78 | } 79 | selectedNodes = append(selectedNodes, currNodes...) 
80 | // fmt.Println("Selected Node: ",&selectedNodes) 81 | // pupDisplayer.Display(selectedNodes) 82 | return GetJSONByte(selectedNodes) 83 | // fmt.Println("results",results) 84 | } 85 | -------------------------------------------------------------------------------- /pup/selector.go: -------------------------------------------------------------------------------- 1 | package pup 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "regexp" 7 | "strconv" 8 | "strings" 9 | "text/scanner" 10 | 11 | "golang.org/x/net/html" 12 | ) 13 | 14 | type Selector interface { 15 | Match(node *html.Node) bool 16 | } 17 | 18 | type SelectorFunc func(nodes []*html.Node) []*html.Node 19 | 20 | func Select(s Selector) SelectorFunc { 21 | // have to define first to be able to do recursion 22 | var selectChildren func(node *html.Node) []*html.Node 23 | selectChildren = func(node *html.Node) []*html.Node { 24 | selected := []*html.Node{} 25 | for child := node.FirstChild; child != nil; child = child.NextSibling { 26 | if s.Match(child) { 27 | selected = append(selected, child) 28 | } else { 29 | selected = append(selected, selectChildren(child)...) 30 | } 31 | } 32 | return selected 33 | } 34 | return func(nodes []*html.Node) []*html.Node { 35 | selected := []*html.Node{} 36 | for _, node := range nodes { 37 | selected = append(selected, selectChildren(node)...) 
38 | } 39 | return selected 40 | } 41 | } 42 | 43 | // Defined for the '>' selector 44 | func SelectNextSibling(s Selector) SelectorFunc { 45 | return func(nodes []*html.Node) []*html.Node { 46 | selected := []*html.Node{} 47 | for _, node := range nodes { 48 | for ns := node.NextSibling; ns != nil; ns = ns.NextSibling { 49 | if ns.Type == html.ElementNode { 50 | if s.Match(ns) { 51 | selected = append(selected, ns) 52 | } 53 | break 54 | } 55 | } 56 | } 57 | return selected 58 | } 59 | } 60 | 61 | // Defined for the '+' selector 62 | func SelectFromChildren(s Selector) SelectorFunc { 63 | return func(nodes []*html.Node) []*html.Node { 64 | selected := []*html.Node{} 65 | for _, node := range nodes { 66 | for c := node.FirstChild; c != nil; c = c.NextSibling { 67 | if s.Match(c) { 68 | selected = append(selected, c) 69 | } 70 | } 71 | } 72 | return selected 73 | } 74 | } 75 | 76 | type PseudoClass func(*html.Node) bool 77 | 78 | type CSSSelector struct { 79 | Tag string 80 | Attrs map[string]*regexp.Regexp 81 | Pseudo PseudoClass 82 | } 83 | 84 | func (s CSSSelector) Match(node *html.Node) bool { 85 | if node.Type != html.ElementNode { 86 | return false 87 | } 88 | if s.Tag != "" { 89 | if s.Tag != node.DataAtom.String() { 90 | return false 91 | } 92 | } 93 | for attrKey, matcher := range s.Attrs { 94 | matched := false 95 | for _, attr := range node.Attr { 96 | if attrKey == attr.Key { 97 | if !matcher.MatchString(attr.Val) { 98 | return false 99 | } 100 | matched = true 101 | break 102 | } 103 | } 104 | if !matched { 105 | return false 106 | } 107 | } 108 | if s.Pseudo == nil { 109 | return true 110 | } 111 | return s.Pseudo(node) 112 | } 113 | 114 | // Parse a selector 115 | // e.g. 
`div#my-button.btn[href^="http"]` 116 | func ParseSelector(cmd string) (selector CSSSelector, err error) { 117 | selector = CSSSelector{ 118 | Tag: "", 119 | Attrs: map[string]*regexp.Regexp{}, 120 | Pseudo: nil, 121 | } 122 | var s scanner.Scanner 123 | s.Init(strings.NewReader(cmd)) 124 | err = ParseTagMatcher(&selector, s) 125 | return 126 | } 127 | 128 | // Parse the initial tag 129 | // e.g. `div` 130 | func ParseTagMatcher(selector *CSSSelector, s scanner.Scanner) error { 131 | tag := bytes.NewBuffer([]byte{}) 132 | defer func() { 133 | selector.Tag = tag.String() 134 | }() 135 | for { 136 | c := s.Next() 137 | switch c { 138 | case scanner.EOF: 139 | return nil 140 | case '.': 141 | return ParseClassMatcher(selector, s) 142 | case '#': 143 | return ParseIdMatcher(selector, s) 144 | case '[': 145 | return ParseAttrMatcher(selector, s) 146 | case ':': 147 | return ParsePseudo(selector, s) 148 | default: 149 | if _, err := tag.WriteRune(c); err != nil { 150 | return err 151 | } 152 | } 153 | } 154 | } 155 | 156 | // Parse a class matcher 157 | // e.g. `.btn` 158 | func ParseClassMatcher(selector *CSSSelector, s scanner.Scanner) error { 159 | var class bytes.Buffer 160 | defer func() { 161 | regexpStr := `(\A|\s)` + regexp.QuoteMeta(class.String()) + `(\s|\z)` 162 | selector.Attrs["class"] = regexp.MustCompile(regexpStr) 163 | }() 164 | for { 165 | c := s.Next() 166 | switch c { 167 | case scanner.EOF: 168 | return nil 169 | case '.': 170 | return ParseClassMatcher(selector, s) 171 | case '#': 172 | return ParseIdMatcher(selector, s) 173 | case '[': 174 | return ParseAttrMatcher(selector, s) 175 | case ':': 176 | return ParsePseudo(selector, s) 177 | default: 178 | if _, err := class.WriteRune(c); err != nil { 179 | return err 180 | } 181 | } 182 | } 183 | } 184 | 185 | // Parse an id matcher 186 | // e.g. 
`#my-picture` 187 | func ParseIdMatcher(selector *CSSSelector, s scanner.Scanner) error { 188 | var id bytes.Buffer 189 | defer func() { 190 | regexpStr := `^` + regexp.QuoteMeta(id.String()) + `$` 191 | selector.Attrs["id"] = regexp.MustCompile(regexpStr) 192 | }() 193 | for { 194 | c := s.Next() 195 | switch c { 196 | case scanner.EOF: 197 | return nil 198 | case '.': 199 | return ParseClassMatcher(selector, s) 200 | case '#': 201 | return ParseIdMatcher(selector, s) 202 | case '[': 203 | return ParseAttrMatcher(selector, s) 204 | case ':': 205 | return ParsePseudo(selector, s) 206 | default: 207 | if _, err := id.WriteRune(c); err != nil { 208 | return err 209 | } 210 | } 211 | } 212 | } 213 | 214 | // Parse an attribute matcher 215 | // e.g. `[attr^="http"]` 216 | func ParseAttrMatcher(selector *CSSSelector, s scanner.Scanner) error { 217 | var attrKey bytes.Buffer 218 | var attrVal bytes.Buffer 219 | hasMatchVal := false 220 | matchType := '=' 221 | defer func() { 222 | if hasMatchVal { 223 | var regexpStr string 224 | switch matchType { 225 | case '=': 226 | regexpStr = `^` + regexp.QuoteMeta(attrVal.String()) + `$` 227 | case '*': 228 | regexpStr = regexp.QuoteMeta(attrVal.String()) 229 | case '$': 230 | regexpStr = regexp.QuoteMeta(attrVal.String()) + `$` 231 | case '^': 232 | regexpStr = `^` + regexp.QuoteMeta(attrVal.String()) 233 | case '~': 234 | regexpStr = `(\A|\s)` + regexp.QuoteMeta(attrVal.String()) + `(\s|\z)` 235 | } 236 | selector.Attrs[attrKey.String()] = regexp.MustCompile(regexpStr) 237 | } else { 238 | selector.Attrs[attrKey.String()] = regexp.MustCompile(`^.*$`) 239 | } 240 | }() 241 | // After reaching ']' proceed 242 | proceed := func() error { 243 | switch s.Next() { 244 | case scanner.EOF: 245 | return nil 246 | case '.': 247 | return ParseClassMatcher(selector, s) 248 | case '#': 249 | return ParseIdMatcher(selector, s) 250 | case '[': 251 | return ParseAttrMatcher(selector, s) 252 | case ':': 253 | return ParsePseudo(selector, s) 254 
| default: 255 | return fmt.Errorf("Expected selector indicator after ']'") 256 | } 257 | } 258 | // Parse the attribute key matcher 259 | for !hasMatchVal { 260 | c := s.Next() 261 | switch c { 262 | case scanner.EOF: 263 | return fmt.Errorf("Unmatched open brace '['") 264 | case ']': 265 | // No attribute value matcher, proceed! 266 | return proceed() 267 | case '$', '^', '~', '*': 268 | matchType = c 269 | hasMatchVal = true 270 | if s.Next() != '=' { 271 | return fmt.Errorf("'%c' must be followed by a '='", matchType) 272 | } 273 | case '=': 274 | matchType = c 275 | hasMatchVal = true 276 | default: 277 | if _, err := attrKey.WriteRune(c); err != nil { 278 | return err 279 | } 280 | } 281 | } 282 | // figure out if the value is quoted 283 | c := s.Next() 284 | inQuote := false 285 | switch c { 286 | case scanner.EOF: 287 | return fmt.Errorf("Unmatched open brace '['") 288 | case ']': 289 | return proceed() 290 | case '"': 291 | inQuote = true 292 | default: 293 | if _, err := attrVal.WriteRune(c); err != nil { 294 | return err 295 | } 296 | } 297 | if inQuote { 298 | for { 299 | c := s.Next() 300 | switch c { 301 | case '\\': 302 | // consume another character 303 | if c = s.Next(); c == scanner.EOF { 304 | return fmt.Errorf("Unmatched open brace '['") 305 | } 306 | case '"': 307 | switch s.Next() { 308 | case ']': 309 | return proceed() 310 | default: 311 | return fmt.Errorf("Quote must end at ']'") 312 | } 313 | } 314 | if _, err := attrVal.WriteRune(c); err != nil { 315 | return err 316 | } 317 | } 318 | } else { 319 | for { 320 | c := s.Next() 321 | switch c { 322 | case scanner.EOF: 323 | return fmt.Errorf("Unmatched open brace '['") 324 | case ']': 325 | // No attribute value matcher, proceed! 
326 | return proceed() 327 | } 328 | if _, err := attrVal.WriteRune(c); err != nil { 329 | return err 330 | } 331 | } 332 | } 333 | } 334 | 335 | // Parse the selector after ':' 336 | func ParsePseudo(selector *CSSSelector, s scanner.Scanner) error { 337 | if selector.Pseudo != nil { 338 | return fmt.Errorf("Combined multiple pseudo classes") 339 | } 340 | var b bytes.Buffer 341 | for s.Peek() != scanner.EOF { 342 | if _, err := b.WriteRune(s.Next()); err != nil { 343 | return err 344 | } 345 | } 346 | cmd := b.String() 347 | var err error 348 | switch { 349 | case cmd == "empty": 350 | selector.Pseudo = func(n *html.Node) bool { 351 | return n.FirstChild == nil 352 | } 353 | case cmd == "first-child": 354 | selector.Pseudo = firstChildPseudo 355 | case cmd == "last-child": 356 | selector.Pseudo = lastChildPseudo 357 | case cmd == "only-child": 358 | selector.Pseudo = func(n *html.Node) bool { 359 | return firstChildPseudo(n) && lastChildPseudo(n) 360 | } 361 | case cmd == "first-of-type": 362 | selector.Pseudo = firstOfTypePseudo 363 | case cmd == "last-of-type": 364 | selector.Pseudo = lastOfTypePseudo 365 | case cmd == "only-of-type": 366 | selector.Pseudo = func(n *html.Node) bool { 367 | return firstOfTypePseudo(n) && lastOfTypePseudo(n) 368 | } 369 | case strings.HasPrefix(cmd, "contains("): 370 | selector.Pseudo, err = parseContainsPseudo(cmd[len("contains("):]) 371 | if err != nil { 372 | return err 373 | } 374 | case strings.HasPrefix(cmd, "nth-child("), 375 | strings.HasPrefix(cmd, "nth-last-child("), 376 | strings.HasPrefix(cmd, "nth-last-of-type("), 377 | strings.HasPrefix(cmd, "nth-of-type("): 378 | if selector.Pseudo, err = parseNthPseudo(cmd); err != nil { 379 | return err 380 | } 381 | case strings.HasPrefix(cmd, "not("): 382 | if selector.Pseudo, err = parseNotPseudo(cmd[len("not("):]); err != nil { 383 | return err 384 | } 385 | case strings.HasPrefix(cmd, "parent-of("): 386 | if selector.Pseudo, err = parseParentOfPseudo(cmd[len("parent-of("):]); 
err != nil { 387 | return err 388 | } 389 | default: 390 | return fmt.Errorf("%s not a valid pseudo class", cmd) 391 | } 392 | return nil 393 | } 394 | 395 | // :first-of-child 396 | func firstChildPseudo(n *html.Node) bool { 397 | for c := n.PrevSibling; c != nil; c = c.PrevSibling { 398 | if c.Type == html.ElementNode { 399 | return false 400 | } 401 | } 402 | return true 403 | } 404 | 405 | // :last-of-child 406 | func lastChildPseudo(n *html.Node) bool { 407 | for c := n.NextSibling; c != nil; c = c.NextSibling { 408 | if c.Type == html.ElementNode { 409 | return false 410 | } 411 | } 412 | return true 413 | } 414 | 415 | // :first-of-type 416 | func firstOfTypePseudo(node *html.Node) bool { 417 | if node.Type != html.ElementNode { 418 | return false 419 | } 420 | for n := node.PrevSibling; n != nil; n = n.PrevSibling { 421 | if n.DataAtom == node.DataAtom { 422 | return false 423 | } 424 | } 425 | return true 426 | } 427 | 428 | // :last-of-type 429 | func lastOfTypePseudo(node *html.Node) bool { 430 | if node.Type != html.ElementNode { 431 | return false 432 | } 433 | for n := node.NextSibling; n != nil; n = n.NextSibling { 434 | if n.DataAtom == node.DataAtom { 435 | return false 436 | } 437 | } 438 | return true 439 | } 440 | 441 | func parseNthPseudo(cmd string) (PseudoClass, error) { 442 | i := strings.IndexRune(cmd, '(') 443 | if i < 0 { 444 | // really, we should never get here 445 | return nil, fmt.Errorf("Fatal error, '%s' does not contain a '('", cmd) 446 | } 447 | pseudoName := cmd[:i] 448 | // Figure out how the counting function works 449 | var countNth func(*html.Node) int 450 | switch pseudoName { 451 | case "nth-child": 452 | countNth = func(n *html.Node) int { 453 | nth := 1 454 | for sib := n.PrevSibling; sib != nil; sib = sib.PrevSibling { 455 | if sib.Type == html.ElementNode { 456 | nth++ 457 | } 458 | } 459 | return nth 460 | } 461 | case "nth-of-type": 462 | countNth = func(n *html.Node) int { 463 | nth := 1 464 | for sib := 
n.PrevSibling; sib != nil; sib = sib.PrevSibling { 465 | if sib.Type == html.ElementNode && sib.DataAtom == n.DataAtom { 466 | nth++ 467 | } 468 | } 469 | return nth 470 | } 471 | case "nth-last-child": 472 | countNth = func(n *html.Node) int { 473 | nth := 1 474 | for sib := n.NextSibling; sib != nil; sib = sib.NextSibling { 475 | if sib.Type == html.ElementNode { 476 | nth++ 477 | } 478 | } 479 | return nth 480 | } 481 | case "nth-last-of-type": 482 | countNth = func(n *html.Node) int { 483 | nth := 1 484 | for sib := n.NextSibling; sib != nil; sib = sib.NextSibling { 485 | if sib.Type == html.ElementNode && sib.DataAtom == n.DataAtom { 486 | nth++ 487 | } 488 | } 489 | return nth 490 | } 491 | default: 492 | return nil, fmt.Errorf("Unrecognized pseudo '%s'", pseudoName) 493 | } 494 | 495 | nthString := cmd[i+1:] 496 | i = strings.IndexRune(nthString, ')') 497 | if i < 0 { 498 | return nil, fmt.Errorf("Unmatched '(' for pseudo class %s", pseudoName) 499 | } else if i != len(nthString)-1 { 500 | return nil, fmt.Errorf("%s(n) must end selector", pseudoName) 501 | } 502 | number := nthString[:i] 503 | 504 | // Check if the number is 'odd' or 'even' 505 | oddOrEven := -1 506 | switch number { 507 | case "odd": 508 | oddOrEven = 1 509 | case "even": 510 | oddOrEven = 0 511 | } 512 | if oddOrEven > -1 { 513 | return func(n *html.Node) bool { 514 | return n.Type == html.ElementNode && countNth(n)%2 == oddOrEven 515 | }, nil 516 | } 517 | // Check against '3n+4' pattern 518 | r := regexp.MustCompile(`([0-9]+)n[ ]?\+[ ]?([0-9])`) 519 | subMatch := r.FindAllStringSubmatch(number, -1) 520 | if len(subMatch) == 1 && len(subMatch[0]) == 3 { 521 | cycle, _ := strconv.Atoi(subMatch[0][1]) 522 | offset, _ := strconv.Atoi(subMatch[0][2]) 523 | return func(n *html.Node) bool { 524 | return n.Type == html.ElementNode && countNth(n)%cycle == offset 525 | }, nil 526 | } 527 | // check against 'n+2' pattern 528 | r = regexp.MustCompile(`n[ ]?\+[ ]?([0-9])`) 529 | subMatch = 
r.FindAllStringSubmatch(number, -1) 530 | if len(subMatch) == 1 && len(subMatch[0]) == 2 { 531 | offset, _ := strconv.Atoi(subMatch[0][1]) 532 | return func(n *html.Node) bool { 533 | return n.Type == html.ElementNode && countNth(n) >= offset 534 | }, nil 535 | } 536 | // the only other option is a numeric value 537 | nth, err := strconv.Atoi(nthString[:i]) 538 | if err != nil { 539 | return nil, err 540 | } else if nth <= 0 { 541 | return nil, fmt.Errorf("Argument to '%s' must be greater than 0", pseudoName) 542 | } 543 | return func(n *html.Node) bool { 544 | return n.Type == html.ElementNode && countNth(n) == nth 545 | }, nil 546 | } 547 | 548 | // Parse a :contains("") selector 549 | // expects the input to be everything after the open parenthesis 550 | // e.g. for `contains("Help")` the argument would be `"Help")` 551 | func parseContainsPseudo(cmd string) (PseudoClass, error) { 552 | var s scanner.Scanner 553 | s.Init(strings.NewReader(cmd)) 554 | switch s.Next() { 555 | case '"': 556 | default: 557 | return nil, fmt.Errorf("Malformed 'contains(\"\")' selector") 558 | } 559 | textToContain := bytes.NewBuffer([]byte{}) 560 | for { 561 | r := s.Next() 562 | switch r { 563 | case '"': 564 | // ')' then EOF must follow '"' 565 | if s.Next() != ')' { 566 | return nil, fmt.Errorf("Malformed 'contains(\"\")' selector") 567 | } 568 | if s.Next() != scanner.EOF { 569 | return nil, fmt.Errorf("'contains(\"\")' must end selector") 570 | } 571 | text := textToContain.String() 572 | contains := func(node *html.Node) bool { 573 | for c := node.FirstChild; c != nil; c = c.NextSibling { 574 | if c.Type == html.TextNode { 575 | if strings.Contains(c.Data, text) { 576 | return true 577 | } 578 | } 579 | } 580 | return false 581 | } 582 | return contains, nil 583 | case '\\': 584 | s.Next() 585 | case scanner.EOF: 586 | return nil, fmt.Errorf("Malformed 'contains(\"\")' selector") 587 | default: 588 | if _, err := textToContain.WriteRune(r); err != nil { 589 | return nil, err 
590 | } 591 | } 592 | } 593 | } 594 | 595 | // Parse a :not(selector) selector 596 | // expects the input to be everything after the open parenthesis 597 | // e.g. for `not(div#id)` the argument would be `div#id)` 598 | func parseNotPseudo(cmd string) (PseudoClass, error) { 599 | if len(cmd) < 2 { 600 | return nil, fmt.Errorf("malformed ':not' selector") 601 | } 602 | endQuote, cmd := cmd[len(cmd)-1], cmd[:len(cmd)-1] 603 | selector, err := ParseSelector(cmd) 604 | if err != nil { 605 | return nil, err 606 | } 607 | if endQuote != ')' { 608 | return nil, fmt.Errorf("unmatched '('") 609 | } 610 | return func(n *html.Node) bool { 611 | return !selector.Match(n) 612 | }, nil 613 | } 614 | 615 | // Parse a :parent-of(selector) selector 616 | // expects the input to be everything after the open parenthesis 617 | // e.g. for `parent-of(div#id)` the argument would be `div#id)` 618 | func parseParentOfPseudo(cmd string) (PseudoClass, error) { 619 | if len(cmd) < 2 { 620 | return nil, fmt.Errorf("malformed ':parent-of' selector") 621 | } 622 | endQuote, cmd := cmd[len(cmd)-1], cmd[:len(cmd)-1] 623 | selector, err := ParseSelector(cmd) 624 | if err != nil { 625 | return nil, err 626 | } 627 | if endQuote != ')' { 628 | return nil, fmt.Errorf("unmatched '('") 629 | } 630 | return func(n *html.Node) bool { 631 | for c := n.FirstChild; c != nil; c = c.NextSibling { 632 | if c.Type == html.ElementNode && selector.Match(c) { 633 | return true 634 | } 635 | } 636 | return false 637 | }, nil 638 | } 639 | -------------------------------------------------------------------------------- /run: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | MOVIES_PKG=$PWD/vendor 3 | export GOPATH=$MOVIES_PKG 4 | go build 5 | awesome-movies $@ 6 | -------------------------------------------------------------------------------- /tamilmv.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | 4 | 
import ( 5 | "fmt" 6 | "os" 7 | "net/http" 8 | // "net/url" 9 | "encoding/json" 10 | "strconv" 11 | 12 | "./pup" 13 | "github.com/fatih/color" 14 | ) 15 | type TamilMV struct { 16 | SearchResults []struct { 17 | Href string `json:"href"` 18 | Text string `json:"text"` 19 | } 20 | 21 | DownloadResults []struct { 22 | Href string `json:"href"` 23 | } 24 | 25 | } 26 | 27 | var ( 28 | searchURL string = "https://www.tamilmv.cz/index.php?/search/" 29 | TAMILMV = TamilMV{} 30 | ) 31 | 32 | func SearchTamilMovies(query string){ 33 | //resp,err := http.PostForm(searchURL,url.Values{"type":{"all"},"q":{query}}) 34 | resp,err := http.Get(searchURL+"&q="+UrlEncoded(query)+"&nodes=1,2,3") 35 | if err != nil { 36 | onHttpError(err) 37 | } 38 | os.Args = []string{ 39 | "anbuksv", 40 | "li.ipsStreamItem > div > div > div > h2 > div > a json{}", 41 | } 42 | searchResultNodes := pup.Run(resp.Body) 43 | json.Unmarshal(searchResultNodes,&TAMILMV.SearchResults) 44 | moviesResultCheck(len(TAMILMV.SearchResults)) 45 | fmt.Printf("%s",listTamilMVMovies()) 46 | downloadIndex := getConformation(len(TAMILMV.SearchResults)) 47 | torrentHtml := downloadHTML(TAMILMV.SearchResults[downloadIndex].Href) 48 | os.Args = []string{ 49 | "anbuksv", 50 | ".ipsAttachLink json{}", 51 | } 52 | torrentHtmlNode := pup.Run(torrentHtml) 53 | json.Unmarshal(torrentHtmlNode,&TAMILMV.DownloadResults) 54 | color.Set(color.FgYellow, color.Bold) 55 | if len(TAMILMV.DownloadResults) > 0 { 56 | fmt.Println(TAMILMV.DownloadResults[0].Href) 57 | } else { 58 | fmt.Println("Sorry,We are unable to process your request") 59 | } 60 | color.Unset() 61 | } 62 | 63 | func listTamilMVMovies() string { 64 | var _movies string = "" 65 | for index,movie := range(TAMILMV.SearchResults){ 66 | _movies = _movies + strconv.Itoa(index+1) + ". " + movie.Text+ "\n" 67 | } 68 | return _movies 69 | } 70 | --------------------------------------------------------------------------------