├── .gitignore ├── LICENSE ├── README.md ├── examples └── awesome-movies_2018-11-29_112814.gif ├── movie.go ├── pup ├── display.go ├── parse.go ├── pup.go └── selector.go ├── run └── tamilmv.go /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | 8 | # Test binary, build with `go test -c` 9 | *.test 10 | 11 | # Output of the go coverage tool, specifically when used with LiteIDE 12 | *.out 13 | 14 | 15 | dist/* 16 | workspace/* -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Anbarasan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # awesome-movies 2 | 3 | awesome-movies is a command line tool for searching movies online and get torrent link. It reads from stdin, 4 | prints to stdout. 5 | 6 | ## Install 7 | 8 | Direct downloads are available through the [releases page](https://github.com/anbuksv/awesome-movies/releases/latest). 9 | 10 | ## Quick start 11 | 12 | ```bash 13 | $ movies the dark knight 14 | ``` 15 | 16 | ## Basic Usage 17 | 18 | ```bash 19 | $ movies [flags] '[movie name]' 20 | ``` 21 | 22 | ## Examples 23 | 24 | ![Example](examples/awesome-movies_2018-11-29_112814.gif) 25 | 26 | ### Tamil Movies 27 | 28 | ```bash 29 | $ movies -l tamil '[movie name]' 30 | ``` 31 | 32 | ## Roadmap 33 | 34 | - [x] yts 35 | - [x] tamilmv 36 | 37 | ## Flags 38 | 39 | Run `movies --help` for a list of further options 40 | -------------------------------------------------------------------------------- /examples/awesome-movies_2018-11-29_112814.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anbuksv/awesome-movies/55c7aa82553618dbf5ea566e0671c7102f1d5b78/examples/awesome-movies_2018-11-29_112814.gif -------------------------------------------------------------------------------- /movie.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "strings" 7 | "net/http" 8 | "net/url" 9 | "io/ioutil" 10 | "encoding/json" 11 | "strconv" 12 | "io" 13 | "time" 14 | 15 | "./pup" 16 | "github.com/fatih/color" 17 | ) 18 | 19 | type Yts struct { 20 | SearchResults struct { 21 | Status string `json:"status"` 22 | Movies []struct { 23 | Url string `json:"url"` 24 | Img string `json:"img"` 25 | Title string `json:"title"` 26 | Year string `json:"year"` 27 | } `json:"data"` 28 | } 29 
| 30 | DownloadResults []struct { 31 | Href string `json:"href"` 32 | Text string `json:"text"` 33 | Title string `json:"Title"` 34 | } 35 | } 36 | 37 | var ( 38 | timeout = time.Duration(5 * time.Second) 39 | HttpClient = http.Client{ 40 | Timeout: timeout, 41 | } 42 | YTS = Yts{} 43 | MOVIES_VERSION string = "1.0.1" 44 | language string 45 | ) 46 | 47 | func UrlEncoded(str string) string { 48 | u, err := url.Parse(str) 49 | if err != nil { 50 | return "" 51 | } 52 | return u.String() 53 | } 54 | 55 | func moviesResultCheck(length int){ 56 | if length <= 0 { 57 | color.Set(color.FgMagenta, color.Bold) 58 | fmt.Println("No movies found.") 59 | color.Unset() 60 | os.Exit(0) 61 | } 62 | } 63 | 64 | func main(){ 65 | search := ParseFlages() 66 | if strings.ToLower(language) == "tamil"{ 67 | SearchTamilMovies(search) 68 | return 69 | } 70 | searchMovie("https://yts.am/ajax/search?query=",search) 71 | moviesResultCheck(len(YTS.SearchResults.Movies)) 72 | fmt.Printf("%s",listMovies()) 73 | downloadIndex := getConformation(len(YTS.SearchResults.Movies)) 74 | torrentHtml := downloadHTML(YTS.SearchResults.Movies[downloadIndex].Url) 75 | os.Args = []string{ 76 | "anbuksv", 77 | "#movie-info > p a json{}", 78 | } 79 | nodes := pup.Run(torrentHtml) 80 | json.Unmarshal(nodes,&YTS.DownloadResults) 81 | color.Set(color.FgYellow, color.Bold) 82 | for _,result:= range(YTS.DownloadResults){ 83 | color.New(color.FgYellow,color.Bold).Print("("+ result.Text +") ") 84 | color.White(result.Href + "\n") 85 | } 86 | // fmt.Println(YTS.DownloadResults[len(YTS.DownloadResults)-1].Href) //By default high resolution torrent link will be printed 87 | color.Unset() 88 | } 89 | 90 | func ParseFlages() string { 91 | cmds := os.Args[1:] 92 | nonFlagCmds := make([]string, len(cmds)) 93 | n := 0 94 | for i := 0; i < len(cmds); i++ { 95 | cmd := cmds[i] 96 | switch cmd { 97 | case "-h","--help": 98 | PrintMoviesHelp(os.Stdout, 0) 99 | case "--version": 100 | fmt.Println(MOVIES_VERSION) 101 | 
os.Exit(0) 102 | case "-l","--language": 103 | language = cmds[i+1] 104 | i++ 105 | default: 106 | nonFlagCmds[n] = cmds[i] 107 | n++ 108 | } 109 | } 110 | return strings.Join(nonFlagCmds," ") 111 | } 112 | 113 | func PrintMoviesHelp(w io.Writer, exitCode int) { 114 | helpString := `Usage 115 | movies Name [flags] 116 | Version 117 | %s 118 | Flags 119 | -h --help display this help 120 | --version display version 121 | -l --language movie language 122 | ` 123 | fmt.Fprintf(w, helpString, MOVIES_VERSION) 124 | os.Exit(exitCode) 125 | } 126 | 127 | func getConformation(limit int) int { 128 | var downloadIndex int 129 | color.Set(color.FgWhite, color.Bold) 130 | fmt.Print("awesome-movie> ") 131 | color.Unset() 132 | fmt.Scan(&downloadIndex) 133 | downloadIndex = downloadIndex - 1 134 | if downloadIndex < 0 || downloadIndex >= limit{ 135 | fmt.Println("awesome-movie> Please enter valid number.") 136 | return getConformation(limit) 137 | } 138 | return downloadIndex 139 | } 140 | 141 | func onHttpError(err error){ 142 | color.Set(color.FgRed, color.Bold) 143 | fmt.Println(err) 144 | color.Unset() 145 | os.Exit(0) 146 | } 147 | 148 | func searchMovie(url string, query string) { 149 | resp,err := HttpClient.Get(url+UrlEncoded(query)) 150 | if err != nil{ 151 | onHttpError(err) 152 | } 153 | defer resp.Body.Close() 154 | body, err := ioutil.ReadAll(resp.Body) 155 | json.Unmarshal([]byte(body),&YTS.SearchResults) 156 | } 157 | 158 | func downloadHTML(url string) io.ReadCloser { 159 | resp,err := HttpClient.Get(url) 160 | if err != nil { 161 | onHttpError(err) 162 | } 163 | return resp.Body 164 | } 165 | 166 | func listMovies() string { 167 | var _movies string = "" 168 | for index,movie := range(YTS.SearchResults.Movies){ 169 | _movies = _movies + strconv.Itoa(index+1) + ". 
" + movie.Title + " (" + movie.Year + ")\n" 170 | } 171 | return _movies 172 | } 173 | -------------------------------------------------------------------------------- /pup/display.go: -------------------------------------------------------------------------------- 1 | package pup 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "regexp" 7 | "strings" 8 | 9 | "github.com/fatih/color" 10 | colorable "github.com/mattn/go-colorable" 11 | "golang.org/x/net/html" 12 | "golang.org/x/net/html/atom" 13 | ) 14 | 15 | func init() { 16 | color.Output = colorable.NewColorableStdout() 17 | } 18 | 19 | type Displayer interface { 20 | Display([]*html.Node) 21 | } 22 | 23 | func ParseDisplayer(cmd string) error { 24 | attrRe := regexp.MustCompile(`attr\{([a-zA-Z\-]+)\}`) 25 | if cmd == "text{}" { 26 | pupDisplayer = TextDisplayer{} 27 | } else if cmd == "json{}" { 28 | pupDisplayer = JSONDisplayer{} 29 | } else if match := attrRe.FindAllStringSubmatch(cmd, -1); len(match) == 1 { 30 | pupDisplayer = AttrDisplayer{ 31 | Attr: match[0][1], 32 | } 33 | } else { 34 | return fmt.Errorf("Unknown displayer") 35 | } 36 | return nil 37 | } 38 | 39 | // Is this node a tag with no end tag such as or
? 40 | // http://www.w3.org/TR/html-markup/syntax.html#syntax-elements 41 | func isVoidElement(n *html.Node) bool { 42 | switch n.DataAtom { 43 | case atom.Area, atom.Base, atom.Br, atom.Col, atom.Command, atom.Embed, 44 | atom.Hr, atom.Img, atom.Input, atom.Keygen, atom.Link, 45 | atom.Meta, atom.Param, atom.Source, atom.Track, atom.Wbr: 46 | return true 47 | } 48 | return false 49 | } 50 | 51 | var ( 52 | // Colors 53 | tagColor *color.Color = color.New(color.FgCyan) 54 | tokenColor = color.New(color.FgCyan) 55 | attrKeyColor = color.New(color.FgMagenta) 56 | quoteColor = color.New(color.FgBlue) 57 | commentColor = color.New(color.FgYellow) 58 | ) 59 | 60 | type TreeDisplayer struct { 61 | } 62 | 63 | func (t TreeDisplayer) Display(nodes []*html.Node) { 64 | for _, node := range nodes { 65 | t.printNode(node, 0) 66 | } 67 | } 68 | 69 | // The
 tag indicates that the text within it should always be formatted
 70 | // as is. See https://github.com/ericchiang/pup/issues/33
 71 | func (t TreeDisplayer) printPre(n *html.Node) {
 72 | 	switch n.Type {
 73 | 	case html.TextNode:
 74 | 		s := n.Data
 75 | 		if pupEscapeHTML {
 76 | 			// don't escape javascript
 77 | 			if n.Parent == nil || n.Parent.DataAtom != atom.Script {
 78 | 				s = html.EscapeString(s)
 79 | 			}
 80 | 		}
 81 | 		fmt.Print(s)
 82 | 		for c := n.FirstChild; c != nil; c = c.NextSibling {
 83 | 			t.printPre(c)
 84 | 		}
 85 | 	case html.ElementNode:
 86 | 		fmt.Printf("<%s", n.Data)
 87 | 		for _, a := range n.Attr {
 88 | 			val := a.Val
 89 | 			if pupEscapeHTML {
 90 | 				val = html.EscapeString(val)
 91 | 			}
 92 | 			fmt.Printf(` %s="%s"`, a.Key, val)
 93 | 		}
 94 | 		fmt.Print(">")
 95 | 		if !isVoidElement(n) {
 96 | 			for c := n.FirstChild; c != nil; c = c.NextSibling {
 97 | 				t.printPre(c)
 98 | 			}
 99 | 			fmt.Printf("", n.Data)
100 | 		}
101 | 	case html.CommentNode:
102 | 		data := n.Data
103 | 		if pupEscapeHTML {
104 | 			data = html.EscapeString(data)
105 | 		}
106 | 		fmt.Printf("\n", data)
107 | 		for c := n.FirstChild; c != nil; c = c.NextSibling {
108 | 			t.printPre(c)
109 | 		}
110 | 	case html.DoctypeNode, html.DocumentNode:
111 | 		for c := n.FirstChild; c != nil; c = c.NextSibling {
112 | 			t.printPre(c)
113 | 		}
114 | 	}
115 | }
116 | 
117 | // Print a node and all of it's children to `maxlevel`.
118 | func (t TreeDisplayer) printNode(n *html.Node, level int) {
119 | 	switch n.Type {
120 | 	case html.TextNode:
121 | 		s := n.Data
122 | 		if pupEscapeHTML {
123 | 			// don't escape javascript
124 | 			if n.Parent == nil || n.Parent.DataAtom != atom.Script {
125 | 				s = html.EscapeString(s)
126 | 			}
127 | 		}
128 | 		s = strings.TrimSpace(s)
129 | 		if s != "" {
130 | 			t.printIndent(level)
131 | 			fmt.Println(s)
132 | 		}
133 | 	case html.ElementNode:
134 | 		t.printIndent(level)
135 | 		// TODO: allow pre with color
136 | 		if n.DataAtom == atom.Pre && !pupPrintColor && pupPreformatted {
137 | 			t.printPre(n)
138 | 			fmt.Println()
139 | 			return
140 | 		}
141 | 		if pupPrintColor {
142 | 			tokenColor.Print("<")
143 | 			tagColor.Printf("%s", n.Data)
144 | 		} else {
145 | 			fmt.Printf("<%s", n.Data)
146 | 		}
147 | 		for _, a := range n.Attr {
148 | 			val := a.Val
149 | 			if pupEscapeHTML {
150 | 				val = html.EscapeString(val)
151 | 			}
152 | 			if pupPrintColor {
153 | 				fmt.Print(" ")
154 | 				attrKeyColor.Printf("%s", a.Key)
155 | 				tokenColor.Print("=")
156 | 				quoteColor.Printf(`"%s"`, val)
157 | 			} else {
158 | 				fmt.Printf(` %s="%s"`, a.Key, val)
159 | 			}
160 | 		}
161 | 		if pupPrintColor {
162 | 			tokenColor.Println(">")
163 | 		} else {
164 | 			fmt.Println(">")
165 | 		}
166 | 		if !isVoidElement(n) {
167 | 			t.printChildren(n, level+1)
168 | 			t.printIndent(level)
169 | 			if pupPrintColor {
170 | 				tokenColor.Print("")
173 | 			} else {
174 | 				fmt.Printf("\n", n.Data)
175 | 			}
176 | 		}
177 | 	case html.CommentNode:
178 | 		t.printIndent(level)
179 | 		data := n.Data
180 | 		if pupEscapeHTML {
181 | 			data = html.EscapeString(data)
182 | 		}
183 | 		if pupPrintColor {
184 | 			commentColor.Printf("\n", data)
185 | 		} else {
186 | 			fmt.Printf("\n", data)
187 | 		}
188 | 		t.printChildren(n, level)
189 | 	case html.DoctypeNode, html.DocumentNode:
190 | 		t.printChildren(n, level)
191 | 	}
192 | }
193 | 
194 | func (t TreeDisplayer) printChildren(n *html.Node, level int) {
195 | 	if pupMaxPrintLevel > -1 {
196 | 		if level >= pupMaxPrintLevel {
197 | 			t.printIndent(level)
198 | 			fmt.Println("...")
199 | 			return
200 | 		}
201 | 	}
202 | 	child := n.FirstChild
203 | 	for child != nil {
204 | 		t.printNode(child, level)
205 | 		child = child.NextSibling
206 | 	}
207 | }
208 | 
209 | func (t TreeDisplayer) printIndent(level int) {
210 | 	for ; level > 0; level-- {
211 | 		fmt.Print(pupIndentString)
212 | 	}
213 | }
214 | 
215 | // Print the text of a node
216 | type TextDisplayer struct{}
217 | 
218 | func (t TextDisplayer) Display(nodes []*html.Node) {
219 | 	for _, node := range nodes {
220 | 		if node.Type == html.TextNode {
221 | 			data := node.Data
222 | 			if pupEscapeHTML {
223 | 				// don't escape javascript
224 | 				if node.Parent == nil || node.Parent.DataAtom != atom.Script {
225 | 					data = html.EscapeString(data)
226 | 				}
227 | 			}
228 | 			fmt.Println(data)
229 | 		}
230 | 		children := []*html.Node{}
231 | 		child := node.FirstChild
232 | 		for child != nil {
233 | 			children = append(children, child)
234 | 			child = child.NextSibling
235 | 		}
236 | 		t.Display(children)
237 | 	}
238 | }
239 | 
240 | // Print the attribute of a node
241 | type AttrDisplayer struct {
242 | 	Attr string
243 | }
244 | 
245 | func (a AttrDisplayer) Display(nodes []*html.Node) {
246 | 	for _, node := range nodes {
247 | 		attributes := node.Attr
248 | 		for _, attr := range attributes {
249 | 			if attr.Key == a.Attr {
250 | 				val := attr.Val
251 | 				if pupEscapeHTML {
252 | 					val = html.EscapeString(val)
253 | 				}
254 | 				fmt.Printf("%s\n", val)
255 | 			}
256 | 		}
257 | 	}
258 | }
259 | 
260 | // Print nodes as a JSON list
261 | type JSONDisplayer struct{}
262 | 
263 | // returns a jsonifiable struct
264 | func jsonify(node *html.Node) map[string]interface{} {
265 | 	vals := map[string]interface{}{}
266 | 	if len(node.Attr) > 0 {
267 | 		for _, attr := range node.Attr {
268 | 			if pupEscapeHTML {
269 | 				vals[attr.Key] = html.EscapeString(attr.Val)
270 | 			} else {
271 | 				vals[attr.Key] = attr.Val
272 | 			}
273 | 		}
274 | 	}
275 | 	vals["tag"] = node.DataAtom.String()
276 | 	children := []interface{}{}
277 | 	for child := node.FirstChild; child != nil; child = child.NextSibling {
278 | 		switch child.Type {
279 | 		case html.ElementNode:
280 | 			children = append(children, jsonify(child))
281 | 		case html.TextNode:
282 | 			text := strings.TrimSpace(child.Data)
283 | 			if text != "" {
284 | 				if pupEscapeHTML {
285 | 					// don't escape javascript
286 | 					if node.DataAtom != atom.Script {
287 | 						text = html.EscapeString(text)
288 | 					}
289 | 				}
290 | 				// if there is already text we'll append it
291 | 				currText, ok := vals["text"]
292 | 				if ok {
293 | 					text = fmt.Sprintf("%s %s", currText, text)
294 | 				}
295 | 				vals["text"] = text
296 | 			}
297 | 		case html.CommentNode:
298 | 			comment := strings.TrimSpace(child.Data)
299 | 			if pupEscapeHTML {
300 | 				comment = html.EscapeString(comment)
301 | 			}
302 | 			currComment, ok := vals["comment"]
303 | 			if ok {
304 | 				comment = fmt.Sprintf("%s %s", currComment, comment)
305 | 			}
306 | 			vals["comment"] = comment
307 | 		}
308 | 	}
309 | 	if len(children) > 0 {
310 | 		vals["children"] = children
311 | 	}
312 | 	return vals
313 | }
314 | 
315 | func (j JSONDisplayer) Display(nodes []*html.Node) {
316 | 	var data []byte
317 | 	var err error
318 | 	jsonNodes := []map[string]interface{}{}
319 | 	for _, node := range nodes {
320 | 		jsonNodes = append(jsonNodes, jsonify(node))
321 | 	}
322 | 	data, err = json.MarshalIndent(&jsonNodes, "", pupIndentString)
323 | 	if err != nil {
324 | 		panic("Could not jsonify nodes")
325 | 	}
326 | 	fmt.Printf("%s\n", data)
327 | }
328 | 
329 | func GetJSONByte(nodes []*html.Node) []byte{
330 | 	var data []byte
331 | 	var err error
332 | 	jsonNodes := []map[string]interface{}{}
333 | 	for _, node := range nodes {
334 | 		jsonNodes = append(jsonNodes, jsonify(node))
335 | 	}
336 | 	data, err = json.MarshalIndent(&jsonNodes, "", pupIndentString)
337 | 	if err != nil {
338 | 		panic("Could not jsonify nodes")
339 | 	}
340 | 	// fmt.Printf("%s\n", data)
341 | 	return data
342 | }
343 | 
344 | // Print the number of features returned
345 | type NumDisplayer struct{}
346 | 
347 | func (d NumDisplayer) Display(nodes []*html.Node) {
348 | 	fmt.Println(len(nodes))
349 | }
350 | 


--------------------------------------------------------------------------------
/pup/parse.go:
--------------------------------------------------------------------------------
  1 | package pup
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"io"
  6 | 	"os"
  7 | 	"strconv"
  8 | 	"strings"
  9 | 
 10 | 	"golang.org/x/net/html"
 11 | 	"golang.org/x/net/html/charset"
 12 | 	"golang.org/x/text/transform"
 13 | )
 14 | 
 15 | var (
 16 | 	pupIn            io.ReadCloser = os.Stdin
 17 | 	pupCharset       string        = ""
 18 | 	pupMaxPrintLevel int           = -1
 19 | 	pupPreformatted  bool          = false
 20 | 	pupPrintColor    bool          = false
 21 | 	pupEscapeHTML    bool          = true
 22 | 	pupIndentString  string        = " "
 23 | 	pupDisplayer     Displayer     = TreeDisplayer{}
 24 | )
 25 | 
 26 | // Parse the html while handling the charset
 27 | func ParseHTML(r io.Reader, cs string) (*html.Node, error) {
 28 | 	var err error
 29 | 	if cs == "" {
 30 | 		// attempt to guess the charset of the HTML document
 31 | 		r, err = charset.NewReader(r, "")
 32 | 		if err != nil {
 33 | 			return nil, err
 34 | 		}
 35 | 	} else {
 36 | 		// let the user specify the charset
 37 | 		e, name := charset.Lookup(cs)
 38 | 		if name == "" {
 39 | 			return nil, fmt.Errorf("'%s' is not a valid charset", cs)
 40 | 		}
 41 | 		r = transform.NewReader(r, e.NewDecoder())
 42 | 	}
 43 | 	return html.Parse(r)
 44 | }
 45 | 
 46 | func PrintHelp(w io.Writer, exitCode int) {
 47 | 	helpString := `Usage
 48 |     pup [flags] [selectors] [optional display function]
 49 | Version
 50 |     %s
 51 | Flags
 52 |     -c --color         print result with color
 53 |     -f --file          file to read from
 54 |     -h --help          display this help
 55 |     -i --indent        number of spaces to use for indent or character
 56 |     -n --number        print number of elements selected
 57 |     -l --limit         restrict number of levels printed
 58 |     -p --plain         don't escape html
 59 |     --pre              preserve preformatted text
 60 |     --charset          specify the charset for pup to use
 61 |     --version          display version
 62 | `
 63 | 	fmt.Fprintf(w, helpString, VERSION)
 64 | 	os.Exit(exitCode)
 65 | }
 66 | 
 67 | func ParseArgs() ([]string, error) {
 68 | 	cmds, err := ProcessFlags(os.Args[1:])
 69 | 	if err != nil {
 70 | 		return []string{}, err
 71 | 	}
 72 | 	return ParseCommands(strings.Join(cmds, " "))
 73 | }
 74 | 
 75 | // Process command arguments and return all non-flags.
 76 | func ProcessFlags(cmds []string) (nonFlagCmds []string, err error) {
 77 | 	var i int
 78 | 	defer func() {
 79 | 		if r := recover(); r != nil {
 80 | 			err = fmt.Errorf("Option '%s' requires an argument", cmds[i])
 81 | 		}
 82 | 	}()
 83 | 	nonFlagCmds = make([]string, len(cmds))
 84 | 	n := 0
 85 | 	for i = 0; i < len(cmds); i++ {
 86 | 		cmd := cmds[i]
 87 | 		switch cmd {
 88 | 		case "-c", "--color":
 89 | 			pupPrintColor = true
 90 | 		case "-p", "--plain":
 91 | 			pupEscapeHTML = false
 92 | 		case "--pre":
 93 | 			pupPreformatted = true
 94 | 		case "-f", "--file":
 95 | 			filename := cmds[i+1]
 96 | 			pupIn, err = os.Open(filename)
 97 | 			if err != nil {
 98 | 				fmt.Fprintf(os.Stderr, "%s\n", err.Error())
 99 | 				os.Exit(2)
100 | 			}
101 | 			i++
102 | 		case "-h", "--help":
103 | 			PrintHelp(os.Stdout, 0)
104 | 		case "-i", "--indent":
105 | 			indentLevel, err := strconv.Atoi(cmds[i+1])
106 | 			if err == nil {
107 | 				pupIndentString = strings.Repeat(" ", indentLevel)
108 | 			} else {
109 | 				pupIndentString = cmds[i+1]
110 | 			}
111 | 			i++
112 | 		case "-l", "--limit":
113 | 			pupMaxPrintLevel, err = strconv.Atoi(cmds[i+1])
114 | 			if err != nil {
115 | 				return []string{}, fmt.Errorf("Argument for '%s' must be numeric", cmd)
116 | 			}
117 | 			i++
118 | 		case "--charset":
119 | 			pupCharset = cmds[i+1]
120 | 			i++
121 | 		case "--version":
122 | 			fmt.Println(VERSION)
123 | 			os.Exit(0)
124 | 		case "-n", "--number":
125 | 			pupDisplayer = NumDisplayer{}
126 | 		default:
127 | 			if cmd[0] == '-' {
128 | 				return []string{}, fmt.Errorf("Unrecognized flag '%s'", cmd)
129 | 			}
130 | 			nonFlagCmds[n] = cmds[i]
131 | 			n++
132 | 		}
133 | 	}
134 | 	return nonFlagCmds[:n], nil
135 | }
136 | 
// ParseCommands splits cmdString into tokens. Spaces separate tokens and are
// dropped; a comma ends the current token and is emitted as its own ","
// token. Text between matching single or double quotes is kept inside the
// current token, quotes included, and spaces or commas within it do not
// split. Inside quotes a backslash skips the following byte, so an escaped
// quote does not end the quoted text.
//
// It returns an empty slice and an error if a quote is never closed.
func ParseCommands(cmdString string) ([]string, error) {
	tokens := []string{}
	start, end := 0, len(cmdString)

	// flush emits the pending token cmdString[start:pos] if it is non-empty.
	flush := func(pos int) {
		if pos > start {
			tokens = append(tokens, cmdString[start:pos])
		}
	}

	for pos := 0; pos < end; pos++ {
		switch ch := cmdString[pos]; ch {
		case ' ':
			flush(pos)
			start = pos + 1
		case ',':
			flush(pos)
			tokens = append(tokens, ",")
			start = pos + 1
		case '\'', '"':
			// for quotes, consume bytes until the quote has ended
			for {
				pos++
				if pos == end {
					return []string{}, fmt.Errorf("Unmatched open quote (%c)", ch)
				}
				if cmdString[pos] == '\\' {
					pos++
					if pos == end {
						return []string{}, fmt.Errorf("Unmatched open quote (%c)", ch)
					}
					continue
				}
				if cmdString[pos] == ch {
					break
				}
			}
		}
	}
	flush(end)
	return tokens, nil
}
184 | 


--------------------------------------------------------------------------------
/pup/pup.go:
--------------------------------------------------------------------------------
 1 | package pup
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"os"
 6 | 	"io"
 7 | 	"golang.org/x/net/html"
 8 | )
 9 | 
10 | //      _=,_
11 | //   o_/6 /#\
12 | //   \__ |##/
13 | //    ='|--\
14 | //      /   #'-.
15 | //      \#|_   _'-. /
16 | //       |/ \_( # |"
17 | //      C/ ,--___/
18 | 
// VERSION is the version string of the pup package.
var VERSION = "0.4.0"
20 | 
21 | func Run(_pupIn io.ReadCloser) []byte {
22 | 	// process flags and arguments
23 | 	cmds, err := ParseArgs()
24 | 	if err != nil {
25 | 		fmt.Fprintf(os.Stderr, "%s\n", err.Error())
26 | 		os.Exit(2)
27 | 	}
28 | 
29 | 	pupIn = _pupIn
30 | 	// Parse the input and get the root node
31 | 	root, err := ParseHTML(pupIn, pupCharset)
32 | 	if err != nil {
33 | 		fmt.Fprintf(os.Stderr, "%s\n", err.Error())
34 | 		os.Exit(2)
35 | 	}
36 | 	pupIn.Close()
37 | 
38 | 	// Parse the selectors
39 | 	selectorFuncs := []SelectorFunc{}
40 | 	funcGenerator := Select
41 | 	var cmd string
42 | 	for len(cmds) > 0 {
43 | 		cmd, cmds = cmds[0], cmds[1:]
44 | 		if len(cmds) == 0 {
45 | 			if err := ParseDisplayer(cmd); err == nil {
46 | 				continue
47 | 			}
48 | 		}
49 | 		switch cmd {
50 | 		case "*": // select all
51 | 			continue
52 | 		case ">":
53 | 			funcGenerator = SelectFromChildren
54 | 		case "+":
55 | 			funcGenerator = SelectNextSibling
56 | 		case ",": // nil will signify a comma
57 | 			selectorFuncs = append(selectorFuncs, nil)
58 | 		default:
59 | 			selector, err := ParseSelector(cmd)
60 | 			if err != nil {
61 | 				fmt.Fprintf(os.Stderr, "Selector parsing error: %s\n", err.Error())
62 | 				os.Exit(2)
63 | 			}
64 | 			selectorFuncs = append(selectorFuncs, funcGenerator(selector))
65 | 			funcGenerator = Select
66 | 		}
67 | 	}
68 | 
69 | 	selectedNodes := []*html.Node{}
70 | 	currNodes := []*html.Node{root}
71 | 	for _, selectorFunc := range selectorFuncs {
72 | 		if selectorFunc == nil { // hit a comma
73 | 			selectedNodes = append(selectedNodes, currNodes...)
74 | 			currNodes = []*html.Node{root}
75 | 		} else {
76 | 			currNodes = selectorFunc(currNodes)
77 | 		}
78 | 	}
79 | 	selectedNodes = append(selectedNodes, currNodes...)
80 | 	// fmt.Println("Selected Node: ",&selectedNodes)
81 | 	// pupDisplayer.Display(selectedNodes)
82 | 	return GetJSONByte(selectedNodes)
83 | 	// fmt.Println("results",results)
84 | }
85 | 


--------------------------------------------------------------------------------
/pup/selector.go:
--------------------------------------------------------------------------------
  1 | package pup
  2 | 
  3 | import (
  4 | 	"bytes"
  5 | 	"fmt"
  6 | 	"regexp"
  7 | 	"strconv"
  8 | 	"strings"
  9 | 	"text/scanner"
 10 | 
 11 | 	"golang.org/x/net/html"
 12 | )
 13 | 
 14 | type Selector interface {
 15 | 	Match(node *html.Node) bool
 16 | }
 17 | 
 18 | type SelectorFunc func(nodes []*html.Node) []*html.Node
 19 | 
 20 | func Select(s Selector) SelectorFunc {
 21 | 	// have to define first to be able to do recursion
 22 | 	var selectChildren func(node *html.Node) []*html.Node
 23 | 	selectChildren = func(node *html.Node) []*html.Node {
 24 | 		selected := []*html.Node{}
 25 | 		for child := node.FirstChild; child != nil; child = child.NextSibling {
 26 | 			if s.Match(child) {
 27 | 				selected = append(selected, child)
 28 | 			} else {
 29 | 				selected = append(selected, selectChildren(child)...)
 30 | 			}
 31 | 		}
 32 | 		return selected
 33 | 	}
 34 | 	return func(nodes []*html.Node) []*html.Node {
 35 | 		selected := []*html.Node{}
 36 | 		for _, node := range nodes {
 37 | 			selected = append(selected, selectChildren(node)...)
 38 | 		}
 39 | 		return selected
 40 | 	}
 41 | }
 42 | 
 43 | // Defined for the '>' selector
 44 | func SelectNextSibling(s Selector) SelectorFunc {
 45 | 	return func(nodes []*html.Node) []*html.Node {
 46 | 		selected := []*html.Node{}
 47 | 		for _, node := range nodes {
 48 | 			for ns := node.NextSibling; ns != nil; ns = ns.NextSibling {
 49 | 				if ns.Type == html.ElementNode {
 50 | 					if s.Match(ns) {
 51 | 						selected = append(selected, ns)
 52 | 					}
 53 | 					break
 54 | 				}
 55 | 			}
 56 | 		}
 57 | 		return selected
 58 | 	}
 59 | }
 60 | 
 61 | // Defined for the '+' selector
 62 | func SelectFromChildren(s Selector) SelectorFunc {
 63 | 	return func(nodes []*html.Node) []*html.Node {
 64 | 		selected := []*html.Node{}
 65 | 		for _, node := range nodes {
 66 | 			for c := node.FirstChild; c != nil; c = c.NextSibling {
 67 | 				if s.Match(c) {
 68 | 					selected = append(selected, c)
 69 | 				}
 70 | 			}
 71 | 		}
 72 | 		return selected
 73 | 	}
 74 | }
 75 | 
 76 | type PseudoClass func(*html.Node) bool
 77 | 
 78 | type CSSSelector struct {
 79 | 	Tag    string
 80 | 	Attrs  map[string]*regexp.Regexp
 81 | 	Pseudo PseudoClass
 82 | }
 83 | 
 84 | func (s CSSSelector) Match(node *html.Node) bool {
 85 | 	if node.Type != html.ElementNode {
 86 | 		return false
 87 | 	}
 88 | 	if s.Tag != "" {
 89 | 		if s.Tag != node.DataAtom.String() {
 90 | 			return false
 91 | 		}
 92 | 	}
 93 | 	for attrKey, matcher := range s.Attrs {
 94 | 		matched := false
 95 | 		for _, attr := range node.Attr {
 96 | 			if attrKey == attr.Key {
 97 | 				if !matcher.MatchString(attr.Val) {
 98 | 					return false
 99 | 				}
100 | 				matched = true
101 | 				break
102 | 			}
103 | 		}
104 | 		if !matched {
105 | 			return false
106 | 		}
107 | 	}
108 | 	if s.Pseudo == nil {
109 | 		return true
110 | 	}
111 | 	return s.Pseudo(node)
112 | }
113 | 
114 | // Parse a selector
115 | // e.g. `div#my-button.btn[href^="http"]`
116 | func ParseSelector(cmd string) (selector CSSSelector, err error) {
117 | 	selector = CSSSelector{
118 | 		Tag:    "",
119 | 		Attrs:  map[string]*regexp.Regexp{},
120 | 		Pseudo: nil,
121 | 	}
122 | 	var s scanner.Scanner
123 | 	s.Init(strings.NewReader(cmd))
124 | 	err = ParseTagMatcher(&selector, s)
125 | 	return
126 | }
127 | 
128 | // Parse the initial tag
129 | // e.g. `div`
130 | func ParseTagMatcher(selector *CSSSelector, s scanner.Scanner) error {
131 | 	tag := bytes.NewBuffer([]byte{})
132 | 	defer func() {
133 | 		selector.Tag = tag.String()
134 | 	}()
135 | 	for {
136 | 		c := s.Next()
137 | 		switch c {
138 | 		case scanner.EOF:
139 | 			return nil
140 | 		case '.':
141 | 			return ParseClassMatcher(selector, s)
142 | 		case '#':
143 | 			return ParseIdMatcher(selector, s)
144 | 		case '[':
145 | 			return ParseAttrMatcher(selector, s)
146 | 		case ':':
147 | 			return ParsePseudo(selector, s)
148 | 		default:
149 | 			if _, err := tag.WriteRune(c); err != nil {
150 | 				return err
151 | 			}
152 | 		}
153 | 	}
154 | }
155 | 
156 | // Parse a class matcher
157 | // e.g. `.btn`
158 | func ParseClassMatcher(selector *CSSSelector, s scanner.Scanner) error {
159 | 	var class bytes.Buffer
160 | 	defer func() {
161 | 		regexpStr := `(\A|\s)` + regexp.QuoteMeta(class.String()) + `(\s|\z)`
162 | 		selector.Attrs["class"] = regexp.MustCompile(regexpStr)
163 | 	}()
164 | 	for {
165 | 		c := s.Next()
166 | 		switch c {
167 | 		case scanner.EOF:
168 | 			return nil
169 | 		case '.':
170 | 			return ParseClassMatcher(selector, s)
171 | 		case '#':
172 | 			return ParseIdMatcher(selector, s)
173 | 		case '[':
174 | 			return ParseAttrMatcher(selector, s)
175 | 		case ':':
176 | 			return ParsePseudo(selector, s)
177 | 		default:
178 | 			if _, err := class.WriteRune(c); err != nil {
179 | 				return err
180 | 			}
181 | 		}
182 | 	}
183 | }
184 | 
185 | // Parse an id matcher
186 | // e.g. `#my-picture`
187 | func ParseIdMatcher(selector *CSSSelector, s scanner.Scanner) error {
188 | 	var id bytes.Buffer
189 | 	defer func() {
190 | 		regexpStr := `^` + regexp.QuoteMeta(id.String()) + `$`
191 | 		selector.Attrs["id"] = regexp.MustCompile(regexpStr)
192 | 	}()
193 | 	for {
194 | 		c := s.Next()
195 | 		switch c {
196 | 		case scanner.EOF:
197 | 			return nil
198 | 		case '.':
199 | 			return ParseClassMatcher(selector, s)
200 | 		case '#':
201 | 			return ParseIdMatcher(selector, s)
202 | 		case '[':
203 | 			return ParseAttrMatcher(selector, s)
204 | 		case ':':
205 | 			return ParsePseudo(selector, s)
206 | 		default:
207 | 			if _, err := id.WriteRune(c); err != nil {
208 | 				return err
209 | 			}
210 | 		}
211 | 	}
212 | }
213 | 
214 | // Parse an attribute matcher
215 | // e.g. `[attr^="http"]`
216 | func ParseAttrMatcher(selector *CSSSelector, s scanner.Scanner) error {
217 | 	var attrKey bytes.Buffer
218 | 	var attrVal bytes.Buffer
219 | 	hasMatchVal := false
220 | 	matchType := '='
221 | 	defer func() {
222 | 		if hasMatchVal {
223 | 			var regexpStr string
224 | 			switch matchType {
225 | 			case '=':
226 | 				regexpStr = `^` + regexp.QuoteMeta(attrVal.String()) + `$`
227 | 			case '*':
228 | 				regexpStr = regexp.QuoteMeta(attrVal.String())
229 | 			case '$':
230 | 				regexpStr = regexp.QuoteMeta(attrVal.String()) + `$`
231 | 			case '^':
232 | 				regexpStr = `^` + regexp.QuoteMeta(attrVal.String())
233 | 			case '~':
234 | 				regexpStr = `(\A|\s)` + regexp.QuoteMeta(attrVal.String()) + `(\s|\z)`
235 | 			}
236 | 			selector.Attrs[attrKey.String()] = regexp.MustCompile(regexpStr)
237 | 		} else {
238 | 			selector.Attrs[attrKey.String()] = regexp.MustCompile(`^.*$`)
239 | 		}
240 | 	}()
241 | 	// After reaching ']' proceed
242 | 	proceed := func() error {
243 | 		switch s.Next() {
244 | 		case scanner.EOF:
245 | 			return nil
246 | 		case '.':
247 | 			return ParseClassMatcher(selector, s)
248 | 		case '#':
249 | 			return ParseIdMatcher(selector, s)
250 | 		case '[':
251 | 			return ParseAttrMatcher(selector, s)
252 | 		case ':':
253 | 			return ParsePseudo(selector, s)
254 | 		default:
255 | 			return fmt.Errorf("Expected selector indicator after ']'")
256 | 		}
257 | 	}
258 | 	// Parse the attribute key matcher
259 | 	for !hasMatchVal {
260 | 		c := s.Next()
261 | 		switch c {
262 | 		case scanner.EOF:
263 | 			return fmt.Errorf("Unmatched open brace '['")
264 | 		case ']':
265 | 			// No attribute value matcher, proceed!
266 | 			return proceed()
267 | 		case '$', '^', '~', '*':
268 | 			matchType = c
269 | 			hasMatchVal = true
270 | 			if s.Next() != '=' {
271 | 				return fmt.Errorf("'%c' must be followed by a '='", matchType)
272 | 			}
273 | 		case '=':
274 | 			matchType = c
275 | 			hasMatchVal = true
276 | 		default:
277 | 			if _, err := attrKey.WriteRune(c); err != nil {
278 | 				return err
279 | 			}
280 | 		}
281 | 	}
282 | 	// figure out if the value is quoted
283 | 	c := s.Next()
284 | 	inQuote := false
285 | 	switch c {
286 | 	case scanner.EOF:
287 | 		return fmt.Errorf("Unmatched open brace '['")
288 | 	case ']':
289 | 		return proceed()
290 | 	case '"':
291 | 		inQuote = true
292 | 	default:
293 | 		if _, err := attrVal.WriteRune(c); err != nil {
294 | 			return err
295 | 		}
296 | 	}
297 | 	if inQuote {
298 | 		for {
299 | 			c := s.Next()
300 | 			switch c {
301 | 			case '\\':
302 | 				// consume another character
303 | 				if c = s.Next(); c == scanner.EOF {
304 | 					return fmt.Errorf("Unmatched open brace '['")
305 | 				}
306 | 			case '"':
307 | 				switch s.Next() {
308 | 				case ']':
309 | 					return proceed()
310 | 				default:
311 | 					return fmt.Errorf("Quote must end at ']'")
312 | 				}
313 | 			}
314 | 			if _, err := attrVal.WriteRune(c); err != nil {
315 | 				return err
316 | 			}
317 | 		}
318 | 	} else {
319 | 		for {
320 | 			c := s.Next()
321 | 			switch c {
322 | 			case scanner.EOF:
323 | 				return fmt.Errorf("Unmatched open brace '['")
324 | 			case ']':
325 | 				// No attribute value matcher, proceed!
326 | 				return proceed()
327 | 			}
328 | 			if _, err := attrVal.WriteRune(c); err != nil {
329 | 				return err
330 | 			}
331 | 		}
332 | 	}
333 | }
334 | 
335 | // Parse the selector after ':'
336 | func ParsePseudo(selector *CSSSelector, s scanner.Scanner) error {
337 | 	if selector.Pseudo != nil {
338 | 		return fmt.Errorf("Combined multiple pseudo classes")
339 | 	}
340 | 	var b bytes.Buffer
341 | 	for s.Peek() != scanner.EOF {
342 | 		if _, err := b.WriteRune(s.Next()); err != nil {
343 | 			return err
344 | 		}
345 | 	}
346 | 	cmd := b.String()
347 | 	var err error
348 | 	switch {
349 | 	case cmd == "empty":
350 | 		selector.Pseudo = func(n *html.Node) bool {
351 | 			return n.FirstChild == nil
352 | 		}
353 | 	case cmd == "first-child":
354 | 		selector.Pseudo = firstChildPseudo
355 | 	case cmd == "last-child":
356 | 		selector.Pseudo = lastChildPseudo
357 | 	case cmd == "only-child":
358 | 		selector.Pseudo = func(n *html.Node) bool {
359 | 			return firstChildPseudo(n) && lastChildPseudo(n)
360 | 		}
361 | 	case cmd == "first-of-type":
362 | 		selector.Pseudo = firstOfTypePseudo
363 | 	case cmd == "last-of-type":
364 | 		selector.Pseudo = lastOfTypePseudo
365 | 	case cmd == "only-of-type":
366 | 		selector.Pseudo = func(n *html.Node) bool {
367 | 			return firstOfTypePseudo(n) && lastOfTypePseudo(n)
368 | 		}
369 | 	case strings.HasPrefix(cmd, "contains("):
370 | 		selector.Pseudo, err = parseContainsPseudo(cmd[len("contains("):])
371 | 		if err != nil {
372 | 			return err
373 | 		}
374 | 	case strings.HasPrefix(cmd, "nth-child("),
375 | 		strings.HasPrefix(cmd, "nth-last-child("),
376 | 		strings.HasPrefix(cmd, "nth-last-of-type("),
377 | 		strings.HasPrefix(cmd, "nth-of-type("):
378 | 		if selector.Pseudo, err = parseNthPseudo(cmd); err != nil {
379 | 			return err
380 | 		}
381 | 	case strings.HasPrefix(cmd, "not("):
382 | 		if selector.Pseudo, err = parseNotPseudo(cmd[len("not("):]); err != nil {
383 | 			return err
384 | 		}
385 | 	case strings.HasPrefix(cmd, "parent-of("):
386 | 		if selector.Pseudo, err = parseParentOfPseudo(cmd[len("parent-of("):]); err != nil {
387 | 			return err
388 | 		}
389 | 	default:
390 | 		return fmt.Errorf("%s not a valid pseudo class", cmd)
391 | 	}
392 | 	return nil
393 | }
394 | 
395 | // :first-of-child
396 | func firstChildPseudo(n *html.Node) bool {
397 | 	for c := n.PrevSibling; c != nil; c = c.PrevSibling {
398 | 		if c.Type == html.ElementNode {
399 | 			return false
400 | 		}
401 | 	}
402 | 	return true
403 | }
404 | 
405 | // :last-of-child
406 | func lastChildPseudo(n *html.Node) bool {
407 | 	for c := n.NextSibling; c != nil; c = c.NextSibling {
408 | 		if c.Type == html.ElementNode {
409 | 			return false
410 | 		}
411 | 	}
412 | 	return true
413 | }
414 | 
415 | // :first-of-type
416 | func firstOfTypePseudo(node *html.Node) bool {
417 | 	if node.Type != html.ElementNode {
418 | 		return false
419 | 	}
420 | 	for n := node.PrevSibling; n != nil; n = n.PrevSibling {
421 | 		if n.DataAtom == node.DataAtom {
422 | 			return false
423 | 		}
424 | 	}
425 | 	return true
426 | }
427 | 
428 | // :last-of-type
429 | func lastOfTypePseudo(node *html.Node) bool {
430 | 	if node.Type != html.ElementNode {
431 | 		return false
432 | 	}
433 | 	for n := node.NextSibling; n != nil; n = n.NextSibling {
434 | 		if n.DataAtom == node.DataAtom {
435 | 			return false
436 | 		}
437 | 	}
438 | 	return true
439 | }
440 | 
441 | func parseNthPseudo(cmd string) (PseudoClass, error) {
442 | 	i := strings.IndexRune(cmd, '(')
443 | 	if i < 0 {
444 | 		// really, we should never get here
445 | 		return nil, fmt.Errorf("Fatal error, '%s' does not contain a '('", cmd)
446 | 	}
447 | 	pseudoName := cmd[:i]
448 | 	// Figure out how the counting function works
449 | 	var countNth func(*html.Node) int
450 | 	switch pseudoName {
451 | 	case "nth-child":
452 | 		countNth = func(n *html.Node) int {
453 | 			nth := 1
454 | 			for sib := n.PrevSibling; sib != nil; sib = sib.PrevSibling {
455 | 				if sib.Type == html.ElementNode {
456 | 					nth++
457 | 				}
458 | 			}
459 | 			return nth
460 | 		}
461 | 	case "nth-of-type":
462 | 		countNth = func(n *html.Node) int {
463 | 			nth := 1
464 | 			for sib := n.PrevSibling; sib != nil; sib = sib.PrevSibling {
465 | 				if sib.Type == html.ElementNode && sib.DataAtom == n.DataAtom {
466 | 					nth++
467 | 				}
468 | 			}
469 | 			return nth
470 | 		}
471 | 	case "nth-last-child":
472 | 		countNth = func(n *html.Node) int {
473 | 			nth := 1
474 | 			for sib := n.NextSibling; sib != nil; sib = sib.NextSibling {
475 | 				if sib.Type == html.ElementNode {
476 | 					nth++
477 | 				}
478 | 			}
479 | 			return nth
480 | 		}
481 | 	case "nth-last-of-type":
482 | 		countNth = func(n *html.Node) int {
483 | 			nth := 1
484 | 			for sib := n.NextSibling; sib != nil; sib = sib.NextSibling {
485 | 				if sib.Type == html.ElementNode && sib.DataAtom == n.DataAtom {
486 | 					nth++
487 | 				}
488 | 			}
489 | 			return nth
490 | 		}
491 | 	default:
492 | 		return nil, fmt.Errorf("Unrecognized pseudo '%s'", pseudoName)
493 | 	}
494 | 
495 | 	nthString := cmd[i+1:]
496 | 	i = strings.IndexRune(nthString, ')')
497 | 	if i < 0 {
498 | 		return nil, fmt.Errorf("Unmatched '(' for pseudo class %s", pseudoName)
499 | 	} else if i != len(nthString)-1 {
500 | 		return nil, fmt.Errorf("%s(n) must end selector", pseudoName)
501 | 	}
502 | 	number := nthString[:i]
503 | 
504 | 	// Check if the number is 'odd' or 'even'
505 | 	oddOrEven := -1
506 | 	switch number {
507 | 	case "odd":
508 | 		oddOrEven = 1
509 | 	case "even":
510 | 		oddOrEven = 0
511 | 	}
512 | 	if oddOrEven > -1 {
513 | 		return func(n *html.Node) bool {
514 | 			return n.Type == html.ElementNode && countNth(n)%2 == oddOrEven
515 | 		}, nil
516 | 	}
517 | 	// Check against '3n+4' pattern
518 | 	r := regexp.MustCompile(`([0-9]+)n[ ]?\+[ ]?([0-9])`)
519 | 	subMatch := r.FindAllStringSubmatch(number, -1)
520 | 	if len(subMatch) == 1 && len(subMatch[0]) == 3 {
521 | 		cycle, _ := strconv.Atoi(subMatch[0][1])
522 | 		offset, _ := strconv.Atoi(subMatch[0][2])
523 | 		return func(n *html.Node) bool {
524 | 			return n.Type == html.ElementNode && countNth(n)%cycle == offset
525 | 		}, nil
526 | 	}
527 | 	// check against 'n+2' pattern
528 | 	r = regexp.MustCompile(`n[ ]?\+[ ]?([0-9])`)
529 | 	subMatch = r.FindAllStringSubmatch(number, -1)
530 | 	if len(subMatch) == 1 && len(subMatch[0]) == 2 {
531 | 		offset, _ := strconv.Atoi(subMatch[0][1])
532 | 		return func(n *html.Node) bool {
533 | 			return n.Type == html.ElementNode && countNth(n) >= offset
534 | 		}, nil
535 | 	}
536 | 	// the only other option is a numeric value
537 | 	nth, err := strconv.Atoi(nthString[:i])
538 | 	if err != nil {
539 | 		return nil, err
540 | 	} else if nth <= 0 {
541 | 		return nil, fmt.Errorf("Argument to '%s' must be greater than 0", pseudoName)
542 | 	}
543 | 	return func(n *html.Node) bool {
544 | 		return n.Type == html.ElementNode && countNth(n) == nth
545 | 	}, nil
546 | }
547 | 
548 | // Parse a :contains("") selector
549 | // expects the input to be everything after the open parenthesis
550 | // e.g. for `contains("Help")` the argument would be `"Help")`
551 | func parseContainsPseudo(cmd string) (PseudoClass, error) {
552 | 	var s scanner.Scanner
553 | 	s.Init(strings.NewReader(cmd))
554 | 	switch s.Next() {
555 | 	case '"':
556 | 	default:
557 | 		return nil, fmt.Errorf("Malformed 'contains(\"\")' selector")
558 | 	}
559 | 	textToContain := bytes.NewBuffer([]byte{})
560 | 	for {
561 | 		r := s.Next()
562 | 		switch r {
563 | 		case '"':
564 | 			// ')' then EOF must follow '"'
565 | 			if s.Next() != ')' {
566 | 				return nil, fmt.Errorf("Malformed 'contains(\"\")' selector")
567 | 			}
568 | 			if s.Next() != scanner.EOF {
569 | 				return nil, fmt.Errorf("'contains(\"\")' must end selector")
570 | 			}
571 | 			text := textToContain.String()
572 | 			contains := func(node *html.Node) bool {
573 | 				for c := node.FirstChild; c != nil; c = c.NextSibling {
574 | 					if c.Type == html.TextNode {
575 | 						if strings.Contains(c.Data, text) {
576 | 							return true
577 | 						}
578 | 					}
579 | 				}
580 | 				return false
581 | 			}
582 | 			return contains, nil
583 | 		case '\\':
584 | 			s.Next()
585 | 		case scanner.EOF:
586 | 			return nil, fmt.Errorf("Malformed 'contains(\"\")' selector")
587 | 		default:
588 | 			if _, err := textToContain.WriteRune(r); err != nil {
589 | 				return nil, err
590 | 			}
591 | 		}
592 | 	}
593 | }
594 | 
595 | // Parse a :not(selector) selector
596 | // expects the input to be everything after the open parenthesis
597 | // e.g. for `not(div#id)` the argument would be `div#id)`
598 | func parseNotPseudo(cmd string) (PseudoClass, error) {
599 | 	if len(cmd) < 2 {
600 | 		return nil, fmt.Errorf("malformed ':not' selector")
601 | 	}
602 | 	endQuote, cmd := cmd[len(cmd)-1], cmd[:len(cmd)-1]
603 | 	selector, err := ParseSelector(cmd)
604 | 	if err != nil {
605 | 		return nil, err
606 | 	}
607 | 	if endQuote != ')' {
608 | 		return nil, fmt.Errorf("unmatched '('")
609 | 	}
610 | 	return func(n *html.Node) bool {
611 | 		return !selector.Match(n)
612 | 	}, nil
613 | }
614 | 
615 | // Parse a :parent-of(selector) selector
616 | // expects the input to be everything after the open parenthesis
617 | // e.g. for `parent-of(div#id)` the argument would be `div#id)`
618 | func parseParentOfPseudo(cmd string) (PseudoClass, error) {
619 | 	if len(cmd) < 2 {
620 | 		return nil, fmt.Errorf("malformed ':parent-of' selector")
621 | 	}
622 | 	endQuote, cmd := cmd[len(cmd)-1], cmd[:len(cmd)-1]
623 | 	selector, err := ParseSelector(cmd)
624 | 	if err != nil {
625 | 		return nil, err
626 | 	}
627 | 	if endQuote != ')' {
628 | 		return nil, fmt.Errorf("unmatched '('")
629 | 	}
630 | 	return func(n *html.Node) bool {
631 | 		for c := n.FirstChild; c != nil; c = c.NextSibling {
632 | 			if c.Type == html.ElementNode && selector.Match(c) {
633 | 				return true
634 | 			}
635 | 		}
636 | 		return false
637 | 	}, nil
638 | }
639 | 


--------------------------------------------------------------------------------
/run:
--------------------------------------------------------------------------------
#!/bin/bash
# Build with GOPATH pointing at ./vendor, then run the awesome-movies binary
# with the arguments given to this script.
MOVIES_PKG=$PWD/vendor
export GOPATH=$MOVIES_PKG
go build
# "$@" (quoted) forwards each argument unchanged; unquoted $@ would re-split
# arguments that contain spaces, such as a quoted movie name.
awesome-movies "$@"
6 | 


--------------------------------------------------------------------------------
/tamilmv.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | 
 4 | import (
 5 | 	"fmt"
 6 | 	"os"
 7 | 	"net/http"
 8 | //	"net/url"
 9 | 	"encoding/json"
10 | 	"strconv"
11 | 
12 | 	"./pup"
13 | 	"github.com/fatih/color"
14 | )
// TamilMV holds the data scraped from the TamilMV site for one search.
type TamilMV struct {
	// SearchResults is decoded from the JSON produced for the anchors of the
	// search result page: link target and link text.
	SearchResults []struct {
		Href string `json:"href"`
		Text string `json:"text"`
	}

	// DownloadResults is decoded from the JSON produced for the attachment
	// links of the selected result page.
	DownloadResults []struct {
		Href string `json:"href"`
	}
}
26 | 
27 | var (
28 | 	searchURL string = "https://www.tamilmv.cz/index.php?/search/"
29 | 	TAMILMV = TamilMV{}
30 | )
31 | 
32 | func SearchTamilMovies(query string){
33 | 	//resp,err := http.PostForm(searchURL,url.Values{"type":{"all"},"q":{query}})
34 | 	resp,err := http.Get(searchURL+"&q="+UrlEncoded(query)+"&nodes=1,2,3")
35 | 	if err != nil {
36 | 		onHttpError(err)
37 | 	}
38 | 	os.Args = []string{
39 | 		"anbuksv",
40 | 		"li.ipsStreamItem > div > div > div > h2 > div > a json{}",
41 | 	}
42 | 	searchResultNodes := pup.Run(resp.Body)
43 | 	json.Unmarshal(searchResultNodes,&TAMILMV.SearchResults)
44 | 	moviesResultCheck(len(TAMILMV.SearchResults))
45 | 	fmt.Printf("%s",listTamilMVMovies())
46 | 	downloadIndex := getConformation(len(TAMILMV.SearchResults))
47 | 	torrentHtml := downloadHTML(TAMILMV.SearchResults[downloadIndex].Href)
48 | 	os.Args = []string{
49 | 		"anbuksv",
50 | 		".ipsAttachLink json{}",
51 | 	}
52 | 	torrentHtmlNode := pup.Run(torrentHtml)
53 | 	json.Unmarshal(torrentHtmlNode,&TAMILMV.DownloadResults)
54 | 	color.Set(color.FgYellow, color.Bold)
55 | 	if len(TAMILMV.DownloadResults) > 0 {
56 | 		fmt.Println(TAMILMV.DownloadResults[0].Href)
57 | 	} else {
58 | 		fmt.Println("Sorry,We are unable to process your request")
59 | 	}
60 | 	color.Unset()
61 | }
62 | 
63 | func listTamilMVMovies() string {
64 | 	var _movies string = ""
65 | 	for index,movie := range(TAMILMV.SearchResults){
66 | 		_movies = _movies + strconv.Itoa(index+1) + ". " + movie.Text+ "\n"
67 | 	}
68 | 	return _movies
69 | }
70 | 


--------------------------------------------------------------------------------