├── LICENSE ├── README.md ├── extractor └── extractor.go ├── go.mod ├── go.sum ├── main.go └── runner ├── objects.go └── runner.go /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 003random 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 |

JavaScript Extraction CLI & Package

3 |

4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |

14 | 15 | 16 | This is a powerful tool for extracting JavaScript sources from URLs and web pages / HTTP responses. It offers a command-line interface (CLI) for straightforward URL processing and a package interface for custom integrations, making it ideal for pentesters, bug bounty hunters, and developers needing to extract JS sources efficiently. 17 | ## Table of Contents 18 | 19 | - [Installation](#installation) 20 | - [CLI Usage](#cli-usage) 21 | - [Options](#options) 22 | - [Examples](#examples) 23 | - [Package Usage](#package-usage) 24 | - [Importing the Extractor](#importing-the-extractor) 25 | - [Example](#example) 26 | - [Version Information](#version-information) 27 | - [Contributing](#contributing) 28 | - [License](#license) 29 | 30 | ## Installation 31 | 32 | To install `getJS`, use the following command: 33 | 34 | `go install github.com/003random/getJS/v2@latest` 35 | 36 | ## CLI Usage 37 | 38 | ### Options 39 | 40 | `getJS` provides several command-line options to customize its behavior: 41 | 42 | - `-url string`: The URL from which JavaScript sources should be extracted. 43 | - `-input string`: Optional input file of URLs, one URL per line in plain text format. Can be used multiple times. 44 | - `-output string`: Optional output file to which the results are written. Can be used multiple times. 45 | - `-complete`: Complete/Autofill relative URLs by adding the current origin. 46 | - `-resolve`: Resolve the JavaScript files. Can only be used in combination with `-complete`. 47 | - `-threads int`: The number of processing threads to spawn (default: 2). 48 | - `-verbose`: Print verbose runtime information and errors. 49 | - `-method string`: The request method used to fetch remote contents (default: "GET"). 50 | - `-header string`: Optional request header to add to the requests. Can be used multiple times. 51 | - `-timeout duration`: The request timeout while fetching remote contents (default: 5s). 
52 | 53 | ### Examples 54 | 55 | #### Extracting JavaScript from a Single URL 56 | 57 | `getJS -url https://destroy.ai` 58 | 59 | or 60 | 61 | `curl https://destroy.ai | getJS` 62 | 63 | #### Using Custom Request Options 64 | 65 | `getJS -url "http://example.com" -header "User-Agent: foo bar" -method POST --timeout=15s` 66 | 67 | #### Processing Multiple URLs from a File 68 | 69 | `getJS -input foo.txt -input bar.txt` 70 | 71 | #### Saving Results to an Output File 72 | 73 | `getJS -url "http://example.com" -output results.txt` 74 | 75 | ## Package Usage 76 | 77 | ### Importing the Extractor 78 | 79 | To use `getJS` as a package, you need to import the `extractor` package and utilize its functions directly. 80 | 81 | ### Example 82 | 83 | ```Go 84 | package main 85 | 86 | import ( 87 | "fmt" 88 | "log" 89 | "net/http" 90 | "net/url" 91 | 92 | "github.com/003random/getJS/v2/extractor" 93 | ) 94 | 95 | func main() { 96 | baseURL, err := url.Parse("https://google.com") 97 | if (err != nil) { 98 | log.Fatalf("Error parsing base URL: %v", err) 99 | } 100 | 101 | resp, err := extractor.FetchResponse(baseURL.String(), "GET", http.Header{}) 102 | if (err != nil) { 103 | log.Fatalf("Error fetching response: %v", err) 104 | } 105 | defer resp.Body.Close() 106 | 107 | // Custom extraction points (optional). 108 | extractionPoints := map[string][]string{ 109 | "script": {"src", "data-src"}, 110 | "a": {"href"}, 111 | } 112 | 113 | sources, err := extractor.ExtractSources(resp.Body, extractionPoints) 114 | if (err != nil) { 115 | log.Fatalf("Error extracting sources: %v", err) 116 | } 117 | 118 | // Filtering and extending extracted sources. 
119 | filtered, err := extractor.Filter(sources, extractor.WithComplete(baseURL), extractor.WithResolve()) 120 | if (err != nil) { 121 | log.Fatalf("Error filtering sources: %v", err) 122 | } 123 | 124 | for source := range filtered { 125 | fmt.Println(source.String()) 126 | } 127 | } 128 | ``` 129 | 130 | ## Version Information 131 | 132 | This is the v2 version of `getJS`. The original version can be found under the tag [v1](https://github.com/003random/getJS/tree/v1). 133 | 134 | ## Contributing 135 | 136 | Contributions are welcome! Please open an issue or submit a pull request for any bugs, feature requests, or improvements. 137 | 138 | ## License 139 | 140 | This project is licensed under the MIT License. See the [LICENSE](https://github.com/003random/getJS/blob/master/LICENSE) file for details. 141 | -------------------------------------------------------------------------------- /extractor/extractor.go: -------------------------------------------------------------------------------- 1 | package extractor 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "log" 7 | "net/http" 8 | "net/url" 9 | 10 | "github.com/PuerkitoBio/goquery" 11 | ) 12 | 13 | // ExtractionPoints defines the default HTML tags and their attributes from which JavaScript sources are extracted. 14 | var ExtractionPoints = map[string][]string{ 15 | "script": {"src", "data-src"}, 16 | } 17 | 18 | // FetchResponse fetches the HTTP response for the given URL. 19 | func FetchResponse(u string, method string, headers http.Header) (*http.Response, error) { 20 | req, err := http.NewRequest(method, u, nil) 21 | if err != nil { 22 | return nil, err 23 | } 24 | 25 | req.Header = headers 26 | 27 | return http.DefaultClient.Do(req) 28 | } 29 | 30 | // ExtractSources extracts all JavaScript sources found in the provided HTTP response reader. 
31 | // The optional extractionPoints can be used to overwrite the default extraction points map 32 | // with a set of HTML tag names, together with a list of what attributes to extract from. 33 | func ExtractSources(input io.Reader, extractionPoints ...map[string][]string) (<-chan url.URL, error) { 34 | doc, err := goquery.NewDocumentFromReader(input) 35 | if err != nil { 36 | return nil, err 37 | } 38 | 39 | var ( 40 | urls = make(chan url.URL) 41 | points = ExtractionPoints 42 | ) 43 | 44 | if len(extractionPoints) > 0 { 45 | points = extractionPoints[0] 46 | } 47 | 48 | go func() { 49 | defer close(urls) 50 | for tag, attributes := range points { 51 | doc.Find(tag).Each(func(i int, s *goquery.Selection) { 52 | for _, a := range attributes { 53 | if value, exists := s.Attr(a); exists { 54 | u, err := url.Parse(value) 55 | if err != nil { 56 | log.Println(fmt.Errorf("invalid attribute value %s cannot be parsed to a URL: %w", value, err)) 57 | continue 58 | } 59 | 60 | urls <- *u 61 | } 62 | } 63 | }) 64 | } 65 | }() 66 | 67 | return urls, nil 68 | } 69 | 70 | // Filter applies options to filter URLs from the input channel. 71 | func Filter(input <-chan url.URL, options ...func([]url.URL) []url.URL) (<-chan url.URL, error) { 72 | output := make(chan url.URL) 73 | go func() { 74 | defer close(output) 75 | var urls []url.URL 76 | for u := range input { 77 | urls = append(urls, u) 78 | } 79 | 80 | for _, option := range options { 81 | urls = option(urls) 82 | } 83 | 84 | for _, u := range urls { 85 | output <- u 86 | } 87 | }() 88 | return output, nil 89 | } 90 | 91 | // WithComplete is an option to complete relative URLs. 92 | func WithComplete(base *url.URL) func([]url.URL) []url.URL { 93 | return func(urls []url.URL) []url.URL { 94 | var result []url.URL 95 | for _, u := range urls { 96 | result = append(result, complete(u, base)) 97 | } 98 | return result 99 | } 100 | } 101 | 102 | // WithResolve is an option to filter URLs that resolve successfully. 
103 | func WithResolve() func([]url.URL) []url.URL { 104 | return func(urls []url.URL) []url.URL { 105 | var result []url.URL 106 | for _, u := range urls { 107 | if resolve(u) { 108 | result = append(result, u) 109 | } 110 | } 111 | return result 112 | } 113 | } 114 | 115 | // complete completes relative URLs by adding the base URL. 116 | func complete(source url.URL, base *url.URL) url.URL { 117 | if source.IsAbs() { 118 | return source 119 | } 120 | return *base.ResolveReference(&source) 121 | } 122 | 123 | // resolve checks if the provided URL resolves successfully. 124 | func resolve(source url.URL) bool { 125 | resp, err := http.Get(source.String()) 126 | if err != nil { 127 | return false 128 | } 129 | defer resp.Body.Close() 130 | 131 | _, err = io.Copy(io.Discard, resp.Body) 132 | return err == nil && (resp.StatusCode >= http.StatusOK && resp.StatusCode < http.StatusMultipleChoices) 133 | } 134 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/003random/getJS/v2 2 | 3 | go 1.22 4 | 5 | require github.com/PuerkitoBio/goquery v1.8.1 6 | 7 | require ( 8 | github.com/andybalholm/cascadia v1.3.1 // indirect 9 | golang.org/x/net v0.7.0 // indirect 10 | ) 11 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/PuerkitoBio/goquery v1.8.1 h1:uQxhNlArOIdbrH1tr0UXwdVFgDcZDrZVdcpygAcwmWM= 2 | github.com/PuerkitoBio/goquery v1.8.1/go.mod h1:Q8ICL1kNUJ2sXGoAhPGUdYDJvgQgHzJsnnd3H7Ho5jQ= 3 | github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c= 4 | github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA= 5 | github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= 6 | golang.org/x/crypto 
v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 7 | golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= 8 | golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= 9 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 10 | golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= 11 | golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= 12 | golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= 13 | golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g= 14 | golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= 15 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 16 | golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 17 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 18 | golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 19 | golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 20 | golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 21 | golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 22 | golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 23 | golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 24 | golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod 
h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= 25 | golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= 26 | golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= 27 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 28 | golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 29 | golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 30 | golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= 31 | golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= 32 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 33 | golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 34 | golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= 35 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 36 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "flag" 6 | "fmt" 7 | "io" 8 | "log" 9 | "net/http" 10 | "os" 11 | "strings" 12 | "time" 13 | 14 | "github.com/003random/getJS/v2/runner" 15 | ) 16 | 17 | func main() { 18 | options, err := setup() 19 | if err != nil { 20 | log.Fatal(fmt.Errorf("parsing flags: %w", err)) 21 | } 22 | 23 | if err := runner.New(options).Run(); err != nil { 24 | log.Fatal(err) 25 | } 26 | } 27 | 28 | func setup() (options *runner.Options, err error) { 29 | options = &runner.Options{} 30 | 31 | flag.StringVar(&options.Request.Method, "method", "GET", "The request method that should be used to make fetch the remote contents.") 32 | flag.DurationVar(&options.Request.Timeout, "timeout", 
5*time.Second, "The request timeout used while fetching the remote contents.") 33 | flag.BoolVar(&options.Complete, "complete", false, "Complete/Autofil relative URLs by adding the current origin.") 34 | flag.BoolVar(&options.Resolve, "resolve", false, "Resolve the JavaScript files. Can only be used in combination with '--resolve'. Unresolvable hosts are not included in the results.") 35 | flag.IntVar(&options.Threads, "threads", 2, "The amount of processing threads to spawn.") 36 | flag.BoolVar(&options.Verbose, "verbose", false, "Print verbose runtime information and errors.") 37 | 38 | var ( 39 | url string 40 | input arrayFlags 41 | output arrayFlags 42 | header arrayFlags 43 | ) 44 | 45 | flag.Var(&header, "header", "The optional request headers to add to the requests. This flag can be used multiple times with a new header each time.") 46 | flag.StringVar(&url, "url", "", "The URL where the JavaScript sources should be extracted from.") 47 | flag.Var(&input, "input", "The optional URLs input files. Each URL should be on a new line in plain text format. This flag can be used multiple times with different files.") 48 | flag.Var(&output, "output", "The optional output file where the results are written to.") 49 | 50 | flag.Parse() 51 | 52 | options.Request.Headers = headers(header) 53 | 54 | options.Inputs = inputs(input) 55 | options.Outputs = outputs(output) 56 | 57 | // Add an input for the single URL option, if set. 
58 | if len(url) > 0 { 59 | options.Inputs = append(options.Inputs, runner.Input{ 60 | Type: runner.InputURL, 61 | Data: strings.NewReader(url), 62 | }) 63 | } 64 | 65 | stat, err := os.Stdin.Stat() 66 | if err != nil { 67 | log.Fatal(fmt.Errorf("error reading stdin: %v", err)) 68 | } 69 | 70 | if (stat.Mode() & os.ModeCharDevice) == 0 { 71 | // Read the first line of stdin to detect its format 72 | reader := bufio.NewReader(os.Stdin) 73 | firstLine, err := reader.ReadString('\n') 74 | if err != nil && err != io.EOF { 75 | log.Fatal(fmt.Errorf("error reading first line of stdin: %v", err)) 76 | } 77 | 78 | if isURL(strings.TrimSpace(firstLine)) { 79 | // Treat as URL input. 80 | options.Inputs = append(options.Inputs, runner.Input{ 81 | Type: runner.InputURL, 82 | Data: io.MultiReader(strings.NewReader(firstLine), reader), 83 | }) 84 | } else { 85 | // Treat as HTTP response body. 86 | options.Inputs = append(options.Inputs, runner.Input{ 87 | Type: runner.InputResponse, 88 | Data: io.MultiReader(strings.NewReader(firstLine), reader), 89 | }) 90 | } 91 | } 92 | 93 | return 94 | } 95 | 96 | func isURL(str string) bool { 97 | return strings.HasPrefix(str, "http://") || strings.HasPrefix(str, "https://") 98 | } 99 | 100 | func outputs(names []string) []io.Writer { 101 | outputs := append([]io.Writer{}, os.Stdout) 102 | 103 | for _, n := range names { 104 | file, err := os.OpenFile(n, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644) 105 | if err != nil { 106 | log.Fatal(fmt.Errorf("error parsing output file flag: %v", err)) 107 | } 108 | 109 | outputs = append(outputs, file) 110 | } 111 | 112 | return outputs 113 | } 114 | 115 | func inputs(names []string) []runner.Input { 116 | inputs := []runner.Input{} 117 | 118 | for _, n := range names { 119 | file, err := os.Open(n) 120 | if err != nil { 121 | log.Fatal(fmt.Errorf("error reading from file %s: %v", n, err)) 122 | } 123 | 124 | inputs = append(inputs, runner.Input{Type: runner.InputURL, Data: file}) 125 | } 126 | 127 | 
return inputs 128 | } 129 | 130 | func headers(args []string) http.Header { 131 | headers := make(http.Header) 132 | for _, s := range args { 133 | parts := strings.Split(s, ":") 134 | if len(parts) <= 1 { 135 | log.Fatal(fmt.Errorf("invalid header %s", s)) 136 | } 137 | 138 | headers[strings.TrimSpace(parts[0])] = []string{strings.TrimSpace(strings.Join(parts[1:], ":"))} 139 | } 140 | 141 | return headers 142 | } 143 | 144 | type arrayFlags []string 145 | 146 | func (a *arrayFlags) Set(value string) error { 147 | *a = append(*a, value) 148 | return nil 149 | } 150 | 151 | func (a *arrayFlags) String() string { 152 | return strings.Join(*a, ",") 153 | } 154 | -------------------------------------------------------------------------------- /runner/objects.go: -------------------------------------------------------------------------------- 1 | package runner 2 | 3 | import ( 4 | "io" 5 | "net/http" 6 | "net/url" 7 | "time" 8 | ) 9 | 10 | // Input represents an input source for getJS. The input format is determined by the `Type` property. 11 | type Input struct { 12 | Type InputType 13 | Data io.Reader 14 | } 15 | 16 | // InputType defines the type of input source for getJS. 17 | type InputType int 18 | 19 | const ( 20 | // InputURL defines the input format to line separated, plain text, URLs. 21 | InputURL InputType = iota 22 | // InputResponse defines the input format to a HTTP response body. 23 | InputResponse 24 | ) 25 | 26 | type runner struct { 27 | Options Options 28 | Results chan url.URL 29 | } 30 | 31 | // Options represents the configuration options for the runner. 
32 | type Options struct { 33 | Request struct { 34 | Method string 35 | Headers http.Header 36 | InsecureSkipVerify bool 37 | Timeout time.Duration 38 | } 39 | 40 | Inputs []Input 41 | Outputs []io.Writer 42 | 43 | Complete bool 44 | Resolve bool 45 | 46 | Threads int 47 | 48 | Verbose bool 49 | Colors bool 50 | } 51 | -------------------------------------------------------------------------------- /runner/runner.go: -------------------------------------------------------------------------------- 1 | package runner 2 | 3 | import ( 4 | "bufio" 5 | "crypto/tls" 6 | "errors" 7 | "fmt" 8 | "io" 9 | "log" 10 | "net/http" 11 | "net/url" 12 | "sync" 13 | 14 | "github.com/003random/getJS/v2/extractor" 15 | ) 16 | 17 | // ExtractionPoints defines the default HTML tags and their attributes from which JavaScript sources are extracted. 18 | var ExtractionPoints = map[string][]string{ 19 | "script": {"src", "data-src"}, 20 | } 21 | 22 | // New creates a new runner with the provided options. 23 | func New(options *Options) *runner { 24 | http.DefaultClient.Transport = &http.Transport{ 25 | TLSHandshakeTimeout: options.Request.Timeout, 26 | TLSClientConfig: &tls.Config{ 27 | InsecureSkipVerify: options.Request.InsecureSkipVerify, 28 | }, 29 | } 30 | http.DefaultClient.Timeout = options.Request.Timeout 31 | 32 | return &runner{ 33 | Options: *options, 34 | Results: make(chan url.URL), 35 | } 36 | } 37 | 38 | // Run starts processing the inputs and extracts JavaScript sources into the runner's Results channel. 
39 | func (r *runner) Run() error { 40 | if !r.Options.Verbose { 41 | log.SetOutput(io.Discard) 42 | } 43 | 44 | go func() { 45 | for _, input := range r.Options.Inputs { 46 | switch input.Type { 47 | case InputURL: 48 | r.ProcessURLs(input.Data) 49 | case InputResponse: 50 | r.ProcessResponse(input.Data) 51 | } 52 | 53 | if input, ok := input.Data.(io.Closer); ok { 54 | input.Close() 55 | } 56 | } 57 | 58 | close(r.Results) 59 | }() 60 | 61 | r.listen() 62 | 63 | return nil 64 | } 65 | 66 | func (r *runner) listen() { 67 | for s := range r.Results { 68 | for _, output := range r.Options.Outputs { 69 | _, err := output.Write([]byte(fmt.Sprintf("%s\n", s.String()))) 70 | if err != nil { 71 | log.Println(fmt.Errorf("[error] writing result %s to output: %v", s.String(), err)) 72 | } 73 | } 74 | } 75 | 76 | for _, output := range r.Options.Outputs { 77 | if o, ok := output.(io.Closer); ok { 78 | o.Close() 79 | } 80 | } 81 | } 82 | 83 | // ProcessURLs will fetch the HTTP response for all URLs in the provided reader 84 | // and stream the extracted sources to the runner's Results channel. 
85 | func (r *runner) ProcessURLs(data io.Reader) { 86 | var ( 87 | next = Read(data) 88 | wg = sync.WaitGroup{} 89 | 90 | throttle = make(chan struct{}, r.Options.Threads) 91 | ) 92 | 93 | for i := 0; i < r.Options.Threads; i++ { 94 | throttle <- struct{}{} 95 | } 96 | 97 | for { 98 | u, err := next() 99 | if errors.Is(err, io.EOF) { 100 | break 101 | } 102 | if err != nil { 103 | log.Println(fmt.Errorf("[error] parsing url %v: %w", u, err)) 104 | continue 105 | } 106 | 107 | wg.Add(1) 108 | go func(u *url.URL) { 109 | defer func() { 110 | throttle <- struct{}{} 111 | wg.Done() 112 | }() 113 | 114 | resp, err := extractor.FetchResponse(u.String(), r.Options.Request.Method, r.Options.Request.Headers) 115 | if err != nil { 116 | log.Println(fmt.Errorf("[error] fetching response for url %s: %w", u.String(), err)) 117 | return 118 | } 119 | defer resp.Body.Close() 120 | 121 | sources, err := extractor.ExtractSources(resp.Body) 122 | if err != nil { 123 | log.Println(fmt.Errorf("[error] extracting sources from response for url %s: %w", u.String(), err)) 124 | return 125 | } 126 | 127 | filtered, err := extractor.Filter(sources, r.filters(u)...) 128 | if err != nil { 129 | log.Println(fmt.Errorf("[error] filtering sources for url %s: %w", u.String(), err)) 130 | return 131 | } 132 | 133 | for source := range filtered { 134 | r.Results <- source 135 | } 136 | }(u) 137 | 138 | <-throttle 139 | } 140 | 141 | wg.Wait() 142 | } 143 | 144 | // Read is a wrapper around the bufio.Scanner Text() method. 145 | // Upon reading from the input, the line is automatically parsed to a *url.URL. 146 | // An io.EOF error is returned when there are no more lines. 
147 | func Read(input io.Reader) func() (*url.URL, error) { 148 | scanner := bufio.NewScanner(input) 149 | return func() (*url.URL, error) { 150 | if !scanner.Scan() { 151 | return nil, io.EOF 152 | } 153 | 154 | return url.Parse(scanner.Text()) 155 | } 156 | } 157 | 158 | func (r *runner) ProcessResponse(data io.Reader) { 159 | sources, err := extractor.ExtractSources(data) 160 | if err != nil { 161 | log.Println(fmt.Errorf("[error] extracting sources from response file: %w", err)) 162 | } 163 | 164 | filtered, err := extractor.Filter(sources, r.filters(nil)...) 165 | if err != nil { 166 | log.Println(fmt.Errorf("[error] filtering sources from response file: %w", err)) 167 | return 168 | } 169 | 170 | for source := range filtered { 171 | r.Results <- source 172 | } 173 | } 174 | 175 | func (r *runner) filters(base *url.URL) (options []func([]url.URL) []url.URL) { 176 | if r.Options.Complete && base != nil { 177 | options = append(options, extractor.WithComplete(base)) 178 | } 179 | 180 | if r.Options.Resolve { 181 | options = append(options, extractor.WithResolve()) 182 | } 183 | 184 | return 185 | } 186 | --------------------------------------------------------------------------------