├── .gitignore ├── go.mod ├── .gitattributes ├── Makefile ├── README.md ├── main.go └── LICENSE /.gitignore: -------------------------------------------------------------------------------- 1 | bin 2 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/bebiksior/subwords 2 | 3 | go 1.24 4 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: build clean 2 | 3 | build: 4 | go build -o bin/subwords 5 | 6 | clean: 7 | rm -f bin/subwords 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # subwords 2 | A simple tool to extract words from a list of subdomains, sort them by frequency, and output them. 3 | 4 | ## Installation 5 | 6 | ```bash 7 | go install github.com/bebiksior/subwords@latest 8 | ``` 9 | 10 | ## Usage 11 | 12 | ```bash 13 | # Read from stdin 14 | cat subdomains.txt | subwords 15 | 16 | # Read from file 17 | subwords -i subdomains.txt 18 | 19 | # Limit output to top N most frequent words 20 | subwords -i subdomains.txt -limit 10 21 | 22 | # Include statistics in output 23 | subwords -i subdomains.txt -stats 24 | ``` 25 | 26 | The tool splits subdomains by dots (.), hyphens (-), and underscores (_), counts the frequency of each word, and outputs them sorted by frequency. 
// readInput returns every line read from inputFile, or from standard input
// when inputFile is the empty string. Lines are produced by bufio.Scanner
// with its default line splitting, so line terminators are not included.
//
// If the file cannot be opened or scanning fails, it returns a nil slice and
// the underlying error.
func readInput(inputFile string) ([]string, error) {
	// Both sources are *os.File, so one scan loop serves either of them.
	src := os.Stdin
	if inputFile != "" {
		f, err := os.Open(inputFile)
		if err != nil {
			return nil, err
		}
		defer f.Close()
		src = f
	}

	var lines []string
	scanner := bufio.NewScanner(src)
	for scanner.Scan() {
		lines = append(lines, scanner.Text())
	}
	if err := scanner.Err(); err != nil {
		return nil, err
	}
	return lines, nil
}

// parseSubdomain breaks a subdomain into candidate words.
//
// The input is split on dots. Each dot-separated label is emitted unchanged,
// followed by its hyphen-separated pieces when it contains a hyphen and then
// its underscore-separated pieces when it contains an underscore. The two
// separators are checked independently of each other, so a label such as
// "api_v2" yields "api_v2", "api", "v2" even though it has no hyphen.
//
// Empty strings can appear in the result (for example from consecutive
// separators or an empty input); processWords ignores them when counting.
func parseSubdomain(subdomain string) []string {
	separators := [...]string{"-", "_"}

	var result []string
	for _, label := range strings.Split(subdomain, ".") {
		// Keep the whole label as a word of its own.
		result = append(result, label)

		for _, sep := range separators {
			if strings.Contains(label, sep) {
				result = append(result, strings.Split(label, sep)...)
			}
		}
	}
	return result
}

// wordCount pairs a word with the number of times it was seen.
type wordCount struct {
	word  string
	count int
}

// processWords counts how often each non-empty word occurs in words and
// returns the distinct words ordered by descending count, with ties broken by
// ascending word. When limit is positive and smaller than the number of
// distinct words, only the first limit entries are returned; a limit of zero
// or less returns all of them.
func processWords(words []string, limit int) []wordCount {
	counts := make(map[string]int)
	for _, w := range words {
		if w != "" {
			counts[w]++
		}
	}

	result := make([]wordCount, 0, len(counts))
	for word, count := range counts {
		result = append(result, wordCount{word: word, count: count})
	}

	// Map iteration order is random; the word tie-break makes the output
	// deterministic.
	sort.Slice(result, func(i, j int) bool {
		if result[i].count == result[j].count {
			return result[i].word < result[j].word
		}
		return result[i].count > result[j].count
	})

	if limit > 0 && limit < len(result) {
		result = result[:limit]
	}

	return result
}

// main parses the command-line flags, reads subdomains from the -i file or
// from standard input, and prints the extracted words ordered by frequency.
// With -stats each word is printed together with its occurrence count; with
// -limit N only the top N words are printed.
func main() {
	inputFile := flag.String("i", "", "input file with subdomains")
	limit := flag.Int("limit", 0, "limit output to top N most frequent words (0 for all)")
	stats := flag.Bool("stats", false, "include statistics in output")
	flag.Parse()

	lines, err := readInput(*inputFile)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error reading input: %v\n", err)
		os.Exit(1)
	}

	var allWords []string
	for _, l := range lines {
		allWords = append(allWords, parseSubdomain(l)...)
	}

	wordCounts := processWords(allWords, *limit)

	// Buffer stdout so large word lists do not cost one write per line.
	out := bufio.NewWriter(os.Stdout)
	for _, wc := range wordCounts {
		if *stats {
			fmt.Fprintf(out, "%s (%d occurrences)\n", wc.word, wc.count)
		} else {
			fmt.Fprintln(out, wc.word)
		}
	}
	// Flush explicitly (not via defer) so a write failure is reported and
	// reflected in the exit status.
	if err := out.Flush(); err != nil {
		fmt.Fprintf(os.Stderr, "Error writing output: %v\n", err)
		os.Exit(1)
	}
}
CREATIVE COMMONS PROVIDES THIS 8 | INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES 9 | REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS 10 | PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM 11 | THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED 12 | HEREUNDER. 13 | 14 | Statement of Purpose 15 | 16 | The laws of most jurisdictions throughout the world automatically confer 17 | exclusive Copyright and Related Rights (defined below) upon the creator 18 | and subsequent owner(s) (each and all, an "owner") of an original work of 19 | authorship and/or a database (each, a "Work"). 20 | 21 | Certain owners wish to permanently relinquish those rights to a Work for 22 | the purpose of contributing to a commons of creative, cultural and 23 | scientific works ("Commons") that the public can reliably and without fear 24 | of later claims of infringement build upon, modify, incorporate in other 25 | works, reuse and redistribute as freely as possible in any form whatsoever 26 | and for any purposes, including without limitation commercial purposes. 27 | These owners may contribute to the Commons to promote the ideal of a free 28 | culture and the further production of creative, cultural and scientific 29 | works, or to gain reputation or greater distribution for their Work in 30 | part through the use and efforts of others. 31 | 32 | For these and/or other purposes and motivations, and without any 33 | expectation of additional consideration or compensation, the person 34 | associating CC0 with a Work (the "Affirmer"), to the extent that he or she 35 | is an owner of Copyright and Related Rights in the Work, voluntarily 36 | elects to apply CC0 to the Work and publicly distribute the Work under its 37 | terms, with knowledge of his or her Copyright and Related Rights in the 38 | Work and the meaning and intended legal effect of CC0 on those rights. 39 | 40 | 1. Copyright and Related Rights. 
A Work made available under CC0 may be 41 | protected by copyright and related or neighboring rights ("Copyright and 42 | Related Rights"). Copyright and Related Rights include, but are not 43 | limited to, the following: 44 | 45 | i. the right to reproduce, adapt, distribute, perform, display, 46 | communicate, and translate a Work; 47 | ii. moral rights retained by the original author(s) and/or performer(s); 48 | iii. publicity and privacy rights pertaining to a person's image or 49 | likeness depicted in a Work; 50 | iv. rights protecting against unfair competition in regards to a Work, 51 | subject to the limitations in paragraph 4(a), below; 52 | v. rights protecting the extraction, dissemination, use and reuse of data 53 | in a Work; 54 | vi. database rights (such as those arising under Directive 96/9/EC of the 55 | European Parliament and of the Council of 11 March 1996 on the legal 56 | protection of databases, and under any national implementation 57 | thereof, including any amended or successor version of such 58 | directive); and 59 | vii. other similar, equivalent or corresponding rights throughout the 60 | world based on applicable law or treaty, and any national 61 | implementations thereof. 62 | 63 | 2. Waiver. 
To the greatest extent permitted by, but not in contravention 64 | of, applicable law, Affirmer hereby overtly, fully, permanently, 65 | irrevocably and unconditionally waives, abandons, and surrenders all of 66 | Affirmer's Copyright and Related Rights and associated claims and causes 67 | of action, whether now known or unknown (including existing as well as 68 | future claims and causes of action), in the Work (i) in all territories 69 | worldwide, (ii) for the maximum duration provided by applicable law or 70 | treaty (including future time extensions), (iii) in any current or future 71 | medium and for any number of copies, and (iv) for any purpose whatsoever, 72 | including without limitation commercial, advertising or promotional 73 | purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each 74 | member of the public at large and to the detriment of Affirmer's heirs and 75 | successors, fully intending that such Waiver shall not be subject to 76 | revocation, rescission, cancellation, termination, or any other legal or 77 | equitable action to disrupt the quiet enjoyment of the Work by the public 78 | as contemplated by Affirmer's express Statement of Purpose. 79 | 80 | 3. Public License Fallback. Should any part of the Waiver for any reason 81 | be judged legally invalid or ineffective under applicable law, then the 82 | Waiver shall be preserved to the maximum extent permitted taking into 83 | account Affirmer's express Statement of Purpose. 
In addition, to the 84 | extent the Waiver is so judged Affirmer hereby grants to each affected 85 | person a royalty-free, non transferable, non sublicensable, non exclusive, 86 | irrevocable and unconditional license to exercise Affirmer's Copyright and 87 | Related Rights in the Work (i) in all territories worldwide, (ii) for the 88 | maximum duration provided by applicable law or treaty (including future 89 | time extensions), (iii) in any current or future medium and for any number 90 | of copies, and (iv) for any purpose whatsoever, including without 91 | limitation commercial, advertising or promotional purposes (the 92 | "License"). The License shall be deemed effective as of the date CC0 was 93 | applied by Affirmer to the Work. Should any part of the License for any 94 | reason be judged legally invalid or ineffective under applicable law, such 95 | partial invalidity or ineffectiveness shall not invalidate the remainder 96 | of the License, and in such case Affirmer hereby affirms that he or she 97 | will not (i) exercise any of his or her remaining Copyright and Related 98 | Rights in the Work or (ii) assert any associated claims and causes of 99 | action with respect to the Work, in either case contrary to Affirmer's 100 | express Statement of Purpose. 101 | 102 | 4. Limitations and Disclaimers. 103 | 104 | a. No trademark or patent rights held by Affirmer are waived, abandoned, 105 | surrendered, licensed or otherwise affected by this document. 106 | b. Affirmer offers the Work as-is and makes no representations or 107 | warranties of any kind concerning the Work, express, implied, 108 | statutory or otherwise, including without limitation warranties of 109 | title, merchantability, fitness for a particular purpose, non 110 | infringement, or the absence of latent or other defects, accuracy, or 111 | the present or absence of errors, whether or not discoverable, all to 112 | the greatest extent permissible under applicable law. 113 | c. 
Affirmer disclaims responsibility for clearing rights of other persons 114 | that may apply to the Work or any use thereof, including without 115 | limitation any person's Copyright and Related Rights in the Work. 116 | Further, Affirmer disclaims responsibility for obtaining any necessary 117 | consents, permissions or other rights required for any use of the 118 | Work. 119 | d. Affirmer understands and acknowledges that Creative Commons is not a 120 | party to this document and has no duty or obligation with respect to 121 | this CC0 or use of the Work. 122 | --------------------------------------------------------------------------------