├── .gitignore ├── LICENSE ├── README.md ├── go.mod ├── go.sum └── html2gmi.go /.gitignore: -------------------------------------------------------------------------------- 1 | .hgignore 2 | .hg/ 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 LukeEmmet 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # html2gmi 2 | 3 | A command line application to convert HTML to GMI (Gemini text/gemini), written in Go. Written in Go so it should compile on various common platforms (linux/mac/windows etc). 
4 | 5 | A simple wrapper around the Go library https://github.com/LukeEmmet/html2gemini 6 | 7 | ## Usage 8 | 9 | ``` 10 | html2gmi 11 | 12 | -m, --citationMarkers Use footnote style citation markers 13 | -c, --citationStart int Start citations from this index (default 1) 14 | -i, --input string Input path. Otherwise uses stdin 15 | -l, --linkEmitFrequency int Emit gathered links through the document after this number of paragraphs (default 2) 16 | -n, --numberedLinks Number the links 17 | -o, --output string Output path. Otherwise uses stdout 18 | -e, --emitImagesAsLinks Emit links to included images 19 | -t, --prettyTables Pretty tables - works with most simple tables 20 | -v, --version Find out what version of html2gmi you're running 21 | 22 | 23 | ``` 24 | 25 | 26 | ## Remarks 27 | 28 | * linkEmitFrequency - this flag determines the frequency of the output of link lists. For example, 1 means any links from the previous paragraph are emitted after each paragraph; 2 means they are emitted every two paragraphs, and so on. Any gathered links are always emitted before a new heading. 29 | * citationStart - this flag determines the start index of the links. By default this is 1, so the first link is labelled "[1]", but you can set this as required. 30 | * citationMarkers - use a numbered marker in the text to indicate the location of the citation, [1], [2] etc. 31 | * numberedLinks - number the links with a reference number [1], [2] etc. Certain command line Gemini clients may automatically add these, in which case you can omit them. 32 | * emitImagesAsLinks - add a link for every embedded image in addition to its placeholder. 33 | * prettyTables - tables will be displayed as preformatted content. Complex tables may not look perfect. Otherwise each table row is a new line. 34 | 35 | You can pipe content in from other applications, for example utilities that download HTML from the web. 
36 | 37 | To strip out cruft, use an HTML sanitiser before passing the content to this app. 38 | 39 | ## Building 40 | 41 | If you have Go installed, you can install the latest commit using 42 | 43 | ``` 44 | go env -w GO111MODULE=on 45 | go get github.com/LukeEmmet/html2gmi@master 46 | 47 | ``` 48 | 49 | 50 | # History 51 | 52 | ## 0.2.7 53 | 54 | * escape spaces in urls to allow url to be correctly distinguished from display text in gemini 55 | 56 | ## 0.2.6 57 | 58 | * Use Go modules 59 | * use update html2gemini having fix for bug whereby preformatted regions sometimes missed closing newlines 60 | 61 | ## 0.2.5 62 | 63 | * new -t flag to emit pretty tables (as preformatted content) 64 | * improve table rendering when prettyTables is off 65 | * don't use a border marker for preformatted tables 66 | 67 | ## 0.2.4 68 | 69 | * option to toggle emitting links for embedded images 70 | 71 | ## 0.2.2 72 | 73 | * option to toggle numbering on links 74 | * option to toggle citation markers 75 | 76 | ## 0.2.1 77 | 78 | * public release -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/LukeEmmet/html2gmi 2 | 3 | go 1.14 4 | 5 | require ( 6 | github.com/LukeEmmet/html2gemini v0.0.0-20201115162526-e63bbe688236 // indirect 7 | github.com/spf13/pflag v1.0.5 8 | ) 9 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/LukeEmmet/html2gemini v0.0.0-20201115160957-70fb785e5e75 h1:onztTpTID3b1deaA3+KnO4IyAU307PBgosQHRIqCMho= 2 | github.com/LukeEmmet/html2gemini v0.0.0-20201115160957-70fb785e5e75/go.mod h1:UFD98yRRVkWrb7yNSXy9UTyHdnSMthMdfLwUYx19PkM= 3 | github.com/LukeEmmet/html2gemini v0.0.0-20201115162526-e63bbe688236 h1:FqsaW6vEhQsER1rqN07RkjYMkGxBORvTNkxUMlJDbTQ= 4 | github.com/LukeEmmet/html2gemini 
v0.0.0-20201115162526-e63bbe688236/go.mod h1:UFD98yRRVkWrb7yNSXy9UTyHdnSMthMdfLwUYx19PkM= 5 | github.com/mattn/go-runewidth v0.0.7 h1:Ei8KR0497xHyKJPAv59M1dkC+rOZCMBJ+t3fZ+twI54= 6 | github.com/mattn/go-runewidth v0.0.7/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= 7 | github.com/olekukonko/tablewriter v0.0.4 h1:vHD/YYe1Wolo78koG299f7V/VAS08c6IpCLn+Ejf/w8= 8 | github.com/olekukonko/tablewriter v0.0.4/go.mod h1:zq6QwlOf5SlnkVbMSr5EoBv3636FWnp+qbPhuoO21uA= 9 | github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= 10 | github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= 11 | github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf h1:pvbZ0lM0XWPBqUKqFU8cmavspvIl9nulOYwdy6IFRRo= 12 | github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf/go.mod h1:RJID2RhlZKId02nZ62WenDCkgHFerpIOmW0iT7GKmXM= 13 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 14 | golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= 15 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 16 | golang.org/x/net v0.0.0-20200822124328-c89045814202 h1:VvcQYSHwXgi7W+TpUR6A9g6Up98WAHf3f/ulnJ62IyA= 17 | golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= 18 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 19 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 20 | golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 21 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 22 | -------------------------------------------------------------------------------- /html2gmi.go: 
-------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "errors" 6 | "fmt" 7 | "github.com/LukeEmmet/html2gemini" 8 | flag "github.com/spf13/pflag" 9 | "io" 10 | "io/ioutil" 11 | "os" 12 | ) 13 | 14 | var version = "0.2.7" 15 | 16 | var ( 17 | output = flag.StringP("output", "o", "", "Output path. Otherwise uses stdout") 18 | input = flag.StringP("input", "i", "", "Input path. Otherwise uses stdin") 19 | citationStart = flag.IntP("citationStart", "c", 1, "Start citations from this index") 20 | citationMarkers = flag.BoolP("citationMarkers", "m", false, "Use footnote style citation markers") 21 | numberedLinks = flag.BoolP("numberedLinks", "n", false, "Number the links") 22 | prettyTables = flag.BoolP("prettyTables", "t", false, "Pretty tables - works with most simple tables") 23 | emitImagesAsLinks = flag.BoolP("emitImagesAsLinks", "e", false, "Emit links to included images") 24 | linkEmitFrequency = flag.IntP("linkEmitFrequency", "l", 2, "Emit gathered links through the document after this number of paragraphs") 25 | verFlag = flag.BoolP("version", "v", false, "Find out what version of html2gmi you're running") 26 | ) 27 | 28 | func check(e error) { 29 | if e != nil { 30 | panic(e) 31 | } 32 | } 33 | 34 | func saveFile(contents []byte, path string) { 35 | d1 := contents 36 | err := ioutil.WriteFile(path, d1, 0644) 37 | check(err) 38 | } 39 | 40 | func readStdin() string { 41 | // based on https://flaviocopes.com/go-shell-pipes/ 42 | reader := bufio.NewReader(os.Stdin) //default size is 4096 apparently 43 | var output []rune 44 | 45 | for { 46 | input, _, err := reader.ReadRune() 47 | if err != nil && err == io.EOF { 48 | break 49 | } 50 | output = append(output, input) 51 | } 52 | 53 | return string(output) 54 | } 55 | 56 | func getInput() (string, error) { 57 | var inputHtml string 58 | 59 | info, err := os.Stdin.Stat() 60 | check(err) 61 | 62 | if *input != "" { 63 | //get the input file 
from the command line 64 | dat, err := ioutil.ReadFile(*input) 65 | check(err) 66 | inputHtml = string(dat) 67 | } else if info.Mode()&os.ModeNamedPipe != 0 { 68 | // we have a pipe input 69 | inputHtml = readStdin() 70 | 71 | } else { 72 | //we shouldn't get here 73 | return "", errors.New("invalid option for input - use -i or pipe to stdin") 74 | } 75 | 76 | return inputHtml, nil 77 | } 78 | 79 | func main() { 80 | var inputHtml string 81 | 82 | flag.Parse() 83 | 84 | if *verFlag { 85 | fmt.Println("html2gmi " + version) 86 | return 87 | } 88 | 89 | //get the input from commandline or stdin 90 | inputHtml, err := getInput() 91 | check(err) 92 | 93 | //convert html to gmi 94 | options := html2gemini.NewOptions() 95 | options.PrettyTables = *prettyTables 96 | options.CitationStart = *citationStart 97 | options.LinkEmitFrequency = *linkEmitFrequency 98 | options.CitationMarkers = *citationMarkers 99 | options.NumberedLinks = *numberedLinks 100 | options.EmitImagesAsLinks = *emitImagesAsLinks 101 | 102 | 103 | //dont use an extra line to separate header from body, but 104 | //do separate each row visually 105 | options.PrettyTablesOptions.HeaderLine = false 106 | options.PrettyTablesOptions.RowLine = true 107 | 108 | //use slightly nicer Unicode borders, otherwise can use +,|,- 109 | //options.PrettyTablesOptions.CenterSeparator = "┼" 110 | //options.PrettyTablesOptions.ColumnSeparator = "│" 111 | //options.PrettyTablesOptions.RowSeparator = "─" 112 | 113 | //pretty tables option is somewhat experimental 114 | //and the column positions not always correct 115 | //so use invisible borders of spaces for now 116 | options.PrettyTablesOptions.CenterSeparator = " " 117 | options.PrettyTablesOptions.ColumnSeparator = " " 118 | options.PrettyTablesOptions.RowSeparator = " " 119 | 120 | ctx := html2gemini.NewTraverseContext(*options) 121 | 122 | text, err := html2gemini.FromString(inputHtml, *ctx) 123 | 124 | check(err) 125 | 126 | //process the output 127 | if *output == "" 
{ 128 | fmt.Print(text + "\n") //terminate with a new line 129 | } else { 130 | //save to the specified output 131 | gmiBytes := []byte(text + "\n") 132 | saveFile(gmiBytes, *output) 133 | } 134 | } 135 | --------------------------------------------------------------------------------