├── main.go ├── .gitignore ├── Core ├── Main.go ├── Request.go ├── Format.go └── Scrape.go ├── InputParser └── InputParser.go ├── go.sum ├── LICENSE └── README.md /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "twint-zero/Core" 5 | "twint-zero/InputParser" 6 | ) 7 | 8 | func main() { 9 | Arguments := InputParser.InputParser() 10 | Core.Main(&(Arguments.Query), &(Arguments.Instance), &(Arguments.Format)) 11 | } 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | 8 | # Test binary, built with `go test -c` 9 | *.test 10 | 11 | # Output of the go coverage tool, specifically when used with LiteIDE 12 | *.out 13 | 14 | # Dependency directories (remove the comment below to include it) 15 | # vendor/ 16 | -------------------------------------------------------------------------------- /Core/Main.go: -------------------------------------------------------------------------------- 1 | package Core 2 | 3 | import ( 4 | "net/url" 5 | ) 6 | 7 | var ( 8 | condition bool = true 9 | cursor string = "" 10 | ) 11 | 12 | func Main(Query *string, Instance *string, Format *string) { 13 | (*Query) = url.QueryEscape(*Query) 14 | for condition { 15 | condition = Scrape(Request(Query, Instance, &cursor), Instance, Format, &cursor) 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /InputParser/InputParser.go: -------------------------------------------------------------------------------- 1 | package InputParser 2 | 3 | import ( 4 | "flag" 5 | "os" 6 | ) 7 | 8 | type Arguments struct { 9 | Query string 10 | Instance string 11 | Format string 12 | } 13 | 14 | var arguments *Arguments = new(Arguments) 15 | 16 | func InputParser() *Arguments { 17 | 18 | flag.StringVar(&(arguments.Query), "Query", "", "Specify search query.") 19 | flag.StringVar(&(arguments.Instance), "Instance", "nitter.nl", "Specify instance to get data from.") 20 | flag.StringVar(&(arguments.Format), "Format", "csv", "Specify the return format: csv (default), or json.") 21 | flag.Parse() 22 | 23 | if (*arguments).Query == "" || !ValidateFormatArgument(arguments) { 24 | flag.Usage() 25 | os.Exit(1) 26 | } 27 | 28 | return arguments 29 | } 30 | 31 | func ValidateFormatArgument(arguments *Arguments) bool { 32 | format := (*arguments).Format 33 | return format == "" || format == "csv" || format == "json" 34 | } 35 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U= 2 | github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI= 3 | github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c= 4 | github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA= 5 | golang.org/x/net v0.0.0-20210916014120-12bc252f5db8 h1:/6y1LfuqNuQdHAm0jjtPtgRcxIxjVZgm5OTu8/QhZvk= 6 | golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= 7 | golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 8 | golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 9 | golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= 10 | golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 11 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 TWINT Project 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Core/Request.go: -------------------------------------------------------------------------------- 1 | package Core 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "log" 7 | "net/http" 8 | "time" 9 | ) 10 | 11 | var ( 12 | Client *http.Client = new(http.Client) 13 | ) 14 | 15 | func Request(Query *string, Instance *string, cursor *string) io.ReadCloser { 16 | var url string = fmt.Sprintf("https://%s/search?f=tweet&q=%s", *Instance, *Query) 17 | if *cursor != "" { 18 | url = fmt.Sprintf("https://%s/search%s", *Instance, *cursor) 19 | } 20 | hlsCookie := &http.Cookie{ 21 | Name: "hlsPlayback", 22 | Value: "on", 23 | MaxAge: 300, 24 | } 25 | req, err := http.NewRequest("GET", url, nil) 26 | req.AddCookie(hlsCookie) 27 | if err != nil { 28 | log.Fatalf("[nr] %s\n", err) 29 | } 30 | 31 | req.Header.Set("User-Agent", "twint-zero") 32 | res, err := Client.Do(req) 33 | if err != nil { 34 | log.Fatalf("[do] %s\n", err) 35 | } 36 | 37 | if res.StatusCode != 200 { 38 | if 500 <= res.StatusCode && res.StatusCode <= 599 { 39 | time.Sleep(10 * time.Second) 40 | return Request(Query, Instance, cursor) 41 | } else { 42 | log.Fatalf("status code error: %d %s \n %s", res.StatusCode, res.Status, url) 43 | } 44 | } 45 | return res.Body 46 | } 47 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Twint Zero 2 | Like Twint, but zero fat. 3 | 4 | # First things first 5 | 6 | Users are invited **to not** scrape public instances, that will cause a bad experience for some users. Instead, you are invited to setup your own custom Nitter instance. 7 | Thank you, and enjoy! 8 | 9 | # Installation 10 | 1) `git clone https://github.com/twintproject/twint-zero` 11 | 2) `cd twint-zero` 12 | 3) `go mod init twint-zero` 13 | 4) `go mod tidy` 14 | 15 | # Usage 16 | - Without compiling: `go run main.go -Query $QUERY -Instance $INSTANCE -Format $FORMAT` 17 | - If you compiled... well at this point you are supposed to know. 18 | 19 | ## CLI Arguments 20 | 1) `$QUERY`: [Here](https://github.com/igorbrigadir/twitter-advanced-search) you go. 21 | 2) `$INSTANCE`: [Setup your own](https://github.com/zedeus/nitter/#installation). 22 | 2) `$FORMAT`: "csv" or "json". 23 | 24 | # Questions/issues 25 | > Sir, the bill is: five GitHub stars, two forks and one retweet. 26 | 27 | That being quoted, feel free to reach out. 28 | 29 | # License 30 | MIT 31 | 32 | # Credits 33 | [Francesco Poldi](https://twitter.com/noneprivacy) 34 | 35 | [Simon Archer](https://mastodon.social/@archy_bold): JSON formatting and attachments parsing 36 | 37 | [Julian](https://github.com/juste97): quoted tweet and its metadata fields 38 | -------------------------------------------------------------------------------- /Core/Format.go: -------------------------------------------------------------------------------- 1 | package Core 2 | 3 | import ( 4 | "bytes" 5 | "encoding/csv" 6 | "encoding/json" 7 | "fmt" 8 | "log" 9 | "strings" 10 | ) 11 | 12 | func FormatTweets(format string, tweets []Tweet) { 13 | if format == "json" { 14 | FormatTweetsJSON(tweets) 15 | } else { 16 | FormatTweetsCSV(tweets) 17 | } 18 | } 19 | 20 | func FormatTweetsCSV(tweets []Tweet) { 21 | var b []byte 22 | buf := bytes.NewBuffer(b) 23 | w := csv.NewWriter(buf) 24 | 25 | for _, tweet := range tweets { 26 | attachments := make([]string, len(tweet.Attachments)) 27 | for i, att := range tweet.Attachments { 28 | attachments[i] = *att.URL 29 | } 30 | row := []string{ 31 | tweet.ID, 32 | tweet.URL, 33 | tweet.Timestamp, 34 | tweet.Username, 35 | tweet.Fullname, 36 | tweet.Text, 37 | tweet.ReplyTo, 38 | strings.Join(attachments, ","), 39 | fmt.Sprintf("%d", tweet.Stats.Replies), 40 | fmt.Sprintf("%d", tweet.Stats.Retweets), 41 | fmt.Sprintf("%d", tweet.Stats.Quotes), 42 | fmt.Sprintf("%d", tweet.Stats.Likes), 43 | tweet.QuoteFullname, 44 | tweet.QuoteUsername, 45 | tweet.QuoteDate, 46 | tweet.QuoteText, 47 | tweet.QuoteID, 48 | } 49 | if err := w.Write(row); err != nil { 50 | log.Fatalln("error writing row to csv:", err) 51 | } 52 | } 53 | w.Flush() 54 | if err := w.Error(); err != nil { 55 | log.Fatal(err) 56 | } 57 | 58 | fmt.Print(string(buf.Bytes())) 59 | } 60 | 61 | func FormatTweetsJSON(tweets []Tweet) { 62 | for _, tweet := range tweets { 63 | tweetJSON, _ := json.Marshal(tweet) 64 | fmt.Println(string(tweetJSON)) 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /Core/Scrape.go: -------------------------------------------------------------------------------- 1 | package Core 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "log" 7 | "regexp" 8 | "strconv" 9 | "strings" 10 | 11 | "github.com/PuerkitoBio/goquery" 12 | ) 13 | 14 | type Tweet struct { 15 | ID string `json:"id"` 16 | URL string `json:"url"` 17 | Text string `json:"text"` 18 | ReplyTo string `json:"reply_to,omitempty"` 19 | Username string `json:"username"` 20 | Fullname string `json:"fullname"` 21 | Timestamp string `json:"timestamp"` 22 | Attachments []Attachment `json:"attachments"` 23 | 24 | Stats TweetStats `json:"stats"` 25 | 26 | QuoteFullname string `json:"quote_fullname,omitempty"` 27 | QuoteUsername string `json:"quote_username,omitempty"` 28 | QuoteDate string `json:"quote_date,omitempty"` 29 | QuoteID string `json:"quote_id,omitempty"` 30 | QuoteText string `json:"quote_text,omitempty"` 31 | } 32 | 33 | type Attachment struct { 34 | Type string `json:"type"` 35 | URL *string `json:"url"` 36 | PreviewImageURL *string `json:"preview_image_url"` 37 | AltText *string `json:"alt_text"` 38 | } 39 | 40 | type TweetStats struct { 41 | Replies int64 `json:"replies"` 42 | Retweets int64 `json:"retweets"` 43 | Quotes int64 `json:"quotes"` 44 | Likes int64 `json:"likes"` 45 | } 46 | 47 | func extractViaRegexp(text *string, re string) string { 48 | theRegex := regexp.MustCompile(re) 49 | match := theRegex.Find([]byte(*text)) 50 | return string(match[:]) 51 | } 52 | 53 | func Scrape(responseBody io.ReadCloser, Instance *string, Format *string, cursor *string) bool { 54 | parsedWebpage, err := goquery.NewDocumentFromReader(responseBody) 55 | if err != nil { 56 | log.Fatal("[x] cannot parse webpage. Please report to admins with the query attached.") 57 | } 58 | defer responseBody.Close() 59 | 60 | if parsedWebpage.Find("div.timeline-footer").Length() > 0 { 61 | return false 62 | } 63 | 64 | var tweets []Tweet 65 | parsedWebpage.Find("div.timeline-item").Each(func(i int, t *goquery.Selection) { 66 | tweet_ID_h, _ := t.Find("a").Attr("href") 67 | tweet_ID_s := strings.Split(tweet_ID_h, "/") 68 | tweet_ID := extractViaRegexp(&(tweet_ID_s[len(tweet_ID_s)-1]), `\d*`) 69 | 70 | tweet_URL := fmt.Sprintf("https://twitter.com%s", strings.Split(tweet_ID_h, "#")[0]) 71 | 72 | tweet_TS, _ := t.Find("span.tweet-date").Find("a").Attr("title") 73 | 74 | tweet_text := t.Find("div.tweet-content.media-body").Text() 75 | 76 | reply_to := t.Find("div.replying-to").Text() 77 | 78 | tweet_handle := t.Find("a.username").First().Text() 79 | tweet_fname := t.Find("a.fullname").First().Text() 80 | 81 | tweet_stats := t.Find("div.tweet-stats") 82 | tweet_stats_reply, _ := strconv.ParseInt( 83 | strings.TrimSpace( 84 | strings.ReplaceAll( 85 | tweet_stats.Find("span.tweet-stat").Eq(0).Text(), ",", "",)), 10, 64) 86 | tweet_stats_retweet, _ := strconv.ParseInt( 87 | strings.TrimSpace( 88 | strings.ReplaceAll( 89 | tweet_stats.Find("span.tweet-stat").Eq(1).Text(), ",", "")), 10, 64) 90 | tweet_stats_quote, _ := strconv.ParseInt( 91 | strings.TrimSpace( 92 | strings.ReplaceAll( 93 | tweet_stats.Find("span.tweet-stat").Eq(2).Text(), ",", "")), 10, 64) 94 | tweet_stats_like, _ := strconv.ParseInt( 95 | strings.TrimSpace( 96 | strings.ReplaceAll( 97 | tweet_stats.Find("span.tweet-stat").Eq(3).Text(), ",", "")), 10, 64) 98 | 99 | tweet_attachments := make([]Attachment, 0) 100 | t.Find("div.attachments").Find("div.attachment.image").Find("img").Each(func(i int, s *goquery.Selection) { 101 | src, exists := s.Attr("src") 102 | alt, _ := s.Attr("alt") 103 | if exists { 104 | src = fmt.Sprintf("https://%s%s", *Instance, src) 105 | tweet_attachments = append(tweet_attachments, Attachment{ 106 | Type: "photo", 107 | URL: &src, 108 | AltText: &alt, 109 | }) 110 | } 111 | }) 112 | t.Find("div.attachments").Find("video.gif").Each(func(i int, s *goquery.Selection) { 113 | preview, exists := s.Attr("poster") 114 | if exists { 115 | src, _ := s.Find("source").Attr("src") 116 | preview = fmt.Sprintf("https://%s%s", *Instance, preview) 117 | src = fmt.Sprintf("https://%s%s", *Instance, src) 118 | tweet_attachments = append(tweet_attachments, Attachment{ 119 | Type: "animated_gif", 120 | URL: &src, 121 | PreviewImageURL: &preview, 122 | }) 123 | } 124 | }) 125 | t.Find("div.attachments").Find("div.gallery-video").Find("video").Each(func(i int, s *goquery.Selection) { 126 | preview, exists := s.Attr("poster") 127 | if exists { 128 | var ur *string 129 | src, exists := s.Attr("data-url") 130 | src = fmt.Sprintf("https://%s%s", *Instance, src) 131 | preview = fmt.Sprintf("https://%s%s", *Instance, preview) 132 | if exists { 133 | ur = &src 134 | } 135 | tweet_attachments = append(tweet_attachments, Attachment{ 136 | Type: "video", 137 | URL: ur, 138 | PreviewImageURL: &preview, 139 | }) 140 | } 141 | }) 142 | 143 | var quote_fullname, quote_username, quote_date, quote_text, quote_id string 144 | if quoteInfo := t.Find("div.quote"); quoteInfo.Length() > 0 { 145 | quote_fullname = quoteInfo.Find("a.fullname").Text() 146 | quote_username = quoteInfo.Find("a.username").Text() 147 | quote_date, _ = quoteInfo.Find("span.tweet-date").Find("a").Attr("title") 148 | quote_id_h, _ := quoteInfo.Find("a.quote-link").Attr("href") 149 | quote_id_s := strings.Split(quote_id_h, "/") 150 | quote_id = extractViaRegexp(&(quote_id_s[len(quote_id_s)-1]), `\d*`) 151 | quote_text = quoteInfo.Find("div.quote-text").Text() 152 | } 153 | 154 | stats := TweetStats{ 155 | Replies: tweet_stats_reply, 156 | Retweets: tweet_stats_retweet, 157 | Quotes: tweet_stats_quote, 158 | Likes: tweet_stats_like, 159 | } 160 | 161 | if tweet_ID != "" { 162 | tweet := Tweet{ 163 | ID: tweet_ID, 164 | URL: tweet_URL, 165 | Text: tweet_text, 166 | ReplyTo: reply_to, 167 | Username: tweet_handle, 168 | Fullname: tweet_fname, 169 | Timestamp: tweet_TS, 170 | Attachments: tweet_attachments, 171 | Stats: stats, 172 | QuoteFullname: quote_fullname, 173 | QuoteUsername: quote_username, 174 | QuoteDate: quote_date, 175 | QuoteID: quote_id, 176 | QuoteText: quote_text, 177 | } 178 | tweets = append(tweets, tweet) 179 | } 180 | }) 181 | 182 | if len(tweets) == 0 { 183 | return false 184 | } 185 | 186 | FormatTweets(*Format, tweets) 187 | 188 | *cursor, _ = parsedWebpage.Find("div.show-more").Last().Find("a").Attr("href") 189 | return true 190 | } 191 | --------------------------------------------------------------------------------