The response has been limited to 50k tokens of the smallest files in the repo. You can remove this limitation by removing the max tokens filter.
├── .github
    ├── FUNDING.yml
    └── ISSUE_TEMPLATE
    │   └── data_source.md
├── .gitignore
├── .sqliterc
├── LICENSE.txt
├── README.md
├── account.go
├── cmd
    └── timeliner
    │   └── main.go
├── datasource.go
├── datasources
    ├── facebook
    │   ├── facebook.go
    │   ├── media.go
    │   └── post.go
    ├── googlelocation
    │   └── googlelocation.go
    ├── googlephotos
    │   ├── googlephotos.go
    │   ├── media.go
    │   └── takeoutarchive.go
    ├── instagram
    │   ├── instagram.go
    │   └── models.go
    ├── smsbackuprestore
    │   ├── mms.go
    │   ├── sms.go
    │   └── smsbackuprestore.go
    └── twitter
    │   ├── api.go
    │   ├── api_test.go
    │   ├── archives.go
    │   ├── models.go
    │   └── twitter.go
├── db.go
├── go.mod
├── go.sum
├── itemfiles.go
├── itemgraph.go
├── mapmutex.go
├── oauth2.go
├── oauth2client
    ├── browser.go
    ├── localapp.go
    ├── oauth2.go
    ├── oauth2proxy
    │   ├── cmd
    │   │   └── oauth2proxy
    │   │   │   └── main.go
    │   └── proxy.go
    └── remoteapp.go
├── persons.go
├── processing.go
├── ratelimit.go
├── timeliner.go
└── wrappedclient.go


/.github/FUNDING.yml:
--------------------------------------------------------------------------------
 1 | # These are supported funding model platforms
 2 | 
 3 | github: [mholt] # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
 4 | patreon: # Replace with a single Patreon username
 5 | open_collective: # Replace with a single Open Collective username
 6 | ko_fi: # Replace with a single Ko-fi username
 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
 9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | otechie: # Replace with a single Otechie username
12 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
13 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/data_source.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: New data source request
 3 | about: Request a new data source
 4 | title: ''
 5 | labels: 'data source'
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | <!--
11 | This template is specifically for requesting the addition of a new data source (a way to add items to the timeline). Please answer all the questions as completely as possible. Put some effort into it since any implementation is going to require even more effort. If questions are not answered sufficiently, the issue may be closed.
12 | 
13 | PLEASE NOTE: This project is a community effort. We hope that after posting this issue, you will take the time to implement it and submit a pull request for everyone to use!
14 | -->
15 | 
16 | ## 1. What is the data source you want to add?
17 | <!-- Please give the data source's name and website and explain why it would be useful to have its content on a personal timeline. -->
18 | 
19 | 
20 | 
21 | 
22 | ## 2. What constitutes an "item" from this data source?
23 | <!-- An item is an entry on the timeline. Some data sources have multiple things that are "items" - for example: photos, blog posts, or text messages can all be items. An item must make sense to put on a timeline, and items must have unique IDs. -->
24 | 
25 | 
26 | 
27 | 
28 | ## 3. How are items obtained from the data source?
29 | <!-- Is there a free API that allows us to get content from this source? If so, what authentication is required? Or do we have to manually import data from a file? Please describe the process in detail and link to documentation! -->
30 | 
31 | 
32 | 
33 | 
34 | ### 3a. If authentication is required, how does a user create or obtain credentials for Timeliner to access the data?
35 | <!-- For example, APIs that use OAuth usually require creating an app or client with the service provider before their APIs can be accessed. What is that process? We will need to add this to the wiki for others to know how to get set up, so be clear and list the steps here. Check our project wiki first, because it might already be implemented (for example, Google OAuth is already in place.) -->
36 | 
37 | 
38 | 
39 | 
40 | ### 3b. If an API is available, what are its rate limits?
41 | <!-- Please link to rate limit documentation as well. -->
42 | 
43 | 
44 | 
45 | 
46 | ### 3c. If a file is imported, how is the file obtained?
47 | <!-- What is the process a user must go through to obtain the specific file that the data source is designed to import from? -->
48 | 
49 | 
50 | 
51 | 
52 | ### 3d. If a file is imported, how do we read the file?
53 | <!-- Is the file a compressed archive? How do we get the items out? Is the content and metadata separate? Please link to any documentation or provide a sample file. -->
54 | 
55 | 
56 | 
57 | 
58 | ## 4. How can items from this data source be related?
59 | <!-- Often, items form relationships with other items; for example, an item might be a reply to another item, or an item might contain another item. Think of relationships as uni-or-bi-directional arrows between items, with a label on the arrow. Relationships enrich the data obtained from this source. What kinds of useful relationships can be expressed from this data source? Do the relationships work both ways or just one way? Talk about this. -->
60 | 
61 | 
62 | 
63 | 
64 | ## 5. What constitutes a "collection" from this data source?
65 | <!-- A collection is a group of items (like a photo album). Note that collections are different from item relationships. Some data sources don't have collections; please explain. -->
66 | 
67 | 
68 | 
69 | 
70 | ## 6. What might not be trivial, obvious, or straightforward when implementing this data source?
71 | <!-- Most data sources have nuances or caveats, some of which might not be obvious. Please think hard about this and use your experience with this data source to think of things that need special consideration. For example, a data source might only allow the most recent items to be obtained; how could we overcome that, maybe via a data export? See our wiki for "Writing a Data Source" to get ideas about what might be tricky. Ask unanswered questions here, start a discussion. Data sources can't be implemented successfully until these details are figured out. -->
72 | 
73 | 
74 | 
75 | 
76 | ## Bonus: How do you like Timeliner? How much data are you preserving with it? Which existing data sources do you use?
77 | <!-- I want to know! -->
78 | 
79 | 
80 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | _gitignore/
2 | _storage/
3 | oauth2client/oauth2proxy/cmd/oauth2proxy/credentials.toml
4 | **/timeliner.toml
5 | cmd/timeliner/timeliner


--------------------------------------------------------------------------------
/.sqliterc:
--------------------------------------------------------------------------------
1 | PRAGMA foreign_keys = ON;
2 | 


--------------------------------------------------------------------------------
/account.go:
--------------------------------------------------------------------------------
  1 | package timeliner
  2 | 
  3 | import (
  4 | 	"bytes"
  5 | 	"encoding/gob"
  6 | 	"fmt"
  7 | 	"net/http"
  8 | 	"sync"
  9 | 	"time"
 10 | )
 11 | 
// Account represents an account with a service.
type Account struct {
	ID            int64  // row ID (primary key) in the accounts table
	DataSourceID  string // ID of the data source this account belongs to
	UserID        string // the user's ID on the remote service
	person        Person
	authorization []byte // credentials bytes as returned by the data source's authenticate function
	checkpoint    []byte // gob-encoded resume state saved by a previous run, if any
	lastItemID    *int64 // ID of the most recently stored item, if any (nullable in DB)

	t  *Timeline          // the timeline this account is stored in
	ds DataSource         // the registered data source definition
	cp *checkpointWrapper // decoded form of checkpoint (see getAccount)
}
 26 | 
// NewHTTPClient returns an HTTP client that is suitable for use
// with an API associated with the account's data source. If
// OAuth2 is configured for the data source, the client has OAuth2
// credentials. If a rate limit is configured, this client is
// rate limited. A sane default timeout is set, and any fields
// on the returned Client value can be modified as needed.
func (acc Account) NewHTTPClient() (*http.Client, error) {
	httpClient := new(http.Client)
	if acc.ds.OAuth2.ProviderID != "" {
		// data source uses OAuth2; replace the client with one
		// whose transport attaches the stored credentials
		var err error
		httpClient, err = acc.NewOAuth2HTTPClient()
		if err != nil {
			return nil, err
		}
	}
	if acc.ds.RateLimit.RequestsPerHour > 0 {
		// wrap the transport (which may still be nil here) so
		// requests honor the data source's declared rate limit
		httpClient.Transport = acc.NewRateLimitedRoundTripper(httpClient.Transport)
	}
	httpClient.Timeout = 60 * time.Second
	return httpClient, nil
}
 48 | 
 49 | func (acc Account) String() string {
 50 | 	return acc.DataSourceID + "/" + acc.UserID
 51 | }
 52 | 
 53 | // AddAccount authenticates userID with the service identified
 54 | // within the application by dataSourceID, and then stores it in the
 55 | // database. The account must not yet exist.
 56 | func (t *Timeline) AddAccount(dataSourceID, userID string) error {
 57 | 	// ensure account is not already stored in our system
 58 | 	var count int
 59 | 	err := t.db.QueryRow(`SELECT COUNT(*) FROM accounts WHERE data_source_id=? AND user_id=? LIMIT 1`,
 60 | 		dataSourceID, userID).Scan(&count)
 61 | 	if err != nil {
 62 | 		return fmt.Errorf("checking if account is already stored: %v", err)
 63 | 	}
 64 | 	if count > 0 {
 65 | 		return fmt.Errorf("account already stored in database: %s/%s", dataSourceID, userID)
 66 | 	}
 67 | 
 68 | 	return t.Authenticate(dataSourceID, userID)
 69 | }
 70 | 
 71 | // Authenticate gets authentication for userID with dataSourceID. If the
 72 | // account already exists in the database, it will be updated with the
 73 | // latest authorization.
 74 | func (t *Timeline) Authenticate(dataSourceID, userID string) error {
 75 | 	ds, ok := dataSources[dataSourceID]
 76 | 	if !ok {
 77 | 		return fmt.Errorf("data source not registered: %s", dataSourceID)
 78 | 	}
 79 | 
 80 | 	// authenticate with the data source (if necessary)
 81 | 	var credsBytes []byte
 82 | 	var err error
 83 | 	if authFn := ds.authFunc(); authFn != nil {
 84 | 		credsBytes, err = authFn(userID)
 85 | 		if err != nil {
 86 | 			return fmt.Errorf("authenticating %s for %s: %v", userID, dataSourceID, err)
 87 | 		}
 88 | 	}
 89 | 
 90 | 	// make sure the data source is registered in the DB
 91 | 	_, err = t.db.Exec(`INSERT OR IGNORE INTO data_sources (id, name) VALUES (?, ?)`,
 92 | 		dataSourceID, ds.Name)
 93 | 	if err != nil {
 94 | 		return fmt.Errorf("saving data source record: %v", err)
 95 | 	}
 96 | 
 97 | 	// store the account along with our authorization to access it
 98 | 	_, err = t.db.Exec(`INSERT INTO accounts
 99 | 		(data_source_id, user_id, authorization)
100 | 		VALUES (?, ?, ?)
101 | 		ON CONFLICT (data_source_id, user_id)
102 | 		DO UPDATE SET authorization=?`,
103 | 		dataSourceID, userID, credsBytes,
104 | 		credsBytes)
105 | 	if err != nil {
106 | 		return fmt.Errorf("inserting into or updating DB: %v", err)
107 | 	}
108 | 
109 | 	return nil
110 | }
111 | 
112 | // NewClient returns a new Client that is ready to interact with
113 | // the data source for the account uniquely specified by the data
114 | // source ID and the user ID for that data source. The Client is
115 | // actually wrapped by a type with unexported fields that are
116 | // necessary for internal use.
117 | func (t *Timeline) NewClient(dataSourceID, userID string) (WrappedClient, error) {
118 | 	ds, ok := dataSources[dataSourceID]
119 | 	if !ok {
120 | 		return WrappedClient{}, fmt.Errorf("data source not registered: %s", dataSourceID)
121 | 	}
122 | 	if ds.NewClient == nil {
123 | 		return WrappedClient{}, fmt.Errorf("impossible to make client for data source: %s", dataSourceID)
124 | 	}
125 | 
126 | 	acc, err := t.getAccount(dataSourceID, userID)
127 | 	if err != nil {
128 | 		return WrappedClient{}, fmt.Errorf("getting account: %v", err)
129 | 	}
130 | 
131 | 	cl, err := ds.NewClient(acc)
132 | 	if err != nil {
133 | 		return WrappedClient{}, fmt.Errorf("making client from data source: %v", err)
134 | 	}
135 | 
136 | 	return WrappedClient{
137 | 		Client:     cl,
138 | 		tl:         t,
139 | 		acc:        acc,
140 | 		ds:         ds,
141 | 		lastItemMu: new(sync.Mutex),
142 | 	}, nil
143 | }
144 | 
// getAccount loads the account identified by dsID/userID from the
// database, attaching the registered data source definition and this
// timeline, and decoding any saved checkpoint.
func (t *Timeline) getAccount(dsID, userID string) (Account, error) {
	ds, ok := dataSources[dsID]
	if !ok {
		return Account{}, fmt.Errorf("data source not registered: %s", dsID)
	}
	acc := Account{
		ds: ds,
		t:  t,
	}
	err := t.db.QueryRow(`SELECT
		id, data_source_id, user_id, authorization, checkpoint, last_item_id
		FROM accounts WHERE data_source_id=? AND user_id=? LIMIT 1`,
		dsID, userID).Scan(&acc.ID, &acc.DataSourceID, &acc.UserID, &acc.authorization, &acc.checkpoint, &acc.lastItemID)
	if err != nil {
		return acc, fmt.Errorf("querying account %s/%s from DB: %v", dsID, userID, err)
	}
	// a non-nil checkpoint was saved by a prior run; decode it now
	// (presumably so the client can resume an interrupted listing —
	// confirm against the processing code)
	if acc.checkpoint != nil {
		err = UnmarshalGob(acc.checkpoint, &acc.cp)
		if err != nil {
			return acc, fmt.Errorf("decoding checkpoint wrapper: %v", err)
		}
	}
	return acc, nil
}
169 | 
// MarshalGob is a convenient way to gob-encode v.
func MarshalGob(v interface{}) ([]byte, error) {
	var buf bytes.Buffer
	err := gob.NewEncoder(&buf).Encode(v)
	return buf.Bytes(), err
}

// UnmarshalGob is a convenient way to gob-decode data into v.
func UnmarshalGob(data []byte, v interface{}) error {
	dec := gob.NewDecoder(bytes.NewReader(data))
	return dec.Decode(v)
}
181 | 


--------------------------------------------------------------------------------
/cmd/timeliner/main.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"context"
  5 | 	"flag"
  6 | 	"fmt"
  7 | 	"log"
  8 | 	"os"
  9 | 	"strings"
 10 | 	"sync"
 11 | 	"time"
 12 | 
 13 | 	"github.com/BurntSushi/toml"
 14 | 	"github.com/mholt/timeliner"
 15 | 	"github.com/mholt/timeliner/oauth2client"
 16 | 	"golang.org/x/oauth2"
 17 | 
 18 | 	// plug in data sources
 19 | 	_ "github.com/mholt/timeliner/datasources/facebook"
 20 | 	_ "github.com/mholt/timeliner/datasources/googlelocation"
 21 | 	_ "github.com/mholt/timeliner/datasources/googlephotos"
 22 | 	_ "github.com/mholt/timeliner/datasources/instagram"
 23 | 	"github.com/mholt/timeliner/datasources/smsbackuprestore"
 24 | 	"github.com/mholt/timeliner/datasources/twitter"
 25 | )
 26 | 
// init registers all command-line flags. Default values come from
// the package-level variables declared at the bottom of this file.
func init() {
	// general operation flags
	flag.StringVar(&configFile, "config", configFile, "The path to the config file to load")
	flag.StringVar(&repoDir, "repo", repoDir, "The path to the folder of the repository")
	flag.IntVar(&maxRetries, "max-retries", maxRetries, "If > 0, will retry on failure at most this many times")
	flag.DurationVar(&retryAfter, "retry-after", retryAfter, "If > 0, will wait this long between retries")
	flag.BoolVar(&verbose, "v", verbose, "Verbose output (can be very slow if data source isn't bottlenecked by network)")

	// processing-option flags (mapped onto timeliner.ProcessingOptions in main)
	flag.BoolVar(&prune, "prune", prune, "When finishing, delete items not found on remote (download-all or import only)")
	flag.BoolVar(&integrity, "integrity", integrity, "Perform integrity check on existing items and reprocess if needed (download-all or import only)")
	flag.BoolVar(&reprocess, "reprocess", reprocess, "Reprocess every item that has not been modified locally (download-all or import only)")
	flag.StringVar(&merge, "merge", merge, "Comma-separated list of merge options: soft (required, enables 'soft' merging on: account+timestamp+text or filename), and values to overwrite: id,text,file,metadata")

	// timeframe flags; parsed by parseTimeframe
	flag.StringVar(&tfStartInput, "start", "", "Timeframe start (relative=duration, absolute=YYYY/MM/DD)")
	flag.StringVar(&tfEndInput, "end", "", "Timeframe end (relative=duration, absolute=YYYY/MM/DD)")

	// data-source-specific flags
	flag.BoolVar(&twitterRetweets, "twitter-retweets", twitterRetweets, "Twitter: include retweets")
	flag.BoolVar(&twitterReplies, "twitter-replies", twitterReplies, "Twitter: include replies that are not just replies to self")

	flag.StringVar(&phoneDefaultRegion, "phone-default-region", phoneDefaultRegion, "SMS Backup & Restore: default region")
}
 47 | 
 48 | func main() {
 49 | 	flag.Parse()
 50 | 
 51 | 	if maxRetries < 0 {
 52 | 		maxRetries = 0
 53 | 	}
 54 | 
 55 | 	// split the CLI arguments into subcommand and arguments
 56 | 	args := flag.Args()
 57 | 	if len(args) == 0 {
 58 | 		log.Fatal("[FATAL] Missing subcommand and account arguments (specify one or more of 'data_source_id/user_id')")
 59 | 	}
 60 | 	subcmd := args[0]
 61 | 	accountList := args[1:]
 62 | 	if subcmd == "import" {
 63 | 		// special case; import takes an extra argument before account list
 64 | 		if len(args) != 3 {
 65 | 			log.Fatal("[FATAL] Expecting: import <filename> <data_source_id/user_id>")
 66 | 		}
 67 | 		accountList = args[2:]
 68 | 		if len(args) == 0 {
 69 | 			log.Fatal("[FATAL] No accounts to use (specify one or more 'data_source_id/user_id' arguments)")
 70 | 		}
 71 | 	}
 72 | 
 73 | 	// load the command config
 74 | 	err := loadConfig()
 75 | 	if err != nil {
 76 | 		log.Fatalf("[FATAL] Loading configuration: %v", err)
 77 | 	}
 78 | 
 79 | 	// parse the accounts out of the CLI
 80 | 	accounts, err := getAccounts(accountList)
 81 | 	if err != nil {
 82 | 		log.Fatalf("[FATAL] %v", err)
 83 | 	}
 84 | 	if len(accounts) == 0 {
 85 | 		log.Fatalf("[FATAL] No accounts specified")
 86 | 	}
 87 | 
 88 | 	// open the timeline
 89 | 	tl, err := timeliner.Open(repoDir)
 90 | 	if err != nil {
 91 | 		log.Fatalf("[FATAL] Opening timeline: %v", err)
 92 | 	}
 93 | 	defer tl.Close()
 94 | 
 95 | 	// as a special case, handle authentication subcommands separately
 96 | 	switch subcmd {
 97 | 	case "add-account":
 98 | 		for _, a := range accounts {
 99 | 			err := tl.AddAccount(a.dataSourceID, a.userID)
100 | 			if err != nil {
101 | 				log.Fatalf("[FATAL] Adding account: %v", err)
102 | 			}
103 | 		}
104 | 		return
105 | 	case "reauth":
106 | 		for _, a := range accounts {
107 | 			err := tl.Authenticate(a.dataSourceID, a.userID)
108 | 			if err != nil {
109 | 				log.Fatalf("[FATAL] Authenticating: %v", err)
110 | 			}
111 | 		}
112 | 		return
113 | 	}
114 | 
115 | 	// get the timeframe within which to constrain item processing (multiple commands use this)
116 | 	tf, err := parseTimeframe()
117 | 	if err != nil {
118 | 		log.Fatalf("[FATAL] %v", err)
119 | 	}
120 | 
121 | 	// make the processing options
122 | 	var mergeOpt timeliner.MergeOptions
123 | 	mergeOptVals := strings.Split(merge, ",")
124 | 	for _, val := range mergeOptVals {
125 | 		switch val {
126 | 		case "":
127 | 		case "soft":
128 | 			mergeOpt.SoftMerge = true
129 | 		case "id":
130 | 			mergeOpt.PreferNewID = true
131 | 		case "text":
132 | 			mergeOpt.PreferNewDataText = true
133 | 		case "file":
134 | 			mergeOpt.PreferNewDataFile = true
135 | 		case "meta":
136 | 			mergeOpt.PreferNewMetadata = true
137 | 		default:
138 | 			log.Fatalf("[FATAL] Unrecognized merge option: '%s'", val)
139 | 		}
140 | 	}
141 | 	if !mergeOpt.SoftMerge && (mergeOpt.PreferNewID || mergeOpt.PreferNewDataText || mergeOpt.PreferNewDataFile || mergeOpt.PreferNewMetadata) {
142 | 		// for now, the only kind of merging is "soft" merging, so if it is not enabled but other merge options are set, that's probably a user error
143 | 		log.Fatal("[FATAL] Merge options are specified but merging is not enabled (-merge=soft); only soft merging is implemented")
144 | 	}
145 | 	procOpt := timeliner.ProcessingOptions{
146 | 		Reprocess: reprocess,
147 | 		Prune:     prune,
148 | 		Integrity: integrity,
149 | 		Timeframe: tf,
150 | 		Merge:     mergeOpt,
151 | 		Verbose:   verbose,
152 | 	}
153 | 
154 | 	// make a client for each account
155 | 	var clients []timeliner.WrappedClient
156 | 	for _, a := range accounts {
157 | 		wc, err := tl.NewClient(a.dataSourceID, a.userID)
158 | 		if err != nil {
159 | 			log.Fatalf("[FATAL][%s/%s] Creating data source client: %v", a.dataSourceID, a.userID, err)
160 | 		}
161 | 
162 | 		// configure the client (TODO: this is not good design; should happen in their own packages)
163 | 		switch v := wc.Client.(type) {
164 | 		case *twitter.Client:
165 | 			v.Retweets = twitterRetweets
166 | 			v.Replies = twitterReplies
167 | 		case *smsbackuprestore.Client:
168 | 			v.DefaultRegion = phoneDefaultRegion
169 | 		}
170 | 
171 | 		clients = append(clients, wc)
172 | 	}
173 | 
174 | 	switch subcmd {
175 | 	case "get-latest":
176 | 		if procOpt.Reprocess || procOpt.Prune || procOpt.Integrity || procOpt.Timeframe.Since != nil {
177 | 			log.Fatalf("[FATAL] The get-latest subcommand does not support -reprocess, -prune, -integrity, or -start")
178 | 		}
179 | 
180 | 		var wg sync.WaitGroup
181 | 		for _, wc := range clients {
182 | 			wg.Add(1)
183 | 			go func(wc timeliner.WrappedClient) {
184 | 				defer wg.Done()
185 | 				ctx, cancel := context.WithCancel(context.Background())
186 | 				for retryNum := 0; retryNum < 1+maxRetries; retryNum++ {
187 | 					if retryNum > 0 {
188 | 						log.Println("[INFO] Retrying command")
189 | 					}
190 | 					err := wc.GetLatest(ctx, procOpt)
191 | 					if err != nil {
192 | 						log.Printf("[ERROR][%s/%s] Getting latest: %v",
193 | 							wc.DataSourceID(), wc.UserID(), err)
194 | 						if retryAfter > 0 {
195 | 							time.Sleep(retryAfter)
196 | 						}
197 | 						continue
198 | 					}
199 | 					break
200 | 				}
201 | 				defer cancel() // TODO: Make this useful, maybe?
202 | 			}(wc)
203 | 		}
204 | 		wg.Wait()
205 | 
206 | 	case "get-all":
207 | 		var wg sync.WaitGroup
208 | 		for _, wc := range clients {
209 | 			wg.Add(1)
210 | 			go func(wc timeliner.WrappedClient) {
211 | 				defer wg.Done()
212 | 				ctx, cancel := context.WithCancel(context.Background())
213 | 				for retryNum := 0; retryNum < 1+maxRetries; retryNum++ {
214 | 					if retryNum > 0 {
215 | 						log.Println("[INFO] Retrying command")
216 | 					}
217 | 					err := wc.GetAll(ctx, procOpt)
218 | 					if err != nil {
219 | 						log.Printf("[ERROR][%s/%s] Downloading all: %v",
220 | 							wc.DataSourceID(), wc.UserID(), err)
221 | 						if retryAfter > 0 {
222 | 							time.Sleep(retryAfter)
223 | 						}
224 | 						continue
225 | 					}
226 | 					break
227 | 				}
228 | 				defer cancel() // TODO: Make this useful, maybe?
229 | 			}(wc)
230 | 		}
231 | 		wg.Wait()
232 | 
233 | 	case "import":
234 | 		file := args[1]
235 | 		wc := clients[0]
236 | 
237 | 		ctx, cancel := context.WithCancel(context.Background())
238 | 		err = wc.Import(ctx, file, procOpt)
239 | 		if err != nil {
240 | 			log.Printf("[ERROR][%s/%s] Importing: %v",
241 | 				wc.DataSourceID(), wc.UserID(), err)
242 | 		}
243 | 		defer cancel() // TODO: Make this useful, maybe?
244 | 
245 | 	default:
246 | 		log.Fatalf("[FATAL] Unrecognized subcommand: %s", subcmd)
247 | 	}
248 | }
249 | 
250 | // parseTimeframe parses tfStartInput and/or tfEndInput and returns
251 | // the resulting timeframe or an error.
252 | func parseTimeframe() (timeliner.Timeframe, error) {
253 | 	var tf timeliner.Timeframe
254 | 	var timeStart, timeEnd time.Time
255 | 
256 | 	if tfStartInput != "" {
257 | 		tfStartRel, err := time.ParseDuration(tfStartInput)
258 | 		if err == nil {
259 | 			timeStart = time.Now().Add(tfStartRel)
260 | 		} else {
261 | 			timeStart, err = time.Parse(dateFormat, tfStartInput)
262 | 			if err != nil {
263 | 				return tf, fmt.Errorf("bad timeframe start value '%s': %v", tfStartInput, err)
264 | 			}
265 | 		}
266 | 		tf.Since = &timeStart
267 | 	}
268 | 
269 | 	if tfEndInput != "" {
270 | 		tfEndRel, err := time.ParseDuration(tfEndInput)
271 | 		if err == nil {
272 | 			timeEnd = time.Now().Add(tfEndRel)
273 | 		} else {
274 | 			timeEnd, err = time.Parse(dateFormat, tfEndInput)
275 | 			if err != nil {
276 | 				return tf, fmt.Errorf("bad timeframe end value '%s': %v", tfEndInput, err)
277 | 			}
278 | 		}
279 | 		tf.Until = &timeEnd
280 | 	}
281 | 
282 | 	if tf.Since != nil && tf.Until != nil && tf.Until.Before(*tf.Since) {
283 | 		return tf, fmt.Errorf("end time must be after start time (start=%s end=%s)", tf.Since, tf.Until)
284 | 	}
285 | 
286 | 	return tf, nil
287 | }
288 | 
// loadConfig reads the TOML config file (if it exists) and installs
// an OAuth2 app source into the timeliner package based on the
// providers the file defines. A missing config file is not an error.
func loadConfig() error {
	// no config file is allowed, but that might be useless
	_, err := os.Stat(configFile)
	if os.IsNotExist(err) {
		return nil
	}

	var cmdConfig commandConfig
	md, err := toml.DecodeFile(configFile, &cmdConfig)
	if err != nil {
		return fmt.Errorf("decoding config file: %v", err)
	}
	// reject unknown keys so typos in the config are caught early
	if len(md.Undecoded()) > 0 {
		return fmt.Errorf("unrecognized key(s) in config file: %+v", md.Undecoded())
	}

	// convert them into oauth2.Configs (the structure of
	// oauth2.Config as TOML is too verbose for my taste)
	// (important to not be pointer values, since the
	// oauth2.Configs need to be copied and changed for
	// each token source that is created)
	oauth2Configs := make(map[string]oauth2.Config)
	for id, prov := range cmdConfig.OAuth2.Providers {
		if prov.RedirectURL == "" {
			prov.RedirectURL = oauth2client.DefaultRedirectURL
		}
		oauth2Configs[id] = oauth2.Config{
			ClientID:     prov.ClientID,
			ClientSecret: prov.ClientSecret,
			RedirectURL:  prov.RedirectURL,
			Endpoint: oauth2.Endpoint{
				AuthURL:  prov.AuthURL,
				TokenURL: prov.TokenURL,
			},
		}
	}

	// TODO: Should this be passed into timeliner.Open() instead?
	// note: each call copies the config value so per-source scopes
	// do not leak between token sources
	timeliner.OAuth2AppSource = func(providerID string, scopes []string) (oauth2client.App, error) {
		cfg, ok := oauth2Configs[providerID]
		if !ok {
			return nil, fmt.Errorf("unsupported provider: %s", providerID)
		}
		cfg.Scopes = scopes
		return oauth2client.LocalAppSource{OAuth2Config: &cfg}, nil
	}

	return nil
}
338 | 
// getAccounts parses each "data_source_id/user_id" argument into
// an accountInfo value, erroring on any malformed entry.
func getAccounts(args []string) ([]accountInfo, error) {
	var accts []accountInfo
	for _, arg := range args {
		slash := strings.Index(arg, "/")
		if slash < 0 {
			return nil, fmt.Errorf("malformed account identifier '%s': expecting '<data_source>/<account>' format", arg)
		}
		accts = append(accts, accountInfo{
			dataSourceID: arg[:slash],
			userID:       arg[slash+1:],
		})
	}
	return accts, nil
}

// accountInfo pairs a data source ID with a user ID, as parsed
// from a '<data_source>/<account>' command-line argument.
type accountInfo struct {
	dataSourceID string
	userID       string
}
358 | 
// commandConfig is the top-level structure of the timeliner.toml file.
type commandConfig struct {
	OAuth2 oauth2Config `toml:"oauth2"`
}

// oauth2Config holds OAuth2 provider settings keyed by provider ID.
type oauth2Config struct {
	Providers map[string]oauth2ProviderConfig `toml:"providers"`
}

// oauth2ProviderConfig holds the credentials and endpoint URLs
// needed to run an OAuth2 flow with a single provider
// (converted to an oauth2.Config in loadConfig).
type oauth2ProviderConfig struct {
	ClientID     string `toml:"client_id"`
	ClientSecret string `toml:"client_secret"`
	RedirectURL  string `toml:"redirect_url"`
	AuthURL      string `toml:"auth_url"`
	TokenURL     string `toml:"token_url"`
}
374 | 
// Flag-backed settings; registered in init with these defaults.
var (
	repoDir    = "./timeliner_repo" // path to the timeline repository folder
	configFile = "timeliner.toml"   // path to the TOML config file
	maxRetries int                  // retries per command (clamped to >= 0 in main)
	retryAfter time.Duration        // pause between retries, if > 0
	verbose    bool

	// processing options (see timeliner.ProcessingOptions)
	integrity bool
	prune     bool
	reprocess bool
	merge     string // comma-separated merge options parsed in main

	// raw timeframe inputs; parsed by parseTimeframe
	tfStartInput, tfEndInput string

	// data-source-specific settings
	twitterRetweets bool
	twitterReplies  bool

	phoneDefaultRegion string = "US"
)

// dateFormat is the reference layout for absolute timeframe values.
const dateFormat = "2006/01/02" // YYYY/MM/DD
396 | 


--------------------------------------------------------------------------------
/datasource.go:
--------------------------------------------------------------------------------
  1 | package timeliner
  2 | 
  3 | import (
  4 | 	"bytes"
  5 | 	"context"
  6 | 	"database/sql"
  7 | 	"encoding/gob"
  8 | 	"fmt"
  9 | 	"log"
 10 | 	"time"
 11 | )
 12 | 
// init gob-encodes an empty Metadata value and saves the resulting
// bytes in metadataGobPrefix — presumably the gob type header that
// prefixes every encoded Metadata stream (confirm at the usage sites
// of metadataGobPrefix elsewhere in the package).
func init() {
	tdBuf := new(bytes.Buffer)
	err := gob.NewEncoder(tdBuf).Encode(Metadata{})
	if err != nil {
		log.Fatalf("[FATAL] Unable to gob-encode metadata struct: %v", err)
	}
	metadataGobPrefix = tdBuf.Bytes()
}
 21 | 
 22 | // RegisterDataSource registers ds as a data source.
 23 | func RegisterDataSource(ds DataSource) error {
 24 | 	if ds.ID == "" {
 25 | 		return fmt.Errorf("missing ID")
 26 | 	}
 27 | 	if ds.Name == "" {
 28 | 		return fmt.Errorf("missing Name")
 29 | 	}
 30 | 	if ds.OAuth2.ProviderID != "" && ds.Authenticate != nil {
 31 | 		return fmt.Errorf("conflicting ways of obtaining authorization")
 32 | 	}
 33 | 
 34 | 	// register the data source
 35 | 	if _, ok := dataSources[ds.ID]; ok {
 36 | 		return fmt.Errorf("data source already registered: %s", ds.ID)
 37 | 	}
 38 | 	dataSources[ds.ID] = ds
 39 | 
 40 | 	return nil
 41 | }
 42 | 
 43 | func saveAllDataSources(db *sql.DB) error {
 44 | 	if len(dataSources) == 0 {
 45 | 		return nil
 46 | 	}
 47 | 
 48 | 	query := `INSERT INTO "data_sources" ("id", "name") VALUES`
 49 | 	var vals []interface{}
 50 | 	var count int
 51 | 
 52 | 	for _, ds := range dataSources {
 53 | 		if count > 0 {
 54 | 			query += ","
 55 | 		}
 56 | 		query += " (?, ?)"
 57 | 		vals = append(vals, ds.ID, ds.Name)
 58 | 		count++
 59 | 	}
 60 | 
 61 | 	query += " ON CONFLICT DO NOTHING"
 62 | 
 63 | 	_, err := db.Exec(query, vals...)
 64 | 	if err != nil {
 65 | 		return fmt.Errorf("writing data sources to DB: %v", err)
 66 | 	}
 67 | 
 68 | 	return nil
 69 | }
 70 | 
// DataSource has information about a
// data source that can be registered.
// At most one of OAuth2 and Authenticate
// may be set (enforced by RegisterDataSource).
type DataSource struct {
	// A snake_cased name of the service
	// that uniquely identifies it from
	// all others.
	ID string

	// The human-readable or brand name of
	// the service.
	Name string

	// If the service authenticates with
	// OAuth2, fill out this field.
	OAuth2 OAuth2

	// Otherwise, if the service uses some
	// other form of authentication,
	// Authenticate is a function which
	// returns the credentials needed to
	// access an account on the service.
	Authenticate AuthenticateFn

	// If the service enforces a rate limit,
	// specify it here. You can abide it by
	// getting an http.Client from the
	// Account passed into NewClient.
	RateLimit RateLimit

	// NewClient is a function which takes
	// information about the account and
	// returns a type which can facilitate
	// transactions with the service.
	NewClient NewClientFn
}
106 | 
107 | // authFunc gets the authentication function for this
108 | // service. If s.Authenticate is set, it returns that;
109 | // if s.OAuth2 is set, it uses a standard OAuth2 func.
110 | func (ds DataSource) authFunc() AuthenticateFn {
111 | 	if ds.Authenticate != nil {
112 | 		return ds.Authenticate
113 | 	} else if ds.OAuth2.ProviderID != "" {
114 | 		return func(userID string) ([]byte, error) {
115 | 			return authorizeWithOAuth2(ds.OAuth2)
116 | 		}
117 | 	}
118 | 	return nil
119 | }
120 | 
// OAuth2 defines which OAuth2 provider a service
// uses and which scopes it requires.
type OAuth2 struct {
	// The ID of the service must be recognized
	// by the OAuth2 app configuration (see the
	// providers defined in the command's config).
	ProviderID string

	// The list of scopes to ask for during auth.
	Scopes []string
}
131 | 
132 | // AuthenticateFn is a function that authenticates userID with a service.
133 | // It returns the authorization or credentials needed to operate. The return
134 | // value should be byte-encoded so it can be stored in the DB to be reused.
135 | // To store arbitrary types, encode the value as a gob, for example.
136 | type AuthenticateFn func(userID string) ([]byte, error)
137 | 
138 | // NewClientFn is a function that returns a client which, given
139 | // the account passed in, can interact with a service provider.
140 | type NewClientFn func(acc Account) (Client, error)
141 | 
// Client is a type that can interact with a data source.
// Implementations are produced by a DataSource's NewClient
// function, one per account.
type Client interface {
	// ListItems lists the items on the account. Items should be
	// sent on itemChan as they are discovered, but related items
	// should be combined onto a single ItemGraph so that their
	// relationships can be stored. If the relationships are not
	// discovered until later, that's OK: item processing is
	// idempotent, so repeating an item from earlier will have no
	// adverse effects (this is possible because a unique ID is
	// required for each item).
	//
	// Implementations must honor the context's cancellation. If
	// ctx.Done() is closed, the function should return. Typically,
	// this is done by having an outer loop select over ctx.Done()
	// and default, where the next page or set of items is handled
	// in the default case.
	//
	// ListItems MUST close itemChan when returning. A
	// `defer close(itemChan)` will usually suffice. Closing
	// this channel signals to the processing goroutine that
	// no more items are coming.
	//
	// Further options for listing items may be passed in opt.
	//
	// If opt.Filename is specified, the implementation is expected
	// to open and list items from that file. If this is not
	// supported, an error should be returned. Conversely, if a
	// filename is not specified but required, an error should be
	// returned.
	//
	// opt.Timeframe consists of two optional timestamp and/or item
	// ID values. If set, item listings should be bounded in the
	// respective direction by that timestamp / item ID. (Items
	// are assumed to be part of a chronology; both timestamp and
	// item ID *may be* provided, when possible, to accommodate
	// data sources which do not constrain by timestamp but which
	// do by item ID instead.) The respective time and item ID
	// fields, if set, will not be in conflict, so either may be
	// used if both are present. While it should be documented if
	// timeframes are not supported, an error need not be returned
	// if they cannot be honored.
	//
	// opt.Checkpoint consists of the last checkpoint for this
	// account if the last call to ListItems did not finish and
	// if a checkpoint was saved. If not nil, the checkpoint
	// should be used to resume the listing instead of starting
	// over from the beginning. Checkpoint values usually consist
	// of page tokens or whatever state is required to resume. Call
	// timeliner.Checkpoint to set a checkpoint. Checkpoints are not
	// required, but if the implementation sets checkpoints, it
	// should be able to resume from one, too.
	ListItems(ctx context.Context, itemChan chan<- *ItemGraph, opt ListingOptions) error
}
195 | 
// Timeframe represents a start and end time and/or
// a start and end item, where either value could be
// nil which means unbounded in that direction.
// When items are used as the timeframe boundaries,
// the ItemID fields will be populated. It is not
// guaranteed that any particular field will be set
// or unset just because other fields are set or unset.
// However, if both Since or both Until fields are
// set, that means the timestamp and items are
// correlated; i.e. the Since timestamp is (approx.)
// that of the item ID. Or, put another way: there
// will never be conflicts among the fields which
// are non-nil.
type Timeframe struct {
	// Since bounds the start and Until bounds the end;
	// either or both may be nil (unbounded).
	Since, Until             *time.Time
	SinceItemID, UntilItemID *string
}
213 | 
214 | func (tf Timeframe) String() string {
215 | 	var sinceItemID, untilItemID string
216 | 	if tf.SinceItemID != nil {
217 | 		sinceItemID = *tf.SinceItemID
218 | 	}
219 | 	if tf.UntilItemID != nil {
220 | 		untilItemID = *tf.UntilItemID
221 | 	}
222 | 	return fmt.Sprintf("{Since:%s Until:%s SinceItemID:%s UntilItemID:%s}",
223 | 		tf.Since, tf.Until, sinceItemID, untilItemID)
224 | }
225 | 
// dataSources is the registry of all registered data sources, keyed by data source ID.
var dataSources = make(map[string]DataSource)
227 | 


--------------------------------------------------------------------------------
/datasources/facebook/media.go:
--------------------------------------------------------------------------------
  1 | package facebook
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"io"
  6 | 	"log"
  7 | 	"mime"
  8 | 	"net/http"
  9 | 	"net/url"
 10 | 	"path"
 11 | 	"time"
 12 | 
 13 | 	"github.com/mholt/timeliner"
 14 | )
 15 | 
// fbMediaPage is one page of media results from the
// Graph API, with paging info for the next request.
type fbMediaPage struct {
	Data   []fbMedia `json:"data"`
	Paging fbPaging  `json:"paging"`
}
 20 | 
 21 | // fbMedia is used for videos, photos, and albums.
// fbMedia is used for videos, photos, and albums.
type fbMedia struct {
	Album         fbAlbum       `json:"album,omitempty"`
	BackdatedTime string        `json:"backdated_time,omitempty"`
	CreatedTime   string        `json:"created_time,omitempty"`
	From          fbFrom        `json:"from,omitempty"`
	Images        []fbImage     `json:"images,omitempty"`
	UpdatedTime   string        `json:"updated_time,omitempty"`
	Description   string        `json:"description,omitempty"`
	Length        float64       `json:"length,omitempty"` // in seconds
	Message       string        `json:"message,omitempty"`
	Name          string        `json:"name,omitempty"`
	Place         *fbPlace      `json:"place,omitempty"`
	Photos        *fbMediaPage  `json:"photos,omitempty"`
	Source        string        `json:"source,omitempty"`
	Status        fbVideoStatus `json:"status,omitempty"`
	MediaID       string        `json:"id,omitempty"`

	// these fields added by us and used internally;
	// they are populated by fillFields
	mediaType          string // "photo" or "video"
	bestSourceURL      string // URL of the preferred content variant
	bestSourceFilename string // filename derived from bestSourceURL
	exifData           map[string]interface{}
}
 45 | 
 46 | func (m *fbMedia) fillFields(mediaType string) {
 47 | 	m.mediaType = mediaType
 48 | 
 49 | 	// get URL to actual media content; we'll need
 50 | 	// it later, and by doing this now, we only have
 51 | 	// to do it once
 52 | 	switch mediaType {
 53 | 	case "photo":
 54 | 		_, _, m.bestSourceURL = m.getLargestImage()
 55 | 	case "video":
 56 | 		m.bestSourceURL = m.Source
 57 | 	}
 58 | 	if m.bestSourceURL != "" {
 59 | 		sourceURL, err := url.Parse(m.bestSourceURL)
 60 | 		if err != nil {
 61 | 			// TODO: What to return in this case? return the error?
 62 | 			log.Printf("[ERROR] Parsing media source URL to get filename: %v", err)
 63 | 		}
 64 | 		m.bestSourceFilename = path.Base(sourceURL.Path)
 65 | 	}
 66 | }
 67 | 
// ID returns the media item's unique Graph API ID.
func (m *fbMedia) ID() string {
	return m.MediaID
}
 71 | 
 72 | func (m *fbMedia) Timestamp() time.Time {
 73 | 	if m.BackdatedTime != "" {
 74 | 		return fbTimeToGoTime(m.BackdatedTime)
 75 | 	}
 76 | 	return fbTimeToGoTime(m.CreatedTime)
 77 | }
 78 | 
 79 | func (m *fbMedia) DataText() (*string, error) {
 80 | 	if m.Description != "" {
 81 | 		return &m.Description, nil
 82 | 	}
 83 | 	if m.Name != "" {
 84 | 		return &m.Name, nil
 85 | 	}
 86 | 	return nil, nil
 87 | }
 88 | 
 89 | func (m *fbMedia) DataFileName() *string {
 90 | 	if m.bestSourceFilename != "" {
 91 | 		return &m.bestSourceFilename
 92 | 	}
 93 | 	return nil
 94 | }
 95 | 
// DataFileHash returns nil; no precomputed hash of the
// media content is available before downloading it.
func (m *fbMedia) DataFileHash() []byte {
	return nil
}
 99 | 
100 | func (m *fbMedia) DataFileReader() (io.ReadCloser, error) {
101 | 	if m.bestSourceURL == "" {
102 | 		return nil, fmt.Errorf("no way to get data file: no best source URL")
103 | 	}
104 | 
105 | 	resp, err := http.Get(m.bestSourceURL)
106 | 	if err != nil {
107 | 		return nil, fmt.Errorf("getting media contents: %v", err)
108 | 	}
109 | 	if resp.StatusCode != http.StatusOK {
110 | 		resp.Body.Close()
111 | 		return nil, fmt.Errorf("HTTP %d: %s", resp.StatusCode, resp.Status)
112 | 	}
113 | 
114 | 	return resp.Body, nil
115 | }
116 | 
117 | func (m *fbMedia) DataFileMIMEType() *string {
118 | 	mt := mime.TypeByExtension(path.Ext(m.bestSourceFilename))
119 | 	if mt != "" {
120 | 		return &mt
121 | 	}
122 | 	return nil
123 | }
124 | 
// Owner returns the ID and name of the user the media is from.
func (m *fbMedia) Owner() (*string, *string) {
	return &m.From.ID, &m.From.Name
}
128 | 
129 | func (m *fbMedia) Class() timeliner.ItemClass {
130 | 	switch m.mediaType {
131 | 	case "photo":
132 | 		return timeliner.ClassImage
133 | 	case "video":
134 | 		return timeliner.ClassVideo
135 | 	}
136 | 	return timeliner.ClassUnknown
137 | }
138 | 
// Metadata returns extra information about the media item;
// not yet implemented (always returns nil, nil).
func (m *fbMedia) Metadata() (*timeliner.Metadata, error) {
	// TODO
	return nil, nil
}
143 | 
144 | func (m *fbMedia) getLargestImage() (height, width int, source string) {
145 | 	var largest int
146 | 	for _, im := range m.Images {
147 | 		size := im.Height * im.Width
148 | 		if size > largest {
149 | 			source = im.Source
150 | 			height = im.Height
151 | 			width = im.Width
152 | 			largest = size
153 | 		}
154 | 	}
155 | 	return
156 | }
157 | 
158 | func (m *fbMedia) Location() (*timeliner.Location, error) {
159 | 	if m.Place != nil {
160 | 		return &timeliner.Location{
161 | 			Latitude:  &m.Place.Location.Latitude,
162 | 			Longitude: &m.Place.Location.Longitude,
163 | 		}, nil
164 | 	}
165 | 	return nil, nil
166 | }
167 | 
// fbVideoStatus describes a video's processing status.
type fbVideoStatus struct {
	VideoStatus string `json:"video_status,omitempty"`
}

// fbAlbum is an album of media items.
type fbAlbum struct {
	CreatedTime string        `json:"created_time,omitempty"`
	Name        string        `json:"name,omitempty"`
	ID          string        `json:"id,omitempty"`
	Photos      []fbMediaPage `json:"photos,omitempty"`
}

// fbImage is one size variant of a photo.
type fbImage struct {
	Height int    `json:"height,omitempty"`
	Source string `json:"source,omitempty"`
	Width  int    `json:"width,omitempty"`
}
184 | 


--------------------------------------------------------------------------------
/datasources/facebook/post.go:
--------------------------------------------------------------------------------
  1 | package facebook
  2 | 
  3 | import (
  4 | 	"io"
  5 | 	"log"
  6 | 	"time"
  7 | 
  8 | 	"github.com/mholt/timeliner"
  9 | )
 10 | 
// fbPost is a post as returned by the Facebook Graph API.
type fbPost struct {
	Attachments   fbPostAttachments `json:"attachments,omitempty"`
	BackdatedTime string            `json:"backdated_time,omitempty"`
	CreatedTime   string            `json:"created_time,omitempty"` // example format: "2018-12-22T19:10:30+0000"
	From          fbFrom            `json:"from,omitempty"`
	Link          string            `json:"link,omitempty"`
	Description   string            `json:"description,omitempty"`
	Message       string            `json:"message,omitempty"`
	Name          string            `json:"name,omitempty"`
	ParentID      string            `json:"parent_id,omitempty"`
	Place         *fbPlace          `json:"place,omitempty"`
	StatusType    string            `json:"status_type,omitempty"`
	Type          string            `json:"type,omitempty"`
	PostID        string            `json:"id,omitempty"`
}
 26 | 
// ID returns the post's unique Graph API ID.
func (p fbPost) ID() string {
	return p.PostID
}
 30 | 
 31 | func (p fbPost) Timestamp() time.Time {
 32 | 	if p.BackdatedTime != "" {
 33 | 		return fbTimeToGoTime(p.BackdatedTime)
 34 | 	}
 35 | 	return fbTimeToGoTime(p.CreatedTime)
 36 | }
 37 | 
// DataText returns the post's message text. Note that a
// non-nil pointer is returned even if the message is empty.
func (p fbPost) DataText() (*string, error) {
	return &p.Message, nil
}
 41 | 
// DataFileName returns nil; posts have no data file.
func (p fbPost) DataFileName() *string {
	return nil
}

// DataFileReader returns nil; posts have no data file.
func (p fbPost) DataFileReader() (io.ReadCloser, error) {
	return nil, nil
}

// DataFileHash returns nil; posts have no data file.
func (p fbPost) DataFileHash() []byte {
	return nil
}

// DataFileMIMEType returns nil; posts have no data file.
func (p fbPost) DataFileMIMEType() *string {
	return nil
}
 57 | 
// Owner returns the ID and name of the post's author.
func (p fbPost) Owner() (*string, *string) {
	return &p.From.ID, &p.From.Name
}

// Class classifies every fbPost as a post item.
func (p fbPost) Class() timeliner.ItemClass {
	return timeliner.ClassPost
}
 65 | 
 66 | func (p fbPost) Metadata() (*timeliner.Metadata, error) {
 67 | 	return &timeliner.Metadata{
 68 | 		Link:        p.Link,
 69 | 		Description: p.Description,
 70 | 		Name:        p.Name,
 71 | 		ParentID:    p.ParentID,
 72 | 		StatusType:  p.StatusType,
 73 | 		Type:        p.Type,
 74 | 	}, nil
 75 | }
 76 | 
 77 | func (p fbPost) Location() (*timeliner.Location, error) {
 78 | 	if p.Place != nil {
 79 | 		return &timeliner.Location{
 80 | 			Latitude:  &p.Place.Location.Latitude,
 81 | 			Longitude: &p.Place.Location.Longitude,
 82 | 		}, nil
 83 | 	}
 84 | 	return nil, nil
 85 | }
 86 | 
// fbPostAttachments is the list of attachments on a post.
type fbPostAttachments struct {
	Data []fbPostAttachmentData `json:"data"`
}

// fbPostAttachmentData is a single attachment, which may
// itself contain subattachments.
type fbPostAttachmentData struct {
	Media          fbPostAttachmentMedia  `json:"media,omitempty"`
	Target         fbPostAttachmentTarget `json:"target,omitempty"`
	Subattachments fbPostAttachments      `json:"subattachments,omitempty"`
	Title          string                 `json:"title,omitempty"`
	Type           string                 `json:"type,omitempty"`
	URL            string                 `json:"url,omitempty"`
}

// fbPostAttachmentMedia holds an attachment's image.
type fbPostAttachmentMedia struct {
	Image fbPostAttachmentImage `json:"image,omitempty"`
}

// fbPostAttachmentImage describes an attachment image.
type fbPostAttachmentImage struct {
	Height int    `json:"height,omitempty"`
	Src    string `json:"src,omitempty"`
	Width  int    `json:"width,omitempty"`
}

// fbPostAttachmentTarget is what an attachment links to.
type fbPostAttachmentTarget struct {
	ID  string `json:"id,omitempty"`
	URL string `json:"url,omitempty"`
}
114 | 
// fbTimeToGoTime parses a Facebook-formatted timestamp
// string into a time.Time. An empty input yields the zero
// time; a malformed input is logged and also yields the
// zero time (best-effort behavior, no error returned).
func fbTimeToGoTime(fbTime string) time.Time {
	if fbTime == "" {
		return time.Time{}
	}
	parsed, err := time.Parse(fbTimeFormat, fbTime)
	if err != nil {
		log.Printf("[ERROR] Parsing timestamp from Facebook: '%s' is not in '%s' format",
			fbTime, fbTimeFormat)
	}
	return parsed
}

// fbTimeFormat is the reference layout for timestamps
// from the Facebook Graph API, e.g. "2018-12-22T19:10:30+0000".
const fbTimeFormat = "2006-01-02T15:04:05+0000"
128 | 


--------------------------------------------------------------------------------
/datasources/googlelocation/googlelocation.go:
--------------------------------------------------------------------------------
  1 | // Package googlelocation implements a Timeliner data source for
  2 | // importing data from the Google Location History (aka Google
  3 | // Maps Timeline).
  4 | package googlelocation
  5 | 
  6 | import (
  7 | 	"context"
  8 | 	"encoding/json"
  9 | 	"fmt"
 10 | 	"io"
 11 | 	"log"
 12 | 	"os"
 13 | 	"sort"
 14 | 	"strconv"
 15 | 	"strings"
 16 | 	"time"
 17 | 
 18 | 	"github.com/mholt/timeliner"
 19 | )
 20 | 
// Data source name and ID, used when registering
// this data source with timeliner.
const (
	DataSourceName = "Google Location History"
	DataSourceID   = "google_location"
)
 26 | 
// dataSource describes this data source. It has no
// authentication or rate limit configured because items
// are read from a local Takeout file (see ListItems).
var dataSource = timeliner.DataSource{
	ID:   DataSourceID,
	Name: DataSourceName,
	NewClient: func(acc timeliner.Account) (timeliner.Client, error) {
		return new(Client), nil
	},
}
 34 | 
 35 | func init() {
 36 | 	err := timeliner.RegisterDataSource(dataSource)
 37 | 	if err != nil {
 38 | 		log.Fatal(err)
 39 | 	}
 40 | }
 41 | 
// Client implements the timeliner.Client interface.
// It is stateless; all input comes from the listing
// options passed to ListItems (notably opt.Filename).
type Client struct{}
 44 | 
 45 | // ListItems lists items from the data source. opt.Filename must be non-empty.
 46 | func (c *Client) ListItems(ctx context.Context, itemChan chan<- *timeliner.ItemGraph, opt timeliner.ListingOptions) error {
 47 | 	defer close(itemChan)
 48 | 
 49 | 	if opt.Filename == "" {
 50 | 		return fmt.Errorf("filename is required")
 51 | 	}
 52 | 
 53 | 	file, err := os.Open(opt.Filename)
 54 | 	if err != nil {
 55 | 		return fmt.Errorf("opening data file: %v", err)
 56 | 	}
 57 | 	defer file.Close()
 58 | 
 59 | 	dec := json.NewDecoder(file)
 60 | 
 61 | 	// read the following opening tokens:
 62 | 	// 1. open brace '{'
 63 | 	// 2. "locations" field name,
 64 | 	// 3. the array value's opening bracket '['
 65 | 	for i := 0; i < 3; i++ {
 66 | 		_, err := dec.Token()
 67 | 		if err != nil {
 68 | 			return fmt.Errorf("decoding opening token: %v", err)
 69 | 		}
 70 | 	}
 71 | 
 72 | 	var prev *location
 73 | 	for dec.More() {
 74 | 		select {
 75 | 		case <-ctx.Done():
 76 | 			return nil
 77 | 		default:
 78 | 			var err error
 79 | 			prev, err = c.processLocation(dec, prev, itemChan)
 80 | 			if err != nil {
 81 | 				return fmt.Errorf("processing location item: %v", err)
 82 | 			}
 83 | 		}
 84 | 	}
 85 | 
 86 | 	return nil
 87 | }
 88 | 
 89 | func (c *Client) processLocation(dec *json.Decoder, prev *location,
 90 | 	itemChan chan<- *timeliner.ItemGraph) (*location, error) {
 91 | 
 92 | 	var l *location
 93 | 	err := dec.Decode(&l)
 94 | 	if err != nil {
 95 | 		return nil, fmt.Errorf("decoding location element: %v", err)
 96 | 	}
 97 | 
 98 | 	// redundancy checks (lots of data points are very similar)
 99 | 	if prev != nil {
100 | 		// if the timestamp of this location is the same
101 | 		// as the previous one, it seems useless to keep
102 | 		// both, so skip this one (also, we produce IDs
103 | 		// based on timestamp, which must be unique --
104 | 		// hence why we compare the unix timestamp values)
105 | 		if l.Timestamp().Unix() == prev.Timestamp().Unix() {
106 | 			return l, nil
107 | 		}
108 | 
109 | 		// if this location is basically the same spot as the
110 | 		// previously-seen one, and if we're sure that the
111 | 		// timestamps are in order, skip it; mostly redundant
112 | 		if locationsSimilar(l, prev) && l.Timestamp().Before(prev.Timestamp()) {
113 | 			return l, nil
114 | 		}
115 | 	}
116 | 
117 | 	// store this item, and possibly connect it to the
118 | 	// previous one if there's a movement activity
119 | 	ig := timeliner.NewItemGraph(l)
120 | 	if movement := l.primaryMovement(); movement != "" && prev != nil {
121 | 		// bidirectional edge, because you may want to know how you got somewhere,
122 | 		// and the timestamps should make it obvious which location is the "from"
123 | 		// or the "to", since you can't go backwards in time (that we know of...)
124 | 		ig.Add(prev, timeliner.Relation{
125 | 			Label:         strings.ToLower(movement),
126 | 			Bidirectional: true,
127 | 		})
128 | 	}
129 | 	itemChan <- ig
130 | 
131 | 	return l, nil
132 | }
133 | 
134 | func locationsSimilar(a, b *location) bool {
135 | 	if a == nil && b == nil {
136 | 		return true
137 | 	}
138 | 	if a == nil || b == nil {
139 | 		return false
140 | 	}
141 | 	return similar(a.LatitudeE7, b.LatitudeE7) &&
142 | 		similar(a.LongitudeE7, b.LongitudeE7)
143 | }
144 | 
// similar reports whether a and b differ by less than a
// fixed tolerance (in E7 coordinate units).
func similar(a, b int) bool {
	const tolerance = 1000
	diff := a - b
	if diff < 0 {
		diff = -diff
	}
	return diff < tolerance
}
152 | 
// location is a single point from a Google Location History
// (Takeout) JSON file. Coordinates are integers scaled by
// 1e7 ("E7" values); the timestamp is in milliseconds,
// encoded as a string.
type location struct {
	TimestampMs      string       `json:"timestampMs"`
	LatitudeE7       int          `json:"latitudeE7"`
	LongitudeE7      int          `json:"longitudeE7"`
	Accuracy         int          `json:"accuracy"`
	Altitude         int          `json:"altitude,omitempty"`
	VerticalAccuracy int          `json:"verticalAccuracy,omitempty"`
	Activity         []activities `json:"activity,omitempty"`
	Velocity         int          `json:"velocity,omitempty"`
	Heading          int          `json:"heading,omitempty"`
}
164 | 
165 | func (l location) primaryMovement() string {
166 | 	if len(l.Activity) == 0 {
167 | 		return ""
168 | 	}
169 | 
170 | 	counts := make(map[string]int)
171 | 	confidences := make(map[string]int)
172 | 	for _, a := range l.Activity {
173 | 		for _, aa := range a.Activity {
174 | 			counts[aa.Type]++
175 | 			confidences[aa.Type] += aa.Confidence
176 | 		}
177 | 	}
178 | 
179 | 	// turn confidence into average confidence,
180 | 	// (ensure all activities are represented),
181 | 	// and keep activities with high enough score
182 | 	var top []activity
183 | 	var hasOnFoot, hasWalking, hasRunning bool
184 | 	for _, a := range movementActivities {
185 | 		count := counts[a]
186 | 		if count == 0 {
187 | 			count = 1 // for the purposes of division
188 | 		}
189 | 		avg := confidences[a] / len(l.Activity)
190 | 		avgSeen := confidences[a] / count
191 | 		if avgSeen > 50 {
192 | 			switch a {
193 | 			case "ON_FOOT":
194 | 				hasOnFoot = true
195 | 			case "WALKING":
196 | 				hasWalking = true
197 | 			case "RUNNING":
198 | 				hasRunning = true
199 | 			}
200 | 			top = append(top, activity{Type: a, Confidence: avg})
201 | 		}
202 | 	}
203 | 	sort.Slice(top, func(i, j int) bool {
204 | 		return top[i].Confidence > top[j].Confidence
205 | 	})
206 | 
207 | 	// consolidate ON_FOOT, WALKING, and RUNNING if more than one is present
208 | 	if hasOnFoot && (hasWalking || hasRunning) {
209 | 		for i := 0; i < len(top); i++ {
210 | 			if hasWalking && hasRunning &&
211 | 				(top[i].Type == "WALKING" || top[i].Type == "RUNNING") {
212 | 				// if both WALKING and RUNNING, prefer more general ON_FOOT
213 | 				top = append(top[:i], top[i+1:]...)
214 | 			} else if top[i].Type == "ON_FOOT" {
215 | 				// if only one of WALKING or RUNNING, prefer that over ON_FOOT
216 | 				top = append(top[:i], top[i+1:]...)
217 | 			}
218 | 		}
219 | 	}
220 | 
221 | 	if len(top) > 0 {
222 | 		return top[0].Type
223 | 	}
224 | 	return ""
225 | }
226 | 
227 | func (l location) hasActivity(act string) bool {
228 | 	for _, a := range l.Activity {
229 | 		for _, aa := range a.Activity {
230 | 			if aa.Type == act && aa.Confidence > 50 {
231 | 				return true
232 | 			}
233 | 		}
234 | 	}
235 | 	return false
236 | }
237 | 
// activities is a timestamped group of activity
// detections reported alongside a location.
type activities struct {
	TimestampMs string     `json:"timestampMs"`
	Activity    []activity `json:"activity"`
}

// activity is a single detected activity type
// with its confidence score.
type activity struct {
	Type       string `json:"type"`
	Confidence int    `json:"confidence"`
}
247 | 
248 | // ID returns a string representation of the timestamp,
249 | // since there is no actual ID provided by the service.
250 | // It is assumed that one cannot be in two places at once.
251 | func (l location) ID() string {
252 | 	ts := fmt.Sprintf("loc_%d", l.Timestamp().Unix())
253 | 	return ts
254 | }
255 | 
256 | func (l location) Timestamp() time.Time {
257 | 	ts, err := strconv.Atoi(l.TimestampMs)
258 | 	if err != nil {
259 | 		return time.Time{}
260 | 	}
261 | 	return time.Unix(int64(ts)/1000, 0)
262 | }
263 | 
// Owner returns nil values; location points carry no
// owner information.
func (l location) Owner() (*string, *string) {
	return nil, nil
}

// Class classifies every location point as a location item.
func (l location) Class() timeliner.ItemClass {
	return timeliner.ClassLocation
}
271 | 
// DataText returns nil; location points have no text content.
func (l location) DataText() (*string, error) {
	return nil, nil
}

// DataFileName returns nil; location points have no data file.
func (l location) DataFileName() *string {
	return nil
}

// DataFileReader returns nil; location points have no data file.
func (l location) DataFileReader() (io.ReadCloser, error) {
	return nil, nil
}

// DataFileHash returns nil; location points have no data file.
func (l location) DataFileHash() []byte {
	return nil
}

// DataFileMIMEType returns nil; location points have no data file.
func (l location) DataFileMIMEType() *string {
	return nil
}
291 | 
292 | func (l location) Metadata() (*timeliner.Metadata, error) {
293 | 	var m timeliner.Metadata
294 | 	var hasMetadata bool
295 | 
296 | 	if l.Velocity > 0 {
297 | 		m.Velocity = l.Velocity
298 | 		hasMetadata = true
299 | 	}
300 | 	if l.Heading > 0 {
301 | 		m.Heading = l.Heading
302 | 		hasMetadata = true
303 | 	}
304 | 	if l.Altitude > 0 {
305 | 		m.Altitude = l.Altitude
306 | 		m.AltitudeAccuracy = l.VerticalAccuracy
307 | 		hasMetadata = true
308 | 	}
309 | 
310 | 	if hasMetadata {
311 | 		return &m, nil
312 | 	}
313 | 	return nil, nil
314 | }
315 | 
316 | func (l location) Location() (*timeliner.Location, error) {
317 | 	lat := float64(l.LatitudeE7) / 1e7
318 | 	lon := float64(l.LongitudeE7) / 1e7
319 | 	return &timeliner.Location{
320 | 		Latitude:  &lat,
321 | 		Longitude: &lon,
322 | 	}, nil
323 | }
324 | 
// movementActivities is the list of activities we care about
// for drawing relationships between two locations. For example,
// we don't care about TILTING (sudden accelerometer adjustment,
// like phone set down or person standing up), UNKNOWN, or STILL
// (where there is no apparent movement detected). It is iterated
// by primaryMovement when scoring a location's activities.
//
// https://developers.google.com/android/reference/com/google/android/gms/location/DetectedActivity
var movementActivities = []string{
	"WALKING",
	"RUNNING",
	"IN_VEHICLE",
	"ON_FOOT",
	"ON_BICYCLE",
}
339 | 


--------------------------------------------------------------------------------
/datasources/googlephotos/googlephotos.go:
--------------------------------------------------------------------------------
  1 | // Package googlephotos implements the Google Photos service
  2 | // using its API, documented at https://developers.google.com/photos/.
  3 | package googlephotos
  4 | 
  5 | import (
  6 | 	"bytes"
  7 | 	"context"
  8 | 	"encoding/json"
  9 | 	"fmt"
 10 | 	"io"
 11 | 	"io/ioutil"
 12 | 	"log"
 13 | 	"net/http"
 14 | 	"net/url"
 15 | 	"strings"
 16 | 	"sync"
 17 | 	"time"
 18 | 
 19 | 	"github.com/mholt/timeliner"
 20 | )
 21 | 
// Data source name and ID.
const (
	DataSourceName = "Google Photos"
	DataSourceID   = "google_photos"

	// apiBase is the root URL of the Google Photos Library API.
	apiBase = "https://photoslibrary.googleapis.com/v1"
)

// dataSource describes this data source: OAuth2 via the
// "google" provider with read-only library scope, and a
// rate limit derived from the documented daily quota.
var dataSource = timeliner.DataSource{
	ID:   DataSourceID,
	Name: DataSourceName,
	OAuth2: timeliner.OAuth2{
		ProviderID: "google",
		Scopes:     []string{"https://www.googleapis.com/auth/photoslibrary.readonly"},
	},
	RateLimit: timeliner.RateLimit{
		RequestsPerHour: 10000 / 24, // https://developers.google.com/photos/library/guides/api-limits-quotas
		BurstSize:       3,
	},
	NewClient: func(acc timeliner.Account) (timeliner.Client, error) {
		httpClient, err := acc.NewHTTPClient()
		if err != nil {
			return nil, err
		}
		return &Client{
			HTTPClient: httpClient,
			userID:     acc.UserID,
			checkpoint: checkpointInfo{mu: new(sync.Mutex)},
		}, nil
	},
}
 53 | 
 54 | func init() {
 55 | 	err := timeliner.RegisterDataSource(dataSource)
 56 | 	if err != nil {
 57 | 		log.Fatal(err)
 58 | 	}
 59 | }
 60 | 
// Client interacts with the Google Photos
// API. It requires an OAuth2-authorized
// HTTP client in order to work properly.
type Client struct {
	HTTPClient           *http.Client
	IncludeArchivedMedia bool // include archived media in date-filtered item listings

	userID     string         // the account's user ID, used in log output
	checkpoint checkpointInfo // resumable listing state, guarded by its mutex
}
 71 | 
 72 | // ListItems lists items from the data source.
 73 | // opt.Timeframe precision is day-level at best.
 74 | func (c *Client) ListItems(ctx context.Context, itemChan chan<- *timeliner.ItemGraph, opt timeliner.ListingOptions) error {
 75 | 	defer close(itemChan)
 76 | 
 77 | 	if opt.Filename != "" {
 78 | 		return c.listFromTakeoutArchive(ctx, itemChan, opt)
 79 | 	}
 80 | 
 81 | 	// load any previous checkpoint
 82 | 	c.checkpoint.load(opt.Checkpoint)
 83 | 
 84 | 	// get items and collections
 85 | 	errChan := make(chan error)
 86 | 	go func() {
 87 | 		err := c.listItems(ctx, itemChan, opt)
 88 | 		errChan <- err
 89 | 	}()
 90 | 	go func() {
 91 | 		err := c.listCollections(ctx, itemChan, opt)
 92 | 		errChan <- err
 93 | 	}()
 94 | 
 95 | 	// read exactly 2 error (or nil) values to ensure we
 96 | 	// block precisely until the two listers are done
 97 | 	var errs []string
 98 | 	for i := 0; i < 2; i++ {
 99 | 		err := <-errChan
100 | 		if err != nil {
101 | 			log.Printf("[ERROR] %s/%s: a listing goroutine errored: %v", DataSourceID, c.userID, err)
102 | 			errs = append(errs, err.Error())
103 | 		}
104 | 	}
105 | 	if len(errs) > 0 {
106 | 		return fmt.Errorf("one or more errors: %s", strings.Join(errs, ", "))
107 | 	}
108 | 
109 | 	return nil
110 | }
111 | 
112 | func (c *Client) listItems(ctx context.Context, itemChan chan<- *timeliner.ItemGraph, opt timeliner.ListingOptions) error {
113 | 	c.checkpoint.mu.Lock()
114 | 	pageToken := c.checkpoint.ItemsNextPage
115 | 	c.checkpoint.mu.Unlock()
116 | 
117 | 	for {
118 | 		select {
119 | 		case <-ctx.Done():
120 | 			return nil
121 | 		default:
122 | 			var err error
123 | 			pageToken, err = c.getItemsNextPage(itemChan, pageToken, opt.Timeframe)
124 | 			if err != nil {
125 | 				return fmt.Errorf("getting items on next page: %v", err)
126 | 			}
127 | 			if pageToken == "" {
128 | 				return nil
129 | 			}
130 | 
131 | 			c.checkpoint.mu.Lock()
132 | 			c.checkpoint.ItemsNextPage = pageToken
133 | 			c.checkpoint.save(ctx)
134 | 			c.checkpoint.mu.Unlock()
135 | 		}
136 | 	}
137 | }
138 | 
139 | func (c *Client) getItemsNextPage(itemChan chan<- *timeliner.ItemGraph,
140 | 	pageToken string, timeframe timeliner.Timeframe) (string, error) {
141 | 	reqBody := listMediaItemsRequest{
142 | 		PageSize:  100,
143 | 		PageToken: pageToken,
144 | 	}
145 | 	if timeframe.Since != nil || timeframe.Until != nil {
146 | 		reqBody.Filters = &listMediaItemsFilter{
147 | 			DateFilter: listMediaItemsDateFilter{
148 | 				Ranges: []listMediaItemsFilterRange{dateRange(timeframe)},
149 | 			},
150 | 			IncludeArchivedMedia: c.IncludeArchivedMedia,
151 | 		}
152 | 	}
153 | 
154 | 	page, err := c.pageOfMediaItems(reqBody)
155 | 	if err != nil {
156 | 		return "", fmt.Errorf("requesting next page: %v", err)
157 | 	}
158 | 
159 | 	for _, item := range page.MediaItems {
160 | 		itemChan <- &timeliner.ItemGraph{
161 | 			Node: item,
162 | 		}
163 | 	}
164 | 
165 | 	return page.NextPageToken, nil
166 | }
167 | 
168 | // listCollections lists media items by iterating each album. As
169 | // of Jan. 2019, the Google Photos API does not allow searching
170 | // media items with both an album ID and filters. Because this
171 | // search is predicated on album ID, we cannot be constrained by
172 | // a timeframe in this search.
173 | //
174 | // See https://developers.google.com/photos/library/reference/rest/v1/mediaItems/search.
175 | func (c *Client) listCollections(ctx context.Context, itemChan chan<- *timeliner.ItemGraph, opt timeliner.ListingOptions) error {
176 | 	c.checkpoint.mu.Lock()
177 | 	albumPageToken := c.checkpoint.AlbumsNextPage
178 | 	c.checkpoint.mu.Unlock()
179 | 
180 | 	for {
181 | 		select {
182 | 		case <-ctx.Done():
183 | 			return nil
184 | 		default:
185 | 			if opt.Verbose {
186 | 				log.Printf("[DEBUG] %s/%s: listing albums: next page (page_token=%s)",
187 | 					DataSourceID, c.userID, albumPageToken)
188 | 			}
189 | 
190 | 			var err error
191 | 			albumPageToken, err = c.getAlbumsAndTheirItemsNextPage(itemChan, albumPageToken, opt)
192 | 			if err != nil {
193 | 				return err
194 | 			}
195 | 			if albumPageToken == "" {
196 | 				return nil
197 | 			}
198 | 
199 | 			c.checkpoint.mu.Lock()
200 | 			c.checkpoint.AlbumsNextPage = albumPageToken
201 | 			c.checkpoint.save(ctx)
202 | 			c.checkpoint.mu.Unlock()
203 | 		}
204 | 	}
205 | }
206 | 
207 | func (c *Client) getAlbumsAndTheirItemsNextPage(itemChan chan<- *timeliner.ItemGraph,
208 | 	pageToken string, opt timeliner.ListingOptions) (string, error) {
209 | 	vals := url.Values{
210 | 		"pageToken": {pageToken},
211 | 		"pageSize":  {"50"},
212 | 	}
213 | 
214 | 	var respBody listAlbums
215 | 	err := c.apiRequestWithRetry("GET", "/albums?"+vals.Encode(), nil, &respBody)
216 | 	if err != nil {
217 | 		return pageToken, err
218 | 	}
219 | 
220 | 	for _, album := range respBody.Albums {
221 | 		if opt.Verbose {
222 | 			log.Printf("[DEBUG] %s/%s: listing items in album: '%s' (album_id=%s item_count=%s)",
223 | 				DataSourceID, c.userID, album.Title, album.ID, album.MediaItemsCount)
224 | 		}
225 | 
226 | 		err = c.getAlbumItems(itemChan, album, opt)
227 | 		if err != nil {
228 | 			return "", err
229 | 		}
230 | 	}
231 | 
232 | 	return respBody.NextPageToken, nil
233 | }
234 | 
235 | func (c *Client) getAlbumItems(itemChan chan<- *timeliner.ItemGraph, album gpAlbum, opt timeliner.ListingOptions) error {
236 | 	var albumItemsNextPage string
237 | 	var counter int
238 | 
239 | 	const pageSize = 100
240 | 
241 | 	for {
242 | 		reqBody := listMediaItemsRequest{
243 | 			AlbumID:   album.ID,
244 | 			PageToken: albumItemsNextPage,
245 | 			PageSize:  pageSize,
246 | 		}
247 | 
248 | 		if opt.Verbose {
249 | 			log.Printf("[DEBUG] %s/%s: getting next page of media items in album (album_id=%s page_size=%d page_token=%s)",
250 | 				DataSourceID, c.userID, album.ID, pageSize, albumItemsNextPage)
251 | 		}
252 | 
253 | 		page, err := c.pageOfMediaItems(reqBody)
254 | 		if err != nil {
255 | 			return fmt.Errorf("listing album contents: %v", err)
256 | 		}
257 | 
258 | 		// iterate each media item on this page of the album listing
259 | 		var items []timeliner.CollectionItem
260 | 		for _, it := range page.MediaItems {
261 | 			// since we cannot request items in an album and also filter
262 | 			// by timestamp, be sure to filter here; it means we still
263 | 			// have to iterate all items in all albums, but at least we
264 | 			// can just skip items that fall outside the timeframe...
265 | 			ts := it.Timestamp()
266 | 			if opt.Timeframe.Since != nil && ts.Before(*opt.Timeframe.Since) {
267 | 				continue
268 | 			}
269 | 			if opt.Timeframe.Until != nil && ts.After(*opt.Timeframe.Until) {
270 | 				continue
271 | 			}
272 | 
273 | 			// otherwise, add this item to the album
274 | 			items = append(items, timeliner.CollectionItem{
275 | 				Item:     it,
276 | 				Position: counter,
277 | 			})
278 | 			counter++
279 | 		}
280 | 
281 | 		// if any items remained after filtering,
282 | 		// process this album now
283 | 		if len(items) > 0 {
284 | 			ig := timeliner.NewItemGraph(nil)
285 | 			ig.Collections = append(ig.Collections, timeliner.Collection{
286 | 				OriginalID: album.ID,
287 | 				Name:       &album.Title,
288 | 				Items:      items,
289 | 			})
290 | 			itemChan <- ig
291 | 		}
292 | 
293 | 		if page.NextPageToken == "" {
294 | 			return nil
295 | 		}
296 | 
297 | 		albumItemsNextPage = page.NextPageToken
298 | 	}
299 | }
300 | 
301 | func (c *Client) pageOfMediaItems(reqBody listMediaItemsRequest) (listMediaItems, error) {
302 | 	var respBody listMediaItems
303 | 	err := c.apiRequestWithRetry("POST", "/mediaItems:search", reqBody, &respBody)
304 | 	return respBody, err
305 | }
306 | 
// apiRequestWithRetry performs an API request and decodes the JSON
// response into respInto, retrying up to maxTries times on transport
// errors, non-200 statuses, or decode failures. Rate-limit responses
// (HTTP 429) get a longer pause between attempts. It returns the
// error from the final attempt, or nil once an attempt succeeds.
func (c *Client) apiRequestWithRetry(method, endpoint string, reqBodyData, respInto interface{}) error {
	// do the request in a loop for controlled retries on error
	var err error
	const maxTries = 10
	for i := 0; i < maxTries; i++ {
		var resp *http.Response
		resp, err = c.apiRequest(method, endpoint, reqBodyData)
		if err != nil {
			// transport-level failure; resp is nil here, so nothing to close
			log.Printf("[ERROR] %s/%s: doing API request: >>> %v <<< - retrying... (attempt %d/%d)",
				DataSourceID, c.userID, err, i+1, maxTries)
			time.Sleep(10 * time.Second)
			continue
		}

		if resp.StatusCode != http.StatusOK {
			// read a bounded amount of the body to include in the error message
			bodyText, err2 := ioutil.ReadAll(io.LimitReader(resp.Body, 1024*256))
			resp.Body.Close()

			if err2 == nil {
				err = fmt.Errorf("HTTP %d: %s: >>> %s <<<", resp.StatusCode, resp.Status, bodyText)
			} else {
				err = fmt.Errorf("HTTP %d: %s", resp.StatusCode, resp.Status)
			}

			// extra-long pause for rate limiting errors
			if resp.StatusCode == http.StatusTooManyRequests {
				log.Printf("[ERROR] %s/%s: rate limited: HTTP %d: %s: %s - retrying in 35 seconds... (attempt %d/%d)",
					DataSourceID, c.userID, resp.StatusCode, resp.Status, bodyText, i+1, maxTries)
				time.Sleep(35 * time.Second)
				continue
			}

			// for any other error, wait a couple seconds and retry
			log.Printf("[ERROR] %s/%s: bad API response: %v - retrying... (attempt %d/%d)",
				DataSourceID, c.userID, err, i+1, maxTries)
			time.Sleep(10 * time.Second)
			continue
		}

		// successful request; read the response body
		err = json.NewDecoder(resp.Body).Decode(&respInto)
		if err != nil {
			resp.Body.Close()
			err = fmt.Errorf("decoding JSON: %v", err)
			log.Printf("[ERROR] %s/%s: reading API response: %v - retrying... (attempt %d/%d)",
				DataSourceID, c.userID, err, i+1, maxTries)
			time.Sleep(10 * time.Second)
			continue
		}

		// successful read; we're done here
		resp.Body.Close()
		break
	}

	// err reflects the last attempt: nil if any attempt succeeded
	// (break), or the final failure if all retries were exhausted
	return err
}
364 | 
365 | func (c *Client) apiRequest(method, endpoint string, reqBodyData interface{}) (*http.Response, error) {
366 | 	var reqBody io.Reader
367 | 	if reqBodyData != nil {
368 | 		reqBodyBytes, err := json.Marshal(reqBodyData)
369 | 		if err != nil {
370 | 			return nil, err
371 | 		}
372 | 		reqBody = bytes.NewReader(reqBodyBytes)
373 | 	}
374 | 
375 | 	req, err := http.NewRequest(method, apiBase+endpoint, reqBody)
376 | 	if err != nil {
377 | 		return nil, err
378 | 	}
379 | 	if reqBody != nil {
380 | 		req.Header.Set("Content-Type", "application/json")
381 | 	}
382 | 
383 | 	return c.HTTPClient.Do(req)
384 | }
385 | 
386 | func dateRange(timeframe timeliner.Timeframe) listMediaItemsFilterRange {
387 | 	var start, end filterDate
388 | 	if timeframe.Since == nil {
389 | 		start = filterDate{
390 | 			Day:   1,
391 | 			Month: 1,
392 | 			Year:  1,
393 | 		}
394 | 	} else {
395 | 		since := timeframe.Since.Add(24 * time.Hour) // to account for day precision
396 | 		start = filterDate{
397 | 			Day:   since.Day(),
398 | 			Month: int(since.Month()),
399 | 			Year:  since.Year(),
400 | 		}
401 | 	}
402 | 	if timeframe.Until == nil {
403 | 		end = filterDate{
404 | 			Day:   31,
405 | 			Month: 12,
406 | 			Year:  9999,
407 | 		}
408 | 	} else {
409 | 		timeframe.Until.Add(-24 * time.Hour) // to account for day precision
410 | 		end = filterDate{
411 | 			Day:   timeframe.Until.Day(),
412 | 			Month: int(timeframe.Until.Month()),
413 | 			Year:  timeframe.Until.Year(),
414 | 		}
415 | 	}
416 | 	return listMediaItemsFilterRange{
417 | 		StartDate: start,
418 | 		EndDate:   end,
419 | 	}
420 | }
421 | 
// Assuming checkpoints are short-lived (i.e. are resumed
// somewhat quickly, before the page tokens/cursors expire),
// we can just store the page tokens.
type checkpointInfo struct {
	ItemsNextPage  string // page token for resuming the media items listing
	AlbumsNextPage string // page token for resuming the albums listing
	mu             *sync.Mutex // guards the fields above; save/load themselves are not thread-safe
}
430 | 
431 | // save records the checkpoint. It is NOT thread-safe,
432 | // so calls to this must be protected by a mutex.
433 | func (ch *checkpointInfo) save(ctx context.Context) {
434 | 	gobBytes, err := timeliner.MarshalGob(ch)
435 | 	if err != nil {
436 | 		log.Printf("[ERROR] %s: encoding checkpoint: %v", DataSourceID, err)
437 | 	}
438 | 	timeliner.Checkpoint(ctx, gobBytes)
439 | }
440 | 
441 | // load decodes the checkpoint. It is NOT thread-safe,
442 | // so calls to this must be protected by a mutex.
443 | func (ch *checkpointInfo) load(checkpointGob []byte) {
444 | 	if len(checkpointGob) == 0 {
445 | 		return
446 | 	}
447 | 	err := timeliner.UnmarshalGob(checkpointGob, ch)
448 | 	if err != nil {
449 | 		log.Printf("[ERROR] %s: decoding checkpoint: %v", DataSourceID, err)
450 | 	}
451 | }
452 | 


--------------------------------------------------------------------------------
/datasources/googlephotos/media.go:
--------------------------------------------------------------------------------
  1 | package googlephotos
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"io"
  6 | 	"io/ioutil"
  7 | 	"log"
  8 | 	"net/http"
  9 | 	"strconv"
 10 | 	"time"
 11 | 
 12 | 	"github.com/mholt/timeliner"
 13 | )
 14 | 
// listMediaItems is the structure of the results
// of calling mediaItems in the Google Photos API.
type listMediaItems struct {
	MediaItems    []mediaItem `json:"mediaItems"`    // one page of results
	NextPageToken string      `json:"nextPageToken"` // empty on the last page
}

// mediaItem is a single photo or video as returned by the
// Google Photos API; the methods below expose it to timeliner.
type mediaItem struct {
	MediaID         string           `json:"id"`
	ProductURL      string           `json:"productUrl"`
	BaseURL         string           `json:"baseUrl"` // used to build download URLs (see DataFileReader)
	Description     string           `json:"description"`
	MIMEType        string           `json:"mimeType"`
	MediaMetadata   mediaMetadata    `json:"mediaMetadata"`
	ContributorInfo mediaContributor `json:"mediaContributor"`
	Filename        string           `json:"filename"`
}
 32 | 
// ID returns the item's unique ID as assigned by the API.
func (m mediaItem) ID() string {
	return m.MediaID
}

// Timestamp returns the item's creation time as reported by the API.
func (m mediaItem) Timestamp() time.Time {
	return m.MediaMetadata.CreationTime
}

// DataText returns the item's description (possibly empty).
func (m mediaItem) DataText() (*string, error) {
	return &m.Description, nil
}

// DataFileName returns the item's original filename.
func (m mediaItem) DataFileName() *string {
	return &m.Filename
}
 48 | 
 49 | func (m mediaItem) DataFileReader() (io.ReadCloser, error) {
 50 | 	if m.MediaMetadata.Video != nil && m.MediaMetadata.Video.Status != "READY" {
 51 | 		log.Printf("[INFO] Skipping video file because it is not ready (status=%s filename=%s)",
 52 | 			m.MediaMetadata.Video.Status, m.Filename)
 53 | 		return nil, nil
 54 | 	}
 55 | 
 56 | 	u := m.BaseURL
 57 | 
 58 | 	// configure for the download of full file with almost-full exif data; see
 59 | 	// https://developers.google.com/photos/library/guides/access-media-items#base-urls
 60 | 	if m.MediaMetadata.Photo != nil {
 61 | 		u += "=d"
 62 | 	} else if m.MediaMetadata.Video != nil {
 63 | 		u += "=dv"
 64 | 	}
 65 | 
 66 | 	const maxTries = 5
 67 | 	var err error
 68 | 	var resp *http.Response
 69 | 	for i := 0; i < maxTries; i++ {
 70 | 		resp, err = http.Get(u)
 71 | 		if err != nil {
 72 | 			err = fmt.Errorf("getting media contents: %v", err)
 73 | 			log.Printf("[ERROR] %s: %s: %v - retrying... (attempt %d/%d)", DataSourceID, u, err, i+1, maxTries)
 74 | 			time.Sleep(30 * time.Second)
 75 | 			continue
 76 | 		}
 77 | 		if resp.StatusCode != http.StatusOK {
 78 | 			bodyText, err2 := ioutil.ReadAll(io.LimitReader(resp.Body, 1024*256))
 79 | 			resp.Body.Close()
 80 | 
 81 | 			if err2 == nil {
 82 | 				err = fmt.Errorf("HTTP %d: %s: >>> %s <<<", resp.StatusCode, resp.Status, bodyText)
 83 | 			} else {
 84 | 				err = fmt.Errorf("HTTP %d: %s", resp.StatusCode, resp.Status)
 85 | 			}
 86 | 
 87 | 			log.Printf("[ERROR %s: %s: Bad response: %v - waiting and retrying... (attempt %d/%d)",
 88 | 				DataSourceID, u, err, i+1, maxTries)
 89 | 			time.Sleep(15 * time.Second)
 90 | 			continue
 91 | 		}
 92 | 		break
 93 | 	}
 94 | 
 95 | 	if resp == nil {
 96 | 		return nil, err
 97 | 	}
 98 | 	return resp.Body, err
 99 | }
100 | 
// DataFileHash returns nil; the API does not provide a content hash.
func (m mediaItem) DataFileHash() []byte {
	return nil
}

// DataFileMIMEType returns the MIME type reported by the API.
func (m mediaItem) DataFileMIMEType() *string {
	return &m.MIMEType
}

// Owner returns the contributor's display name, if known; the
// owner ID is always nil.
func (m mediaItem) Owner() (*string, *string) {
	// since we only download media owned by the account,
	// we can leave ID nil and assume the display name
	// is the account owner's name
	if m.ContributorInfo.DisplayName != "" {
		return nil, &m.ContributorInfo.DisplayName
	}
	return nil, nil
}

// Class reports whether the item is a video or an image based on
// which metadata the API populated; unknown if neither is set.
func (m mediaItem) Class() timeliner.ItemClass {
	if m.MediaMetadata.Video != nil {
		return timeliner.ClassVideo
	}
	if m.MediaMetadata.Photo != nil {
		return timeliner.ClassImage
	}
	return timeliner.ClassUnknown
}
128 | 
129 | func (m mediaItem) Metadata() (*timeliner.Metadata, error) {
130 | 	// TODO: Parse exif metadata... maybe add most important/useful
131 | 	// EXIF fields to the metadata struct directly?
132 | 
133 | 	widthInt, err := strconv.Atoi(m.MediaMetadata.Width)
134 | 	if err != nil {
135 | 		return nil, fmt.Errorf("parsing width as int: %v (width=%s)",
136 | 			err, m.MediaMetadata.Width)
137 | 	}
138 | 	heightInt, err := strconv.Atoi(m.MediaMetadata.Height)
139 | 	if err != nil {
140 | 		return nil, fmt.Errorf("parsing height as int: %v (height=%s)",
141 | 			err, m.MediaMetadata.Height)
142 | 	}
143 | 
144 | 	meta := &timeliner.Metadata{
145 | 		Width:  widthInt,
146 | 		Height: heightInt,
147 | 	}
148 | 
149 | 	if m.MediaMetadata.Photo != nil {
150 | 		meta.CameraMake = m.MediaMetadata.Photo.CameraMake
151 | 		meta.CameraModel = m.MediaMetadata.Photo.CameraModel
152 | 		meta.FocalLength = m.MediaMetadata.Photo.FocalLength
153 | 		meta.ApertureFNumber = m.MediaMetadata.Photo.ApertureFNumber
154 | 		meta.ISOEquivalent = m.MediaMetadata.Photo.ISOEquivalent
155 | 		if m.MediaMetadata.Photo.ExposureTime != "" {
156 | 			expDur, err := time.ParseDuration(m.MediaMetadata.Photo.ExposureTime)
157 | 			if err != nil {
158 | 				return nil, fmt.Errorf("parsing exposure time as duration: %v (exposure_time=%s)",
159 | 					err, m.MediaMetadata.Photo.ExposureTime)
160 | 			}
161 | 			meta.ExposureTime = expDur
162 | 		}
163 | 	} else if m.MediaMetadata.Video != nil {
164 | 		meta.CameraMake = m.MediaMetadata.Video.CameraMake
165 | 		meta.CameraModel = m.MediaMetadata.Video.CameraModel
166 | 		meta.FPS = m.MediaMetadata.Video.FPS
167 | 	}
168 | 
169 | 	return meta, nil
170 | }
171 | 
// Location always returns nil: the Google Photos API does not
// expose location data for media items.
func (m mediaItem) Location() (*timeliner.Location, error) {
	// See https://issuetracker.google.com/issues/80379228 😭
	return nil, nil
}
176 | 
// mediaMetadata holds the common metadata of an API media item;
// exactly one of Photo or Video is expected to be non-nil.
type mediaMetadata struct {
	CreationTime time.Time      `json:"creationTime"`
	Width        string         `json:"width"`  // pixels, as a string (parsed in Metadata)
	Height       string         `json:"height"` // pixels, as a string (parsed in Metadata)
	Photo        *photoMetadata `json:"photo,omitempty"`
	Video        *videoMetadata `json:"video,omitempty"`
}

// photoMetadata holds photo-specific camera information.
type photoMetadata struct {
	CameraMake      string  `json:"cameraMake"`
	CameraModel     string  `json:"cameraModel"`
	FocalLength     float64 `json:"focalLength"`
	ApertureFNumber float64 `json:"apertureFNumber"`
	ISOEquivalent   int     `json:"isoEquivalent"`
	ExposureTime    string  `json:"exposureTime"` // TODO: Parse duration out of this...?
}

// videoMetadata holds video-specific camera information and the
// processing status (only "READY" videos can be downloaded).
type videoMetadata struct {
	CameraMake  string  `json:"cameraMake"`
	CameraModel string  `json:"cameraModel"`
	FPS         float64 `json:"fps"`
	Status      string  `json:"status"`
}

// mediaContributor identifies who contributed a media item.
type mediaContributor struct {
	ProfilePictureBaseURL string `json:"profilePictureBaseUrl"`
	DisplayName           string `json:"displayName"`
}
205 | 
// listMediaItemsRequest is the request body for mediaItems:search.
// Note the API rejects requests that set both Filters and AlbumID.
type listMediaItemsRequest struct {
	Filters   *listMediaItemsFilter `json:"filters,omitempty"`
	AlbumID   string                `json:"albumId,omitempty"`
	PageSize  int                   `json:"pageSize,omitempty"`
	PageToken string                `json:"pageToken,omitempty"`
}

// listMediaItemsFilter restricts which media items are returned.
type listMediaItemsFilter struct {
	DateFilter               listMediaItemsDateFilter      `json:"dateFilter"`
	IncludeArchivedMedia     bool                          `json:"includeArchivedMedia"`
	ExcludeNonAppCreatedData bool                          `json:"excludeNonAppCreatedData"`
	ContentFilter            listMediaItemsContentFilter   `json:"contentFilter"`
	MediaTypeFilter          listMediaItemsMediaTypeFilter `json:"mediaTypeFilter"`
}

// listMediaItemsDateFilter matches either date ranges or exact dates.
type listMediaItemsDateFilter struct {
	Ranges []listMediaItemsFilterRange `json:"ranges,omitempty"`
	Dates  []filterDate                `json:"dates,omitempty"`
}

// listMediaItemsFilterRange is an inclusive start/end date range.
type listMediaItemsFilterRange struct {
	StartDate filterDate `json:"startDate"`
	EndDate   filterDate `json:"endDate"`
}

// filterDate is a calendar date with day-level precision.
type filterDate struct {
	Month int `json:"month"`
	Day   int `json:"day"`
	Year  int `json:"year"`
}

// listMediaItemsContentFilter includes/excludes content categories.
type listMediaItemsContentFilter struct {
	ExcludedContentCategories []string `json:"excludedContentCategories,omitempty"`
	IncludedContentCategories []string `json:"includedContentCategories,omitempty"`
}

// listMediaItemsMediaTypeFilter restricts results by media type.
type listMediaItemsMediaTypeFilter struct {
	MediaTypes []string `json:"mediaTypes,omitempty"`
}

// listAlbums is the structure of results from the albums endpoint.
type listAlbums struct {
	Albums        []gpAlbum `json:"albums"`
	NextPageToken string    `json:"nextPageToken"` // empty on the last page
}

// gpAlbum is a single album as returned by the Google Photos API.
type gpAlbum struct {
	ID                    string `json:"id"`
	Title                 string `json:"title,omitempty"`
	ProductURL            string `json:"productUrl"`
	MediaItemsCount       string `json:"mediaItemsCount"` // count as a string, per the API
	CoverPhotoBaseURL     string `json:"coverPhotoBaseUrl"`
	CoverPhotoMediaItemID string `json:"coverPhotoMediaItemId"`
}
259 | 


--------------------------------------------------------------------------------
/datasources/googlephotos/takeoutarchive.go:
--------------------------------------------------------------------------------
  1 | package googlephotos
  2 | 
  3 | import (
  4 | 	"archive/tar"
  5 | 	"archive/zip"
  6 | 	"bytes"
  7 | 	"context"
  8 | 	"encoding/json"
  9 | 	"fmt"
 10 | 	"io"
 11 | 	"path/filepath"
 12 | 	"strconv"
 13 | 	"strings"
 14 | 	"time"
 15 | 
 16 | 	"github.com/mholt/archiver/v3"
 17 | 	"github.com/mholt/timeliner"
 18 | )
 19 | 
// listFromTakeoutArchive lists media items from a Google Takeout
// archive file. The outer walk finds each album's metadata.json;
// for each album found, an inner walk (restricted to that album's
// folder) decodes each item's sidecar .json metadata, filters by
// the configured timeframe, and emits the album as one collection.
// Note this rewalks the archive once per album, which is O(albums ×
// archive size) but keeps memory usage flat.
func (c *Client) listFromTakeoutArchive(ctx context.Context, itemChan chan<- *timeliner.ItemGraph, opt timeliner.ListingOptions) error {
	err := archiver.Walk(opt.Filename, func(f archiver.File) error {
		pathInArchive := getPathInArchive(f) // TODO: maybe this should be a function in the archiver lib

		// only walk in album folders, and look for metadata files
		if !strings.HasPrefix(pathInArchive, "Takeout/Google Photos/") {
			return nil
		}
		if f.Name() != albumMetadataFilename {
			return nil
		}

		// album metadata file; begin processing next album
		var albumMeta albumArchiveMetadata
		err := json.NewDecoder(f).Decode(&albumMeta)
		if err != nil {
			return fmt.Errorf("decoding album metadata file %s: %v", pathInArchive, err)
		}
		collection := timeliner.Collection{
			OriginalID:  albumMeta.AlbumData.Date.Timestamp, // TODO: we don't have one... this will not merge nicely with API imports!!
			Name:        &albumMeta.AlbumData.Title,
			Description: &albumMeta.AlbumData.Description,
		}

		// the album's folder is the metadata file's parent directory
		albumPathInArchive := strings.TrimSuffix(pathInArchive, albumMetadataFilename)

		// get all the album's items using a separate walk that is constrained to this album's folder
		err = archiver.Walk(opt.Filename, func(f archiver.File) error {
			pathInArchive := getPathInArchive(f)
			if !strings.HasPrefix(pathInArchive, albumPathInArchive) {
				return nil
			}
			if f.Name() == albumMetadataFilename {
				return nil
			}
			// only item sidecar metadata files; the media files
			// themselves are read later by DataFileReader
			if filepath.Ext(f.Name()) != ".json" {
				return nil
			}

			var itemMeta mediaArchiveMetadata
			err := json.NewDecoder(f).Decode(&itemMeta)
			if err != nil {
				return fmt.Errorf("decoding item metadata file %s: %v", pathInArchive, err)
			}

			itemMeta.parsedPhotoTakenTime, err = itemMeta.timestamp()
			if err != nil {
				return fmt.Errorf("parsing timestamp from item %s: %v", pathInArchive, err)
			}
			// the media file lives beside its sidecar, minus ".json"
			itemMeta.pathInArchive = strings.TrimSuffix(pathInArchive, ".json")
			itemMeta.archiveFilename = opt.Filename

			withinTimeframe := (opt.Timeframe.Since == nil || itemMeta.parsedPhotoTakenTime.After(*opt.Timeframe.Since)) &&
				(opt.Timeframe.Until == nil || itemMeta.parsedPhotoTakenTime.Before(*opt.Timeframe.Until))

			if withinTimeframe {
				collection.Items = append(collection.Items, timeliner.CollectionItem{
					Item:     itemMeta,
					Position: len(collection.Items),
				})
			}

			return nil
		})
		if err != nil {
			return err
		}

		// only emit the album if it had items within the timeframe
		if len(collection.Items) > 0 {
			ig := timeliner.NewItemGraph(nil)
			ig.Collections = append(ig.Collections, collection)
			itemChan <- ig
		}

		return nil
	})
	if err != nil {
		return err
	}

	return nil
}
102 | 
103 | const albumMetadataFilename = "metadata.json"
104 | 
105 | func getPathInArchive(f archiver.File) string {
106 | 	switch hdr := f.Header.(type) {
107 | 	case zip.FileHeader:
108 | 		return hdr.Name
109 | 	case *tar.Header:
110 | 		return hdr.Name
111 | 	}
112 | 	return ""
113 | }
114 | 
// albumArchiveMetadata is the structure of an album's metadata.json
// file in a Google Takeout archive.
type albumArchiveMetadata struct {
	AlbumData struct {
		Title       string `json:"title"`
		Description string `json:"description"`
		Access      string `json:"access"`
		Location    string `json:"location"`
		Date        struct {
			Timestamp string `json:"timestamp"` // Unix seconds, as a string
			Formatted string `json:"formatted"`
		} `json:"date"`
		GeoData struct {
			Latitude      float64 `json:"latitude"`
			Longitude     float64 `json:"longitude"`
			Altitude      float64 `json:"altitude"`
			LatitudeSpan  float64 `json:"latitudeSpan"`
			LongitudeSpan float64 `json:"longitudeSpan"`
		} `json:"geoData"`
	} `json:"albumData"`
}
134 | 
// mediaArchiveMetadata is the structure of a media item's sidecar
// .json metadata file in a Google Takeout archive. The unexported
// fields are filled in during listing so the methods below can
// locate and describe the media file later.
type mediaArchiveMetadata struct {
	Title        string `json:"title"`
	Description  string `json:"description"`
	ImageViews   string `json:"imageViews"`
	CreationTime struct {
		Timestamp string `json:"timestamp"` // Unix seconds, as a string
		Formatted string `json:"formatted"`
	} `json:"creationTime"`
	ModificationTime struct {
		Timestamp string `json:"timestamp"`
		Formatted string `json:"formatted"`
	} `json:"modificationTime"`
	GeoData struct {
		Latitude      float64 `json:"latitude"`
		Longitude     float64 `json:"longitude"`
		Altitude      float64 `json:"altitude"`
		LatitudeSpan  float64 `json:"latitudeSpan"`
		LongitudeSpan float64 `json:"longitudeSpan"`
	} `json:"geoData"`
	GeoDataExif struct {
		Latitude      float64 `json:"latitude"`
		Longitude     float64 `json:"longitude"`
		Altitude      float64 `json:"altitude"`
		LatitudeSpan  float64 `json:"latitudeSpan"`
		LongitudeSpan float64 `json:"longitudeSpan"`
	} `json:"geoDataExif"`
	PhotoTakenTime struct {
		Timestamp string `json:"timestamp"`
		Formatted string `json:"formatted"`
	} `json:"photoTakenTime"`
	GooglePhotosOrigin struct {
		MobileUpload struct {
			DeviceFolder struct {
				LocalFolderName string `json:"localFolderName"`
			} `json:"deviceFolder"`
			DeviceType string `json:"deviceType"`
		} `json:"mobileUpload"`
	} `json:"googlePhotosOrigin"`

	// filled in during listing (not part of the JSON)
	parsedPhotoTakenTime time.Time // parsed from the timestamps above; see timestamp()
	archiveFilename      string    // path of the takeout archive on disk
	pathInArchive        string    // path of the media file within the archive
}
178 | 
179 | func (m mediaArchiveMetadata) timestamp() (time.Time, error) {
180 | 	ts := m.PhotoTakenTime.Timestamp
181 | 	if ts == "" {
182 | 		ts = m.CreationTime.Timestamp
183 | 	}
184 | 	if ts == "" {
185 | 		ts = m.ModificationTime.Timestamp
186 | 	}
187 | 	if ts == "" {
188 | 		return time.Time{}, fmt.Errorf("no timestamp available")
189 | 	}
190 | 	parsed, err := strconv.ParseInt(ts, 10, 64)
191 | 	if err != nil {
192 | 		return time.Time{}, err
193 | 	}
194 | 	return time.Unix(parsed, 0), nil
195 | }
196 | 
// ID does NOT return the same ID as from the API. Takeout archives do NOT
// have an ID associated with each item, so we do our best by making up
// an ID using the timestamp and the filename.
func (m mediaArchiveMetadata) ID() string {
	return m.PhotoTakenTime.Timestamp + "_" + m.Title
}

// Timestamp returns the time parsed earlier by timestamp()
// during listing.
func (m mediaArchiveMetadata) Timestamp() time.Time {
	return m.parsedPhotoTakenTime
}
207 | 
208 | func (m mediaArchiveMetadata) Class() timeliner.ItemClass {
209 | 	ext := filepath.Ext(strings.ToLower(m.Title))
210 | 	switch ext {
211 | 	case ".mp4", ".m4v", ".mov", ".wmv", ".mkv", "mpeg4", ".mpeg", ".ogg", ".m4p", ".avi":
212 | 		return timeliner.ClassVideo
213 | 	default:
214 | 		return timeliner.ClassImage
215 | 	}
216 | }
217 | 
// Owner returns nil, nil: takeout archives do not identify an
// owner for individual items.
func (m mediaArchiveMetadata) Owner() (id *string, name *string) {
	return nil, nil
}

// DataText returns the item's description, or nil if it has none.
func (m mediaArchiveMetadata) DataText() (*string, error) {
	if m.Description != "" {
		return &m.Description, nil
	}
	return nil, nil
}

// DataFileName returns the item's original filename.
func (m mediaArchiveMetadata) DataFileName() *string {
	return &m.Title
}
232 | 
233 | func (m mediaArchiveMetadata) DataFileReader() (io.ReadCloser, error) {
234 | 	var rc io.ReadCloser
235 | 	err := archiver.Walk(m.archiveFilename, func(f archiver.File) error {
236 | 		pathInArchive := getPathInArchive(f)
237 | 		if pathInArchive != m.pathInArchive {
238 | 			return nil
239 | 		}
240 | 
241 | 		buf := new(bytes.Buffer)
242 | 		_, err := io.Copy(buf, f)
243 | 		if err != nil {
244 | 			return fmt.Errorf("copying item into memory: %v", err)
245 | 		}
246 | 		rc = timeliner.FakeCloser(buf)
247 | 
248 | 		return archiver.ErrStopWalk
249 | 	})
250 | 	if err != nil {
251 | 		return nil, fmt.Errorf("walking takeout file %s in search of media: %v",
252 | 			m.archiveFilename, err)
253 | 	}
254 | 	return rc, nil
255 | }
256 | 
// DataFileHash returns nil; takeout archives do not provide a
// content hash for items.
func (m mediaArchiveMetadata) DataFileHash() []byte {
	return nil
}

// DataFileMIMEType returns nil; the MIME type is not recorded in
// the archive's metadata.
func (m mediaArchiveMetadata) DataFileMIMEType() *string {
	return nil
}

// Metadata returns nil; no extra metadata is extracted from the
// archive's sidecar files.
func (m mediaArchiveMetadata) Metadata() (*timeliner.Metadata, error) {
	return nil, nil
}
268 | 
// Location returns the item's coordinates, preferring the archive's
// geoData and falling back to the EXIF-derived values per axis.
// NOTE(review): 0 is used as the "unset" sentinel here, so a genuine
// coordinate of exactly 0 (equator/prime meridian) also triggers the
// EXIF fallback — confirm this is acceptable.
func (m mediaArchiveMetadata) Location() (*timeliner.Location, error) {
	lat, lon := m.GeoData.Latitude, m.GeoData.Longitude
	if lat == 0 {
		lat = m.GeoDataExif.Latitude
	}
	if lon == 0 {
		lon = m.GeoDataExif.Longitude
	}
	return &timeliner.Location{
		Latitude:  &lat,
		Longitude: &lon,
	}, nil
}
282 | 


--------------------------------------------------------------------------------
/datasources/instagram/instagram.go:
--------------------------------------------------------------------------------
  1 | // Package instagram implements a Timeliner data source for
  2 | // importing data from Instagram archive files.
  3 | package instagram
  4 | 
  5 | import (
  6 | 	"context"
  7 | 	"encoding/json"
  8 | 	"fmt"
  9 | 	"log"
 10 | 	"time"
 11 | 
 12 | 	"github.com/mholt/archiver/v3"
 13 | 	"github.com/mholt/timeliner"
 14 | )
 15 | 
// Data source name and ID
const (
	DataSourceName = "Instagram"
	DataSourceID   = "instagram"
)

// dataSource describes this data source for registration with
// timeliner. The archive importer keeps no per-account state,
// so NewClient just returns a fresh empty Client.
var dataSource = timeliner.DataSource{
	ID:   DataSourceID,
	Name: DataSourceName,
	NewClient: func(acc timeliner.Account) (timeliner.Client, error) {
		return new(Client), nil
	},
}
 29 | 
// init registers this data source with timeliner at program start.
func init() {
	err := timeliner.RegisterDataSource(dataSource)
	if err != nil {
		log.Fatal(err)
	}
}

// Client implements the timeliner.Client interface.
type Client struct{}
 39 | 
 40 | // ListItems lists items from the data source. opt.Filename must be non-empty.
 41 | func (c *Client) ListItems(ctx context.Context, itemChan chan<- *timeliner.ItemGraph, opt timeliner.ListingOptions) error {
 42 | 	defer close(itemChan)
 43 | 
 44 | 	if opt.Filename == "" {
 45 | 		return fmt.Errorf("filename is required")
 46 | 	}
 47 | 
 48 | 	// first, load the profile information
 49 | 	prof, err := c.getProfileInfo(opt.Filename)
 50 | 	if err != nil {
 51 | 		return fmt.Errorf("loading profile: %v", err)
 52 | 	}
 53 | 
 54 | 	// then, load the media index
 55 | 	idx, err := c.getMediaIndex(opt.Filename)
 56 | 	if err != nil {
 57 | 		return fmt.Errorf("loading index: %v", err)
 58 | 	}
 59 | 
 60 | 	// prepare each media item with the information they
 61 | 	// need to be processed into the timeline
 62 | 	for i, ph := range idx.Photos {
 63 | 		idx.Photos[i].profile = prof
 64 | 		idx.Photos[i].archiveFilename = opt.Filename
 65 | 		idx.Photos[i].takenAtParsed, err = time.Parse(takenAtFormat, ph.TakenAt)
 66 | 		if err != nil {
 67 | 			return fmt.Errorf("parsing photo time %s into format %s: %v", ph.TakenAt, takenAtFormat, err)
 68 | 		}
 69 | 	}
 70 | 	for i, p := range idx.Profile {
 71 | 		idx.Profile[i].profile = prof
 72 | 		idx.Profile[i].archiveFilename = opt.Filename
 73 | 		idx.Photos[i].takenAtParsed, err = time.Parse(takenAtFormat, p.TakenAt)
 74 | 		if err != nil {
 75 | 			return fmt.Errorf("parsing profile pic time %s into format %s: %v", p.TakenAt, takenAtFormat, err)
 76 | 		}
 77 | 	}
 78 | 	for i, vid := range idx.Videos {
 79 | 		idx.Videos[i].profile = prof
 80 | 		idx.Videos[i].archiveFilename = opt.Filename
 81 | 		idx.Videos[i].takenAtParsed, err = time.Parse(takenAtFormat, vid.TakenAt)
 82 | 		if err != nil {
 83 | 			return fmt.Errorf("parsing video time %s into format %s: %v", vid.TakenAt, takenAtFormat, err)
 84 | 		}
 85 | 	}
 86 | 
 87 | 	// add all of the media items to the timeline
 88 | 	for _, photo := range idx.Photos {
 89 | 		itemChan <- timeliner.NewItemGraph(photo)
 90 | 	}
 91 | 	for _, video := range idx.Videos {
 92 | 		itemChan <- timeliner.NewItemGraph(video)
 93 | 	}
 94 | 
 95 | 	return nil
 96 | }
 97 | 
 98 | func (c *Client) getProfileInfo(filename string) (instaAccountProfile, error) {
 99 | 	var prof instaAccountProfile
100 | 	err := archiver.Walk(filename, func(f archiver.File) error {
101 | 		defer f.Close()
102 | 		if f.Name() != "profile.json" {
103 | 			return nil
104 | 		}
105 | 
106 | 		err := json.NewDecoder(f).Decode(&prof)
107 | 		if err != nil {
108 | 			return fmt.Errorf("decoding account file: %v", err)
109 | 		}
110 | 
111 | 		return archiver.ErrStopWalk
112 | 	})
113 | 	return prof, err
114 | }
115 | 
116 | func (c *Client) getMediaIndex(filename string) (instaMediaIndex, error) {
117 | 	var idx instaMediaIndex
118 | 	err := archiver.Walk(filename, func(f archiver.File) error {
119 | 		defer f.Close()
120 | 		if f.Name() != "media.json" {
121 | 			return nil
122 | 		}
123 | 
124 | 		err := json.NewDecoder(f).Decode(&idx)
125 | 		if err != nil {
126 | 			return fmt.Errorf("decoding media index JSON: %v", err)
127 | 		}
128 | 
129 | 		return archiver.ErrStopWalk
130 | 	})
131 | 	if err != nil {
132 | 		return idx, fmt.Errorf("walking archive file %s: %v", filename, err)
133 | 	}
134 | 	return idx, nil
135 | }
136 | 
137 | const takenAtFormat = "2006-01-02T15:04:05+07:00"
138 | 


--------------------------------------------------------------------------------
/datasources/instagram/models.go:
--------------------------------------------------------------------------------
  1 | package instagram
  2 | 
  3 | import (
  4 | 	"archive/zip"
  5 | 	"bytes"
  6 | 	"fmt"
  7 | 	"io"
  8 | 	"mime"
  9 | 	"path"
 10 | 	"strings"
 11 | 	"time"
 12 | 
 13 | 	"github.com/mholt/archiver/v3"
 14 | 	"github.com/mholt/timeliner"
 15 | )
 16 | 
// instaMediaIndex is the structure of the media.json index file
// in an Instagram data export archive; it lists the photos,
// profile pictures, and videos contained in the export.
type instaMediaIndex struct {
	Photos  []instaPhoto      `json:"photos"`
	Profile []instaProfilePic `json:"profile"`
	Videos  []instaVideo      `json:"videos"`
}
 22 | 
// instaPhoto is one photo entry from the media index. The
// JSON-tagged fields are decoded from media.json; the unexported
// fields are populated by the client before the photo is sent
// for processing.
type instaPhoto struct {
	Caption     string `json:"caption"`
	TakenAt     string `json:"taken_at"` // raw timestamp; parsed using takenAtFormat
	Path        string `json:"path"`     // path of the media file inside the archive
	LocationStr string `json:"location,omitempty"`

	takenAtParsed   time.Time           // TakenAt parsed into a time.Time
	archiveFilename string              // the export archive this photo lives in
	profile         instaAccountProfile // account owner, reported by Owner()
}
 33 | 
 34 | func (ph instaPhoto) ID() string {
 35 | 	fname := path.Base(ph.Path)
 36 | 	ext := path.Ext(fname)
 37 | 	return strings.TrimSuffix(fname, ext)
 38 | }
 39 | 
// Timestamp returns when the photo was taken.
func (ph instaPhoto) Timestamp() time.Time {
	return ph.takenAtParsed
}

// Class returns the image class.
func (ph instaPhoto) Class() timeliner.ItemClass {
	return timeliner.ClassImage
}

// Owner returns the username and display name of the
// account the export belongs to.
func (ph instaPhoto) Owner() (id *string, name *string) {
	return &ph.profile.Username, &ph.profile.Name
}

// DataText returns the photo's caption.
func (ph instaPhoto) DataText() (*string, error) {
	return &ph.Caption, nil
}

// DataFileName returns the base name of the photo's file
// within the archive.
func (ph instaPhoto) DataFileName() *string {
	fname := path.Base(ph.Path)
	return &fname
}
 60 | 
 61 | func (ph instaPhoto) DataFileReader() (io.ReadCloser, error) {
 62 | 	var rc io.ReadCloser
 63 | 	err := archiver.Walk(ph.archiveFilename, func(f archiver.File) error {
 64 | 		if f.Header.(zip.FileHeader).Name != ph.Path {
 65 | 			return nil
 66 | 		}
 67 | 
 68 | 		buf := new(bytes.Buffer)
 69 | 		_, err := io.Copy(buf, f)
 70 | 		if err != nil {
 71 | 			return fmt.Errorf("copying item into memory: %v", err)
 72 | 		}
 73 | 		rc = timeliner.FakeCloser(buf)
 74 | 
 75 | 		return archiver.ErrStopWalk
 76 | 	})
 77 | 	if err != nil {
 78 | 		return nil, fmt.Errorf("walking archive file %s in search of media: %v",
 79 | 			ph.archiveFilename, err)
 80 | 	}
 81 | 	return rc, nil
 82 | }
 83 | 
// DataFileHash returns nil; no precomputed hash is available
// from the export.
func (ph instaPhoto) DataFileHash() []byte {
	return nil
}

// DataFileMIMEType returns the MIME type derived from the
// photo file's extension.
func (ph instaPhoto) DataFileMIMEType() *string {
	mt := mime.TypeByExtension(path.Ext(ph.Path))
	return &mt
}

// Metadata returns the photo's location string as general-area
// metadata, if one was set; otherwise nil.
func (ph instaPhoto) Metadata() (*timeliner.Metadata, error) {
	if ph.LocationStr != "" {
		return &timeliner.Metadata{GeneralArea: ph.LocationStr}, nil
	}
	return nil, nil
}

// Location returns nil; only the textual location (see Metadata)
// is present in the export.
func (ph instaPhoto) Location() (*timeliner.Location, error) {
	return nil, nil
}
103 | 
// instaProfilePic is one profile picture entry from the media
// index. The JSON-tagged fields are decoded from media.json;
// the unexported fields are populated by the client.
type instaProfilePic struct {
	Caption         string `json:"caption"`
	TakenAt         string `json:"taken_at"` // raw timestamp; parsed using takenAtFormat
	IsActiveProfile bool   `json:"is_active_profile"`
	Path            string `json:"path"` // path of the picture file inside the archive

	takenAtParsed   time.Time
	archiveFilename string
	profile         instaAccountProfile
}
114 | 
// instaVideo is one video entry from the media index. The
// JSON-tagged fields are decoded from media.json; the unexported
// fields are populated by the client before the video is sent
// for processing.
type instaVideo struct {
	Caption     string `json:"caption"`
	TakenAt     string `json:"taken_at"` // raw timestamp; parsed using takenAtFormat
	Path        string `json:"path"`     // path of the media file inside the archive
	LocationStr string `json:"location,omitempty"`

	takenAtParsed   time.Time           // TakenAt parsed into a time.Time
	archiveFilename string              // the export archive this video lives in
	profile         instaAccountProfile // account owner, reported by Owner()
}
125 | 
126 | func (vid instaVideo) ID() string {
127 | 	fname := path.Base(vid.Path)
128 | 	ext := path.Ext(fname)
129 | 	return strings.TrimSuffix(fname, ext)
130 | }
131 | 
// Timestamp returns when the video was taken.
func (vid instaVideo) Timestamp() time.Time {
	return vid.takenAtParsed
}

// Class returns the video class.
func (vid instaVideo) Class() timeliner.ItemClass {
	return timeliner.ClassVideo
}

// Owner returns the username and display name of the
// account the export belongs to.
func (vid instaVideo) Owner() (id *string, name *string) {
	return &vid.profile.Username, &vid.profile.Name
}

// DataText returns the video's caption.
func (vid instaVideo) DataText() (*string, error) {
	return &vid.Caption, nil
}

// DataFileName returns the base name of the video's file
// within the archive.
func (vid instaVideo) DataFileName() *string {
	fname := path.Base(vid.Path)
	return &fname
}
152 | 
153 | func (vid instaVideo) DataFileReader() (io.ReadCloser, error) {
154 | 	var rc io.ReadCloser
155 | 	err := archiver.Walk(vid.archiveFilename, func(f archiver.File) error {
156 | 		if f.Header.(zip.FileHeader).Name != vid.Path {
157 | 			return nil
158 | 		}
159 | 
160 | 		buf := new(bytes.Buffer)
161 | 		_, err := io.Copy(buf, f)
162 | 		if err != nil {
163 | 			return fmt.Errorf("copying item into memory: %v", err)
164 | 		}
165 | 		rc = timeliner.FakeCloser(buf)
166 | 
167 | 		return archiver.ErrStopWalk
168 | 	})
169 | 	if err != nil {
170 | 		return nil, fmt.Errorf("walking archive file %s in search of media: %v",
171 | 			vid.archiveFilename, err)
172 | 	}
173 | 	return rc, nil
174 | }
175 | 
// DataFileHash returns nil; no precomputed hash is available
// from the export.
func (vid instaVideo) DataFileHash() []byte {
	return nil
}

// DataFileMIMEType returns the MIME type derived from the
// video file's extension.
func (vid instaVideo) DataFileMIMEType() *string {
	mt := mime.TypeByExtension(path.Ext(vid.Path))
	return &mt
}

// Metadata returns the video's location string as general-area
// metadata, if one was set; otherwise nil.
func (vid instaVideo) Metadata() (*timeliner.Metadata, error) {
	if vid.LocationStr != "" {
		return &timeliner.Metadata{GeneralArea: vid.LocationStr}, nil
	}
	return nil, nil
}

// Location returns nil; only the textual location (see Metadata)
// is present in the export.
func (vid instaVideo) Location() (*timeliner.Location, error) {
	return nil, nil
}
195 | 
// instaAccountProfile is the structure of the profile.json file
// in an Instagram data export; it describes the account that the
// export belongs to. Username and Name are used as the owner ID
// and display name for each item.
type instaAccountProfile struct {
	Biography      string `json:"biography"`
	DateJoined     string `json:"date_joined"`
	Email          string `json:"email"`
	Website        string `json:"website"`
	Gender         string `json:"gender"`
	PrivateAccount bool   `json:"private_account"`
	Name           string `json:"name"`
	PhoneNumber    string `json:"phone_number"`
	ProfilePicURL  string `json:"profile_pic_url"`
	Username       string `json:"username"`
}
208 | 


--------------------------------------------------------------------------------
/datasources/smsbackuprestore/mms.go:
--------------------------------------------------------------------------------
  1 | package smsbackuprestore
  2 | 
  3 | import (
  4 | 	"encoding/base64"
  5 | 	"fmt"
  6 | 	"io"
  7 | 	"strings"
  8 | 	"time"
  9 | 
 10 | 	"github.com/mholt/timeliner"
 11 | )
 12 | 
// MMS represents a multimedia message as exported by the backup
// app. Most attributes are carried through verbatim from the XML
// export; the ones this package actually uses are TrID (for ID
// generation), Parts (message text and media), and Addrs (the
// message's participants).
type MMS struct {
	CommonSMSandMMSFields
	Rr         string    `xml:"rr,attr"`
	Sub        string    `xml:"sub,attr"`
	CtT        string    `xml:"ct_t,attr"`
	ReadStatus string    `xml:"read_status,attr"`
	Seen       string    `xml:"seen,attr"`
	MsgBox     string    `xml:"msg_box,attr"`
	SubCs      string    `xml:"sub_cs,attr"`
	RespSt     string    `xml:"resp_st,attr"`
	RetrSt     string    `xml:"retr_st,attr"`
	DTm        string    `xml:"d_tm,attr"`
	TextOnly   string    `xml:"text_only,attr"`
	Exp        string    `xml:"exp,attr"`
	MID        string    `xml:"m_id,attr"`
	St         string    `xml:"st,attr"`
	RetrTxtCs  string    `xml:"retr_txt_cs,attr"`
	RetrTxt    string    `xml:"retr_txt,attr"`
	Creator    string    `xml:"creator,attr"`
	MSize      string    `xml:"m_size,attr"`
	RptA       string    `xml:"rpt_a,attr"`
	CtCls      string    `xml:"ct_cls,attr"`
	Pri        string    `xml:"pri,attr"`
	TrID       string    `xml:"tr_id,attr"` // transaction ID; combined with Date to form the item ID
	RespTxt    string    `xml:"resp_txt,attr"`
	CtL        string    `xml:"ct_l,attr"`
	MCls       string    `xml:"m_cls,attr"`
	DRpt       string    `xml:"d_rpt,attr"`
	V          string    `xml:"v,attr"`
	MType      string    `xml:"m_type,attr"`
	Parts      Parts     `xml:"parts"` // message content: text and media parts
	Addrs      Addresses `xml:"addrs"` // sender and recipients

	// client provides phone number standardization and account
	// info; set by ListItems before the message is processed.
	client *Client
}
 49 | 
// ID returns a unique ID by concatenating the
// date of the message (milliseconds since the Unix
// epoch) with its TrID.
func (m MMS) ID() string {
	return fmt.Sprintf("%d_%s", m.Date, m.TrID)
}

// Timestamp returns the message's date. (The Date
// attribute is in milliseconds since the Unix epoch.)
func (m MMS) Timestamp() time.Time {
	return time.Unix(0, m.Date*int64(time.Millisecond))
}

// Class returns the class Message.
func (m MMS) Class() timeliner.ItemClass {
	return timeliner.ClassMessage
}
 65 | 
 66 | // Owner returns the name and number of the sender,
 67 | // if available. The export format does not give us
 68 | // the contacts' names, however.
 69 | func (m MMS) Owner() (number *string, name *string) {
 70 | 	for _, addr := range m.Addrs.Addr {
 71 | 		if addr.Type == mmsAddrTypeSender {
 72 | 			// TODO: Get sender name... for group texts this is tricky/impossible, since order varies
 73 | 			// TODO: If there is only one other contact on the message (other than the account owner's number), we can probably assume the contact name is theirs.
 74 | 			standardized, err := m.client.standardizePhoneNumber(addr.Address)
 75 | 			if err != nil {
 76 | 				// oh well; just go with what we have, I guess
 77 | 				return &addr.Address, nil
 78 | 			}
 79 | 			return &standardized, nil
 80 | 		}
 81 | 	}
 82 | 	return nil, nil
 83 | }
 84 | 
 85 | // DataText returns the text of the multimedia message, if any.
 86 | func (m MMS) DataText() (*string, error) {
 87 | 	var text string
 88 | 	for _, part := range m.Parts.Part {
 89 | 		if part.Seq < 0 {
 90 | 			continue
 91 | 		}
 92 | 		if part.ContentType == "text/plain" &&
 93 | 			part.AttrText != "" &&
 94 | 			part.AttrText != "null" {
 95 | 			text += part.AttrText
 96 | 		}
 97 | 	}
 98 | 	if text != "" {
 99 | 		return &text, nil
100 | 	}
101 | 	return nil, nil
102 | }
103 | 
104 | // DataFileName returns the name of the file, if any.
105 | func (m MMS) DataFileName() *string {
106 | 	for _, part := range m.Parts.Part {
107 | 		if part.Seq < 0 {
108 | 			continue
109 | 		}
110 | 		if isMediaContentType(part.ContentType) {
111 | 			return &part.Filename
112 | 		}
113 | 	}
114 | 	return nil
115 | }
116 | 
117 | // DataFileReader returns the data file reader, if any.
118 | func (m MMS) DataFileReader() (io.ReadCloser, error) {
119 | 	for _, part := range m.Parts.Part {
120 | 		if part.Seq < 0 {
121 | 			continue
122 | 		}
123 | 		if isMediaContentType(part.ContentType) {
124 | 			sr := strings.NewReader(part.Data)
125 | 			bd := base64.NewDecoder(base64.StdEncoding, sr)
126 | 			return timeliner.FakeCloser(bd), nil
127 | 		}
128 | 	}
129 | 	return nil, nil
130 | }
131 | 
// DataFileHash returns nil; the export carries no
// precomputed hash for media parts.
func (m MMS) DataFileHash() []byte {
	return nil
}
136 | 
137 | // DataFileMIMEType returns the MIME type, if any.
138 | func (m MMS) DataFileMIMEType() *string {
139 | 	for _, part := range m.Parts.Part {
140 | 		if isMediaContentType(part.ContentType) {
141 | 			return &part.ContentType
142 | 		}
143 | 	}
144 | 	return nil
145 | }
146 | 
// Metadata returns nil; no extra metadata is extracted for MMS.
func (m MMS) Metadata() (*timeliner.Metadata, error) {
	return nil, nil
}

// Location returns nil; the export format carries no location
// data for messages.
func (m MMS) Location() (*timeliner.Location, error) {
	return nil, nil
}
156 | 
// Parts is the parts of an MMS.
type Parts struct {
	Text string `xml:",chardata"`
	Part []Part `xml:"part"`
}

// Part is a part of an MMS, such as a text body or an
// attached image or video.
type Part struct {
	Text        string `xml:",chardata"`
	Seq         int    `xml:"seq,attr"` // parts with a negative seq are skipped by the accessors above
	ContentType string `xml:"ct,attr"`  // MIME type, e.g. "text/plain" or "image/jpeg"
	Name        string `xml:"name,attr"`
	Charset     string `xml:"chset,attr"`
	Cd          string `xml:"cd,attr"`
	Fn          string `xml:"fn,attr"`
	Cid         string `xml:"cid,attr"`
	Filename    string `xml:"cl,attr"` // content location, used as the filename
	CttS        string `xml:"ctt_s,attr"`
	CttT        string `xml:"ctt_t,attr"`
	AttrText    string `xml:"text,attr"` // text content (may be "null")
	Data        string `xml:"data,attr"` // base64-encoded media data
}

// Addresses is the addresses the MMS was sent to.
type Addresses struct {
	Text string    `xml:",chardata"`
	Addr []Address `xml:"addr"`
}

// Address is a sender or recipient of the MMS.
type Address struct {
	Text    string `xml:",chardata"`
	Address string `xml:"address,attr"` // the phone number
	Type    int    `xml:"type,attr"`    // 151 = recipient, 137 = sender
	Charset string `xml:"charset,attr"`
}
193 | 
// isMediaContentType reports whether ct is an image or video
// MIME type — the kinds of MMS parts treated as media files.
func isMediaContentType(ct string) bool {
	for _, prefix := range []string{"image/", "video/"} {
		if strings.HasPrefix(ct, prefix) {
			return true
		}
	}
	return false
}
198 | 


--------------------------------------------------------------------------------
/datasources/smsbackuprestore/sms.go:
--------------------------------------------------------------------------------
  1 | package smsbackuprestore
  2 | 
  3 | import (
  4 | 	"encoding/xml"
  5 | 	"fmt"
  6 | 	"io"
  7 | 	"strings"
  8 | 	"time"
  9 | 
 10 | 	"github.com/mholt/timeliner"
 11 | )
 12 | 
// Smses is the root element of an SMS Backup & Restore XML
// export. It was generated 2019-07-10 using an export from
// SMS Backup & Restore v10.05.602 (previous versions
// have a bug with emoji encodings).
type Smses struct {
	XMLName    xml.Name `xml:"smses"`
	Text       string   `xml:",chardata"`
	Count      int      `xml:"count,attr"` // number of messages in the backup
	BackupSet  string   `xml:"backup_set,attr"`  // UUID
	BackupDate int64    `xml:"backup_date,attr"` // unix timestamp in milliseconds
	SMS        []SMS    `xml:"sms"`
	MMS        []MMS    `xml:"mms"`
}
 25 | 
// CommonSMSandMMSFields are the fields that both
// SMS and MMS share in common; it is embedded in both.
type CommonSMSandMMSFields struct {
	Text         string `xml:",chardata"`
	Address      string `xml:"address,attr"` // the other party's phone number
	Date         int64  `xml:"date,attr"` // unix timestamp in milliseconds
	Read         int    `xml:"read,attr"`
	Locked       int    `xml:"locked,attr"`
	DateSent     int64  `xml:"date_sent,attr"` // unix timestamp in (SMS: milliseconds, MMS: seconds)
	SubID        int    `xml:"sub_id,attr"`
	ReadableDate string `xml:"readable_date,attr"` // format: "Oct 20, 2017 12:35:30 PM"
	ContactName  string `xml:"contact_name,attr"`  // might be "(Unknown)"
}
 39 | 
// SMS represents a simple text message.
type SMS struct {
	CommonSMSandMMSFields
	Protocol      int    `xml:"protocol,attr"`
	Type          int    `xml:"type,attr"` // 1 = received, 2 = sent
	Subject       string `xml:"subject,attr"`
	Body          string `xml:"body,attr"` // the message text
	Toa           string `xml:"toa,attr"`
	ScToa         string `xml:"sc_toa,attr"`
	ServiceCenter string `xml:"service_center,attr"`
	Status        int    `xml:"status,attr"`

	// client provides phone number standardization and account
	// info; set by ListItems before the message is processed.
	client *Client
}
 54 | 
// ID returns a unique ID for this text message.
// Because text messages do not have IDs, an ID
// is constructed by concatenating the millisecond
// timestamp of the message with a fast hash of
// the message body.
func (s SMS) ID() string {
	return fmt.Sprintf("%d_%s", s.Date, fastHash(s.Body))
}

// Timestamp returns the message's date. (The Date
// attribute is in milliseconds since the Unix epoch.)
func (s SMS) Timestamp() time.Time {
	return time.Unix(0, s.Date*int64(time.Millisecond))
}

// Class returns class Message.
func (s SMS) Class() timeliner.ItemClass {
	return timeliner.ClassMessage
}
 73 | 
 74 | // Owner returns the sender's phone number and name, if available.
 75 | func (s SMS) Owner() (number *string, name *string) {
 76 | 	switch s.Type {
 77 | 	case smsTypeSent:
 78 | 		return &s.client.account.UserID, nil
 79 | 	case smsTypeReceived:
 80 | 		if s.ContactName != "" && s.ContactName != "(Unknown)" {
 81 | 			name = &s.ContactName
 82 | 		}
 83 | 		standardized, err := s.client.standardizePhoneNumber(s.Address)
 84 | 		if err == nil {
 85 | 			number = &standardized
 86 | 		} else {
 87 | 			number = &s.Address // oh well
 88 | 		}
 89 | 	}
 90 | 	return
 91 | }
 92 | 
 93 | // DataText returns the text of the message.
 94 | func (s SMS) DataText() (*string, error) {
 95 | 	body := strings.TrimSpace(s.Body)
 96 | 	if body != "" {
 97 | 		return &body, nil
 98 | 	}
 99 | 	return nil, nil
100 | }
101 | 
// DataFileName returns nil; plain text messages have no file.
func (s SMS) DataFileName() *string {
	return nil
}

// DataFileReader returns nil; plain text messages have no file.
func (s SMS) DataFileReader() (io.ReadCloser, error) {
	return nil, nil
}

// DataFileHash returns nil; plain text messages have no file.
func (s SMS) DataFileHash() []byte {
	return nil
}

// DataFileMIMEType returns nil; plain text messages have no file.
func (s SMS) DataFileMIMEType() *string {
	return nil
}

// Metadata returns nil; no extra metadata is extracted for SMS.
func (s SMS) Metadata() (*timeliner.Metadata, error) {
	return nil, nil
}

// Location returns nil; the export format carries no location
// data for messages.
func (s SMS) Location() (*timeliner.Location, error) {
	return nil, nil
}
131 | 


--------------------------------------------------------------------------------
/datasources/smsbackuprestore/smsbackuprestore.go:
--------------------------------------------------------------------------------
  1 | // Package smsbackuprestore implements a Timeliner data source for
  2 | // the Android SMS Backup & Restore app by SyncTech:
  3 | // https://synctech.com.au/sms-backup-restore/
  4 | package smsbackuprestore
  5 | 
  6 | import (
  7 | 	"context"
  8 | 	"encoding/xml"
  9 | 	"fmt"
 10 | 	"hash/fnv"
 11 | 	"log"
 12 | 	"os"
 13 | 
 14 | 	"github.com/mholt/timeliner"
 15 | 	"github.com/ttacon/libphonenumber"
 16 | )
 17 | 
// Data source name and ID.
const (
	DataSourceName = "SMS Backup & Restore"
	DataSourceID   = "smsbackuprestore"
)

// dataSource describes this data source for registration with
// timeliner; each account gets a fresh Client bound to it.
var dataSource = timeliner.DataSource{
	ID:   DataSourceID,
	Name: DataSourceName,
	NewClient: func(acc timeliner.Account) (timeliner.Client, error) {
		return &Client{account: acc}, nil
	},
}

// init registers the data source with timeliner at program start.
func init() {
	err := timeliner.RegisterDataSource(dataSource)
	if err != nil {
		log.Fatal(err)
	}
}
 38 | 
// Client implements the timeliner.Client interface.
type Client struct {
	// DefaultRegion is the region to assume for phone
	// numbers that do not have an explicit country
	// calling code. This value should be the ISO
	// 3166-1 alpha-2 standard region code.
	DefaultRegion string

	// account is the timeliner account being processed;
	// its UserID is the account owner's phone number
	// (standardized by ListItems).
	account timeliner.Account
}
 49 | 
 50 | // ListItems lists items from the data source.
 51 | func (c *Client) ListItems(ctx context.Context, itemChan chan<- *timeliner.ItemGraph, opt timeliner.ListingOptions) error {
 52 | 	defer close(itemChan)
 53 | 
 54 | 	if opt.Filename == "" {
 55 | 		return fmt.Errorf("filename is required")
 56 | 	}
 57 | 
 58 | 	// ensure the client's phone number is standardized
 59 | 	// TODO: It would be better to have a hook in the account creation process to be able to do this
 60 | 	ownerPhoneNum, err := c.standardizePhoneNumber(c.account.UserID)
 61 | 	if err != nil {
 62 | 		return fmt.Errorf("standardizing client phone number '%s': %v", c.account.UserID, err)
 63 | 	}
 64 | 	c.account.UserID = ownerPhoneNum
 65 | 
 66 | 	xmlFile, err := os.Open(opt.Filename)
 67 | 	if err != nil {
 68 | 		return err
 69 | 	}
 70 | 	defer xmlFile.Close()
 71 | 
 72 | 	var data Smses
 73 | 	dec := xml.NewDecoder(xmlFile)
 74 | 	err = dec.Decode(&data)
 75 | 	if err != nil {
 76 | 		return fmt.Errorf("decoding XML file: %v", err)
 77 | 	}
 78 | 
 79 | 	for _, sms := range data.SMS {
 80 | 		sms.client = c
 81 | 		itemChan <- timeliner.NewItemGraph(sms)
 82 | 	}
 83 | 
 84 | 	for _, mms := range data.MMS {
 85 | 		mms.client = c
 86 | 
 87 | 		ig := timeliner.NewItemGraph(mms)
 88 | 
 89 | 		// add relations to make sure other participants in a group text
 90 | 		// are recorded; necessary if more than two participants
 91 | 		if len(mms.Addrs.Addr) > 2 {
 92 | 			ownerNum, _ := mms.Owner()
 93 | 			if ownerNum != nil {
 94 | 				for _, addr := range mms.Addrs.Addr {
 95 | 					participantNum, err := c.standardizePhoneNumber(addr.Address)
 96 | 					if err != nil {
 97 | 						participantNum = addr.Address // oh well
 98 | 					}
 99 | 					// if this participant is not the owner of the message or
100 | 					// the account owner, then it must be another group member
101 | 					if participantNum != *ownerNum && participantNum != c.account.UserID {
102 | 						ig.Relations = append(ig.Relations, timeliner.RawRelation{
103 | 							FromItemID:     mms.ID(),
104 | 							ToPersonUserID: participantNum,
105 | 							Relation:       timeliner.RelCCed,
106 | 						})
107 | 					}
108 | 				}
109 | 			}
110 | 		}
111 | 
112 | 		itemChan <- ig
113 | 	}
114 | 
115 | 	return nil
116 | }
117 | 
// fastHash returns a hex-encoded 32-bit FNV-1a digest of input.
// It is a fast, non-cryptographic fingerprint — do not use it
// for cryptographic purposes.
func fastHash(input string) string {
	hasher := fnv.New32a()
	hasher.Write([]byte(input)) // Write on a hash.Hash never returns an error
	return fmt.Sprintf("%x", hasher.Sum32())
}
127 | 
128 | // standardizePhoneNumber attempts to parse number and returns
129 | // a standardized version in E164 format. If the number does
130 | // not have an explicit region/country code, the country code
131 | // for c.DefaultRegion is used instead.
132 | //
133 | // We chose E164 because that's what Twilio uses.
134 | func (c *Client) standardizePhoneNumber(number string) (string, error) {
135 | 	ph, err := libphonenumber.Parse(number, c.DefaultRegion)
136 | 	if err != nil {
137 | 		return "", err
138 | 	}
139 | 	return libphonenumber.Format(ph, libphonenumber.E164), nil
140 | }
141 | 
const (
	// SMS "type" attribute values (see SMS.Type).
	smsTypeReceived = 1
	smsTypeSent     = 2

	// MMS address "type" attribute values (see Address.Type).
	mmsAddrTypeRecipient = 151
	mmsAddrTypeSender    = 137
)
149 | 


--------------------------------------------------------------------------------
/datasources/twitter/api.go:
--------------------------------------------------------------------------------
  1 | package twitter
  2 | 
  3 | import (
  4 | 	"context"
  5 | 	"encoding/json"
  6 | 	"fmt"
  7 | 	"net/http"
  8 | 	"net/url"
  9 | 	"strconv"
 10 | 	"strings"
 11 | 
 12 | 	"github.com/mholt/timeliner"
 13 | )
 14 | 
// getFromAPI lists the account owner's tweets via the Twitter API,
// paging backward through the timeline (newest first) and sending
// each tweet to itemChan. It resumes from a previously saved
// checkpoint if one exists and stops when no tweets remain or ctx
// is canceled.
func (c *Client) getFromAPI(ctx context.Context, itemChan chan<- *timeliner.ItemGraph, opt timeliner.ListingOptions) error {
	// load any previous checkpoint
	c.checkpoint.load(opt.Checkpoint)

	// get account owner information
	cleanedScreenName := strings.TrimPrefix(c.acc.UserID, "@")
	ownerAccount, err := c.getAccountFromAPI(cleanedScreenName, "")
	if err != nil {
		return fmt.Errorf("getting user account information for @%s: %v", cleanedScreenName, err)
	}
	c.ownerAccount = ownerAccount

	// get the starting bounds of this operation:
	// maxTweet is the newest tweet ID to start from (upper bound),
	// minTweet is the oldest tweet ID to stop at (lower bound)
	var maxTweet, minTweet string
	if opt.Timeframe.SinceItemID != nil {
		minTweet = *opt.Timeframe.SinceItemID
	}
	if c.checkpoint.LastTweetID != "" {
		// by default, start off at the last checkpoint
		maxTweet = c.checkpoint.LastTweetID
		if opt.Timeframe.UntilItemID != nil {
			// if both a timeframe UntilItemID and a checkpoint are set,
			// we will choose the one with a tweet ID that is higher,
			// meaning more recent, to avoid potentially skipping
			// a chunk of the timeline
			maxTweet = maxTweetID(c.checkpoint.LastTweetID, *opt.Timeframe.UntilItemID)
		}
	}

	for {
		select {
		case <-ctx.Done():
			// cancellation is a clean stop (nil, not ctx.Err());
			// the saved checkpoint allows resuming later
			return nil
		default:
			tweets, err := c.nextPageOfTweetsFromAPI(maxTweet, minTweet)
			if err != nil {
				return fmt.Errorf("getting next page of tweets: %v", err)
			}

			// we are done when there are no more tweets
			if len(tweets) == 0 {
				return nil
			}

			for _, t := range tweets {
				err = c.processTweetFromAPI(t, itemChan)
				if err != nil {
					return fmt.Errorf("processing tweet from API: %v", err)
				}
			}

			// since max_id is inclusive, subtract 1 from the tweet ID
			// https://developer.twitter.com/en/docs/tweets/timelines/guides/working-with-timelines
			nextTweetID := tweets[len(tweets)-1].TweetID - 1
			c.checkpoint.LastTweetID = strconv.FormatInt(int64(nextTweetID), 10)
			c.checkpoint.save(ctx)

			// decrease maxTweet to get the next page on next iteration
			maxTweet = c.checkpoint.LastTweetID
		}
	}
}
 77 | 
 78 | func (c *Client) processTweetFromAPI(t tweet, itemChan chan<- *timeliner.ItemGraph) error {
 79 | 	skip, err := c.prepareTweet(&t, "api")
 80 | 	if err != nil {
 81 | 		return fmt.Errorf("preparing tweet: %v", err)
 82 | 	}
 83 | 	if skip {
 84 | 		return nil
 85 | 	}
 86 | 
 87 | 	ig, err := c.makeItemGraphFromTweet(t, "")
 88 | 	if err != nil {
 89 | 		return fmt.Errorf("processing tweet %s: %v", t.ID(), err)
 90 | 	}
 91 | 
 92 | 	// send the tweet for processing
 93 | 	if ig != nil {
 94 | 		itemChan <- ig
 95 | 	}
 96 | 
 97 | 	return nil
 98 | }
 99 | 
100 | // nextPageOfTweetsFromAPI returns the next page of tweets starting at maxTweet
101 | // and going for a full page or until minTweet, whichever comes first. Generally,
102 | // iterating over this function will involve decreasing maxTweet and leaving
103 | // minTweet the same, if set at all (maxTweet = "until", minTweet = "since").
104 | // Either or both can be empty strings, for no boundaries. This function returns
105 | // at least 0 tweets (signaling done, I think) or up to a full page of tweets.
106 | func (c *Client) nextPageOfTweetsFromAPI(maxTweet, minTweet string) ([]tweet, error) {
107 | 	q := url.Values{
108 | 		"user_id":         {c.ownerAccount.id()},
109 | 		"count":           {"200"},
110 | 		"tweet_mode":      {"extended"}, // https://developer.twitter.com/en/docs/tweets/tweet-updates
111 | 		"exclude_replies": {"false"},    // always include replies in case it's a self-reply; we can filter all others
112 | 		"include_rts":     {"false"},
113 | 	}
114 | 	if c.Retweets {
115 | 		q.Set("include_rts", "true")
116 | 	}
117 | 	if maxTweet != "" {
118 | 		q.Set("max_id", maxTweet)
119 | 	}
120 | 	if minTweet != "" {
121 | 		q.Set("since_id", minTweet)
122 | 	}
123 | 	u := "https://api.twitter.com/1.1/statuses/user_timeline.json?" + q.Encode()
124 | 
125 | 	resp, err := c.HTTPClient.Get(u)
126 | 	if err != nil {
127 | 		return nil, fmt.Errorf("performing API request: %v", err)
128 | 	}
129 | 	defer resp.Body.Close()
130 | 
131 | 	// TODO: handle HTTP errors, esp. rate limiting, a lot better
132 | 	if resp.StatusCode != http.StatusOK {
133 | 		return nil, fmt.Errorf("HTTP error: %s: %s", u, resp.Status)
134 | 	}
135 | 
136 | 	var tweets []tweet
137 | 	err = json.NewDecoder(resp.Body).Decode(&tweets)
138 | 	if err != nil {
139 | 		return nil, fmt.Errorf("reading response body: %v", err)
140 | 	}
141 | 
142 | 	return tweets, nil
143 | }
144 | 
145 | // getAccountFromAPI gets the account information for either
146 | // screenName, if set, or accountID, if set. Set only one;
147 | // leave the other argument empty string.
148 | func (c *Client) getAccountFromAPI(screenName, accountID string) (twitterAccount, error) {
149 | 	var ta twitterAccount
150 | 
151 | 	q := make(url.Values)
152 | 	if screenName != "" {
153 | 		q.Set("screen_name", screenName)
154 | 	} else if accountID != "" {
155 | 		q.Set("user_id", accountID)
156 | 	}
157 | 
158 | 	u := "https://api.twitter.com/1.1/users/show.json?" + q.Encode()
159 | 
160 | 	resp, err := c.HTTPClient.Get(u)
161 | 	if err != nil {
162 | 		return ta, fmt.Errorf("performing API request: %v", err)
163 | 	}
164 | 	defer resp.Body.Close()
165 | 
166 | 	// TODO: handle HTTP errors, esp. rate limiting, a lot better
167 | 	if resp.StatusCode != http.StatusOK {
168 | 		return ta, fmt.Errorf("HTTP error: %s: %s", u, resp.Status)
169 | 	}
170 | 
171 | 	err = json.NewDecoder(resp.Body).Decode(&ta)
172 | 	if err != nil {
173 | 		return ta, fmt.Errorf("reading response body: %v", err)
174 | 	}
175 | 
176 | 	return ta, nil
177 | }
178 | 
179 | func (c *Client) getTweetFromAPI(id string) (tweet, error) {
180 | 	var t tweet
181 | 
182 | 	q := url.Values{
183 | 		"id":         {id},
184 | 		"tweet_mode": {"extended"}, // https://developer.twitter.com/en/docs/tweets/tweet-updates
185 | 	}
186 | 	u := "https://api.twitter.com/1.1/statuses/show.json?" + q.Encode()
187 | 
188 | 	resp, err := c.HTTPClient.Get(u)
189 | 	if err != nil {
190 | 		return t, fmt.Errorf("performing API request: %v", err)
191 | 	}
192 | 	defer resp.Body.Close()
193 | 
194 | 	switch resp.StatusCode {
195 | 	case http.StatusNotFound:
196 | 		// this is okay, because the tweet may simply have been deleted,
197 | 		// and we skip empty tweets anyway
198 | 		fallthrough
199 | 	case http.StatusForbidden:
200 | 		// this happens when the author's account is suspended
201 | 		return t, nil
202 | 	case http.StatusOK:
203 | 		break
204 | 	default:
205 | 		// TODO: handle HTTP errors, esp. rate limiting, a lot better
206 | 		return t, fmt.Errorf("HTTP error: %s: %s", u, resp.Status)
207 | 	}
208 | 
209 | 	err = json.NewDecoder(resp.Body).Decode(&t)
210 | 	if err != nil {
211 | 		return t, fmt.Errorf("reading response body: %v", err)
212 | 	}
213 | 
214 | 	return t, nil
215 | }
216 | 


--------------------------------------------------------------------------------
/datasources/twitter/api_test.go:
--------------------------------------------------------------------------------
  1 | package twitter
  2 | 
  3 | import (
  4 | 	"encoding/json"
  5 | 	"strings"
  6 | 	"testing"
  7 | )
  8 | 
// TestDecodeTwitterAccount decodes a representative API response for a
// user account and asserts that the fields of twitterAccount (and its
// embedded latest-status tweet) are populated as expected.
func TestDecodeTwitterAccount(t *testing.T) {
	// try decode a "kitchen sink", so that we can test that most features get decoded correctly
	twitterAccountAPIResponseJSON := strings.NewReader(`
{
  "id": 9876543,
  "id_str": "9876543",
  "name": "barry",
  "screen_name": "testingperson",
  "location": "In your hearts and minds",
  "profile_location": null,
  "description": "i am the what i was of what i will be.",
  "url": "https:\/\/t.co\/abcdefghij",
  "entities": {
    "url": {
      "urls": [
        {
          "url": "https:\/\/t.co\/abcdefghij",
          "expanded_url": "http:\/\/Instagram.com\/demotestingIGperson",
          "display_url": "Instagram.com\/demotestingIGperson",
          "indices": [
            0,
            23
          ]
        }
      ]
    },
    "description": {
      "urls": [
        
      ]
    }
  },
  "protected": false,
  "followers_count": 161,
  "friends_count": 280,
  "listed_count": 8,
  "created_at": "Wed Mar 21 18:13:14 +0000 2007",
  "favourites_count": 2279,
  "utc_offset": null,
  "time_zone": null,
  "geo_enabled": true,
  "verified": false,
  "statuses_count": 1729,
  "lang": null,
  "status": {
    "created_at": "Wed Nov 27 18:54:49 +0000 2019",
    "id": 1234567890123456789,
    "id_str": "1234567890123456789",
    "text": "Demo tweet #testing https:\/\/t.co\/abcdefgijk",
    "truncated": false,
    "entities": {
      "hashtags": [
        {
          "text": "testing",
          "indices": [
            0,
            8
          ]
        }
      ],
      "symbols": [
        
      ],
      "user_mentions": [
        
      ],
      "urls": [
        {
          "url": "https:\/\/t.co\/abcdefgijk",
          "expanded_url": "https:\/\/www.instagram.com\/p\/BAABAABAABA\/?igshid=xyxyxyxyxyxyx",
          "display_url": "instagram.com\/p\/BAABAABAABA\/\u2026",
          "indices": [
            52,
            75
          ]
        }
      ]
    },
    "source": "\u003ca href=\"http:\/\/instagram.com\" rel=\"nofollow\"\u003eInstagram\u003c\/a\u003e",
    "in_reply_to_status_id": null,
    "in_reply_to_status_id_str": null,
    "in_reply_to_user_id": null,
    "in_reply_to_user_id_str": null,
    "in_reply_to_screen_name": null,
    "geo": {
      "type": "Point",
      "coordinates": [
        34.0522,
        -118.243
      ]
    },
    "coordinates": {
      "type": "Point",
      "coordinates": [
        -118.243,
        34.0522
      ]
    },
    "place": {
      "id": "3b77caf94bfc81fe",
      "url": "https:\/\/api.twitter.com\/1.1\/geo\/id\/3b77caf94bfc81fe.json",
      "place_type": "city",
      "name": "Los Angeles",
      "full_name": "Los Angeles, CA",
      "country_code": "US",
      "country": "USA",
      "contained_within": [
        
      ],
      "bounding_box": {
        "type": "Polygon",
        "coordinates": [
          [
            [
              -118.668404,
              33.704538
            ],
            [
              -118.155409,
              33.704538
            ],
            [
              -118.155409,
              34.337041
            ],
            [
              -118.668404,
              34.337041
            ]
          ]
        ]
      },
      "attributes": {
        
      }
    },
    "contributors": null,
    "is_quote_status": false,
    "retweet_count": 0,
    "favorite_count": 0,
    "favorited": false,
    "retweeted": false,
    "possibly_sensitive": false,
    "lang": "en"
  },
  "contributors_enabled": false,
  "is_translator": false,
  "is_translation_enabled": false,
  "profile_background_color": "FFFFFF",
  "profile_background_image_url": "http:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png",
  "profile_background_image_url_https": "https:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png",
  "profile_background_tile": true,
  "profile_image_url": "http:\/\/pbs.twimg.com\/profile_images\/923335960007340032\/pIbUjNkC_normal.jpg",
  "profile_image_url_https": "https:\/\/pbs.twimg.com\/profile_images\/923335960007340032\/pIbUjNkC_normal.jpg",
  "profile_banner_url": "https:\/\/pbs.twimg.com\/profile_banners\/9876543\/1508975481",
  "profile_link_color": "0012BB",
  "profile_sidebar_border_color": "AAAAAA",
  "profile_sidebar_fill_color": "FFFFFF",
  "profile_text_color": "000000",
  "profile_use_background_image": false,
  "has_extended_profile": false,
  "default_profile": false,
  "default_profile_image": false,
  "can_media_tag": null,
  "followed_by": null,
  "following": null,
  "follow_request_sent": null,
  "notifications": null,
  "translator_type": "none"
}
`)

	// decoding must succeed before any fields are inspected
	var acc twitterAccount
	assertTrue(t, json.NewDecoder(twitterAccountAPIResponseJSON).Decode(&acc) == nil)

	// NOTE: assertions skipped for fields typed interface{}

	// basic identity fields
	assertTrue(t, acc.ID == 9876543)
	assertEqualString(t, acc.IDStr, "9876543")
	assertEqualString(t, acc.ScreenName, "testingperson")
	assertEqualString(t, acc.Name, "barry")
	assertEqualString(t, acc.Location, "In your hearts and minds")
	assertEqualString(t, acc.Description, "i am the what i was of what i will be.")
	assertEqualString(t, acc.URL, "https://t.co/abcdefghij")

	// boolean account flags
	assertTrue(t, !acc.Protected)
	assertTrue(t, acc.GeoEnabled)
	assertTrue(t, !acc.Verified)
	assertTrue(t, !acc.ContributorsEnabled)
	assertTrue(t, !acc.HasExtendedProfile)

	// counters
	assertTrue(t, acc.FollowersCount == 161)
	assertTrue(t, acc.ListedCount == 8)
	assertTrue(t, acc.FavouritesCount == 2279)
	assertTrue(t, acc.StatusesCount == 1729)

	// JSON null lang decodes to the empty string
	assertEqualString(t, acc.Lang, "")
	assertTrue(t, !acc.IsTranslator)
	assertTrue(t, !acc.IsTranslationEnabled)
	assertEqualString(t, acc.TranslatorType, "none")

	// profile styling fields
	assertTrue(t, !acc.ProfileUseBackgroundImage)
	assertTrue(t, !acc.DefaultProfile)
	assertTrue(t, !acc.DefaultProfileImage)
	assertTrue(t, acc.ProfileBackgroundTile)
	assertEqualString(t, acc.ProfileBackgroundColor, "FFFFFF")
	assertEqualString(t, acc.ProfileBackgroundImageURL, "http://abs.twimg.com/images/themes/theme1/bg.png")
	assertEqualString(t, acc.ProfileBackgroundImageURLHTTPS, "https://abs.twimg.com/images/themes/theme1/bg.png")
	assertEqualString(t, acc.ProfileImageURL, "http://pbs.twimg.com/profile_images/923335960007340032/pIbUjNkC_normal.jpg")
	assertEqualString(t, acc.ProfileImageURLHTTPS, "https://pbs.twimg.com/profile_images/923335960007340032/pIbUjNkC_normal.jpg")
	assertEqualString(t, acc.ProfileBannerURL, "https://pbs.twimg.com/profile_banners/9876543/1508975481")
	assertEqualString(t, acc.ProfileLinkColor, "0012BB")
	assertEqualString(t, acc.ProfileSidebarBorderColor, "AAAAAA")
	assertEqualString(t, acc.ProfileSidebarFillColor, "FFFFFF")
	assertEqualString(t, acc.ProfileTextColor, "000000")

	// the "status" object is the account's latest tweet
	latestTweet := acc.Status // shorthand

	assertEqualString(t, latestTweet.TweetIDStr, "1234567890123456789")
	assertTrue(t, latestTweet.TweetID == 1234567890123456789)
	assertTrue(t, latestTweet.User == nil)
	assertEqualString(t, latestTweet.CreatedAt, "Wed Nov 27 18:54:49 +0000 2019")
	assertEqualString(t, latestTweet.Text, "Demo tweet #testing https://t.co/abcdefgijk")
	assertEqualString(t, latestTweet.FullText, "")
	assertEqualString(t, latestTweet.Lang, "en")
	assertEqualString(t, latestTweet.Source, `<a href="http://instagram.com" rel="nofollow">Instagram</a>`)
	assertTrue(t, !latestTweet.Truncated)
	assertTrue(t, !latestTweet.PossiblySensitive)
	assertTrue(t, !latestTweet.IsQuoteStatus)

	// null reply fields decode to zero values
	assertEqualString(t, latestTweet.InReplyToScreenName, "")
	assertTrue(t, latestTweet.InReplyToStatusID == 0)
	assertEqualString(t, latestTweet.InReplyToStatusIDStr, "")
	assertTrue(t, latestTweet.InReplyToUserID == 0)
	assertEqualString(t, latestTweet.InReplyToUserIDStr, "")

	// fields absent from the JSON stay zero-valued
	assertTrue(t, !latestTweet.WithheldCopyright)
	assertTrue(t, len(latestTweet.WithheldInCountries) == 0)
	assertEqualString(t, latestTweet.WithheldScope, "")

	assertTrue(t, !latestTweet.Favorited)
	assertTrue(t, latestTweet.FavoriteCount == 0)

	assertTrue(t, !latestTweet.Retweeted)
	assertTrue(t, latestTweet.RetweetedStatus == nil)
	assertTrue(t, latestTweet.RetweetCount == 0)

	assertTrue(t, len(latestTweet.DisplayTextRange) == 0)

	// "coordinates" in the API is [longitude, latitude]
	assertTrue(t, latestTweet.Coordinates.Latitude() == 34.0522)
	assertTrue(t, latestTweet.Coordinates.Longitude() == -118.243)

	assertTrue(t, latestTweet.ExtendedEntities == nil)
	// I was too lazy to type assertions for the "entities" hierarchy, so we're just comparing
	// re-serialized versions. this would catch if we would have had typos in JSON field
	// names (they would not get decoded, and hence would not get re-serialized)
	entitiesJSON, err := json.MarshalIndent(latestTweet.Entities, "", "  ")
	assertTrue(t, err == nil)
	assertEqualString(t, string(entitiesJSON), `{
  "hashtags": [
    {
      "indices": [
        0,
        8
      ],
      "text": "testing"
    }
  ],
  "symbols": [],
  "user_mentions": [],
  "urls": [
    {
      "url": "https://t.co/abcdefgijk",
      "expanded_url": "https://www.instagram.com/p/BAABAABAABA/?igshid=xyxyxyxyxyxyx",
      "display_url": "instagram.com/p/BAABAABAABA/…",
      "indices": [
        52,
        75
      ]
    }
  ],
  "polls": null
}`)
}
293 | 
294 | func assertEqualString(t *testing.T, actual string, expected string) {
295 | 	t.Helper()
296 | 
297 | 	if actual != expected {
298 | 		t.Fatalf("exp=%v; got=%v", expected, actual)
299 | 	}
300 | }
301 | 
302 | func assertTrue(t *testing.T, val bool) {
303 | 	t.Helper()
304 | 
305 | 	if !val {
306 | 		t.Fatal("expected true; got false")
307 | 	}
308 | }
309 | 


--------------------------------------------------------------------------------
/datasources/twitter/archives.go:
--------------------------------------------------------------------------------
  1 | package twitter
  2 | 
  3 | import (
  4 | 	"encoding/json"
  5 | 	"fmt"
  6 | 	"io"
  7 | 
  8 | 	"github.com/mholt/archiver/v3"
  9 | 	"github.com/mholt/timeliner"
 10 | )
 11 | 
 12 | func (c *Client) getFromArchiveFile(itemChan chan<- *timeliner.ItemGraph, opt timeliner.ListingOptions) error {
 13 | 	// load the user's account ID
 14 | 	var err error
 15 | 	c.ownerAccount, err = c.getOwnerAccountFromArchive(opt.Filename)
 16 | 	if err != nil {
 17 | 		return fmt.Errorf("unable to get user account ID: %v", err)
 18 | 	}
 19 | 
 20 | 	// first pass - add tweets to timeline
 21 | 	err = c.processArchive(opt.Filename, itemChan, c.makeItemGraphFromTweet)
 22 | 	if err != nil {
 23 | 		return fmt.Errorf("processing tweets: %v", err)
 24 | 	}
 25 | 
 26 | 	// second pass - add tweet relationships to timeline
 27 | 	err = c.processArchive(opt.Filename, itemChan, c.processReplyRelationFromArchive)
 28 | 	if err != nil {
 29 | 		return fmt.Errorf("processing tweets: %v", err)
 30 | 	}
 31 | 
 32 | 	return nil
 33 | }
 34 | 
 35 | func (c *Client) processArchive(archiveFilename string, itemChan chan<- *timeliner.ItemGraph, processFunc archiveProcessFn) error {
 36 | 	err := archiver.Walk(archiveFilename, func(f archiver.File) error {
 37 | 		defer f.Close()
 38 | 		if f.Name() != "tweet.js" {
 39 | 			return nil
 40 | 		}
 41 | 
 42 | 		// consume non-JSON preface (JavaScript variable definition)
 43 | 		err := stripPreface(f, tweetFilePreface)
 44 | 		if err != nil {
 45 | 			return fmt.Errorf("reading tweet file preface: %v", err)
 46 | 		}
 47 | 
 48 | 		err = c.processTweetsFromArchive(itemChan, f, archiveFilename, processFunc)
 49 | 		if err != nil {
 50 | 			return fmt.Errorf("processing tweet file: %v", err)
 51 | 		}
 52 | 
 53 | 		return archiver.ErrStopWalk
 54 | 	})
 55 | 	if err != nil {
 56 | 		return fmt.Errorf("walking archive file %s: %v", archiveFilename, err)
 57 | 	}
 58 | 
 59 | 	return nil
 60 | }
 61 | 
 62 | func (c *Client) processTweetsFromArchive(itemChan chan<- *timeliner.ItemGraph, f io.Reader,
 63 | 	archiveFilename string, processFunc archiveProcessFn) error {
 64 | 
 65 | 	dec := json.NewDecoder(f)
 66 | 
 67 | 	// read array opening bracket '['
 68 | 	_, err := dec.Token()
 69 | 	if err != nil {
 70 | 		return fmt.Errorf("decoding opening token: %v", err)
 71 | 	}
 72 | 
 73 | 	for dec.More() {
 74 | 		var t tweet
 75 | 		err := dec.Decode(&t)
 76 | 		if err != nil {
 77 | 			return fmt.Errorf("decoding tweet element: %v", err)
 78 | 		}
 79 | 
 80 | 		skip, err := c.prepareTweet(&t, "archive")
 81 | 		if err != nil {
 82 | 			return fmt.Errorf("preparing tweet: %v", err)
 83 | 		}
 84 | 		if skip {
 85 | 			continue
 86 | 		}
 87 | 
 88 | 		ig, err := processFunc(t, archiveFilename)
 89 | 		if err != nil {
 90 | 			return fmt.Errorf("processing tweet: %v", err)
 91 | 		}
 92 | 
 93 | 		// send the tweet(s) for processing
 94 | 		if ig != nil {
 95 | 			itemChan <- ig
 96 | 		}
 97 | 	}
 98 | 
 99 | 	return nil
100 | }
101 | 
102 | func (c *Client) processReplyRelationFromArchive(t tweet, archiveFilename string) (*timeliner.ItemGraph, error) {
103 | 	if t.InReplyToStatusIDStr == "" {
104 | 		// current tweet is not a reply, so no relationship to add
105 | 		return nil, nil
106 | 	}
107 | 	if t.InReplyToUserIDStr != "" && t.InReplyToUserIDStr != c.ownerAccount.id() {
108 | 		// from archives, we only support storing replies to self... (TODO)
109 | 		return nil, nil
110 | 	}
111 | 
112 | 	ig := &timeliner.ItemGraph{
113 | 		Relations: []timeliner.RawRelation{
114 | 			{
115 | 				FromItemID: t.TweetIDStr,
116 | 				ToItemID:   t.InReplyToStatusIDStr,
117 | 				Relation:   timeliner.RelReplyTo,
118 | 			},
119 | 		},
120 | 	}
121 | 
122 | 	return ig, nil
123 | }
124 | 
125 | func (c *Client) getOwnerAccountFromArchive(filename string) (twitterAccount, error) {
126 | 	var ta twitterAccount
127 | 	err := archiver.Walk(filename, func(f archiver.File) error {
128 | 		defer f.Close()
129 | 		if f.Name() != "account.js" {
130 | 			return nil
131 | 		}
132 | 
133 | 		// consume non-JSON preface (JavaScript variable definition)
134 | 		err := stripPreface(f, accountFilePreface)
135 | 		if err != nil {
136 | 			return fmt.Errorf("reading account file preface: %v", err)
137 | 		}
138 | 
139 | 		var accFile twitterAccountFile
140 | 		err = json.NewDecoder(f).Decode(&accFile)
141 | 		if err != nil {
142 | 			return fmt.Errorf("decoding account file: %v", err)
143 | 		}
144 | 		if len(accFile) == 0 {
145 | 			return fmt.Errorf("account file was empty")
146 | 		}
147 | 
148 | 		ta = accFile[0].Account
149 | 
150 | 		return archiver.ErrStopWalk
151 | 	})
152 | 	return ta, err
153 | }
154 | 
155 | func stripPreface(f io.Reader, preface string) error {
156 | 	buf := make([]byte, len(preface))
157 | 	_, err := io.ReadFull(f, buf)
158 | 	return err
159 | }
160 | 
// archiveProcessFn is a function that processes a
// tweet from a Twitter export archive and returns
// an ItemGraph created from t. The archiveFilename
// allows implementations to read media files out of
// the archive alongside the tweet.
type archiveProcessFn func(t tweet, archiveFilename string) (*timeliner.ItemGraph, error)

// Variable definitions that are intended for
// use with JavaScript but which are of no use
// to us and would break the JSON parser.
// These prefaces are consumed (see stripPreface)
// before the JSON payload is decoded.
const (
	tweetFilePreface   = "window.YTD.tweet.part0 ="
	accountFilePreface = "window.YTD.account.part0 ="
)
173 | 


--------------------------------------------------------------------------------
/datasources/twitter/twitter.go:
--------------------------------------------------------------------------------
  1 | // Package twitter implements a Timeliner service for importing
  2 | // and downloading data from Twitter.
  3 | package twitter
  4 | 
  5 | import (
  6 | 	"archive/zip"
  7 | 	"bytes"
  8 | 	"context"
  9 | 	"fmt"
 10 | 	"io"
 11 | 	"log"
 12 | 	"net/http"
 13 | 	"net/url"
 14 | 	"path"
 15 | 	"regexp"
 16 | 	"strconv"
 17 | 	"time"
 18 | 
 19 | 	"github.com/mholt/archiver/v3"
 20 | 	"github.com/mholt/timeliner"
 21 | )
 22 | 
// Service name and ID.
const (
	DataSourceName = "Twitter"
	DataSourceID   = "twitter"
)

// dataSource describes this data source to timeliner: it authenticates
// via OAuth2 (provider "twitter") and rate-limits API requests.
var dataSource = timeliner.DataSource{
	ID:   DataSourceID,
	Name: DataSourceName,
	OAuth2: timeliner.OAuth2{
		ProviderID: "twitter",
	},
	RateLimit: timeliner.RateLimit{
		// from https://developer.twitter.com/en/docs/basics/rate-limits
		// with some leeway since it's actually a pretty generous limit
		RequestsPerHour: 5900,
	},
	// NewClient constructs a Client bound to the given account's
	// credentials; the otherAccounts cache starts empty.
	NewClient: func(acc timeliner.Account) (timeliner.Client, error) {
		httpClient, err := acc.NewHTTPClient()
		if err != nil {
			return nil, err
		}
		return &Client{
			HTTPClient:    httpClient,
			acc:           acc,
			otherAccounts: make(map[string]twitterAccount),
		}, nil
	},
}

// init registers this data source with timeliner at program startup.
func init() {
	err := timeliner.RegisterDataSource(dataSource)
	if err != nil {
		log.Fatal(err)
	}
}
 59 | 
// Client implements the timeliner.Client interface.
type Client struct {
	Retweets bool // whether to include retweets
	Replies  bool // whether to include replies to tweets that are not our own; i.e. are not a continuation of thought

	// HTTPClient is the authenticated client with which
	// API requests are made.
	HTTPClient *http.Client

	// checkpoint records where to resume an interrupted listing.
	checkpoint checkpointInfo

	// acc is the timeliner account being processed, and
	// ownerAccount is that account's Twitter profile.
	acc           timeliner.Account
	ownerAccount  twitterAccount
	otherAccounts map[string]twitterAccount // keyed by user/account ID
}
 73 | 
 74 | // ListItems lists items from opt.Filename if specified, or from the API otherwise.
 75 | func (c *Client) ListItems(ctx context.Context, itemChan chan<- *timeliner.ItemGraph, opt timeliner.ListingOptions) error {
 76 | 	defer close(itemChan)
 77 | 
 78 | 	if opt.Filename != "" {
 79 | 		return c.getFromArchiveFile(itemChan, opt)
 80 | 	}
 81 | 
 82 | 	return c.getFromAPI(ctx, itemChan, opt)
 83 | }
 84 | 
// prepareTweet fills in t's unexported fields (source, owner account,
// parsed creation time) and reports whether the tweet should be
// skipped according to the client's Retweets/Replies settings.
// source must be either "archive" or "api".
func (c *Client) prepareTweet(t *tweet, source string) (skip bool, err error) {
	// mark whether this tweet came from the API or an export file
	t.source = source

	// set the owner account information; this has to be done differently
	// depending on the source (it's not embedded in the archive's tweets...)
	switch t.source {
	case "archive":
		t.ownerAccount = c.ownerAccount
	case "api":
		if t.User != nil {
			if t.User.UserIDStr == c.ownerAccount.id() {
				// tweet author is the owner of the account - awesome
				t.ownerAccount = c.ownerAccount
			} else {
				// look up author's account info
				acc, ok := c.otherAccounts[t.User.UserIDStr]
				if !ok {
					acc, err = c.getAccountFromAPI("", t.User.UserIDStr)
					if err != nil {
						return false, fmt.Errorf("looking up tweet author's account information: %v", err)
					}
					// cache this for later
					// (first evict one arbitrary entry when the cache
					// exceeds ~2000 accounts, to bound its memory use)
					if len(c.otherAccounts) > 2000 {
						for id := range c.otherAccounts {
							delete(c.otherAccounts, id)
							break
						}
					}
					c.otherAccounts[acc.IDStr] = acc
				}
				t.ownerAccount = acc
			}
		}
	default:
		return false, fmt.Errorf("unrecognized source: %s", t.source)
	}

	// skip empty tweets
	if t.isEmpty() {
		return true, nil
	}

	// skip tweets we aren't interested in
	if !c.Retweets && t.isRetweet() {
		return true, nil
	}
	if !c.Replies && t.InReplyToUserIDStr != "" && t.InReplyToUserIDStr != t.ownerAccount.id() {
		// TODO: Replies should have more context, like what are we replying to, etc... the whole thread, even?
		// this option is about replies to tweets other than our own (which are like a continuation of one thought)
		return true, nil
	}

	// parse Twitter's time string into an actual time value
	// (layout matches Twitter's created_at format, e.g.
	// "Wed Nov 27 18:54:49 +0000 2019")
	t.createdAtParsed, err = time.Parse("Mon Jan 2 15:04:05 -0700 2006", t.CreatedAt)
	if err != nil {
		return false, fmt.Errorf("parsing created_at time: %v", err)
	}

	return false, nil
}
146 | 
147 | func (c *Client) makeItemGraphFromTweet(t tweet, archiveFilename string) (*timeliner.ItemGraph, error) {
148 | 	oneMediaItem := t.hasExactlyOneMediaItem()
149 | 
150 | 	// only create a tweet item if it has text OR exactly one media item
151 | 	// (because we don't want an empty item; we process each media item
152 | 	// as a separate item, unless there's exactly 1, in which case we
153 | 	// in-line it into the tweet itself)
154 | 	var ig *timeliner.ItemGraph
155 | 	if t.text() != "" || !oneMediaItem {
156 | 		ig = timeliner.NewItemGraph(&t)
157 | 	}
158 | 
159 | 	// process the media items attached to the tweet
160 | 	if t.ExtendedEntities != nil {
161 | 		var collItems []timeliner.CollectionItem
162 | 
163 | 		for i, m := range t.ExtendedEntities.Media {
164 | 			m.parent = &t
165 | 
166 | 			var dataFileName string
167 | 			if dfn := m.DataFileName(); dfn == nil || *dfn == "" {
168 | 				log.Printf("[ERROR][%s/%s] Tweet media has no data file name: %+v",
169 | 					DataSourceID, c.acc.UserID, m)
170 | 				continue
171 | 			} else {
172 | 				dataFileName = *dfn
173 | 			}
174 | 
175 | 			switch t.source {
176 | 			case "archive":
177 | 				targetFileInArchive := path.Join("tweet_media", dataFileName)
178 | 
179 | 				err := archiver.Walk(archiveFilename, func(f archiver.File) error {
180 | 					if f.Header.(zip.FileHeader).Name != targetFileInArchive {
181 | 						return nil
182 | 					}
183 | 
184 | 					buf := new(bytes.Buffer)
185 | 					_, err := io.Copy(buf, f)
186 | 					if err != nil {
187 | 						return fmt.Errorf("copying item into memory: %v", err)
188 | 					}
189 | 					m.readCloser = timeliner.FakeCloser(buf)
190 | 
191 | 					return archiver.ErrStopWalk
192 | 				})
193 | 				if err != nil {
194 | 					return nil, fmt.Errorf("walking archive file %s in search of tweet media: %v",
195 | 						archiveFilename, err)
196 | 				}
197 | 
198 | 			case "api":
199 | 				mediaURL := m.getURL()
200 | 				if m.Type == "photo" {
201 | 					mediaURL += ":orig" // get original file, with metadata
202 | 				}
203 | 				resp, err := http.Get(mediaURL)
204 | 				if err != nil {
205 | 					return nil, fmt.Errorf("getting media resource %s: %v", m.MediaURLHTTPS, err)
206 | 				}
207 | 				if resp.StatusCode != http.StatusOK {
208 | 					return nil, fmt.Errorf("media resource returned HTTP status %s: %s", resp.Status, m.MediaURLHTTPS)
209 | 				}
210 | 				m.readCloser = resp.Body
211 | 
212 | 			default:
213 | 				return nil, fmt.Errorf("unrecognized source value: must be api or archive: %s", t.source)
214 | 			}
215 | 
216 | 			if !oneMediaItem {
217 | 				if ig != nil {
218 | 					ig.Add(m, timeliner.RelAttached)
219 | 				}
220 | 				collItems = append(collItems, timeliner.CollectionItem{
221 | 					Item:     m,
222 | 					Position: i,
223 | 				})
224 | 			}
225 | 		}
226 | 
227 | 		if len(collItems) > 0 {
228 | 			ig.Collections = append(ig.Collections, timeliner.Collection{
229 | 				OriginalID: "tweet_" + t.ID(),
230 | 				Items:      collItems,
231 | 			})
232 | 		}
233 | 	}
234 | 
235 | 	// if we're using the API, go ahead and get the
236 | 	// 'parent' tweet to which this tweet is a reply
237 | 	if t.source == "api" && t.InReplyToStatusIDStr != "" {
238 | 		inReplyToTweet, err := c.getTweetFromAPI(t.InReplyToStatusIDStr)
239 | 		if err != nil {
240 | 			return nil, fmt.Errorf("getting tweet that this tweet (%s) is in reply to (%s): %v",
241 | 				t.ID(), t.InReplyToStatusIDStr, err)
242 | 		}
243 | 		skip, err := c.prepareTweet(&inReplyToTweet, "api")
244 | 		if err != nil {
245 | 			return nil, fmt.Errorf("preparing reply-parent tweet: %v", err)
246 | 		}
247 | 		if !skip {
248 | 			repIG, err := c.makeItemGraphFromTweet(inReplyToTweet, "")
249 | 			if err != nil {
250 | 				return nil, fmt.Errorf("making item from tweet that this tweet (%s) is in reply to (%s): %v",
251 | 					t.ID(), inReplyToTweet.ID(), err)
252 | 			}
253 | 			ig.Edges[repIG] = []timeliner.Relation{timeliner.RelReplyTo}
254 | 		}
255 | 	}
256 | 
257 | 	// if this tweet embeds/quotes/links to other tweets,
258 | 	// we should establish those relationships as well
259 | 	if t.source == "api" && t.Entities != nil {
260 | 		for _, urlEnt := range t.Entities.URLs {
261 | 			embeddedTweetID := getLinkedTweetID(urlEnt.ExpandedURL)
262 | 			if embeddedTweetID == "" {
263 | 				continue
264 | 			}
265 | 			embeddedTweet, err := c.getTweetFromAPI(embeddedTweetID)
266 | 			if err != nil {
267 | 				return nil, fmt.Errorf("getting tweet that this tweet (%s) embeds (%s): %v",
268 | 					t.ID(), t.InReplyToStatusIDStr, err)
269 | 			}
270 | 			skip, err := c.prepareTweet(&embeddedTweet, "api")
271 | 			if err != nil {
272 | 				return nil, fmt.Errorf("preparing embedded tweet: %v", err)
273 | 			}
274 | 			if !skip {
275 | 				embIG, err := c.makeItemGraphFromTweet(embeddedTweet, "")
276 | 				if err != nil {
277 | 					return nil, fmt.Errorf("making item from tweet that this tweet (%s) embeds (%s): %v",
278 | 						t.ID(), embeddedTweet.ID(), err)
279 | 				}
280 | 				ig.Edges[embIG] = []timeliner.Relation{timeliner.RelQuotes}
281 | 			}
282 | 		}
283 | 	}
284 | 
285 | 	return ig, nil
286 | }
287 | 
// Assuming checkpoints are short-lived (i.e. are resumed
// somewhat quickly, before the page tokens/cursors expire),
// we can just store the page tokens.
type checkpointInfo struct {
	// LastTweetID is the tweet ID at which a previous listing
	// left off, so a later run can resume from there.
	LastTweetID string
}
294 | 
295 | // save records the checkpoint.
296 | func (ch *checkpointInfo) save(ctx context.Context) {
297 | 	gobBytes, err := timeliner.MarshalGob(ch)
298 | 	if err != nil {
299 | 		log.Printf("[ERROR][%s] Encoding checkpoint: %v", DataSourceID, err)
300 | 	}
301 | 	timeliner.Checkpoint(ctx, gobBytes)
302 | }
303 | 
304 | // load decodes the checkpoint.
305 | func (ch *checkpointInfo) load(checkpointGob []byte) {
306 | 	if len(checkpointGob) == 0 {
307 | 		return
308 | 	}
309 | 	err := timeliner.UnmarshalGob(checkpointGob, ch)
310 | 	if err != nil {
311 | 		log.Printf("[ERROR][%s] Decoding checkpoint: %v", DataSourceID, err)
312 | 	}
313 | }
314 | 
// maxTweetID returns the higher of the two tweet IDs.
// Errors parsing the strings as integers are ignored.
// Empty string inputs are ignored so the other value
// will win automatically. If both are empty, an empty
// string is returned.
func maxTweetID(id1, id2 string) string {
	switch {
	case id1 == "":
		return id2
	case id2 == "":
		return id1
	}
	n1, _ := strconv.ParseInt(id1, 10, 64)
	n2, _ := strconv.ParseInt(id2, 10, 64)
	if n2 >= n1 {
		return id2
	}
	return id1
}
334 | 
// getLinkedTweetID returns the ID of the tweet in
// a link to a tweet, for example:
// "https://twitter.com/foo/status/12345"
// returns "12345". If the tweet ID cannot be found
// or the URL does not match the right format,
// an empty string is returned.
func getLinkedTweetID(urlToTweet string) string {
	if linkToTweetRE.MatchString(urlToTweet) {
		if u, err := url.Parse(urlToTweet); err == nil {
			// the tweet ID is the last path component
			return path.Base(u.Path)
		}
	}
	return ""
}

// linkToTweetRE matches links that point at an individual tweet.
var linkToTweetRE = regexp.MustCompile(`https?://twitter\.com/.*/status/[0-9]+`)


--------------------------------------------------------------------------------
/db.go:
--------------------------------------------------------------------------------
  1 | package timeliner
  2 | 
  3 | import (
  4 | 	"database/sql"
  5 | 	"fmt"
  6 | 	"os"
  7 | 	"path/filepath"
  8 | 
  9 | 	// register the sqlite3 driver
 10 | 	_ "github.com/mattn/go-sqlite3"
 11 | )
 12 | 
 13 | func openDB(dataDir string) (*sql.DB, error) {
 14 | 	var db *sql.DB
 15 | 	var err error
 16 | 	defer func() {
 17 | 		if err != nil && db != nil {
 18 | 			db.Close()
 19 | 		}
 20 | 	}()
 21 | 
 22 | 	err = os.MkdirAll(dataDir, 0755)
 23 | 	if err != nil {
 24 | 		return nil, fmt.Errorf("making data directory: %v", err)
 25 | 	}
 26 | 
 27 | 	dbPath := filepath.Join(dataDir, "index.db")
 28 | 
 29 | 	db, err = sql.Open("sqlite3", dbPath+"?_foreign_keys=true")
 30 | 	if err != nil {
 31 | 		return nil, fmt.Errorf("opening database: %v", err)
 32 | 	}
 33 | 
 34 | 	// ensure DB is provisioned
 35 | 	_, err = db.Exec(createDB)
 36 | 	if err != nil {
 37 | 		return nil, fmt.Errorf("setting up database: %v", err)
 38 | 	}
 39 | 
 40 | 	// add all registered data sources
 41 | 	err = saveAllDataSources(db)
 42 | 	if err != nil {
 43 | 		return nil, fmt.Errorf("saving registered data sources to database: %v", err)
 44 | 	}
 45 | 
 46 | 	return db, nil
 47 | }
 48 | 
 49 | const createDB = `
 50 | -- A data source is a content provider, like a cloud photo service, social media site, or exported archive format.
 51 | CREATE TABLE IF NOT EXISTS "data_sources" (
 52 | 	"id" TEXT PRIMARY KEY,
 53 | 	"name" TEXT NOT NULL
 54 | );
 55 | 
 56 | -- An account contains credentials necessary for accessing a data source.
 57 | CREATE TABLE IF NOT EXISTS "accounts" (
 58 | 	"id" INTEGER PRIMARY KEY,
 59 | 	"data_source_id" TEXT NOT NULL,
 60 | 	"user_id" TEXT NOT NULL,
 61 | 	"authorization" BLOB,
 62 | 	"checkpoint" BLOB,
 63 | 	"last_item_id" INTEGER, -- row ID of item having highest timestamp processed during the last run
 64 | 	FOREIGN KEY ("data_source_id") REFERENCES "data_sources"("id") ON DELETE CASCADE,
 65 | 	FOREIGN KEY ("last_item_id") REFERENCES "items"("id") ON DELETE SET NULL,
 66 | 	UNIQUE ("data_source_id", "user_id")
 67 | );
 68 | 
 69 | CREATE TABLE IF NOT EXISTS "persons" (
 70 | 	"id" INTEGER PRIMARY KEY,
 71 | 	"name" TEXT
 72 | );
 73 | 
 74 | -- This table specifies identities (user IDs, etc.) of a person across data_sources.
 75 | CREATE TABLE IF NOT EXISTS "person_identities" (
 76 | 	"id" INTEGER PRIMARY KEY,
 77 | 	"person_id" INTEGER NOT NULL,
 78 | 	"data_source_id" TEXT NOT NULL,
 79 | 	"user_id" TEXT NOT NULL, -- whatever identifier a person takes on at the data source
 80 | 	FOREIGN KEY ("person_id") REFERENCES "persons"("id") ON DELETE CASCADE,
 81 | 	FOREIGN KEY ("data_source_id") REFERENCES "data_sources"("id") ON DELETE CASCADE,
 82 | 	UNIQUE ("person_id", "data_source_id", "user_id")
 83 | );
 84 | 
 85 | -- An item is something downloaded from a specific account on a specific data source.
 86 | CREATE TABLE IF NOT EXISTS "items" (
 87 | 	"id" INTEGER PRIMARY KEY,
 88 | 	"account_id" INTEGER NOT NULL,
 89 | 	"original_id" TEXT NOT NULL, -- ID provided by the data source
 90 | 	"person_id" INTEGER NOT NULL,
 91 | 	"timestamp" INTEGER, -- timestamp when item content was originally created (NOT when the database row was created)
 92 | 	"stored" INTEGER NOT NULL DEFAULT (strftime('%s', CURRENT_TIME)), -- timestamp row was created or last updated from source
 93 | 	"modified" INTEGER, -- timestamp when item was locally modified; if not null, then item is "not clean"
 94 | 	"class" INTEGER,
 95 | 	"mime_type" TEXT,
 96 | 	"data_text" TEXT COLLATE NOCASE,  -- item content, if text-encoded
 97 | 	"data_file" TEXT, -- item filename, if non-text or not suitable for storage in DB (usually media items)
 98 | 	"data_hash" TEXT, -- base64 encoding of SHA-256 checksum of contents of data file, if any
 99 | 	"metadata" BLOB,  -- optional extra information
100 | 	"latitude" REAL,
101 | 	"longitude" REAL,
102 | 	FOREIGN KEY ("account_id") REFERENCES "accounts"("id") ON DELETE CASCADE,
103 | 	FOREIGN KEY ("person_id") REFERENCES "persons"("id") ON DELETE CASCADE,
104 | 	UNIQUE ("original_id", "account_id")
105 | );
106 | 
107 | CREATE INDEX IF NOT EXISTS "idx_items_timestamp" ON "items"("timestamp");
108 | CREATE INDEX IF NOT EXISTS "idx_items_data_text" ON "items"("data_text");
109 | CREATE INDEX IF NOT EXISTS "idx_items_data_file" ON "items"("data_file");
110 | CREATE INDEX IF NOT EXISTS "idx_items_data_hash" ON "items"("data_hash");
111 | 
112 | -- Relationships draws relationships between and across items and persons.
113 | CREATE TABLE IF NOT EXISTS "relationships" (
114 | 	"id" INTEGER PRIMARY KEY,
115 | 	"from_person_id" INTEGER,
116 | 	"from_item_id" INTEGER,
117 | 	"to_person_id" INTEGER,
118 | 	"to_item_id" INTEGER,
119 | 	"directed" BOOLEAN, -- if false, the edge goes both ways
120 |  	"label" TEXT NOT NULL,
121 | 	FOREIGN KEY ("from_item_id") REFERENCES "items"("id") ON DELETE CASCADE,
122 | 	FOREIGN KEY ("to_item_id") REFERENCES "items"("id") ON DELETE CASCADE,
123 | 	FOREIGN KEY ("from_person_id") REFERENCES "persons"("id") ON DELETE CASCADE,
124 | 	FOREIGN KEY ("to_person_id") REFERENCES "persons"("id") ON DELETE CASCADE,
125 | 	UNIQUE ("from_item_id", "to_item_id", "label"),
126 | 	UNIQUE ("from_person_id", "to_person_id", "label"),
127 | 	UNIQUE ("from_item_id", "to_person_id", "label"),
128 | 	UNIQUE ("from_person_id", "to_item_id", "label")
129 | );
130 | 
131 | CREATE TABLE IF NOT EXISTS "collections" (
132 | 	"id" INTEGER PRIMARY KEY,
133 | 	"account_id" INTEGER NOT NULL,
134 | 	"original_id" TEXT,
135 | 	"name" TEXT,
136 | 	"description" TEXT,
137 | 	"modified" INTEGER, -- timestamp when collection or any of its items/ordering were modified locally; if not null, then collection is "not clean"
138 | 	FOREIGN KEY ("account_id") REFERENCES "accounts"("id") ON DELETE CASCADE,
139 | 	UNIQUE("account_id", "original_id")
140 | );
141 | 
142 | CREATE TABLE IF NOT EXISTS "collection_items" (
143 | 	"id" INTEGER PRIMARY KEY,
144 | 	"item_id" INTEGER NOT NULL,
145 | 	"collection_id" INTEGER NOT NULL,
146 | 	"position" INTEGER NOT NULL DEFAULT 0,
147 | 	FOREIGN KEY ("item_id") REFERENCES "items"("id") ON DELETE CASCADE,
148 | 	FOREIGN KEY ("collection_id") REFERENCES "collections"("id") ON DELETE CASCADE,
149 | 	UNIQUE("item_id", "collection_id", "position")
150 | );
151 | `
152 | 


--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
 1 | module github.com/mholt/timeliner
 2 | 
 3 | go 1.13
 4 | 
 5 | require (
 6 | 	github.com/BurntSushi/toml v0.3.1
 7 | 	github.com/dgryski/go-metro v0.0.0-20180109044635-280f6062b5bc // indirect
 8 | 	github.com/mattn/go-sqlite3 v1.10.0
 9 | 	github.com/mholt/archiver/v3 v3.3.0
10 | 	github.com/seiflotfy/cuckoofilter v0.0.0-20200323075608-c8f23b6b6cef
11 | 	github.com/ttacon/builder v0.0.0-20170518171403-c099f663e1c2 // indirect
12 | 	github.com/ttacon/libphonenumber v1.1.0
13 | 	golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d
14 | )
15 | 


--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
 1 | cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
 2 | github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ=
 3 | github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
 4 | github.com/andybalholm/brotli v0.0.0-20190621154722-5f990b63d2d6 h1:bZ28Hqta7TFAK3Q08CMvv8y3/8ATaEqv2nGoc6yff6c=
 5 | github.com/andybalholm/brotli v0.0.0-20190621154722-5f990b63d2d6/go.mod h1:+lx6/Aqd1kLJ1GQfkvOnaZ1WGmLpMpbprPuIOOZX30U=
 6 | github.com/dgryski/go-metro v0.0.0-20180109044635-280f6062b5bc h1:8WFBn63wegobsYAX0YjD+8suexZDga5CctH4CCTx2+8=
 7 | github.com/dgryski/go-metro v0.0.0-20180109044635-280f6062b5bc/go.mod h1:c9O8+fpSOX1DM8cPNSkX/qsBWdkD4yd2dpciOWQjpBw=
 8 | github.com/dsnet/compress v0.0.1 h1:PlZu0n3Tuv04TzpfPbrnI0HW/YwodEXDS+oPKahKF0Q=
 9 | github.com/dsnet/compress v0.0.1/go.mod h1:Aw8dCMJ7RioblQeTqt88akK31OvO8Dhf5JflhBbQEHo=
10 | github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
11 | github.com/golang/gddo v0.0.0-20190419222130-af0f2af80721 h1:KRMr9A3qfbVM7iV/WcLY/rL5LICqwMHLhwRXKu99fXw=
12 | github.com/golang/gddo v0.0.0-20190419222130-af0f2af80721/go.mod h1:xEhNfoBDX1hzLm2Nf80qUvZ2sVwoMZ8d6IE2SrsQfh4=
13 | github.com/golang/protobuf v1.2.0 h1:P3YflyNX/ehuJFLhxviNdFxQPkGK5cDcApsge1SqnvM=
14 | github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
15 | github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4=
16 | github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
17 | github.com/google/go-cmp v0.3.0 h1:crn/baboCvb5fXaQ0IJ1SGTsTVrWpDsCWC8EGETZijY=
18 | github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
19 | github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
20 | github.com/klauspost/compress v1.9.2 h1:LfVyl+ZlLlLDeQ/d2AqfGIIH4qEDu0Ed2S5GyhCWIWY=
21 | github.com/klauspost/compress v1.9.2/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
22 | github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
23 | github.com/klauspost/pgzip v1.2.1 h1:oIPZROsWuPHpOdMVWLuJZXwgjhrW8r1yEX8UqMyeNHM=
24 | github.com/klauspost/pgzip v1.2.1/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
25 | github.com/mattn/go-sqlite3 v1.10.0 h1:jbhqpg7tQe4SupckyijYiy0mJJ/pRyHvXf7JdWK860o=
26 | github.com/mattn/go-sqlite3 v1.10.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc=
27 | github.com/mholt/archiver/v3 v3.3.0 h1:vWjhY8SQp5yzM9P6OJ/eZEkmi3UAbRrxCq48MxjAzig=
28 | github.com/mholt/archiver/v3 v3.3.0/go.mod h1:YnQtqsp+94Rwd0D/rk5cnLrxusUBUXg+08Ebtr1Mqao=
29 | github.com/nwaples/rardecode v1.0.0 h1:r7vGuS5akxOnR4JQSkko62RJ1ReCMXxQRPtxsiFMBOs=
30 | github.com/nwaples/rardecode v1.0.0/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0=
31 | github.com/pierrec/lz4 v2.0.5+incompatible h1:2xWsjqPFWcplujydGg4WmhC/6fZqK42wMM8aXeqhl0I=
32 | github.com/pierrec/lz4 v2.0.5+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY=
33 | github.com/seiflotfy/cuckoofilter v0.0.0-20200323075608-c8f23b6b6cef h1:PokWhuPtXrgwLeUZzanj6iMZpUnFBCc6g2tDeheBLrE=
34 | github.com/seiflotfy/cuckoofilter v0.0.0-20200323075608-c8f23b6b6cef/go.mod h1:ET5mVvNjwaGXRgZxO9UZr7X+8eAf87AfIYNwRSp9s4Y=
35 | github.com/ttacon/builder v0.0.0-20170518171403-c099f663e1c2 h1:5u+EJUQiosu3JFX0XS0qTf5FznsMOzTjGqavBGuCbo0=
36 | github.com/ttacon/builder v0.0.0-20170518171403-c099f663e1c2/go.mod h1:4kyMkleCiLkgY6z8gK5BkI01ChBtxR0ro3I1ZDcGM3w=
37 | github.com/ttacon/libphonenumber v1.1.0 h1:tC6kE4t8UI4OqQVQjW5q8gSWhG2wnY5moEpSEORdYm4=
38 | github.com/ttacon/libphonenumber v1.1.0/go.mod h1:E0TpmdVMq5dyVlQ7oenAkhsLu86OkUl+yR4OAxyEg/M=
39 | github.com/ulikunitz/xz v0.5.6 h1:jGHAfXawEGZQ3blwU5wnWKQJvAraT7Ftq9EXjnXYgt8=
40 | github.com/ulikunitz/xz v0.5.6/go.mod h1:2bypXElzHzzJZwzH67Y6wb67pO62Rzfn7BSiF4ABRW8=
41 | github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofmx9yWTog9BfvIu0q41lo=
42 | github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos=
43 | golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
44 | golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e h1:bRhVy7zSSasaqNksaRZiA5EEI+Ei4I1nO5Jh72wfHlg=
45 | golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
46 | golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d h1:TzXSXBo42m9gQenoE3b9BGiEpg5IG2JkU5FkPIawgtw=
47 | golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
48 | golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4 h1:YUO/7uOKsKeq9UokNS62b8FYywz3ker1l1vDZRCRefw=
49 | golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
50 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
51 | google.golang.org/appengine v1.4.0 h1:/wp5JvzpHIxhs/dumFmF7BXTf3Z+dd4uXta4kVyO508=
52 | google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
53 | 


--------------------------------------------------------------------------------
/itemfiles.go:
--------------------------------------------------------------------------------
  1 | package timeliner
  2 | 
  3 | import (
  4 | 	"crypto/sha256"
  5 | 	"database/sql"
  6 | 	"encoding/base64"
  7 | 	"fmt"
  8 | 	"hash"
  9 | 	"io"
 10 | 	"log"
 11 | 	mathrand "math/rand"
 12 | 	"os"
 13 | 	"path"
 14 | 	"path/filepath"
 15 | 	"regexp"
 16 | 	"strings"
 17 | 	"time"
 18 | )
 19 | 
 20 | // downloadItemFile ... TODO: finish godoc.
 21 | func (t *Timeline) downloadItemFile(src io.ReadCloser, dest *os.File, h hash.Hash) (int64, error) {
 22 | 	if src == nil {
 23 | 		return 0, fmt.Errorf("missing reader with which to download file")
 24 | 	}
 25 | 	if dest == nil {
 26 | 		return 0, fmt.Errorf("missing file to download into")
 27 | 	}
 28 | 
 29 | 	// TODO: What if file already exists on disk (byte-for-byte)? - i.e. data_hash in DB has a duplicate
 30 | 
 31 | 	// give the hasher a copy of the file bytes
 32 | 	tr := io.TeeReader(src, h)
 33 | 
 34 | 	n, err := io.Copy(dest, tr)
 35 | 	if err != nil {
 36 | 		os.Remove(dest.Name())
 37 | 		return n, fmt.Errorf("copying contents: %v", err)
 38 | 	}
 39 | 	if err := dest.Sync(); err != nil {
 40 | 		os.Remove(dest.Name())
 41 | 		return n, fmt.Errorf("syncing file: %v", err)
 42 | 	}
 43 | 
 44 | 	// TODO: If mime type is photo or video, extract most important EXIF data and return it for storage in DB?
 45 | 
 46 | 	return n, nil
 47 | }
 48 | 
// openUniqueCanonicalItemDataFile creates and opens an available
// (non-overwriting) data file for the item, starting with its
// plain, canonical data file name, then improvising and making
// the name unique if necessary. It returns the open file and the
// file's canonical (slash-separated, repo-relative) path. If there
// is no error, the return values are always usable.
func (t *Timeline) openUniqueCanonicalItemDataFile(it Item, dataSourceID string) (*os.File, *string, error) {
	if dataSourceID == "" {
		return nil, nil, fmt.Errorf("missing service ID")
	}

	dir := t.canonicalItemDataFileDir(it, dataSourceID)

	err := os.MkdirAll(t.fullpath(dir), 0700)
	if err != nil {
		return nil, nil, fmt.Errorf("making directory for data file: %v", err)
	}

	// on each collision we strip the previously-appended suffix and
	// append a fresh "_<n>" counter before the extension, e.g. "pic_2.jpg"
	tryPath := path.Join(dir, t.canonicalItemDataFileName(it, dataSourceID))
	lastAppend := path.Ext(tryPath)

	// bounded attempts so a pathological directory cannot loop forever
	for i := 0; i < 100; i++ {
		fullFilePath := t.fullpath(filepath.FromSlash(tryPath))

		// O_EXCL guarantees we never clobber an existing file
		f, err := os.OpenFile(fullFilePath, os.O_CREATE|os.O_RDWR|os.O_EXCL, 0600)
		if os.IsExist(err) {
			ext := path.Ext(tryPath)
			tryPath = strings.TrimSuffix(tryPath, lastAppend)
			lastAppend = fmt.Sprintf("_%d%s", i+1, ext) // start at 1, but actually 2 because existing file is "1"
			tryPath += lastAppend
			continue
		}
		if err != nil {
			return nil, nil, fmt.Errorf("creating data file: %v", err)
		}

		return f, &tryPath, nil
	}

	return nil, nil, fmt.Errorf("unable to find available filename for item: %s", tryPath)
}
 89 | 
// canonicalItemDataFileName returns the plain, canonical file name
// (NOT the directory; see canonicalItemDataFileDir for that) of the
// data file for the item. It does no uniqueness checks. It prefers,
// in order: the item's own file name, a name derived from the item's
// original ID, a name derived from the item's timestamp, and finally
// a random string when no deterministic name is possible. The result
// is truncated if needed to fit filesystem name-length limits.
// (dataSourceID is currently unused here; the data source is encoded
// in the directory, not the file name.)
func (t *Timeline) canonicalItemDataFileName(it Item, dataSourceID string) string {
	// ideally, the filename is simply the one provided with the item
	var filename string
	if fname := it.DataFileName(); fname != nil {
		filename = t.safePathComponent(*fname)
	}

	// otherwise, try a filename based on the item's ID
	if filename == "" {
		if itemOriginalID := it.ID(); itemOriginalID != "" {
			filename = fmt.Sprintf("item_%s", itemOriginalID)
		}
	}

	// otherwise, try a filename based on the item's timestamp
	ts := it.Timestamp()
	if filename == "" && !ts.IsZero() {
		filename = ts.Format("2006_01_02_150405")
	}

	// otherwise, out of options; revert to a random string
	// since no deterministic filename is available
	if filename == "" {
		filename = randomString(24, false)
	}

	// shorten the name if needed (thanks for everything, Windows)
	return t.ensureDataFileNameShortEnough(filename)
}
129 | 
130 | func (t *Timeline) canonicalItemDataFileDir(it Item, dataSourceID string) string {
131 | 	ts := it.Timestamp()
132 | 	if ts.IsZero() {
133 | 		ts = time.Now()
134 | 	}
135 | 
136 | 	if dataSourceID == "" {
137 | 		dataSourceID = "unknown_service"
138 | 	}
139 | 
140 | 	// use "/" separators and adjust for the OS
141 | 	// path separator when accessing disk
142 | 	return path.Join("data",
143 | 		fmt.Sprintf("%04d", ts.Year()),
144 | 		fmt.Sprintf("%02d", ts.Month()),
145 | 		t.safePathComponent(dataSourceID))
146 | }
147 | 
148 | func (t *Timeline) ensureDataFileNameShortEnough(filename string) string {
149 | 	// thanks for nothing, Windows
150 | 	if len(filename) > 250 {
151 | 		ext := path.Ext(filename)
152 | 		if len(ext) > 20 { // arbitrary and unlikely, but just in case
153 | 			ext = ext[:20]
154 | 		}
155 | 		filename = filename[:250-len(ext)]
156 | 		filename += ext
157 | 	}
158 | 	return filename
159 | }
160 | 
161 | // TODO:/NOTE: If changing a file name, all items with same data_hash must also be updated to use same file name
162 | func (t *Timeline) replaceWithExisting(canonical *string, checksumBase64 string, itemRowID int64) error {
163 | 	if canonical == nil || *canonical == "" || checksumBase64 == "" {
164 | 		return fmt.Errorf("missing data filename and/or hash of contents")
165 | 	}
166 | 
167 | 	var existingDatafile *string
168 | 	err := t.db.QueryRow(`SELECT data_file FROM items
169 | 		WHERE data_hash = ? AND id != ? LIMIT 1`,
170 | 		checksumBase64, itemRowID).Scan(&existingDatafile)
171 | 	if err == sql.ErrNoRows {
172 | 		return nil // file is unique; carry on
173 | 	}
174 | 	if err != nil {
175 | 		return fmt.Errorf("querying DB: %v", err)
176 | 	}
177 | 
178 | 	// file is a duplicate!
179 | 
180 | 	if existingDatafile == nil {
181 | 		// ... that's weird, how's this possible? it has a hash but no file name recorded
182 | 		return fmt.Errorf("item with matching hash is missing data file name; hash: %s", checksumBase64)
183 | 	}
184 | 
185 | 	// ensure the existing file is still the same
186 | 	h := sha256.New()
187 | 	f, err := os.Open(t.fullpath(*existingDatafile))
188 | 	if err != nil {
189 | 		return fmt.Errorf("opening existing file: %v", err)
190 | 	}
191 | 	defer f.Close()
192 | 
193 | 	_, err = io.Copy(h, f)
194 | 	if err != nil {
195 | 		return fmt.Errorf("checking file integrity: %v", err)
196 | 	}
197 | 
198 | 	existingFileHash := h.Sum(nil)
199 | 	b64ExistingFileHash := base64.StdEncoding.EncodeToString(existingFileHash)
200 | 
201 | 	// if the existing file was modified; restore it with
202 | 	// what we just downloaded, which presumably succeeded
203 | 	if checksumBase64 != b64ExistingFileHash {
204 | 		log.Printf("[INFO] Restoring modified data file: %s was '%s' but is now '%s'",
205 | 			*existingDatafile, checksumBase64, existingFileHash)
206 | 		err := os.Rename(t.fullpath(*canonical), t.fullpath(*existingDatafile))
207 | 		if err != nil {
208 | 			return fmt.Errorf("replacing modified data file: %v", err)
209 | 		}
210 | 	}
211 | 
212 | 	// everything checks out; delete the newly-downloaded file
213 | 	// and use the existing file instead of duplicating it
214 | 	err = os.Remove(t.fullpath(*canonical))
215 | 	if err != nil {
216 | 		return fmt.Errorf("removing duplicate data file: %v", err)
217 | 	}
218 | 
219 | 	canonical = existingDatafile
220 | 
221 | 	return nil
222 | }
223 | 
// randomString returns a string of n random characters.
// It is not even remotely secure or a proper distribution.
// But it's good enough for some things. It excludes certain
// confusing characters like I, l, 1, 0, O, etc. If sameCase
// is true, then uppercase letters are excluded.
func randomString(n int, sameCase bool) string {
	if n <= 0 {
		return ""
	}

	// pick the character set according to case preference
	alphabet := []byte("abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNPQRTUVWXY23456789")
	if sameCase {
		alphabet = []byte("abcdefghijkmnpqrstuvwxyz0123456789")
	}

	var sb strings.Builder
	sb.Grow(n)
	for i := 0; i < n; i++ {
		sb.WriteByte(alphabet[mathrand.Int63()%int64(len(alphabet))])
	}
	return sb.String()
}
243 | 
244 | func (t *Timeline) fullpath(canonicalDatafileName string) string {
245 | 	return filepath.Join(t.repoDir, filepath.FromSlash(canonicalDatafileName))
246 | }
247 | 
248 | func (t *Timeline) datafileExists(canonicalDatafileName string) bool {
249 | 	_, err := os.Stat(t.fullpath(canonicalDatafileName))
250 | 	return !os.IsNotExist(err)
251 | }
252 | 
253 | func (t *Timeline) safePathComponent(s string) string {
254 | 	s = safePathRE.ReplaceAllLiteralString(s, "")
255 | 	s = strings.Replace(s, "..", "", -1)
256 | 	if s == "." {
257 | 		s = ""
258 | 	}
259 | 	return s
260 | }
261 | 
// safePathRE matches any undesirable characters in a filepath.
// Note that this allows dots, so you'll have to strip ".." manually
// (safePathComponent does this).
var safePathRE = regexp.MustCompile(`[^\w.-]`)
265 | 


--------------------------------------------------------------------------------
/itemgraph.go:
--------------------------------------------------------------------------------
  1 | package timeliner
  2 | 
  3 | import (
  4 | 	"bytes"
  5 | 	"encoding/gob"
  6 | 	"io"
  7 | 	"time"
  8 | )
  9 | 
// Item is the central concept of a piece of content
// from a service or data source. Take note of which
// methods are required to return non-empty values.
//
// The actual content of an item is stored either in
// the database or on disk as a file. Generally,
// content that is text-encoded can and should be
// stored in the database where it will be indexed.
// However, if the item's content (for example, the
// bytes of a photo or video) are not text or if the
// text is too large to store well in a database (for
// example, an entire novel), it should be stored
// on disk, and this interface has methods to
// accommodate both. Note that an item may have both
// text and non-text content, too: for example, photos
// and videos may have descriptions that are as much
// "content" as the media itself. One part of an item
// is not mutually exclusive with any other.
type Item interface {
	// The unique ID of the item assigned by the service.
	// If the service does not assign one, then invent
	// one such that the ID is unique to the content or
	// substance of the item (for example, an ID derived
	// from timestamp or from the actual content of the
	// item -- whatever makes it unique). The ID need
	// only be unique for the account it is associated
	// with, although more unique is, of course, acceptable.
	//
	// REQUIRED.
	ID() string

	// The originating timestamp of the item, which
	// may be different from when the item was posted
	// or created. For example, a photo may be taken
	// one day but uploaded a week later. Prefer the
	// time when the original item content was captured.
	//
	// REQUIRED.
	Timestamp() time.Time

	// A classification of the item's kind.
	//
	// REQUIRED.
	Class() ItemClass

	// The user/account ID of the owner or
	// originator of the content, along with their
	// username or real name. The ID is used to
	// relate the item with the person behind it;
	// the name is used to make the person
	// recognizable to the human reader. If the
	// ID is nil, the current account owner will
	// be assumed. (Use the ID as given by the
	// data source.) If the data source only
	// provides a name but no ID, you may return
	// the name as the ID with the understanding
	// that a different name will be counted as a
	// different person. You may also return the
	// name as the name and leave the ID nil and
	// have correct results if it is safe to assume
	// the name belongs to the current account owner.
	Owner() (id *string, name *string)

	// Returns the text of the item, if any.
	// This field is indexed in the DB, so don't
	// use for unimportant metadata or huge
	// swaths of text; if there is a large
	// amount of text, use an item file instead.
	DataText() (*string, error)

	// For primary content which is not text or
	// which is too large to be stored well in a
	// database, the content can be downloaded
	// into a file. If so, the following methods
	// should return the necessary information,
	// if available from the service, so that a
	// data file can be obtained, stored, and
	// later read successfully.
	//
	// DataFileName returns the filename (NOT full
	// path or URL) of the file; prefer the original
	// filename if it originated as a file. If the
	// filename is not unique on disk when downloaded,
	// it will be made unique by modifying it. If
	// this value is nil/empty, a filename will be
	// generated from the item's other data.
	//
	// DataFileReader returns a way to read the data.
	// It will be closed when the read is completed.
	//
	// DataFileHash returns the checksum of the
	// content as provided by the service. If the
	// service (or data source) does not provide a
	// hash, leave this field empty, but note that
	// later it will be impossible to efficiently
	// know whether the content has changed on the
	// service from what is stored locally.
	//
	// DataFileMIMEType returns the MIME type of
	// the data file, if known.
	DataFileName() *string
	DataFileReader() (io.ReadCloser, error)
	DataFileHash() []byte
	DataFileMIMEType() *string

	// Metadata returns any optional metadata.
	// Feel free to leave as many fields empty
	// as you'd like: the less fields that are
	// filled out, the smaller the storage size.
	// Metadata is not indexed by the DB but is
	// rendered in projections and queries
	// according to the item's classification.
	Metadata() (*Metadata, error)

	// Location returns an item's location,
	// if known. For now, only Earth
	// coordinates are accepted, but we can
	// improve this later.
	Location() (*Location, error)
}
130 | 
// ItemClass classifies an item.
type ItemClass int

// Various classes of items.
// NOTE(review): these integer values are presumably persisted in the
// items table's "class" column, so do not reorder them — confirm.
const (
	ClassUnknown ItemClass = iota
	ClassImage
	ClassVideo
	ClassAudio
	ClassPost
	ClassLocation
	ClassEmail
	ClassPrivateMessage
	ClassMessage
)

// These are the standard relationships that Timeliner
// recognizes. Using these known relationships is not
// required, but it makes it easier to translate them to
// human-friendly phrases when visualizing the timeline.
var (
	RelReplyTo  = Relation{Label: "reply_to", Bidirectional: false}      // "<from> is in reply to <to>"
	RelAttached = Relation{Label: "attached", Bidirectional: true}       // "<to|from> is attached to <from|to>"
	RelQuotes   = Relation{Label: "quotes", Bidirectional: false}        // "<from> quotes <to>"
	RelCCed     = Relation{Label: "carbon_copied", Bidirectional: false} // "<from_item> is carbon-copied to <to_person>"
)
157 | 
// ItemRow has the structure of an item's row in our DB.
type ItemRow struct {
	ID         int64
	AccountID  int64
	OriginalID string // the ID assigned by the data source
	PersonID   int64
	Timestamp  time.Time  // when the item's content was originally created
	Stored     time.Time  // when the row was created or last updated from source
	Modified   *time.Time // when modified locally; non-nil means the item is "not clean"
	Class      ItemClass
	MIMEType   *string
	DataText   *string // item content, if text-encoded
	DataFile   *string // item filename, if stored on disk
	DataHash   *string // base64-encoded SHA-256
	Metadata   *Metadata
	Location   // embedded; supplies the latitude/longitude columns

	metaGob []byte // use Metadata.(encode/decode)
	item    Item
}

// Location contains location information.
// Coordinates may be nil when unknown.
type Location struct {
	Latitude  *float64
	Longitude *float64
}
184 | 
// ItemGraph is an item with optional connections to other items.
// All ItemGraph values should be pointers to ensure consistency.
// The usual weird/fun thing about representing graph data structures
// in memory is that a graph is a node, and a node is a graph. 🤓
// Node, Edges, Collections, and Relations are all optional; fill in
// whichever parts describe what should be added to the timeline.
type ItemGraph struct {
	// The node item. This can be nil, but note that
	// Edges will not be traversed if Node is nil,
	// because there must be a node on both ends of
	// an edge.
	//
	// Optional.
	Node Item

	// Edges are represented as 1:many relations
	// to other "graphs" (nodes in the graph).
	// Fill this out to add multiple items to the
	// timeline at once, while drawing the
	// designated relationships between them.
	// Useful when processing related items in
	// batches.
	//
	// Directional relationships go from Node to
	// the map key.
	//
	// If the items involved in a relationship are
	// not efficiently available at the same time
	// (i.e. if loading both items involved in the
	// relationship would take a non-trivial amount
	// of time or API calls), you can use the
	// Relations field instead, but only after the
	// items have been added to the timeline.
	//
	// Optional.
	Edges map[*ItemGraph][]Relation

	// If items in the graph belong to a collection,
	// specify them here. If the collection does not
	// exist (by row ID or AccountID+OriginalID), it
	// will be created. If it already exists, the
	// collection in the DB will be unioned with the
	// collection specified here. Collections are
	// processed regardless of Node and Edges.
	//
	// Optional.
	Collections []Collection

	// Relationships between existing items in the
	// timeline can be represented here in a list
	// of item IDs that are connected by a label.
	// This field is useful when relationships and
	// the items involved in them are not discovered
	// at the same time. Relations in this list will
	// be added to the timeline, joined by the item
	// IDs described in the RawRelations, only if
	// the items having those IDs (as provided by
	// the data source; we're not talking about DB
	// row IDs here) already exist in the timeline.
	// In other words, this is a best-effort field;
	// useful for forming relationships of existing
	// items, but without access to the actual items
	// themselves. If you have the items involved in
	// the relationships, use Edges instead.
	//
	// Optional.
	Relations []RawRelation
}
251 | 
252 | // NewItemGraph returns a new node/graph.
253 | func NewItemGraph(node Item) *ItemGraph {
254 | 	return &ItemGraph{
255 | 		Node:  node,
256 | 		Edges: make(map[*ItemGraph][]Relation),
257 | 	}
258 | }
259 | 
260 | // Add adds item to the graph ig by making an edge described
261 | // by rel from the node ig to a new node for item.
262 | //
263 | // This method is for simple inserts, where the only thing to add
264 | // to the graph at this moment is a single item, since the graph
265 | // it inserts contains only a single node populated by item. To
266 | // add a full graph with multiple items (i.e. a graph with edges),
267 | // call ig.Connect directly.
268 | func (ig *ItemGraph) Add(item Item, rel Relation) {
269 | 	ig.Connect(NewItemGraph(item), rel)
270 | }
271 | 
272 | // Connect is a simple convenience function that adds a graph (node)
273 | // to ig by an edge described by rel.
274 | func (ig *ItemGraph) Connect(node *ItemGraph, rel Relation) {
275 | 	if ig.Edges == nil {
276 | 		ig.Edges = make(map[*ItemGraph][]Relation)
277 | 	}
278 | 	ig.Edges[node] = append(ig.Edges[node], rel)
279 | }
280 | 
// RawRelation represents a relationship between
// two items or people (or both) from the same
// data source (but not necessarily the same
// accounts; we assume that a data source's item
// IDs are globally unique across accounts).
// The item IDs should be those which are
// assigned/provided by the data source, NOT a
// database row ID. Likewise, the persons' user
// IDs should be the IDs of the user as associated
// with the data source, NOT their row IDs.
type RawRelation struct {
	FromItemID       string
	ToItemID         string
	FromPersonUserID string
	ToPersonUserID   string
	Relation         // the label and directionality of the edge
}

// Relation describes how two nodes in a graph are related.
// It's essentially an edge on a graph. If Bidirectional is
// false, the relation points from the "from" node to the
// "to" node only.
type Relation struct {
	Label         string
	Bidirectional bool
}
305 | 
// Collection represents a group of items, like an album.
type Collection struct {
	// The ID of the collection as given
	// by the service; for example, the
	// album ID. If the service does not
	// provide an ID for the collection,
	// invent one such that the next time
	// the collection is encountered and
	// processed, its ID will be the same.
	// An ID is necessary here to ensure
	// uniqueness.
	//
	// REQUIRED.
	OriginalID string

	// The name of the collection as
	// given by the service; for example,
	// the album title.
	//
	// Optional.
	Name *string

	// The description, caption, or any
	// other relevant text describing
	// the collection.
	//
	// Optional.
	Description *string

	// The items for the collection;
	// if ordering is significant,
	// specify each item's Position
	// field; the order of elements
	// of this slice will not be
	// considered important.
	Items []CollectionItem
}

// CollectionItem represents an item
// stored in a collection.
type CollectionItem struct {
	// The item to add to the collection.
	Item Item

	// Specify if ordering is important.
	Position int

	// Used when processing; this will
	// store the row ID of the item
	// after the item has been inserted
	// into the DB.
	itemRowID int64
}
359 | 
// Metadata is a unified structure for storing
// item metadata in the DB. It is gob-encoded into
// the items table's metadata column; the fewer
// fields that are set, the smaller the storage.
type Metadata struct {
	// A hash or etag provided by the service to
	// make it easy to know if it has changed
	ServiceHash []byte

	// Locations
	LocationAccuracy int
	Altitude         int // meters
	AltitudeAccuracy int
	Heading          int // degrees
	Velocity         int

	GeneralArea string // natural language description of a location

	// Photos and videos
	EXIF map[string]interface{}
	// TODO: Should we have some of the "most important" EXIF fields explicitly here?

	Width  int
	Height int

	// TODO: Google Photos (how many of these belong in EXIF?)
	CameraMake      string
	CameraModel     string
	FocalLength     float64
	ApertureFNumber float64
	ISOEquivalent   int
	ExposureTime    time.Duration

	FPS float64 // Frames Per Second

	// Posts (Facebook so far)
	Link        string
	Description string
	Name        string
	ParentID    string
	StatusType  string
	Type        string

	Shares int // aka "Retweets" or "Reshares"
	Likes  int
}
404 | 
405 | func (m *Metadata) encode() ([]byte, error) {
406 | 	// then encode the actual data, and trim off
407 | 	// schema from the beginning
408 | 	buf := new(bytes.Buffer)
409 | 	err := gob.NewEncoder(buf).Encode(m)
410 | 	if err != nil {
411 | 		return nil, err
412 | 	}
413 | 	return buf.Bytes()[len(metadataGobPrefix):], nil
414 | }
415 | 
416 | func (m *Metadata) decode(b []byte) error {
417 | 	if b == nil {
418 | 		return nil
419 | 	}
420 | 	fullGob := append(metadataGobPrefix, b...)
421 | 	return gob.NewDecoder(bytes.NewReader(fullGob)).Decode(m)
422 | }
423 | 
424 | var metadataGobPrefix []byte
425 | 


--------------------------------------------------------------------------------
/mapmutex.go:
--------------------------------------------------------------------------------
 1 | package timeliner
 2 | 
 3 | import "sync"
 4 | 
 5 | // Modified from https://medium.com/@petrlozhkin/kmutex-lock-mutex-by-unique-id-408467659c24
 6 | 
 7 | type mapMutex struct {
 8 | 	cond *sync.Cond
 9 | 	set  map[interface{}]struct{}
10 | }
11 | 
12 | func newMapMutex() *mapMutex {
13 | 	return &mapMutex{
14 | 		cond: sync.NewCond(new(sync.Mutex)),
15 | 		set:  make(map[interface{}]struct{}),
16 | 	}
17 | }
18 | 
19 | func (mmu *mapMutex) Lock(key interface{}) {
20 | 	mmu.cond.L.Lock()
21 | 	defer mmu.cond.L.Unlock()
22 | 	for mmu.locked(key) {
23 | 		mmu.cond.Wait()
24 | 	}
25 | 	mmu.set[key] = struct{}{}
26 | 	return
27 | }
28 | 
29 | func (mmu *mapMutex) Unlock(key interface{}) {
30 | 	mmu.cond.L.Lock()
31 | 	defer mmu.cond.L.Unlock()
32 | 	delete(mmu.set, key)
33 | 	mmu.cond.Broadcast()
34 | }
35 | 
36 | func (mmu *mapMutex) locked(key interface{}) (ok bool) {
37 | 	_, ok = mmu.set[key]
38 | 	return
39 | }
40 | 


--------------------------------------------------------------------------------
/oauth2.go:
--------------------------------------------------------------------------------
  1 | package timeliner
  2 | 
  3 | import (
  4 | 	"context"
  5 | 	"fmt"
  6 | 	"net/http"
  7 | 
  8 | 	"github.com/mholt/timeliner/oauth2client"
  9 | 	"golang.org/x/oauth2"
 10 | )
 11 | 
// OAuth2AppSource returns an oauth2client.App for the OAuth2 provider
// with the given ID. Programs using data sources that authenticate
// with OAuth2 MUST set this variable, or the program will panic.
// It is consulted both when authorizing a new account
// (authorizeWithOAuth2) and when constructing an HTTP client for an
// existing one (NewOAuth2HTTPClient).
var OAuth2AppSource func(providerID string, scopes []string) (oauth2client.App, error)
 16 | 
// NewOAuth2HTTPClient returns a new HTTP client which performs
// HTTP requests that are authenticated with an oauth2.Token
// stored with the account acc. The returned client transparently
// refreshes the token when it expires and persists any refreshed
// token back to the database (via persistedTokenSource). An error
// is returned if no usable token is stored for the account.
func (acc Account) NewOAuth2HTTPClient() (*http.Client, error) {
	// load the existing token for this account from the database
	var tkn *oauth2.Token
	err := UnmarshalGob(acc.authorization, &tkn)
	if err != nil {
		return nil, fmt.Errorf("gob-decoding OAuth2 token: %v", err)
	}
	if tkn == nil || tkn.AccessToken == "" {
		return nil, fmt.Errorf("OAuth2 token is empty: %+v", tkn)
	}

	// load the service's "oauth app", which can provide both tokens and
	// oauth configs -- in this case, we need the oauth config; we should
	// already have a token
	oapp, err := OAuth2AppSource(acc.ds.OAuth2.ProviderID, acc.ds.OAuth2.Scopes)
	if err != nil {
		return nil, fmt.Errorf("getting token source for %s: %v", acc.DataSourceID, err)
	}

	// obtain a token source from the oauth's config so that it can keep
	// the token refreshed if it expires
	src := oapp.TokenSource(context.Background(), tkn)

	// finally, create an HTTP client that authenticates using the token,
	// but wrapping the underlying token source so we can persist any
	// changes to the database
	return oauth2.NewClient(context.Background(), &persistedTokenSource{
		tl:        acc.t,
		ts:        src,
		accountID: acc.ID,
		token:     tkn,
	}), nil
}
 53 | 
 54 | // authorizeWithOAuth2 gets an initial OAuth2 token from the user.
 55 | // It requires OAuth2AppSource to be set or it will panic.
 56 | func authorizeWithOAuth2(oc OAuth2) ([]byte, error) {
 57 | 	src, err := OAuth2AppSource(oc.ProviderID, oc.Scopes)
 58 | 	if err != nil {
 59 | 		return nil, fmt.Errorf("getting token source: %v", err)
 60 | 	}
 61 | 	tkn, err := src.InitialToken()
 62 | 	if err != nil {
 63 | 		return nil, fmt.Errorf("getting token from source: %v", err)
 64 | 	}
 65 | 	return MarshalGob(tkn)
 66 | }
 67 | 
// persistedTokenSource wraps a TokenSource for
// a particular account and persists any changes
// to the account's token to the database.
type persistedTokenSource struct {
	// tl provides the DB handle used to persist refreshed tokens
	tl *Timeline
	// ts is the underlying (auto-refreshing) token source
	ts oauth2.TokenSource
	// accountID is the row ID of the account this token belongs to
	accountID int64
	// token is the last token seen, used to detect changes
	token *oauth2.Token
}
 77 | 
// Token returns the current token from the wrapped source
// (which refreshes it if needed). Whenever the access token
// changes, the new token is gob-encoded and written back to
// the accounts table so the refresh survives restarts.
func (ps *persistedTokenSource) Token() (*oauth2.Token, error) {
	tkn, err := ps.ts.Token()
	if err != nil {
		return tkn, err
	}

	// store an updated token in the DB
	// (comparing access tokens is how we detect a refresh)
	if tkn.AccessToken != ps.token.AccessToken {
		ps.token = tkn

		authBytes, err := MarshalGob(tkn)
		if err != nil {
			return nil, fmt.Errorf("gob-encoding new OAuth2 token: %v", err)
		}

		_, err = ps.tl.db.Exec(`UPDATE accounts SET authorization=? WHERE id=?`, authBytes, ps.accountID)
		if err != nil {
			return nil, fmt.Errorf("storing refreshed OAuth2 token: %v", err)
		}
	}

	return tkn, nil
}
101 | 


--------------------------------------------------------------------------------
/oauth2client/browser.go:
--------------------------------------------------------------------------------
  1 | package oauth2client
  2 | 
  3 | import (
  4 | 	"bytes"
  5 | 	"fmt"
  6 | 	"net"
  7 | 	"net/http"
  8 | 	"net/url"
  9 | 	"os/exec"
 10 | 	"runtime"
 11 | 	"strings"
 12 | )
 13 | 
// Browser gets an OAuth2 code via the web browser.
type Browser struct {
	// RedirectURL is the URL to redirect the browser
	// to after the code is obtained; it is usually a
	// loopback address. If empty, DefaultRedirectURL
	// will be used instead. Get listens on this URL's
	// host to capture the callback request.
	RedirectURL string
}
 22 | 
// Get opens a browser window to authCodeURL for the user to
// authorize the application, and it returns the resulting
// OAuth2 code. It rejects requests where the "state" param
// does not match expectedStateVal.
//
// It starts a temporary HTTP server on the redirect URL's host
// to capture the callback; the listener is closed when this
// method returns, which also stops the server goroutine's
// Serve loop.
func (b Browser) Get(expectedStateVal, authCodeURL string) (string, error) {
	redirURLStr := b.RedirectURL
	if redirURLStr == "" {
		redirURLStr = DefaultRedirectURL
	}
	redirURL, err := url.Parse(redirURLStr)
	if err != nil {
		return "", err
	}

	ln, err := net.Listen("tcp", redirURL.Host)
	if err != nil {
		return "", err
	}
	defer ln.Close()

	// the handler goroutine delivers the code (or an error) on
	// these unbuffered channels; the select below receives
	// exactly one of them
	ch := make(chan string)
	errCh := make(chan error)

	go func() {
		handler := func(w http.ResponseWriter, r *http.Request) {
			state := r.FormValue("state")
			code := r.FormValue("code")

			// ignore anything that isn't the expected OAuth2 callback
			if r.Method != "GET" || r.URL.Path != redirURL.Path || state == "" || code == "" {
				http.Error(w, "This endpoint is for OAuth2 callbacks only", http.StatusNotFound)
				return
			}

			if state != expectedStateVal {
				http.Error(w, "invalid state", http.StatusUnauthorized)
				errCh <- fmt.Errorf("invalid OAuth2 state; expected '%s' but got '%s'",
					expectedStateVal, state)
				return
			}

			fmt.Fprint(w, successBody)
			ch <- code
		}

		// must disable keep-alives, otherwise repeated calls to
		// this method can block indefinitely in some weird bug
		srv := http.Server{Handler: http.HandlerFunc(handler)}
		srv.SetKeepAlivesEnabled(false)
		srv.Serve(ln)
	}()

	err = openBrowser(authCodeURL)
	if err != nil {
		fmt.Printf("Can't open browser: %s.\nPlease follow this link: %s", err, authCodeURL)
	}

	// block until the callback delivers either the code or an error
	select {
	case code := <-ch:
		return code, nil
	case err := <-errCh:
		return "", err
	}
}
 86 | 
 87 | // openBrowser opens the browser to url.
 88 | func openBrowser(url string) error {
 89 | 	osCommand := map[string][]string{
 90 | 		"darwin":  []string{"open"},
 91 | 		"freebsd": []string{"xdg-open"},
 92 | 		"linux":   []string{"xdg-open"},
 93 | 		"netbsd":  []string{"xdg-open"},
 94 | 		"openbsd": []string{"xdg-open"},
 95 | 		"windows": []string{"cmd", "/c", "start"},
 96 | 	}
 97 | 
 98 | 	if runtime.GOOS == "windows" {
 99 | 		// escape characters not allowed by cmd
100 | 		url = strings.Replace(url, "&", `^&`, -1)
101 | 	}
102 | 
103 | 	all := osCommand[runtime.GOOS]
104 | 	exe := all[0]
105 | 	args := all[1:]
106 | 
107 | 	buf := new(bytes.Buffer)
108 | 
109 | 	cmd := exec.Command(exe, append(args, url)...)
110 | 	cmd.Stdout = buf
111 | 	cmd.Stderr = buf
112 | 	err := cmd.Run()
113 | 
114 | 	if err != nil {
115 | 		return fmt.Errorf("%v: %s", err, buf.String())
116 | 	}
117 | 
118 | 	return nil
119 | }
120 | 
// successBody is the HTML page shown in the user's browser
// after the OAuth2 code has been captured successfully.
const successBody = `<!DOCTYPE html>
<html>
	<head>
		<title>OAuth2 Success</title>
		<meta charset="utf-8">
		<style>
			body { text-align: center; padding: 5%; font-family: sans-serif; }
			h1 { font-size: 20px; }
			p { font-size: 16px; color: #444; }
		</style>
	</head>
	<body>
		<h1>Code obtained, thank you!</h1>
		<p>
			You may now close this page and return to the application.
		</p>
	</body>
</html>
`
140 | 
// Interface guard: ensure Browser implements Getter.
var _ Getter = Browser{}
142 | 


--------------------------------------------------------------------------------
/oauth2client/localapp.go:
--------------------------------------------------------------------------------
 1 | package oauth2client
 2 | 
 3 | import (
 4 | 	"context"
 5 | 	"fmt"
 6 | 
 7 | 	"golang.org/x/oauth2"
 8 | 	"golang.org/x/oauth2/clientcredentials"
 9 | )
10 | 
// LocalAppSource implements oauth2.TokenSource for
// OAuth2 client apps that have the client app
// credentials (Client ID and Secret) available
// locally. The OAuth2 provider is accessed directly
// using the OAuth2Config field value.
//
// If the OAuth2Config.Endpoint's TokenURL is set
// but the AuthURL is empty, then it is assumed
// that this is a two-legged ("client credentials")
// OAuth2 configuration; i.e. bearer token.
//
// LocalAppSource instances can be ephemeral.
type LocalAppSource struct {
	// OAuth2Config is the OAuth2 configuration,
	// including the client ID and secret.
	// REQUIRED (InitialToken errors without it).
	OAuth2Config *oauth2.Config

	// AuthCodeGetter is how the auth code
	// is obtained. If not set, a default
	// oauth2client.Browser is used.
	AuthCodeGetter Getter
}
32 | 
// InitialToken obtains a token using s.OAuth2Config
// and s.AuthCodeGetter (unless the configuration
// is for a client credentials / "two-legged" flow,
// in which case the token comes straight from the
// client credentials config).
func (s LocalAppSource) InitialToken() (*oauth2.Token, error) {
	if s.OAuth2Config == nil {
		return nil, fmt.Errorf("missing OAuth2Config")
	}

	// if this is a two-legged config ("client credentials" flow,
	// where the client bears the actual token, like a password,
	// without an intermediate app) configuration, then we can
	// just return that bearer token immediately
	if tlc := s.twoLeggedConfig(); tlc != nil {
		return tlc.Token(context.Background())
	}

	// value receiver: this default applies only to the local
	// copy for the duration of this call
	if s.AuthCodeGetter == nil {
		s.AuthCodeGetter = Browser{}
	}

	stateVal := State()
	authURL := s.OAuth2Config.AuthCodeURL(stateVal, oauth2.AccessTypeOffline)

	code, err := s.AuthCodeGetter.Get(stateVal, authURL)
	if err != nil {
		return nil, fmt.Errorf("getting code via browser: %v", err)
	}

	// use the package's own HTTP client (which has a timeout)
	// for the code-for-token exchange
	ctx := context.WithValue(context.Background(),
		oauth2.HTTPClient, httpClient)

	return s.OAuth2Config.Exchange(ctx, code)
}
66 | 
67 | // TokenSource returns a token source for s.
68 | func (s LocalAppSource) TokenSource(ctx context.Context, tkn *oauth2.Token) oauth2.TokenSource {
69 | 	if tlc := s.twoLeggedConfig(); tlc != nil {
70 | 		return tlc.TokenSource(ctx)
71 | 	}
72 | 	return s.OAuth2Config.TokenSource(ctx, tkn)
73 | }
74 | 
75 | // twoLeggedConfig returns a clientcredentials configuration if
76 | // this app source appears to be configured as one (i.e. with
77 | // bearer credentials, with a token URL but without an auth URL,
78 | // because the client credentials is the actual authentication).
79 | func (s LocalAppSource) twoLeggedConfig() *clientcredentials.Config {
80 | 	if s.OAuth2Config.Endpoint.TokenURL != "" &&
81 | 		s.OAuth2Config.Endpoint.AuthURL == "" {
82 | 		return &clientcredentials.Config{
83 | 			ClientID:     s.OAuth2Config.ClientID,
84 | 			ClientSecret: s.OAuth2Config.ClientSecret,
85 | 			TokenURL:     s.OAuth2Config.Endpoint.TokenURL,
86 | 			Scopes:       s.OAuth2Config.Scopes,
87 | 		}
88 | 	}
89 | 	return nil
90 | }
91 | 
// Interface guard: ensure LocalAppSource implements App.
var _ App = LocalAppSource{}
93 | 


--------------------------------------------------------------------------------
/oauth2client/oauth2.go:
--------------------------------------------------------------------------------
 1 | package oauth2client
 2 | 
 3 | import (
 4 | 	"context"
 5 | 	mathrand "math/rand"
 6 | 	"net/http"
 7 | 	"time"
 8 | 
 9 | 	"golang.org/x/oauth2"
10 | )
11 | 
// init seeds math/rand so State/randString values differ across
// process runs. (Not cryptographically secure; see randString.)
func init() {
	mathrand.Seed(time.Now().UnixNano())
}
15 | 
// Getter is a type that can get an OAuth2 auth code.
// It must enforce that the state parameter of the
// redirected request matches expectedStateVal, and
// return an error if it does not.
type Getter interface {
	Get(expectedStateVal, authCodeURL string) (code string, err error)
}
22 | 
// State returns a random string suitable as a state value.
func State() string {
	return randString(14)
}

// randString returns a random alphanumeric string of length n.
// It is not safe for cryptographic use.
func randString(n int) string {
	const alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
	out := make([]byte, n)
	for i := 0; i < n; i++ {
		out[i] = alphabet[mathrand.Intn(len(alphabet))]
	}
	return string(out)
}
37 | 
type (
	// OAuth2Info contains information for obtaining an auth code:
	// the state value to verify on the redirect, and the URL the
	// user must visit to authorize.
	OAuth2Info struct {
		StateValue  string
		AuthCodeURL string
	}

	// App provides a way to get an initial OAuth2 token
	// as well as a continuing token source.
	App interface {
		// InitialToken obtains a brand-new token; it may be
		// interactive (e.g. opening a browser window).
		InitialToken() (*oauth2.Token, error)
		TokenSource(context.Context, *oauth2.Token) oauth2.TokenSource
	}
)
52 | 
// httpClient is the HTTP client to use for OAuth2 requests;
// the timeout keeps token exchanges from hanging forever on
// an unresponsive provider.
var httpClient = &http.Client{
	Timeout: 10 * time.Second,
}
57 | 
// DefaultRedirectURL is the default URL to
// which to redirect clients after a code
// has been obtained. Redirect URLs may
// have to be registered with your OAuth2
// provider. Browser.Get listens on this
// URL's host to receive the callback.
const DefaultRedirectURL = "http://localhost:8008/oauth2-redirect"
64 | 


--------------------------------------------------------------------------------
/oauth2client/oauth2proxy/cmd/oauth2proxy/main.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"flag"
 5 | 	"log"
 6 | 	"net/http"
 7 | 
 8 | 	"github.com/BurntSushi/toml"
 9 | 	"github.com/mholt/timeliner/oauth2client/oauth2proxy"
10 | 	"golang.org/x/oauth2"
11 | )
12 | 
// init registers the command-line flags; the package-level
// variables below supply their default values.
func init() {
	flag.StringVar(&credentialsFile, "credentials", credentialsFile, "The path to the file containing the OAuth2 app credentials for each provider")
	flag.StringVar(&addr, "addr", addr, "The address to listen on")
	flag.StringVar(&basePath, "path", basePath, "The base path on which to serve the proxy endpoints")
}
18 | 
// Flag-configurable settings and their defaults (see init above).
var (
	credentialsFile = "credentials.toml" // TOML file with per-provider OAuth2 app credentials
	addr            = ":7233"            // listen address
	basePath        = "/oauth2"          // base path for the proxy endpoints
)
24 | 
25 | func main() {
26 | 	flag.Parse()
27 | 
28 | 	if credentialsFile == "" {
29 | 		log.Fatal("[FATAL] No credentials file specified (use -credentials)")
30 | 	}
31 | 	if addr == "" {
32 | 		log.Fatal("[FATAL] No address specified (use -addr)")
33 | 	}
34 | 
35 | 	// decode app credentials
36 | 	var creds oauth2Credentials
37 | 	md, err := toml.DecodeFile(credentialsFile, &creds)
38 | 	if err != nil {
39 | 		log.Fatalf("[FATAL] Decoding credentials file: %v", err)
40 | 	}
41 | 	if len(md.Undecoded()) > 0 {
42 | 		log.Fatalf("[FATAL] Unrecognized key(s) in credentials file: %+v", md.Undecoded())
43 | 	}
44 | 
45 | 	// convert them into oauth2.Configs (the structure of
46 | 	// oauth2.Config as TOML is too verbose for my taste)
47 | 	oauth2Configs := make(map[string]oauth2.Config)
48 | 	for id, prov := range creds.Providers {
49 | 		oauth2Configs[id] = oauth2.Config{
50 | 			ClientID:     prov.ClientID,
51 | 			ClientSecret: prov.ClientSecret,
52 | 			Endpoint: oauth2.Endpoint{
53 | 				AuthURL:  prov.AuthURL,
54 | 				TokenURL: prov.TokenURL,
55 | 			},
56 | 		}
57 | 		log.Println("Provider:", id)
58 | 	}
59 | 
60 | 	log.Println("Serving OAuth2 proxy on", addr)
61 | 
62 | 	p := oauth2proxy.New(basePath, oauth2Configs)
63 | 	http.ListenAndServe(addr, p)
64 | }
65 | 
// oauth2Credentials is the top-level structure of the TOML
// credentials file: a table of provider configs keyed by
// provider ID.
type oauth2Credentials struct {
	Providers map[string]oauth2ProviderConfig `toml:"providers"`
}

// oauth2ProviderConfig holds one provider's OAuth2 app
// credentials and endpoint URLs as read from the TOML file.
type oauth2ProviderConfig struct {
	ClientID     string `toml:"client_id"`
	ClientSecret string `toml:"client_secret"`
	AuthURL      string `toml:"auth_url"`
	TokenURL     string `toml:"token_url"`
}
76 | 


--------------------------------------------------------------------------------
/oauth2client/oauth2proxy/proxy.go:
--------------------------------------------------------------------------------
  1 | package oauth2proxy
  2 | 
  3 | import (
  4 | 	"encoding/json"
  5 | 	"io"
  6 | 	"io/ioutil"
  7 | 	"net/http"
  8 | 	"net/url"
  9 | 	"path"
 10 | 	"strings"
 11 | 
 12 | 	"github.com/mholt/timeliner/oauth2client"
 13 | 	"golang.org/x/oauth2"
 14 | )
 15 | 
 16 | // New returns a new OAuth2 proxy that serves its endpoints
 17 | // under the given basePath and which replaces credentials
 18 | // and endpoints with those found in the configs given in
 19 | // the providers map.
 20 | //
 21 | // The map value does not use pointers, so that temporary
 22 | // manipulations of the value can occur without modifying
 23 | // the original template value.
 24 | func New(basePath string, providers map[string]oauth2.Config) http.Handler {
 25 | 	basePath = path.Join("/", basePath)
 26 | 
 27 | 	proxy := oauth2Proxy{providers: providers}
 28 | 
 29 | 	mux := http.NewServeMux()
 30 | 	mux.HandleFunc(path.Join(basePath, "auth-code-url"), proxy.handleAuthCodeURL)
 31 | 	mux.HandleFunc(path.Join(basePath, "proxy")+"/", proxy.handleOAuth2)
 32 | 
 33 | 	return mux
 34 | }
 35 | 
// oauth2Proxy serves the proxy endpoints. providers maps a
// provider ID to its template config; values (not pointers)
// are stored so per-request copies can be mutated safely.
type oauth2Proxy struct {
	providers map[string]oauth2.Config
}
 39 | 
 40 | func (proxy oauth2Proxy) handleAuthCodeURL(w http.ResponseWriter, r *http.Request) {
 41 | 	providerID := r.FormValue("provider")
 42 | 	redir := r.FormValue("redirect")
 43 | 	scopes := r.URL.Query()["scope"]
 44 | 
 45 | 	oauth2CfgCopy, ok := proxy.providers[providerID]
 46 | 	if !ok {
 47 | 		http.Error(w, "unknown service ID", http.StatusBadRequest)
 48 | 		return
 49 | 	}
 50 | 
 51 | 	// augment the template config with parameters specific to this
 52 | 	// request (this is why it's important that the configs aren't
 53 | 	// pointers; we should be mutating only copies here)
 54 | 	oauth2CfgCopy.Scopes = scopes
 55 | 	oauth2CfgCopy.RedirectURL = redir
 56 | 
 57 | 	stateVal := oauth2client.State()
 58 | 	url := oauth2CfgCopy.AuthCodeURL(stateVal, oauth2.AccessTypeOffline)
 59 | 
 60 | 	info := oauth2client.OAuth2Info{
 61 | 		StateValue:  stateVal,
 62 | 		AuthCodeURL: url,
 63 | 	}
 64 | 
 65 | 	json.NewEncoder(w).Encode(info)
 66 | }
 67 | 
 68 | func (proxy oauth2Proxy) handleOAuth2(w http.ResponseWriter, r *http.Request) {
 69 | 	// knead the URL into its two parts: the service
 70 | 	// ID and which endpoint to proxy to
 71 | 	// reqURL := strings.TrimPrefix(r.URL.Path, basePath+"/proxy")
 72 | 	// reqURL = path.Clean(strings.TrimPrefix(reqURL, "/"))
 73 | 
 74 | 	// we want the last two components of the path
 75 | 	urlParts := strings.Split(r.URL.Path, "/")
 76 | 	if len(urlParts) < 2 {
 77 | 		http.Error(w, "bad path length", http.StatusBadRequest)
 78 | 		return
 79 | 	}
 80 | 
 81 | 	providerID := urlParts[len(urlParts)-2]
 82 | 	whichEndpoint := urlParts[len(urlParts)-1]
 83 | 
 84 | 	// get the OAuth2 config matching the service ID
 85 | 	oauth2Config, ok := proxy.providers[providerID]
 86 | 	if !ok {
 87 | 		http.Error(w, "unknown service: "+providerID, http.StatusBadRequest)
 88 | 		return
 89 | 	}
 90 | 
 91 | 	// figure out which endpoint we'll use for upstream
 92 | 	var upstreamEndpoint string
 93 | 	switch whichEndpoint {
 94 | 	case "auth":
 95 | 		upstreamEndpoint = oauth2Config.Endpoint.AuthURL
 96 | 	case "token":
 97 | 		upstreamEndpoint = oauth2Config.Endpoint.TokenURL
 98 | 	}
 99 | 
100 | 	// read the body so we can replace values if necessary
101 | 	// (don't use r.ParseForm because we need to keep body
102 | 	// and query string distinct)
103 | 	reqBodyBytes, err := ioutil.ReadAll(r.Body) //http.MaxBytesReader(w, r.Body, 64*1024))
104 | 	if err != nil {
105 | 		http.Error(w, err.Error(), http.StatusBadRequest)
106 | 		return
107 | 	}
108 | 
109 | 	// if the request body is form-encoded, replace any
110 | 	// credential placeholders with the real credentials
111 | 	var upstreamBody io.Reader
112 | 	if strings.Contains(r.Header.Get("Content-Type"), "x-www-form-urlencoded") {
113 | 		bodyForm, err := url.ParseQuery(string(reqBodyBytes))
114 | 		if err != nil {
115 | 			http.Error(w, "error parsing request body", http.StatusBadRequest)
116 | 			return
117 | 		}
118 | 		replaceCredentials(bodyForm, oauth2Config)
119 | 		upstreamBody = strings.NewReader(bodyForm.Encode())
120 | 	}
121 | 
122 | 	// now do the same thing for the query string
123 | 	qs := r.URL.Query()
124 | 	replaceCredentials(qs, oauth2Config)
125 | 
126 | 	// make outgoing URL
127 | 	upstreamURL, err := url.Parse(upstreamEndpoint)
128 | 	if err != nil {
129 | 		http.Error(w, "bad upstream URL", http.StatusInternalServerError)
130 | 		return
131 | 	}
132 | 	upstreamURL.RawQuery = qs.Encode()
133 | 
134 | 	// set the real credentials -- this has to be done
135 | 	// carefully because apparently a lot of OAuth2
136 | 	// providers are broken (against RFC 6749), so
137 | 	// the downstream OAuth2 client lib must be sure
138 | 	// to set the credentials in the right place, and
139 | 	// we should be sure to mirror that behavior;
140 | 	// this means that even though the downstream may
141 | 	// not have the real client ID and secret, they
142 | 	// need to provide SOMETHING as bogus placeholder
143 | 	// values to signal to us where to put the real
144 | 	// credentials
145 | 	if r.Header.Get("Authorization") != "" {
146 | 		r.SetBasicAuth(oauth2Config.ClientID, oauth2Config.ClientSecret)
147 | 	}
148 | 
149 | 	// prepare the request to upstream
150 | 	upstream, err := http.NewRequest(r.Method, upstreamURL.String(), upstreamBody)
151 | 	if err != nil {
152 | 		http.Error(w, err.Error(), http.StatusBadRequest)
153 | 		return
154 | 	}
155 | 	upstream.Header = r.Header
156 | 	delete(upstream.Header, "Content-Length")
157 | 
158 | 	// perform the upstream request
159 | 	resp, err := http.DefaultClient.Do(upstream)
160 | 	if err != nil {
161 | 		http.Error(w, err.Error(), http.StatusBadGateway)
162 | 		return
163 | 	}
164 | 	defer resp.Body.Close()
165 | 
166 | 	// copy the upstream headers to the response downstream
167 | 	for key, vals := range resp.Header {
168 | 		for _, val := range vals {
169 | 			w.Header().Add(key, val)
170 | 		}
171 | 	}
172 | 
173 | 	// carry over the status code
174 | 	w.WriteHeader(resp.StatusCode)
175 | 
176 | 	// copy the response body downstream
177 | 	_, err = io.Copy(w, resp.Body)
178 | 	if err != nil {
179 | 		http.Error(w, "writing body: "+err.Error(), http.StatusBadGateway)
180 | 		return
181 | 	}
182 | }
183 | 
184 | func replaceCredentials(form url.Values, oauth2Config oauth2.Config) {
185 | 	if form.Get("client_id") != "" {
186 | 		form.Set("client_id", oauth2Config.ClientID)
187 | 	}
188 | 	if form.Get("client_secret") != "" {
189 | 		form.Set("client_secret", oauth2Config.ClientSecret)
190 | 	}
191 | }
192 | 


--------------------------------------------------------------------------------
/oauth2client/remoteapp.go:
--------------------------------------------------------------------------------
  1 | package oauth2client
  2 | 
  3 | import (
  4 | 	"context"
  5 | 	"encoding/json"
  6 | 	"fmt"
  7 | 	"net/http"
  8 | 	"net/url"
  9 | 	"strings"
 10 | 
 11 | 	"golang.org/x/oauth2"
 12 | )
 13 | 
// RemoteAppSource implements oauth2.TokenSource for
// OAuth2 client apps that have their credentials
// (Client ID and Secret, as well as endpoint info)
// stored remotely. Thus, this type obtains tokens
// through a remote proxy that presumably has the
// client app credentials, which it will replace
// before proxying to the provider.
//
// RemoteAppSource values can be ephemeral.
type RemoteAppSource struct {
	// How to obtain the auth URL.
	// Default: DirectAuthURLMode
	AuthURLMode AuthURLMode

	// The URL to the proxy server (its
	// address + base path).
	ProxyURL string

	// The ID of the OAuth2 provider.
	ProviderID string

	// The scopes for which to obtain
	// authorization.
	Scopes []string

	// The URL to redirect to to finish
	// the ceremony. If empty,
	// DefaultRedirectURL is used.
	RedirectURL string

	// How the auth code is obtained.
	// If not set, a default
	// oauth2client.Browser is used.
	AuthCodeGetter Getter
}
 48 | 
// InitialToken obtains an initial token using s.AuthCodeGetter:
// it gets a state value and auth URL according to s.AuthURLMode,
// has the user authorize, then exchanges the resulting code for
// a token via the proxy.
func (s RemoteAppSource) InitialToken() (*oauth2.Token, error) {
	// value receiver: these defaults apply only to the
	// local copy for the duration of this call
	if s.AuthCodeGetter == nil {
		s.AuthCodeGetter = Browser{}
	}
	if s.AuthURLMode == "" {
		s.AuthURLMode = DirectAuthURLMode
	}

	cfg := s.config()

	// obtain a state value and auth URL
	var stateVal, authURL string
	var err error
	switch s.AuthURLMode {
	case DirectAuthURLMode:
		stateVal, authURL, err = s.getDirectAuthURLFromProxy()
	case ProxiedAuthURLMode:
		stateVal, authURL, err = s.getProxiedAuthURL(cfg)
	default:
		return nil, fmt.Errorf("unknown AuthURLMode: %s", s.AuthURLMode)
	}
	if err != nil {
		return nil, err
	}

	// now obtain the code
	code, err := s.AuthCodeGetter.Get(stateVal, authURL)
	if err != nil {
		return nil, fmt.Errorf("getting code via browser: %v", err)
	}

	// and complete the ceremony, using the package's own
	// HTTP client (which has a timeout) for the exchange
	ctx := context.WithValue(context.Background(),
		oauth2.HTTPClient, httpClient)

	return cfg.Exchange(ctx, code)
}
 87 | 
// getDirectAuthURLFromProxy returns an auth URL that goes directly to the
// OAuth2 provider server, but it gets that URL by querying the proxy server
// for what it should be ("DirectAuthURLMode"). The proxy also chooses the
// state value, which is returned alongside the URL.
func (s RemoteAppSource) getDirectAuthURLFromProxy() (state string, authURL string, err error) {
	redirURL := s.RedirectURL
	if redirURL == "" {
		redirURL = DefaultRedirectURL
	}

	// parameters for the proxy's auth-code-url endpoint
	v := url.Values{
		"provider": {s.ProviderID},
		"scope":    s.Scopes,
		"redirect": {redirURL},
	}

	proxyURL := strings.TrimSuffix(s.ProxyURL, "/")
	resp, err := http.Get(proxyURL + "/auth-code-url?" + v.Encode())
	if err != nil {
		return "", "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return "", "", fmt.Errorf("requesting auth code URL from proxy: HTTP %d: %s",
			resp.StatusCode, resp.Status)
	}

	// the proxy responds with the state value and auth URL as JSON
	var info OAuth2Info
	err = json.NewDecoder(resp.Body).Decode(&info)
	if err != nil {
		return "", "", err
	}

	return info.StateValue, info.AuthCodeURL, nil
}
123 | 
124 | // getProxiedAuthURL returns an auth URL that goes to the remote proxy ("ProxiedAuthURLMode").
125 | func (s RemoteAppSource) getProxiedAuthURL(cfg *oauth2.Config) (state string, authURL string, err error) {
126 | 	state = State()
127 | 	authURL = cfg.AuthCodeURL(state, oauth2.AccessTypeOffline)
128 | 	return
129 | }
130 | 
131 | // config builds an OAuth2 config from s.
132 | func (s RemoteAppSource) config() *oauth2.Config {
133 | 	redirURL := s.RedirectURL
134 | 	if redirURL == "" {
135 | 		redirURL = DefaultRedirectURL
136 | 	}
137 | 
138 | 	return &oauth2.Config{
139 | 		ClientID:     "placeholder",
140 | 		ClientSecret: "placeholder",
141 | 		RedirectURL:  redirURL,
142 | 		Scopes:       s.Scopes,
143 | 		Endpoint: oauth2.Endpoint{
144 | 			AuthURL:  s.ProxyURL + "/proxy/" + s.ProviderID + "/auth",
145 | 			TokenURL: s.ProxyURL + "/proxy/" + s.ProviderID + "/token",
146 | 		},
147 | 	}
148 | }
149 | 
150 | // TokenSource returns a token source for s.
151 | func (s RemoteAppSource) TokenSource(ctx context.Context, tkn *oauth2.Token) oauth2.TokenSource {
152 | 	return s.config().TokenSource(ctx, tkn)
153 | }
154 | 
// AuthURLMode describes what kind of auth URL a
// RemoteAppSource should obtain; see the two
// constants below. The zero value is treated as
// DirectAuthURLMode by InitialToken.
type AuthURLMode string

const (
	// DirectAuthURLMode queries the remote proxy to get
	// an auth URL that goes directly to the OAuth2 provider
	// web page the user must go to in order to obtain
	// authorization. Although this mode incurs one extra
	// HTTP request (that is not part of the OAuth2 spec,
	// it is purely our own), it is perhaps more robust in
	// more environments, since the browser will access the
	// auth provider's site directly, meaning that any HTML
	// or JavaScript on the page that expects HTTPS or a
	// certain hostname will be able to function correctly.
	DirectAuthURLMode AuthURLMode = "direct"

	// ProxiedAuthURLMode makes an auth URL that goes to
	// the remote proxy, not directly to the provider.
	// This is perhaps a "purer" approach than
	// DirectAuthURLMode, but it may not work if HTML or
	// JavaScript on the provider's auth page expects
	// a certain scheme or hostname in the page's URL.
	// This mode usually works when the proxy is running
	// over HTTPS, but this mode may break depending on
	// the provider, when the proxy uses HTTP (which
	// should only be in dev environments of course).
	//
	// For example, Google's OAuth2 page will try to set a
	// secure-context cookie using JavaScript, which fails
	// if the auth page is proxied through a plaintext HTTP
	// localhost endpoint, which is what we do during
	// development for convenience; the lack of HTTPS caused
	// the page to reload infinitely because, even though
	// the request was reverse-proxied, the JS on the page
	// expected HTTPS. (See my self-congratulatory tweet:
	// https://twitter.com/mholt6/status/1078518306045231104)
	// Using DirectAuthURLMode is the easiest way around
	// this problem.
	ProxiedAuthURLMode AuthURLMode = "proxied"
)
196 | 
197 | var _ App = RemoteAppSource{}
198 | 


--------------------------------------------------------------------------------
/persons.go:
--------------------------------------------------------------------------------
 1 | package timeliner
 2 | 
 3 | import (
 4 | 	"database/sql"
 5 | 	"fmt"
 6 | )
 7 | 
 8 | // getPerson returns the person mapped to userID on service.
 9 | // If the person does not exist, it is created.
10 | func (t *Timeline) getPerson(dataSourceID, userID, name string) (Person, error) {
11 | 	// first, load the person
12 | 	var p Person
13 | 	err := t.db.QueryRow(`SELECT persons.id, persons.name
14 | 		FROM persons, person_identities
15 | 		WHERE person_identities.data_source_id=?
16 | 			AND person_identities.user_id=?
17 | 			AND persons.id = person_identities.person_id
18 | 		LIMIT 1`, dataSourceID, userID).Scan(&p.ID, &p.Name)
19 | 	if err == sql.ErrNoRows {
20 | 		// person does not exist; create this mapping - TODO: do in a transaction
21 | 		p = Person{Name: name}
22 | 		res, err := t.db.Exec(`INSERT INTO persons (name) VALUES (?)`, p.Name)
23 | 		if err != nil {
24 | 			return Person{}, fmt.Errorf("adding new person: %v", err)
25 | 		}
26 | 		p.ID, err = res.LastInsertId()
27 | 		if err != nil {
28 | 			return Person{}, fmt.Errorf("getting person ID: %v", err)
29 | 		}
30 | 		_, err = t.db.Exec(`INSERT OR IGNORE INTO person_identities
31 | 			(person_id, data_source_id, user_id) VALUES (?, ?, ?)`,
32 | 			p.ID, dataSourceID, userID)
33 | 		if err != nil {
34 | 			return Person{}, fmt.Errorf("adding new person identity mapping: %v", err)
35 | 		}
36 | 	} else if err != nil {
37 | 		return Person{}, fmt.Errorf("selecting person identity: %v", err)
38 | 	}
39 | 
40 | 	// now get all the person's identities
41 | 	rows, err := t.db.Query(`SELECT id, person_id, data_source_id, user_id
42 | 		FROM person_identities WHERE person_id=?`, p.ID)
43 | 	if err != nil {
44 | 		return Person{}, fmt.Errorf("selecting person's known identities: %v", err)
45 | 	}
46 | 	defer rows.Close()
47 | 	for rows.Next() {
48 | 		var ident PersonIdentity
49 | 		err := rows.Scan(&ident.ID, &ident.PersonID, &ident.DataSourceID, &ident.UserID)
50 | 		if err != nil {
51 | 			return Person{}, fmt.Errorf("loading person's identity: %v", err)
52 | 		}
53 | 		p.Identities = append(p.Identities, ident)
54 | 	}
55 | 	if err = rows.Err(); err != nil {
56 | 		return Person{}, fmt.Errorf("scanning identity rows: %v", err)
57 | 	}
58 | 
59 | 	return p, nil
60 | }
61 | 
// Person represents a person.
type Person struct {
	// ID is the row ID in the persons table.
	ID         int64
	// Name is the person's display name.
	Name       string
	// Identities holds all known service identity mappings for this person.
	Identities []PersonIdentity
}
68 | 
// PersonIdentity is a way to map a user ID on a service to a person.
type PersonIdentity struct {
	// ID is the row ID in the person_identities table.
	ID           int64
	// PersonID refers to Person.ID. NOTE(review): Person.ID is an int64
	// but this field is a string; database/sql converts the integer on
	// Scan so it works, yet int64 seems the natural type — confirm no
	// caller depends on the string form before changing this exported field.
	PersonID     string
	DataSourceID string
	UserID       string
}
76 | 


--------------------------------------------------------------------------------
/ratelimit.go:
--------------------------------------------------------------------------------
 1 | package timeliner
 2 | 
 3 | import (
 4 | 	"net/http"
 5 | 	"time"
 6 | )
 7 | 
// RateLimit describes a rate limit.
type RateLimit struct {
	// RequestsPerHour is the maximum sustained request rate.
	RequestsPerHour int
	// BurstSize is the capacity of the token bucket: how many requests
	// may proceed back-to-back before the sustained rate applies.
	BurstSize       int

	// ticker periodically refills the token bucket.
	ticker *time.Ticker
	// token is the bucket itself; each request consumes one token.
	token  chan struct{}
}
16 | 
17 | // NewRateLimitedRoundTripper adds rate limiting to rt based on the rate
18 | // limiting policy registered by the data source associated with acc.
19 | func (acc Account) NewRateLimitedRoundTripper(rt http.RoundTripper) http.RoundTripper {
20 | 	rl, ok := acc.t.rateLimiters[acc.String()]
21 | 
22 | 	if !ok && acc.ds.RateLimit.RequestsPerHour > 0 {
23 | 		secondsBetweenReqs := 60.0 / (float64(acc.ds.RateLimit.RequestsPerHour) / 60.0)
24 | 		millisBetweenReqs := secondsBetweenReqs * 1000.0
25 | 		reqInterval := time.Duration(millisBetweenReqs) * time.Millisecond
26 | 		if reqInterval < minInterval {
27 | 			reqInterval = minInterval
28 | 		}
29 | 
30 | 		rl.ticker = time.NewTicker(reqInterval)
31 | 		rl.token = make(chan struct{}, rl.BurstSize)
32 | 
33 | 		for i := 0; i < cap(rl.token); i++ {
34 | 			rl.token <- struct{}{}
35 | 		}
36 | 		go func() {
37 | 			for range rl.ticker.C {
38 | 				rl.token <- struct{}{}
39 | 			}
40 | 		}()
41 | 
42 | 		acc.t.rateLimiters[acc.String()] = rl
43 | 	}
44 | 
45 | 	return rateLimitedRoundTripper{
46 | 		RoundTripper: rt,
47 | 		token:        rl.token,
48 | 	}
49 | }
50 | 
// rateLimitedRoundTripper wraps an http.RoundTripper so that each
// request must first obtain a token from the rate limiter's bucket.
type rateLimitedRoundTripper struct {
	http.RoundTripper
	// token yields one value per permitted request.
	token <-chan struct{}
}
55 | 
// RoundTrip blocks until a rate-limit token is available, then
// performs the request with the underlying RoundTripper.
func (rt rateLimitedRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
	<-rt.token
	return rt.RoundTripper.RoundTrip(req)
}
60 | 
// NOTE(review): this package-level map appears unused within this file;
// Timeline keeps its own rateLimiters map (created in Open). Confirm no
// other file in the package references it before removing.
var rateLimiters = make(map[string]RateLimit)

// minInterval is the floor on the delay between requests, regardless of
// how high the configured RequestsPerHour is.
const minInterval = 100 * time.Millisecond
64 | 


--------------------------------------------------------------------------------
/timeliner.go:
--------------------------------------------------------------------------------
  1 | // Timeliner - A personal data aggregation utility
  2 | // Copyright (C) 2019 Matthew Holt
  3 | //
  4 | // This program is free software: you can redistribute it and/or modify
  5 | // it under the terms of the GNU Affero General Public License as published
  6 | // by the Free Software Foundation, either version 3 of the License, or
  7 | // (at your option) any later version.
  8 | //
  9 | // This program is distributed in the hope that it will be useful,
 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 | // GNU Affero General Public License for more details.
 13 | //
 14 | // You should have received a copy of the GNU Affero General Public License
 15 | // along with this program.  If not, see <https://www.gnu.org/licenses/>.
 16 | 
 17 | // TODO: Apply license to all files
 18 | 
 19 | package timeliner
 20 | 
 21 | import (
 22 | 	"context"
 23 | 	"database/sql"
 24 | 	"fmt"
 25 | 	"io"
 26 | 	"log"
 27 | 	mathrand "math/rand"
 28 | 	"sync"
 29 | 	"time"
 30 | 
 31 | 	cuckoo "github.com/seiflotfy/cuckoofilter"
 32 | )
 33 | 
// init seeds the global math/rand source so package-level uses of
// mathrand differ between runs. (Necessary before Go 1.20, where the
// default global source was deterministic unless seeded.)
func init() {
	mathrand.Seed(time.Now().UnixNano())
}
 37 | 
// Timeline represents an opened timeline repository.
// The zero value is NOT valid; use Open() to obtain
// a valid value.
type Timeline struct {
	// db is the underlying database handle (see openDB).
	db           *sql.DB
	// repoDir is the timeline repository's directory path.
	repoDir      string
	// rateLimiters caches one rate limiter per account key
	// (see Account.NewRateLimitedRoundTripper); tickers are
	// stopped in Close.
	rateLimiters map[string]RateLimit
}
 46 | 
 47 | // Open creates/opens a timeline at the given
 48 | // repository directory. Timelines should always
 49 | // be Close()'d for a clean shutdown when done.
 50 | func Open(repo string) (*Timeline, error) {
 51 | 	db, err := openDB(repo)
 52 | 	if err != nil {
 53 | 		return nil, fmt.Errorf("opening database: %v", err)
 54 | 	}
 55 | 	return &Timeline{
 56 | 		db:           db,
 57 | 		repoDir:      repo,
 58 | 		rateLimiters: make(map[string]RateLimit),
 59 | 	}, nil
 60 | }
 61 | 
 62 | // Close frees up resources allocated from Open.
 63 | func (t *Timeline) Close() error {
 64 | 	for key, rl := range t.rateLimiters {
 65 | 		if rl.ticker != nil {
 66 | 			rl.ticker.Stop()
 67 | 			rl.ticker = nil
 68 | 		}
 69 | 		delete(t.rateLimiters, key)
 70 | 	}
 71 | 	if t.db != nil {
 72 | 		return t.db.Close()
 73 | 	}
 74 | 	return nil
 75 | }
 76 | 
// concurrentCuckoo pairs a cuckoo filter with a mutex so it can be
// shared across goroutines. Both fields must be assigned before use
// (see GetAll and Import); the zero value carries nil pointers.
type concurrentCuckoo struct {
	*cuckoo.Filter
	*sync.Mutex
}
 81 | 
 82 | // FakeCloser turns an io.Reader into an io.ReadCloser
 83 | // where the Close() method does nothing.
 84 | func FakeCloser(r io.Reader) io.ReadCloser {
 85 | 	return fakeCloser{r}
 86 | }
 87 | 
 88 | type fakeCloser struct {
 89 | 	io.Reader
 90 | }
 91 | 
 92 | // Close does nothing except satisfy io.Closer.
 93 | func (fc fakeCloser) Close() error { return nil }
 94 | 
// ctxKey is used for contexts, as recommended by
// https://golang.org/pkg/context/#WithValue. It
// is unexported so values stored by this package
// can only be accessed by this package.
type ctxKey string

// wrappedClientCtxKey is how the context value is accessed.
// NOTE(review): this value never changes and could be a const.
var wrappedClientCtxKey ctxKey = "wrapped_client"
103 | 
// CheckpointFn is a function that saves a checkpoint.
// A checkpoint lets an interrupted listing resume later
// (see Checkpoint and checkpointWrapper).
type CheckpointFn func(checkpoint []byte) error
106 | 
107 | // Checkpoint saves a checkpoint for the processing associated
108 | // with the provided context. It overwrites any previous
109 | // checkpoint. Any errors are logged.
110 | func Checkpoint(ctx context.Context, checkpoint []byte) {
111 | 	wc, ok := ctx.Value(wrappedClientCtxKey).(*WrappedClient)
112 | 
113 | 	if !ok {
114 | 		log.Printf("[ERROR][%s/%s] Checkpoint function not available; got type %T (%#v)",
115 | 			wc.ds.ID, wc.acc.UserID, wc, wc)
116 | 		return
117 | 	}
118 | 
119 | 	chkpt, err := MarshalGob(checkpointWrapper{wc.commandParams, checkpoint})
120 | 	if err != nil {
121 | 		log.Printf("[ERROR][%s/%s] Encoding checkpoint wrapper: %v", wc.ds.ID, wc.acc.UserID, err)
122 | 		return
123 | 	}
124 | 
125 | 	_, err = wc.tl.db.Exec(`UPDATE accounts SET checkpoint=? WHERE id=?`, // TODO: LIMIT 1 (see https://github.com/mattn/go-sqlite3/pull/564)
126 | 		chkpt, wc.acc.ID)
127 | 	if err != nil {
128 | 		log.Printf("[ERROR][%s/%s] Checkpoint: %v", wc.ds.ID, wc.acc.UserID, err)
129 | 		return
130 | 	}
131 | }
132 | 
// checkpointWrapper stores a provider's checkpoint along with the
// parameters of the command that initiated the process; the checkpoint
// will only be loaded and restored to the provider on next run if
// the parameters match, because it doesn't make sense to restore a
// process that has different, potentially conflicting, parameters,
// such as timeframe.
type checkpointWrapper struct {
	// Params is the stringified command parameters (see
	// WrappedClient.prepareCheckpoint, which compares against it).
	Params string
	// Data is the provider's opaque checkpoint payload.
	Data   []byte
}
143 | 
// ProcessingOptions configures how item processing is carried out.
type ProcessingOptions struct {
	// Reprocess items that are already in the timeline (see GetAll).
	Reprocess bool
	// Prune items from the timeline that the data source no longer lists.
	Prune     bool
	// Integrity re-checks listed items' data files and reprocesses changed ones.
	Integrity bool
	// Timeframe bounds which items are retrieved.
	Timeframe Timeframe
	// Merge configures how duplicate items are combined.
	Merge     MergeOptions
	// Verbose enables more logging output.
	Verbose   bool
}
153 | 
// MergeOptions configures how items are merged. By
// default, items are not merged; if an item with a
// duplicate ID is encountered, it will be replaced
// with the new item (see the "reprocess" flag).
// Merging has to be explicitly enabled.
//
// Currently, the only way to perform a merge is to
// enable "soft" merging: finding an item with the
// same timestamp and either text data or filename.
// Then, one of the item's IDs is updated to match
// the other. These merge options configure how
// the items are then combined.
//
// As it is possible and likely for both items to
// have non-empty values for the same fields, these
// "conflicts" must be resolved non-interactively.
// By default, a merge conflict prefers existing
// values (old item's field) over the new one, and
// the new one only fills in missing values. (This
// seems safest.) However, these merge options allow
// you to customize that behavior and overwrite
// existing values with the new item's fields (only
// happens if new item's field is non-empty, i.e.
// a merge will never delete existing data).
type MergeOptions struct {
	// Enables "soft" merging. This is currently the
	// only merge mechanism, so no merging occurs
	// unless this is set.
	//
	// If true, an item may be merged if it is likely
	// to be the same as an existing item, even if the
	// item IDs are different. For example, if a
	// service has multiple ways of listing items, but
	// does not provide a consistent ID for the same
	// item across listings, a soft merge will allow the
	// processing to treat them as the same as long as
	// other fields match: timestamp, and either data text
	// or data filename.
	SoftMerge bool

	// Overwrite existing (old) item's ID with the ID
	// provided by the current (new) item.
	PreferNewID bool

	// Overwrite existing item's text data.
	PreferNewDataText bool

	// Overwrite existing item's data file.
	PreferNewDataFile bool

	// Overwrite existing item's metadata.
	PreferNewMetadata bool
}
205 | 
// ListingOptions specifies parameters for listing items
// from a data source. Some data sources might not be
// able to honor all fields.
type ListingOptions struct {
	// A file from which to read the data.
	// (Used by Import for local archive/export files.)
	Filename string

	// Time bounds on which data to retrieve.
	// The respective time and item ID fields
	// which are set must never conflict.
	Timeframe Timeframe

	// A checkpoint from which to resume
	// item retrieval.
	Checkpoint []byte

	// Enable verbose output (logs).
	Verbose bool
}
225 | 


--------------------------------------------------------------------------------
/wrappedclient.go:
--------------------------------------------------------------------------------
  1 | package timeliner
  2 | 
  3 | import (
  4 | 	"context"
  5 | 	"database/sql"
  6 | 	"fmt"
  7 | 	"log"
  8 | 	"os"
  9 | 	"sync"
 10 | 	"time"
 11 | 
 12 | 	cuckoo "github.com/seiflotfy/cuckoofilter"
 13 | )
 14 | 
// WrappedClient wraps a Client instance with unexported
// fields that contain necessary state for performing
// data collection operations. Do not craft this type
// manually; use Timeline.NewClient() to obtain one.
type WrappedClient struct {
	Client
	// tl is the timeline this client stores items into.
	tl  *Timeline
	// acc is the account the items are collected for.
	acc Account
	// ds is the data source the account belongs to.
	ds  DataSource

	// lastItemRowID/lastItemTimestamp track the most recent item
	// processed; guarded by lastItemMu (see successCleanup).
	lastItemRowID     int64
	lastItemTimestamp time.Time
	lastItemMu        *sync.Mutex

	// used with checkpoints; it only makes sense to resume a checkpoint
	// if the process has the same operational parameters as before;
	// some providers (like Google Photos) even return errors if you
	// query a "next page" with different parameters
	commandParams string
}
 35 | 
// GetLatest gets the most recent items from wc. It does not prune or
// reprocess; only meant for a quick pull (error will be returned if
// procOpt is not compatible). If there are no items pulled yet, all
// items will be pulled. If procOpt.Timeframe.Until is not nil, the
// latest only up to that timestamp will be pulled, and if until is
// after the latest item, no items will be pulled.
func (wc *WrappedClient) GetLatest(ctx context.Context, procOpt ProcessingOptions) error {
	if ctx == nil {
		ctx = context.Background()
	}
	// attach wc so Checkpoint() can find it during processing
	ctx = context.WithValue(ctx, wrappedClientCtxKey, wc)

	if procOpt.Reprocess || procOpt.Prune || procOpt.Integrity || procOpt.Timeframe.Since != nil {
		return fmt.Errorf("get-latest does not support -reprocess, -prune, -integrity, or -start")
	}

	// get date and original ID of the most recent item for this
	// account from the last successful run
	var mostRecentTimestamp int64
	var mostRecentOriginalID string
	if wc.acc.lastItemID != nil {
		err := wc.tl.db.QueryRow(`SELECT timestamp, original_id
		FROM items WHERE id=? LIMIT 1`, *wc.acc.lastItemID).Scan(&mostRecentTimestamp, &mostRecentOriginalID)
		if err != nil && err != sql.ErrNoRows {
			return fmt.Errorf("getting most recent item: %v", err)
		}
	}

	// constrain the pull to the recent timeframe
	timeframe := Timeframe{Until: procOpt.Timeframe.Until}
	if mostRecentTimestamp > 0 {
		// timestamps are stored as Unix seconds
		ts := time.Unix(mostRecentTimestamp, 0)
		timeframe.Since = &ts
		if timeframe.Until != nil && timeframe.Until.Before(ts) {
			// most recent item is already after "until"/end date; nothing to do
			return nil
		}
	}
	if mostRecentOriginalID != "" {
		timeframe.SinceItemID = &mostRecentOriginalID
	}

	// must happen before processing starts, since it also records the
	// command parameters that future checkpoints are saved with
	checkpoint := wc.prepareCheckpoint(timeframe)

	wg, ch := wc.beginProcessing(concurrentCuckoo{}, procOpt)

	err := wc.Client.ListItems(ctx, ch, ListingOptions{
		Timeframe:  timeframe,
		Checkpoint: checkpoint,
		Verbose:    procOpt.Verbose,
	})
	if err != nil {
		return fmt.Errorf("getting items from service: %v", err)
	}

	// wait for processing to complete
	wg.Wait()

	err = wc.successCleanup()
	if err != nil {
		return fmt.Errorf("processing completed, but error cleaning up: %v", err)
	}

	return nil
}
101 | 
// GetAll gets all the items using wc. If procOpt.Reprocess is true, items that
// are already in the timeline will be re-processed. If procOpt.Prune is true,
// items that are not listed on the data source by wc will be removed
// from the timeline at the end of the listing. If procOpt.Integrity is true,
// all items that are listed by wc that exist in the timeline and which
// consist of a data file will be opened and checked for integrity; if
// the file has changed, it will be reprocessed.
func (wc *WrappedClient) GetAll(ctx context.Context, procOpt ProcessingOptions) error {
	if wc.Client == nil {
		return fmt.Errorf("no client")
	}
	if ctx == nil {
		ctx = context.Background()
	}
	// attach wc so Checkpoint() can find it during processing
	ctx = context.WithValue(ctx, wrappedClientCtxKey, wc)

	// when pruning, a cuckoo filter records every item seen during the
	// listing so unseen items can be deleted afterward (see doPrune)
	var cc concurrentCuckoo
	if procOpt.Prune {
		cc.Filter = cuckoo.NewFilter(10000000) // 10mil = ~16 MB on 64-bit
		cc.Mutex = new(sync.Mutex)
	}

	// must happen before processing starts, since it also records the
	// command parameters that future checkpoints are saved with
	checkpoint := wc.prepareCheckpoint(procOpt.Timeframe)

	wg, ch := wc.beginProcessing(cc, procOpt)

	err := wc.Client.ListItems(ctx, ch, ListingOptions{
		Checkpoint: checkpoint,
		Timeframe:  procOpt.Timeframe,
		Verbose:    procOpt.Verbose,
	})
	if err != nil {
		return fmt.Errorf("getting items from service: %v", err)
	}

	// wait for processing to complete
	wg.Wait()

	err = wc.successCleanup()
	if err != nil {
		return fmt.Errorf("processing completed, but error cleaning up: %v", err)
	}

	// commence prune, if requested
	if procOpt.Prune {
		err := wc.doPrune(cc)
		if err != nil {
			return fmt.Errorf("processing completed, but error pruning: %v", err)
		}
	}

	return nil
}
155 | 
156 | // prepareCheckpoint sets the current command parameters on wc for
157 | // checkpoints to be saved later on, and then returns the last
158 | // checkpoint data only if its parameters match the new/current ones.
159 | // This prevents trying to resume a process with different parameters
160 | // which can cause errors.
161 | func (wc *WrappedClient) prepareCheckpoint(tf Timeframe) []byte {
162 | 	wc.commandParams = tf.String()
163 | 	if wc.acc.cp == nil || wc.acc.cp.Params != wc.commandParams {
164 | 		return nil
165 | 	}
166 | 	return wc.acc.cp.Data
167 | }
168 | 
// successCleanup runs after a listing finishes without error: it
// clears the account's checkpoint and records the most recent item's
// row ID so future get-latest operations know where to resume.
func (wc *WrappedClient) successCleanup() error {
	// clear checkpoint
	_, err := wc.tl.db.Exec(`UPDATE accounts SET checkpoint=NULL WHERE id=?`, wc.acc.ID) // TODO: limit 1 (see https://github.com/mattn/go-sqlite3/pull/802)
	if err != nil {
		return fmt.Errorf("clearing checkpoint: %v", err)
	}
	wc.acc.checkpoint = nil

	// update the last item ID, to advance the window for future get-latest operations
	// (lastItemRowID is written by processing goroutines, hence the mutex)
	wc.lastItemMu.Lock()
	lastItemID := wc.lastItemRowID
	wc.lastItemMu.Unlock()
	if lastItemID > 0 {
		_, err = wc.tl.db.Exec(`UPDATE accounts SET last_item_id=? WHERE id=?`, lastItemID, wc.acc.ID) // TODO: limit 1
		if err != nil {
			return fmt.Errorf("advancing most recent item ID: %v", err)
		}
	}

	return nil
}
190 | 
191 | // Import is like GetAll but for a locally-stored archive or export file that can
192 | // simply be opened and processed, rather than needing to run over a network. See
193 | // the godoc for GetAll. This is only for data sources that support Import.
194 | func (wc *WrappedClient) Import(ctx context.Context, filename string, procOpt ProcessingOptions) error {
195 | 	if wc.Client == nil {
196 | 		return fmt.Errorf("no client")
197 | 	}
198 | 
199 | 	var cc concurrentCuckoo
200 | 	if procOpt.Prune {
201 | 		cc.Filter = cuckoo.NewFilter(10000000) // 10mil = ~16 MB on 64-bit
202 | 		cc.Mutex = new(sync.Mutex)
203 | 	}
204 | 
205 | 	wg, ch := wc.beginProcessing(cc, procOpt)
206 | 
207 | 	err := wc.Client.ListItems(ctx, ch, ListingOptions{
208 | 		Filename:   filename,
209 | 		Checkpoint: wc.acc.checkpoint,
210 | 		Timeframe:  procOpt.Timeframe,
211 | 		Verbose:    procOpt.Verbose,
212 | 	})
213 | 	if err != nil {
214 | 		return fmt.Errorf("importing: %v", err)
215 | 	}
216 | 
217 | 	// wait for processing to complete
218 | 	wg.Wait()
219 | 
220 | 	err = wc.successCleanup()
221 | 	if err != nil {
222 | 		return fmt.Errorf("processing completed, but error cleaning up: %v", err)
223 | 	}
224 | 
225 | 	// commence prune, if requested
226 | 	if procOpt.Prune {
227 | 		err := wc.doPrune(cc)
228 | 		if err != nil {
229 | 			return fmt.Errorf("processing completed, but error pruning: %v", err)
230 | 		}
231 | 	}
232 | 
233 | 	return nil
234 | }
235 | 
236 | func (wc *WrappedClient) doPrune(cuckoo concurrentCuckoo) error {
237 | 	// absolutely do not allow a prune to happen if the account
238 | 	// has a checkpoint; this is because we don't store the cuckoo
239 | 	// filter with checkpoints, meaning that the list of items
240 | 	// that have been seen is INCOMPLETE, and pruning on that
241 | 	// would lead to data loss. TODO: Find a way to store the
242 | 	// cuckoo filter with a checkpoint...
243 | 	var ckpt []byte
244 | 	err := wc.tl.db.QueryRow(`SELECT checkpoint FROM accounts WHERE id=? LIMIT 1`,
245 | 		wc.acc.ID).Scan(&ckpt)
246 | 	if err != nil {
247 | 		return fmt.Errorf("querying checkpoint: %v", err)
248 | 	}
249 | 	if len(ckpt) > 0 {
250 | 		return fmt.Errorf("checkpoint exists; refusing to prune for fear of incomplete item listing")
251 | 	}
252 | 
253 | 	// deleting items can't happen while iterating the rows
254 | 	// since the database table locks; i.e. those two operations
255 | 	// are in conflict, so we can't do the delete until we
256 | 	// close the result rows; hence, we have to load each
257 | 	// item to delete into memory (sigh) and then delete after
258 | 	// the listing is complete
259 | 	itemsToDelete, err := wc.listItemsToDelete(cuckoo)
260 | 	if err != nil {
261 | 		return fmt.Errorf("listing items to delete: %v", err)
262 | 	}
263 | 
264 | 	for _, rowID := range itemsToDelete {
265 | 		err := wc.deleteItem(rowID)
266 | 		if err != nil {
267 | 			log.Printf("[ERROR][%s/%s] Deleting item: %v (item_id=%d)",
268 | 				wc.ds.ID, wc.acc.UserID, err, rowID)
269 | 		}
270 | 	}
271 | 
272 | 	return nil
273 | }
274 | 
275 | func (wc *WrappedClient) listItemsToDelete(cuckoo concurrentCuckoo) ([]int64, error) {
276 | 	rows, err := wc.tl.db.Query(`SELECT id, original_id FROM items WHERE account_id=?`, wc.acc.ID)
277 | 	if err != nil {
278 | 		return nil, fmt.Errorf("selecting all items from account: %v (account_id=%d)", err, wc.acc.ID)
279 | 	}
280 | 	defer rows.Close()
281 | 
282 | 	var itemsToDelete []int64
283 | 	for rows.Next() {
284 | 		var rowID int64
285 | 		var originalID string
286 | 		err := rows.Scan(&rowID, &originalID)
287 | 		if err != nil {
288 | 			return nil, fmt.Errorf("scanning item: %v", err)
289 | 		}
290 | 		if originalID == "" {
291 | 			continue
292 | 		}
293 | 		cuckoo.Lock()
294 | 		existsOnService := cuckoo.Lookup([]byte(originalID))
295 | 		cuckoo.Unlock()
296 | 		if !existsOnService {
297 | 			itemsToDelete = append(itemsToDelete, rowID)
298 | 		}
299 | 	}
300 | 	if err = rows.Err(); err != nil {
301 | 		return nil, fmt.Errorf("iterating item rows: %v", err)
302 | 	}
303 | 
304 | 	return itemsToDelete, nil
305 | }
306 | 
307 | func (wc *WrappedClient) deleteItem(rowID int64) error {
308 | 	// before deleting the row, find out whether this item
309 | 	// has a data file and is the only one referencing it
310 | 	var count int
311 | 	var dataFile string
312 | 	err := wc.tl.db.QueryRow(`SELECT COUNT(*), data_file FROM items
313 | 		WHERE data_file = (SELECT data_file FROM items
314 | 							WHERE id=? AND data_file IS NOT NULL
315 | 							AND data_file != "" LIMIT 1)`,
316 | 		rowID).Scan(&count, &dataFile)
317 | 	if err != nil {
318 | 		return fmt.Errorf("querying count of rows sharing data file: %v", err)
319 | 	}
320 | 
321 | 	_, err = wc.tl.db.Exec(`DELETE FROM items WHERE id=?`, rowID) // TODO: limit 1 (see https://github.com/mattn/go-sqlite3/pull/802)
322 | 	if err != nil {
323 | 		return fmt.Errorf("deleting item from DB: %v", err)
324 | 	}
325 | 
326 | 	if count == 1 {
327 | 		err := os.Remove(wc.tl.fullpath(dataFile))
328 | 		if err != nil {
329 | 			return fmt.Errorf("deleting associated data file from disk: %v", err)
330 | 		}
331 | 	}
332 | 
333 | 	return nil
334 | }
335 | 
// Simple read-only accessors for the client's identifying metadata.

// DataSourceName returns the name of the data source wc was created from.
func (wc *WrappedClient) DataSourceName() string { return wc.ds.Name }

// DataSourceID returns the ID of the data source wc was created from.
func (wc *WrappedClient) DataSourceID() string { return wc.ds.ID }

// UserID returns the ID of the user associated with this client.
func (wc *WrappedClient) UserID() string { return wc.acc.UserID }
344 | 


--------------------------------------------------------------------------------