├── .gitignore ├── README.md ├── drive.py ├── gitdriver.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | gd.conf 2 | venv 3 | *.pyc 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Synopsis 2 | 3 | gitdriver.py [-h] [--config CONFIG] [--text] [--html] docid 4 | 5 | ## Options 6 | 7 | - `--config CONFIG`, `-f CONFIG` -- path to configuration file 8 | - `--text`, `-T` -- fetch plain text content (Look out for BOM) 9 | - `--html`, `-H` -- fetch HTML content 10 | - `--mime-type` -- specify arbitrary mime type 11 | 12 | ## Example usage: 13 | 14 | $ python gitdriver.py 1j6Ygv0_this_is_a_fake_document_id_a8Q66mvt4 15 | Create repository "Untitled" 16 | Initialized empty Git repository in /home/lars/projects/gitdriver/Untitled/.git/ 17 | [master (root-commit) 27baec9] revision from 2013-01-08T21:57:38.837Z 18 | 1 file changed, 1 insertion(+) 19 | create mode 100644 content 20 | [master 132175a] revision from 2013-01-08T21:57:45.800Z 21 | 1 file changed, 1 insertion(+), 1 deletion(-) 22 | [master eb2302c] revision from 2013-01-09T01:47:29.593Z 23 | 1 file changed, 5 insertions(+), 1 deletion(-) 24 | $ ls Untiled 25 | content 26 | $ cd Untitled 27 | $ git log --oneline 28 | d41ad6e revision from 2013-01-09T01:47:29.593Z 29 | 8d3e3ec revision from 2013-01-08T21:57:45.800Z 30 | ccc0bdd revision from 2013-01-08T21:57:38.837Z 31 | 32 | ## Google setup 33 | 34 | You will need to create an OAuth client id and secret for use with 35 | this application, the Drive API [Python quickstart][] has links to the 36 | necessary steps. 37 | 38 | [python quickstart]: https://developers.google.com/drive/v3/web/quickstart/python 39 | 40 | ## Configuration 41 | 42 | In order to make this go you will need to create file named `gd.conf` 43 | where the code can find it (typically the directory in which you're 44 | running the code, but you can also use the `-f` command line option to 45 | specify an alternate location). 46 | 47 | The file is a simple YAML document that should look like this: 48 | 49 | googledrive: 50 | client id: YOUR_CLIENT_ID 51 | client secret: YOUR_CLIENT_SECRET 52 | 53 | Where `YOUR_CLIENT_ID` and `YOUR_CLIENT_SECRET` are replaced with the 54 | appropriate values from Google that you established in the previous 55 | step. 56 | 57 | -------------------------------------------------------------------------------- /drive.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import os 4 | import sys 5 | import argparse 6 | import urllib 7 | import yaml 8 | import json 9 | import requests 10 | 11 | OAUTH_URI='https://accounts.google.com/o/oauth2' 12 | VALIDATE_URI='https://www.googleapis.com/oauth2/v1/tokeninfo' 13 | DRIVE_URI='https://www.googleapis.com/drive/v2' 14 | 15 | OAUTH_SCOPES = [ 16 | 'https://www.googleapis.com/auth/userinfo.email', 17 | 'https://www.googleapis.com/auth/userinfo.profile', 18 | ] 19 | 20 | DRIVE_RW_SCOPE = 'https://www.googleapis.com/auth/drive' 21 | DRIVE_RO_SCOPE = 'https://www.googleapis.com/auth/drive.readonly' 22 | 23 | REDIRECT_URI='urn:ietf:wg:oauth:2.0:oob' 24 | 25 | class GoogleDrive(object): 26 | def __init__(self, 27 | client_id, 28 | client_secret, 29 | credentials=None, 30 | scopes=None): 31 | 32 | self.client_id = client_id 33 | self.client_secret = client_secret 34 | self.scopes = OAUTH_SCOPES 35 | self.session = requests.Session() 36 | self.token = None 37 | 38 | if scopes is not None: 39 | self.scopes.extend(scopes) 40 | 41 | if credentials is None: 42 | credentials = os.path.join(os.environ['HOME'], '.googledrive') 43 | 44 | self.credentials = credentials 45 | 46 | def authenticate(self): 47 | '''Establish Google credentials. This will load stored credentials 48 | and validate them, and it will call self.login() if stored 49 | credentials are unavailable or fail to validate.''' 50 | 51 | self.load_credentials() 52 | 53 | if self.token is None: 54 | self.login() 55 | else: 56 | try: 57 | # Always refresh the token. This is a dirty hack to avoid 58 | # doing anything more complicated. 59 | self.refresh() 60 | self.validate() 61 | except ValueError: 62 | self.login() 63 | 64 | # Add an Authorization header to all requests made through 65 | # our requests.Session object. 66 | self.session.headers.update({ 67 | 'Authorization': 'Bearer %(access_token)s' % self.token 68 | }) 69 | 70 | def refresh(self): 71 | '''Use a refresh_token to refresh the access_token. See 72 | https://developers.google.com/drive/about-auth''' 73 | 74 | if not 'refresh_token' in self.token: 75 | raise ValueError('no refresh token') 76 | 77 | r = self.session.post('%s/token' % OAUTH_URI, { 78 | 'client_id': self.client_id, 79 | 'client_secret': self.client_secret, 80 | 'refresh_token': self.token['refresh_token'], 81 | 'grant_type': 'refresh_token'}) 82 | 83 | if not r: 84 | raise ValueError('failed to refresh token') 85 | 86 | self.token['access_token'] = r.json()['access_token'] 87 | self.store_credentials() 88 | 89 | def login(self): 90 | '''Perform OAuth authentication.''' 91 | 92 | params = { 93 | 'client_id': self.client_id, 94 | 'scope': ' '.join(OAUTH_SCOPES), 95 | 'redirect_uri': REDIRECT_URI, 96 | 'access_type': 'offline', 97 | 'response_type': 'code', 98 | } 99 | 100 | url = '%s?%s' % ('%s/auth' % OAUTH_URI, urllib.urlencode(params)) 101 | 102 | print 'Point your browser at the following URL and then ' 103 | print 'enter the authorization code at the prompt:' 104 | print 105 | print url 106 | print 107 | code = raw_input('Enter code: ') 108 | self.code = code 109 | r = requests.post('%s/token' % OAUTH_URI, { 110 | 'code': code, 111 | 'client_id': self.client_id, 112 | 'client_secret': self.client_secret, 113 | 'redirect_uri': REDIRECT_URI, 114 | 'grant_type': 'authorization_code', 115 | }) 116 | 117 | if not r: 118 | raise ValueError('failed to authenticate') 119 | 120 | self.token = r.json() 121 | self.store_credentials() 122 | 123 | def store_credentials(self): 124 | '''Write credentials to file.''' 125 | with open(self.credentials, 'w') as fd: 126 | fd.write(yaml.safe_dump(self.token, encoding='utf-8', 127 | default_flow_style=False)) 128 | 129 | def load_credentials(self): 130 | '''Read credentials from file.''' 131 | try: 132 | with open(self.credentials) as fd: 133 | self.token = yaml.load(fd) 134 | except IOError: 135 | pass 136 | 137 | def validate(self): 138 | '''Validate token.''' 139 | 140 | r = requests.get('%s?access_token=%s' % ( 141 | VALIDATE_URI, self.token['access_token'] 142 | )) 143 | 144 | self._validate_response = r 145 | 146 | if not r: 147 | raise ValueError('failed to validate') 148 | 149 | def files(self): 150 | '''Return an iterator over the files in Google Drive.''' 151 | 152 | r = self.session.get('%s/files' % DRIVE_URI).json() 153 | 154 | for fspec in r['items']: 155 | yield fspec 156 | 157 | def get_file_metadata(self, fid): 158 | '''Return the file metadata for a file identified by its ID.''' 159 | 160 | return self.session.get('%s/files/%s' % (DRIVE_URI, fid)).json() 161 | 162 | def revisions(self, fid): 163 | '''Return an iterator over the revisions of a file 164 | identified by its ID.''' 165 | 166 | r = self.session.get('%s/files/%s/revisions' % ( 167 | DRIVE_URI, fid)).json() 168 | 169 | for rev in r['items']: 170 | yield rev 171 | 172 | if __name__ == '__main__': 173 | cfg = yaml.load(open('gd.conf')) 174 | gd = GoogleDrive( 175 | client_id=cfg['googledrive']['client id'], 176 | client_secret=cfg['googledrive']['client secret'], 177 | scopes=[DRIVE_RW_SCOPE], 178 | ) 179 | 180 | gd.authenticate() 181 | 182 | -------------------------------------------------------------------------------- /gitdriver.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import os 4 | import sys 5 | import argparse 6 | import subprocess 7 | import yaml 8 | 9 | from drive import GoogleDrive, DRIVE_RW_SCOPE 10 | 11 | def parse_args(): 12 | p = argparse.ArgumentParser() 13 | p.add_argument('--config', '-f', default='gd.conf') 14 | p.add_argument('--text', '-T', action='store_const', const='text/plain', 15 | dest='mime_type') 16 | p.add_argument('--html', '-H', action='store_const', const='text/html', 17 | dest='mime_type') 18 | p.add_argument('--mime-type', dest='mime_type') 19 | p.add_argument('--raw', '-R', action='store_true', 20 | help='Download original document if possible.') 21 | p.add_argument('docid') 22 | 23 | return p.parse_args() 24 | 25 | def main(): 26 | opts = parse_args() 27 | if not opts.mime_type: 28 | print "Exactly one mime-type must be given!" 29 | exit(1) 30 | cfg = yaml.load(open(opts.config)) 31 | gd = GoogleDrive( 32 | client_id=cfg['googledrive']['client id'], 33 | client_secret=cfg['googledrive']['client secret'], 34 | scopes=[DRIVE_RW_SCOPE], 35 | ) 36 | 37 | # Establish our credentials. 38 | gd.authenticate() 39 | 40 | # Get information about the specified file. This will throw 41 | # an exception if the file does not exist. 42 | md = gd.get_file_metadata(opts.docid) 43 | 44 | # Initialize the git repository. 45 | print 'Create repository "%(title)s"' % md 46 | subprocess.call(['git','init',md['title']]) 47 | os.chdir(md['title']) 48 | 49 | # Iterate over the revisions (from oldest to newest). 50 | for rev in gd.revisions(opts.docid): 51 | with open('content', 'w') as fd: 52 | if 'exportLinks' in rev and not opts.raw: 53 | # If the file provides an 'exportLinks' dictionary, 54 | # download the requested MIME type. 55 | r = gd.session.get(rev['exportLinks'][opts.mime_type]) 56 | elif 'downloadUrl' in rev: 57 | # Otherwise, if there is a downloadUrl, use that. 58 | r = gd.session.get(rev['downloadUrl']) 59 | else: 60 | raise KeyError('unable to download revision') 61 | 62 | # Write file content into local file. 63 | for chunk in r.iter_content(): 64 | fd.write(chunk) 65 | 66 | # Commit changes to repository. 67 | subprocess.call(['git', 'add', 'content']) 68 | subprocess.call(['git', 'commit', '-m', 69 | 'revision from %s' % rev['modifiedDate']]) 70 | 71 | if __name__ == '__main__': 72 | main() 73 | 74 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | PyYAML==3.11 2 | requests==2.9.1 3 | wheel==0.24.0 4 | --------------------------------------------------------------------------------