├── airscraper ├── __init__.py └── airscraper.py ├── requirements.txt ├── tests └── test_airscraper.py ├── setup.py ├── LICENSE ├── .gitignore ├── README.md └── notebook └── Airtable Scraping CSV.ipynb /airscraper/__init__.py: -------------------------------------------------------------------------------- 1 | from .airscraper import * 2 | --------------------------------------------------------------------------------
/requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4==4.9.1 2 | pandas==1.1.1 3 | requests --------------------------------------------------------------------------------
/tests/test_airscraper.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from airscraper import AirScraper 3 | 4 | VIEW_PROTECTED = "https://airtable.com/shr5nH7C01vm95E1Z" 5 | VIEW_PASS = "123456" 6 | VIEW_UNPROTECTED = "https://airtable.com/shrSodX4SH7WDXeBS" 7 | 8 | @pytest.fixture 9 | def unprotected_view() -> AirScraper: 10 | return AirScraper(VIEW_UNPROTECTED) 11 | 12 | @pytest.fixture 13 | def protected_view_with_pass() -> AirScraper: 14 | return AirScraper(VIEW_PROTECTED, password=VIEW_PASS) 15 | 16 | def test_protected_no_pass_conn_error(): 17 | with pytest.raises(ConnectionError): 18 | AirScraper(VIEW_PROTECTED) 19 | 20 | def test_protected_pass_verification(protected_view_with_pass: AirScraper): 21 | assert protected_view_with_pass.is_protected 22 | 23 | def test_unprotected_pass_verification(unprotected_view: AirScraper): 24 | assert not unprotected_view.is_protected 25 | 26 | --------------------------------------------------------------------------------
/setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open("README.md", "r") as fh: 4 | long_description = fh.read() 5 | 6 | setuptools.setup( 7 | name='airscraper', 8 | version='0.1.4', 9 | author="Aditya Rachman Putra", 10 | author_email="adityarputra@gmail.com", 11 | description="Airtable Download CSV helper", 12 | long_description=long_description, 13 | long_description_content_type="text/markdown", 14 | url="https://github.com/banditelol/airscraper", 15 | packages=setuptools.find_packages(), 16 | 17 | entry_points={ 18 | 'console_scripts': [ 19 | 'airscraper = airscraper.airscraper:main' 20 | ] 21 | }, 22 | classifiers=[ 23 | "Programming Language :: Python :: 3", 24 | "License :: OSI Approved :: MIT License", 25 | "Operating System :: OS Independent", 26 | ], 27 | install_requires=[ 28 | 'beautifulsoup4', 29 | 'pandas', 30 | 'requests', 31 | ] 32 | 33 | ) 34 | --------------------------------------------------------------------------------
/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Aditya Rachman Putra 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software.
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | 134 | # pytype static type analyzer 135 | .pytype/ 136 | 137 | # Cython debug symbols 138 | cython_debug/ 139 | --------------------------------------------------------------------------------
/README.md: -------------------------------------------------------------------------------- 1 | # Airscraper 2 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/banditelol/airscraper/blob/master/notebook/Airtable%20Scraping%20CSV.ipynb) 3 | [![PyPI version](https://badge.fury.io/py/airscraper.svg)](https://badge.fury.io/py/airscraper) 4 |
5 | A simple scraper to programmatically download the CSV export of any Airtable shared view. 6 | Use it if: 7 | - You want to download a shared view periodically 8 | - You don't mind the shared view being accessible essentially without authorization 9 |
10 | ## Requirements 11 | Because it's a simple scraper, only a few packages are needed: 12 | - BeautifulSoup4 13 | - Pandas 14 |
15 | ## Installation 16 | 17 | ### Using pip (Recommended) 18 | 19 | `pip install airscraper` 20 | 21 | ### Build From Source 22 | - Install the build dependencies: 23 | ``` Bash 24 | pip install --upgrade pip setuptools wheel 25 | pip install tqdm 26 | pip install --user --upgrade twine 27 | ``` 28 | - Build the package 29 | - `python setup.py bdist_wheel` 30 | - Install the built wheel 31 | - `pip install --upgrade dist/airscraper-0.1.4-py3-none-any.whl` 32 | - Use it without prefixing `python` 33 | - `airscraper [url]` 34 |
35 | ### Direct Execution (For Testing) 36 | - Clone this project 37 | - Install the requirements 38 | - `pip install -r requirements.txt` 39 | - Run the code 40 | - `python airscraper/airscraper.py [url]` 41 |
42 | ## Usage 43 | 44 | Create a [shared view link](https://support.airtable.com/hc/en-us/articles/205752117-Creating-a-base-share-link-or-a-view-share-link#viewsharelink) and use that link to download the shared view as CSV. Every `[url]` mentioned in the examples refers to the shared view link you get from this step (it contains a share ID starting with `shr`, e.g. `https://airtable.com/shrXXXXXXXXXXXXXX`).
45 | 46 | ### As CLI 47 | 48 | ``` Bash 49 | # Print the result to the terminal 50 | python airscraper/airscraper.py [url] 51 | 52 | # Pipe the result to a csv file 53 | python airscraper/airscraper.py [url] > [filename].csv 54 | 55 | ``` 56 |
57 | ### As Python Package 58 | 59 | ``` Python 60 | from airscraper import AirScraper 61 | 62 | client = AirScraper([url]) 63 | data = client.get_table() 64 | 65 | # print the result 66 | print(data) 67 | 68 | # save it to a file 69 | with open('data.csv','w') as f: 70 | f.write(data) 71 | 72 | # use it with pandas 73 | from io import StringIO 74 | import pandas as pd 75 | 76 | df = pd.read_csv(StringIO(data), sep=',') 77 | df.head() 78 | ``` 79 |
80 | ## Help 81 | ``` 82 | usage: airscraper [-h] [-l LOCALE] [-j] [-id INDEXCOLUMN] [-tz TIMEZONE] [-p PASSWORD] view_url 83 | 84 | Download CSV from an Airtable shared view link. You can pass the result to a file using 85 | '> name.csv' 86 | 87 | positional arguments: 88 | view_url URL generated from sharing a view using a link in Airtable 89 | 90 | optional arguments: 91 | -h, --help show this help message and exit 92 | -l LOCALE, --locale LOCALE Your locale, defaults to 'en' 93 | -j, --json Return JSON instead of CSV, defaults to False 94 | -id INDEXCOLUMN, --indexcolumn INDEXCOLUMN When outputting JSON, name of the column to use as the index, defaults to None 95 | -tz TIMEZONE, --timezone TIMEZONE Your timezone as a URL encoded string, defaults to 'Asia/Jakarta' 96 | -p PASSWORD, --password PASSWORD Password for the shared link, not your Airtable password 97 | ``` 98 |
99 | ## What's next 100 | Currently I have several things in mind: 101 | - ✅ Make this an installable package 102 | - Add support for running it on FaaS platforms (most use cases I can think of are related to this) 103 | - ✅ Create a proper package that can be imported (so I can use it in my ETL scripts) 104 | - ✅ Fill in LICENSE and setup.py (to be honest I had no idea at first what to put into them) 105 | - It turns out there are a lot of resources [out there](https://dzone.com/articles/executable-package-pip-install) if you know what to look for :) 106 |
107 | ## Contributing 108 | If you have a similar problem or any ideas to improve this package, please let me know in the issues or hit me up on Twitter [@BanditelolRP](https://twitter.com/banditelolRP) 109 |
110 | ### Development 111 | 112 | If you're going to develop it yourself, here's my overall workflow: 113 | 114 | **1. Create a virtual environment** 115 | 116 | I usually use `venv` on Python 3.8 to create a new virtual environment 117 | 118 | ```bash 119 | python -m venv venv 120 | # and activate the environment 121 | source venv/bin/activate 122 | ``` 123 |
124 | **2. Install the dependencies** 125 | 126 | Install the necessary requirements and install the package in editable mode for development 127 | 128 | ```bash 129 | pip install wheel pytest -q 130 | pip install -r requirements.txt 131 | pip install -e . 132 | ``` 133 |
134 | **3. Play around with the code** 135 | 136 | You can browse the notebook for an explanation of how it works and some example use cases, and I'd really appreciate help with documentation and testing. Have fun!
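
For the periodic download use case mentioned at the top, here is a minimal sketch (not part of the package; the script name, file naming, and placeholder view URL are purely illustrative) of a script you could run from cron or any other scheduler. It only relies on the documented `AirScraper` constructor and `get_csv()`:

```python
# backup_view.py -- illustrative sketch, replace VIEW_URL with your own shared view link
from datetime import datetime

from airscraper import AirScraper

VIEW_URL = "https://airtable.com/shrXXXXXXXXXXXXXX"  # placeholder share link

def backup():
    # pass password="..." as well if the shared view is password protected
    client = AirScraper(VIEW_URL)
    data = client.get_csv()
    # write the snapshot to a timestamped file next to the script
    filename = f"airtable-backup-{datetime.now():%Y%m%d-%H%M%S}.csv"
    with open(filename, "w", encoding="utf-8") as f:
        f.write(data)

if __name__ == "__main__":
    backup()
```

A crontab entry along the lines of `0 6 * * * /path/to/venv/bin/python /path/to/backup_view.py` (paths are placeholders) would then snapshot the view every morning.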
137 | -------------------------------------------------------------------------------- /airscraper/airscraper.py: -------------------------------------------------------------------------------- 1 | import re 2 | import requests 3 | import argparse 4 | import json 5 | import pandas as pd 6 | from io import StringIO 7 | from bs4 import BeautifulSoup as bs 8 | 9 | # TODO: Create Docstring for the class and each methods 10 | class AirScraper: 11 | def __init__(self, url: str, sess: requests.Session = None, password: str = None, locale: str = 'en', tz=r'Asia%2FJakarta'): 12 | self.url = url 13 | self.locale = locale 14 | self.tz = tz 15 | self.params = {} 16 | if sess is None: 17 | self.sess = requests.Session() 18 | else: 19 | self.sess = sess 20 | self.password = password 21 | self.page = None 22 | self._is_passwd_protected() 23 | if self.is_protected: 24 | self.login() 25 | self.update_params() 26 | 27 | def _get_shareId(self): 28 | try: 29 | return re.search(r"(shr[^/]+)", self.url).group(1) 30 | except: 31 | raise ValueError( 32 | "malformed url, expecting url with share ID (shr*)") 33 | 34 | def _is_passwd_protected(self) -> None: 35 | try: 36 | res = self.sess.get(self.url) 37 | if res.status_code == 200: 38 | self.is_protected = "passwordProtectedShareFormContainer" in res.text 39 | else: 40 | res.raise_for_status() 41 | except: 42 | raise 43 | 44 | def login(self) -> None: 45 | res = self.sess.get(self.url) 46 | shareId = self._get_shareId() 47 | csrf = re.search(r'csrfToken":"([^"]+)', res.text).group(1) 48 | data = { 49 | "shareId": shareId, 50 | "OriginalUrl": "/"+shareId, 51 | "_csrf": csrf, 52 | "password": self.password 53 | } 54 | self.page = self.sess.post( 55 | f"https://airtable.com/{shareId}/submitPassword", data=data) 56 | 57 | def update_params(self) -> None: 58 | # TODO: accessPolicy contains Expiry, can be used to better cache the result 59 | if self.page is None: 60 | self.page = self.sess.get(self.url) 61 | 62 | html = bs(self.page.text, features='html.parser') 63 | try: 64 | script = html.title.find_next('script') 65 | self.view_id = re.search( 66 | r"(viw[a-zA-Z0-9]+)", str(script)).group(1) 67 | access_policy = re.search( 68 | r"accessPolicy=([a-zA-Z0-9%*\-.,]+)", str(script)).group(1) 69 | app_id = re.search( 70 | r"\"x-airtable-application-id\":\"(app[a-zA-Z0-9]+)", str(script)).group(1) 71 | self.params = {"x-time-zone": self.tz, "x-user-locale": self.locale, 72 | "x-airtable-application-id": app_id, "accessPolicy": access_policy} 73 | except: 74 | raise ConnectionError( 75 | "Unauthorized Access, please try again by providing password, i.e. Airscraper(url,password=password)") 76 | 77 | def get_csv(self) -> str: 78 | self.csv_url = f"https://airtable.com/v0.3/view/{self.view_id}/downloadCsv?" 
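        # Note: the values in self.params (accessPolicy, timezone) are already URL-encoded,
        # so handing them to requests via `params=` would percent-encode them a second time;
        # that is presumably why the query string is assembled by hand below.
        # An untested standard-library alternative would be something like:
        #     from urllib.parse import urlencode
        #     self.csv_url += urlencode(self.params, quote_via=lambda value, *args: value)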
79 | # TODO: use urllib parsing because for some reason params can't be passed to `get` 80 | for (k, v) in self.params.items(): 81 | self.csv_url += k+"="+v+"&" 82 | self.csv_url = self.csv_url[:-1] 83 | r = self.sess.get(self.csv_url) 84 | r.encoding = "utf-8" 85 | # Remove the BOM character (\ufeff) from the beginning of the csv 86 | if "\ufeff" in r.text: 87 | return r.text.replace("\ufeff","") 88 | else: 89 | return r.text 90 |
91 | def get_table(self): 92 | """Alias for get_csv 93 | 94 | Returns: 95 | str: string containing the comma-separated values of the table 96 | """ 97 | return self.get_csv() 98 | 99 | def get_df(self) -> pd.DataFrame: 100 | return pd.read_csv(StringIO(self.get_csv())) 101 |
102 | def get_json(self, orient="index", indent=2, indexcolumn=None) -> str: 103 | """Return the table data as a JSON-formatted string 104 | 105 | Args: 106 | orient (str, optional): orient of the JSON, similar to how the pandas orient argument works. Defaults to "index". 107 | indent (int, optional): indentation space for pretty printing. Defaults to 2. 108 | indexcolumn (str, optional): name of the column to be used as index; works if the column contains only unique values. Defaults to None. 109 | 110 | Returns: 111 | str: JSON string representation of the table 112 | """ 113 | df = self.get_df() 114 | if indexcolumn: 115 | df = df.set_index(indexcolumn) 116 | return json.dumps(df.to_dict(orient), indent=indent) 117 | 118 |
119 | def main(): 120 | parser = argparse.ArgumentParser( 121 | description="Download CSV from an Airtable shared view link. You can pass the result to a file using '> name.csv'", prog="airscraper") 122 | 123 | parser.add_argument( 124 | 'view_url', help="URL generated from sharing a view using a link in Airtable") 125 | parser.add_argument('-l', '--locale', default='en', 126 | help="Your locale, defaults to 'en'") 127 | parser.add_argument('-j', '--json', action="store_true", 128 | help="Return JSON instead of CSV, defaults to False") 129 | parser.add_argument('-id', '--indexcolumn', default=None, 130 | help="When outputting JSON, name of the column to use as the index, defaults to None") 131 | parser.add_argument('-tz', '--timezone', default=r'Asia%2FJakarta', 132 | help="Your timezone as a URL encoded string, defaults to 'Asia/Jakarta'") 133 | parser.add_argument( 134 | '-p', '--password', help="Password for the shared link, not your Airtable password") 135 | args = parser.parse_args() 136 |
137 | if not args.json and args.indexcolumn: 138 | print(f"Index {args.indexcolumn} passed, but it will be ignored because the --json flag was not used") 139 | 140 | client = AirScraper(args.view_url, locale=args.locale, tz=args.timezone, password=args.password) 141 | if args.json: 142 | print(client.get_json(indexcolumn=args.indexcolumn)) 143 | else: 144 | print(client.get_csv()) 145 | 146 | 147 | if __name__ == "__main__": 148 | main() 149 | --------------------------------------------------------------------------------
/notebook/Airtable Scraping CSV.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Airtable Scraping\n", 8 | "\n", 9 | "## Requirements\n", 10 | "\n", 11 | "For scraping, I don't want to use unnecessary packages, so basically the only requirement to install is `beautifulsoup4`; just install it and you're ready to go!"
12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 72, 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "name": "stdout", 21 | "output_type": "stream", 22 | "text": [ 23 | "Collecting package metadata (current_repodata.json): ...working... done\n", 24 | "Solving environment: ...working... done\n", 25 | "\n", 26 | "## Package Plan ##\n", 27 | "\n", 28 | " environment location: /home/banditelol/miniconda3\n", 29 | "\n", 30 | " added / updated specs:\n", 31 | " - beautifulsoup4\n", 32 | " - pandas\n", 33 | "\n", 34 | "\n", 35 | "The following packages will be downloaded:\n", 36 | "\n", 37 | " package | build\n", 38 | " ---------------------------|-----------------\n", 39 | " beautifulsoup4-4.9.1 | py38_0 171 KB\n", 40 | " conda-4.8.4 | py38_0 2.8 MB\n", 41 | " ------------------------------------------------------------\n", 42 | " Total: 3.0 MB\n", 43 | "\n", 44 | "The following packages will be UPDATED:\n", 45 | "\n", 46 | " ca-certificates conda-forge::ca-certificates-2020.6.2~ --> pkgs/main::ca-certificates-2020.7.22-0\n", 47 | "\n", 48 | "The following packages will be SUPERSEDED by a higher-priority channel:\n", 49 | "\n", 50 | " beautifulsoup4 conda-forge/noarch::beautifulsoup4-4.~ --> pkgs/main/linux-64::beautifulsoup4-4.9.1-py38_0\n", 51 | " certifi conda-forge::certifi-2020.6.20-py38h3~ --> pkgs/main::certifi-2020.6.20-py38_0\n", 52 | " conda conda-forge::conda-4.8.4-py38h32f6830~ --> pkgs/main::conda-4.8.4-py38_0\n", 53 | " openssl conda-forge::openssl-1.1.1g-h516909a_1 --> pkgs/main::openssl-1.1.1g-h7b6447c_0\n", 54 | " pandas conda-forge::pandas-1.1.1-py38h950e88~ --> pkgs/main::pandas-1.1.1-py38he6710b0_0\n", 55 | "\n", 56 | "\n", 57 | "Preparing transaction: ...working... done\n", 58 | "Verifying transaction: ...working... done\n", 59 | "Executing transaction: ...working... done\n" 60 | ] 61 | } 62 | ], 63 | "source": [ 64 | "!conda install beautifulsoup4 -q -y\n", 65 | "\n", 66 | "#or using pip\n", 67 | "\n", 68 | "#!pip install beautifulsoup4" 69 | ] 70 | },
71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "## Packages Used\n", 76 | "\n", 77 | "We'll need the following packages to be able to get csv data from Airtable:\n", 78 | "- `requests` to make HTTP requests\n", 79 | "- `re` to extract strings using regular expressions\n", 80 | "- `bs4` is actually not strictly required, but it feels ridiculous to work with HTML text without this package" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 69, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "import requests\n", 90 | "import re\n", 91 | "from bs4 import BeautifulSoup as bs" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "## Set Up Some Variables\n", 99 | "\n", 100 | "There are several constants that we can set up in the beginning and change according to your needs. Make sure to change the `table` constant to your shared table URL."
101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 59, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "table = \"https://airtable.com/shr5aMEsXCxORIwhk\"\n", 110 | "locale = \"en\"\n", 111 | "time_zone = \"Asia%2FJakarta\"\n", 112 | "\n", 113 | "html = bs(requests.get(table).text)" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "## Extract Necessary Parameters\n", 121 | "\n", 122 | "As mentioned in my post, we need to extract the necessary parameters. But first, because all of the necessary information exists in the `\n", 169 | "\n" 170 | ] 171 | } 172 | ], 173 | "source": [ 174 | "script = html.title.find_next('script')\n", 175 | "print(script.prettify())" 176 | ] 177 | },
178 | { 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "Now, with the search scope narrowed, we need to find the following information:\n", 183 | "\n", 184 | "- **ViewId**: an alphanumeric string starting with `viw`\n", 185 | "- **AccessPolicy**: a URL encoded string preceded by `accessPolicy=`\n", 186 | "- **AppId**: an alphanumeric string starting with `app` and preceded by `x-airtable-application-id`\n", 187 | "\n", 188 | "After we've found those parameters, we can build the dictionary that will help us construct the download request later." 189 | ] 190 | },
191 | { 192 | "cell_type": "code", 193 | "execution_count": 61, 194 | "metadata": {}, 195 | "outputs": [], 196 | "source": [ 197 | "view_id = re.search(r\"(viw[a-zA-Z0-9]+)\",str(script)).group(1)\n", 198 | "access_policy = re.search(r\"accessPolicy=([a-zA-Z0-9%*]+)\",str(script)).group(1)\n", 199 | "app_id = re.search(r\"\\\"x-airtable-application-id\\\":\\\"(app[a-zA-Z0-9]+)\",str(script)).group(1)\n", 200 | "\n", 201 | "params = {\"x-time-zone\":time_zone, \"x-user-locale\":locale, \"x-airtable-application-id\":app_id , \"accessPolicy\":access_policy}" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": {}, 207 | "source": [ 208 | "## Build the Request URL\n" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": 64, 214 | "metadata": {}, 215 | "outputs": [ 216 | { 217 | "name": "stdout", 218 | "output_type": "stream", 219 | "text": [ 220 | "Story name,User want,Server points,Client points,Built?,Needs review,Facet,Epics,Blocks release,Sprint,Release seq. no.,Blocking stories,So that,FhabTask,P[?],Server eng. tasks,Client eng.
tasks,Notes,FhabURL\n", 221 | "S143: Improve adapter saltwater resistance,Improve adapter saltwater resistance,5,1,,checked,Adapter,E113: Amphibious vehicle support,007/in-beta partnership,Backlog — required,4,,\"in case the adapter gets dropped in the ocean, it still works\",13906,,,,,https://fhabricator.automatic.co/T13906\n", 222 | "S133: Voiced alert when flux capacitor disconnected from adapter,Voiced alert when flux capacitor disconnected from adapter,5,8,checked,,Device connection,E110: Temporal displacement UX refinements,Beta1,Backlog — required,3,,fewer time travel accidents,12324,,,,,https://fhabricator.automatic.co/T12324\n", 223 | "S138: Change progress bar graphics for giant robot transformation sequence,Change progress bar graphics for giant robot transformation sequence,2,3,checked,,Global,\"E106: Full release style refinement,E117: Robot transformation sequence\",Full release,Backlog — required,5,,\"the graphics look more \"\"robotic\"\"\",13452,,,,,https://fhabricator.automatic.co/T13452\n", 224 | "S142: Improved graphics on time travel analytics dashboard,Improved graphics on time travel analytics dashboard,,,,checked,Insight: drive score,E110: Temporal displacement UX refinements,No hard requirement,Backlog — required,100,,I can see all of the aggregated data from my time travel trips at a glance,13987,,,,,https://fhabricator.automatic.co/T13987\n", 225 | "\"S146: Adapter will say a friendly \"\"Hello!\"\" upon registered driver entering vehicle\",\"Adapter will say a friendly \"\"Hello!\"\" upon registered driver entering vehicle\",3,3,,,Welcome experience,E116: Robot AI (emotional components),MIP MVP,Elise,1,,I'll feel more emotionally connected to my car,13872,,,,,https://fhabricator.automatic.co/T13872\n", 226 | "S139: Adapter warns me when time jump will take >1.21 jigowatts,Adapter warns me when time jump will take >1.21 jigowatts,5,5,checked,checked,Insight: fuel efficiency,E115: Energy efficient temporal displacement,MIP full,Elise,2,,I can be more aware of how much energy I use when time traveling,13465,,,,Is there a way to make it so that the user doesn't have to drive at 88 mph? 
Very energy inefficient.,https://fhabricator.automatic.co/T13465\n", 227 | "S132: Audio tone when adapter detects presence of ghosts,Audio tone when adapter detects presence of ghosts,8,3,checked,,Location: driving,E111: Ghostbusting v1,MIP full,Elise,2,,I can bust ghosts more effectively,11111,,,,,https://fhabricator.automatic.co/T11111\n", 228 | "S140: Systematic/hydromatic/ultramatic quick switcher,Systematic/hydromatic/ultramatic quick switcher,3,2,,,App settings,E55: Beta style refinement,Beta1,Ferrari 500,3,,\"Why, it could be greased lightning!\",13449,,,,,https://fhabricator.automatic.co/T13449\n", 229 | "S137: Ability to trigger giant robot transformation sequence remotely,Ability to trigger giant robot transformation sequence remotely,13,5,checked,checked,Location: parked,E117: Robot transformation sequence,MIP full,Ferrari 500,2,,\"my car can walk to me, instead of me walking to my car\",13423,,Consider edge case where car is in garage or other height-limited space,,,https://fhabricator.automatic.co/T13423\n", 230 | "S141: Push notification when nitrous levels low,Push notification when nitrous levels low,3,5,checked,,Fuel level,E112: Racing module v1,Beta1,Ferrari 500,3,,I don't run out of nitrous while I'm street racing,13245,,,,,https://fhabricator.automatic.co/T13245\n", 231 | "S145: Car (in robot form) will offer a hug when I'm stressed out,Car (in robot form) will offer a hug when I'm stressed out,13,13,,checked,Location: parked,E116: Robot AI (emotional components),Full release,Gremlin,5,,I'll know that I'm not alone and someone loves me,13333,,,,Make sure that adapter gets verbal consent before hugging as not all drivers are huggers,https://fhabricator.automatic.co/T13333\n", 232 | "S136: Show progress bar as ejector seat initializes,Show progress bar as ejector seat initializes,1,3,checked,,Device connection,E114: Ejector seat support,007/in-beta partnership,Gremlin,4,,I can prepare myself mentally for ejection sequence,93713,,,,,https://fhabricator.automatic.co/T93713\n", 233 | "S144: Different ghost detection audio tones for different ghost types,Different ghost detection audio tones for different ghost types,3,5,,,Location: driving,E102: Misc improvements to existing features,No hard requirement,Icebox — someday,100,,\"I can easily differentiate between ghouls, poltergeists, specters, and friendly ghosts\",,,,Decide which tones are the most appropriately spooky,,(Enter id in FhabTask)\n", 234 | "S134: Switch blue shell evasion sequence to manual,Switch blue shell evasion sequence to manual,8,5,checked,checked,App settings,E112: Racing module v1,No hard requirement,Icebox — someday,100,,I can decide how I want to deal with incoming projectiles,32436,,,,,https://fhabricator.automatic.co/T32436\n" 235 | ] 236 | } 237 | ], 238 | "source": [ 239 | "csv_url = f\"https://airtable.com/v0.3/view/{view_id}/downloadCsv?\"\n", 240 | "\n", 241 | "for (k,v) in params.items():\n", 242 | " csv_url += k+\"=\"+v+\"&\"\n", 243 | "csv_url = csv_url[:-1]\n", 244 | "r = requests.get(csv_url)\n", 245 | "\n", 246 | "print(r.text)" 247 | ] 248 | }, 249 | { 250 | "cell_type": "markdown", 251 | "metadata": {}, 252 | "source": [ 253 | "As you can see, the start of the csv there are `\\xef\\xbb\\xbf` which are the [UTF8 encoded version of the unicode ZERO WIDTH NO-BREAK SPACE U+FEFF](https://stackoverflow.com/a/50131187), this can be resolved by changing the encoding of our response object to `utf-8`." 
254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": 65, 259 | "metadata": {}, 260 | "outputs": [ 261 | { 262 | "name": "stdout", 263 | "output_type": "stream", 264 | "text": [ 265 | "Story name,User want,Server points,Client points,Built?,Needs review,Facet,Epics,Blocks release,Sprint,Release seq. no.,Blocking stories,So that,FhabTask,P[?],Server eng. tasks,Client eng. tasks,Notes,FhabURL\n", 266 | "S143: Improve adapter saltwater resistance,Improve adapter saltwater resistance,5,1,,checked,Adapter,E113: Amphibious vehicle support,007/in-beta partnership,Backlog — required,4,,\"in case the adapter gets dropped in the ocean, it still works\",13906,,,,,https://fhabricator.automatic.co/T13906\n", 267 | "S133: Voiced alert when flux capacitor disconnected from adapter,Voiced alert when flux capacitor disconnected from adapter,5,8,checked,,Device connection,E110: Temporal displacement UX refinements,Beta1,Backlog — required,3,,fewer time travel accidents,12324,,,,,https://fhabricator.automatic.co/T12324\n", 268 | "S138: Change progress bar graphics for giant robot transformation sequence,Change progress bar graphics for giant robot transformation sequence,2,3,checked,,Global,\"E106: Full release style refinement,E117: Robot transformation sequence\",Full release,Backlog — required,5,,\"the graphics look more \"\"robotic\"\"\",13452,,,,,https://fhabricator.automatic.co/T13452\n", 269 | "S142: Improved graphics on time travel analytics dashboard,Improved graphics on time travel analytics dashboard,,,,checked,Insight: drive score,E110: Temporal displacement UX refinements,No hard requirement,Backlog — required,100,,I can see all of the aggregated data from my time travel trips at a glance,13987,,,,,https://fhabricator.automatic.co/T13987\n", 270 | "\"S146: Adapter will say a friendly \"\"Hello!\"\" upon registered driver entering vehicle\",\"Adapter will say a friendly \"\"Hello!\"\" upon registered driver entering vehicle\",3,3,,,Welcome experience,E116: Robot AI (emotional components),MIP MVP,Elise,1,,I'll feel more emotionally connected to my car,13872,,,,,https://fhabricator.automatic.co/T13872\n", 271 | "S139: Adapter warns me when time jump will take >1.21 jigowatts,Adapter warns me when time jump will take >1.21 jigowatts,5,5,checked,checked,Insight: fuel efficiency,E115: Energy efficient temporal displacement,MIP full,Elise,2,,I can be more aware of how much energy I use when time traveling,13465,,,,Is there a way to make it so that the user doesn't have to drive at 88 mph? 
Very energy inefficient.,https://fhabricator.automatic.co/T13465\n", 272 | "S132: Audio tone when adapter detects presence of ghosts,Audio tone when adapter detects presence of ghosts,8,3,checked,,Location: driving,E111: Ghostbusting v1,MIP full,Elise,2,,I can bust ghosts more effectively,11111,,,,,https://fhabricator.automatic.co/T11111\n", 273 | "S140: Systematic/hydromatic/ultramatic quick switcher,Systematic/hydromatic/ultramatic quick switcher,3,2,,,App settings,E55: Beta style refinement,Beta1,Ferrari 500,3,,\"Why, it could be greased lightning!\",13449,,,,,https://fhabricator.automatic.co/T13449\n", 274 | "S137: Ability to trigger giant robot transformation sequence remotely,Ability to trigger giant robot transformation sequence remotely,13,5,checked,checked,Location: parked,E117: Robot transformation sequence,MIP full,Ferrari 500,2,,\"my car can walk to me, instead of me walking to my car\",13423,,Consider edge case where car is in garage or other height-limited space,,,https://fhabricator.automatic.co/T13423\n", 275 | "S141: Push notification when nitrous levels low,Push notification when nitrous levels low,3,5,checked,,Fuel level,E112: Racing module v1,Beta1,Ferrari 500,3,,I don't run out of nitrous while I'm street racing,13245,,,,,https://fhabricator.automatic.co/T13245\n", 276 | "S145: Car (in robot form) will offer a hug when I'm stressed out,Car (in robot form) will offer a hug when I'm stressed out,13,13,,checked,Location: parked,E116: Robot AI (emotional components),Full release,Gremlin,5,,I'll know that I'm not alone and someone loves me,13333,,,,Make sure that adapter gets verbal consent before hugging as not all drivers are huggers,https://fhabricator.automatic.co/T13333\n", 277 | "S136: Show progress bar as ejector seat initializes,Show progress bar as ejector seat initializes,1,3,checked,,Device connection,E114: Ejector seat support,007/in-beta partnership,Gremlin,4,,I can prepare myself mentally for ejection sequence,93713,,,,,https://fhabricator.automatic.co/T93713\n", 278 | "S144: Different ghost detection audio tones for different ghost types,Different ghost detection audio tones for different ghost types,3,5,,,Location: driving,E102: Misc improvements to existing features,No hard requirement,Icebox — someday,100,,\"I can easily differentiate between ghouls, poltergeists, specters, and friendly ghosts\",,,,Decide which tones are the most appropriately spooky,,(Enter id in FhabTask)\n", 279 | "S134: Switch blue shell evasion sequence to manual,Switch blue shell evasion sequence to manual,8,5,checked,checked,App settings,E112: Racing module v1,No hard requirement,Icebox — someday,100,,I can decide how I want to deal with incoming projectiles,32436,,,,,https://fhabricator.automatic.co/T32436\n" 280 | ] 281 | } 282 | ], 283 | "source": [ 284 | "r.encoding = \"utf-8\"\n", 285 | "\n", 286 | "print(r.text)" 287 | ] 288 | }, 289 | { 290 | "cell_type": "markdown", 291 | "metadata": {}, 292 | "source": [ 293 | "Now that we have the csv data, we could save it to a csv file, pass it to another service, or host the code as a Google Cloud Function to use it as a backup service. Or just save it locally using Python's built-in `open()`."
294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": null, 299 | "metadata": {}, 300 | "outputs": [], 301 | "source": [] 302 | } 303 | ], 304 | "metadata": { 305 | "kernelspec": { 306 | "display_name": "Python 3", 307 | "language": "python", 308 | "name": "python3" 309 | }, 310 | "language_info": { 311 | "codemirror_mode": { 312 | "name": "ipython", 313 | "version": 3 314 | }, 315 | "file_extension": ".py", 316 | "mimetype": "text/x-python", 317 | "name": "python", 318 | "nbconvert_exporter": "python", 319 | "pygments_lexer": "ipython3", 320 | "version": "3.8.3" 321 | } 322 | }, 323 | "nbformat": 4, 324 | "nbformat_minor": 4 325 | } 326 | --------------------------------------------------------------------------------