├── .gitignore
├── site
│   ├── icon.png
│   └── index.html
├── Makefile
├── requirements.txt
├── .circleci
│   └── config.yml
├── twitter_utils.py
├── README.md
├── lwn.py
├── sessionize.py
├── devopsdays.py
├── linux_foundation.py
├── models.py
├── main.py
├── mozilla_calendar.py
├── seecfp.py
├── papercall.py
└── LICENSE
/.gitignore: -------------------------------------------------------------------------------- 1 | .envrc 2 | .venv/ 3 | __pycache__/ 4 | --------------------------------------------------------------------------------
/site/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderanger/cfp-scraper/HEAD/site/icon.png --------------------------------------------------------------------------------
/Makefile: -------------------------------------------------------------------------------- 1 | all: sync upload 2 | 3 | upload: 4 | aws s3 sync site s3://cfpcalendar.com/ --acl public-read 5 | 6 | sync: 7 | python main.py 8 | --------------------------------------------------------------------------------
/requirements.txt: -------------------------------------------------------------------------------- 1 | airtable-python-wrapper==0.11.3.post1 2 | beautifulsoup4==4.7.1 3 | certifi==2018.11.29 4 | chardet==3.0.4 5 | dateparser==0.7.0 6 | idna==2.8 7 | oauthlib==3.0.0 8 | PySocks==1.6.8 9 | python-dateutil==2.7.5 10 | pytz==2018.9 11 | regex==2018.11.22 12 | requests==2.21.0 13 | requests-oauthlib==1.2.0 14 | six==1.12.0 15 | soupsieve==1.7.2 16 | tweepy==3.7.0 17 | tzlocal==1.5.1 18 | urllib3==1.24.1 19 | ics==0.4 20 | urlextract==0.8.3 21 | --------------------------------------------------------------------------------
/.circleci/config.yml: -------------------------------------------------------------------------------- 1 | # Python CircleCI 2.0 configuration file 2 | # 3 | # Check https://circleci.com/docs/2.0/language-python/ for more details 4 | # 5 | version: 2 6 | jobs: 7 | sync: 8 | docker: 9 | - image: circleci/python:3.7.0 10 | steps: 11 | - checkout 12 | 13 | # Download and cache dependencies 14 | - restore_cache: 15 | keys: 16 | - v1-dependencies-{{ checksum "requirements.txt" }} 17 | # fallback to using the latest cache if no exact match is found 18 | - v1-dependencies- 19 | 20 | - run: 21 | name: Install dependencies 22 | command: | 23 | python3 -m venv venv 24 | . venv/bin/activate 25 | pip install -r requirements.txt 26 | 27 | - save_cache: 28 | paths: 29 | - ./venv 30 | key: v1-dependencies-{{ checksum "requirements.txt" }} 31 | 32 | - run: 33 | name: Run sync 34 | command: | 35 | .
venv/bin/activate 36 | python main.py 37 | 38 | workflows: 39 | version: 2 40 | sync: 41 | jobs: 42 | - sync 43 | triggers: 44 | - schedule: 45 | cron: "0 10 * * *" 46 | filters: 47 | branches: 48 | only: 49 | - master 50 | --------------------------------------------------------------------------------
/twitter_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import requests 4 | import tweepy 5 | 6 | auth = tweepy.OAuthHandler(os.environ['TWITTER_CONSUMER_KEY'], os.environ['TWITTER_CONSUMER_SECRET']) 7 | auth.set_access_token(os.environ['TWITTER_ACCESS_KEY'], os.environ['TWITTER_ACCESS_SECRET']) 8 | 9 | api = tweepy.API(auth) 10 | 11 | _expand_cache = {} 12 | 13 | def expand_url(url): 14 | expanded = _expand_cache.get(url) 15 | if expanded is not None: 16 | return expanded 17 | expanded = requests.head(url, allow_redirects=True).url 18 | _expand_cache[url] = expanded 19 | return expanded 20 | 21 | 22 | def search_for_url(query, total=1000): 23 | max_id = None 24 | last_max_id = None 25 | count = 0 26 | while count < total: 27 | for tweet in api.search(q=query, count=100, max_id=max_id, result_type='recent'): 28 | count += 1 29 | if max_id: 30 | max_id = min(max_id, tweet.id) 31 | else: 32 | max_id = tweet.id 33 | for url in tweet.entities['urls']: 34 | # Twitter only expands its own t.co shortener, so expand everything else ourselves. 35 | truly_expanded_url = url['expanded_url'] 36 | if query not in truly_expanded_url: 37 | truly_expanded_url = expand_url(truly_expanded_url) 38 | if query in truly_expanded_url: 39 | yield truly_expanded_url 40 | # Did we run out of Tweets? 41 | if last_max_id == max_id: 42 | break 43 | last_max_id = max_id 44 | 45 | 46 | if __name__ == '__main__': 47 | for url in search_for_url('sessionize.com'): 48 | print(url) 49 | --------------------------------------------------------------------------------
/README.md: -------------------------------------------------------------------------------- 1 | # CFP-Scraper 2 | 3 | This is the web scraper that powers cfpcalendar.com. 4 | 5 | Data is collected from various sources and written to Airtable. 6 | 7 | ## Running it for yourself 8 | 9 | Things you'll need: 10 | 11 | 1. [Airtable](https://airtable.com/) account 12 | 2. Twitter account 13 | 3. Python 3 + virtualenv 14 | 15 | ### Pre-reqs 16 | 17 | #### Airtable 18 | 19 | 1. Once you've created an Airtable account, use [this link](https://airtable.com/addBaseFromShare/shrYiAKkEEBMuVzcu?utm_source=airtable_shared_application) to copy the template Base to your own workspace. 20 | 21 | 2. From https://airtable.com/account generate your API key and make a note of it. This will be your `AIRTABLE_API_KEY`. 22 | 23 | 3. Go to https://airtable.com/api and select your base (the copy you made in step 1 above). The URL you land on will look like `https://airtable.com/appXXXXXXYYYYY/api/docs` - make a note of the `appXXXXXXYYYYY` part. This will be your `AIRTABLE_BASE_KEY`. 24 | 25 | #### Twitter 26 | 27 | Create API keys for yourself at https://developer.twitter.com/en/apps. 28 | 29 | #### Set up Python virtualenv 30 | 31 | The easiest way to run this is in isolation, using virtualenv. 32 | 33 | 1. Clone the git repo 34 | 35 | git clone git@github.com:coderanger/cfp-scraper.git 36 | 37 | 2. Create the virtualenv 38 | 39 | cd cfp-scraper 40 | virtualenv --python=python3 . 41 | source ./bin/activate.fish 42 | 43 | (Use the `activate` script appropriate for your shell) 44 | 45 | 3.
Install required modules 46 | 47 | pip install -r requirements.txt 48 | 49 | ### Run cfp-scraper 50 | 51 | * Activate the virtualenv 52 | 53 | source ./bin/activate.fish 54 | 55 | (Use the `activate` script appropriate for your shell) 56 | 57 | * Export the credentials obtained above: 58 | 59 | export TWITTER_CONSUMER_KEY=xxxxxx 60 | export TWITTER_CONSUMER_SECRET=xxxxxx 61 | export TWITTER_ACCESS_KEY=xxxxxx 62 | export TWITTER_ACCESS_SECRET=xxxxxx 63 | 64 | export AIRTABLE_API_KEY=xxxxxx 65 | export AIRTABLE_BASE_KEY=xxxxxx 66 | 67 | * Launch: 68 | 69 | python main.py 70 | --------------------------------------------------------------------------------
/lwn.py: -------------------------------------------------------------------------------- 1 | import re 2 | from datetime import date, datetime, time 3 | 4 | import dateparser 5 | import pytz 6 | import requests 7 | from bs4 import BeautifulSoup 8 | 9 | import sessionize 10 | 11 | def get(url): 12 | res = requests.get(url) 13 | return BeautifulSoup(res.text, 'html.parser') 14 | 15 | 16 | def parse_page(root): 17 | for evt_elm in root.select('.CalMEvent a'): 18 | col_index = len(evt_elm.find_parent('td').find_previous_siblings('td')) 19 | date_row = evt_elm.find_parent('tr').find_previous_sibling(lambda elm: elm.name == 'tr' and elm.select('.CalMDate')) 20 | day = date_row.find_all('td')[col_index].text 21 | yield { 22 | 'short_name': evt_elm.text, 23 | 'url': evt_elm['href'], 24 | 'name': evt_elm['title'], 25 | 'day': day 26 | } 27 | 28 | 29 | def find_pages(): 30 | start = date.today() 31 | for i in range(12): 32 | new_month = start.month + i 33 | new_year = start.year 34 | if new_month > 12: 35 | new_month -= 12 36 | new_year += 1 37 | yield f'https://lwn.net/Calendar/Monthly/cfp/{new_year}-{new_month:02d}/', date(new_year, new_month, 1) 38 | 39 | 40 | def parse_pages(): 41 | for url, base_date in find_pages(): 42 | for evt in parse_page(get(url)): 43 | evt['date'] = base_date.replace(day=int(evt['day'])) 44 | yield evt 45 | 46 | 47 | def format_page(raw_evt): 48 | md = re.search(r'^([^(]+) \(([^)]+)\)$', raw_evt['name']) 49 | # Entries that don't match the "Name (Location)" pattern can't be parsed. 50 | if md is None: 51 | return None 52 | name, location = md.group(1, 2) 53 | return { 54 | 'Conference Name': name, 55 | 'Conference URL': raw_evt['url'], 56 | 'Location': location, 57 | 'CFP URL': raw_evt['url'], 58 | 'CFP End Date': datetime.combine(raw_evt['date'], time()), 59 | } 60 | 61 | def scrape(): 62 | for raw_evt in parse_pages(): 63 | evt = format_page(raw_evt) 64 | if evt is None: 65 | continue 66 | if 'papercall.io' in evt['CFP URL']: 67 | continue 68 | if 'events.linuxfoundation.org' in evt['CFP URL']: 69 | continue 70 | if 'sessionize.com' in evt['CFP URL']: 71 | s = sessionize.parse_event(evt['CFP URL']) 72 | if s: 73 | evt.update(s) 74 | yield evt 75 | 76 | if __name__ == '__main__': 77 | for e in scrape(): 78 | print(e) 79 | --------------------------------------------------------------------------------
/sessionize.py: -------------------------------------------------------------------------------- 1 | import dateparser 2 | import requests 3 | from bs4 import BeautifulSoup 4 | 5 | import twitter_utils 6 | 7 | def get(url): 8 | res = requests.get(url) 9 | return BeautifulSoup(res.text, 'html.parser') 10 | 11 | 12 | def find_navy_section(root, label): 13 | for elm in root.select('.text-navy'): 14 | if elm.contents[-1].strip().startswith(label): 15 | return elm.find_parent(lambda e: e.has_attr('class') and 'col-' in ' '.join(e['class'])).find('h2') 16 | 17 | 18 | def parse_event(url): 19 | root = get(url) 20 | 21 | if
root.find('span', string='Speaker Profile'): 22 | return None 23 | 24 | if 'Log in' in root.find('title').string: 25 | return None 26 | 27 | if '@ Sessionize.com' not in root.find('title').string: 28 | return None 29 | 30 | data = { 31 | 'Conference Name': root.select('.ibox-title h4')[0].string, 32 | 'CFP URL': url, 33 | } 34 | 35 | elm = find_navy_section(root, 'location') 36 | if elm: 37 | data['Location'] = elm.select('.block')[-1].string 38 | 39 | elm = find_navy_section(root, 'website') 40 | if elm: 41 | data['Conference URL'] = elm.find('a')['href'] 42 | 43 | elm = find_navy_section(root, 'event date') 44 | if elm: 45 | data['Conference Start Date'] = data['Conference End Date'] = dateparser.parse(elm.string).date() 46 | 47 | elm = find_navy_section(root, 'event starts') 48 | if elm: 49 | data['Conference Start Date'] = dateparser.parse(elm.string).date() 50 | 51 | elm = find_navy_section(root, 'event ends') 52 | if elm: 53 | data['Conference End Date'] = dateparser.parse(elm.string).date() 54 | 55 | # Find the UTC version of the CFP end date. 56 | elm = root.find(class_='js-closedate') 57 | if not elm: 58 | raise ValueError(f'js-closedate not found in {url}') 59 | utc_cfp_end_date = dateparser.parse(elm['data-date']).replace(tzinfo=None) 60 | data['CFP End Date'] = utc_cfp_end_date 61 | 62 | elm = find_navy_section(root, 'CfS closes at') 63 | if not elm: 64 | raise ValueError(f'CfS closes at not found in {url}') 65 | time = elm.parent.select('.text-navy')[0].string[13:] 66 | parsed = dateparser.parse(f'{elm.string} {time}') 67 | utc_offset = parsed - utc_cfp_end_date 68 | 69 | 70 | elm = find_navy_section(root, 'CfS opens at') 71 | if elm: 72 | time = elm.parent.select('.text-navy')[0].string[13:] 73 | date = elm.string 74 | parsed = dateparser.parse(f'{date} {time}') 75 | data['CFP Start Date'] = (parsed - utc_offset).date() 76 | 77 | return data 78 | 79 | 80 | def find_events(): 81 | seen_urls = set() 82 | for url in twitter_utils.search_for_url('sessionize.com'): 83 | # Strip the query params and lowercase it.
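# e.g. a hypothetical 'https://sessionize.com/my-event/?s=twitter' would become 'https://sessionize.com/my-event'.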
84 | clean_url = url.split('?')[0].lower().rstrip('/') 85 | if clean_url in seen_urls: 86 | continue 87 | if '/api/' in clean_url: 88 | continue 89 | evt = parse_event(clean_url) 90 | if evt is not None: 91 | yield evt 92 | seen_urls.add(clean_url) 93 | 94 | 95 | def scrape(): 96 | yield from find_events() 97 | 98 | 99 | if __name__ == '__main__': 100 | for d in find_events(): 101 | print(d) 102 | --------------------------------------------------------------------------------
/devopsdays.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | import dateparser 4 | import requests 5 | from bs4 import BeautifulSoup 6 | 7 | def get(url): 8 | res = requests.get(url) 9 | return BeautifulSoup(res.text, 'html.parser') 10 | 11 | 12 | def parse_events(): 13 | root = get('https://www.devopsdays.org/events/') 14 | for elm in root.select('.col-md-12 .row')[1].find_all('a'): 15 | yield elm['href'] 16 | 17 | 18 | def parse_open_cfps(): 19 | root = get('https://www.devopsdays.org/speaking/') 20 | for row in root.select('table.sortable tbody tr'): 21 | yield { 22 | 'Location': row.find('a').string, 23 | 'Conference URL': 'https://www.devopsdays.org' + row.find('a')['href'], 24 | 'CFP End Date': dateparser.parse(row.find_all('td')[1].string.strip()), 25 | 'Conference Start Date': dateparser.parse(row.find_all('td')[2].string.strip()).date(), 26 | } 27 | 28 | 29 | def parse_event(url): 30 | root = get(url+'welcome/') 31 | 32 | cfp_nav = None 33 | for nav in root.select('.nav-link'): 34 | nav_text = str(nav.string).lower() 35 | if 'propose' in nav_text or 'cfp' in nav_text: 36 | cfp_nav = nav 37 | break 38 | if cfp_nav is None: 39 | propose_elm = root.find('strong', string='Propose') 40 | if propose_elm: 41 | cfp_nav = propose_elm.parent.next_sibling.find('a') 42 | if cfp_nav is None: 43 | return None 44 | cfp_url = cfp_nav['href'] 45 | if cfp_url.startswith('/'): 46 | cfp_url = f'https://www.devopsdays.org{cfp_url}' 47 | 48 | 49 | dates_elm = root.find('strong', string='Dates') 50 | if dates_elm: 51 | dates = dates_elm.parent.next_sibling.string.split('-') 52 | event_end = dateparser.parse(dates[-1]).date() 53 | else: 54 | dates = root.select('.welcome-page-date')[0].contents[0] 55 | # Looks like "April 9 - 10, 2019" 56 | md = re.match(r'^(\S+) ([ 0-9-]+), (\d+)$', dates) 57 | if md: 58 | month, days, year = md.group(1, 2, 3) 59 | if '-' in days: 60 | start_day, end_day = days.split('-') 61 | else: 62 | start_day = end_day = days 63 | event_end = dateparser.parse(f'{month} {end_day}, {year}').date() 64 | if int(start_day) > int(end_day): 65 | event_end = event_end.replace(year=event_end.year + event_end.month // 12, month=event_end.month % 12 + 1)  # the range crosses a month boundary; December rolls into January 66 | else: 67 | raise ValueError(f'Unable to find end date in {url}') 68 | 69 | name_elm = root.select('.welcome-page') 70 | if not name_elm: 71 | name_elm = root.select('title') 72 | name_parts = name_elm[0].string.split() 73 | name_parts[0] = name_parts[0].capitalize() 74 | name = ' '.join(name_parts) 75 | 76 | return { 77 | 'Conference Name': name, 78 | 'CFP URL': cfp_url, 79 | 'Conference End Date': event_end, 80 | 'Tags': ['devops', 'devopsdays'], 81 | } 82 | 83 | 84 | def scrape(): 85 | for data in parse_open_cfps(): 86 | evt_data = parse_event(data['Conference URL']) 87 | if evt_data is None: 88 | continue 89 | data.update(evt_data) 90 | # Papercall is already handled.
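# The papercall scraper already collects those events with fuller data, so drop them here to avoid duplicates.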
91 | if 'papercall.io' in data['CFP URL']: 92 | continue 93 | yield data 94 | 95 | if __name__ == '__main__': 96 | # print(parse_event('https://www.devopsdays.org/events/2019-indianapolis/')) 97 | # for d in parse_open_cfps(): 98 | # print(d) 99 | for d in scrape(): 100 | print(d) 101 | --------------------------------------------------------------------------------
/linux_foundation.py: -------------------------------------------------------------------------------- 1 | import re 2 | from urllib.parse import urljoin 3 | 4 | import dateparser 5 | import pytz 6 | import requests 7 | from bs4 import BeautifulSoup 8 | 9 | def get(url): 10 | res = requests.get(url) 11 | return BeautifulSoup(res.text, 'html.parser') 12 | 13 | 14 | def parse_date(raw_date): 15 | md = re.search(r'^(\w+) (\d+), (\d+)$', raw_date) 16 | if md: 17 | d = dateparser.parse(raw_date).date() 18 | return (d, d) 19 | md = re.search(r'^(\w+) (\d+) - (\d+), (\d+)$', raw_date) 20 | if md: 21 | return ( 22 | dateparser.parse(f'{md.group(1)} {md.group(2)}, {md.group(4)}').date(), 23 | dateparser.parse(f'{md.group(1)} {md.group(3)}, {md.group(4)}').date(), 24 | ) 25 | md = re.search(r'^(\w+) (\d+) - (\w+) (\d+), (\d+)$', raw_date) 26 | if md: 27 | return ( 28 | dateparser.parse(f'{md.group(1)} {md.group(2)}, {md.group(5)}').date(), 29 | dateparser.parse(f'{md.group(3)} {md.group(4)}, {md.group(5)}').date(), 30 | ) 31 | raise ValueError(f'Unable to parse {raw_date}') 32 | 33 | 34 | def parse_events_page(): 35 | root = get('https://events.linuxfoundation.org/') 36 | 37 | for elm in root.select('.single-event-wrap'): 38 | raw_date, location = [e.string for e in elm.find_all('h3')] 39 | start_date, end_date = parse_date(raw_date) 40 | yield { 41 | 'Conference URL': elm.find('span', string=re.compile(r'(?i:(learn more)|(view the website))')).parent['href'], 42 | 'Conference Start Date': start_date, 43 | 'Conference End Date': end_date, 44 | 'Location': location, 45 | } 46 | 47 | 48 | def fetch_smapply_json(): 49 | has_next = True 50 | page = 1 51 | # Cap at ten pages as a safety limit in case the has_next flag never clears. 52 | while has_next and page < 10: 53 | data = requests.get(f'https://linuxfoundation.smapply.io/prog/ds/?page={page}&base_query=all').json() 54 | has_next = data['has_next'] 55 | page += 1 56 | yield from data['results'] 57 | 58 | 59 | def parse_smapply_json(): 60 | for data in fetch_smapply_json(): 61 | if not data['startdate']: 62 | # Malformed data.
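# Some SMApply listings ship without a startdate; there is nothing useful to sync for those.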
63 | continue 64 | yield { 65 | 'Conference Name': data['name'], 66 | 'CFP Start Date': dateparser.parse(data['startdate']).astimezone(pytz.utc).date(), 67 | 'CFP End Date': dateparser.parse(data['deadline']).astimezone(pytz.utc), 68 | 'CFP URL': 'https://linuxfoundation.smapply.io{}'.format(data['listing_url']), 69 | } 70 | 71 | 72 | def possible_cfp_links(evt): 73 | evt_page = get(evt['Conference URL']) 74 | for elm in evt_page.find_all('a'): 75 | if elm.has_attr('href') and ('cfp' in elm['href'] or 'program' in elm['href']): 76 | yield urljoin(evt['Conference URL'], elm['href']) 77 | 78 | 79 | def correlate_event(evt, json_data): 80 | for url in possible_cfp_links(evt): 81 | page = requests.get(url).text 82 | for d in json_data: 83 | if d['CFP URL'].rstrip('/') in page: 84 | out = {} 85 | out.update(evt) 86 | out.update(d) 87 | return out 88 | 89 | 90 | def scrape(): 91 | smapply_json = list(parse_smapply_json()) 92 | 93 | for evt in parse_events_page(): 94 | out = correlate_event(evt, smapply_json) 95 | if out is not None: 96 | yield out 97 | 98 | 99 | if __name__ == '__main__': 100 | for d in scrape(): 101 | # print(d) 102 | pass 103 | --------------------------------------------------------------------------------
/site/index.html: -------------------------------------------------------------------------------- [static page; markup elided. The surviving text is the page title "CFP Calendar" and an "iCal link" in the body.] --------------------------------------------------------------------------------
/models.py: -------------------------------------------------------------------------------- 1 | import os 2 | from datetime import datetime 3 | 4 | import airtable 5 | import dateparser 6 | 7 | 8 | class AirtableModel(dict): 9 | class AirtableProperty: 10 | def __get__(_self, _instance, owner): 11 | if not hasattr(owner, '_db'): 12 | if not owner.table_name: 13 | raise ValueError(f'{owner} does not define table_name') 14 | owner._db = airtable.Airtable(os.environ['AIRTABLE_BASE_KEY'], owner.table_name) 15 | return owner._db 16 | 17 | table_name = None 18 | db = AirtableProperty() 19 | 20 | def __init__(self, airtable_id=None, **fields): 21 | self.airtable_id = airtable_id 22 | super().__init__(fields) 23 | 24 | @classmethod 25 | def fetch(cls, **query): 26 | if len(query) != 1: 27 | raise ValueError(f'Invalid fetch query: {query}') 28 | key, value = list(query.items())[0] 29 | key = key.replace('_', ' ') 30 | record = cls.db.match(key, value) 31 | return cls(airtable_id=record.get('id'), **record.get('fields', {})) 32 | 33 | @classmethod 34 | def fetch_all(cls): 35 | for page in cls.db.get_iter(): 36 | for record in page: 37 | yield cls(airtable_id=record.get('id'), **record.get('fields', {})) 38 | 39 | def save(self): 40 | if self.airtable_id: 41 | self.db.update(self.airtable_id, self) 42 | else: 43 | record = self.db.insert(self) 44 | self.airtable_id = record['id'] 45 | 46 | 47 | def datetime_lt(a, b): 48 | if isinstance(a, (str, bytes)): 49 | a = dateparser.parse(a) 50 | if isinstance(b, (str, bytes)): 51 | b = dateparser.parse(b) 52 | return a.replace(tzinfo=None) < b.replace(tzinfo=None) 53 | 54 | 55 | class Conference(AirtableModel): 56 | table_name = 'Conferences' 57 | 58 | def __str__(self): 59 | label = self.get('Conference Name') 60 | if not label: 61 | label = self['CFP URL'] 62 | return f'Conference: {label}' 63 | 64 | def save(self): 65 | # If we didn't get a CFP Start Date, backfill one.
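# If the CFP has already closed, reuse its end date so the range stays sensible; otherwise fall back to today.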
66 | if 'CFP Start Date' not in self: 67 | if self.get('CFP End Date') and datetime_lt(self['CFP End Date'], datetime.now()): 68 | d = self['CFP End Date'] 69 | if isinstance(d, (str, bytes)): 70 | d = str(dateparser.parse(d).date()) 71 | self['CFP Start Date'] = d 72 | else: 73 | self['CFP Start Date'] = str(datetime.utcnow().date()) 74 | 75 | # Clear computed fields. 76 | end_date_only = self.pop('CFP End Date (Only)', None) 77 | 78 | # Handle the tags value. 79 | tags = self.pop('Tags', []) 80 | try: 81 | super().save() 82 | finally: 83 | # Restore them after the save. 84 | self['Tags'] = tags 85 | self['CFP End Date (Only)'] = end_date_only 86 | # Update any new tags. 87 | for t in tags: 88 | tag = Tag.fetch(Tag=t) 89 | if self.airtable_id not in tag.get('Conference', []): 90 | tag['Tag'] = t 91 | tag.setdefault('Conference', []) 92 | tag['Conference'].append(self.airtable_id) 93 | tag.save() 94 | # Remove any old tags. 95 | for t in self.db.get(self.airtable_id)['fields'].get('Tags', []): 96 | if t not in tags: 97 | tag = Tag.fetch(Tag=t) 98 | if self.airtable_id in tag.get('Conference', []): 99 | tag['Conference'].remove(self.airtable_id) 100 | tag.save() 101 | 102 | 103 | class Tag(AirtableModel): 104 | table_name = 'Conference Tags' 105 | --------------------------------------------------------------------------------
/main.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import os 3 | import pytz 4 | from datetime import date, datetime, timedelta 5 | 6 | import devopsdays 7 | import papercall 8 | import models 9 | import sessionize 10 | import linux_foundation 11 | import seecfp 12 | import lwn 13 | import mozilla_calendar 14 | 15 | def scrape_all(): 16 | print('Scraping Papercall') 17 | yield from papercall.scrape() 18 | print('Scraping Devopsdays') 19 | yield from devopsdays.scrape() 20 | print('Scraping Sessionize') 21 | yield from sessionize.scrape() 22 | print('Scraping Linux Foundation') 23 | yield from linux_foundation.scrape() 24 | print('Scraping SeeCFP') 25 | yield from seecfp.scrape() 26 | print('Scraping LWN CFP Calendar') 27 | yield from lwn.scrape() 28 | print('Scraping Mozilla Calendar') 29 | yield from mozilla_calendar.scrape() 30 | 31 | 32 | def sync_record(existing, fields): 33 | # Convert any needed fields: 34 | for key, value in fields.items(): 35 | if isinstance(value, datetime): 36 | if value.tzinfo: 37 | value = pytz.UTC.normalize(value).replace(tzinfo=None) 38 | fields[key] = value.replace(microsecond=0).isoformat() + '.000Z' 39 | elif isinstance(value, date): 40 | fields[key] = value.isoformat() 41 | if not fields.get('Conference Start Date'): 42 | fields.pop('Conference Start Date', None) 43 | if not fields.get('Conference End Date'): 44 | fields.pop('Conference End Date', None) 45 | if not fields.get('Tags'): 46 | fields.pop('Tags', None) 47 | 48 | # No existing version, create it. 49 | if existing is None: 50 | conf = models.Conference(**fields) 51 | print(f'Creating {conf}') 52 | conf.save() 53 | return conf 54 | else: 55 | # Check if a save is needed. 56 | do_update = False 57 | for key, value in fields.items(): 58 | existing_value = existing.get(key) 59 | # Special case for tags: they need to be sorted before comparing. 60 | if key == 'Tags' and value and existing_value: 61 | if sorted(value) != sorted(existing_value): 62 | print('{} {} {}'.format(key, repr(value), repr(existing_value))) 63 | do_update = True 64 | break 65 | else: 66 | continue 67 | 68 | # Special case: None and '' count as equal.
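# Airtable omits empty fields from records entirely, so a scraped '' against a missing value is not a real change.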
69 | if value == '' and existing_value is None: 70 | continue 71 | 72 | if value != existing_value: 73 | print('Field changed {}: was {} now {}'.format(key, repr(existing_value), repr(value))) 74 | do_update = True 75 | break 76 | if do_update: 77 | print(f'Updating {existing}') 78 | existing.update(fields) 79 | existing.save() 80 | elif os.environ.get('CI'): 81 | print(f'Scraped {existing}') 82 | return existing 83 | 84 | 85 | def sync_all(): 86 | # Fetch all the conferences into a local cache. 87 | conferences = {} 88 | for conf in models.Conference.fetch_all(): 89 | conferences[conf['CFP URL']] = conf 90 | 91 | # Run the scrapes and syncs. 92 | for fields in scrape_all(): 93 | # Try to filter out meetups. 94 | if 'meetup' in fields.get('Conference Name', '').lower() or 'meetup' in fields.get('Conference URL', '').lower(): 95 | continue 96 | if fields.get('Conference Start Date') and fields.get('Conference End Date') and fields['Conference End Date'] - fields['Conference Start Date'] > timedelta(days=14): 97 | continue 98 | 99 | conf = sync_record(conferences.get(fields['CFP URL']), fields) 100 | conferences[conf['CFP URL']] = conf 101 | 102 | 103 | def main(): 104 | sync_all() 105 | 106 | 107 | if __name__ == '__main__': 108 | main() 109 | --------------------------------------------------------------------------------
/mozilla_calendar.py: -------------------------------------------------------------------------------- 1 | # https://calendar.google.com/calendar/ical/mozilla.com_tptb36ac7eijerilfnf6c1onfo%40group.calendar.google.com/public/basic.ics 2 | import re 3 | from datetime import datetime 4 | 5 | import dateparser 6 | import requests 7 | import ics 8 | from urlextract import URLExtract 9 | 10 | import sessionize 11 | 12 | FLAG_A = ord('🇦') 13 | FLAG_Z = FLAG_A + 25  # regional indicator symbols run A through Z, 26 code points 14 | FLAG_OFFSET = FLAG_A - ord('A') 15 | URL_EXTRACTOR = URLExtract() 16 | 17 | 18 | def fetch_cal(): 19 | url = 'https://calendar.google.com/calendar/ical/mozilla.com_tptb36ac7eijerilfnf6c1onfo%40group.calendar.google.com/public/basic.ics' 20 | return ics.Calendar(requests.get(url).text) 21 | 22 | 23 | def convert_flags(s): 24 | ords = [ord(c) for c in s] 25 | return ''.join(chr(c - FLAG_OFFSET) if FLAG_A <= c <= FLAG_Z else chr(c) for c in ords) 26 | 27 | 28 | def parse_event_url(evt): 29 | links = URL_EXTRACTOR.find_urls(evt.description) 30 | if links: 31 | return links[0] 32 | 33 | 34 | def parse_date(raw_date, relative_to): 35 | s = {'PREFER_DATES_FROM': 'future', 'RELATIVE_BASE': relative_to.replace(tzinfo=None)} 36 | 37 | md = re.search(r'^(\w+) (\d+)\s*-\s*(\w+) (\d+)(.*)$', raw_date) 38 | if md: 39 | return ( 40 | dateparser.parse(f'{md.group(1)} {md.group(2)}', settings=s).date(), 41 | dateparser.parse(f'{md.group(3)} {md.group(4)}', settings=s).date(), 42 | md.group(5), 43 | ) 44 | md = re.search(r'^(\w+) (\d+)\s*-\s*(\d+)(.*)$', raw_date) 45 | if md: 46 | return ( 47 | dateparser.parse(f'{md.group(1)} {md.group(2)}', settings=s).date(), 48 | dateparser.parse(f'{md.group(1)} {md.group(3)}', settings=s).date(), 49 | md.group(4), 50 | ) 51 | md = re.search(r'^(\w+) (\d+)(.*)$', raw_date) 52 | if md: 53 | d = dateparser.parse(f'{md.group(1)} {md.group(2)}', settings=s).date() 54 | return (d, d, md.group(3)) 55 | return (None, None, raw_date) 56 | 57 | 58 | def parse_event_name(label, relative_to): 59 | label = convert_flags(label) 60 | md = re.search(r'^(.*) \((.*?)\)$', label) 61 | if not md: 62 | return { 63 | 'Conference Name': label.strip(), 64 | } 65 | name, dates_and_location = md.group(1, 2) 66 | # Try
to filter out the word CFP. 67 | name = name.replace('CFP', '') 68 | name = re.sub(r'(^| ):( |$)', ' ', name) 69 | name = re.sub(r'\s+', ' ', name).strip() 70 | # Parse dates. 71 | start_date, end_date, location = parse_date(dates_and_location, relative_to) 72 | # Clean up the location. 73 | location = location.lstrip(',').strip() 74 | evt = { 75 | 'Conference Name': name, 76 | } 77 | if start_date: 78 | evt['Conference Start Date'] = start_date 79 | if end_date: 80 | evt['Conference End Date'] = end_date 81 | if location: 82 | evt['Location'] = location 83 | return evt 84 | 85 | 86 | def parse_events(cal): 87 | # Skip anything that closed more than a year ago. 88 | now = datetime.utcnow() 89 | cutoff = now.replace(year=now.year-1, tzinfo=None) 90 | 91 | for evt in cal.events: 92 | if evt.begin.datetime.replace(tzinfo=None) < cutoff: 93 | continue 94 | data = parse_event_name(evt.name, evt.begin.datetime) 95 | if evt.location: 96 | data['Location'] = evt.location 97 | data['CFP End Date'] = evt.begin.datetime.replace(tzinfo=None) 98 | url = parse_event_url(evt) 99 | if url: 100 | data['Conference URL'] = data['CFP URL'] = url 101 | yield data 102 | 103 | 104 | def scrape(): 105 | for evt in parse_events(fetch_cal()): 106 | if evt is None or 'CFP URL' not in evt: 107 | continue 108 | if 'papercall.io' in evt['CFP URL']: 109 | continue 110 | if 'sessionize.com' in evt['CFP URL']: 111 | s = sessionize.parse_event(evt['CFP URL']) 112 | if s: 113 | evt.update(s) 114 | yield evt 115 | 116 | 117 | if __name__ == '__main__': 118 | for e in scrape(): 119 | print(e) 120 | -------------------------------------------------------------------------------- /seecfp.py: -------------------------------------------------------------------------------- 1 | import dateparser 2 | import pytz 3 | import requests 4 | 5 | import sessionize 6 | 7 | URL = ''' 8 | https://airtable.com/v0.3/view/viw1YoXQzG3f7Ty7D/readSharedViewData?stringifiedObjectParams=%7B%7D&requestId=reqcNhwt4DFJWjr0u&accessPolicy=%7B%22allowedAction 9 | s%22%3A%5B%7B%22modelClassName%22%3A%22view%22%2C%22modelIdSelector%22%3A%22viw1YoXQzG3f7Ty7D%22%2C%22action%22%3A%22readSharedViewData%22%7D%2C%7B%22modelClas 10 | sName%22%3A%22view%22%2C%22modelIdSelector%22%3A%22viw1YoXQzG3f7Ty7D%22%2C%22action%22%3A%22getMetadataForPrinting%22%7D%2C%7B%22modelClassName%22%3A%22row%22% 11 | 2C%22modelIdSelector%22%3A%22rows+*%5BdisplayedInView%3Dviw1YoXQzG3f7Ty7D%5D%22%2C%22action%22%3A%22createBoxDocumentSession%22%7D%2C%7B%22modelClassName%22%3A 12 | %22row%22%2C%22modelIdSelector%22%3A%22rows+*%5BdisplayedInView%3Dviw1YoXQzG3f7Ty7D%5D%22%2C%22action%22%3A%22createDocumentPreviewSession%22%7D%2C%7B%22modelC 13 | lassName%22%3A%22view%22%2C%22modelIdSelector%22%3A%22viw1YoXQzG3f7Ty7D%22%2C%22action%22%3A%22downloadCsv%22%7D%2C%7B%22modelClassName%22%3A%22view%22%2C%22mo 14 | delIdSelector%22%3A%22viw1YoXQzG3f7Ty7D%22%2C%22action%22%3A%22downloadICal%22%7D%2C%7B%22modelClassName%22%3A%22row%22%2C%22modelIdSelector%22%3A%22rows+*%5Bd 15 | isplayedInView%3Dviw1YoXQzG3f7Ty7D%5D%22%2C%22action%22%3A%22downloadAttachment%22%7D%5D%2C%22shareId%22%3A%22shrBMFY4CSpSRGmAs%22%2C%22applicationId%22%3A%22a 16 | ppl4CwxGoKNDk2ek%22%2C%22sessionId%22%3A%22sestt1hvhA5QXmrdz%22%2C%22generationNumber%22%3A0%2C%22signature%22%3A%22562e2ea38b121c78fada55b507c41695cb9991bfe74 17 | e1231c9be2406c3e589ee%22%7D 18 | '''.replace('\n', '') 19 | HEADERS = {'x-airtable-application-id': 'appl4CwxGoKNDk2ek', 'X-Requested-With': 'XMLHttpRequest', 'x-time-zone': 'UTC', 'x-user-locale': 
'en'} 20 | 21 | def get_data(): 22 | r = requests.get(URL, headers=HEADERS) 23 | if r.status_code != 200: 24 | raise requests.HTTPError(f'Error retrieving Airtable data {r.status_code}: {r.text}') 25 | return r.json()['data'] 26 | 27 | 28 | def convert_columns(data): 29 | col_map = {} 30 | for d in data['columns']: 31 | col_map[d['id']] = d['name'] 32 | 33 | for d in data['rows']: 34 | row_data = {} 35 | for k, v in d['cellValuesByColumnId'].items(): 36 | k = col_map[k] 37 | if k == 'Country': 38 | v = v[0]['foreignRowDisplayName'] 39 | row_data[k] = v 40 | yield row_data 41 | 42 | def format_data(row): 43 | # {'Link to the call for paper': 'http://www.jbcnconf.com/2019/', 'Submission Deadline': '2019-04-01T00:00:00.000Z', 'Country': [{'foreignRowId': 'recIhNTuv0pD1lSSN', 'foreignRowDisplayName': 'Spain'}], 'City': 'Barcelona', 'Conference Start': '2019-05-27T00:00:00.000Z', 'Conference End': '2019-05-29T00:00:00.000Z', 'Name': 'JBCNConf-2019', 'Continent': {'valuesByForeignRowId': {'recIhNTuv0pD1lSSN': ['Europe']}, 'foreignRowIdOrder': ['recIhNTuv0pD1lSSN']}, 'Days left': 69, 'When': 'May'} 44 | location = row['Country'] 45 | if 'City' in row: 46 | location = '{}, {}'.format(row['City'], row['Country']) 47 | return { 48 | 'CFP URL': row['Link to the call for paper'], 49 | 'Conference URL': row['Link to the call for paper'], # Only one link in the data, so use it for both. 50 | 'CFP End Date': dateparser.parse(row['Submission Deadline']).astimezone(pytz.utc).replace(tzinfo=None), 51 | 'Location': location, 52 | 'Conference Start Date': dateparser.parse(row['Conference Start']).date(), 53 | 'Conference End Date': dateparser.parse(row['Conference End']).date(), 54 | 'Conference Name': row['Name'] 55 | } 56 | 57 | 58 | def scrape(): 59 | for raw_row in convert_columns(get_data()): 60 | row = format_data(raw_row) 61 | if row is None: 62 | continue 63 | if 'papercall.io' in row['CFP URL']: 64 | continue 65 | if 'sessionize.com' in row['CFP URL']: 66 | s = sessionize.parse_event(row['CFP URL']) 67 | if s: 68 | row.update(s) 69 | yield row 70 | 71 | 72 | if __name__ == '__main__': 73 | for row in scrape(): 74 | print(row) 75 | 76 | 77 | # 'Country': [{'foreignRowId': 'recOUw0MItMcZxNQe', 'foreignRowDisplayName': 'Slovakia'}] 78 | --------------------------------------------------------------------------------
/papercall.py: -------------------------------------------------------------------------------- 1 | import csv 2 | 3 | import dateparser 4 | import dateparser.search 5 | import requests 6 | from bs4 import BeautifulSoup 7 | 8 | URL = 'https://www.papercall.io/events?open-cfps=true&page={page}' 9 | 10 | def get(page): 11 | res = requests.get(URL.format(page=page)) 12 | return BeautifulSoup(res.text, 'html.parser') 13 | 14 | 15 | def maybe_int(s): 16 | try: 17 | return int(s) 18 | except (TypeError, ValueError):  # elm.string can be None 19 | return 0 20 | 21 | 22 | def num_pages(): 23 | pagination = get(1).find(class_='pagination') 24 | return max(maybe_int(elm.string) for elm in pagination.find_all('a')) 25 | 26 | 27 |
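# For reference, each dict yielded by parse_page() below looks roughly like (values hypothetical): {'Conference Name': 'Ignite Black Tech Conference', 'Location': 'Atlanta, GA', 'CFP URL': 'https://www.papercall.io/ibtechcon2019', 'CFP End Date': datetime(2019, 1, 27, 23, 1), 'Tags': ['Blockchain', 'Fintech'], ...}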
28 | # Sample event block from the listing page (markup elided; only its text content follows): 29 | #   Ignite Black Tech Conference - iBTechCon2019 - Atlanta, GA 30 | #   Event Dates: February 16, 2019, February 16, 2019 31 | #   CFP closes at January 27, 2019 23:01 UTC (Local) 32 | #   Tags: Blockchain, Fintech, Healthcare it, Energy, Entertainment, Artificial intelligence, Virtual reality, Gaming, Cryptocurrency. cloud services, Cybersecurity, Machine learning, Big data, Data analytics, Erp management, Intermediate coding, Advanced coding, Biotech, Automation, Mechanical engineering, Advanced robotics, Unmanned systems demo, Saas, Cleantech, System engineering, Industry insight 82 | 83 |
def parse_page(root): 84 | for event in root.select('.event-list-detail'): 85 | title_line = event.select('.event__title a')[-1] 86 | title_parts = title_line.string.split(' - ', 1) 87 | if len(title_parts) == 1: 88 | title = title_parts[0] 89 | location = '' 90 | else: 91 | title = title_parts[0] 92 | location = title_parts[1] 93 | try: 94 | url = event.select('.fa-external-link')[0]['title'] 95 | except IndexError: 96 | url = '' 97 | cfp_close_label = event.find(lambda elm: elm.name == 'strong' and elm.string and 'CFP closes at' in elm.string) 98 | if not cfp_close_label: 99 | # Without a close date there is nothing to track; skip it. 100 | continue 101 | cfp_close = dateparser.parse(cfp_close_label.parent.find_next_sibling('td').string.strip()) 102 | start_date = end_date = None 103 | dates = event.find(lambda elm: elm.name == 'strong' and elm.string and 'Event Dates' in elm.string) 104 | if dates: 105 | dates = dates.next_sibling.string.strip() 106 | if dates: 107 | parsed_dates = [d for _, d in dateparser.search.search_dates(dates)] 108 | if parsed_dates: 109 | start_date = parsed_dates[0].date() 110 | end_date = parsed_dates[-1].date() 111 | tags = [t.string for t in event.select('a[href^="/events?keywords=tags"]')] 112 | yield { 113 | 'CFP URL': title_line['href'], 114 | 'Conference Name': title, 115 | 'Location': location, 116 | 'Conference URL': url, 117 | 'Conference Start Date': start_date, 118 | 'Conference End Date': end_date, 119 | 'CFP End Date': cfp_close, 120 | 'Tags': tags, 121 | } 122 | 123 | 124 | def parse_all(): 125 | count = num_pages() 126 | for n in range(count): 127 | yield from parse_page(get(n+1)) 128 | 129 | 130 | def format_all(out): 131 | writer = csv.DictWriter(out, dialect='excel-tab', fieldnames=[ 132 | 'title', 'url', 'location', 'start_date', 'end_date', 133 | 'cfp_open', 'cfp_close', 'cfp_url', 'tags', 134 | ]) 135 | writer.writeheader() 136 | for event in parse_all(): 137 | writer.writerow({ 138 | 'title': event['Conference Name'], 139 | 'url': event['Conference URL'], 140 | 'location': event['Location'], 141 | 'start_date': event['Conference Start Date'], 142 | 'end_date': event['Conference End Date'], 143 | 'cfp_open': True, 144 | 'cfp_close': event['CFP End Date'], 145 | 'cfp_url': event['CFP URL'], 146 | 'tags': ', '.join(event['Tags']), 147 | }) 148 | 149 | 150 | def scrape(): 151 | yield from parse_all() 152 | 153 | 154 | if __name__ == '__main__': 155 | import pprint 156 | for event in parse_all(): 157 | pprint.pprint(event) 158 | --------------------------------------------------------------------------------
/LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License.
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | --------------------------------------------------------------------------------