├── lib
    ├── __init__.py
    └── toolbox.py
├── ptt
    ├── data
    │   └── .gitkeep
    ├── requirements.txt
    └── crawler.py
├── CNAME
├── bb102
    ├── yes123
    │   ├── detail_html
    │   │   └── .gitkeep
    │   ├── list_html
    │   │   └── .gitkeep
    │   ├── job_detail.py
    │   └── job_list.py
    ├── cai_speech.txt
    └── donald_trump.txt
├── _config.yml
├── bb103
    ├── pm2.png
    ├── bb103_pythonetl_20170829.ipynb
    └── trump_interview.txt
├── utils
    ├── chromedriver
    ├── chromedriver_linux64.zip
    └── install_chromedriver.sh
├── bb105
    ├── yes123
    │   ├── yes123.py
    │   ├── requirements.txt
    │   ├── README.md
    │   └── tasks.py
    ├── ptt_crawler.py
    └── .ipynb_checkpoints
    │   └── bb105_20171225-checkpoint.ipynb
├── datasets
    ├── A_LVR_LAND_A_BUILD.CSV
    ├── eng_stop_words.txt
    └── trump_speech.txt
├── install_gcin.sh
├── README.md
├── cb101
    ├── CB101_20180607.ipynb
    ├── 時間日期轉換.ipynb
    └── flippingmed_crawler.ipynb
├── bb106
    ├── bb106_20180402.ipynb
    └── google_trends.py
└── bb104
    ├── appledaily.py
    ├── amazon.ipynb
    └── bb104_20171109.ipynb


/lib/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/ptt/data/.gitkeep:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/CNAME:
--------------------------------------------------------------------------------
1 | pythonetl.ianchenhq.com


--------------------------------------------------------------------------------
/bb102/yes123/detail_html/.gitkeep:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/bb102/yes123/list_html/.gitkeep:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-minimal


--------------------------------------------------------------------------------
/bb103/pm2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ianchen06/pythonetl/HEAD/bb103/pm2.png


--------------------------------------------------------------------------------
/utils/chromedriver:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ianchen06/pythonetl/HEAD/utils/chromedriver


--------------------------------------------------------------------------------
/bb105/yes123/yes123.py:
--------------------------------------------------------------------------------
1 | import tasks
2 | 
# Queue one `tasks.get_list` job per listing page, pages 1 through 10.
# A plain loop is used because .delay() is called only for its side effect;
# the original list comprehension built a list that was thrown away.
for page in range(1, 11):
    tasks.get_list.delay(page)
4 | 
5 | 


--------------------------------------------------------------------------------
/datasets/A_LVR_LAND_A_BUILD.CSV:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ianchen06/pythonetl/HEAD/datasets/A_LVR_LAND_A_BUILD.CSV


--------------------------------------------------------------------------------
/ptt/requirements.txt:
--------------------------------------------------------------------------------
1 | certifi==2017.11.5
2 | chardet==3.0.4
3 | idna==2.6
4 | requests==2.18.4
5 | urllib3==1.22
6 | 


--------------------------------------------------------------------------------
/utils/chromedriver_linux64.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ianchen06/pythonetl/HEAD/utils/chromedriver_linux64.zip


--------------------------------------------------------------------------------
/utils/install_chromedriver.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Download the ChromeDriver 2.29 linux64 archive, unpack it in the current
# directory and copy the binary into /usr/local/bin.
# Stop at the first failing command so a failed download is never installed.
set -e

echo "Installing Chromedriver..."
# -O pins the output name: without it wget saves to "chromedriver_linux64.zip.1"
# when the archive already exists, and the stale archive would be unpacked.
wget -O chromedriver_linux64.zip https://chromedriver.storage.googleapis.com/2.29/chromedriver_linux64.zip
# -o overwrites an existing ./chromedriver instead of prompting interactively.
unzip -o chromedriver_linux64.zip
sudo cp chromedriver /usr/local/bin
6 | 


--------------------------------------------------------------------------------
/bb105/yes123/requirements.txt:
--------------------------------------------------------------------------------
 1 | amqp==2.2.2
 2 | billiard==3.5.0.3
 3 | celery==4.1.0
 4 | certifi==2018.1.18
 5 | chardet==3.0.4
 6 | idna==2.6
 7 | kombu==4.1.0
 8 | pytz==2017.3
 9 | requests==2.18.4
10 | rethinkdb==2.3.0.post6
11 | urllib3==1.22
12 | vine==1.1.4
13 | 


--------------------------------------------------------------------------------
/install_gcin.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | set -ex
 3 | 
 4 | sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 835AB0E3
 5 | 
 6 | cat <<EOF>>/etc/apt/sources.list
 7 | deb http://hyperrate.com/gcin-ubuntu1604 eliu release
 8 | EOF
 9 | 
10 | sudo apt-get update && sudo apt-get install -y gcin
11 | 


--------------------------------------------------------------------------------
/bb102/yes123/job_detail.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # coding: utf-8
 3 | import requests as r
 4 | 
 5 | HOST = 'https://www.yes123.com.tw/admin/'
 6 | 
 7 | def get_detail():
 8 |     with open('./urls_uniq.txt') as f:
 9 |         for line in f:
10 |             URL = HOST + line.strip()
11 |             print("[INFO] crawling %s"%URL)
12 |             res = r.get(URL, headers={'User-Agent': ''})
13 |             job_id = line.split("=")[-1].strip()
14 |             print(res.status_code)
15 |             with open('./detail_html/java_job_%s.html'%job_id, 'w') as f:
16 |                 f.write(res.text)
17 | 


--------------------------------------------------------------------------------
/lib/toolbox.py:
--------------------------------------------------------------------------------
 1 | """This is a toolbox module for our crawler
 2 | 
 3 | :author: <your name>
 4 | """
 5 | def gen_header(header_str):
 6 |     """This function generates a header_dict from Chrome dev tool for requests library
 7 |     
 8 |     :param header_str: The header string copied from Chrome developer tool.
 9 |     :type header_str: str
10 |     :returns: header dictionary
11 |     """
12 |     header_dict = {}
13 |     rows = header_str.split('\n')
14 |     for row in rows:
15 |         kv_list = row.split(":") # 把每一行用: split()開
16 | 
17 |         # kv_list = ['key1', 'https', '//ianchenhq.com']
18 |         key = kv_list[0] 
19 |         val = ':'.join(kv_list[1:]) # 再用:把1到結尾的element重新組合起來 -> https://ianchenhq.com
20 |         header_dict[key] = val
21 |     return header_dict


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Python ETL Jupyter Notebooks
 2 | 
 3 | ## 如何安裝環境
 4 | 
 5 | 以下是如何在Linux/Mac底下安裝Python環境
 6 | 
 7 | 參照 https://github.com/pyenv/pyenv/wiki
 8 | 
 9 | ### Linux
10 | 
11 | ```
12 | # 更新系統套件版本列表
13 | sudo apt-get update
14 | 
15 | # 安裝C語言build tools
16 | sudo apt-get install -y make build-essential libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev wget curl llvm libncurses5-dev xz-utils tk-dev
17 | ```
18 | 
19 | ### MacOS
20 | 
21 | ```
22 | # 安裝homebrew
23 | /usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
24 | 
25 | # 安裝一些系統套件
26 | brew install openssl readline xz
27 | ```
28 | 
29 | ### Linux/MacOS
30 | 
31 | ```
32 | # 安裝pyenv
33 | curl -L https://raw.githubusercontent.com/pyenv/pyenv-installer/master/bin/pyenv-installer | bash
34 | ```


--------------------------------------------------------------------------------
/ptt/crawler.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import re
 3 | import os
 4 | 
 5 | import requests
 6 | 
 7 | HOST = "https://www.ptt.cc"
 8 | URL_TPL = "https://www.ptt.cc/bbs/Gossiping/index%s.html"
 9 | PG_TO_CRAWL = 5
10 | 
11 | resp = requests.get("https://www.ptt.cc/bbs/Gossiping/index.html", headers={'cookie': 'over18=1'})
12 | 
13 | total_page = int(re.findall('/bbs/Gossiping/index(\d+).html', resp.text)[-1]) + 1
14 | 
15 | for pg in range(total_page, total_page - PG_TO_CRAWL, -1):
16 |     url = URL_TPL%pg
17 |     print(url)
18 |     resp = requests.get(url, headers={'cookie': 'over18=1'})
19 |     articles = re.findall('/bbs/Gossiping/M.+\.html', resp.text)
20 |     for link in [HOST + link for link in articles]:
21 |         resp = requests.get(link,  headers={'cookie': 'over18=1'})
22 |         with open('./data/%s'%(os.path.basename(link)), 'w') as f:
23 |             f.write(resp.text)
24 | 
25 | 


--------------------------------------------------------------------------------
/bb105/yes123/README.md:
--------------------------------------------------------------------------------
 1 | # YES123 爬蟲
 2 | 
 3 | ## Dependencies
 4 | 
 5 | 1. Python 3.5+
 6 | 1. pip
 7 | 1. Docker
 8 | 
 9 | ## Getting Started
10 | 
11 | ```bash
12 | virtualenv venv
13 | source venv/bin/activate
14 | pip install -r requirements.txt
15 | 
16 | # Start Rabbitmq
17 | docker run --name some-rabbitmq -p 15672:15672 -p 5672:5672 -e RABBITMQ_USERNAME=celery -e RABBITMQ_PASSWORD=celery -d bitnami/rabbitmq:latest
18 | 
19 | # Start RethinkDB
20 | docker run --name some-rethink -p 28015:28015 -p 8080:8080 -v "$PWD:/data" -d rethinkdb:2.3.6
21 | 
22 | # Start celery worker, c is the concurrency level
23 | celery -A tasks worker -c 4 --loglevel=info
24 | 
25 | # In another terminal
26 | source venv/bin/activate
27 | 
28 | # Launch job manually
29 | python yes123.py
30 | 
31 | ```
32 | 
33 | Add the job dispatcher in crontab for automatic job dispatch
34 | ```
35 | # crontab
36 | * * * * * python yes123.py
37 | ```


--------------------------------------------------------------------------------
/cb101/CB101_20180607.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |   "nbformat": 4,
 3 |   "nbformat_minor": 0,
 4 |   "metadata": {
 5 |     "colab": {
 6 |       "name": "Untitled0.ipynb",
 7 |       "version": "0.3.2",
 8 |       "views": {},
 9 |       "default_view": {},
10 |       "provenance": [],
11 |       "collapsed_sections": []
12 |     },
13 |     "kernelspec": {
14 |       "name": "python3",
15 |       "display_name": "Python 3"
16 |     }
17 |   },
18 |   "cells": [
19 |     {
20 |       "metadata": {
21 |         "id": "WhBDAFmmOHGq",
22 |         "colab_type": "code",
23 |         "colab": {
24 |           "autoexec": {
25 |             "startup": false,
26 |             "wait_interval": 0
27 |           }
28 |         }
29 |       },
30 |       "cell_type": "code",
31 |       "source": [
32 |         "https://github.com/ianchen06/distributed_crawler"
33 |       ],
34 |       "execution_count": 0,
35 |       "outputs": []
36 |     }
37 |   ]
38 | }


--------------------------------------------------------------------------------
/bb106/bb106_20180402.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "metadata": {},
 6 |    "source": [
 7 |     "https://github.com/ianchen06/pythonetl/raw/master/bb106/google_trends.py"
 8 |    ]
 9 |   },
10 |   {
11 |    "cell_type": "markdown",
12 |    "metadata": {},
13 |    "source": [
14 |     "https://github.com/ianchen06/google_trends_crawler"
15 |    ]
16 |   },
17 |   {
18 |    "cell_type": "markdown",
19 |    "metadata": {},
20 |    "source": [
21 |     "celery\n",
22 |     "\n",
23 |     "http://www.celeryproject.org"
24 |    ]
25 |   },
26 |   {
27 |    "cell_type": "markdown",
28 |    "metadata": {},
29 |    "source": [
30 |     "rabbitmq\n",
31 |     "\n",
32 |     "https://www.rabbitmq.com"
33 |    ]
34 |   }
35 |  ],
36 |  "metadata": {
37 |   "kernelspec": {
38 |    "display_name": "Python 3",
39 |    "language": "python",
40 |    "name": "python3"
41 |   },
42 |   "language_info": {
43 |    "codemirror_mode": {
44 |     "name": "ipython",
45 |     "version": 3
46 |    },
47 |    "file_extension": ".py",
48 |    "mimetype": "text/x-python",
49 |    "name": "python",
50 |    "nbconvert_exporter": "python",
51 |    "pygments_lexer": "ipython3",
52 |    "version": "3.6.4"
53 |   }
54 |  },
55 |  "nbformat": 4,
56 |  "nbformat_minor": 2
57 | }
58 | 


--------------------------------------------------------------------------------
/bb106/google_trends.py:
--------------------------------------------------------------------------------
 1 | # coding: utf-8
 2 | import sys
 3 | import datetime
 4 | 
 5 | from pymongo import MongoClient
 6 | import pymongo
 7 | import requests
 8 | import json
 9 | 
# Usage: python google_trends.py <start_date as YYYYMMDD> <days>
start_date = sys.argv[1]
days = int(sys.argv[2])

conn = MongoClient()

# The values below are the same for every request, so they are built once.
base_date = datetime.datetime.strptime(start_date, '%Y%m%d')
url = "https://trends.google.com/trends/hottrends/hotItems"
headers = {
    "accept": "*/*",
    "accept-encoding": "gzip, deflate, br",
    "accept-language": "en-US,en;q=0.9,zh-TW;q=0.8,zh;q=0.7",
    "cache-control": "no-cache",
    "content-length": "32",
    "content-type": "application/x-www-form-urlencoded;charset=UTF-8",
    "origin": "https://trends.google.com",
    "pragma": "no-cache",
    "referer": "https://trends.google.com/trends/hottrends",
    "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36"
}

for day in range(1, days + 1):
    # Step back one day at a time, starting with the day before start_date.
    dstr = (base_date - datetime.timedelta(days=day)).strftime("%Y%m%d")

    print("[DEBUG] Requesting %s"%dstr)
    payload = {
        "ajax": "1",
        "pn": "p12",
        "htd": dstr,
        "htv": "l"
    }

    resp = requests.post(url, data=payload, headers=headers)
    data = resp.json()
    print("[INFO] %s"%data.get('oldestVisibleDate'))
    # Use the response's date as the primary key, so inserting the same
    # date again raises DuplicateKeyError instead of storing a second copy.
    data['_id'] = data.get('oldestVisibleDate')

    # conn.<db_name>.<table_name>.<operation>
    try:
        # insert_one replaces Collection.insert, which is deprecated and
        # no longer exists in PyMongo 4.
        res = conn.crawler.google_trends.insert_one(data)
        print("[INFO] Inserted id %s"%res.inserted_id)
    except pymongo.errors.DuplicateKeyError as e:
        print(e)
        print("[INFO] %s exists, skipping"%data['_id'])
52 | 


--------------------------------------------------------------------------------
/bb104/appledaily.py:
--------------------------------------------------------------------------------
 1 | """
 2 |     Apple Daily Crawler
 3 | 
 4 |     author: Ian Chen <ianchen06@gmail.com>
 5 | """
 6 | import re
 7 | import csv
 8 | 
 9 | import requests 
10 | from bs4 import BeautifulSoup
11 | 
DOMAIN = "http://www.appledaily.com.tw"

# Output CSV used by article_crawler; opened once at import time and left open.
# newline='' is what the csv module asks for on files it writes, so row
# terminators are not translated a second time. The explicit UTF-8 encoding
# keeps non-ASCII text writable whatever the platform's default encoding is.
f = open('./data.csv', 'w', newline='', encoding='utf-8')
headers = ['title', 'dt', 'content', 'view_count']
writer = csv.DictWriter(f, fieldnames=headers)

writer.writeheader()
19 | 
def article_crawler(url):
    """
    Crawls one article url, extracts its fields and appends them to the CSV

    args:
        url <str>: article url

    return:
        article_dict <dict>: article dict with the keys
            'title', 'dt', 'content' and 'view_count'
    """
    resp = requests.get(url)
    soup = BeautifulSoup(resp.text, 'html5lib')

    article = {}
    article['title']   = soup.select_one('#h1').text.replace('\u3000',' ').strip()
    article['dt']      = soup.select_one('div.gggs > time').text.strip()
    article['content'] = soup.select_one('#summary').text.strip()

    # Look the counter element up once. Fall back to 0 when the element is
    # missing or its text holds no digits.
    counter = soup.select_one('div.urcc > a.function_icon.clicked')
    digits = re.findall(r'\d+', counter.text) if counter else []
    article['view_count'] = int(digits[0]) if digits else 0

    writer.writerow(article)
    f.flush() # Flush here so we don't lose data on exception
    return article
45 | 
def list_crawler(pg=5):
    """
    Crawls appledaily's realtime news list pages and every article they link to

    args:
        pg <int>: number of pages to crawl, starting from page 1

    return:
        article_data <list>: the dicts returned by article_crawler,
            in crawl order
    """
    url_tpl = DOMAIN + "/realtimenews/section/new/%s"

    article_data = []

    for p in range(1, pg + 1):
        page_url = url_tpl % p
        print("[INFO] crawling %s" % page_url)
        resp = requests.get(page_url)
        urls = [DOMAIN + x for x in re.findall('href="(/realtimenews/article/.*/.*/.*/.+)" target', resp.text)]
        for article_url in urls:
            article_data.append(article_crawler(article_url))

    # The original collected the results but never handed them back.
    return article_data
63 | 
if __name__ == "__main__":
    # Run as a script: crawl only the first page of the realtime-news list.
    list_crawler(1)
66 | 
67 | 


--------------------------------------------------------------------------------
/bb105/ptt_crawler.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # coding: utf-8
  3 | import re
  4 | 
  5 | import requests
  6 | 
# Directory that dump_page writes the downloaded article HTML into.
DATA_PATH = '/tmp/ptt_data'
# Number of list pages the __main__ block walks, counting down from the newest.
CRAWL_PAGE_CNT = 10
# List-page URL; {} takes the page number, or '' for the board's landing page.
URL_TEMPLATE = "https://www.ptt.cc/bbs/Gossiping/index{}.html"
# Scheme and host prepended to the site-relative article links.
HOST = "https://www.ptt.cc"
 11 | 
 12 | 
 13 | def get_w_cookie(url):
 14 |     """GET HTTP url with custom header containing cookie info for PTT
 15 | 
 16 |     Parameters
 17 |     ----------
 18 |     url : str
 19 |         PTT八卦版需要驗證年齡的URL，
 20 |         如：https://www.ptt.cc/bbs/Gossiping/index31063.html
 21 | 
 22 |     Returns
 23 |     -------
 24 |     Response
 25 |         Requestsµ模組的Response Object
 26 |     """
 27 |     custom_headers = {
 28 |         "cookie": "over18=1;"
 29 |     }
 30 |     resp = requests.get(url, headers=custom_headers)
 31 |     return resp
 32 | 
 33 | 
 34 | def get_total_page_cnt():
 35 |     """取得現在ptt板塊的總頁數
 36 | 
 37 |     Parameters
 38 |     ----------
 39 | 
 40 |     Returns
 41 |     -------
 42 |     int
 43 |        現在ptt板塊的總頁數
 44 |     """
 45 |     url = URL_TEMPLATE.format('')
 46 |     resp = get_w_cookie(url)
 47 | 
 48 |     # 這個符號
 49 |     # ‹-> &lsaquo;
 50 |     total_page_cnt = int(re.findall('href="/bbs/Gossiping/index(\d+).html">&lsaquo; 上頁', resp.text)[0]) + 1
 51 |     return total_page_cnt
 52 | 
 53 | def get_list_page(url):
 54 |     """GET列表頁，取得內文頁的連結們
 55 | 
 56 |     Parameters
 57 |     ----------
 58 |     url : str
 59 |         PTT 列表頁URL
 60 | 
 61 |     Returns
 62 |     -------
 63 |     list
 64 |        PTT內文頁的links
 65 |     """
 66 |     resp = get_w_cookie(url)
 67 |     links = re.findall('<a href="(/bbs/Gossiping/M.+\.html)">.+</a>', resp.text)
 68 |     detail_page_links = [HOST + link for link in links]
 69 |     return detail_page_links
 70 | 
 71 | def dump_page(url):
 72 |     """GET url的HTML並且寫到檔案裡
 73 | 
 74 |     Parameters
 75 |     ----------
 76 |     url : str
 77 |         PTT 內文頁URL
 78 | 
 79 |     Returns
 80 |     -------
 81 |     str
 82 |        儲存的檔案名稱
 83 |     """
 84 |     filename = "_".join(url.split('/')[-1].split('.')[:-1]) + '.html'
 85 |     resp = get_w_cookie(url)
 86 | 
 87 |     with open(DATA_PATH + '/' + filename, 'w') as f:
 88 |         f.write(resp.text)
 89 |     return filename
 90 | 
 91 | if __name__ == "__main__":
 92 |     """
 93 |     以下的code只有被單獨跑的時候才會執行
 94 |     被import的時候不會執行
 95 |     """
 96 |     total_page_cnt = get_total_page_cnt()
 97 | 
 98 |     for pg in range(total_page_cnt, total_page_cnt - CRAWL_PAGE_CNT, -1):
 99 |         url = URL_TEMPLATE.format(pg)
100 |         for link in get_list_page(url):
101 |             print(link)
102 |             dump_page(link)
103 | 
104 | 


--------------------------------------------------------------------------------
/datasets/eng_stop_words.txt:
--------------------------------------------------------------------------------
  1 | –
  2 | !!
  3 | ?!
  4 | ??
  5 | !?
  6 | `
  7 | ``
  8 | ''
  9 | -lrb-
 10 | -rrb-
 11 | -lsb-
 12 | -rsb-
 13 | ,
 14 | .
 15 | :
 16 | ;
 17 | "
 18 | '
 19 | ?
 20 | <
 21 | >
 22 | {
 23 | }
 24 | [
 25 | ]
 26 | +
 27 | -
 28 | (
 29 | )
 30 | &
 31 | %
 32 | $
 33 | @
 34 | !
 35 | ^
 36 | #
 37 | *
 38 | ..
 39 | ...
 40 | 'll
 41 | 's
 42 | 'm
 43 | a
 44 | about
 45 | above
 46 | after
 47 | again
 48 | against
 49 | all
 50 | am
 51 | an
 52 | and
 53 | any
 54 | are
 55 | aren't
 56 | as
 57 | at
 58 | be
 59 | because
 60 | been
 61 | before
 62 | being
 63 | below
 64 | between
 65 | both
 66 | but
 67 | by
 68 | can
 69 | can't
 70 | cannot
 71 | could
 72 | couldn't
 73 | did
 74 | didn't
 75 | do
 76 | does
 77 | doesn't
 78 | doing
 79 | don't
 80 | down
 81 | during
 82 | each
 83 | few
 84 | for
 85 | from
 86 | further
 87 | had
 88 | hadn't
 89 | has
 90 | hasn't
 91 | have
 92 | haven't
 93 | having
 94 | he
 95 | he'd
 96 | he'll
 97 | he's
 98 | her
 99 | here
100 | here's
101 | hers
102 | herself
103 | him
104 | himself
105 | his
106 | how
107 | how's
108 | i
109 | i'd
110 | i'll
111 | i'm
112 | i've
113 | if
114 | in
115 | into
116 | is
117 | isn't
118 | it
119 | it's
120 | its
121 | itself
122 | let's
123 | me
124 | more
125 | most
126 | mustn't
127 | my
128 | myself
129 | no
130 | nor
131 | not
132 | of
133 | off
134 | on
135 | once
136 | only
137 | or
138 | other
139 | ought
140 | our
141 | ours 
142 | ourselves
143 | out
144 | over
145 | own
146 | same
147 | shan't
148 | she
149 | she'd
150 | she'll
151 | she's
152 | should
153 | shouldn't
154 | so
155 | some
156 | such
157 | than
158 | that
159 | that's
160 | the
161 | their
162 | theirs
163 | them
164 | themselves
165 | then
166 | there
167 | there's
168 | these
169 | they
170 | they'd
171 | they'll
172 | they're
173 | they've
174 | this
175 | those
176 | through
177 | to
178 | too
179 | under
180 | until
181 | up
182 | very
183 | was
184 | wasn't
185 | we
186 | we'd
187 | we'll
188 | we're
189 | we've
190 | were
191 | weren't
192 | what
193 | what's
194 | when
195 | when's
196 | where
197 | where's
198 | which
199 | while
200 | who
201 | who's
202 | whom
203 | why
204 | why's
205 | with
206 | won't
207 | would
208 | wouldn't
209 | you
210 | you'd
211 | you'll
212 | you're
213 | you've
214 | your
215 | yours
216 | yourself
217 | yourselves
218 | ###
219 | return
220 | arent
221 | cant
222 | couldnt
223 | didnt
224 | doesnt
225 | dont
226 | hadnt
227 | hasnt
228 | havent
229 | hes
230 | heres
231 | hows
232 | im
233 | isnt
234 | its
235 | lets
236 | mustnt
237 | shant
238 | shes
239 | shouldnt
240 | thats
241 | theres
242 | theyll
243 | theyre
244 | theyve
245 | wasnt
246 | were
247 | werent
248 | whats
249 | whens
250 | wheres
251 | whos
252 | whys
253 | wont
254 | wouldnt
255 | youd
256 | youll
257 | youre
258 | youve
259 | 


--------------------------------------------------------------------------------
/bb102/cai_speech.txt:
--------------------------------------------------------------------------------
  1 | 各位友邦的元首與貴賓、各國駐台使節及代表、現場的好朋友，全體國人同胞，大家好
  2 | 
  3 | 感謝與承擔
  4 | 
  5 | 就在剛剛，我和陳建仁已經在總統府裡面，正式宣誓就任中華民國第十四任總統與副總統。我們要感謝這塊土地對我們的栽培，感謝人民對我們的信任，以及，最重要的，感謝這個國家的民主機制，讓我們透過和平的選舉過程，實現第三次政黨輪替，並且克服種種不確定因素，順利渡過長達四個月的交接期，完成政權和平移轉。
  6 | 
  7 | 台灣，再一次用行動告訴世界，作為一群民主人與自由人，我們有堅定的信念，去捍衛民主自由的生活方式。這段旅程，我們每一個人都參與其中。親愛的台灣人民，我們做到了。
  8 | 
  9 | 我要告訴大家，對於一月十六日的選舉結果，我從來沒有其他的解讀方式。人民選擇了新總統、新政府，所期待的就是四個字：解決問題。此時此刻，台灣的處境很困難，迫切需要執政者義無反顧的承擔。這一點，我不會忘記。
 10 | 
 11 | 我也要告訴大家，眼前的種種難關，需要我們誠實面對，需要我們共同承擔。所以，這個演說是一個邀請，我要邀請全體國人同胞一起來，扛起這個國家的未來。
 12 | 
 13 | 國家不會因為領導人而偉大；全體國民的共同奮鬥，才讓這個國家偉大。總統該團結的不只是支持者，總統該團結的是整個國家。團結是為了改變，這是我對這個國家最深切的期待。在這裡，我要誠懇地呼籲，請給這個國家一個機會，讓我們拋下成見，拋下過去的對立，我們一起來完成新時代交給我們的使命。
 14 | 
 15 | 在我們共同奮鬥的過程中，身為總統，我要向全國人民宣示，未來我和新政府，將領導這個國家的改革，展現決心，絕不退縮。
 16 | 
 17 | 為年輕人打造一個更好的國家
 18 | 
 19 | 未來的路並不好走，台灣需要一個正面迎向一切挑戰的新政府，我的責任就是領導這個新政府。
 20 | 
 21 | 我們的年金制度，如果不改，就會破產。
 22 | 我們僵化的教育制度，已經逐漸與社會脈動脫節。
 23 | 我們的能源與資源十分有限，我們的經濟缺乏動能，舊的代工模式已經面臨瓶頸，整個國家極需要新的經濟發展模式。
 24 | 我們的人口結構急速老化，長照體系卻尚未健全。
 25 | 我們的人口出生率持續低落，完善的托育制度卻始終遙遙無期。
 26 | 我們環境汙染問題仍然嚴重。
 27 | 我們國家的財政並不樂觀。
 28 | 我們的司法已經失去人民的信任。
 29 | 我們的食品安全問題，困擾著所有家庭。
 30 | 我們的貧富差距越來越嚴重。
 31 | 我們的社會安全網還是有很多破洞。
 32 | 最重要的，我要特別強調，我們的年輕人處於低薪的處境，他們的人生，動彈不得，對於未來，充滿無奈與茫然。
 33 | 
 34 | 年輕人的未來是政府的責任。如果不友善的結構沒有改變，再多個人菁英的出現，都不足以讓整體年輕人的處境變好。我期許自己，在未來的任期之內，要一步一步，從根本的結構來解決這個國家的問題。
 35 | 
 36 | 這就是我想為台灣的年輕人做的事。雖然我沒有辦法立刻幫所有的年輕人加薪，但是我願意承諾，新政府會立刻展開行動。請給我們一點時間，也請跟我們一起走上改革的這一條路。
 37 | 
 38 | 改變年輕人的處境，就是改變國家的處境。一個國家的年輕人沒有未來，這個國家必定沒有未來。幫助年輕人突破困境，實現世代正義，把一個更好的國家交到下一代手上，就是新政府重大的責任。
 39 | 
 40 | 第一、經濟結構的轉型
 41 | 
 42 | 要打造一個更好的國家，未來，新政府要做到以下幾件事情。
 43 | 
 44 | 首先，就是讓台灣的經濟結構轉型。這是新政府所必須承擔的最艱鉅使命。我們不要妄自菲薄，更不要失去信心。台灣有很多別的國家沒有的優勢，我們有海洋經濟的活力和靭性，高素質的人力資源、務實可靠的工程師文化、完整的產業鏈、敏捷靈活的中小企業，以及，永不屈服的創業精神。 
 45 | 
 46 | 我們要讓台灣經濟脫胎換骨，就必須從現在起就下定決心，勇敢地走出另外一條路。這一條路，就是打造台灣經濟發展的新模式。
 47 | 
 48 | 新政府將打造一個以創新、就業、分配為核心價值，追求永續發展的新經濟模式。改革的第一步，就是強化經濟的活力與自主性，加強和全球及區域的連結，積極參與多邊及雙邊經濟合作及自由貿易談判，包括TPP、RCEP等，並且，推動新南向政策，提升對外經濟的格局及多元性，告別以往過於依賴單一市場的現象。 
 49 | 
 50 | 除此之外，新政府相信，唯有激發新的成長動能，我們才能突破當前經濟的停滯不前。我們會以出口和內需作為雙引擎，讓企業生產和人民生活互為表裡，讓對外貿易和在地經濟緊密連結。
 51 | 
 52 | 我們會優先推動五大創新研發計畫，藉著這些產業來重新塑造台灣的全球競爭力。我們也要積極提升勞動生產力，保障勞工權益，讓薪資和經濟成長能同步提升。
 53 | 
 54 | 這是台灣經濟發展的關鍵時刻。我們有決心，也有溝通能力。我們已經有系統性的規劃，未來，會以跨部會聯手的模式，把整個國家的力量集結起來，一起來催生這個新模式。
 55 | 
 56 | 在經濟發展的同時，我們不要忘記對環境的責任。經濟發展的新模式會和國土規劃、區域發展及環境永續，相互結合。產業的佈局和國土的利用，應該拋棄零碎的規畫，和短視近利的眼光。我們必須追求區域的均衡發展，這需要中央來規畫、整合，也需要地方政府充分發揮區域聯合治理的精神。 
 57 | 
 58 | 我們也不能再像過去，無止盡地揮霍自然資源及國民健康。所以，對各種汙染的控制，我們會嚴格把關，更要讓台灣走向循環經濟的時代，把廢棄物轉換為再生資源。對於能源的選擇，我們會以永續的觀念去逐步調整。新政府會嚴肅看待氣候變遷、國土保育、災害防治的相關議題，因為，我們只有一個地球，我們也只有一個台灣。
 59 | 
 60 | 第二、強化社會安全網
 61 | 
 62 | 新政府必須要承擔的第二件事情，就是強化台灣的社會安全網。這些年，幾件關於兒少安全及隨機殺人的事件，都讓整個社會震驚。不過，一個政府不能永遠在震驚，它必須要有同理心。沒有人可以替受害者家屬承受傷痛，但是，一個政府，尤其是第一線處理問題的人，必須要讓受害者以及家屬覺得，不幸事件發生的時候，政府是站在他們這一邊。
 63 | 
 64 | 除了同理心之外，政府更應該要提出解決的方法。全力防止悲劇一再發生，從治安、教育、心理健康、社會工作等各個面向，積極把破洞補起來。尤其是治安與反毒的工作，這些事情，新政府會用最嚴肅的態度和行動來面對。 
 65 | 
 66 | 在年金的改革方面，這是攸關台灣生存發展的關鍵改革，我們不應該遲疑，也不可以躁進。由陳建仁副總統擔任召集人的年金改革委員會，已經緊鑼密鼓在籌備之中。過去的政府在這個議題上，曾經有過一些努力。但是，缺乏社會的參與。新政府的做法，是發動一個集體協商，因為年金改革必須是一個透過協商來團結所有人的過程。
 67 | 
 68 | 這就是為什麼，我們要召開年金改革國是會議，由不同階層、不同職業代表，在社會團結的基礎上，共同協商。一年之內，我們會提出可行的改革方案。無論是勞工還是公務員，每一個國民的退休生活都應該得到公平的保障。
 69 | 
 70 | 另外，在長期照顧的議題上，我們將會把優質、平價、普及的長期照顧系統建立起來。和年金改革一樣，長照體系也是一個社會總動員的過程。新政府的做法是由政府主導和規劃，鼓勵民間發揮社區主義的精神，透過社會集體互助的力量，來建立一套妥善而完整的體系。每一個老年人都可以在自己熟悉的社區，安心享受老年生活，每一個家庭的照顧壓力將會減輕。照顧老人的工作不能完全讓它變成自由市場。我們會把責任扛起來，按部就班來規劃與執行，為超高齡社會的來臨，做好準備。 
 71 | 
 72 | 第三、社會的公平與正義
 73 | 
 74 | 新政府要承擔的第三件事情，就是社會的公平與正義。在這個議題上，新政府會持續和公民社會一起合作，讓台灣的政策更符合多元、平等、開放、透明、人權的價值，讓台灣的民主機制更加深化與進化。
 75 | 
 76 | 新的民主制度要能夠上路，我們必須先找出面對過去的共同方法。未來，我會在總統府成立真相與和解委員會，用最誠懇與謹慎的態度，來處理過去的歷史。追求轉型正義的目標是在追求社會的真正和解，讓所有台灣人都記取那個時代的錯誤。
 77 | 
 78 | 我們將從真相的調查與整理出發，預計在三年之內，完成台灣自己的轉型正義調查報告書。我們將會依據調查報告所揭示的真相，來進行後續的轉型正義工作。挖掘真相、彌平傷痕、釐清責任。從此以後，過去的歷史不再是台灣分裂的原因，而是台灣一起往前走的動力。
 79 | 
 80 | 同樣在公平正義的議題上，我會秉持相同的原則，來面對原住民族的議題。今天的就職典禮，原住民族的小朋友在唱國歌之前，先唱了他們部落傳統的古調。這象徵了，我們不敢忘記，這個島上先來後到的順序。 
 81 | 
 82 | 新政府會用道歉的態度，來面對原住民族相關議題，重建原民史觀，逐步推動自治，復育語言文化，提升生活照顧，這就是我要領導新政府推動的改變。
 83 | 
 84 | 接下來，新政府也會積極推動司法改革。這是現階段台灣人民最關心的議題。司法無法親近人民、不被人民信任、司法無法有效打擊犯罪，以及，司法失去作為正義最後一道防線的功能，是人民普遍的感受。
 85 | 
 86 | 為了展現新政府的決心，我們會在今年十月召開司法國是會議，透過人民實際的參與，讓社會力進來，一起推動司法改革。司法必須回應人民的需求，不再只是法律人的司法，而是全民的司法。司法改革也不只是司法人的家務事，而是全民參與的改革。這就是我對司法改革的期待。
 87 | 
 88 | 第四、區域的和平穩定發展及兩岸關係
 89 | 
 90 | 新政府要承擔的第四件事情，是區域的和平穩定與發展，以及妥善處理兩岸關係。過去三十年，無論是對亞洲或是全球，都是變動最劇烈的時期；而全球及區域的經濟穩定和集體安全，也是各國政府越來越關切的課題。
 91 | 
 92 | 台灣在區域發展當中，一直是不可或缺的關鍵角色。但是近年來，區域的情勢快速變動，如果台灣不善用自己的實力和籌碼，積極參與區域事務，不但將會變得無足輕重，甚至可能被邊緣化，喪失對於未來的自主權。
 93 | 
 94 | 我們有危機，但也有轉機。台灣現階段的經濟發展，和區域中許多國家高度關聯和互補。如果將打造經濟發展新模式的努力，透過和亞洲、乃至亞太區域的國家合作，共同形塑未來的發展策略，不但可以為區域的經濟創新、結構調整和永續發展，做出積極的貢獻，更可以和區域內的成員，建立緊密的「經濟共同體」意識。
 95 | 
 96 | 我們要和其他國家共享資源、人才與市場，擴大經濟規模，讓資源有效利用。「新南向政策」就是基於這樣的精神。我們會在科技、文化與經貿等各層面，和區域成員廣泛交流合作，尤其是增進與東協、印度的多元關係。為此，我們也願意和對岸，就共同參與區域發展的相關議題，坦誠交換意見，尋求各種合作與協力的可能性。
 97 | 
 98 | 在積極發展經濟的同時，亞太地區的安全情勢也變得越來越複雜，而兩岸關係，也成為建構區域和平與集體安全的重要一環。這個建構的進程，台灣會做一個「和平的堅定維護者」，積極參與，絕不缺席；我們也將致力維持兩岸關係的和平穩定；我們更會努力促成內部和解，強化民主機制，凝聚共識，形成一致對外的立場。
 99 | 
100 | 對話和溝通，是我們達成目標最重要的關鍵。台灣也要成為一個「和平的積極溝通者」，我們將和相關的各方，建立常態、緊密的溝通機制，隨時交換意見，防止誤判，建立互信，有效解決爭議。我們將謹守和平原則、利益共享原則，來處理相關的爭議。
101 | 
102 | 我依照中華民國憲法當選總統，我有責任捍衛中華民國的主權和領土；對於東海及南海問題，我們主張應擱置爭議，共同開發。
103 | 
104 | 兩岸之間的對話與溝通，我們也將努力維持現有的機制。1992年兩岸兩會秉持相互諒解、求同存異的政治思維，進行溝通協商，達成若干的共同認知與諒解，我尊重這個歷史事實。92年之後，20多年來雙方交流、協商所累積形成的現狀與成果，兩岸都應該共同珍惜與維護，並在這個既有的事實與政治基礎上，持續推動兩岸關係和平穩定發展；新政府會依據中華民國憲法、兩岸人民關係條例及其他相關法律，處理兩岸事務。兩岸的兩個執政黨應該要放下歷史包袱，展開良性對話，造福兩岸人民。
105 | 
106 | 我所講的既有政治基礎，包含幾個關鍵元素，第一，1992年兩岸兩會會談的歷史事實與求同存異的共同認知，這是歷史事實；第二，中華民國現行憲政體制；第三，兩岸過去20多年來協商和交流互動的成果；第四，台灣民主原則及普遍民意。
107 | 
108 | 第五、外交與全球性議題
109 | 
110 | 新政府要承擔的第五件事情，是善盡地球公民的責任，在外交與全球性的議題上做出貢獻。讓台灣走向世界，也要讓世界走進台灣。
111 | 
112 | 現場有許多來自各國的元首與使節團，我要特別謝謝他們，長久以來一直幫助台灣，讓我們有機會參與國際社會。未來，我們會持續透過官方互動、企業投資與民間合作各種方式，分享台灣發展的經驗，與友邦建立永續的夥伴關係。
113 | 
114 | 台灣是全球公民社會的模範生，民主化以來，我們始終堅持和平、自由、民主及人權的普世價值。我們會秉持這個精神，加入全球議題的價值同盟。我們會繼續深化與包括美國、日本、歐洲在內的友好民主國家的關係，在共同的價值基礎上，推動全方位的合作。
115 | 
116 | 我們會積極參與國際經貿合作及規則制定，堅定維護全球的經濟秩序，並且融入重要的區域經貿體系。我們也不會在防制全球暖化、氣候變遷的議題上缺席。我們將會在行政院設立專責的能源和減碳辦公室，並且根據COP21巴黎協議的規定，定期檢討溫室氣體的減量目標，與友好國家攜手，共同維護永續的地球。
117 | 
118 | 同時，新政府會支持並參與，全球性新興議題的國際合作，包括人道救援、醫療援助、疾病的防治與研究、反恐合作，以及共同打擊跨國犯罪，讓台灣成為國際社會不可或缺的夥伴。
119 | 
120 | 結語
121 | 
122 | 1996年台灣第一次總統直選，到今天剛好20年。過去20年，在幾任政府以及公民社會的努力之下，我們成功渡過了許多新興民主國家必須面對的難關。在這個過程中，我們曾經有過許多感動人心的時刻和故事，不過，正如同世界上其他國家一樣，我們也曾經有過焦慮、不安、矛盾、與對立。
123 | 
124 | 我們看到了社會的對立，進步與保守的對立，環境與開發的對立，以及，政治意識之間的對立。這些對立，曾經激發出選舉時的動員能量，不過也因為這些對立，我們的民主逐漸失去了解決問題的能力。
125 | 
126 | 民主是一個進程，每一個時代的政治工作者，都要清楚認識他身上所肩負的責任。民主會前進，民主也有可能倒退。今天，我站在這裡，就是要告訴大家，倒退不會是我們的選項。新政府的責任就是把台灣的民主推向下一個階段：以前的民主是選舉的輸贏，現在的民主則是關於人民的幸福；以前的民主是兩個價值觀的對決，現在的民主則是不同價值觀的對話。
127 | 
128 | 打造一個沒有被意識形態綁架的「團結的民主」，打造一個可以回應社會與經濟問題的「有效率的民主」，打造一個能夠實質照料人民的「務實的民主」，這就是新時代的意義。
129 | 
130 | 只要我們相信，新時代就會來臨。只要這個國家的主人，有堅定的信念，新時代一定會在我們這一代人的手上誕生。
131 | 
132 | 各位親愛的台灣人民，演講要結束了，改革要開始了。從這一刻起，這個國家的擔子交在新政府身上。我會讓大家看見這個國家的改變。
133 | 
134 | 歷史會記得我們這個勇敢的世代，這個國家的繁榮、尊嚴、團結、自信和公義，都有我們努力的痕跡。歷史會記得我們的勇敢，我們在2016年一起把國家帶向新的方向。這塊土地上的每一個人，都因為參與台灣的改變，而感到驕傲。
135 | 
136 | 剛才表演節目中的一首歌曲當中，有一句讓我很感動的歌詞：
137 | （台語）現在是彼一天，勇敢ㄟ台灣人。
138 | 
139 | 各位國人同胞，兩千三百萬的台灣人民，等待已經結束，現在就是那一天。今天，明天，未來的每一天，我們都要做一個守護民主、守護自由、守護這個國家的台灣人。
140 | 
141 | 謝謝大家。
142 | 


--------------------------------------------------------------------------------
/bb104/amazon.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 27,
  6 |    "metadata": {},
  7 |    "outputs": [
  8 |     {
  9 |      "data": {
 10 |       "text/plain": [
 11 |        "'\\n<!doctype html><html class=\"a-no-js\" data-19ax5a9jf=\"dingo\"><head><script>var aPageStart = (new Date()).getTime();</script><meta charset=\"utf-8\">\\n<script type=\\'text/javascript\\'>var ue_t0=ue_t0||+new Date();</script>\\n<script type=\\'text/javascript\\'>\\nvar ue_csm = window,\\n    ue_hob = +new Date();\\n(function(d){var e=d.ue=d.ue||{},f=Date.now||function(){return+new Date};e.d=function(b){return f()-(b?0:d.ue_t0)};e.stub=function(b,a){if(!b[a]){var c=[];b[a]=function(){c.push([c.slice.call(arguments),e.d(),d.ue_id])};b[a].replay=function(b){for(var a;a=c.shift();)b(a[0],a[1],a[2])};b[a].isStub=1}};e.exec=function(b,a){return function(){if(1==window.ueinit)try{return b.apply(this,arguments)}catch(c){ueLogError(c,{attribution:a||\"undefined\",logLevel:\"WARN\"})}}}})(ue_csm);\\n\\n\\n    var ue_err_chan = \\'jserr-rw\\';\\n(function(d,e){function h(f,b){if(!(a.ec>a.mxe)&&f){a.ter.push(f);b=b||{};var c=f.logLevel||b.logLevel;c&&c!==k&&c!==m&&c!==n&&c!==p||a.ec++;c&&c!=k||a.ecf++;b.pageURL=\"\"+(e.location?e.locat'"
 12 |       ]
 13 |      },
 14 |      "execution_count": 27,
 15 |      "metadata": {},
 16 |      "output_type": "execute_result"
 17 |     }
 18 |    ],
 19 |    "source": [
 20 |     "import requests\n",
 21 |     "\n",
 22 |     "URL = \"https://www.amazon.com/s/?page=3&keywords=nintendo+switch\"\n",
 23 |     "\n",
 24 |     "headers = {\n",
 25 |     "    \"User-Agent\":\"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.75 Safari/537.36\"\n",
 26 |     "}\n",
 27 |     "\n",
 28 |     "resp = requests.get(URL, headers=headers)\n",
 29 |     "\n",
 30 |     "resp.text[:1000]"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "code",
 35 |    "execution_count": 9,
 36 |    "metadata": {},
 37 |    "outputs": [],
 38 |    "source": [
 39 |     "from bs4 import BeautifulSoup\n",
 40 |     "\n",
 41 |     "soup = BeautifulSoup(resp.text, 'html5lib')"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "code",
 46 |    "execution_count": 2,
 47 |    "metadata": {},
 48 |    "outputs": [
 49 |     {
 50 |      "data": {
 51 |       "text/plain": [
 52 |        "True"
 53 |       ]
 54 |      },
 55 |      "execution_count": 2,
 56 |      "metadata": {},
 57 |      "output_type": "execute_result"
 58 |     }
 59 |    ],
 60 |    "source": [
 61 |     "# 測試字串是否為xxx開頭\n",
 62 |     "'https://google.com'.startswith(\"https\")"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": 3,
 68 |    "metadata": {},
 69 |    "outputs": [
 70 |     {
 71 |      "data": {
 72 |       "text/plain": [
 73 |        "True"
 74 |       ]
 75 |      },
 76 |      "execution_count": 3,
 77 |      "metadata": {},
 78 |      "output_type": "execute_result"
 79 |     }
 80 |    ],
 81 |    "source": [
 82 |     "# 測試字串內是否含有某字串\n",
 83 |     "'google' in 'https://google.com'"
 84 |    ]
 85 |   },
 86 |   {
 87 |    "cell_type": "code",
 88 |    "execution_count": 17,
 89 |    "metadata": {},
 90 |    "outputs": [],
 91 |    "source": [
 92 |     "urls = [x.get('href') for x in soup.select('a.s-access-detail-page') if x.get('href').startswith('https')]"
 93 |    ]
 94 |   },
 95 |   {
 96 |    "cell_type": "code",
 97 |    "execution_count": 18,
 98 |    "metadata": {},
 99 |    "outputs": [
100 |     {
101 |      "data": {
102 |       "text/plain": [
103 |        "'https://www.amazon.com/FIFA-18-Standard-Nintendo-Switch/dp/B01N1034ZH/ref=sr_1_35/130-5480931-2447335?ie=UTF8&qid=1509586874&sr=8-35&keywords=nintendo+switch'"
104 |       ]
105 |      },
106 |      "execution_count": 18,
107 |      "metadata": {},
108 |      "output_type": "execute_result"
109 |     }
110 |    ],
111 |    "source": [
112 |     "urls[0]"
113 |    ]
114 |   },
115 |   {
116 |    "cell_type": "code",
117 |    "execution_count": 23,
118 |    "metadata": {},
119 |    "outputs": [],
120 |    "source": [
121 |     "import re\n",
122 |     "\n",
123 |     "detail_urls = []\n",
124 |     "\n",
125 |     "for url in urls:\n",
126 |     "    detail_urls.append(re.findall('(https://www.amazon.com/.+/dp/.+)/ref=', url)[0])"
127 |    ]
128 |   },
129 |   {
130 |    "cell_type": "code",
131 |    "execution_count": 24,
132 |    "metadata": {},
133 |    "outputs": [
134 |     {
135 |      "data": {
136 |       "text/plain": [
137 |        "['https://www.amazon.com/FIFA-18-Standard-Nintendo-Switch/dp/B01N1034ZH',\n",
138 |        " 'https://www.amazon.com/Nintendo-Switch-Dock-Set/dp/B06ZZ6FGGL',\n",
139 |        " 'https://www.amazon.com/Yoshi-Nintendo-Switch/dp/B072JYTJCH',\n",
140 |        " 'https://www.amazon.com/Nintendo-Joy-L-Neon-Yellow-switch/dp/B06ZYM3LBP',\n",
141 |        " 'https://www.amazon.com/Sonic-Forces-Standard-Nintendo-Switch/dp/B07576J1H6',\n",
142 |        " 'https://www.amazon.com/Mumba-Nintendo-Switch-Heavy-Rubberized-release/dp/B07125JNMY',\n",
143 |        " 'https://www.amazon.com/Nyko-Portable-Docking-Kit-Nintendo-Switch/dp/B071X7C1B4',\n",
144 |        " 'https://www.amazon.com/Nintendo-Joy-Neon-Red-Switch/dp/B01MT8RT5I',\n",
145 |        " 'https://www.amazon.com/Nintendo-JETech-Protective-Shock-Absorption-Anti-Scratch/dp/B07457V4XJ',\n",
146 |        " 'https://www.amazon.com/Premium-Quality-Protective-Portable-Nintendo-Accessories/dp/B071NFKBP7',\n",
147 |        " 'https://www.amazon.com/FastSnail-Nintendo-Switch-Wear-resistant-Handle/dp/B06XZ1PCKK',\n",
148 |        " 'https://www.amazon.com/Rayman-Legends-Definitive-Nintendo-Switch/dp/B0744FX7SR',\n",
149 |        " 'https://www.amazon.com/Nintendo-Switch-Premium-Travel-Console-500-035/dp/B01MS7AI9G',\n",
150 |        " 'https://www.amazon.com/Super-Mario-Odyssey-Accessory-Officially-Licensed/dp/B01A8LQGCQ']"
151 |       ]
152 |      },
153 |      "execution_count": 24,
154 |      "metadata": {},
155 |      "output_type": "execute_result"
156 |     }
157 |    ],
158 |    "source": [
159 |     "detail_urls"
160 |    ]
161 |   },
162 |   {
163 |    "cell_type": "code",
164 |    "execution_count": 26,
165 |    "metadata": {},
166 |    "outputs": [],
167 |    "source": [
168 |     "headers = {\n",
169 |     "    \"User-Agent\":\"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.75 Safari/537.36\"\n",
170 |     "}\n",
171 |     "for url in detail_urls:\n",
172 |     "    resp = requests.get(url, headers=headers)\n",
173 |     "    filename = url.split('/')[-1]\n",
174 |     "    with open('./%s.html'%filename, 'w') as f:\n",
175 |     "        f.write(resp.text)"
176 |    ]
177 |   }
178 |  ],
179 |  "metadata": {
180 |   "kernelspec": {
181 |    "display_name": "Python 3",
182 |    "language": "python",
183 |    "name": "python3"
184 |   },
185 |   "language_info": {
186 |    "codemirror_mode": {
187 |     "name": "ipython",
188 |     "version": 3
189 |    },
190 |    "file_extension": ".py",
191 |    "mimetype": "text/x-python",
192 |    "name": "python",
193 |    "nbconvert_exporter": "python",
194 |    "pygments_lexer": "ipython3",
195 |    "version": "3.6.2"
196 |   }
197 |  },
198 |  "nbformat": 4,
199 |  "nbformat_minor": 2
200 | }
201 | 


--------------------------------------------------------------------------------
/bb102/yes123/job_list.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # coding: utf-8
  3 | import os
  4 | 
  5 | import requests as r
  6 | 
  7 | import job_detail
  8 | 
  9 | URL = "https://www.yes123.com.tw/admin/job_refer_list.asp"  # endpoint that every request in this script POSTs `data` to
 10 | # Form fields sent with each POST to URL; most are empty strings. The 'strrec' key is added after this literal.
 11 | data = {'_mu_chkbox_2': '',
 12 |  '_mu_chkbox_3': '',
 13 |  '_mu_edu_1': '',
 14 |  '_mu_edu_2': '',
 15 |  '_mu_edu_3': '',
 16 |  '_mu_edu_4': '',
 17 |  '_mu_edu_5': '',
 18 |  '_mu_edu_6': '',
 19 |  '_mu_edu_7': '',
 20 |  '_mu_job_1': '',
 21 |  '_mu_lang_1': '',
 22 |  '_mu_lang_2': '',
 23 |  '_mu_lang_3': '',
 24 |  '_mu_psn_1': '',
 25 |  '_mu_sc_1': '',
 26 |  '_mu_sc_2': '',
 27 |  '_mu_se_1': '',
 28 |  '_mu_sf_1': '',
 29 |  '_mu_vc_1': '',
 30 |  '_mu_vc_2': '',
 31 |  '_mu_vc_3': '',
 32 |  '_mu_vc_4': '',
 33 |  '_mu_wk_1': '',
 34 |  '_mu_wk_2': '',
 35 |  '_mu_wk_3': '',
 36 |  '_mu_wk_4': '',
 37 |  '_mu_wk_5': '',
 38 |  '_mu_year_1': '',
 39 |  '_mu_year_2': '',
 40 |  '_rdo_1': '1',
 41 |  '_rdo_2': '1',
 42 |  'find_cert_mode1': '',
 43 |  'find_cert_mode2': '',
 44 |  'find_cert_mode3': '',
 45 |  'find_cert_mode4': '',
 46 |  'find_cert_mode5': '',
 47 |  'find_indy_mode1': '',
 48 |  'find_indy_mode10': '',
 49 |  'find_indy_mode2': '',
 50 |  'find_indy_mode3': '',
 51 |  'find_indy_mode4': '',
 52 |  'find_indy_mode5': '',
 53 |  'find_indy_mode6': '',
 54 |  'find_indy_mode7': '',
 55 |  'find_indy_mode8': '',
 56 |  'find_indy_mode9': '',
 57 |  'find_job_mode1': '',
 58 |  'find_job_mode10': '',
 59 |  'find_job_mode2': '',
 60 |  'find_job_mode3': '',
 61 |  'find_job_mode4': '',
 62 |  'find_job_mode5': '',
 63 |  'find_job_mode6': '',
 64 |  'find_job_mode7': '',
 65 |  'find_job_mode8': '',
 66 |  'find_job_mode9': '',
 67 |  'find_key1': 'java',  # presumably the search keyword (saved pages are also named java_*) - TODO confirm
 68 |  'find_key2': '',
 69 |  'find_key3': '',
 70 |  'find_map_mode1': '',
 71 |  'find_map_mode2': '',
 72 |  'find_map_mode3': '',
 73 |  'find_map_mode4': '',
 74 |  'find_metro_mode1': '',
 75 |  'find_metro_mode10': '',
 76 |  'find_metro_mode2': '',
 77 |  'find_metro_mode3': '',
 78 |  'find_metro_mode4': '',
 79 |  'find_metro_mode5': '',
 80 |  'find_metro_mode6': '',
 81 |  'find_metro_mode7': '',
 82 |  'find_metro_mode8': '',
 83 |  'find_metro_mode9': '',
 84 |  'find_sche_mode1': '',
 85 |  'find_sche_mode2': '',
 86 |  'find_sche_mode3': '',
 87 |  'find_sche_mode4': '',
 88 |  'find_sche_mode5': '',
 89 |  'find_scl_mode1': '',
 90 |  'find_scl_mode10': '',
 91 |  'find_scl_mode2': '',
 92 |  'find_scl_mode3': '',
 93 |  'find_scl_mode4': '',
 94 |  'find_scl_mode5': '',
 95 |  'find_scl_mode6': '',
 96 |  'find_scl_mode7': '',
 97 |  'find_scl_mode8': '',
 98 |  'find_scl_mode9': '',
 99 |  'find_se_work_mode1': '',
100 |  'find_sf_subj_mode1': '',
101 |  'find_subj_mode1': '',
102 |  'find_subj_mode2': '',
103 |  'find_subj_mode3': '',
104 |  'find_sw_mode1': '',
105 |  'find_sw_mode2': '',
106 |  'find_sw_mode3': '',
107 |  'find_sw_mode4': '',
108 |  'find_sw_mode5': '',
109 |  'find_work_mode1': '',
110 |  'find_work_mode10': '',
111 |  'find_work_mode2': '',
112 |  'find_work_mode3': '',
113 |  'find_work_mode4': '',
114 |  'find_work_mode5': '',
115 |  'find_work_mode6': '',
116 |  'find_work_mode7': '',
117 |  'find_work_mode8': '',
118 |  'find_work_mode9': '',
119 |  'find_zone_mode1': '',
120 |  'find_zone_mode10': '',
121 |  'find_zone_mode2': '',
122 |  'find_zone_mode3': '',
123 |  'find_zone_mode4': '',
124 |  'find_zone_mode5': '',
125 |  'find_zone_mode6': '',
126 |  'find_zone_mode7': '',
127 |  'find_zone_mode8': '',
128 |  'find_zone_mode9': '',
129 |  'job_show_type': 'L',
130 |  'order_ascend': 'desc',
131 |  'order_by': 'neworder',
132 |  's_find_cert_mode1': '',
133 |  's_find_cert_mode2': '',
134 |  's_find_cert_mode3': '',
135 |  's_find_cert_mode4': '',
136 |  's_find_cert_mode5': '',
137 |  's_find_indy_mode1': '',
138 |  's_find_indy_mode10': '',
139 |  's_find_indy_mode2': '',
140 |  's_find_indy_mode3': '',
141 |  's_find_indy_mode4': '',
142 |  's_find_indy_mode5': '',
143 |  's_find_indy_mode6': '',
144 |  's_find_indy_mode7': '',
145 |  's_find_indy_mode8': '',
146 |  's_find_indy_mode9': '',
147 |  's_find_job_mode1': '',
148 |  's_find_job_mode10': '',
149 |  's_find_job_mode2': '',
150 |  's_find_job_mode3': '',
151 |  's_find_job_mode4': '',
152 |  's_find_job_mode5': '',
153 |  's_find_job_mode6': '',
154 |  's_find_job_mode7': '',
155 |  's_find_job_mode8': '',
156 |  's_find_job_mode9': '',
157 |  's_find_metro_mode1': '',
158 |  's_find_metro_mode10': '',
159 |  's_find_metro_mode2': '',
160 |  's_find_metro_mode3': '',
161 |  's_find_metro_mode4': '',
162 |  's_find_metro_mode5': '',
163 |  's_find_metro_mode6': '',
164 |  's_find_metro_mode7': '',
165 |  's_find_metro_mode8': '',
166 |  's_find_metro_mode9': '',
167 |  's_find_sche_mode1': '',
168 |  's_find_sche_mode2': '',
169 |  's_find_sche_mode3': '',
170 |  's_find_sche_mode4': '',
171 |  's_find_sche_mode5': '',
172 |  's_find_scl_mode1': '',
173 |  's_find_scl_mode10': '',
174 |  's_find_scl_mode2': '',
175 |  's_find_scl_mode3': '',
176 |  's_find_scl_mode4': '',
177 |  's_find_scl_mode5': '',
178 |  's_find_scl_mode6': '',
179 |  's_find_scl_mode7': '',
180 |  's_find_scl_mode8': '',
181 |  's_find_scl_mode9': '',
182 |  's_find_se_work_mode1': '',
183 |  's_find_sf_subj_mode1': '',
184 |  's_find_subj_mode1': '',
185 |  's_find_subj_mode2': '',
186 |  's_find_subj_mode3': '',
187 |  's_find_sw_mode1': '',
188 |  's_find_sw_mode2': '',
189 |  's_find_sw_mode3': '',
190 |  's_find_sw_mode4': '',
191 |  's_find_sw_mode5': '',
192 |  's_find_work_mode1': '',
193 |  's_find_work_mode10': '',
194 |  's_find_work_mode2': '',
195 |  's_find_work_mode3': '',
196 |  's_find_work_mode4': '',
197 |  's_find_work_mode5': '',
198 |  's_find_work_mode6': '',
199 |  's_find_work_mode7': '',
200 |  's_find_work_mode8': '',
201 |  's_find_work_mode9': '',
202 |  's_find_zone_mode1': '',
203 |  's_find_zone_mode10': '',
204 |  's_find_zone_mode2': '',
205 |  's_find_zone_mode3': '',
206 |  's_find_zone_mode4': '',
207 |  's_find_zone_mode5': '',
208 |  's_find_zone_mode6': '',
209 |  's_find_zone_mode7': '',
210 |  's_find_zone_mode8': '',
211 |  's_find_zone_mode9': '',
212 |  'search_feature': '',
213 |  'search_from': 'joblist',
214 |  'search_item': '1',
215 |  'search_job2': '請選擇行業',
216 |  'search_key_word': 'java',  # same keyword as 'find_key1' above
217 |  'search_multi_loc': '請選擇地區',
218 |  'search_multi_loc2': '請選擇地區',
219 |  'search_multi_loc3': '請選擇地區',
220 |  'search_multi_loc4': '請選擇地區',
221 |  'search_multi_loc5': '請選擇地區',
222 |  'search_multi_loc6': '請選擇地區',
223 |  'search_multi_loc7': '請選擇地區',
224 |  'search_subj': '',
225 |  'search_type': 'job',
226 |  'search_work': '請選擇職務',
227 |  'search_work2': '請選擇職務',
228 |  'search_work3': '請選擇職務',
229 |  'search_work4': '請選擇職務',
230 |  'search_work6': '',
231 |  'search_work7': '請選擇職務',
232 |  'search_work8': '請選擇職務',
233 |  'us_menu': ''}
234 | 
235 | strrec = 0
236 | data['strrec'] = strrec
237 | 
238 | while True:
239 |     print("[INFO] crawling strrec %s"%strrec)
240 |     res = r.post(URL, data=data, headers={'User-Agent': ''})
241 |     res.encoding = 'utf-8'
242 |     if '查無資料' in res.text:
243 |         break
244 |     with open('./list_html/java_{}.html'.format(strrec), 'w') as f:
245 |         f.write(res.text)
246 |     strrec += 20
247 |     data['strrec'] = strrec
248 | 
249 | os.system("cat ./list_html/java_*.html  | grep -Po 'job_refer_comp_job_detail2\.asp\?p_id=.*&job_id=\d*_\d*' | uniq > urls_uniq.txt")
250 | 
251 | job_detail.get_detail()
252 | 


--------------------------------------------------------------------------------
/bb102/donald_trump.txt:
--------------------------------------------------------------------------------
  1 | Chief Justice Roberts, President Carter, President Clinton, President Bush, President Obama, fellow Americans, and people of the world: thank you.
  2 | 
  3 | We, the citizens of America, are now joined in a great national effort to rebuild our country and to restore its promise for all of our people.
  4 | 
  5 | Together, we will determine the course of America and the world for years to come.
  6 | 
  7 | We will face challenges. We will confront hardships. But we will get the job done.
  8 | 
  9 | Every four years, we gather on these steps to carry out the orderly and peaceful transfer of power, and we are grateful to President Obama and First Lady Michelle Obama for their gracious aid throughout this transition. They have been magnificent.
 10 | 
 11 | Today’s ceremony, however, has very special meaning. Because today we are not merely transferring power from one Administration to another, or from one party to another – but we are transferring power from Washington, D.C. and giving it back to you, the American People.
 12 | 
 13 | For too long, a small group in our nation’s Capital has reaped the rewards of government while the people have borne the cost.
 14 | 
 15 | Washington flourished – but the people did not share in its wealth.
 16 | 
 17 | Politicians prospered – but the jobs left, and the factories closed.
 18 | 
 19 | The establishment protected itself, but not the citizens of our country.
 20 | 
 21 | Their victories have not been your victories; their triumphs have not been your triumphs; and while they celebrated in our nation’s Capital, there was little to celebrate for struggling families all across our land.
 22 | 
 23 | That all changes – starting right here, and right now, because this moment is your moment: it belongs to you.
 24 | 
 25 | It belongs to everyone gathered here today and everyone watching all across America. 
 26 | 
 27 | This is your day. This is your celebration.
 28 | 
 29 | And this, the United States of America, is your country.
 30 | 
 31 | What truly matters is not which party controls our government, but whether our government is controlled by the people.
 32 | 
 33 | January 20th 2017, will be remembered as the day the people became the rulers of this nation again. 
 34 | 
 35 | The forgotten men and women of our country will be forgotten no longer.
 36 | 
 37 | Everyone is listening to you now.
 38 | 
 39 | You came by the tens of millions to become part of a historic movement the likes of which the world has never seen before.
 40 | 
 41 | At the center of this movement is a crucial conviction: that a nation exists to serve its citizens.
 42 | 
 43 | Americans want great schools for their children, safe neighborhoods for their families, and good jobs for themselves.
 44 | 
 45 | These are the just and reasonable demands of a righteous public.
 46 | 
 47 | But for too many of our citizens, a different reality exists: Mothers and children trapped in poverty in our inner cities; rusted-out factories scattered like tombstones across the landscape of our nation; an education system, flush with cash, but which leaves our young and beautiful students deprived of knowledge; and the crime and gangs and drugs that have stolen too many lives and robbed our country of so much unrealized potential.
 48 | 
 49 | This American carnage stops right here and stops right now.
 50 | 
 51 | We are one nation – and their pain is our pain.  Their dreams are our dreams; and their success will be our success.  We share one heart, one home, and one glorious destiny.
 52 | 
 53 | The oath of office I take today is an oath of allegiance to all Americans.
 54 | 
 55 | For many decades, we’ve enriched foreign industry at the expense of American industry;
 56 | 
 57 | Subsidized the armies of other countries while allowing for the very sad depletion of our military;
 58 | 
 59 | We've defended other nation’s borders while refusing to defend our own;
 60 | 
 61 | And spent trillions of dollars overseas while America's infrastructure has fallen into disrepair and decay.
 62 | 
 63 | We’ve made other countries rich while the wealth, strength, and confidence of our country has disappeared over the horizon.
 64 | 
 65 | One by one, the factories shuttered and left our shores, with not even a thought about the millions upon millions of American workers left behind.
 66 | 
 67 | The wealth of our middle class has been ripped from their homes and then redistributed across the entire world.
 68 | 
 69 | But that is the past. And now we are looking only to the future.
 70 | 
 71 | We assembled here today are issuing a new decree to be heard in every city, in every foreign capital, and in every hall of power.
 72 | 
 73 | From this day forward, a new vision will govern our land.
 74 | 
 75 | From this moment on, it’s going to be America First.
 76 | 
 77 | Every decision on trade, on taxes, on immigration, on foreign affairs, will be made to benefit American workers and American families.
 78 | 
 79 | We must protect our borders from the ravages of other countries making our products, stealing our companies, and destroying our jobs.  Protection will lead to great prosperity and strength.
 80 | 
 81 | I will fight for you with every breath in my body – and I will never, ever let you down.
 82 | 
 83 | America will start winning again, winning like never before.
 84 | 
 85 | We will bring back our jobs. We will bring back our borders.  We will bring back our wealth.  And we will bring back our dreams.
 86 | 
 87 | We will build new roads, and highways, and bridges, and airports, and tunnels, and railways all across our wonderful nation.
 88 | 
 89 | We will get our people off of welfare and back to work – rebuilding our country with American hands and American labor.
 90 | 
 91 | We will follow two simple rules: Buy American and Hire American.
 92 | 
 93 | We will seek friendship and goodwill with the nations of the world – but we do so with the understanding that it is the right of all nations to put their own interests first.
 94 | 
 95 | We do not seek to impose our way of life on anyone, but rather to let it shine as an example for everyone to follow.
 96 | 
 97 | We will reinforce old alliances and form new ones – and unite the civilized world against Radical Islamic Terrorism, which we will eradicate completely from the face of the Earth.
 98 | 
 99 | At the bedrock of our politics will be a total allegiance to the United States of America, and through our loyalty to our country, we will rediscover our loyalty to each other.
100 | 
101 | When you open your heart to patriotism, there is no room for prejudice.
102 | 
103 | The Bible tells us, “how good and pleasant it is when God’s people live together in unity.”
104 | 
105 | We must speak our minds openly, debate our disagreements honestly, but always pursue solidarity.
106 | 
107 | When America is united, America is totally unstoppable.
108 | 
109 | There should be no fear – we are protected, and we will always be protected.
110 | 
111 | We will be protected by the great men and women of our military and law enforcement and, most importantly, we are protected by God.
112 | 
113 | Finally, we must think big and dream even bigger.
114 | 
115 | In America, we understand that a nation is only living as long as it is striving.
116 | 
117 | We will no longer accept politicians who are all talk and no action – constantly complaining but never doing anything about it.
118 | 
119 | The time for empty talk is over.
120 | 
121 | Now arrives the hour of action.
122 | 
123 | Do not let anyone tell you it cannot be done.  No challenge can match the heart and fight and spirit of America.
124 | 
125 | We will not fail. Our country will thrive and prosper again.
126 | 
127 | We stand at the birth of a new millennium, ready to unlock the mysteries of space, to free the Earth from the miseries of disease, and to harness the energies, industries and technologies of tomorrow.
128 | 
129 | A new national pride will stir our souls, lift our sights, and heal our divisions.
130 | 
131 | It is time to remember that old wisdom our soldiers will never forget: that whether we are black or brown or white, we all bleed the same red blood of patriots, we all enjoy the same glorious freedoms, and we all salute the same great American Flag.
132 | 
133 | And whether a child is born in the urban sprawl of Detroit or the windswept plains of Nebraska, they look up at the same night sky, they fill their heart with the same dreams, and they are infused with the breath of life by the same almighty Creator.
134 | 
135 | So to all Americans, in every city near and far, small and large, from mountain to mountain, and from ocean to ocean, hear these words:
136 | 
137 | You will never be ignored again.
138 | 
139 | Your voice, your hopes, and your dreams, will define our American destiny. And your courage and goodness and love will forever guide us along the way.
140 | 
141 | Together, We Will Make America Strong Again.
142 | 
143 | We Will Make America Wealthy Again.
144 | 
145 | We Will Make America Proud Again.
146 | 
147 | We Will Make America Safe Again.
148 | 
149 | And, Yes, Together, We Will Make America Great Again. Thank you, God Bless You, And God Bless America.
150 | 


--------------------------------------------------------------------------------
/datasets/trump_speech.txt:
--------------------------------------------------------------------------------
  1 | As Prepared for Delivery –
  2 | 
  3 | Chief Justice Roberts, President Carter, President Clinton, President Bush, President Obama, fellow Americans, and people of the world: thank you.
  4 | 
  5 | We, the citizens of America, are now joined in a great national effort to rebuild our country and to restore its promise for all of our people.
  6 | 
  7 | Together, we will determine the course of America and the world for years to come.
  8 | 
  9 | We will face challenges. We will confront hardships. But we will get the job done.
 10 | 
 11 | Every four years, we gather on these steps to carry out the orderly and peaceful transfer of power, and we are grateful to President Obama and First Lady Michelle Obama for their gracious aid throughout this transition. They have been magnificent.
 12 | 
 13 | Today’s ceremony, however, has very special meaning. Because today we are not merely transferring power from one Administration to another, or from one party to another – but we are transferring power from Washington, D.C. and giving it back to you, the American People.
 14 | 
 15 | For too long, a small group in our nation’s Capital has reaped the rewards of government while the people have borne the cost.
 16 | 
 17 | Washington flourished – but the people did not share in its wealth.
 18 | 
 19 | Politicians prospered – but the jobs left, and the factories closed.
 20 | 
 21 | The establishment protected itself, but not the citizens of our country.
 22 | 
 23 | Their victories have not been your victories; their triumphs have not been your triumphs; and while they celebrated in our nation’s Capital, there was little to celebrate for struggling families all across our land.
 24 | 
 25 | That all changes – starting right here, and right now, because this moment is your moment: it belongs to you.
 26 | 
 27 | It belongs to everyone gathered here today and everyone watching all across America. 
 28 | 
 29 | This is your day. This is your celebration.
 30 | 
 31 | And this, the United States of America, is your country.
 32 | 
 33 | What truly matters is not which party controls our government, but whether our government is controlled by the people.
 34 | 
 35 | January 20th 2017, will be remembered as the day the people became the rulers of this nation again. 
 36 | 
 37 | The forgotten men and women of our country will be forgotten no longer.
 38 | 
 39 | Everyone is listening to you now.
 40 | 
 41 | You came by the tens of millions to become part of a historic movement the likes of which the world has never seen before.
 42 | 
 43 | At the center of this movement is a crucial conviction: that a nation exists to serve its citizens.
 44 | 
 45 | Americans want great schools for their children, safe neighborhoods for their families, and good jobs for themselves.
 46 | 
 47 | These are the just and reasonable demands of a righteous public.
 48 | 
 49 | But for too many of our citizens, a different reality exists: Mothers and children trapped in poverty in our inner cities; rusted-out factories scattered like tombstones across the landscape of our nation; an education system, flush with cash, but which leaves our young and beautiful students deprived of knowledge; and the crime and gangs and drugs that have stolen too many lives and robbed our country of so much unrealized potential.
 50 | 
 51 | This American carnage stops right here and stops right now.
 52 | 
 53 | We are one nation – and their pain is our pain.  Their dreams are our dreams; and their success will be our success.  We share one heart, one home, and one glorious destiny.
 54 | 
 55 | The oath of office I take today is an oath of allegiance to all Americans.
 56 | 
 57 | For many decades, we’ve enriched foreign industry at the expense of American industry;
 58 | 
 59 | Subsidized the armies of other countries while allowing for the very sad depletion of our military;
 60 | 
 61 | We've defended other nation’s borders while refusing to defend our own;
 62 | 
 63 | And spent trillions of dollars overseas while America's infrastructure has fallen into disrepair and decay.
 64 | 
 65 | We’ve made other countries rich while the wealth, strength, and confidence of our country has disappeared over the horizon.
 66 | 
 67 | One by one, the factories shuttered and left our shores, with not even a thought about the millions upon millions of American workers left behind.
 68 | 
 69 | The wealth of our middle class has been ripped from their homes and then redistributed across the entire world.
 70 | 
 71 | But that is the past. And now we are looking only to the future.
 72 | 
 73 | We assembled here today are issuing a new decree to be heard in every city, in every foreign capital, and in every hall of power.
 74 | 
 75 | From this day forward, a new vision will govern our land.
 76 | 
 77 | From this moment on, it’s going to be America First.
 78 | 
 79 | Every decision on trade, on taxes, on immigration, on foreign affairs, will be made to benefit American workers and American families.
 80 | 
 81 | We must protect our borders from the ravages of other countries making our products, stealing our companies, and destroying our jobs.  Protection will lead to great prosperity and strength.
 82 | 
 83 | I will fight for you with every breath in my body – and I will never, ever let you down.
 84 | 
 85 | America will start winning again, winning like never before.
 86 | 
 87 | We will bring back our jobs. We will bring back our borders.  We will bring back our wealth.  And we will bring back our dreams.
 88 | 
 89 | We will build new roads, and highways, and bridges, and airports, and tunnels, and railways all across our wonderful nation.
 90 | 
 91 | We will get our people off of welfare and back to work – rebuilding our country with American hands and American labor.
 92 | 
 93 | We will follow two simple rules: Buy American and Hire American.
 94 | 
 95 | We will seek friendship and goodwill with the nations of the world – but we do so with the understanding that it is the right of all nations to put their own interests first.
 96 | 
 97 | We do not seek to impose our way of life on anyone, but rather to let it shine as an example for everyone to follow.
 98 | 
 99 | We will reinforce old alliances and form new ones – and unite the civilized world against Radical Islamic Terrorism, which we will eradicate completely from the face of the Earth.
100 | 
101 | At the bedrock of our politics will be a total allegiance to the United States of America, and through our loyalty to our country, we will rediscover our loyalty to each other.
102 | 
103 | When you open your heart to patriotism, there is no room for prejudice.
104 | 
105 | The Bible tells us, “how good and pleasant it is when God’s people live together in unity.”
106 | 
107 | We must speak our minds openly, debate our disagreements honestly, but always pursue solidarity.
108 | 
109 | When America is united, America is totally unstoppable.
110 | 
111 | There should be no fear – we are protected, and we will always be protected.
112 | 
113 | We will be protected by the great men and women of our military and law enforcement and, most importantly, we are protected by God.
114 | 
115 | Finally, we must think big and dream even bigger.
116 | 
117 | In America, we understand that a nation is only living as long as it is striving.
118 | 
119 | We will no longer accept politicians who are all talk and no action – constantly complaining but never doing anything about it.
120 | 
121 | The time for empty talk is over.
122 | 
123 | Now arrives the hour of action.
124 | 
125 | Do not let anyone tell you it cannot be done.  No challenge can match the heart and fight and spirit of America.
126 | 
127 | We will not fail. Our country will thrive and prosper again.
128 | 
129 | We stand at the birth of a new millennium, ready to unlock the mysteries of space, to free the Earth from the miseries of disease, and to harness the energies, industries and technologies of tomorrow.
130 | 
131 | A new national pride will stir our souls, lift our sights, and heal our divisions.
132 | 
133 | It is time to remember that old wisdom our soldiers will never forget: that whether we are black or brown or white, we all bleed the same red blood of patriots, we all enjoy the same glorious freedoms, and we all salute the same great American Flag.
134 | 
135 | And whether a child is born in the urban sprawl of Detroit or the windswept plains of Nebraska, they look up at the same night sky, they fill their heart with the same dreams, and they are infused with the breath of life by the same almighty Creator.
136 | 
137 | So to all Americans, in every city near and far, small and large, from mountain to mountain, and from ocean to ocean, hear these words:
138 | 
139 | You will never be ignored again.
140 | 
141 | Your voice, your hopes, and your dreams, will define our American destiny. And your courage and goodness and love will forever guide us along the way.
142 | 
143 | Together, We Will Make America Strong Again.
144 | 
145 | We Will Make America Wealthy Again.
146 | 
147 | We Will Make America Proud Again.
148 | 
149 | We Will Make America Safe Again.
150 | 
151 | And, Yes, Together, We Will Make America Great Again. Thank you, God Bless You, And God Bless America.
152 | 


--------------------------------------------------------------------------------
/cb101/時間日期轉換.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 8,
  6 |    "metadata": {},
  7 |    "outputs": [
  8 |     {
  9 |      "name": "stdout",
 10 |      "output_type": "stream",
 11 |      "text": [
 12 |       "Requirement already satisfied: pytz in /Users/ian/.pyenv/versions/3.6.4/lib/python3.6/site-packages\n",
 13 |       "\u001b[33mYou are using pip version 9.0.1, however version 10.0.1 is available.\n",
 14 |       "You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n"
 15 |      ]
 16 |     }
 17 |    ],
 18 |    "source": [
 19 |     "!pip install pytz"
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "code",
 24 |    "execution_count": 17,
 25 |    "metadata": {},
 26 |    "outputs": [],
 27 |    "source": [
 28 |     "import datetime\n",
 29 |     "\n",
 30 |     "import pytz"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "code",
 35 |    "execution_count": 18,
 36 |    "metadata": {},
 37 |    "outputs": [],
 38 |    "source": [
 39 |     "# 製作一個台北時區物件\n",
 40 |     "tpe = pytz.timezone('Asia/Taipei')"
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "code",
 45 |    "execution_count": 19,
 46 |    "metadata": {},
 47 |    "outputs": [],
 48 |    "source": [
 49 |     "# 製作一個洛杉磯時區物件\n",
 50 |     "lax = pytz.timezone('US/Pacific')"
 51 |    ]
 52 |   },
 53 |   {
 54 |    "cell_type": "code",
 55 |    "execution_count": 20,
 56 |    "metadata": {},
 57 |    "outputs": [
 58 |     {
 59 |      "data": {
 60 |       "text/plain": [
 61 |        "<DstTzInfo 'US/Pacific' LMT-1 day, 16:07:00 STD>"
 62 |       ]
 63 |      },
 64 |      "execution_count": 20,
 65 |      "metadata": {},
 66 |      "output_type": "execute_result"
 67 |     }
 68 |    ],
 69 |    "source": [
 70 |     "lax"
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "code",
 75 |    "execution_count": 73,
 76 |    "metadata": {},
 77 |    "outputs": [],
 78 |    "source": [
 79 |     "# 現在LAX的時間\n",
 80 |     "dt_in_lax = datetime.datetime.now(tz=lax)"
 81 |    ]
 82 |   },
 83 |   {
 84 |    "cell_type": "code",
 85 |    "execution_count": 74,
 86 |    "metadata": {},
 87 |    "outputs": [
 88 |     {
 89 |      "data": {
 90 |       "text/plain": [
 91 |        "datetime.datetime(2018, 6, 6, 20, 37, 38, 702578, tzinfo=<DstTzInfo 'US/Pacific' PDT-1 day, 17:00:00 DST>)"
 92 |       ]
 93 |      },
 94 |      "execution_count": 74,
 95 |      "metadata": {},
 96 |      "output_type": "execute_result"
 97 |     }
 98 |    ],
 99 |    "source": [
100 |     "dt_in_lax"
101 |    ]
102 |   },
103 |   {
104 |    "cell_type": "code",
105 |    "execution_count": 79,
106 |    "metadata": {},
107 |    "outputs": [
108 |     {
109 |      "data": {
110 |       "text/plain": [
111 |        "datetime.datetime(2018, 6, 7, 11, 37, 38, 702578, tzinfo=<DstTzInfo 'Asia/Taipei' CST+8:00:00 STD>)"
112 |       ]
113 |      },
114 |      "execution_count": 79,
115 |      "metadata": {},
116 |      "output_type": "execute_result"
117 |     }
118 |    ],
119 |    "source": [
120 |     "# 將剛才的LAX時間轉換為台北時間\n",
121 |     "\n",
122 |     "dt_in_lax.astimezone(tpe)"
123 |    ]
124 |   },
125 |   {
126 |    "cell_type": "markdown",
127 |    "metadata": {},
128 |    "source": [
129 |     "# 爬蟲的時間處理"
130 |    ]
131 |   },
132 |   {
133 |    "cell_type": "code",
134 |    "execution_count": 76,
135 |    "metadata": {},
136 |    "outputs": [],
137 |    "source": [
138 |     "# 此時間字串來源為PTT網頁，是台北時間\n",
139 |     "\n",
140 |     "dt_str = 'Sun Apr 21 22:47:12 2013'"
141 |    ]
142 |   },
143 |   {
144 |    "cell_type": "code",
145 |    "execution_count": 77,
146 |    "metadata": {},
147 |    "outputs": [
148 |     {
149 |      "data": {
150 |       "text/plain": [
151 |        "datetime.datetime(2013, 4, 21, 22, 47, 12)"
152 |       ]
153 |      },
154 |      "execution_count": 77,
155 |      "metadata": {},
156 |      "output_type": "execute_result"
157 |     }
158 |    ],
159 |    "source": [
160 |     "# 將字串變成datetime 物件\n",
161 |     "\n",
162 |     "# https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior\n",
163 |     "\n",
164 |     "# timezone.localize(<datetime object>) -> <datetime tzinfo=xxxxxx>\n",
165 |     "ptt_dt_notz = datetime.datetime.strptime(dt_str, '%a %b %d %H:%M:%S %Y')\n",
166 |     "ptt_dt_notz"
167 |    ]
168 |   },
169 |   {
170 |    "cell_type": "code",
171 |    "execution_count": 80,
172 |    "metadata": {},
173 |    "outputs": [
174 |     {
175 |      "data": {
176 |       "text/plain": [
177 |        "datetime.datetime(2013, 4, 21, 22, 47, 12, tzinfo=<DstTzInfo 'Asia/Taipei' CST+8:00:00 STD>)"
178 |       ]
179 |      },
180 |      "execution_count": 80,
181 |      "metadata": {},
182 |      "output_type": "execute_result"
183 |     }
184 |    ],
185 |    "source": [
186 |     "# 將時區插入datetime物件\n",
187 |     "# ptt網站為例的話就是將台北時區插入\n",
188 |     "\n",
189 |     "tpe = pytz.timezone('Asia/Taipei')\n",
190 |     "ptt_dt_tz = tpe.localize(ptt_dt_notz)\n",
191 |     "ptt_dt_tz"
192 |    ]
193 |   },
194 |   {
195 |    "cell_type": "code",
196 |    "execution_count": 82,
197 |    "metadata": {},
198 |    "outputs": [
199 |     {
200 |      "data": {
201 |       "text/plain": [
202 |        "1366555632.0"
203 |       ]
204 |      },
205 |      "execution_count": 82,
206 |      "metadata": {},
207 |      "output_type": "execute_result"
208 |     }
209 |    ],
210 |    "source": [
211 |     "# 最後將datetime object轉成UTC timestamp， 是個float\n",
212 |     "# 再存入資料庫\n",
213 |     "ptt_dt_tz.timestamp()"
214 |    ]
215 |   },
216 |   {
217 |    "cell_type": "code",
218 |    "execution_count": 114,
219 |    "metadata": {},
220 |    "outputs": [
221 |     {
222 |      "data": {
223 |       "text/plain": [
224 |        "datetime.datetime(2018, 6, 7, 3, 59, 18, 910712, tzinfo=<UTC>)"
225 |       ]
226 |      },
227 |      "execution_count": 114,
228 |      "metadata": {},
229 |      "output_type": "execute_result"
230 |     }
231 |    ],
232 |    "source": [
233 |     "# 製作現在的timestamp\n",
234 |     "\n",
235 |     "utc = pytz.utc\n",
236 |     "\n",
237 |     "# 製作現在UTC的datetime object\n",
238 |     "output_tz = utc.localize(datetime.datetime.utcnow())\n",
239 |     "output_tz"
240 |    ]
241 |   },
242 |   {
243 |    "cell_type": "code",
244 |    "execution_count": 115,
245 |    "metadata": {},
246 |    "outputs": [
247 |     {
248 |      "data": {
249 |       "text/plain": [
250 |        "1528343958.910712"
251 |       ]
252 |      },
253 |      "execution_count": 115,
254 |      "metadata": {},
255 |      "output_type": "execute_result"
256 |     }
257 |    ],
258 |    "source": [
259 |     "# 存入資料庫\n",
260 |     "output_tz.timestamp()"
261 |    ]
262 |   },
263 |   {
264 |    "cell_type": "code",
265 |    "execution_count": 97,
266 |    "metadata": {},
267 |    "outputs": [],
268 |    "source": [
269 |     "# 從資料庫讀取時間\n",
270 |     "dt_to_show = 1528343684.590469"
271 |    ]
272 |   },
273 |   {
274 |    "cell_type": "code",
275 |    "execution_count": 134,
276 |    "metadata": {},
277 |    "outputs": [
278 |     {
279 |      "data": {
280 |       "text/plain": [
281 |        "datetime.datetime(2018, 6, 7, 3, 54, 44, 590469, tzinfo=<UTC>)"
282 |       ]
283 |      },
284 |      "execution_count": 134,
285 |      "metadata": {},
286 |      "output_type": "execute_result"
287 |     }
288 |    ],
289 |    "source": [
290 |     "utc = pytz.utc\n",
291 |     "\n",
292 |     "# 把utc timestamp轉換為utc datetime object\n",
293 |     "dt_to_show_tz = utc.localize(datetime.datetime.utcfromtimestamp(dt_to_show))\n",
294 |     "dt_to_show_tz"
295 |    ]
296 |   },
297 |   {
298 |    "cell_type": "code",
299 |    "execution_count": 131,
300 |    "metadata": {},
301 |    "outputs": [],
302 |    "source": [
303 |     "# 轉換為東京時間\n",
304 |     "dt_in_tokyo = dt_to_show_tz.astimezone(pytz.timezone('Asia/Tokyo'))"
305 |    ]
306 |   },
307 |   {
308 |    "cell_type": "code",
309 |    "execution_count": 132,
310 |    "metadata": {},
311 |    "outputs": [
312 |     {
313 |      "data": {
314 |       "text/plain": [
315 |        "'2018-06-07 03:54:44 UTC'"
316 |       ]
317 |      },
318 |      "execution_count": 132,
319 |      "metadata": {},
320 |      "output_type": "execute_result"
321 |     }
322 |    ],
323 |    "source": [
324 |     "# 將datetime object轉換為str\n",
325 |     "dt_to_show_tz.strftime('%Y-%m-%d %H:%M:%S %Z')"
326 |    ]
327 |   },
328 |   {
329 |    "cell_type": "code",
330 |    "execution_count": 133,
331 |    "metadata": {},
332 |    "outputs": [
333 |     {
334 |      "data": {
335 |       "text/plain": [
336 |        "'2018-06-07 12:54:44 JST'"
337 |       ]
338 |      },
339 |      "execution_count": 133,
340 |      "metadata": {},
341 |      "output_type": "execute_result"
342 |     }
343 |    ],
344 |    "source": [
345 |     "# 將datetime object轉換為str\n",
346 |     "dt_in_tokyo.strftime('%Y-%m-%d %H:%M:%S %Z')"
347 |    ]
348 |   }
349 |  ],
350 |  "metadata": {
351 |   "kernelspec": {
352 |    "display_name": "Python 3",
353 |    "language": "python",
354 |    "name": "python3"
355 |   },
356 |   "language_info": {
357 |    "codemirror_mode": {
358 |     "name": "ipython",
359 |     "version": 3
360 |    },
361 |    "file_extension": ".py",
362 |    "mimetype": "text/x-python",
363 |    "name": "python",
364 |    "nbconvert_exporter": "python",
365 |    "pygments_lexer": "ipython3",
366 |    "version": "3.6.4"
367 |   }
368 |  },
369 |  "nbformat": 4,
370 |  "nbformat_minor": 2
371 | }
372 | 


--------------------------------------------------------------------------------
/bb105/yes123/tasks.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | import json
  3 | import datetime
  4 | 
  5 | import pytz
  6 | import requests
  7 | from celery import Celery
  8 | from bs4 import BeautifulSoup
  9 | import rethinkdb as r
 10 | 
# Celery application that the @app.task functions in this module register on;
# work is queued through the AMQP broker at the URL below.
# NOTE(review): the broker credentials are hard-coded in the URL — consider
# loading them from configuration instead.
app = Celery('tasks', broker='pyamqp://celery:celery@localhost//')
# Base URL that the list endpoint and the scraped job-detail links are appended to.
HOST = 'https://www.yes123.com.tw/admin/'
 13 | 
 14 | @app.task
 15 | def add(x, y):
 16 |     return x + y
 17 | 
 18 | @app.task
 19 | def get_list(page):
 20 |     offset = (page-1) * 20
 21 |     url = HOST + "job_refer_list.asp"
 22 |     data = json.loads(r'''{
 23 |         "find_key1": "軟體工程師",
 24 |         "search_work": "職務",
 25 |         "search_multi_loc": "地區",
 26 |         "find_key2": "",
 27 |         "search_multi_loc2": "請選擇地區",
 28 |         "search_work2": "請選擇職務",
 29 |         "search_job2": "請選擇行業",
 30 |         "find_key3": "",
 31 |         "search_multi_loc3": "請選擇地區",
 32 |         "search_work3": "請選擇職務",
 33 |         "search_subj": "請選擇你的科系",
 34 |         "search_work4": "",
 35 |         "search_multi_loc4": "請選擇地區",
 36 |         "search_multi_loc5": "請選擇地區",
 37 |         "search_work6": "",
 38 |         "search_work7": "",
 39 |         "search_multi_loc6": "",
 40 |         "search_work8": "",
 41 |         "search_multi_loc7": "",
 42 |         "search_work9": "請選擇職務",
 43 |         "find_sf_subj_mode1": "",
 44 |         "s_find_sf_subj_mode1": "",
 45 |         "find_se_work_mode1": "",
 46 |         "s_find_se_work_mode1": "",
 47 |         "find_ss_work_mode1": "",
 48 |         "s_find_ss_work_mode1": "",
 49 |         "find_zone_mode1": "",
 50 |         "find_zone_mode2": "",
 51 |         "find_zone_mode3": "",
 52 |         "find_zone_mode4": "",
 53 |         "find_zone_mode5": "",
 54 |         "find_zone_mode6": "",
 55 |         "find_zone_mode7": "",
 56 |         "find_zone_mode8": "",
 57 |         "find_zone_mode9": "",
 58 |         "find_zone_mode10": "",
 59 |         "s_find_zone_mode1": "",
 60 |         "s_find_zone_mode2": "",
 61 |         "s_find_zone_mode3": "",
 62 |         "s_find_zone_mode4": "",
 63 |         "s_find_zone_mode5": "",
 64 |         "s_find_zone_mode6": "",
 65 |         "s_find_zone_mode7": "",
 66 |         "s_find_zone_mode8": "",
 67 |         "s_find_zone_mode9": "",
 68 |         "s_find_zone_mode10": "",
 69 |         "find_metro_mode1": "",
 70 |         "find_metro_mode2": "",
 71 |         "find_metro_mode3": "",
 72 |         "find_metro_mode4": "",
 73 |         "find_metro_mode5": "",
 74 |         "find_metro_mode6": "",
 75 |         "find_metro_mode7": "",
 76 |         "find_metro_mode8": "",
 77 |         "find_metro_mode9": "",
 78 |         "find_metro_mode10": "",
 79 |         "s_find_metro_mode1": "",
 80 |         "s_find_metro_mode2": "",
 81 |         "s_find_metro_mode3": "",
 82 |         "s_find_metro_mode4": "",
 83 |         "s_find_metro_mode5": "",
 84 |         "s_find_metro_mode6": "",
 85 |         "s_find_metro_mode7": "",
 86 |         "s_find_metro_mode8": "",
 87 |         "s_find_metro_mode9": "",
 88 |         "s_find_metro_mode10": "",
 89 |         "find_map_mode1": "",
 90 |         "find_map_mode2": "",
 91 |         "find_map_mode3": "",
 92 |         "find_map_mode4": "",
 93 |         "find_indy_mode1": "",
 94 |         "find_indy_mode2": "",
 95 |         "find_indy_mode3": "",
 96 |         "find_indy_mode4": "",
 97 |         "find_indy_mode5": "",
 98 |         "find_indy_mode6": "",
 99 |         "find_indy_mode7": "",
100 |         "find_indy_mode8": "",
101 |         "find_indy_mode9": "",
102 |         "find_indy_mode10": "",
103 |         "s_find_indy_mode1": "",
104 |         "s_find_indy_mode2": "",
105 |         "s_find_indy_mode3": "",
106 |         "s_find_indy_mode4": "",
107 |         "s_find_indy_mode5": "",
108 |         "s_find_indy_mode6": "",
109 |         "s_find_indy_mode7": "",
110 |         "s_find_indy_mode8": "",
111 |         "s_find_indy_mode9": "",
112 |         "s_find_indy_mode10": "",
113 |         "find_scl_mode1": "",
114 |         "find_scl_mode2": "",
115 |         "find_scl_mode3": "",
116 |         "find_scl_mode4": "",
117 |         "find_scl_mode5": "",
118 |         "find_scl_mode6": "",
119 |         "find_scl_mode7": "",
120 |         "find_scl_mode8": "",
121 |         "find_scl_mode9": "",
122 |         "find_scl_mode10": "",
123 |         "s_find_scl_mode1": "",
124 |         "s_find_scl_mode2": "",
125 |         "s_find_scl_mode3": "",
126 |         "s_find_scl_mode4": "",
127 |         "s_find_scl_mode5": "",
128 |         "s_find_scl_mode6": "",
129 |         "s_find_scl_mode7": "",
130 |         "s_find_scl_mode8": "",
131 |         "s_find_scl_mode9": "",
132 |         "s_find_scl_mode10": "",
133 |         "find_work_mode1": "",
134 |         "find_work_mode2": "",
135 |         "find_work_mode3": "",
136 |         "find_work_mode4": "",
137 |         "find_work_mode5": "",
138 |         "find_work_mode6": "",
139 |         "find_work_mode7": "",
140 |         "find_work_mode8": "",
141 |         "find_work_mode9": "",
142 |         "find_work_mode10": "",
143 |         "s_find_work_mode1": "",
144 |         "s_find_work_mode2": "",
145 |         "s_find_work_mode3": "",
146 |         "s_find_work_mode4": "",
147 |         "s_find_work_mode5": "",
148 |         "s_find_work_mode6": "",
149 |         "s_find_work_mode7": "",
150 |         "s_find_work_mode8": "",
151 |         "s_find_work_mode9": "",
152 |         "s_find_work_mode10": "",
153 |         "find_job_mode1": "",
154 |         "find_job_mode2": "",
155 |         "find_job_mode3": "",
156 |         "find_job_mode4": "",
157 |         "find_job_mode5": "",
158 |         "find_job_mode6": "",
159 |         "find_job_mode7": "",
160 |         "find_job_mode8": "",
161 |         "find_job_mode9": "",
162 |         "find_job_mode10": "",
163 |         "s_find_job_mode1": "",
164 |         "s_find_job_mode2": "",
165 |         "s_find_job_mode3": "",
166 |         "s_find_job_mode4": "",
167 |         "s_find_job_mode5": "",
168 |         "s_find_job_mode6": "",
169 |         "s_find_job_mode7": "",
170 |         "s_find_job_mode8": "",
171 |         "s_find_job_mode9": "",
172 |         "s_find_job_mode10": "",
173 |         "find_sche_mode1": "",
174 |         "find_sche_mode2": "",
175 |         "find_sche_mode3": "",
176 |         "find_sche_mode4": "",
177 |         "find_sche_mode5": "",
178 |         "s_find_sche_mode1": "",
179 |         "s_find_sche_mode2": "",
180 |         "s_find_sche_mode3": "",
181 |         "s_find_sche_mode4": "",
182 |         "s_find_sche_mode5": "",
183 |         "_mu_sf_1": "",
184 |         "_mu_se_1": "",
185 |         "_mu_chkbox_2": "",
186 |         "_mu_chkbox_3": "",
187 |         "_mu_wk_1": "",
188 |         "_mu_wk_2": "",
189 |         "_mu_wk_3": "",
190 |         "_mu_wk_4": "",
191 |         "_mu_wk_5": "",
192 |         "_mu_job_1": "",
193 |         "_mu_edu_1": "",
194 |         "_mu_edu_2": "",
195 |         "_mu_edu_3": "",
196 |         "_mu_edu_4": "",
197 |         "_mu_edu_5": "",
198 |         "_mu_edu_6": "",
199 |         "_mu_edu_7": "",
200 |         "_mu_year_1": "",
201 |         "_mu_year_2": "",
202 |         "_mu_lang_1": "",
203 |         "_mu_lang_2": "",
204 |         "_mu_lang_3": "",
205 |         "_mu_psn_1": "",
206 |         "_mu_time_1": "",
207 |         "_mu_time_2": "",
208 |         "_mu_time_3": "",
209 |         "_mu_time_4": "",
210 |         "_mu_time_5": "",
211 |         "_mu_vc_1": "",
212 |         "_mu_vc_2": "",
213 |         "_mu_vc_3": "",
214 |         "_mu_vc_4": "",
215 |         "_mu_sc_1": "",
216 |         "_mu_sc_2": "",
217 |         "find_subj_mode1": "",
218 |         "find_subj_mode2": "",
219 |         "find_subj_mode3": "",
220 |         "s_find_subj_mode1": "",
221 |         "s_find_subj_mode2": "",
222 |         "s_find_subj_mode3": "",
223 |         "find_sw_mode1": "",
224 |         "find_sw_mode2": "",
225 |         "find_sw_mode3": "",
226 |         "find_sw_mode4": "",
227 |         "find_sw_mode5": "",
228 |         "s_find_sw_mode1": "",
229 |         "s_find_sw_mode2": "",
230 |         "s_find_sw_mode3": "",
231 |         "s_find_sw_mode4": "",
232 |         "s_find_sw_mode5": "",
233 |         "find_cert_mode1": "",
234 |         "find_cert_mode2": "",
235 |         "find_cert_mode3": "",
236 |         "find_cert_mode4": "",
237 |         "find_cert_mode5": "",
238 |         "s_find_cert_mode1": "",
239 |         "s_find_cert_mode2": "",
240 |         "s_find_cert_mode3": "",
241 |         "s_find_cert_mode4": "",
242 |         "s_find_cert_mode5": "",
243 |         "find_us_sf_subj_mode1": "",
244 |         "s_find_us_sf_subj_mode1": "",
245 |         "find_us_se_work_mode1": "",
246 |         "s_find_us_se_work_mode1": "",
247 |         "find_us_work_mode1": "",
248 |         "find_us_work_mode2": "",
249 |         "find_us_work_mode3": "",
250 |         "find_us_work_mode4": "",
251 |         "find_us_work_mode5": "",
252 |         "find_us_work_mode6": "",
253 |         "find_us_work_mode7": "",
254 |         "find_us_work_mode8": "",
255 |         "find_us_work_mode9": "",
256 |         "find_us_work_mode10": "",
257 |         "s_find_us_work_mode1": "",
258 |         "s_find_us_work_mode2": "",
259 |         "s_find_us_work_mode3": "",
260 |         "s_find_us_work_mode4": "",
261 |         "s_find_us_work_mode5": "",
262 |         "s_find_us_work_mode6": "",
263 |         "s_find_us_work_mode7": "",
264 |         "s_find_us_work_mode8": "",
265 |         "s_find_us_work_mode9": "",
266 |         "s_find_us_work_mode10": "",
267 |         "find_us_zone_mode1": "",
268 |         "find_us_zone_mode2": "",
269 |         "find_us_zone_mode3": "",
270 |         "find_us_zone_mode4": "",
271 |         "find_us_zone_mode5": "",
272 |         "find_us_zone_mode6": "",
273 |         "find_us_zone_mode7": "",
274 |         "find_us_zone_mode8": "",
275 |         "find_us_zone_mode9": "",
276 |         "find_us_zone_mode10": "",
277 |         "s_find_us_zone_mode1": "",
278 |         "s_find_us_zone_mode2": "",
279 |         "s_find_us_zone_mode3": "",
280 |         "s_find_us_zone_mode4": "",
281 |         "s_find_us_zone_mode5": "",
282 |         "s_find_us_zone_mode6": "",
283 |         "s_find_us_zone_mode7": "",
284 |         "s_find_us_zone_mode8": "",
285 |         "s_find_us_zone_mode9": "",
286 |         "s_find_us_zone_mode10": "",
287 |         "find_us_metro_mode1": "",
288 |         "find_us_metro_mode2": "",
289 |         "find_us_metro_mode3": "",
290 |         "find_us_metro_mode4": "",
291 |         "find_us_metro_mode5": "",
292 |         "find_us_metro_mode6": "",
293 |         "find_us_metro_mode7": "",
294 |         "find_us_metro_mode8": "",
295 |         "find_us_metro_mode9": "",
296 |         "find_us_metro_mode10": "",
297 |         "s_find_us_metro_mode1": "",
298 |         "s_find_us_metro_mode2": "",
299 |         "s_find_us_metro_mode3": "",
300 |         "s_find_us_metro_mode4": "",
301 |         "s_find_us_metro_mode5": "",
302 |         "s_find_us_metro_mode6": "",
303 |         "s_find_us_metro_mode7": "",
304 |         "s_find_us_metro_mode8": "",
305 |         "s_find_us_metro_mode9": "",
306 |         "s_find_us_metro_mode10": "",
307 |         "find_us_indy_mode1": "",
308 |         "find_us_indy_mode2": "",
309 |         "find_us_indy_mode3": "",
310 |         "find_us_indy_mode4": "",
311 |         "find_us_indy_mode5": "",
312 |         "find_us_indy_mode6": "",
313 |         "find_us_indy_mode7": "",
314 |         "find_us_indy_mode8": "",
315 |         "find_us_indy_mode9": "",
316 |         "find_us_indy_mode10": "",
317 |         "s_find_us_indy_mode1": "",
318 |         "s_find_us_indy_mode2": "",
319 |         "s_find_us_indy_mode3": "",
320 |         "s_find_us_indy_mode4": "",
321 |         "s_find_us_indy_mode5": "",
322 |         "s_find_us_indy_mode6": "",
323 |         "s_find_us_indy_mode7": "",
324 |         "s_find_us_indy_mode8": "",
325 |         "s_find_us_indy_mode9": "",
326 |         "s_find_us_indy_mode10": "",
327 |         "find_us_map_mode1": "",
328 |         "find_us_map_mode2": "",
329 |         "find_us_map_mode3": "",
330 |         "find_us_map_mode4": "",
331 |         "strrec": "%s",
332 |         "search_key_word": "軟體工程師",
333 |         "search_type": "job",
334 |         "us_menu": "",
335 |         "search_item": "1",
336 |         "search_from": "index"
337 |     }'''%offset)
338 |     headers = json.loads(r'''{
339 |         "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
340 |         "Accept-Encoding": "gzip, deflate, br",
341 |         "Accept-Language": "en-US,en;q=0.9,zh-TW;q=0.8,zh;q=0.7",
342 |         "Cache-Control": "no-cache",
343 |         "Connection": "keep-alive",
344 |         "Content-Length": "6160",
345 |         "Content-Type": "application/x-www-form-urlencoded",
346 |         "Cookie": "_sh_c_1=search_type%3Ajob%7Csearch_item%3A1%7Csearch_key_word%3A%E8%BB%9F%E9%AB%94%E5%B7%A5%E7%A8%8B%E5%B8%AB%7C_mu_ckb_21%3Acheckbox%7C_mu_ckb_22%3Acheckbox%7C_mu_ckb_23%3Acheckbox%7C_mu_ckb_24%3Acheckbox%7C_mu_ckb_25%3Acheckbox; __auc=96bb3e47161316640a21544cdad; _ga=GA1.3.1835310821.1516952241; ASPSESSIONIDQSABDSSA=NEHFNDJAFKHEFFCENHKAFOHL; citrix_ns_id=NTH5FeqYzJZTH+Cff7Zuk795B6oA000; ASP.NET_SessionId=155014982; StepCookie_id=155014982; ClientIP=36.230.46.183; _gid=GA1.3.1475659930.1517365319; __asc=4351f1c81614a0557a517057437; yes123_make_cookie=b06623371d0308352dafe66b5a40bb84; step=3",
347 |         "Host": "www.yes123.com.tw",
348 |         "Origin": "https://www.yes123.com.tw",
349 |         "Pragma": "no-cache",
350 |         "Referer": "https://www.yes123.com.tw/admin/index.asp",
351 |         "Upgrade-Insecure-Requests": "1",
352 |         "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"
353 |     }''')
354 | 
355 |     resp = requests.post(url, data=data, headers=headers)
356 |     resp.encoding = 'utf-8'
357 | 
358 |     details = [HOST + url for url in re.findall("""<a href="(.*)" class='jobname'""", resp.text)]
359 | 
360 |     """Delay new jobs to get detail page"""
361 |     for url in details:
362 |         get_detail.delay(url)
363 | 
@app.task
def get_detail(url):
    """Scrape one yes123 job-detail page and queue it for storage.

    Args:
        url: Absolute URL of the job-detail page.

    Returns:
        dict with the keys ``head_count`` (list of number strings found in
        the head-count text), ``company_name``, ``job_title``, ``updated_at``
        (integer UTC timestamp of the "職缺更新" date, interpreted as
        midnight Taipei time), ``content`` and ``url``. The same dict is
        also passed to ``to_rethinkdb.delay``.

    Raises:
        requests.RequestException: if the HTTP request fails or times out.
        AttributeError: if the expected title elements are missing.
        IndexError: if the "職缺更新" date cannot be found in the page.
        ValueError: if the date text does not match ``%Y年%m月%d日``.
    """
    data = {}
    resp = requests.get(
        url,
        headers={"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"},
        # A timeout keeps a stalled server from blocking the worker indefinitely.
        timeout=30,
    )
    soup = BeautifulSoup(resp.text, 'lxml')
    title = soup.select_one('.jobname_title')

    # Use the scraped head-count text (e.g. '需求人數：2至2人'); \d+ keeps
    # multi-digit numbers whole instead of splitting them into single digits.
    head_count_str = title.select_one('span').text.strip()
    data['head_count'] = re.findall(r'\d+', head_count_str)
    data['company_name'] = title.select_one('a').text

    # Strip the <span> and <p> children so only the bare job title text remains.
    for tag in title.select('span'):
        tag.extract()
    for tag in title.select('p'):
        tag.extract()
    data['job_title'] = title.text.strip()

    # Non-greedy capture stops at the first closing </span> after the date.
    date_str = re.findall(r"""<span class="tt">職缺更新 ： </span><span class="rr">(.*?)</span>""", resp.text)[0]
    date_text = ''.join(date_str.split("&nbsp;")).strip()
    naive_date = datetime.datetime.strptime(date_text, '%Y年%m月%d日')
    # The page shows Taipei local dates. localize() attaches that zone to the
    # naive value; astimezone() on a naive datetime would instead assume the
    # worker machine's local timezone and shift the timestamp.
    taipei_date = pytz.timezone('Asia/Taipei').localize(naive_date)
    data['updated_at'] = int(taipei_date.timestamp())
    data['content'] = soup.select_one('div.left').text
    data['url'] = url

    to_rethinkdb.delay(data)
    return data
387 | 
388 | 
@app.task
def to_rethinkdb(data):
    """Insert one scraped job document into the ``yes123.crawler`` table.

    Args:
        data: Document (dict) to insert.

    Returns:
        The result returned by the RethinkDB ``insert(...).run(conn)`` call.
    """
    conn = r.connect()
    try:
        return r.db('yes123').table('crawler').insert(data).run(conn)
    finally:
        # Always release the connection, even when the insert raises.
        conn.close()
395 | 
396 | 
397 | 


--------------------------------------------------------------------------------
/bb104/bb104_20171109.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 32,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import requests\n",
 10 |     "from bs4 import BeautifulSoup\n",
 11 |     "\n",
 12 |     "html = requests.get('https://www.ptt.cc/bbs/Baseball/M.1510185651.A.50E.html').text\n",
 13 |     "\n",
 14 |     "s = BeautifulSoup(html, 'html5lib')"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 33,
 20 |    "metadata": {},
 21 |    "outputs": [
 22 |     {
 23 |      "data": {
 24 |       "text/plain": [
 25 |        "bs4.BeautifulSoup"
 26 |       ]
 27 |      },
 28 |      "execution_count": 33,
 29 |      "metadata": {},
 30 |      "output_type": "execute_result"
 31 |     }
 32 |    ],
 33 |    "source": [
 34 |     "type(s)"
 35 |    ]
 36 |   },
 37 |   {
 38 |    "cell_type": "code",
 39 |    "execution_count": 34,
 40 |    "metadata": {},
 41 |    "outputs": [
 42 |     {
 43 |      "data": {
 44 |       "text/plain": [
 45 |        "[<span class=\"article-meta-value\">poplc ()</span>,\n",
 46 |        " <span class=\"article-meta-value\">Baseball</span>,\n",
 47 |        " <span class=\"article-meta-value\">[問題] 預防性下架對嗎?</span>,\n",
 48 |        " <span class=\"article-meta-value\">Thu Nov  9 08:00:48 2017</span>]"
 49 |       ]
 50 |      },
 51 |      "execution_count": 34,
 52 |      "metadata": {},
 53 |      "output_type": "execute_result"
 54 |     }
 55 |    ],
 56 |    "source": [
 57 |     "s.select('span.article-meta-value')"
 58 |    ]
 59 |   },
 60 |   {
 61 |    "cell_type": "code",
 62 |    "execution_count": 35,
 63 |    "metadata": {
 64 |     "scrolled": true
 65 |    },
 66 |    "outputs": [
 67 |     {
 68 |      "data": {
 69 |       "text/plain": [
 70 |        "[<div class=\"article-metaline\"><span class=\"article-meta-tag\">作者</span><span class=\"article-meta-value\">poplc ()</span></div>,\n",
 71 |        " <div class=\"article-metaline\"><span class=\"article-meta-tag\">標題</span><span class=\"article-meta-value\">[問題] 預防性下架對嗎?</span></div>,\n",
 72 |        " <div class=\"article-metaline\"><span class=\"article-meta-tag\">時間</span><span class=\"article-meta-value\">Thu Nov  9 08:00:48 2017</span></div>]"
 73 |       ]
 74 |      },
 75 |      "execution_count": 35,
 76 |      "metadata": {},
 77 |      "output_type": "execute_result"
 78 |     }
 79 |    ],
 80 |    "source": [
 81 |     "s.findAll('div', {\"class\": \"article-metaline\"})"
 82 |    ]
 83 |   },
 84 |   {
 85 |    "cell_type": "code",
 86 |    "execution_count": 36,
 87 |    "metadata": {},
 88 |    "outputs": [
 89 |     {
 90 |      "name": "stdout",
 91 |      "output_type": "stream",
 92 |      "text": [
 93 |       "[<span class=\"article-meta-value\">poplc ()</span>]\n",
 94 |       "[<span class=\"article-meta-value\">[問題] 預防性下架對嗎?</span>]\n",
 95 |       "[<span class=\"article-meta-value\">Thu Nov  9 08:00:48 2017</span>]\n"
 96 |      ]
 97 |     }
 98 |    ],
 99 |    "source": [
100 |     "for div in s.findAll('div', {\"class\": \"article-metaline\"}):\n",
101 |     "    print(div.select('span.article-meta-value'))"
102 |    ]
103 |   },
104 |   {
105 |    "cell_type": "code",
106 |    "execution_count": 37,
107 |    "metadata": {},
108 |    "outputs": [
109 |     {
110 |      "data": {
111 |       "text/plain": [
112 |        "['119.77.211.222']"
113 |       ]
114 |      },
115 |      "execution_count": 37,
116 |      "metadata": {},
117 |      "output_type": "execute_result"
118 |     }
119 |    ],
120 |    "source": [
121 |     "import re\n",
122 |     "\n",
123 |     "re.findall('來自: (.+)\\n', html)"
124 |    ]
125 |   },
126 |   {
127 |    "cell_type": "code",
128 |    "execution_count": 38,
129 |    "metadata": {},
130 |    "outputs": [
131 |     {
132 |      "ename": "AttributeError",
133 |      "evalue": "'list' object has no attribute 'select'",
134 |      "output_type": "error",
135 |      "traceback": [
136 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
137 |       "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
138 |       "\u001b[0;32m<ipython-input-38-bf3080a5e4ac>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mselect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
139 |       "\u001b[0;31mAttributeError\u001b[0m: 'list' object has no attribute 'select'"
140 |      ]
141 |     }
142 |    ],
143 |    "source": [
144 |     "[1,2,3,4,5].select(2)"
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "code",
149 |    "execution_count": 39,
150 |    "metadata": {},
151 |    "outputs": [],
152 |    "source": [
153 |     "import requests\n",
154 |     "import json\n",
155 |     "url = 'http://amis.afa.gov.tw/veg/VegProdDayTransInfo.aspx'\n",
156 |     "\n",
157 |     "# POST /veg/VegProdDayTransInfo.aspx HTTP/1.1\n",
158 |     "\n",
159 |     "data = json.loads(r'''{\"ctl00$ScriptManager_Master\":\"ctl00$ScriptManager_Master|ctl00$contentPlaceHolder$btnQuery\",\"ctl00$contentPlaceHolder$ucDateScope$rblDateScope\":\"D\",\"ctl00$contentPlaceHolder$ucSolarLunar$radlSolarLunar\":\"S\",\"ctl00$contentPlaceHolder$txtSTransDate\":\"106/11/09\",\"ctl00$contentPlaceHolder$txtETransDate\":\"106/11/09\",\"ctl00$contentPlaceHolder$txtMarket\":\"台北一\",\"ctl00$contentPlaceHolder$hfldMarketNo\":\"109\",\"ctl00$contentPlaceHolder$txtProduct\":\"FA1 黃秋葵\",\"ctl00$contentPlaceHolder$hfldProductNo\":\"FA1\",\"ctl00$contentPlaceHolder$hfldProductType\":\"S\",\"__EVENTTARGET\":\"\",\"__EVENTARGUMENT\":\"\",\"__VIEWSTATE\":\"/wEPDwUIOTY0Mzc1NDIPZBYCZg9kFgICAw9kFgICCw9kFg4CAQ8PFgIeBFRleHQFFeeUouWTgeaXpeS6pOaYk+ihjOaDhWRkAgcPDxYCHwAFCTEwNi8xMS8wOWRkAgkPD2QWAh4Fc3R5bGUFDWRpc3BsYXk6bm9uZTtkAgsPDxYCHwAFCTEwNi8xMS8wORYCHwEFDWRpc3BsYXk6bm9uZTtkAg0PDxYCHwAFCeWPsOWMl+S4gGRkAhMPDxYCHwAFDUZBMSDpu4Pnp4vokbVkZAIhD2QWAmYPZBYCAgEPDxYCHgdWaXNpYmxlZ2QWDGYPDxYCHwAFFeeUouWTgeaXpeS6pOaYk+ihjOaDhWRkAgEPDxYCHwAFHDEwNi8xMS8wOSAo6L6y5puGOjEwNi8wOS8yMSlkZAICDw8WAh8ABRIxMDYvMTEvMDkgMTA6NDU6MDBkZAIDDw8WAh8ABQnlj7DljJfkuIBkZAIEDw8WAh8ABQ1GQTEg6buD56eL6JG1ZGQCBQ8UKwACDxYGHhFJdGVtUGxhY2Vob2xkZXJJRAUYbXlMYXlvdXQkaXRlbVBsYWNlaG9sZGVyHgtfIURhdGFCb3VuZGceC18hSXRlbUNvdW50AgFkZBYCZg9kFgJmD2QWBgIBDw8WAh8ABQQ1NS40ZGQCAw8PFgIfAAUFMiwzNjBkZAIFD2QWAmYPZBYCZg8WAh4FY2xhc3MFCW1haW5fbWFpbhYQZg9kFgJmDxUBD0ZBMSDpu4Pnp4vokbUgIGQCAQ9kFgJmDxUBBDk1LjRkAgIPZBYCZg8VAQQ1NC4wZAIDD2QWAmYPFQEEMTkuM2QCBA9kFgJmDxUCCW1haW5fYmx1ZQQ1NS40ZAIFD2QWAmYPFQIJbWFpbl9ibHVlBSAtICA5ZAIGD2QWAmYPFQIJbWFpbl9ibHVlBTIsMzYwZAIHD2QWAmYPFQIJbWFpbl9ibHVlBSAtIDEwZBgBBSFjdGwwMCRjb250ZW50UGxhY2VIb2xkZXIkbGlzdFZpZXcPFCsADmRkZGRkZGQUKwABZAIBZGRkZgL/////D2Shbi0Q8FMb28ABMApzbMi9iYGvGXgI8S4PMenI1pyCUw==\",\"__VIEWSTATEGENERATOR\":\"924237A5\",\"__EVENTVALIDATION\":\"/wEdABATJXkjgjjtTvzBeYbuXY5KQlnRBSjq2R0LFBhqvIaYOdWbMM2/DWJrZzd7rAbCDCMbHYsHDbD1wmtGXihvmsnJ8BlZTYOptctvPAnPr9y5LJoyUCbB5OTDc5yZRRQ2PEmkvfJ
0YrSiHU+/oXyBv2VhrkJjLitQjF6ePtmGbXiLrIzHLqmP3vmfhBo4iiBYbOAMxUXSePoiAbW03Aek83lEONL/4qBgBPfx/RZlnlGK8F2urMXFZJUEVGioaQEN8wAcw2+N1zwrySGFt1o6Y654NdK0LUPG/u+ZYgXys7Q5MzRmtqZjr7cHdstuZRNzNiosHqM4wINIxgrfpUQJzv9gPICEkdoaviBgG+fMl+hVX0x714DOoP5K0lXJZIBo+vY=\",\"__ASYNCPOST\":\"true\",\"ctl00$contentPlaceHolder$btnQuery\":\"查詢\"}''')\n",
160 |     "\n",
161 |     "headers = {\n",
162 |     "    \"Accept\": \"*/*\",\n",
163 |     "    \"Accept-Encoding\": \"gzip, deflate\",\n",
164 |     "    \"Accept-Language\": \"en-US,en;q=0.9\",\n",
165 |     "    \"Cache-Control\": \"no-cache\",\n",
166 |     "    \"Connection\": \"keep-alive\",\n",
167 |     "    \"Content-Length\": \"2331\",\n",
168 |     "    \"Content-Type\": \"application/x-www-form-urlencoded; charset=UTF-8\",\n",
169 |     "    \"Cookie\": \"_ga=GA1.3.826591586.1510195469; _gid=GA1.3.94857987.1510195469; ASP.NET_SessionId=4eedublu3syudfdtknob1q1p\",\n",
170 |     "    \"Host\": \"amis.afa.gov.tw\",\n",
171 |     "    \"Origin\": \"http://amis.afa.gov.tw\",\n",
172 |     "    \"Referer\": \"http://amis.afa.gov.tw/veg/VegProdDayTransInfo.aspx\",\n",
173 |     "    \"User-Agent\": \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.89 Safari/537.36\",\n",
174 |     "    \"X-MicrosoftAjax\": \"Delta=true\",\n",
175 |     "    \"X-Requested-With\": \"XMLHttpRequest\"\n",
176 |     "}\n"
177 |    ]
178 |   },
179 |   {
180 |    "cell_type": "code",
181 |    "execution_count": 40,
182 |    "metadata": {},
183 |    "outputs": [],
184 |    "source": [
185 |     "resp = requests.post(url, data=data, headers=headers)"
186 |    ]
187 |   },
188 |   {
189 |    "cell_type": "code",
190 |    "execution_count": 41,
191 |    "metadata": {},
192 |    "outputs": [],
193 |    "source": [
194 |     "# Quick HTTP Inspector\n",
195 |     "# Chrome 插件，快速複製HTTP Requests\n",
196 |     "#\n",
197 |     "# https://chrome.google.com/webstore/detail/quick-http-inspector/holkomabobpkbfdfnjomglmcgaeeojpg"
198 |    ]
199 |   },
200 |   {
201 |    "cell_type": "code",
202 |    "execution_count": 31,
203 |    "metadata": {},
204 |    "outputs": [
205 |     {
206 |      "data": {
207 |       "text/plain": [
208 |        "'1|#||4|3665|updatePanel|ctl00_contentPlaceHolder_updatePanelMain|\\r\\n\\r\\n    <div id=\"ctl00_contentPlaceHolder_panel\">\\r\\n\\t\\r\\n\\r\\n    <table width=\"100%\">\\r\\n        <tr>\\r\\n            <td align=\"center\" class=\"table_title\">\\r\\n                <input id=\"btnPrint\" type=\"button\" value=\"列印\" class=\"butten_org\" onclick=\"PrintContainer(this, \\'ctl00_contentPlaceHolder_panel\\');\" />\\r\\n                蔬菜\\r\\n                <span id=\"ctl00_contentPlaceHolder_lblResultTitle\">產品日交易行情</span>\\r\\n                查詢結果\\r\\n            </td>\\r\\n        </tr>\\r\\n    </table>\\r\\n    <table width=\"100%\" border=\"0\" cellpadding=\"2\" cellspacing=\"1\">\\r\\n        <tr>\\r\\n            <td width=\"10%\" class=\"table_title_03\">交易日期：</td>\\r\\n            <td class=\"main_main_eng\">\\r\\n                <span id=\"ctl00_contentPlaceHolder_lblTransDate\">106/11/09 (農曆:106/09/21)</span>\\r\\n            </td>\\r\\n            <td width=\"10%\" class=\"table_title_03\">查詢時間：</td>\\r\\n            <td class=\"main_main_eng\">\\r\\n                <span id=\"ctl00_contentPlaceHolder_lblQueryTime\">106/11/09 10:46:55</span>\\r\\n            </td>\\r\\n        </tr>\\r\\n        <tr>\\r\\n            <td class=\"table_title_03\">市\\u3000\\u3000場：</td>\\r\\n            <td class=\"main_main_eng\" colspan=\"3\">\\r\\n                <span id=\"ctl00_contentPlaceHolder_lblMarkets\">台北一</span>\\r\\n            </td>\\r\\n        </tr>\\r\\n        <tr>\\r\\n            <td class=\"table_title_03\">產\\u3000\\u3000品：</td>\\r\\n            <td class=\"main_main_eng\" colspan=\"3\">\\r\\n                <span id=\"ctl00_contentPlaceHolder_lblProducts\">FA1 黃秋葵</span>\\r\\n            </td>\\r\\n        </tr>\\r\\n    </table>\\r\\n\\r\\n    \\r\\n<table width=\"100%\" border=\"1\" cellpadding=\"1\" cellspacing=\"0\" style=\"border-color: Gray;\">\\r\\n  <tr align=\"center\" valign=\"middle\" class=\"main_title\" 
style=\"line-height: 20px;\">\\r\\n    <td style=\"white-space: nowrap;\" align=\"center\">產品</td>\\r\\n    <td style=\"white-space: nowrap;\" align=\"right\" >上價</td>\\r\\n    <td style=\"white-space: nowrap;\" align=\"right\" >中價</td>\\r\\n    <td style=\"white-space: nowrap;\" align=\"right\" >下價</td>\\r\\n    <td style=\"white-space: nowrap;\" align=\"right\" >平均價<br />(元/公斤)</td>\\r\\n    <td style=\"white-space: nowrap;\" align=\"right\" >跟前一<br />交易日<br />比較%</td>\\r\\n    <td style=\"white-space: nowrap;\" align=\"right\" >交易量<br />(公斤)</td>\\r\\n    <td style=\"white-space: nowrap;\" align=\"right\" >跟前一<br />交易日<br />比較%</td>\\r\\n  </tr>\\r\\n  <tr align=\"center\" valign=\"middle\" class=\"main_main_02\">\\r\\n    <td style=\"white-space: nowrap;\" align=\"center\">小計</td>\\r\\n    <td style=\"white-space: nowrap;\" align=\"right\" >&nbsp;</td>\\r\\n    <td style=\"white-space: nowrap;\" align=\"right\" >&nbsp;</td>\\r\\n    <td style=\"white-space: nowrap;\" align=\"right\" >&nbsp;</td>\\r\\n    <td style=\"white-space: nowrap;\" align=\"right\" ><span id=\"ctl00_contentPlaceHolder_listView_myLayout_lblAvgPrice\">55.4</span></td>\\r\\n    <td style=\"white-space: nowrap;\" align=\"right\" >&nbsp;</td>\\r\\n    <td style=\"white-space: nowrap;\" align=\"right\" ><span id=\"ctl00_contentPlaceHolder_listView_myLayout_lblTransVolume\">2,360</span></td>\\r\\n    <td style=\"white-space: nowrap;\" align=\"right\" >&nbsp;</td>\\r\\n  </tr>\\r\\n  <tr align=\"center\" valign=\"middle\" class=\"main_main\">\\r\\n\\t\\t<td style=\"white-space: nowrap;\" align=\"left\">FA1 黃秋葵  </td>\\r\\n\\t\\t<td style=\"white-space: nowrap;\" align=\"right\">95.4</td>\\r\\n\\t\\t<td style=\"white-space: nowrap;\" align=\"right\">54.0</td>\\r\\n\\t\\t<td style=\"white-space: nowrap;\" align=\"right\">19.3</td>\\r\\n\\t\\t<td style=\"white-space: nowrap;\" align=\"right\"><span class=\\'main_blue\\'>55.4</span></td>\\r\\n\\t\\t<td style=\"white-space: nowrap;\" 
align=\"right\"><span class=\\'main_blue\\'> -  9</span></td>\\r\\n\\t\\t<td style=\"white-space: nowrap;\" align=\"right\"><span class=\\'main_blue\\'>2,360</span></td>\\r\\n\\t\\t<td style=\"white-space: nowrap;\" align=\"right\"><span class=\\'main_blue\\'> - 10</span></td>\\r\\n\\t</tr>\\r\\n\\t  \\r\\n</table>\\r\\n\\r\\n    \\r\\n</div>\\r\\n\\r\\n    |0|hiddenField|__EVENTTARGET||0|hiddenField|__EVENTARGUMENT||1112|hiddenField|__VIEWSTATE|/wEPDwUIOTY0Mzc1NDIPZBYCZg9kFgICAw9kFgICCw9kFg4CAQ8PFgIeBFRleHQFFeeUouWTgeaXpeS6pOaYk+ihjOaDhWRkAgcPDxYCHwAFCTEwNi8xMS8wOWRkAgkPD2QWAh4Fc3R5bGUFDWRpc3BsYXk6bm9uZTtkAgsPDxYCHwAFCTEwNi8xMS8wORYCHwEFDWRpc3BsYXk6bm9uZTtkAg0PDxYCHwAFCeWPsOWMl+S4gGRkAhMPDxYCHwAFDUZBMSDpu4Pnp4vokbVkZAIhD2QWAmYPZBYCAgEPDxYCHgdWaXNpYmxlZ2QWDGYPDxYCHwAFFeeUouWTgeaXpeS6pOaYk+ihjOaDhWRkAgEPDxYCHwAFHDEwNi8xMS8wOSAo6L6y5puGOjEwNi8wOS8yMSlkZAICDw8WAh8ABRIxMDYvMTEvMDkgMTA6NDY6NTVkZAIDDw8WAh8ABQnlj7DljJfkuIBkZAIEDw8WAh8ABQ1GQTEg6buD56eL6JG1ZGQCBQ8UKwACDxYGHhFJdGVtUGxhY2Vob2xkZXJJRAUYbXlMYXlvdXQkaXRlbVBsYWNlaG9sZGVyHgtfIURhdGFCb3VuZGceC18hSXRlbUNvdW50AgFkZBYCZg9kFgJmD2QWBgIBDw8WAh8ABQQ1NS40ZGQCAw8PFgIfAAUFMiwzNjBkZAIFD2QWAmYPZBYCZg8WAh4FY2xhc3MFCW1haW5fbWFpbhYQZg9kFgJmDxUBD0ZBMSDpu4Pnp4vokbUgIGQCAQ9kFgJmDxUBBDk1LjRkAgIPZBYCZg8VAQQ1NC4wZAIDD2QWAmYPFQEEMTkuM2QCBA9kFgJmDxUCCW1haW5fYmx1ZQQ1NS40ZAIFD2QWAmYPFQIJbWFpbl9ibHVlBSAtICA5ZAIGD2QWAmYPFQIJbWFpbl9ibHVlBTIsMzYwZAIHD2QWAmYPFQIJbWFpbl9ibHVlBSAtIDEwZBgBBSFjdGwwMCRjb250ZW50UGxhY2VIb2xkZXIkbGlzdFZpZXcPFCsADmRkZGRkZGQUKwABZAIBZGRkZgL/////D2TUu25bTAVJjFGyQnctIv1dL8V//cYAj3XiTKyCyHUKRA==|8|hiddenField|__VIEWSTATEGENERATOR|924237A5|392|hiddenField|__EVENTVALIDATION|/wEdABBPxtWDnPKf+FwHXlYEMOFCQlnRBSjq2R0LFBhqvIaYOdWbMM2/DWJrZzd7rAbCDCMbHYsHDbD1wmtGXihvmsnJ8BlZTYOptctvPAnPr9y5LJoyUCbB5OTDc5yZRRQ2PEmkvfJ0YrSiHU+/oXyBv2VhrkJjLitQjF6ePtmGbXiLrIzHLqmP3vmfhBo4iiBYbOAMxUXSePoiAbW03Aek83lEONL/4qBgBPfx/RZlnlGK8F2urMXFZJUEVGioaQEN8wAcw2+N1zwrySGFt1o6Y654NdK0LUPG/u+ZYgXys7Q5MzRmtqZjr7cHdstuZRNzNiosHqM4wINIxgrfpUQJzv9gnwInEHWfUyT
FbRBWkvgQKZTy9gViD1V6wiMFmkKIjL0=|34|asyncPostBackControlIDs||ctl00$contentPlaceHolder$btnQuery,|0|postBackControlIDs|||42|updatePanelIDs||tctl00$contentPlaceHolder$updatePanelMain,|0|childUpdatePanelIDs|||41|panelsToRefreshIDs||ctl00$contentPlaceHolder$updatePanelMain,|2|asyncPostBackTimeout||90|26|formAction||./VegProdDayTransInfo.aspx|'"
209 |       ]
210 |      },
211 |      "execution_count": 31,
212 |      "metadata": {},
213 |      "output_type": "execute_result"
214 |     }
215 |    ],
216 |    "source": [
217 |     "resp.text"
218 |    ]
219 |   },
220 |   {
221 |    "cell_type": "code",
222 |    "execution_count": null,
223 |    "metadata": {},
224 |    "outputs": [],
225 |    "source": []
226 |   }
227 |  ],
228 |  "metadata": {
229 |   "kernelspec": {
230 |    "display_name": "Python 3",
231 |    "language": "python",
232 |    "name": "python3"
233 |   },
234 |   "language_info": {
235 |    "codemirror_mode": {
236 |     "name": "ipython",
237 |     "version": 3
238 |    },
239 |    "file_extension": ".py",
240 |    "mimetype": "text/x-python",
241 |    "name": "python",
242 |    "nbconvert_exporter": "python",
243 |    "pygments_lexer": "ipython3",
244 |    "version": "3.6.2"
245 |   }
246 |  },
247 |  "nbformat": 4,
248 |  "nbformat_minor": 2
249 | }
250 | 


--------------------------------------------------------------------------------
/bb103/bb103_pythonetl_20170829.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 5,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "# Mizuno crawler\n",
 12 |     "import requests as r\n",
 13 |     "from bs4 import BeautifulSoup"
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "code",
 18 |    "execution_count": 3,
 19 |    "metadata": {
 20 |     "collapsed": true
 21 |    },
 22 |    "outputs": [],
 23 |    "source": [
 24 |     "res = r.get(\"http://www.mizuno.com.tw/04product/product_series_all.aspx?fid=6&tid=35\")"
 25 |    ]
 26 |   },
 27 |   {
 28 |    "cell_type": "code",
 29 |    "execution_count": 6,
 30 |    "metadata": {
 31 |     "collapsed": true
 32 |    },
 33 |    "outputs": [],
 34 |    "source": [
 35 |     "soup = BeautifulSoup(res.text, 'html5lib')"
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "code",
 40 |    "execution_count": 48,
 41 |    "metadata": {
 42 |     "collapsed": true
 43 |    },
 44 |    "outputs": [],
 45 |    "source": [
 46 |     "product_list = soup.find_all(lambda x: 'product_detail' in x.get('href', ''))"
 47 |    ]
 48 |   },
 49 |   {
 50 |    "cell_type": "code",
 51 |    "execution_count": 52,
 52 |    "metadata": {
 53 |     "scrolled": true
 54 |    },
 55 |    "outputs": [
 56 |     {
 57 |      "data": {
 58 |       "text/plain": [
 59 |        "{'product_detail.aspx?pid=17852',\n",
 60 |        " 'product_detail.aspx?pid=17853',\n",
 61 |        " 'product_detail.aspx?pid=19849',\n",
 62 |        " 'product_detail.aspx?pid=19850',\n",
 63 |        " 'product_detail.aspx?pid=19881',\n",
 64 |        " 'product_detail.aspx?pid=19882',\n",
 65 |        " 'product_detail.aspx?pid=19883',\n",
 66 |        " 'product_detail.aspx?pid=19884',\n",
 67 |        " 'product_detail.aspx?pid=19885',\n",
 68 |        " 'product_detail.aspx?pid=20216',\n",
 69 |        " 'product_detail.aspx?pid=20217'}"
 70 |       ]
 71 |      },
 72 |      "execution_count": 52,
 73 |      "metadata": {},
 74 |      "output_type": "execute_result"
 75 |     }
 76 |    ],
 77 |    "source": [
 78 |     "products = {list['href'] for list in product_list}\n",
 79 |     "products"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "code",
 84 |    "execution_count": 54,
 85 |    "metadata": {},
 86 |    "outputs": [
 87 |     {
 88 |      "name": "stdout",
 89 |      "output_type": "stream",
 90 |      "text": [
 91 |       "http://www.mizuno.com.tw/04product/product_detail.aspx?pid=20216\n",
 92 |       "http://www.mizuno.com.tw/04product/product_detail.aspx?pid=20217\n",
 93 |       "http://www.mizuno.com.tw/04product/product_detail.aspx?pid=17853\n",
 94 |       "http://www.mizuno.com.tw/04product/product_detail.aspx?pid=19884\n",
 95 |       "http://www.mizuno.com.tw/04product/product_detail.aspx?pid=19882\n",
 96 |       "http://www.mizuno.com.tw/04product/product_detail.aspx?pid=19885\n",
 97 |       "http://www.mizuno.com.tw/04product/product_detail.aspx?pid=19883\n",
 98 |       "http://www.mizuno.com.tw/04product/product_detail.aspx?pid=19850\n",
 99 |       "http://www.mizuno.com.tw/04product/product_detail.aspx?pid=17852\n",
100 |       "http://www.mizuno.com.tw/04product/product_detail.aspx?pid=19849\n",
101 |       "http://www.mizuno.com.tw/04product/product_detail.aspx?pid=19881\n"
102 |      ]
103 |     }
104 |    ],
105 |    "source": [
106 |     "HOST = \"http://www.mizuno.com.tw/04product/\"\n",
107 |     "\n",
108 |     "for link in products:\n",
109 |     "    url = HOST + link\n",
110 |     "    print(url)"
111 |    ]
112 |   },
113 |   {
114 |    "cell_type": "code",
115 |    "execution_count": null,
116 |    "metadata": {
117 |     "collapsed": true
118 |    },
119 |    "outputs": [],
120 |    "source": [
121 |     "# Requests and proxy"
122 |    ]
123 |   },
124 |   {
125 |    "cell_type": "code",
126 |    "execution_count": null,
127 |    "metadata": {
128 |     "collapsed": true
129 |    },
130 |    "outputs": [],
131 |    "source": [
132 |     "# HTTP proxy\n",
133 |     "# found online, https://www.sslproxies.org/"
134 |    ]
135 |   },
136 |   {
137 |    "cell_type": "code",
138 |    "execution_count": 40,
139 |    "metadata": {},
140 |    "outputs": [
141 |     {
142 |      "data": {
143 |       "text/plain": [
144 |        "'201.218.63.171'"
145 |       ]
146 |      },
147 |      "execution_count": 40,
148 |      "metadata": {},
149 |      "output_type": "execute_result"
150 |     }
151 |    ],
152 |    "source": [
153 |     "import requests as r\n",
154 |     "\n",
155 |     "proxies = {\n",
156 |     "  'http': 'http://201.218.63.171:65301',\n",
157 |     "  'https': 'http://201.218.63.171:65301',\n",
158 |     "}\n",
159 |     "\n",
160 |     "url = 'http://api.ipify.org'\n",
161 |     "r.get(url, proxies=proxies).text"
162 |    ]
163 |   },
164 |   {
165 |    "cell_type": "code",
166 |    "execution_count": 41,
167 |    "metadata": {},
168 |    "outputs": [
169 |     {
170 |      "data": {
171 |       "text/plain": [
172 |        "'111.248.77.73'"
173 |       ]
174 |      },
175 |      "execution_count": 41,
176 |      "metadata": {},
177 |      "output_type": "execute_result"
178 |     }
179 |    ],
180 |    "source": [
181 |     "import requests as r\n",
182 |     "\n",
183 |     "proxies = {\n",
184 |     "  'http': 'http://201.218.63.171:65301',\n",
185 |     "  'https': 'http://201.218.63.171:65301',\n",
186 |     "}\n",
187 |     "\n",
188 |     "url = 'http://api.ipify.org'\n",
189 |     "r.get(url).text"
190 |    ]
191 |   },
192 |   {
193 |    "cell_type": "code",
194 |    "execution_count": null,
195 |    "metadata": {
196 |     "collapsed": true
197 |    },
198 |    "outputs": [],
199 |    "source": [
200 |     "# SOCKS proxy,\n",
201 |     "# can be any server with SSH\n",
202 |     "# example:\n",
203 |     "# ssh -D [PORT] -N [username]@[HOST(ip)]\n",
204 |     "# In this example, PORT is 8123"
205 |    ]
206 |   },
207 |   {
208 |    "cell_type": "code",
209 |    "execution_count": 44,
210 |    "metadata": {},
211 |    "outputs": [
212 |     {
213 |      "data": {
214 |       "text/plain": [
215 |        "'35.185.238.206'"
216 |       ]
217 |      },
218 |      "execution_count": 44,
219 |      "metadata": {},
220 |      "output_type": "execute_result"
221 |     }
222 |    ],
223 |    "source": [
224 |     "import requests as r\n",
225 |     "\n",
226 |     "proxies = {\n",
227 |     "  'http': 'socks5://localhost:8123',\n",
228 |     "  'https': 'socks5://localhost:8123',\n",
229 |     "}\n",
230 |     "\n",
231 |     "url = 'http://api.ipify.org'\n",
232 |     "r.get(url, proxies=proxies).text"
233 |    ]
234 |   },
235 |   {
236 |    "cell_type": "code",
237 |    "execution_count": 45,
238 |    "metadata": {},
239 |    "outputs": [
240 |     {
241 |      "data": {
242 |       "text/plain": [
243 |        "'111.248.77.73'"
244 |       ]
245 |      },
246 |      "execution_count": 45,
247 |      "metadata": {},
248 |      "output_type": "execute_result"
249 |     }
250 |    ],
251 |    "source": [
252 |     "import requests as r\n",
253 |     "\n",
254 |     "proxies = {\n",
255 |     "  'http': 'socks5://localhost:8123',\n",
256 |     "  'https': 'socks5://localhost:8123',\n",
257 |     "}\n",
258 |     "\n",
259 |     "url = 'http://api.ipify.org'\n",
260 |     "r.get(url).text"
261 |    ]
262 |   },
263 |   {
264 |    "cell_type": "code",
265 |    "execution_count": 46,
266 |    "metadata": {
267 |     "collapsed": true
268 |    },
269 |    "outputs": [],
270 |    "source": [
271 |     "# Python and MySQL"
272 |    ]
273 |   },
274 |   {
275 |    "cell_type": "code",
276 |    "execution_count": 55,
277 |    "metadata": {
278 |     "collapsed": true
279 |    },
280 |    "outputs": [],
281 |    "source": [
282 |     "# Install MySQL on Ubuntu\n",
283 |     "# sudo apt-get update && sudo apt-get install -y mariadb-server"
284 |    ]
285 |   },
286 |   {
287 |    "cell_type": "code",
288 |    "execution_count": 56,
289 |    "metadata": {},
290 |    "outputs": [
291 |     {
292 |      "name": "stdout",
293 |      "output_type": "stream",
294 |      "text": [
295 |       "Collecting peewee\n",
296 |       "  Downloading peewee-2.10.1.tar.gz (515kB)\n",
297 |       "\u001b[K    100% |████████████████████████████████| 522kB 2.0MB/s ta 0:00:01\n",
298 |       "\u001b[?25hInstalling collected packages: peewee\n",
299 |       "  Running setup.py install for peewee ... \u001b[?25ldone\n",
300 |       "\u001b[?25hSuccessfully installed peewee-2.10.1\n"
301 |      ]
302 |     }
303 |    ],
304 |    "source": [
305 |     "!pip install peewee"
306 |    ]
307 |   },
308 |   {
309 |    "cell_type": "code",
310 |    "execution_count": 57,
311 |    "metadata": {},
312 |    "outputs": [
313 |     {
314 |      "name": "stdout",
315 |      "output_type": "stream",
316 |      "text": [
317 |       "Requirement already satisfied: PyMySQL in /Users/ian/.pyenv/versions/3.6.1/lib/python3.6/site-packages\r\n"
318 |      ]
319 |     }
320 |    ],
321 |    "source": [
322 |     "!pip install PyMySQL"
323 |    ]
324 |   },
325 |   {
326 |    "cell_type": "code",
327 |    "execution_count": 58,
328 |    "metadata": {
329 |     "collapsed": true
330 |    },
331 |    "outputs": [],
332 |    "source": [
333 |     "import peewee"
334 |    ]
335 |   },
336 |   {
337 |    "cell_type": "code",
338 |    "execution_count": 111,
339 |    "metadata": {
340 |     "collapsed": true
341 |    },
342 |    "outputs": [],
343 |    "source": [
344 |     "db = peewee.MySQLDatabase('news',\n",
345 |     "                          user='admin',\n",
346 |     "                          host='localhost',\n",
347 |     "                          password='')"
348 |    ]
349 |   },
350 |   {
351 |    "cell_type": "code",
352 |    "execution_count": 61,
353 |    "metadata": {},
354 |    "outputs": [
355 |     {
356 |      "data": {
357 |       "text/plain": [
358 |        "{'email': 'ianchen06@gmail.com',\n",
359 |        " 'id': 1,\n",
360 |        " 'name': 'ian',\n",
361 |        " 'website': 'ianchenhq.com',\n",
362 |        " 'year': 2017}"
363 |       ]
364 |      },
365 |      "execution_count": 61,
366 |      "metadata": {},
367 |      "output_type": "execute_result"
368 |     }
369 |    ],
370 |    "source": [
371 |     "{\n",
372 |     "    'id': 1,\n",
373 |     "    'name': 'ian',\n",
374 |     "    'email': 'ianchen06@gmail.com',\n",
375 |     "    'website': 'ianchenhq.com',\n",
376 |     "    'year': 2017\n",
377 |     "}"
378 |    ]
379 |   },
380 |   {
381 |    "cell_type": "code",
382 |    "execution_count": 113,
383 |    "metadata": {
384 |     "collapsed": true
385 |    },
386 |    "outputs": [],
387 |    "source": [
388 |     "customers = [\n",
389 |     "    {\n",
390 |     "        'name': 'ian',\n",
391 |     "        'email': 'ianchen06@gmail.com',\n",
392 |     "        'eth': 2017\n",
393 |     "    },\n",
394 |     "    {\n",
395 |     "        'name': 'ian',\n",
396 |     "        'email': 'ianchen06@gmail.com',\n",
397 |     "        'eth': 2018\n",
398 |     "    }\n",
399 |     "]"
400 |    ]
401 |   },
402 |   {
403 |    "cell_type": "code",
404 |    "execution_count": 87,
405 |    "metadata": {
406 |     "collapsed": true
407 |    },
408 |    "outputs": [],
409 |    "source": [
410 |     "class Customer(peewee.Model):\n",
411 |     "    name = peewee.CharField()\n",
412 |     "    email = peewee.CharField()\n",
413 |     "    eth  = peewee.IntegerField()\n",
414 |     "    \n",
415 |     "    class Meta:\n",
416 |     "        database = db"
417 |    ]
418 |   },
419 |   {
420 |    "cell_type": "code",
421 |    "execution_count": 65,
422 |    "metadata": {},
423 |    "outputs": [
424 |     {
425 |      "data": {
426 |       "text/plain": [
427 |        "<pymysql.cursors.Cursor at 0x10b665f98>"
428 |       ]
429 |      },
430 |      "execution_count": 65,
431 |      "metadata": {},
432 |      "output_type": "execute_result"
433 |     }
434 |    ],
435 |    "source": [
436 |     "db.create_table(Customer)"
437 |    ]
438 |   },
439 |   {
440 |    "cell_type": "code",
441 |    "execution_count": 66,
442 |    "metadata": {
443 |     "collapsed": true
444 |    },
445 |    "outputs": [],
446 |    "source": [
447 |     "user = Customer(name='Ian',\n",
448 |     "        email='ianchen06@gmail.com',\n",
449 |     "        eth=1)"
450 |    ]
451 |   },
452 |   {
453 |    "cell_type": "code",
454 |    "execution_count": 67,
455 |    "metadata": {},
456 |    "outputs": [
457 |     {
458 |      "data": {
459 |       "text/plain": [
460 |        "<__main__.Customer at 0x10e6c4518>"
461 |       ]
462 |      },
463 |      "execution_count": 67,
464 |      "metadata": {},
465 |      "output_type": "execute_result"
466 |     }
467 |    ],
468 |    "source": [
469 |     "user"
470 |    ]
471 |   },
472 |   {
473 |    "cell_type": "code",
474 |    "execution_count": 68,
475 |    "metadata": {},
476 |    "outputs": [
477 |     {
478 |      "data": {
479 |       "text/plain": [
480 |        "1"
481 |       ]
482 |      },
483 |      "execution_count": 68,
484 |      "metadata": {},
485 |      "output_type": "execute_result"
486 |     }
487 |    ],
488 |    "source": [
489 |     "user.save()"
490 |    ]
491 |   },
492 |   {
493 |    "cell_type": "code",
494 |    "execution_count": 88,
495 |    "metadata": {
496 |     "collapsed": true
497 |    },
498 |    "outputs": [],
499 |    "source": [
500 |     "user = Customer(name='David',\n",
501 |     "        email='david@gmail.com',\n",
502 |     "        eth=2)"
503 |    ]
504 |   },
505 |   {
506 |    "cell_type": "code",
507 |    "execution_count": 89,
508 |    "metadata": {},
509 |    "outputs": [
510 |     {
511 |      "data": {
512 |       "text/plain": [
513 |        "1"
514 |       ]
515 |      },
516 |      "execution_count": 89,
517 |      "metadata": {},
518 |      "output_type": "execute_result"
519 |     }
520 |    ],
521 |    "source": [
522 |     "user.save()"
523 |    ]
524 |   },
525 |   {
526 |    "cell_type": "code",
527 |    "execution_count": 91,
528 |    "metadata": {},
529 |    "outputs": [
530 |     {
531 |      "data": {
532 |       "text/plain": [
533 |        "'ianchen06@gmail.com'"
534 |       ]
535 |      },
536 |      "execution_count": 91,
537 |      "metadata": {},
538 |      "output_type": "execute_result"
539 |     }
540 |    ],
541 |    "source": [
542 |     "Customer.get(Customer.name == 'Ian').email"
543 |    ]
544 |   },
545 |   {
546 |    "cell_type": "code",
547 |    "execution_count": 114,
548 |    "metadata": {},
549 |    "outputs": [],
550 |    "source": [
551 |     "# Bulk insert\n",
552 |     "\n",
553 |     "with db.atomic():\n",
554 |     "    Customer.insert_many(customers).execute()"
555 |    ]
556 |   },
557 |   {
558 |    "cell_type": "code",
559 |    "execution_count": null,
560 |    "metadata": {
561 |     "collapsed": true
562 |    },
563 |    "outputs": [],
564 |    "source": [
565 |     "# High level query API,\n",
566 |     "# playhouse.dataset.DataSet\n",
567 |     "# a very free-form API for interacting with an RDBMS"
568 |    ]
569 |   },
570 |   {
571 |    "cell_type": "code",
572 |    "execution_count": 93,
573 |    "metadata": {},
574 |    "outputs": [],
575 |    "source": [
576 |     "my_dict = {}"
577 |    ]
578 |   },
579 |   {
580 |    "cell_type": "code",
581 |    "execution_count": 94,
582 |    "metadata": {},
583 |    "outputs": [
584 |     {
585 |      "data": {
586 |       "text/plain": [
587 |        "dict_keys([])"
588 |       ]
589 |      },
590 |      "execution_count": 94,
591 |      "metadata": {},
592 |      "output_type": "execute_result"
593 |     }
594 |    ],
595 |    "source": [
596 |     "my_dict.keys()"
597 |    ]
598 |   },
599 |   {
600 |    "cell_type": "code",
601 |    "execution_count": 117,
602 |    "metadata": {},
603 |    "outputs": [],
604 |    "source": [
605 |     "from playhouse.dataset import DataSet\n",
606 |     "\n",
607 |     "db = DataSet('mysql://admin@localhost/news')"
608 |    ]
609 |   },
610 |   {
611 |    "cell_type": "code",
612 |    "execution_count": 118,
613 |    "metadata": {},
614 |    "outputs": [
615 |     {
616 |      "data": {
617 |       "text/plain": [
618 |        "['cnyes', 'customer']"
619 |       ]
620 |      },
621 |      "execution_count": 118,
622 |      "metadata": {},
623 |      "output_type": "execute_result"
624 |     }
625 |    ],
626 |    "source": [
627 |     "db.tables"
628 |    ]
629 |   },
630 |   {
631 |    "cell_type": "code",
632 |    "execution_count": 119,
633 |    "metadata": {},
634 |    "outputs": [],
635 |    "source": [
636 |     "customer = db['customer']"
637 |    ]
638 |   },
639 |   {
640 |    "cell_type": "code",
641 |    "execution_count": 121,
642 |    "metadata": {},
643 |    "outputs": [
644 |     {
645 |      "data": {
646 |       "text/plain": [
647 |        "6"
648 |       ]
649 |      },
650 |      "execution_count": 121,
651 |      "metadata": {},
652 |      "output_type": "execute_result"
653 |     }
654 |    ],
655 |    "source": [
656 |     "customer.insert(\n",
657 |     "    **{\n",
658 |     "        'name': 'eason',\n",
659 |     "        'email': 'eason@gmail.com',\n",
660 |     "        'eth': 100\n",
661 |     "    }\n",
662 |     ")"
663 |    ]
664 |   },
665 |   {
666 |    "cell_type": "code",
667 |    "execution_count": 122,
668 |    "metadata": {},
669 |    "outputs": [
670 |     {
671 |      "name": "stdout",
672 |      "output_type": "stream",
673 |      "text": [
674 |       "{'id': 6, 'name': 'eason', 'email': 'eason@gmail.com', 'eth': 100}\n"
675 |      ]
676 |     }
677 |    ],
678 |    "source": [
679 |     "for row in customer.find(name='eason'):\n",
680 |     "    print(row)"
681 |    ]
682 |   },
683 |   {
684 |    "cell_type": "code",
685 |    "execution_count": 107,
686 |    "metadata": {},
687 |    "outputs": [
688 |     {
689 |      "data": {
690 |       "text/plain": [
691 |        "{'email': 'ianchen06@gmail.com', 'eth': 1, 'id': 1, 'name': 'Ian'}"
692 |       ]
693 |      },
694 |      "execution_count": 107,
695 |      "metadata": {},
696 |      "output_type": "execute_result"
697 |     }
698 |    ],
699 |    "source": [
700 |     "customer.find_one(name='Ian')"
701 |    ]
702 |   },
703 |   {
704 |    "cell_type": "code",
705 |    "execution_count": 116,
706 |    "metadata": {
707 |     "collapsed": true
708 |    },
709 |    "outputs": [],
710 |    "source": [
711 |     "# Real example\n",
712 |     "# https://github.com/ianchen06/cnyes_cralwer"
713 |    ]
714 |   },
715 |   {
716 |    "cell_type": "code",
717 |    "execution_count": null,
718 |    "metadata": {
719 |     "collapsed": true
720 |    },
721 |    "outputs": [],
722 |    "source": []
723 |   }
724 |  ],
725 |  "metadata": {
726 |   "kernelspec": {
727 |    "display_name": "Python 3",
728 |    "language": "python",
729 |    "name": "python3"
730 |   },
731 |   "language_info": {
732 |    "codemirror_mode": {
733 |     "name": "ipython",
734 |     "version": 3
735 |    },
736 |    "file_extension": ".py",
737 |    "mimetype": "text/x-python",
738 |    "name": "python",
739 |    "nbconvert_exporter": "python",
740 |    "pygments_lexer": "ipython3",
741 |    "version": "3.6.1"
742 |   }
743 |  },
744 |  "nbformat": 4,
745 |  "nbformat_minor": 2
746 | }
747 | 


--------------------------------------------------------------------------------
/bb103/trump_interview.txt:
--------------------------------------------------------------------------------
  1 | PRESIDENT TRUMP: Hello, everybody. Great to be back in New York with all of our friends and some great friends outside the building, I must tell you.
  2 | 
  3 | I want to thank all of our distinguished guests who are with us today, including members of our cabinet: Treasury Secretary Steven Mnuchin, OMB Director Mick Mulvaney and, of course, our Transportation Secretary who is doing a fabulous job, Elaine Chao.
  4 | 
  5 | Thank you all for doing a really incredible and creative job on what we're going to be discussing today, which is infrastructure. We've just had a great set of briefings upstairs on our infrastructure agenda. My administration is working every day to deliver the world class infrastructure that our people deserve, and frankly our country deserves. That’s why I just signed a new Executive Order to dramatically reform the nation's badly broken infrastructure permitting process. Just blocks away is the Empire State Building. It took 11 months to build the Empire State Building. But today it can take as long as a decade, and much more than that. Many, many stories where it takes 20 and 25 years just to get approvals to start construction of a fairly routine highway. Highway builders must get up to 16 different approvals involving nine different federal agencies, governed by 29 different statutes. One agency alone can stall a project for many, many years and even decades.
  6 | 
  7 | 
  8 | Not only does this cost our economy billions of dollars, but it also denies our citizens the safe and modern infrastructure they deserve. This overregulated permitting process is a massive self-inflicted wound on our country. It’s disgraceful. Denying our people much needed investments in their community, and I just want to show you this, because it was just shown to me, and I said, I think I'm going to show it to the media – both real and fake media by the way.
  9 | 
 10 | This is what it takes to get something approved today. Elaine, you see that? So this is what it takes. Permitting process flow chart, that's a flow chart. So that can go out to about 20 years, this shows about 10. But that can go out to about 20 years to get something approved. This is for a highway. I’ve seen a highway recently in a certain state – I won’t mention its name, it’s 17 years. I could have built it for $4 million, $5 million without the permitting process. It costs hundreds of millions of dollars, but it took 17 years to get it approved, and many, many, many, many pages of environmental impact studies.
 11 | 
 12 | This is what we will bring it down to. This is less than two years. This is going to happen quickly, that's what I'm signing today. This will be less than two years for a highway. So it's going to be quick, it's going to be a very streamlined process, and by the way, if it doesn't meet environmental safeguards, we are not going to approve it. Very simple, we’re not going to approve it. So this is – maybe this one will say, let's throw the other one away. Would anybody like it from the media? Would anybody like that long, beautiful chart, you can have it.
 13 | 
 14 | 
 15 | So my executive order also requires agencies to work together efficiently by requiring one lead agency for each major infrastructure project. It also holds agencies accountable if they fail to streamline their review process, so each agency is accountable. We're going to get infrastructure built quickly, inexpensively, relatively speaking, and the permitting process will go very, very quickly. No longer will we tolerate one job killing delay after another. No longer will we accept a broken system that benefits consultants and lobbyists at the expense of hardworking Americans.
 16 | 
 17 | Now, I knew the process very well, probably better than anybody. I had to get permits for this building and many of the buildings I built. All of the buildings I built in Manhattan and many other places, and I will tell you that the consultants are rich people. They go around making it very difficult. They lobby congress, they lobby state government, city governments to make it very difficult so that you have to hire consultants and that you have to take years and pay them a fortune, so we’re streamlining the process, and we won't be having so much of that anymore.
 18 | 
 19 | No longer will we allow the infrastructure of our magnificent country to crumble and decay, while protecting the environment we will build gleaming new roads, bridges, railways, waterways, tunnels and highways. We will rebuild our country with American workers, American iron, American aluminum, American steel. We will create millions of new jobs and make millions of American dreams come true.
 20 | 
 21 | 
 22 | Our infrastructure will again be the best in the world. We used to have the greatest infrastructure anywhere in the world, and today, we are like a third-world country. We are literally like a third-world country. Our infrastructure will again be the best, and we will restore the pride in our communities, our nation, and all over the United States, we will be proud again.
 23 | 
 24 | So I want to thank everybody for being here. God bless you, God bless the United States. If you have any questions, Mick, you could come up here please, come on up. Mick Mulvaney. If you have any questions, please feel free to ask.
 25 | 
 26 | REPORTER: Mr. President, why is it that CEOs are leaving your manufacturing council?
 27 | 
 28 | TRUMP: Because they're not taking their jobs seriously as it pertains to this country. We want jobs, manufacturing in this country. If you look at some of those people that you’re talking about, they’re outside of the country, they’re having a lot of their product made outside, if you look at Merck as an example.
 29 | 
 30 | (chatter)
 31 | 
 32 | Excuse me, excuse me. Take a look at where their product is made. It’s made outside of our country. We want products made in the country. Now I have to tell you, some of the folks that will leave, they’re leaving out of embarrassment, because they make their products outside, and I’ve been lecturing them, including the gentleman you’ve been referring to, about, you have to bring it back to this country. You can’t do it, necessarily, in Ireland, and all of these other places. You have to bring this work back to this country. That’s what I want – I want manufacturing to be back into the United States so that American workers can benefit.
 33 | 
 34 | (chatter)
 35 | 
 36 | Reporter: Mr. President, why did you wait so long to blast neo-Nazis?
 37 | 
 38 | TRUMP: I didn’t wait long. I didn’t wait long. I wanted to make sure, unlike most politicians, that what I said was correct – not make a quick statement. The statement I made on Saturday, the first statement, was a fine statement. But you don’t make statements that direct unless you know the facts. It takes a little while to get the facts. You still don’t know the facts. And it’s a very, very important process to me and it’s an important statement. So I don’t want to go quickly, and just make a statement for the sake of making a political statement. I want to know the facts. If you go back to my – in fact, I brought it. I brought it. I brought it.
 39 | 
 40 | As I said on, remember, Saturday, “We condemn in the strongest possible terms this egregious display of hatred, bigotry and violence. It has no place in America.” And then I went on from there.
 41 | 
 42 | Now, here’s the thing.
 43 | 
 44 | (chatter)
 45 | 
 46 | Excuse me, excuse me. Take it nice and easy.
 47 | 
 48 | Here’s the thing. When I make a statement, I like to be correct. I want the facts. This event just happened. In fact, a lot of it didn’t even happen yet, as we were speaking. This event just happened. Before I make a statement, I need the facts. I don’t want to rush into a statement, so making the statement when I made it was excellent.
 49 | 
 50 | In fact, the young woman, who I hear is a fantastic young woman, her mother wrote me, and said, through I guess Twitter, social media, the nicest things, and I very much appreciated that. I hear she was a fine, really actually an incredible young woman. But her mother, on Twitter, thanked me for what I said, and honestly, if the press were not fake, and it was honest, the press would have said what I said was very nice.
 51 | 
 52 | (chatter)
 53 | 
 54 | Excuse me. But unlike you, and unlike the media – before I make a statement, I like to know the facts.
 55 | 
 56 | (chatter)
 57 | 
 58 | They don’t. They don’t.
 59 | 
 60 | How about a couple of infrastructure questions.
 61 | 
 62 | Reporter: The CEO of Walmart said you missed a critical moment to bring the country together. Did you?
 63 | 
 64 | TRUMP: Not at all. Look, you take a look. I’ve created over a million jobs since I’m president. The country is booming, the stock market is setting records, we have the highest employment numbers we’ve ever had in the history of our country, we’re doing record business, we have the highest levels of enthusiasm, so the head of Walmart, who I know – he’s a very nice guy – was making a political statement. I mean, ask him how his – they do it the same way, and you know why? Because I want to make sure, when I make the statement, that the statement was correct. There’s no way, there was no way of making a correct statement that early. I had to see the facts, unlike a lot of reporters, unlike a lot of reporters.
 65 | 
 66 | I didn’t know David Duke was there. I wanted to see the facts, and the facts, as they started coming out, were very well stated. In fact, everybody said, “his statement was beautiful. If he would have made it sooner, that would have been good.” I couldn’t have made it sooner because I didn’t know all of the facts. Frankly, people still don’t know all of the facts. It was very important –
 67 | 
 68 | (chatter)
 69 | 
 70 | Excuse me, excuse me. It was very important to me to get the facts out, and correctly. Because if I would have made a fast statement – and the first statement was made without knowing much other than what we were seeing. The second statement was made after – with knowledge, with great knowledge. There’s still things – excuse me – there’s still things that people don’t know. I want to make a statement with knowledge, I wanted to know the facts. Okay.
 71 | 
 72 | Reporter: Two questions – was this terrorism? And can you tell us how you’re feeling about your chief strategist, Stephen Bannon?
 73 | 
 74 | TRUMP: Well, I think the driver of the car is a disgrace to himself, his family and this country, and that is – you can call it terrorism, you can call it murder, you can call it whatever you want. I would just call it as the fastest one to come up with a good verdict, that’s what I’d call it, because there is a question, is it murder? Is it terrorism? And then you get into legal semantics.
 75 | 
 76 | The driver of the car is a murderer, and what he did is a horrible, horrible, inexcusable thing.
 77 | 
 78 | Reporter: Can you tell us how you feel about your chief strategist, Mr. Bannon?
 79 | 
 80 | TRUMP: I never spoke to Mr. Bannon about it.
 81 | 
 82 | Reporter: But can you tell us broadly what your, do you still have confidence in Steve?
 83 | 
 84 | TRUMP: Well, let’s see. Look, look, I like Mr. Bannon, he’s a friend of mine. But Mr. Bannon came on very late, you know that. I went through 17 senators, governors, and I won all the primaries. Mr. Bannon came on very much later than that, and I like him. He’s a good man. He’s not a racist, I can tell you that. He’s a good person. He actually gets a very unfair press in that regard. But, we’ll see what happens with Mr. Bannon. But he’s a good person, and I think the press treats him, frankly, very unfairly.
 85 | 
 86 | Reporter: McCain has called on you to defend your national security adviser, H.R. McMaster, against attacks.
 87 | 
 88 | TRUMP: I’ve already done that, I did it the last time.
 89 | 
 90 | Reporter: And he called on it again.
 91 | 
 92 | TRUMP: Senator McCain, you mean the one who voted against Obamacare? You mean Senator McCain, who voted against us getting good health care?
 93 | 
 94 | Reporter: Senator McCain said that the alt-right is behind these attacks, and he linked that same group to those who perpetrated the attack against Charlottesville.
 95 | 
 96 | TRUMP: Well, I don’t know, I can’t tell you. I’m sure Senator McCain must know what he’s talking about. But when you say the alt-right, define alt-right to me. You define it to me. Define it for me, go ahead.
 97 | 
 98 | Reporter: Senator McCain defined it as the same group –
 99 | 
100 | TRUMP: Okay, what about the alt-left that came charging at ‘em. Excuse me, what about the alt-left that came charging at, as you say the alt-right? Do they have any semblance of guilt?
101 | 
102 | (chatter)
103 | 
104 | TRUMP: Let me ask you this – what about the fact they came charging, that they came charging with clubs in their hands, swinging clubs? Do they have any problem? I think they do. So, you know, as far as I’m concerned, that was a horrible, horrible day. Wait a minute – I’m not finished, fake news. That was a horrible day.
105 | 
106 | (chatter)
107 | 
108 | TRUMP: I will tell you something. I watched those very closely, much more closely than you people watched it, and you have, you had a group on one side that was bad, and you had a group on the other side that was also very violent. And nobody wants to say that, but I’ll say it right now. You had a group on the other side that came charging in, without a permit, and they were very, very violent.
109 | 
110 | (chatter)
111 | 
112 | Reporter: Do you think that what you call the alt-left is the same as neo-Nazis?
113 | 
114 | TRUMP: Those people, all of those people, excuse me. I’ve condemned neo-Nazis. I’ve condemned many different groups. But not all of those people were neo-Nazis, believe me. Not all of those people were white supremacists, by any stretch. Those people were also there because they wanted to protest the taking down of a statue of Robert E. Lee.
115 | 
116 | Reporter: Should that statue be taken down?
117 | 
118 | TRUMP: Excuse me. If you take a look at some of the groups, and you see – and you'd know it if you were honest reporters, which in many cases you're not – but many of those people were there to protest the taking down of the statue of Robert E. Lee. So this week it's Robert E. Lee. I noticed that Stonewall Jackson is coming down. I wonder, is it George Washington next week? And is it Thomas Jefferson the week after? You know, you really do have to ask yourself, where does it stop?
119 | 
120 | But they were there to protest -- excuse me, if you take a look, the night before they were there to protest the taking down of the statue of Robert E. Lee. Infrastructure question. Go ahead.
121 | 
122 | Reporter: Should the statues of Robert E. Lee stay up?
123 | 
124 | TRUMP: I would say that's up to a local town, community, or the federal government, depending on where it is located.
125 | 
126 | Reporter: How concerned are you about race relations in America, and do you think things have gotten worse or better since you took office?
127 | 
128 | TRUMP: I think they've gotten better or the same. Look, they've been frayed for a long time. And you can ask President Obama about that, because he'd make speeches about it. But I believe that the fact that I brought in -- it will be soon -- millions of jobs -- you see where companies are moving back into our country -- I think that's going to have a tremendous, positive impact on race relations. We have companies coming back into our country. We have two car companies that just announced. We have Foxconn in Wisconsin just announced. We have many companies, I say, pouring back into the country. I think that's going to have a huge, positive impact on race relations.
129 | 
130 | You know why? It's jobs. What people want now, they want jobs. They want great jobs with good pay, and when they have that, you watch how race relations will be. And I’ll tell you, we’re spending a lot of money on the inner cities. We’re fixing the inner cities. We’re doing far more than anybody has done with respect to the inner cities. It’s a priority for me, and it’s very important.
131 | 
132 | Reporter: Mr. President, are you putting what you’re calling the alt-left and white supremacists on the same moral plane?
133 | 
134 | TRUMP: I’m not putting anybody on a moral plane. What I’m saying is this: You had a group on one side and you had a group on the other, and they came at each other with clubs -- and it was vicious and it was horrible. And it was a horrible thing to watch. But there is another side. There was a group on this side. You can call them the left -- you just called them the left -- that came violently attacking the other group. So you can say what you want, but that’s the way it is.
135 | 
136 | Reporter: You said there was hatred, there was violence on both sides. Are the –
137 | 
138 | TRUMP: Yes, I think there’s blame on both sides. If you look at both sides -- I think there’s blame on both sides. And I have no doubt about it, and you don’t have any doubt about it either. And if you reported it accurately, you would say.
139 | 
140 | Reporter: The neo-Nazis started this. They showed up in Charlottesville to protest –
141 | 
142 | TRUMP: Excuse me, excuse me. They didn’t put themselves -- and you had some very bad people in that group, but you also had people that were very fine people, on both sides. You had people in that group.
143 | 
144 | (chatter)
145 | 
146 | TRUMP: Excuse me, excuse me. I saw the same pictures as you did. You had people in that group that were there to protest the taking down of, to them, a very, very important statue and the renaming of a park from Robert E. Lee to another name.
147 | 
148 | Reporter: George Washington and Robert E. Lee are not the same.
149 | 
150 | TRUMP: George Washington was a slave owner. Was George Washington a slave owner? So will George Washington now lose his status? Are we going to take down -- Excuse me, are we going to take down statues to George Washington? How about Thomas Jefferson? What do you think of Thomas Jefferson? You like him?
151 | 
152 | Reporter: I do love Thomas Jefferson.
153 | 
154 | TRUMP: Okay, good. Are we going to take down the statue? Because he was a major slave owner. Now, are we going to take down his statue? So you know what, it’s fine. You’re changing history. You’re changing culture. And you had people -- and I’m not talking about the neo-Nazis and the white nationalists -- because they should be condemned totally. But you had many people in that group other than neo-Nazis and white nationalists. Okay? And the press has treated them absolutely unfairly. Now, in the other group also, you had some fine people. But you also had troublemakers, and you see them come with the black outfits and with the helmets, and with the baseball bats. You had a lot of bad people in the other group.
155 | 
156 | Reporter: Who are the good people? Sir, I just didn’t understand what you were saying. You were saying the press has treated white nationalists unfairly? I just don’t understand what you were saying.
157 | 
158 | TRUMP: No, no. There were people in that rally -- and I looked the night before -- if you look, there were people protesting very quietly the taking down of the statue of Robert E. Lee. I’m sure in that group there were some bad ones. The following day it looked like they had some rough, bad people -- neo-Nazis, white nationalists, whatever you want to call them. But you had a lot of people in that group that were there to innocently protest, and very legally protest -- because I don’t know if you know, they had a permit. The other group didn’t have a permit. So I only tell you this: There are two sides to a story. I thought what took place was a horrible moment for our country -- a horrible moment. But there are two sides to the country. Does anybody have a final –
159 | 
160 | Reporter: I have an infrastructure question.
161 | 
162 | TRUMP: You have an infrastructure –
163 | 
164 | Reporter: What makes you think you can get an infrastructure bill? You didn’t get healthcare –
165 | 
166 | TRUMP: Well, you know, I’ll tell you. We came very close with healthcare. Unfortunately, John McCain decided to vote against it at the last minute. You’ll have to ask John McCain why he did that. But we came very close to healthcare. We will end up getting healthcare. But we’ll get the infrastructure. And actually, infrastructure is something that I think we’ll have bipartisan support on. I actually think Democrats will go along with the infrastructure.
167 | 
168 | Reporter: Mr. President, have you spoken to the family of the victim of the car attack?
169 | 
170 | TRUMP: No, I’ll be reaching out. I’ll be reaching out.
171 | 
172 | Reporter: When will you be reaching out?
173 | 
174 | TRUMP: I thought that the statement put out -- the mother’s statement I thought was a beautiful statement. I will tell you, it was something that I really appreciated. I thought it was terrific. And, really, under the kind of stress that she’s under and the heartache that she’s under, I thought putting out that statement, to me, was really something. I won’t forget it. Thank you, all, very much. Thank you. Thank you.
175 | 
176 | Reporter: Will you go to Charlottesville? Will you go to check out what happened?
177 | 
178 | TRUMP: I own a house in Charlottesville. Does anyone know I own a house in Charlottesville?
179 | 
180 | Reporter: Where is it?
181 | 
182 | TRUMP: Oh boy, it’s going to be –
183 | 
184 | Reporter: Where is it?
185 | 
186 | TRUMP: It's in Charlottesville. You'll see.
187 | 
188 | Reporter: Is it a winery or something?
189 | 
190 | TRUMP: It is the winery. I mean, I know a lot about Charlottesville. Charlottesville is a great place that's been very badly hurt over the last couple of days.
191 | 
192 | (chatter)
193 | 
194 | TRUMP: I own, actually, one of the largest wineries in the United States. It's in Charlottesville.
195 | 
196 | Reporter: Do you believe your words are helping to heal this country right now? What do you think needs to be done to overcome the racial divides in this country?
197 | 
198 | 
202 | TRUMP: Well, I think jobs can have a big impact. I think if we continue to create jobs -- over a million, substantially more than a million. And you see just the other day, the car companies coming in with Foxconn. I think if we continue to create jobs at levels that I’m creating jobs, I think that’s going to have a tremendous impact -- positive impact on race relations.
203 | 
204 | Reporter: Your remarks today, how do you think that will impact the racial, sort of conflict, today?
205 | 
206 | TRUMP: The people are going to be working, they’re going to be making a lot of money – much more money than they ever thought possible. But that’s going to happen.
207 | 
208 | Reporter: Your remarks today–
209 | 
210 | TRUMP: And the other thing – very important – I believe wages will start going up. They haven’t gone up for a long time. I believe wages now – because the economy is doing so well with respect to employment and unemployment, I believe wages will start to go up. I think that will have a tremendously positive impact on race relations.
211 | 


--------------------------------------------------------------------------------
/bb105/.ipynb_checkpoints/bb105_20171225-checkpoint.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "markdown",
   5 |    "metadata": {},
   6 |    "source": [
   7 |     "ubuntu安裝中文輸入法\n",
   8 |     "\n",
   9 |     "1. https://hyperrate.com/thread.php?tid=28044\n",
  10 |     "1. https://gist.github.com/tanyuan/c0d4ee15cf0c9c93da28cc1cf0ff87b3"
  11 |    ]
  12 |   },
  13 |   {
  14 |    "cell_type": "code",
  15 |    "execution_count": 13,
  16 |    "metadata": {},
  17 |    "outputs": [
  18 |     {
  19 |      "data": {
  20 |       "text/plain": [
  21 |        "'\\nI am also a comment\\n\\nHi I can be multiline too\\n'"
  22 |       ]
  23 |      },
  24 |      "execution_count": 13,
  25 |      "metadata": {},
  26 |      "output_type": "execute_result"
  27 |     }
  28 |    ],
  29 |    "source": [
  30 |     "# I am a comment\n",
  31 |     "\"\"\"\n",
  32 |     "I am also a comment\n",
  33 |     "\n",
  34 |     "Hi I can be multiline too\n",
  35 |     "\"\"\""
  36 |    ]
  37 |   },
  38 |   {
  39 |    "cell_type": "markdown",
  40 |    "metadata": {},
  41 |    "source": [
  42 |     "# Primitives\n",
  43 |     "\n",
  44 |     "1. String\n",
  45 |     "1. Integer\n",
  46 |     "1. Float\n",
  47 |     "1. Boolean"
  48 |    ]
  49 |   },
  50 |   {
  51 |    "cell_type": "code",
  52 |    "execution_count": 17,
  53 |    "metadata": {},
  54 |    "outputs": [
  55 |     {
  56 |      "data": {
  57 |       "text/plain": [
  58 |        "'\\n\\nHi am a \\na\\nstring\\n\\n'"
  59 |       ]
  60 |      },
  61 |      "execution_count": 17,
  62 |      "metadata": {},
  63 |      "output_type": "execute_result"
  64 |     }
  65 |    ],
  66 |    "source": [
  67 |     "# Single line strings\n",
  68 |     "\"str1\"\n",
  69 |     "'str2'\n",
  70 |     "\n",
  71 |     "\"I am \\\"COOL\\\"\"\n",
  72 |     "\n",
  73 |     "\"I'm cool\"\n",
  74 |     "\n",
  75 |     "\"\"\"I'm a COOL GUY\"\"\"\n",
  76 |     "\n",
  77 |     "# Multi line strings\n",
  78 |     "\"\"\"str3\"\"\"\n",
  79 |     "'''str4'''\n",
  80 |     "\"\"\"\n",
  81 |     "\n",
  82 |     "Hi am a \n",
  83 |     "a\n",
  84 |     "string\n",
  85 |     "\n",
  86 |     "\"\"\""
  87 |    ]
  88 |   },
  89 |   {
  90 |    "cell_type": "code",
  91 |    "execution_count": 18,
  92 |    "metadata": {},
  93 |    "outputs": [
  94 |     {
  95 |      "data": {
  96 |       "text/plain": [
  97 |        "'I am \"COOL\"'"
  98 |       ]
  99 |      },
 100 |      "execution_count": 18,
 101 |      "metadata": {},
 102 |      "output_type": "execute_result"
 103 |     }
 104 |    ],
 105 |    "source": [
 106 |     "\"I am \\\"COOL\\\"\""
 107 |    ]
 108 |   },
 109 |   {
 110 |    "cell_type": "code",
 111 |    "execution_count": 22,
 112 |    "metadata": {},
 113 |    "outputs": [
 114 |     {
 115 |      "data": {
 116 |       "text/plain": [
 117 |        "1"
 118 |       ]
 119 |      },
 120 |      "execution_count": 22,
 121 |      "metadata": {},
 122 |      "output_type": "execute_result"
 123 |     }
 124 |    ],
 125 |    "source": [
 126 |     "# Integer\n",
 127 |     "1\n",
 128 |     "2\n",
 129 |     "3\n",
 130 |     "1 + 1\n",
 131 |     "2 - 1\n",
 132 |     "2 * 2\n",
 133 |     "4 / 2\n",
 134 |     "\n",
 135 |     "2 ** 3\n",
 136 |     "13 % 2"
 137 |    ]
 138 |   },
 139 |   {
 140 |    "cell_type": "code",
 141 |    "execution_count": 25,
 142 |    "metadata": {},
 143 |    "outputs": [
 144 |     {
 145 |      "data": {
 146 |       "text/plain": [
 147 |        "1.5"
 148 |       ]
 149 |      },
 150 |      "execution_count": 25,
 151 |      "metadata": {},
 152 |      "output_type": "execute_result"
 153 |     }
 154 |    ],
 155 |    "source": [
 156 |     "3/2"
 157 |    ]
 158 |   },
 159 |   {
 160 |    "cell_type": "code",
 161 |    "execution_count": 27,
 162 |    "metadata": {},
 163 |    "outputs": [
 164 |     {
 165 |      "data": {
 166 |       "text/plain": [
 167 |        "2.2"
 168 |       ]
 169 |      },
 170 |      "execution_count": 27,
 171 |      "metadata": {},
 172 |      "output_type": "execute_result"
 173 |     }
 174 |    ],
 175 |    "source": [
 176 |     "1.2 + 1.0"
 177 |    ]
 178 |   },
 179 |   {
 180 |    "cell_type": "code",
 181 |    "execution_count": 36,
 182 |    "metadata": {},
 183 |    "outputs": [
 184 |     {
 185 |      "data": {
 186 |       "text/plain": [
 187 |        "False"
 188 |       ]
 189 |      },
 190 |      "execution_count": 36,
 191 |      "metadata": {},
 192 |      "output_type": "execute_result"
 193 |     }
 194 |    ],
 195 |    "source": [
 196 |     "# Boolean\n",
 197 |     "True\n",
 198 |     "False\n",
 199 |     "\n",
 200 |     "True or False\n",
 201 |     "True and False\n",
 202 |     "\n",
 203 |     "not True"
 204 |    ]
 205 |   },
 206 |   {
 207 |    "cell_type": "code",
 208 |    "execution_count": 66,
 209 |    "metadata": {},
 210 |    "outputs": [
 211 |     {
 212 |      "name": "stdout",
 213 |      "output_type": "stream",
 214 |      "text": [
 215 |       "True\n",
 216 |       "True\n"
 217 |      ]
 218 |     }
 219 |    ],
 220 |    "source": [
 221 |     "print(3 > 1)\n",
 222 |     "print(1 < 3)"
 223 |    ]
 224 |   },
 225 |   {
 226 |    "cell_type": "code",
 227 |    "execution_count": 44,
 228 |    "metadata": {},
 229 |    "outputs": [
 230 |     {
 231 |      "data": {
 232 |       "text/plain": [
 233 |        "False"
 234 |       ]
 235 |      },
 236 |      "execution_count": 44,
 237 |      "metadata": {},
 238 |      "output_type": "execute_result"
 239 |     }
 240 |    ],
 241 |    "source": [
 242 |     "3 == 3\n",
 243 |     "'Ian' == \"Ian\"\n",
 244 |     "3 == \"3\""
 245 |    ]
 246 |   },
 247 |   {
 248 |    "cell_type": "code",
 249 |    "execution_count": 65,
 250 |    "metadata": {},
 251 |    "outputs": [
 252 |     {
 253 |      "data": {
 254 |       "text/plain": [
 255 |        "21"
 256 |       ]
 257 |      },
 258 |      "execution_count": 65,
 259 |      "metadata": {},
 260 |      "output_type": "execute_result"
 261 |     }
 262 |    ],
 263 |    "source": [
 264 |     "# Increment an int\n",
 265 |     "x += 1\n",
 266 |     "x"
 267 |    ]
 268 |   },
 269 |   {
 270 |    "cell_type": "markdown",
 271 |    "metadata": {},
 272 |    "source": [
 273 |     "## String cont."
 274 |    ]
 275 |   },
 276 |   {
 277 |    "cell_type": "code",
 278 |    "execution_count": 67,
 279 |    "metadata": {},
 280 |    "outputs": [
 281 |     {
 282 |      "data": {
 283 |       "text/plain": [
 284 |        "'吹風機'"
 285 |       ]
 286 |      },
 287 |      "execution_count": 67,
 288 |      "metadata": {},
 289 |      "output_type": "execute_result"
 290 |     }
 291 |    ],
 292 |    "source": [
 293 |     "\"吹風機\""
 294 |    ]
 295 |   },
 296 |   {
 297 |    "cell_type": "code",
 298 |    "execution_count": 68,
 299 |    "metadata": {},
 300 |    "outputs": [
 301 |     {
 302 |      "data": {
 303 |       "text/plain": [
 304 |        "'耶誕禮物'"
 305 |       ]
 306 |      },
 307 |      "execution_count": 68,
 308 |      "metadata": {},
 309 |      "output_type": "execute_result"
 310 |     }
 311 |    ],
 312 |    "source": [
 313 |     "\"耶誕禮物\""
 314 |    ]
 315 |   },
 316 |   {
 317 |    "cell_type": "code",
 318 |    "execution_count": 69,
 319 |    "metadata": {},
 320 |    "outputs": [
 321 |     {
 322 |      "data": {
 323 |       "text/plain": [
 324 |        "'耶誕禮物吹風機'"
 325 |       ]
 326 |      },
 327 |      "execution_count": 69,
 328 |      "metadata": {},
 329 |      "output_type": "execute_result"
 330 |     }
 331 |    ],
 332 |    "source": [
 333 |     "\"耶誕禮物\" + \"吹風機\""
 334 |    ]
 335 |   },
 336 |   {
 337 |    "cell_type": "code",
 338 |    "execution_count": 70,
 339 |    "metadata": {},
 340 |    "outputs": [
 341 |     {
 342 |      "data": {
 343 |       "text/plain": [
 344 |        "'我想要吹風機當耶誕禮物'"
 345 |       ]
 346 |      },
 347 |      "execution_count": 70,
 348 |      "metadata": {},
 349 |      "output_type": "execute_result"
 350 |     }
 351 |    ],
 352 |    "source": [
 353 |     "\"我想要\" + \"吹風機\" + \"當耶誕禮物\""
 354 |    ]
 355 |   },
 356 |   {
 357 |    "cell_type": "code",
 358 |    "execution_count": 77,
 359 |    "metadata": {},
 360 |    "outputs": [
 361 |     {
 362 |      "data": {
 363 |       "text/plain": [
 364 |        "'我想要Python當耶誕禮物'"
 365 |       ]
 366 |      },
 367 |      "execution_count": 77,
 368 |      "metadata": {},
 369 |      "output_type": "execute_result"
 370 |     }
 371 |    ],
 372 |    "source": [
 373 |     "present = \"Python\"\n",
 374 |     "\"我想要\" + present + \"當耶誕禮物\""
 375 |    ]
 376 |   },
 377 |   {
 378 |    "cell_type": "code",
 379 |    "execution_count": 78,
 380 |    "metadata": {},
 381 |    "outputs": [
 382 |     {
 383 |      "data": {
 384 |       "text/plain": [
 385 |        "'我想要超強伺服器當耶誕禮物'"
 386 |       ]
 387 |      },
 388 |      "execution_count": 78,
 389 |      "metadata": {},
 390 |      "output_type": "execute_result"
 391 |     }
 392 |    ],
 393 |    "source": [
 394 |     "\"我想要{}當耶誕禮物\".format(\"超強伺服器\")"
 395 |    ]
 396 |   },
 397 |   {
 398 |    "cell_type": "code",
 399 |    "execution_count": 79,
 400 |    "metadata": {},
 401 |    "outputs": [
 402 |     {
 403 |      "data": {
 404 |       "text/plain": [
 405 |        "'我想要吹風機當耶誕禮物'"
 406 |       ]
 407 |      },
 408 |      "execution_count": 79,
 409 |      "metadata": {},
 410 |      "output_type": "execute_result"
 411 |     }
 412 |    ],
 413 |    "source": [
 414 |     "\"我想要{}當耶誕禮物\".format(\"吹風機\")"
 415 |    ]
 416 |   },
 417 |   {
 418 |    "cell_type": "code",
 419 |    "execution_count": 81,
 420 |    "metadata": {},
 421 |    "outputs": [
 422 |     {
 423 |      "data": {
 424 |       "text/plain": [
 425 |        "'我想要Candy當耶誕禮物'"
 426 |       ]
 427 |      },
 428 |      "execution_count": 81,
 429 |      "metadata": {},
 430 |      "output_type": "execute_result"
 431 |     }
 432 |    ],
 433 |    "source": [
 434 |     "present = \"Candy\"\n",
 435 |     "\"我想要{}當耶誕禮物\".format(present)"
 436 |    ]
 437 |   },
 438 |   {
 439 |    "cell_type": "code",
 440 |    "execution_count": 95,
 441 |    "metadata": {},
 442 |    "outputs": [
 443 |     {
 444 |      "data": {
 445 |       "text/plain": [
 446 |        "'我想要Candy當耶誕禮物, HI Candy Candy'"
 447 |       ]
 448 |      },
 449 |      "execution_count": 95,
 450 |      "metadata": {},
 451 |      "output_type": "execute_result"
 452 |     }
 453 |    ],
 454 |    "source": [
 455 |     "present = \"Candy\"\n",
 456 |     "\"我想要{0}當耶誕禮物, {1} {0} {0}\".format(present, \"HI\")"
 457 |    ]
 458 |   },
 459 |   {
 460 |    "cell_type": "code",
 461 |    "execution_count": 86,
 462 |    "metadata": {},
 463 |    "outputs": [
 464 |     {
 465 |      "name": "stdout",
 466 |      "output_type": "stream",
 467 |      "text": [
 468 |       "我想要Macbook當耶誕禮物,\n",
 469 |       "結果他送我Macbook,\n",
 470 |       "我好happy\n"
 471 |      ]
 472 |     }
 473 |    ],
 474 |    "source": [
 475 |     "sent = \"\"\"我想要{item_name}當耶誕禮物,\n",
 476 |     "結果他送我{item_name},\n",
 477 |     "我好{emotion}\"\"\".format(item_name='Macbook',\n",
 478 |     "                     emotion=\"happy\")\n",
 479 |     "\n",
 480 |     "print(sent)"
 481 |    ]
 482 |   },
 483 |   {
 484 |    "cell_type": "code",
 485 |    "execution_count": 88,
 486 |    "metadata": {},
 487 |    "outputs": [
 488 |     {
 489 |      "name": "stdout",
 490 |      "output_type": "stream",
 491 |      "text": [
 492 |       "[INFO][123123123]Starting job 1\n"
 493 |      ]
 494 |     }
 495 |    ],
 496 |    "source": [
 497 |     "print(\"[INFO][{}]Starting job {}\".format(123123123, 1))"
 498 |    ]
 499 |   },
 500 |   {
 501 |    "cell_type": "code",
 502 |    "execution_count": 97,
 503 |    "metadata": {},
 504 |    "outputs": [
 505 |     {
 506 |      "name": "stdout",
 507 |      "output_type": "stream",
 508 |      "text": [
 509 |       "[INFO] Starting job 123\n",
 510 |       "[INFO][123123213] Starting job 123\n"
 511 |      ]
 512 |     }
 513 |    ],
 514 |    "source": [
 515 |     "# 舊式c style寫法\n",
 516 |     "print(\"[INFO] Starting job %s\"%123)\n",
 517 |     "print(\"[INFO][%s] Starting job %s\"%(123123213, 123))"
 518 |    ]
 519 |   },
 520 |   {
 521 |    "cell_type": "code",
 522 |    "execution_count": 92,
 523 |    "metadata": {},
 524 |    "outputs": [
 525 |     {
 526 |      "data": {
 527 |       "text/plain": [
 528 |        "3"
 529 |       ]
 530 |      },
 531 |      "execution_count": 92,
 532 |      "metadata": {},
 533 |      "output_type": "execute_result"
 534 |     }
 535 |    ],
 536 |    "source": [
 537 |     "present = \"伺服器\"\n",
 538 |     "len(present)"
 539 |    ]
 540 |   },
 541 |   {
 542 |    "cell_type": "code",
 543 |    "execution_count": 103,
 544 |    "metadata": {},
 545 |    "outputs": [
 546 |     {
 547 |      "data": {
 548 |       "text/plain": [
 549 |        "['I', 'have', 'a', 'cool', 'dog.', 'he', 'is', 'awesome']"
 550 |       ]
 551 |      },
 552 |      "execution_count": 103,
 553 |      "metadata": {},
 554 |      "output_type": "execute_result"
 555 |     }
 556 |    ],
 557 |    "source": [
 558 |     "dog = \"I have a cool dog. he is awesome\"\n",
 559 |     "dog.split()"
 560 |    ]
 561 |   },
 562 |   {
 563 |    "cell_type": "code",
 564 |    "execution_count": 104,
 565 |    "metadata": {},
 566 |    "outputs": [
 567 |     {
 568 |      "data": {
 569 |       "text/plain": [
 570 |        "['I have a cool dog', ' he is awesome']"
 571 |       ]
 572 |      },
 573 |      "execution_count": 104,
 574 |      "metadata": {},
 575 |      "output_type": "execute_result"
 576 |     }
 577 |    ],
 578 |    "source": [
 579 |     "dog = \"I have a cool dog. he is awesome\"\n",
 580 |     "dog.split('.')"
 581 |    ]
 582 |   },
 583 |   {
 584 |    "cell_type": "code",
 585 |    "execution_count": 107,
 586 |    "metadata": {},
 587 |    "outputs": [
 588 |     {
 589 |      "data": {
 590 |       "text/plain": [
 591 |        "'I have a cool dog!!!! he is awesome'"
 592 |       ]
 593 |      },
 594 |      "execution_count": 107,
 595 |      "metadata": {},
 596 |      "output_type": "execute_result"
 597 |     }
 598 |    ],
 599 |    "source": [
 600 |     "dog_split = ['I have a cool dog', ' he is awesome']\n",
 601 |     "\"!!!!\".join(dog_split)"
 602 |    ]
 603 |   },
 604 |   {
 605 |    "cell_type": "markdown",
 606 |    "metadata": {},
 607 |    "source": [
 608 |     "## Collections\n",
 609 |     "\n",
 610 |     "1. List\n",
 611 |     "1. Dictionary\n",
 612 |     "1. Set\n",
 613 |     "1. Tuple"
 614 |    ]
 615 |   },
 616 |   {
 617 |    "cell_type": "code",
 618 |    "execution_count": 127,
 619 |    "metadata": {},
 620 |    "outputs": [],
 621 |    "source": [
 622 |     "xmas_presents = []\n",
 623 |     "#xmas_presents = ['程式碼', \"賓士\", \"無人機\"]"
 624 |    ]
 625 |   },
 626 |   {
 627 |    "cell_type": "code",
 628 |    "execution_count": 128,
 629 |    "metadata": {},
 630 |    "outputs": [
 631 |     {
 632 |      "data": {
 633 |       "text/plain": [
 634 |        "[]"
 635 |       ]
 636 |      },
 637 |      "execution_count": 128,
 638 |      "metadata": {},
 639 |      "output_type": "execute_result"
 640 |     }
 641 |    ],
 642 |    "source": [
 643 |     "xmas_presents"
 644 |    ]
 645 |   },
 646 |   {
 647 |    "cell_type": "code",
 648 |    "execution_count": 129,
 649 |    "metadata": {},
 650 |    "outputs": [],
 651 |    "source": [
 652 |     "# 新增\n",
 653 |     "xmas_presents.append('Macbook pro')"
 654 |    ]
 655 |   },
 656 |   {
 657 |    "cell_type": "code",
 658 |    "execution_count": 130,
 659 |    "metadata": {},
 660 |    "outputs": [
 661 |     {
 662 |      "data": {
 663 |       "text/plain": [
 664 |        "['Macbook pro']"
 665 |       ]
 666 |      },
 667 |      "execution_count": 130,
 668 |      "metadata": {},
 669 |      "output_type": "execute_result"
 670 |     }
 671 |    ],
 672 |    "source": [
 673 |     "xmas_presents"
 674 |    ]
 675 |   },
 676 |   {
 677 |    "cell_type": "code",
 678 |    "execution_count": 131,
 679 |    "metadata": {},
 680 |    "outputs": [],
 681 |    "source": [
 682 |     "xmas_presents.append('無人機')"
 683 |    ]
 684 |   },
 685 |   {
 686 |    "cell_type": "code",
 687 |    "execution_count": 132,
 688 |    "metadata": {},
 689 |    "outputs": [
 690 |     {
 691 |      "data": {
 692 |       "text/plain": [
 693 |        "['Macbook pro', '無人機']"
 694 |       ]
 695 |      },
 696 |      "execution_count": 132,
 697 |      "metadata": {},
 698 |      "output_type": "execute_result"
 699 |     }
 700 |    ],
 701 |    "source": [
 702 |     "xmas_presents"
 703 |    ]
 704 |   },
 705 |   {
 706 |    "cell_type": "code",
 707 |    "execution_count": 133,
 708 |    "metadata": {},
 709 |    "outputs": [],
 710 |    "source": [
 711 |     "xmas_presents.append('吹風機')"
 712 |    ]
 713 |   },
 714 |   {
 715 |    "cell_type": "code",
 716 |    "execution_count": 134,
 717 |    "metadata": {},
 718 |    "outputs": [
 719 |     {
 720 |      "data": {
 721 |       "text/plain": [
 722 |        "['Macbook pro', '無人機', '吹風機']"
 723 |       ]
 724 |      },
 725 |      "execution_count": 134,
 726 |      "metadata": {},
 727 |      "output_type": "execute_result"
 728 |     }
 729 |    ],
 730 |    "source": [
 731 |     "xmas_presents"
 732 |    ]
 733 |   },
 734 |   {
 735 |    "cell_type": "code",
 736 |    "execution_count": 135,
 737 |    "metadata": {},
 738 |    "outputs": [],
 739 |    "source": [
 740 |     "xmas_presents.insert(0,\"糖果\")"
 741 |    ]
 742 |   },
 743 |   {
 744 |    "cell_type": "code",
 745 |    "execution_count": 136,
 746 |    "metadata": {},
 747 |    "outputs": [
 748 |     {
 749 |      "data": {
 750 |       "text/plain": [
 751 |        "['糖果', 'Macbook pro', '無人機', '吹風機']"
 752 |       ]
 753 |      },
 754 |      "execution_count": 136,
 755 |      "metadata": {},
 756 |      "output_type": "execute_result"
 757 |     }
 758 |    ],
 759 |    "source": [
 760 |     "xmas_presents"
 761 |    ]
 762 |   },
 763 |   {
 764 |    "cell_type": "code",
 765 |    "execution_count": 137,
 766 |    "metadata": {},
 767 |    "outputs": [
 768 |     {
 769 |      "data": {
 770 |       "text/plain": [
 771 |        "'吹風機'"
 772 |       ]
 773 |      },
 774 |      "execution_count": 137,
 775 |      "metadata": {},
 776 |      "output_type": "execute_result"
 777 |     }
 778 |    ],
 779 |    "source": [
 780 |     "# 刪除\n",
 781 |     "xmas_presents.pop()"
 782 |    ]
 783 |   },
 784 |   {
 785 |    "cell_type": "code",
 786 |    "execution_count": 138,
 787 |    "metadata": {},
 788 |    "outputs": [
 789 |     {
 790 |      "data": {
 791 |       "text/plain": [
 792 |        "['糖果', 'Macbook pro', '無人機']"
 793 |       ]
 794 |      },
 795 |      "execution_count": 138,
 796 |      "metadata": {},
 797 |      "output_type": "execute_result"
 798 |     }
 799 |    ],
 800 |    "source": [
 801 |     "xmas_presents"
 802 |    ]
 803 |   },
 804 |   {
 805 |    "cell_type": "code",
 806 |    "execution_count": 139,
 807 |    "metadata": {},
 808 |    "outputs": [],
 809 |    "source": [
 810 |     "del xmas_presents[0]"
 811 |    ]
 812 |   },
 813 |   {
 814 |    "cell_type": "code",
 815 |    "execution_count": 140,
 816 |    "metadata": {},
 817 |    "outputs": [
 818 |     {
 819 |      "data": {
 820 |       "text/plain": [
 821 |        "['Macbook pro', '無人機']"
 822 |       ]
 823 |      },
 824 |      "execution_count": 140,
 825 |      "metadata": {},
 826 |      "output_type": "execute_result"
 827 |     }
 828 |    ],
 829 |    "source": [
 830 |     "xmas_presents"
 831 |    ]
 832 |   },
 833 |   {
 834 |    "cell_type": "code",
 835 |    "execution_count": 141,
 836 |    "metadata": {},
 837 |    "outputs": [],
 838 |    "source": [
 839 |     "xmas_presents[0] = \"鋼筆\""
 840 |    ]
 841 |   },
 842 |   {
 843 |    "cell_type": "code",
 844 |    "execution_count": 142,
 845 |    "metadata": {},
 846 |    "outputs": [
 847 |     {
 848 |      "data": {
 849 |       "text/plain": [
 850 |        "['鋼筆', '無人機']"
 851 |       ]
 852 |      },
 853 |      "execution_count": 142,
 854 |      "metadata": {},
 855 |      "output_type": "execute_result"
 856 |     }
 857 |    ],
 858 |    "source": [
 859 |     "xmas_presents"
 860 |    ]
 861 |   },
 862 |   {
 863 |    "cell_type": "code",
 864 |    "execution_count": 143,
 865 |    "metadata": {},
 866 |    "outputs": [
 867 |     {
 868 |      "data": {
 869 |       "text/plain": [
 870 |        "'鋼筆'"
 871 |       ]
 872 |      },
 873 |      "execution_count": 143,
 874 |      "metadata": {},
 875 |      "output_type": "execute_result"
 876 |     }
 877 |    ],
 878 |    "source": [
 879 |     "xmas_presents[0]"
 880 |    ]
 881 |   },
 882 |   {
 883 |    "cell_type": "code",
 884 |    "execution_count": 144,
 885 |    "metadata": {},
 886 |    "outputs": [
 887 |     {
 888 |      "data": {
 889 |       "text/plain": [
 890 |        "'無人機'"
 891 |       ]
 892 |      },
 893 |      "execution_count": 144,
 894 |      "metadata": {},
 895 |      "output_type": "execute_result"
 896 |     }
 897 |    ],
 898 |    "source": [
 899 |     "xmas_presents[1]"
 900 |    ]
 901 |   },
 902 |   {
 903 |    "cell_type": "code",
 904 |    "execution_count": 147,
 905 |    "metadata": {},
 906 |    "outputs": [],
 907 |    "source": [
 908 |     "xmas_presents.append(\"Car\")"
 909 |    ]
 910 |   },
 911 |   {
 912 |    "cell_type": "code",
 913 |    "execution_count": 148,
 914 |    "metadata": {},
 915 |    "outputs": [],
 916 |    "source": [
 917 |     "xmas_presents.append('Jet')"
 918 |    ]
 919 |   },
 920 |   {
 921 |    "cell_type": "code",
 922 |    "execution_count": 149,
 923 |    "metadata": {},
 924 |    "outputs": [
 925 |     {
 926 |      "data": {
 927 |       "text/plain": [
 928 |        "['鋼筆', '無人機', 'Car', 'Jet']"
 929 |       ]
 930 |      },
 931 |      "execution_count": 149,
 932 |      "metadata": {},
 933 |      "output_type": "execute_result"
 934 |     }
 935 |    ],
 936 |    "source": [
 937 |     "xmas_presents"
 938 |    ]
 939 |   },
 940 |   {
 941 |    "cell_type": "code",
 942 |    "execution_count": 153,
 943 |    "metadata": {},
 944 |    "outputs": [
 945 |     {
 946 |      "data": {
 947 |       "text/plain": [
 948 |        "['無人機']"
 949 |       ]
 950 |      },
 951 |      "execution_count": 153,
 952 |      "metadata": {},
 953 |      "output_type": "execute_result"
 954 |     }
 955 |    ],
 956 |    "source": [
 957 |     "# list[start:end:step]\n",
 958 |     "xmas_presents[1:3:2]"
 959 |    ]
 960 |   },
 961 |   {
 962 |    "cell_type": "code",
 963 |    "execution_count": 162,
 964 |    "metadata": {},
 965 |    "outputs": [
 966 |     {
 967 |      "data": {
 968 |       "text/plain": [
 969 |        "[2, 4]"
 970 |       ]
 971 |      },
 972 |      "execution_count": 162,
 973 |      "metadata": {},
 974 |      "output_type": "execute_result"
 975 |     }
 976 |    ],
 977 |    "source": [
 978 |     "num_list = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
 979 |     "num_list[2:6:2]"
 980 |    ]
 981 |   },
 982 |   {
 983 |    "cell_type": "code",
 984 |    "execution_count": 160,
 985 |    "metadata": {},
 986 |    "outputs": [
 987 |     {
 988 |      "data": {
 989 |       "text/plain": [
 990 |        "[9, 8, 7, 6, 5, 4, 3, 2, 1, 0]"
 991 |       ]
 992 |      },
 993 |      "execution_count": 160,
 994 |      "metadata": {},
 995 |      "output_type": "execute_result"
 996 |     }
 997 |    ],
 998 |    "source": [
 999 |     "num_list = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
1000 |     "num_list[::-1]"
1001 |    ]
1002 |   },
1003 |   {
1004 |    "cell_type": "code",
1005 |    "execution_count": 163,
1006 |    "metadata": {},
1007 |    "outputs": [],
1008 |    "source": [
1009 |     "num_list.reverse()"
1010 |    ]
1011 |   },
1012 |   {
1013 |    "cell_type": "code",
1014 |    "execution_count": 164,
1015 |    "metadata": {},
1016 |    "outputs": [
1017 |     {
1018 |      "data": {
1019 |       "text/plain": [
1020 |        "[9, 8, 7, 6, 5, 4, 3, 2, 1, 0]"
1021 |       ]
1022 |      },
1023 |      "execution_count": 164,
1024 |      "metadata": {},
1025 |      "output_type": "execute_result"
1026 |     }
1027 |    ],
1028 |    "source": [
1029 |     "num_list"
1030 |    ]
1031 |   },
1032 |   {
1033 |    "cell_type": "code",
1034 |    "execution_count": 166,
1035 |    "metadata": {},
1036 |    "outputs": [
1037 |     {
1038 |      "data": {
1039 |       "text/plain": [
1040 |        "['a', 'b', 'c', 'd', 'e']"
1041 |       ]
1042 |      },
1043 |      "execution_count": 166,
1044 |      "metadata": {},
1045 |      "output_type": "execute_result"
1046 |     }
1047 |    ],
1048 |    "source": [
1049 |     "# 合併\n",
1050 |     "['a','b','c'] + ['d', 'e']"
1051 |    ]
1052 |   },
1053 |   {
1054 |    "cell_type": "code",
1055 |    "execution_count": 168,
1056 |    "metadata": {},
1057 |    "outputs": [],
1058 |    "source": [
1059 |     "alphabet = ['a', 'b', 'c']\n",
1060 |     "alphabet.extend(['d', 'e'])"
1061 |    ]
1062 |   },
1063 |   {
1064 |    "cell_type": "code",
1065 |    "execution_count": 169,
1066 |    "metadata": {},
1067 |    "outputs": [
1068 |     {
1069 |      "data": {
1070 |       "text/plain": [
1071 |        "['a', 'b', 'c', 'd', 'e']"
1072 |       ]
1073 |      },
1074 |      "execution_count": 169,
1075 |      "metadata": {},
1076 |      "output_type": "execute_result"
1077 |     }
1078 |    ],
1079 |    "source": [
1080 |     "alphabet"
1081 |    ]
1082 |   },
1083 |   {
1084 |    "cell_type": "code",
1085 |    "execution_count": 171,
1086 |    "metadata": {},
1087 |    "outputs": [
1088 |     {
1089 |      "data": {
1090 |       "text/plain": [
1091 |        "'cdefghijklmn'"
1092 |       ]
1093 |      },
1094 |      "execution_count": 171,
1095 |      "metadata": {},
1096 |      "output_type": "execute_result"
1097 |     }
1098 |    ],
1099 |    "source": [
1100 |     "# More strings....\n",
1101 |     "'abcdefghijklmnopqrstuvwxyz'[2:14]"
1102 |    ]
1103 |   },
1104 |   {
1105 |    "cell_type": "code",
1106 |    "execution_count": 175,
1107 |    "metadata": {},
1108 |    "outputs": [
1109 |     {
1110 |      "data": {
1111 |       "text/plain": [
1112 |        "'a'"
1113 |       ]
1114 |      },
1115 |      "execution_count": 175,
1116 |      "metadata": {},
1117 |      "output_type": "execute_result"
1118 |     }
1119 |    ],
1120 |    "source": [
1121 |     "'aaaaaa'[0]"
1122 |    ]
1123 |   },
1124 |   {
1125 |    "cell_type": "code",
1126 |    "execution_count": 177,
1127 |    "metadata": {},
1128 |    "outputs": [
1129 |     {
1130 |      "data": {
1131 |       "text/plain": [
1132 |        "'bb'"
1133 |       ]
1134 |      },
1135 |      "execution_count": 177,
1136 |      "metadata": {},
1137 |      "output_type": "execute_result"
1138 |     }
1139 |    ],
1140 |    "source": [
1141 |     "'bbbbbb'[:2]"
1142 |    ]
1143 |   },
1144 |   {
1145 |    "cell_type": "code",
1146 |    "execution_count": 173,
1147 |    "metadata": {},
1148 |    "outputs": [
1149 |     {
1150 |      "data": {
1151 |       "text/plain": [
1152 |        "'zyxwvutsrqponmlkjihgfedcba'"
1153 |       ]
1154 |      },
1155 |      "execution_count": 173,
1156 |      "metadata": {},
1157 |      "output_type": "execute_result"
1158 |     }
1159 |    ],
1160 |    "source": [
1161 |     "'abcdefghijklmnopqrstuvwxyz'[::-1]"
1162 |    ]
1163 |   },
1164 |   {
1165 |    "cell_type": "code",
1166 |    "execution_count": 178,
1167 |    "metadata": {},
1168 |    "outputs": [
1169 |     {
1170 |      "data": {
1171 |       "text/plain": [
1172 |        "'aa'"
1173 |       ]
1174 |      },
1175 |      "execution_count": 178,
1176 |      "metadata": {},
1177 |      "output_type": "execute_result"
1178 |     }
1179 |    ],
1180 |    "source": [
1181 |     "'a' + 'a'"
1182 |    ]
1183 |   },
1184 |   {
1185 |    "cell_type": "code",
1186 |    "execution_count": null,
1187 |    "metadata": {},
1188 |    "outputs": [],
1189 |    "source": []
1190 |   }
1191 |  ],
1192 |  "metadata": {
1193 |   "kernelspec": {
1194 |    "display_name": "Python 3",
1195 |    "language": "python",
1196 |    "name": "python3"
1197 |   },
1198 |   "language_info": {
1199 |    "codemirror_mode": {
1200 |     "name": "ipython",
1201 |     "version": 3
1202 |    },
1203 |    "file_extension": ".py",
1204 |    "mimetype": "text/x-python",
1205 |    "name": "python",
1206 |    "nbconvert_exporter": "python",
1207 |    "pygments_lexer": "ipython3",
1208 |    "version": "3.6.2"
1209 |   }
1210 |  },
1211 |  "nbformat": 4,
1212 |  "nbformat_minor": 2
1213 | }
1214 | 


--------------------------------------------------------------------------------
/cb101/flippingmed_crawler.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# 翻轉醫療爬蟲\n",
  8 |     "\n",
  9 |     "https://flippingmed.com/\n",
 10 |     "\n",
 11 |     "首先我們觀察flippingmed這個網站，\n",
 12 |     "\n",
 13 |     "發現他換頁的邏輯是在一個post的api裡面 (page)，\n",
 14 |     "\n",
 15 |     "我們把post的這個request dump出來，\n",
 16 |     "\n",
 17 |     "寫一個while loop，每次loop就把這個page + 1\n",
 18 |     "\n",
 19 |     "我們另外把每頁的return資料筆數調高到50 (query_args[posts_per_page])"
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "code",
 24 |    "execution_count": 3,
 25 |    "metadata": {},
 26 |    "outputs": [
 27 |     {
 28 |      "name": "stdout",
 29 |      "output_type": "stream",
 30 |      "text": [
 31 |       "[INFO] crawling page 1\n",
 32 |       "[INFO] crawling page 2\n"
 33 |      ]
 34 |     },
 35 |     {
 36 |      "ename": "KeyboardInterrupt",
 37 |      "evalue": "",
 38 |      "output_type": "error",
 39 |      "traceback": [
 40 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
 41 |       "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
 42 |       "\u001b[0;32m~/.pyenv/versions/3.6.4/lib/python3.6/site-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36m_make_request\u001b[0;34m(self, conn, method, url, timeout, chunked, **httplib_request_kw)\u001b[0m\n\u001b[1;32m    379\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m  \u001b[0;31m# Python 2.7, use buffering of HTTP responses\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 380\u001b[0;31m                 \u001b[0mhttplib_response\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mconn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetresponse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbuffering\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    381\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m:\u001b[0m  \u001b[0;31m# Python 2.6 and older, Python 3\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
 43 |       "\u001b[0;31mTypeError\u001b[0m: getresponse() got an unexpected keyword argument 'buffering'",
 44 |       "\nDuring handling of the above exception, another exception occurred:\n",
 45 |       "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
 46 |       "\u001b[0;32m<ipython-input-3-188238745b90>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m     77\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     78\u001b[0m     \u001b[0;31m# 送出post request\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 79\u001b[0;31m     \u001b[0mresp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrequests\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpost\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mheaders\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     80\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     81\u001b[0m     \u001b[0;31m# 將response做json decode\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
 47 |       "\u001b[0;32m~/.pyenv/versions/3.6.4/lib/python3.6/site-packages/requests/api.py\u001b[0m in \u001b[0;36mpost\u001b[0;34m(url, data, json, **kwargs)\u001b[0m\n\u001b[1;32m    110\u001b[0m     \"\"\"\n\u001b[1;32m    111\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 112\u001b[0;31m     \u001b[0;32mreturn\u001b[0m \u001b[0mrequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'post'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mjson\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mjson\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    113\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    114\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
 48 |       "\u001b[0;32m~/.pyenv/versions/3.6.4/lib/python3.6/site-packages/requests/api.py\u001b[0m in \u001b[0;36mrequest\u001b[0;34m(method, url, **kwargs)\u001b[0m\n\u001b[1;32m     56\u001b[0m     \u001b[0;31m# cases, and look like a memory leak in others.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     57\u001b[0m     \u001b[0;32mwith\u001b[0m \u001b[0msessions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSession\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0msession\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 58\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0msession\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     59\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     60\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
 49 |       "\u001b[0;32m~/.pyenv/versions/3.6.4/lib/python3.6/site-packages/requests/sessions.py\u001b[0m in \u001b[0;36mrequest\u001b[0;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[1;32m    506\u001b[0m         }\n\u001b[1;32m    507\u001b[0m         \u001b[0msend_kwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msettings\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 508\u001b[0;31m         \u001b[0mresp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprep\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0msend_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    509\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    510\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mresp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
 50 |       "\u001b[0;32m~/.pyenv/versions/3.6.4/lib/python3.6/site-packages/requests/sessions.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, request, **kwargs)\u001b[0m\n\u001b[1;32m    616\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    617\u001b[0m         \u001b[0;31m# Send the request\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 618\u001b[0;31m         \u001b[0mr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0madapter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    619\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    620\u001b[0m         \u001b[0;31m# Total elapsed time of the request (approximately)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
 51 |       "\u001b[0;32m~/.pyenv/versions/3.6.4/lib/python3.6/site-packages/requests/adapters.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[1;32m    438\u001b[0m                     \u001b[0mdecode_content\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    439\u001b[0m                     \u001b[0mretries\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax_retries\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 440\u001b[0;31m                     \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    441\u001b[0m                 )\n\u001b[1;32m    442\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
 52 |       "\u001b[0;32m~/.pyenv/versions/3.6.4/lib/python3.6/site-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36murlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)\u001b[0m\n\u001b[1;32m    599\u001b[0m                                                   \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout_obj\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    600\u001b[0m                                                   \u001b[0mbody\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mheaders\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 601\u001b[0;31m                                                   chunked=chunked)\n\u001b[0m\u001b[1;32m    602\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    603\u001b[0m             \u001b[0;31m# If we're going to release the connection in ``finally:``, then\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
 53 |       "\u001b[0;32m~/.pyenv/versions/3.6.4/lib/python3.6/site-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36m_make_request\u001b[0;34m(self, conn, method, url, timeout, chunked, **httplib_request_kw)\u001b[0m\n\u001b[1;32m    381\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m:\u001b[0m  \u001b[0;31m# Python 2.6 and older, Python 3\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    382\u001b[0m                 \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 383\u001b[0;31m                     \u001b[0mhttplib_response\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mconn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetresponse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    384\u001b[0m                 \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    385\u001b[0m                     \u001b[0;31m# Remove the TypeError from the exception chain in Python 3;\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
 54 |       "\u001b[0;32m~/.pyenv/versions/3.6.4/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36mgetresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m   1329\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1330\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1331\u001b[0;31m                 \u001b[0mresponse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbegin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1332\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mConnectionError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1333\u001b[0m                 \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
 55 |       "\u001b[0;32m~/.pyenv/versions/3.6.4/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36mbegin\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    295\u001b[0m         \u001b[0;31m# read until we get a non-100 response\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    296\u001b[0m         \u001b[0;32mwhile\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 297\u001b[0;31m             \u001b[0mversion\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstatus\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreason\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_read_status\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    298\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mstatus\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mCONTINUE\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    299\u001b[0m                 \u001b[0;32mbreak\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
 56 |       "\u001b[0;32m~/.pyenv/versions/3.6.4/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36m_read_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    256\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    257\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m_read_status\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 258\u001b[0;31m         \u001b[0mline\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreadline\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_MAXLINE\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"iso-8859-1\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    259\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mline\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0m_MAXLINE\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    260\u001b[0m             \u001b[0;32mraise\u001b[0m \u001b[0mLineTooLong\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"status line\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
 57 |       "\u001b[0;32m~/.pyenv/versions/3.6.4/lib/python3.6/socket.py\u001b[0m in \u001b[0;36mreadinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m    584\u001b[0m         \u001b[0;32mwhile\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    585\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 586\u001b[0;31m                 \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sock\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrecv_into\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    587\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    588\u001b[0m                 \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_timeout_occurred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
 58 |       "\u001b[0;32m~/.pyenv/versions/3.6.4/lib/python3.6/ssl.py\u001b[0m in \u001b[0;36mrecv_into\u001b[0;34m(self, buffer, nbytes, flags)\u001b[0m\n\u001b[1;32m   1007\u001b[0m                   \u001b[0;34m\"non-zero flags not allowed in calls to recv_into() on %s\"\u001b[0m \u001b[0;34m%\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1008\u001b[0m                   self.__class__)\n\u001b[0;32m-> 1009\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnbytes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbuffer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1010\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1011\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0msocket\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrecv_into\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbuffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnbytes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mflags\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
 59 |       "\u001b[0;32m~/.pyenv/versions/3.6.4/lib/python3.6/ssl.py\u001b[0m in \u001b[0;36mread\u001b[0;34m(self, len, buffer)\u001b[0m\n\u001b[1;32m    869\u001b[0m             \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Read on closed or unwrapped SSL socket.\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    870\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 871\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sslobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbuffer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    872\u001b[0m         \u001b[0;32mexcept\u001b[0m \u001b[0mSSLError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    873\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mSSL_ERROR_EOF\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msuppress_ragged_eofs\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
 60 |       "\u001b[0;32m~/.pyenv/versions/3.6.4/lib/python3.6/ssl.py\u001b[0m in \u001b[0;36mread\u001b[0;34m(self, len, buffer)\u001b[0m\n\u001b[1;32m    629\u001b[0m         \"\"\"\n\u001b[1;32m    630\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mbuffer\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 631\u001b[0;31m             \u001b[0mv\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sslobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbuffer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    632\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    633\u001b[0m             \u001b[0mv\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sslobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
 61 |       "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
 62 |      ]
 63 |     }
 64 |    ],
 65 |    "source": [
 66 |     "import requests\n",
 67 |     "import json\n",
 68 |     "import urllib.parse\n",
 69 |     "\n",
 70 |     "url = \"https://flippingmed.com/?infinity=scrolling\"\n",
 71 |     "data = {'action': 'infinite_scroll',\n",
 72 |     " 'currentday': '29.05.18',\n",
 73 |     " 'last_post_date': '2018-06-04+12:13:30',\n",
 74 |     " 'order': 'DESC',\n",
 75 |     " 'page': 1, # 這是我們要換頁的變數，把他設定成一個int\n",
 76 |     " 'query_args[attachment]': '',\n",
 77 |     " 'query_args[attachment_id]': '0',\n",
 78 |     " 'query_args[author]': '',\n",
 79 |     " 'query_args[author_name]': '',\n",
 80 |     " 'query_args[cache_results]': 'false',\n",
 81 |     " 'query_args[cat]': '',\n",
 82 |     " 'query_args[category_name]': '',\n",
 83 |     " 'query_args[comments_per_page]': '50',\n",
 84 |     " 'query_args[day]': '0',\n",
 85 |     " 'query_args[embed]': '',\n",
 86 |     " 'query_args[error]': '',\n",
 87 |     " 'query_args[feed]': '',\n",
 88 |     " 'query_args[fields]': '',\n",
 89 |     " 'query_args[hour]': '',\n",
 90 |     " 'query_args[ignore_sticky_posts]': 'false',\n",
 91 |     " 'query_args[lazy_load_term_meta]': 'true',\n",
 92 |     " 'query_args[m]': '',\n",
 93 |     " 'query_args[menu_order]': '',\n",
 94 |     " 'query_args[meta_key]': '',\n",
 95 |     " 'query_args[meta_value]': '',\n",
 96 |     " 'query_args[minute]': '',\n",
 97 |     " 'query_args[monthnum]': '0',\n",
 98 |     " 'query_args[name]': '',\n",
 99 |     " 'query_args[no_found_rows]': 'false',\n",
100 |     " 'query_args[nopaging]': 'false',\n",
101 |     " 'query_args[order]': 'DESC',\n",
102 |     " 'query_args[p]': '0',\n",
103 |     " 'query_args[page_id]': '0',\n",
104 |     " 'query_args[paged]': '0',\n",
105 |     " 'query_args[pagename]': '',\n",
106 |     " 'query_args[post_parent]': '',\n",
107 |     " 'query_args[post_type]': '',\n",
108 |     " 'query_args[posts_per_page]': '50',\n",
109 |     " 'query_args[preview]': '',\n",
110 |     " 'query_args[s]': '',\n",
111 |     " 'query_args[second]': '',\n",
112 |     " 'query_args[sentence]': '',\n",
113 |     " 'query_args[static]': '',\n",
114 |     " 'query_args[subpost]': '',\n",
115 |     " 'query_args[subpost_id]': '',\n",
116 |     " 'query_args[suppress_filters]': 'false',\n",
117 |     " 'query_args[tag]': '',\n",
118 |     " 'query_args[tag_id]': '',\n",
119 |     " 'query_args[tb]': '',\n",
120 |     " 'query_args[title]': '',\n",
121 |     " 'query_args[update_post_meta_cache]': 'true',\n",
122 |     " 'query_args[update_post_term_cache]': 'true',\n",
123 |     " 'query_args[w]': '0',\n",
124 |     " 'query_args[year]': '0',\n",
125 |     " 'query_before': '2018-06-05+11:04:29',\n",
126 |     " 'scripts[]': 'aepc-pixel-events',\n",
127 |     " 'styles[]': 'jetpack_css'}\n",
128 |     "headers = {'accept': '*/*',\n",
129 |     " 'accept-encoding': 'gzip, deflate, br',\n",
130 |     " 'accept-language': 'en-US,en;q=0.9,zh-TW;q=0.8,zh;q=0.7',\n",
131 |     " 'content-length': '2835',\n",
132 |     " 'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',\n",
133 |     " 'cookie': 'tk_or=%22https%3A%2F%2Fwww.google.com.tw%2F%22; tk_r3d=%22https%3A%2F%2Fwww.google.com.tw%2F%22; __utma=73359812.1630321809.1528167876.1528167876.1528167876.1; __utmc=73359812; __utmz=73359812.1528167876.1.1.utmcsr=google|utmccn=(organic)|utmcmd=organic|utmctr=(not%20provided); __utmt=1; tk_lr=%22%22; __utmb=73359812.7.10.1528167876',\n",
134 |     " 'origin': 'https://flippingmed.com',\n",
135 |     " 'referer': 'https://flippingmed.com/',\n",
136 |     " 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36',\n",
137 |     " 'x-requested-with': 'XMLHttpRequest'}\n",
138 |     "\n",
139 |     "article_links = []\n",
140 |     "while True:\n",
141 |     "    print(\"[INFO] crawling page %s\"%data['page'])\n",
142 |     "    \n",
143 |     "    # 送出post request\n",
144 |     "    resp = requests.post(url, data=data, headers=headers)\n",
145 |     "    \n",
146 |     "    # 將response做json decode\n",
147 |     "    resp_body = resp.json()\n",
148 |     "    \n",
149 |     "    # 判斷是否爬到底了，以這個網站來說，爬到底的時候會回傳一個json，裡面的資料是{\"type\": \"empty\"}\n",
150 |     "    if resp_body.get('type', '') == 'empty':\n",
151 |     "        break # 爬到底就break出去這個while loop\n",
152 |     "        \n",
153 |     "    # 將resp的json取出postflair這個key，裡面再取出keys()並且cast成list type\n",
154 |     "    # 最後做URL decode的動作，把url換成中文\n",
155 |     "    links = [urllib.parse.unquote(url) for url in list(resp_body.get('postflair').keys())]\n",
156 |     "    \n",
157 |     "    # 將url list extend 進去我們的result list\n",
158 |     "    article_links.extend(links)\n",
159 |     "    \n",
160 |     "    # 將page + 1\n",
161 |     "    data['page'] += 1"
162 |    ]
163 |   },
164 |   {
165 |    "cell_type": "code",
166 |    "execution_count": 2,
167 |    "metadata": {},
168 |    "outputs": [
169 |     {
170 |      "name": "stdout",
171 |      "output_type": "stream",
172 |      "text": [
173 |       "https://flippingmed.com/2016/06/22/為什麼輔具經營是進入長照產業最好的敲門磚！/\n",
174 |       "https://flippingmed.com/2018/06/04/心智食療全面預防-地中海飲食讓熟齡不失智/\n",
175 |       "https://flippingmed.com/2018/06/04/舞動幸福熟齡：雲門律動跳出活力青春/\n",
176 |       "https://flippingmed.com/2018/06/04/益智活腦＋豐富活動：有效擊敗失智全攻略/\n",
177 |       "https://flippingmed.com/2018/06/04/接軌長照與醫療-落實全人照護理念：居家醫療照/\n",
178 |       "https://flippingmed.com/2018/06/02/保健食品-肝臟疾病需注意的保健食品/\n",
179 |       "https://flippingmed.com/2018/05/31/一休和尚有說過：想站起來，要先低頭！/\n",
180 |       "https://flippingmed.com/2018/05/30/排泄達人這麼說：掌握四大重點就能找到適合的替/\n",
181 |       "https://flippingmed.com/2018/05/30/正確了解紙尿褲的構造及材質，才能真正擺脫外漏/\n",
182 |       "https://flippingmed.com/2018/05/30/了解排尿障礙的類型-才能找到對應的解決對策/\n",
183 |       "https://flippingmed.com/2018/05/30/找到紙尿褲外漏根本原因，才能徹底泌尿解決問題/\n"
184 |      ]
185 |     },
186 |     {
187 |      "ename": "KeyboardInterrupt",
188 |      "evalue": "",
189 |      "output_type": "error",
190 |      "traceback": [
191 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
192 |       "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
193 |       "\u001b[0;32m<ipython-input-2-3ea19de18865>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0murl\u001b[0m \u001b[0;32min\u001b[0m \u001b[0marticle_links\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      6\u001b[0m     \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0murl\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m     \u001b[0mresp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrequests\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0murl\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      8\u001b[0m     \u001b[0mfilename\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreplace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'/'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'_'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      9\u001b[0m     \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'/tmp/mydata/%s.html'\u001b[0m\u001b[0;34m%\u001b[0m\u001b[0mfilename\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'w'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
194 |       "\u001b[0;32m~/.pyenv/versions/3.6.4/lib/python3.6/site-packages/requests/api.py\u001b[0m in \u001b[0;36mget\u001b[0;34m(url, params, **kwargs)\u001b[0m\n\u001b[1;32m     70\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     71\u001b[0m     \u001b[0mkwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msetdefault\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'allow_redirects'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 72\u001b[0;31m     \u001b[0;32mreturn\u001b[0m \u001b[0mrequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'get'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mparams\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     73\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     74\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
195 |       "\u001b[0;32m~/.pyenv/versions/3.6.4/lib/python3.6/site-packages/requests/api.py\u001b[0m in \u001b[0;36mrequest\u001b[0;34m(method, url, **kwargs)\u001b[0m\n\u001b[1;32m     56\u001b[0m     \u001b[0;31m# cases, and look like a memory leak in others.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     57\u001b[0m     \u001b[0;32mwith\u001b[0m \u001b[0msessions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSession\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0msession\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 58\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0msession\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     59\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     60\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
196 |       "\u001b[0;32m~/.pyenv/versions/3.6.4/lib/python3.6/site-packages/requests/sessions.py\u001b[0m in \u001b[0;36mrequest\u001b[0;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[1;32m    506\u001b[0m         }\n\u001b[1;32m    507\u001b[0m         \u001b[0msend_kwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msettings\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 508\u001b[0;31m         \u001b[0mresp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprep\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0msend_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    509\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    510\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mresp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
197 |       "\u001b[0;32m~/.pyenv/versions/3.6.4/lib/python3.6/site-packages/requests/sessions.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, request, **kwargs)\u001b[0m\n\u001b[1;32m    616\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    617\u001b[0m         \u001b[0;31m# Send the request\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 618\u001b[0;31m         \u001b[0mr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0madapter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    619\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    620\u001b[0m         \u001b[0;31m# Total elapsed time of the request (approximately)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
198 |       "\u001b[0;32m~/.pyenv/versions/3.6.4/lib/python3.6/site-packages/requests/adapters.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[1;32m    438\u001b[0m                     \u001b[0mdecode_content\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    439\u001b[0m                     \u001b[0mretries\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax_retries\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 440\u001b[0;31m                     \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    441\u001b[0m                 )\n\u001b[1;32m    442\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
199 |       "\u001b[0;32m~/.pyenv/versions/3.6.4/lib/python3.6/site-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36murlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)\u001b[0m\n\u001b[1;32m    599\u001b[0m                                                   \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout_obj\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    600\u001b[0m                                                   \u001b[0mbody\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mheaders\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 601\u001b[0;31m                                                   chunked=chunked)\n\u001b[0m\u001b[1;32m    602\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    603\u001b[0m             \u001b[0;31m# If we're going to release the connection in ``finally:``, then\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
200 |       "\u001b[0;32m~/.pyenv/versions/3.6.4/lib/python3.6/site-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36m_make_request\u001b[0;34m(self, conn, method, url, timeout, chunked, **httplib_request_kw)\u001b[0m\n\u001b[1;32m    344\u001b[0m         \u001b[0;31m# Trigger any extra validation we need to do.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    345\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 346\u001b[0;31m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_validate_conn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    347\u001b[0m         \u001b[0;32mexcept\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mSocketTimeout\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mBaseSSLError\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    348\u001b[0m             \u001b[0;31m# Py2 raises this as a BaseSSLError, Py3 raises it as socket timeout.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
201 |       "\u001b[0;32m~/.pyenv/versions/3.6.4/lib/python3.6/site-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36m_validate_conn\u001b[0;34m(self, conn)\u001b[0m\n\u001b[1;32m    848\u001b[0m         \u001b[0;31m# Force connect early to allow us to validate the connection.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    849\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'sock'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m  \u001b[0;31m# AppEngine might not have  `.sock`\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 850\u001b[0;31m             \u001b[0mconn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconnect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    851\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    852\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mconn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_verified\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
202 |       "\u001b[0;32m~/.pyenv/versions/3.6.4/lib/python3.6/site-packages/urllib3/connection.py\u001b[0m in \u001b[0;36mconnect\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    324\u001b[0m             \u001b[0mca_cert_dir\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mca_cert_dir\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    325\u001b[0m             \u001b[0mserver_hostname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mhostname\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 326\u001b[0;31m             ssl_context=context)\n\u001b[0m\u001b[1;32m    327\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    328\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0massert_fingerprint\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
203 |       "\u001b[0;32m~/.pyenv/versions/3.6.4/lib/python3.6/site-packages/urllib3/util/ssl_.py\u001b[0m in \u001b[0;36mssl_wrap_socket\u001b[0;34m(sock, keyfile, certfile, cert_reqs, ca_certs, server_hostname, ssl_version, ciphers, ssl_context, ca_cert_dir)\u001b[0m\n\u001b[1;32m    327\u001b[0m         \u001b[0mcontext\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload_cert_chain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcertfile\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkeyfile\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    328\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mHAS_SNI\u001b[0m\u001b[0;34m:\u001b[0m  \u001b[0;31m# Platform-specific: OpenSSL with enabled SNI\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 329\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mcontext\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwrap_socket\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msock\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mserver_hostname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mserver_hostname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    330\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    331\u001b[0m     warnings.warn(\n",
204 |       "\u001b[0;32m~/.pyenv/versions/3.6.4/lib/python3.6/ssl.py\u001b[0m in \u001b[0;36mwrap_socket\u001b[0;34m(self, sock, server_side, do_handshake_on_connect, suppress_ragged_eofs, server_hostname, session)\u001b[0m\n\u001b[1;32m    405\u001b[0m                          \u001b[0msuppress_ragged_eofs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msuppress_ragged_eofs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    406\u001b[0m                          \u001b[0mserver_hostname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mserver_hostname\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 407\u001b[0;31m                          _context=self, _session=session)\n\u001b[0m\u001b[1;32m    408\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    409\u001b[0m     def wrap_bio(self, incoming, outgoing, server_side=False,\n",
205 |       "\u001b[0;32m~/.pyenv/versions/3.6.4/lib/python3.6/ssl.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, sock, keyfile, certfile, server_side, cert_reqs, ssl_version, ca_certs, do_handshake_on_connect, family, type, proto, fileno, suppress_ragged_eofs, npn_protocols, ciphers, server_hostname, _context, _session)\u001b[0m\n\u001b[1;32m    812\u001b[0m                         \u001b[0;31m# non-blocking\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    813\u001b[0m                         \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"do_handshake_on_connect should not be specified for non-blocking sockets\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 814\u001b[0;31m                     \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdo_handshake\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    815\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    816\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mOSError\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
206 |       "\u001b[0;32m~/.pyenv/versions/3.6.4/lib/python3.6/ssl.py\u001b[0m in \u001b[0;36mdo_handshake\u001b[0;34m(self, block)\u001b[0m\n\u001b[1;32m   1066\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0.0\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mblock\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1067\u001b[0m                 \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msettimeout\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1068\u001b[0;31m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sslobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdo_handshake\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1069\u001b[0m         \u001b[0;32mfinally\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1070\u001b[0m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msettimeout\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
207 |       "\u001b[0;32m~/.pyenv/versions/3.6.4/lib/python3.6/ssl.py\u001b[0m in \u001b[0;36mdo_handshake\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    687\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mdo_handshake\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    688\u001b[0m         \u001b[0;34m\"\"\"Start the SSL/TLS handshake.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 689\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sslobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdo_handshake\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    690\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcontext\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcheck_hostname\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    691\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mserver_hostname\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
208 |       "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
209 |      ]
210 |     }
211 |    ],
212 |    "source": [
213 |     "import os\n",
214 |     "\n",
215 |     "# 建立資料夾， 如同linux的mkdir -p\n",
216 |     "os.makedirs('/tmp/mydata/', exist_ok=True)\n",
217 |     "\n",
218 |     "# for loop 剛才爬下來的url list\n",
219 |     "for url in article_links:\n",
220 |     "    print(url)\n",
221 |     "    resp = requests.get(url)\n",
222 |     "    \n",
223 |     "    # 製作一個filename safe的檔名，使用url來做，把/置換成_\n",
224 |     "    filename = url.replace('/', '_')\n",
225 |     "    \n",
226 |     "    # 寫出去\n",
227 |     "    with open('/tmp/mydata/%s.html'%filename, 'w') as f:\n",
228 |     "        f.write(resp.text)"
229 |    ]
230 |   },
231 |   {
232 |    "cell_type": "code",
233 |    "execution_count": null,
234 |    "metadata": {},
235 |    "outputs": [],
236 |    "source": []
237 |   }
238 |  ],
239 |  "metadata": {
240 |   "kernelspec": {
241 |    "display_name": "Python 3",
242 |    "language": "python",
243 |    "name": "python3"
244 |   },
245 |   "language_info": {
246 |    "codemirror_mode": {
247 |     "name": "ipython",
248 |     "version": 3
249 |    },
250 |    "file_extension": ".py",
251 |    "mimetype": "text/x-python",
252 |    "name": "python",
253 |    "nbconvert_exporter": "python",
254 |    "pygments_lexer": "ipython3",
255 |    "version": "3.6.4"
256 |   }
257 |  },
258 |  "nbformat": 4,
259 |  "nbformat_minor": 2
260 | }
261 | 


--------------------------------------------------------------------------------