├── .gitignore
├── LICENSE
├── README.md
└── hpv.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | MANIFEST
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | .pytest_cache/
 49 | 
 50 | # Translations
 51 | *.mo
 52 | *.pot
 53 | 
 54 | # Django stuff:
 55 | *.log
 56 | local_settings.py
 57 | db.sqlite3
 58 | 
 59 | # Flask stuff:
 60 | instance/
 61 | .webassets-cache
 62 | 
 63 | # Scrapy stuff:
 64 | .scrapy
 65 | 
 66 | # Sphinx documentation
 67 | docs/_build/
 68 | 
 69 | # PyBuilder
 70 | target/
 71 | 
 72 | # Jupyter Notebook
 73 | .ipynb_checkpoints
 74 | 
 75 | # pyenv
 76 | .python-version
 77 | 
 78 | # celery beat schedule file
 79 | celerybeat-schedule
 80 | 
 81 | # SageMath parsed files
 82 | *.sage.py
 83 | 
 84 | # Environments
 85 | .env
 86 | .venv
 87 | env/
 88 | venv/
 89 | ENV/
 90 | env.bak/
 91 | venv.bak/
 92 | 
 93 | # Spyder project settings
 94 | .spyderproject
 95 | .spyproject
 96 | 
 97 | # Rope project settings
 98 | .ropeproject
 99 | 
100 | # mkdocs documentation
101 | /site
102 | 
103 | # mypy
104 | .mypy_cache/
105 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 3-Clause License
 2 | 
 3 | Copyright (c) 2019, Mark Renton
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are met:
 8 | 
 9 | 1. Redistributions of source code must retain the above copyright notice, this
10 |    list of conditions and the following disclaimer.
11 | 
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 |    this list of conditions and the following disclaimer in the documentation
14 |    and/or other materials provided with the distribution.
15 | 
16 | 3. Neither the name of the copyright holder nor the names of its
17 |    contributors may be used to endorse or promote products derived from
18 |    this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # 通过九价HPV疫苗摇号练习程序学习 Python 编程
  2 | 
  3 | 最近9价HPV疫苗一直是热门的稀缺资源，很多地区都通过预约摇号的方式开展接种。趁这个机会尝试用Python实现相关功能，比如身份证号的生成与验证，随机数的分配等。学习语言最好的方式是实践，从小项目开始可以快速完成，Python编程的快感就在于此。
  4 | 
  5 | 利用 Python 的一些网络开发框架比如 Django 可以很快建立信息录入的前台界面和后台管理；通过注册的身份证信息，随机选取很容易实现；当然前提是我们要测试程序的话，最好以一部分身份证数据来模拟。9价疫苗是有年龄限制的，另外有一些地区也有区域接种限制。假如有200个接种名额，我们计划抽取400个，按照顺序判断身份证是否正确，年龄是否符合，性别是否为女性等。
  6 | 
  7 | ## 1. 生成一定数量的随机身份证号
  8 | 
  9 | ### 1.1 检测身份证最后一位是否正确
 10 | 
 11 | **身份证的规则**
 12 | 1. 将身份证号码前17位数分别乘以不同的系数，从第1位到第17位的系数分别为：`7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2`
 13 | 2. 将得到的17个乘积相加。
 14 | 3. 将相加后的和除以11并得到余数。
 15 | 4. 余数为其对应的身份证最后一位校验码，按照0-10顺序为`1, 0, X, 9, 8, 7, 6, 5, 4, 3, 2`
 16 | 
 17 | 身份证规则也可以参考[这里](https://github.com/jayknoxqu/id-number-util)
 18 | 
 19 | ```python
 20 | def check_last_num(id):
 21 |     """
 22 |     检测身份证最后一位数字是否正确，如果正确返回True，错误则返回False
 23 |     """
 24 |     # id 是一个数字，要以列表操作先将其转换成字符串
 25 |     a = str(id)
 26 |     b = [7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2]
 27 |     c = [1, 0, "X", 9, 8, 7, 6, 5, 4, 3, 2]
 28 |     # 最后一位的计算规则
 29 |     # 通过lambda表达式一行获得计算结果，不过语义不明确，也可以用for循环实现，比较易读。
 30 |     # 总想着减少行数写python是一种病
 31 |     r = list(map(lambda x, y: int(x) * y, list(a), b))
 32 |     if str(c[sum(r) % 11]) == a[17]:
 33 |         return True
 34 |     return False
 35 | ```
 36 | 
 37 | ### 1.2 批量生成身份证
 38 | 
 39 | ```python
 40 | import random
 41 | import time
 42 | 
 43 | def digit_1to6():
 44 |     """身份证前六位"""
 45 |     # 假设报名的都是杭州地区的身份证，前六位号码：
 46 |     first_list = [
 47 |         '330102', # 上城区
 48 |         '330103', # 下城区
 49 |         '330104', # 江干区
 50 |         '330105', # 拱墅区
 51 |         '330106', # 西湖区
 52 |         '330108', # 滨江区
 53 |         '330109', # 萧山区
 54 |         '330110', # 余杭区
 55 |         '330122', # 桐庐县
 56 |         '330127', # 淳安县
 57 |         '330181', # 萧山区
 58 |         '330182', # 建德市
 59 |         '330183', # 富阳市
 60 |         '330184', # 余杭区
 61 |         '330185'  # 临安市
 62 |     ]
 63 |     return random.choice(first_list)
 64 | 
 65 | def digit_7to14(start=(1948, 1, 1, 0, 0, 0, 0, 0, 0), 
 66 |     end=(2018, 12, 31, 23, 59, 59, 0, 0, 0)):
 67 |     """
 68 |     随机生成8位日期
 69 |     """
 70 |     # 生成开始时间戳，首批身份证从1948年开始。
 71 |     start = time.mktime(start)
 72 |     # 生成结束时间戳，设置为2018-12-31截至	
 73 |     end = time.mktime(end)
 74 |     rand_t = time.localtime((end - start) * random.random() + start)
 75 |     # 将时间元组转成格式化字符串
 76 |     return time.strftime("%Y%m%d", rand_t)
 77 | 
 78 | def digit_15to17():
 79 |     """
 80 |     生成身份证15到17位数字
 81 |     """
 82 |     # 后面序号低于相应位数，前面加上0填充
 83 |     # 身份证号17位必须是偶数
 84 |     num = random.randrange(0, 999, 2)
 85 |     if num < 10:
 86 |         num = '00' + str(num)
 87 |     elif 9 < five < 100:
 88 |         num = '0' + str(num)
 89 |     else:
 90 |         num = str(num)
 91 |     return num
 92 | 
 93 | def digit_18(id):
 94 |     """
 95 |     根据前17位数字计算获得身份证最后一位数字
 96 |     """
 97 |     a = list(id)
 98 |     b = [7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2]
 99 |     c = [1, 0, "X", 9, 8, 7, 6, 5, 4, 3, 2]
100 |     r = list(map(lambda x, y: int(x) * y, a, b))
101 |     return str(c[sum(r)%11])
102 | 
def simulate_ids(num):
    """
    Generate ``num`` simulated 18-character ID numbers.

    :param num: how many ID numbers to generate.
    :return: list of ID strings, or False when ``num`` is not positive.
    """
    if num <= 0:
        return False
    ids = []
    for _ in range(num):
        # Area code + birth date + sequence code, then append the check
        # character computed from those 17 digits.
        id_raw = digit_1to6() + digit_7to14() + digit_15to17()
        ids.append(id_raw + digit_18(id_raw))
    return ids
116 | 
# Generate 10000 simulated ID numbers and save them to sim_ids.txt,
# one per line.
ids = simulate_ids(10000)
if ids:
    # The mode must be the string "w".
    with open("sim_ids.txt", "w") as f:
        f.write("\n".join(ids))
else:
    print("please type correct simulate id number")
125 | ```
126 | 
127 | ## 2. 随机抽取一定数量的身份证
128 | 
129 | 第一部分的代码获得了10000个身份证号，我们打算从中间抽取一定数量的身份证。
130 | 
131 | ```python
# Shuffle the ID list in place. This step is optional, because
# random.sample() below already draws at random; it is here to
# demonstrate random.shuffle().
random.shuffle(ids)

# Draw 200 distinct ID numbers from the generated list.
result_ids = random.sample(ids, 200)
139 | ```
140 | 
141 | ## 3. 验证身份证的年龄是否符合
142 | 
143 | ```python
def check_id_for_hpv(id):
    """
    Check whether an ID number meets the vaccination requirements.

    :param id: 18-character ID number (string or integer); surrounding
        whitespace is ignored.
    :return: True when the 17th digit is even (female) and the birth year
        is after 1992 and before 2010; False otherwise, including
        malformed input.
    """
    text = str(id).strip()
    # Reject malformed input instead of raising on indexing / int().
    if len(text) != 18 or not all(ch in "0123456789" for ch in text[:17]):
        return False
    # Gender is encoded in the 17th digit (index 16); index 17 is the check
    # character and may be "X".
    if int(text[16]) % 2 != 0:
        print("Only Female available")
        return False
    # The 9-valent HPV vaccine targets ages 9 to 26. The birth year is the
    # four digits at positions 7-10 (indices 6..9).
    # NOTE(review): the original comment gave the range as 1993~2010 while
    # the comparison excludes 2010 -- confirm the intended upper bound.
    birth_year = int(text[6:10])
    return 1992 < birth_year < 2010
162 | ```
163 | 
164 | ## 4. 完成脚本
165 | 
166 | 最后引入 argparse 模块，使脚本接受参数。完成的脚本放在[这里](hpv.py)，接下来要租用阿里云，建立web server，支持python后端，采用django开发框架实现前端和后端的网站功能，对于熟练的 pythoner 来说基本上一天就能完成。
167 | 
168 | 最终我们模拟生成10000个身份证，随机选取400个，符合规定的有大约110个左右。这主要因为年龄是随机分布的，而26-9=17,大约是2018-1948=70的1/4，因此才会有这个结果。如果考虑到真实情况，注册时年龄不应是随机分布，可以考虑引入 numpy, scipy 等模块，使用 beta 分布，alpha=2, beta=5，起始年龄为6，最大年龄50，生成一个模拟年龄分布。
169 | 
170 | ```python
171 | from scipy import stats
172 | 
def digit_7to10(a=2, b=5, min=6, max=50):
    """
    Return a random four-digit birth year with beta-distributed age.

    An age is drawn from a beta(a, b) distribution stretched over
    [min, max], truncated to an integer and subtracted from 2019.

    :param a: alpha shape parameter of the beta distribution.
    :param b: beta shape parameter of the beta distribution.
    :param min: youngest age.
    :param max: oldest age.
    :return: birth year as a string.
    """
    # scipy takes (loc, scale), and scale is the interval width. Passing
    # ``max`` directly would stretch ages to min + max.
    age = stats.beta.rvs(a, b, loc=min, scale=max - min)
    return str(2019 - int(age))
178 | ```
179 | 
180 | 按照顺序公布结果，其中包含的一些文本文件操作，在 shell 下利用 awk, sort 等工具即可。
181 | 
182 | ```bash
183 | # 脚本运行方式
184 | $ python hpv.py -e 400 -p 200 -s 10000
185 | $ python hpv.py --help
186 | usage: hpv.py [-h] [-e EXTRACT] [-p PICK] [-s SIM] [-i INPUT]
187 | 
188 | optional arguments:
189 |   -h, --help            show this help message and exit
190 |   -e EXTRACT, --extract EXTRACT
191 |                         How many id numbers extracted
192 |   -p PICK, --pick PICK  How many id numbers picked
193 |   -s SIM, --sim SIM     How many id numbers simulated
194 |   -i INPUT, --input INPUT
195 |                         input file
196 | ```
197 | 


--------------------------------------------------------------------------------
/hpv.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | ################################################################################
  3 | # 使用方法：
  4 | # python hpv.py -e 400 -p 200 -s 10000
  5 | #
  6 | # <indexofire@gmail.com>
  7 | ################################################################################
  8 | import time
  9 | import random
 10 | import argparse
 11 | from scipy import stats
 12 | 
 13 | 
 14 | parser = argparse.ArgumentParser()
 15 | parser.add_argument('-e', '--extract', type=int, default=400,
 16 |     help='How many id numbers extracted')
 17 | parser.add_argument('-p', '--pick', type=int, default=200,
 18 |     help='How many id numbers picked')
 19 | parser.add_argument('-s', '--sim', type=int, default=10000,
 20 |     help='How many id numbers simulated')
 21 | parser.add_argument('-i', '--input', help="input file")
 22 | args = parser.parse_args()
 23 | 
 24 | def check_last_num(id):
 25 |     """
 26 |     检测身份证最后一位数字是否正确，如果正确返回True，错误则返回False
 27 |     """
 28 |     # id 是一个数字，要以列表操作先将其转换成字符串
 29 |     a = str(id)
 30 |     b = [7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2]
 31 |     c = [1, 0, "X", 9, 8, 7, 6, 5, 4, 3, 2]
 32 |     # 最后一位的计算规则
 33 |     # 通过lambda表达式一行获得计算结果，不过语义不明确，也可以用for循环实现，比较易读。
 34 |     # 总想着减少行数写python是一种病
 35 |     r = list(map(lambda x, y: int(x) * y, list(a), b))
 36 |     if str(c[sum(r) % 11]) == a[17]:
 37 |         return True
 38 |     return False
 39 | 
 40 | def digit_1to6():
 41 |     """身份证前六位"""
 42 |     # 假设报名的都是杭州地区的身份证，前六位号码：
 43 |     first_list = [
 44 |         '330102', # 上城区
 45 |         '330103', # 下城区
 46 |         '330104', # 江干区
 47 |         '330105', # 拱墅区
 48 |         '330106', # 西湖区
 49 |         '330108', # 滨江区
 50 |         '330109', # 萧山区
 51 |         '330110', # 余杭区
 52 |         '330122', # 桐庐县
 53 |         '330127', # 淳安县
 54 |         '330181', # 萧山区
 55 |         '330182', # 建德市
 56 |         '330183', # 富阳市
 57 |         '330184', # 余杭区
 58 |         '330185'  # 临安市
 59 |     ]
 60 |     return random.choice(first_list)
 61 | 
 62 | def digit_7to10(a=2, b=5, min=6, max=50):
 63 |     """
 64 |     生成年龄beta分布
 65 |     """
 66 |     return str(2019 - int(stats.beta.rvs(a, b, min, max)))
 67 | 
 68 | def digit_11to14(start=(1948, 1, 1, 0, 0, 0, 0, 0, 0),
 69 |     end=(2018, 12, 31, 23, 59, 59, 0, 0, 0)):
 70 |     """
 71 |     随机生成8位日期
 72 |     """
 73 |     # 生成开始时间戳，首批身份证从1948年开始。
 74 |     start = time.mktime(start)
 75 |     # 生成结束时间戳，设置为2018-12-31截至
 76 |     end = time.mktime(end)
 77 |     rand_t = time.localtime((end - start) * random.random() + start)
 78 |     # 将时间元组转成格式化字符串
 79 |     return time.strftime("%m%d", rand_t)
 80 | 
 81 | def digit_15to17():
 82 |     """
 83 |     生成身份证15到17位数字
 84 |     """
 85 |     # 后面序号低于相应位数，前面加上0填充
 86 |     # 身份证号17位必须是偶数
 87 |     num = random.randrange(0, 999, 2)
 88 |     if num < 10:
 89 |         num = '00' + str(num)
 90 |     elif 9 < num < 100:
 91 |         num = '0' + str(num)
 92 |     else:
 93 |         num = str(num)
 94 |     return num
 95 | 
 96 | def digit_18(id):
 97 |     """
 98 |     根据前17位数字计算获得身份证最后一位数字
 99 |     """
100 |     a = list(id)
101 |     b = [7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2]
102 |     c = [1, 0, "X", 9, 8, 7, 6, 5, 4, 3, 2]
103 |     r = list(map(lambda x, y: int(x) * y, a, b))
104 |     return str(c[sum(r)%11])
105 | 
def simulate_ids(num):
    """
    Generate ``num`` simulated 18-character ID numbers.

    Each ID is an area code, a birth year, a month-day, a sequence code
    and the check character computed from those 17 digits.

    :param num: how many ID numbers to generate.
    :return: list of ID strings, or False when ``num`` is not positive.
    """
    import calendar

    if num <= 0:
        return False
    ids = []
    for _ in range(num):
        area = digit_1to6()
        year = digit_7to10()
        month_day = digit_11to14()
        # Year and month-day are drawn independently, so Feb 29 can land in
        # a non-leap year; redraw until the birth date is a real date.
        while month_day == "0229" and not calendar.isleap(int(year)):
            month_day = digit_11to14()
        id_raw = area + year + month_day + digit_15to17()
        ids.append(id_raw + digit_18(id_raw))
    return ids
119 | 
def check_id_for_hpv(id):
    """
    Check whether an ID number meets the vaccination requirements.

    :param id: 18-character ID number (string or integer); surrounding
        whitespace is ignored.
    :return: True when the 17th digit is even (female) and the birth year
        is after 1992 and before 2010; False otherwise, including
        malformed input.
    """
    text = str(id).strip()
    # Reject malformed input instead of raising on indexing / int().
    if len(text) != 18 or not all(ch in "0123456789" for ch in text[:17]):
        return False
    # Gender is encoded in the 17th digit (index 16): even means female.
    if int(text[16]) % 2 != 0:
        print("Only Female available")
        return False
    # The 9-valent HPV vaccine targets ages 9 to 26. The birth year is the
    # four digits at positions 7-10 (indices 6..9).
    # NOTE(review): the original comment gave the range as 1993~2010 while
    # the comparison excludes 2010 -- confirm the intended upper bound.
    birth_year = int(text[6:10])
    return 1992 < birth_year < 2010
137 | 
def main():
    """
    Run the lottery: load or simulate ID numbers, draw candidates, and keep
    the eligible ones.

    Reads the module-level ``args`` namespace:

    * ``--input``: file with one ID number per line; takes precedence when
      given (previously it was ignored unless ``--sim`` was 0).
    * ``--sim``: number of ID numbers to simulate when no input file is
      given; the simulated list is written to ``ids.txt``.
    * ``--extract``: number of candidates drawn at random.
    * ``--pick``: maximum number of eligible candidates kept.

    The selected ID numbers are printed and written to ``last_ids.txt``.
    """
    if args.input:
        with open(args.input, "r") as f:
            # Drop line endings and blank lines so IDs are clean strings.
            ids = [line.strip() for line in f if line.strip()]
        if not ids:
            print("no id numbers found in input file")
            return
    else:
        ids = simulate_ids(args.sim)
        if not ids:
            # simulate_ids returns False for a non-positive count.
            print("please type correct simulate id number")
            return
        with open("ids.txt", "w") as f:
            f.write("\n".join(ids))

    # Shuffle the list. random.sample() below already draws at random, so
    # this is not strictly needed; it is kept to demonstrate shuffle.
    random.shuffle(ids)

    # Never ask for more candidates than exist (random.sample would raise).
    extract_count = max(0, min(args.extract, len(ids)))
    result_ids = random.sample(ids, extract_count)

    last_ids = []
    for candidate in result_ids:
        if len(last_ids) >= args.pick:
            break
        if check_id_for_hpv(candidate):
            print(candidate + "\n")
            last_ids.append(candidate)

    print("total right id number is %d" % len(last_ids))

    with open("last_ids.txt", "w") as f:
        f.write("\n".join(last_ids))


if __name__ == "__main__":
    main()
179 | 


--------------------------------------------------------------------------------