├── .gitignore ├── LICENSE ├── README.md └── hpv.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2019, Mark Renton 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 通过九价HPV疫苗摇号练习程序学习 Python 编程 2 | 3 | 最近9价HPV疫苗一直是热门的稀缺资源,很多地区都通过预约摇号的方式开展接种。趁这个机会尝试用Python实现一些功能,比如身份证号的生成与验证,随机数的分配等。学习语言最好的方式是实践,从小项目开始可以快速完成,Python编程的快感就在于此。 4 | 5 | 利用 Python 的一些网络开发框架比如 Django 很快就能建立信息录入的前台界面和后台管理;通过注册的身份证信息,随机选取很容易实现;当然前提我们要测试程序的话,最好以一部分身份证数据来模拟程序。9价疫苗是有年龄限制的,另外有一些地区也有区域接种限制。假如有200个接种名额,我们计划抽取400个,按照顺序判断身份证是否正确,年龄是否符合,性别是否为女性等。 6 | 7 | ## 1. 生成一定数量的随机身份证号 8 | 9 | ### 1.1 检测身份证最后一位是否正确 10 | 11 | **身份证的规则** 12 | 1. 将身份证号码前17位数分别乘以不同的系数,从第1位到第17位的系数分别为:`7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2` 13 | 2. 将得到的17个乘积相加。 14 | 3. 将相加后的和除以11并得到余数。 15 | 4. 余数为其对应的身份证最后一位校验码,按照0-10顺序为`1, 0, X, 9, 8, 7, 6, 5, 4, 3, 2` 16 | 17 | 身份证规则也可以参考[这里](https://github.com/jayknoxqu/id-number-util) 18 | 19 | ```python 20 | def check_last_num(id): 21 | """ 22 | 检测身份证最后一位数字是否正确,如果正确返回True,错误则返回False 23 | """ 24 | # id 是一个数字,要以列表操作先将其转换成字符串 25 | a = str(id) 26 | b = [7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2] 27 | c = [1, 0, "X", 9, 8, 7, 6, 5, 4, 3, 2] 28 | # 最后一位的计算规则 29 | # 通过lambda表达式一行获得计算结果,不过语义不明确,也可以用for循环实现,比较易读。 30 | # 总想着减少行数写python是一种病 31 | r = list(map(lambda x, y: int(x) * y, list(a), b)) 32 | if str(c[sum(r) % 11]) == a[17]: 33 | return True 34 | return False 35 | ``` 36 | 37 | ### 1.2 批量生成身份证 38 | 39 | ```python 40 | import random 41 | import time 42 | 43 | def digit_1to6(): 44 | """身份证前六位""" 45 | # 假设报名的都是杭州地区的身份证,前六位号码: 46 | first_list = [ 47 | '330102', # 上城区 48 | '330103', # 下城区 49 | '330104', # 江干区 50 | '330105', # 拱墅区 51 | '330106', # 西湖区 52 | '330108', # 滨江区 53 | '330109', # 萧山区 54 | '330110', # 余杭区 55 | '330122', # 桐庐县 56 | '330127', # 淳安县 57 | '330181', # 萧山区 58 | '330182', # 建德市 59 | '330183', # 富阳市 60 | '330184', # 余杭区 61 | '330185' # 临安市 62 | ] 63 | return random.choice(first_list) 64 | 65 | def digit_7to14(start=(1948, 1, 1, 0, 0, 0, 0, 0, 0), 
66 | end=(2018, 12, 31, 23, 59, 59, 0, 0, 0)): 67 | """ 68 | 随机生成8位日期 69 | """ 70 | # 生成开始时间戳,首批身份证从1948年开始。 71 | start = time.mktime(start) 72 | # 生成结束时间戳,设置为2018-12-31截至 73 | end = time.mktime(end) 74 | rand_t = time.localtime((end - start) * random.random() + start) 75 | # 将时间元组转成格式化字符串 76 | return time.strftime("%Y%m%d", rand_t) 77 | 78 | def digit_15to17(): 79 | """ 80 | 生成身份证15到17位数字 81 | """ 82 | # 后面序号低于相应位数,前面加上0填充 83 | # 身份证号17位必须是偶数 84 | num = random.randrange(0, 999, 2) 85 | if num < 10: 86 | num = '00' + str(num) 87 | elif 9 < five < 100: 88 | num = '0' + str(num) 89 | else: 90 | num = str(num) 91 | return num 92 | 93 | def digit_18(id): 94 | """ 95 | 根据前17位数字计算获得身份证最后一位数字 96 | """ 97 | a = list(id) 98 | b = [7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2] 99 | c = [1, 0, "X", 9, 8, 7, 6, 5, 4, 3, 2] 100 | r = list(map(lambda x, y: int(x) * y, a, b)) 101 | return str(c[sum(r)%11]) 102 | 103 | def simulate_ids(num): 104 | """ 105 | 模拟生成一定数量的身份证号 106 | """ 107 | ids = [] 108 | if num > 0: 109 | for i in range(1, num+1): 110 | id_raw = digit_1to6() + digit_7to14() + digit_15to17() 111 | id = id_raw + digit_18(id_raw) 112 | ids.append(id) 113 | else: 114 | return False 115 | return ids 116 | 117 | # 生成10000个身份证号,保存到文件 ids.txt 中 118 | ids = simulate_ids(10000) 119 | # 120 | if ids: 121 | with open("sim_ids.txt", w) as f: 122 | f.write("\n".join(ids)) 123 | else: 124 | print("please type correct simulate id number") 125 | ``` 126 | 127 | ## 2. 随机抽取一定数量的身份证 128 | 129 | 第一部分的代码获得了10000个身份证号,我们打算从中间抽取一定数量的身份证。 130 | 131 | ```python 132 | # 先随机将身份证进行随机排序 133 | # 这一步不是必须,主要目的是学习一下shuffle函数还是 134 | for i in range(100): 135 | random.shuffle(ids) 136 | 137 | # 抽选200个数量的身份证 138 | result_ids = random.sample(id_list, 200) 139 | ``` 140 | 141 | ## 3. 
验证身份证的年龄是否符合 142 | 143 | ```python 144 | def check_id_for_hpv(id): 145 | """ 146 | 检测身份证号是否符合要求 147 | """ 148 | # 检测性别, 17位数字应为偶数 149 | id = str(id) 150 | if int(id[17]) % 2 != 0: 151 | print("Only Femail available") 152 | return False 153 | # 检测年龄,9价HPV疫苗只适合9~26岁 154 | # 身份证年份设置为1993~2010 155 | # 考虑到疫苗有3针,要打半年,具体时间范围参考疾控中心解释。 156 | birth = int(id[6:9]) 157 | if birth > 1992 and birth < 2010: 158 | status = 1 159 | else: 160 | status = 0 161 | return status 162 | ``` 163 | 164 | ## 4. 完成脚本 165 | 166 | 最后引入 argparse 模块,使脚本接受参数。完成的脚本放在[这里](hpv.py)。接下来要租用阿里云,建立web server,支持python后端;采用django开发框架实现前端和后端的网站功能,对于熟练的 pythoner 来说基本上一天就能完成。 167 | 168 | 最终我们模拟生成10000个身份证,随机选取400个,符合规定的大约有110个。这主要因为年龄是随机分布的,而26-9=17,大约是2018-1948=70的1/4,因此才会有这个结果。如果考虑到真实情况,注册时年龄不应是随机分布,可以考虑引入 numpy, scipy 等模块,使用 beta 分布,alpha=2, beta=5,起始年龄为6,最大年龄50,生成一个模拟年龄分布。 169 | 170 | ```python 171 | from scipy import stats 172 | 173 | def digit_7to10(a=2, b=5, min=6, max=50): 174 | """ 175 | 生成年龄beta分布 176 | """ 177 | return str(2019 - int(stats.beta.rvs(a, b, min, max))) 178 | ``` 179 | 180 | 按照顺序公布结果,其中包含的一些文本文件操作,在 shell 下利用 awk, sort 等工具即可。 181 | 182 | ```bash 183 | # 脚本运行方式 184 | $ python hpv.py -e 400 -p 200 -s 10000 185 | $ python hpv.py --help 186 | usage: hpv.py [-h] [-e EXTRACT] [-p PICK] [-s SIM] [-i INPUT] 187 | 188 | optional arguments: 189 | -h, --help show this help message and exit 190 | -e EXTRACT, --extract EXTRACT 191 | How many id numbers extracted 192 | -p PICK, --pick PICK How many id numbers picked 193 | -s SIM, --sim SIM How many id numbers simulated 194 | -i INPUT, --input INPUT 195 | input file 196 | ``` 197 | -------------------------------------------------------------------------------- /hpv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | ################################################################################ 3 | # 使用方法: 4 | # python hpv.py -e 400 -p 200 -s 10000 5 | # 6 | # 7 | 
################################################################################
import time
import random
import argparse
from scipy import stats


# Weights applied to digits 1-17 of an ID number when computing the checksum.
_WEIGHTS = (7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2)
# Check character for each value of (weighted sum % 11), in the order 0..10.
_CHECK_CHARS = "10X98765432"
# Year that simulated ages are subtracted from to obtain a birth year.
_REFERENCE_YEAR = 2019
# Six-digit area codes; all applicants are assumed to come from Hangzhou.
_AREA_CODES = (
    '330102',  # Shangcheng District
    '330103',  # Xiacheng District
    '330104',  # Jianggan District
    '330105',  # Gongshu District
    '330106',  # Xihu District
    '330108',  # Binjiang District
    '330109',  # Xiaoshan District
    '330110',  # Yuhang District
    '330122',  # Tonglu County
    '330127',  # Chun'an County
    '330181',  # Xiaoshan District
    '330182',  # Jiande City
    '330183',  # Fuyang City
    '330184',  # Yuhang District
    '330185',  # Lin'an City
)

# The parser is built at import time, but arguments are only parsed inside
# main() so that importing this module has no command-line side effects.
parser = argparse.ArgumentParser()
parser.add_argument('-e', '--extract', type=int, default=400,
                    help='How many id numbers extracted')
parser.add_argument('-p', '--pick', type=int, default=200,
                    help='How many id numbers picked')
parser.add_argument('-s', '--sim', type=int, default=10000,
                    help='How many id numbers simulated')
parser.add_argument('-i', '--input', help="input file")


def _is_digits(text):
    """Return True when *text* is non-empty and consists only of 0-9."""
    return bool(text) and all(ch in "0123456789" for ch in text)


def check_last_num(id):
    """
    Check whether the last character of an ID number is the right checksum.

    The value is converted with str() and surrounding whitespace is removed;
    a lowercase "x" check character is accepted. Returns True only for an
    18-character value whose first 17 characters are digits and whose 18th
    character equals the computed check character. Malformed input returns
    False instead of raising.
    """
    text = str(id).strip().upper()
    if len(text) != 18 or not _is_digits(text[:17]):
        return False
    return digit_18(text[:17]) == text[17]


def digit_1to6():
    """Return a random six-digit area code (digits 1-6 of the ID number)."""
    return random.choice(_AREA_CODES)


def digit_7to10(a=2, b=5, min=6, max=50):
    """
    Return a four-digit birth year drawn from a beta-distributed age.

    The age follows a Beta(a, b) distribution stretched over [min, max] and
    is truncated to an integer; the result is 2019 minus that age, as a
    string.
    """
    # SciPy's third and fourth arguments are loc and scale, and the support
    # is [loc, loc + scale]; the scale must therefore be the width of the
    # age interval, not its upper bound.
    age = int(stats.beta.rvs(a, b, loc=min, scale=max - min))
    return str(_REFERENCE_YEAR - age)


def digit_11to14(start=(1948, 1, 1, 0, 0, 0, 0, 0, 0),
                 end=(2018, 12, 31, 23, 59, 59, 0, 0, 0),
                 year=None):
    """
    Return a random month and day as a four-character "MMDD" string.

    By default a random moment between the time tuples *start* and *end* is
    chosen and its month/day is returned. When *year* is given, the moment is
    chosen inside that calendar year instead, so the result is always a date
    that exists in that year (no Feb 29 for a non-leap year).
    """
    if year is not None:
        year = int(year)
        # tm_isdst = -1 lets the C library decide whether DST applies.
        start = (year, 1, 1, 0, 0, 0, 0, 0, -1)
        end = (year, 12, 31, 23, 59, 59, 0, 0, -1)
    start_ts = time.mktime(start)
    end_ts = time.mktime(end)
    moment = time.localtime((end_ts - start_ts) * random.random() + start_ts)
    return time.strftime("%m%d", moment)


def digit_15to17():
    """
    Return the three-digit sequence code (digits 15-17 of the ID number).

    Only even numbers from 0 to 998 are produced, zero-padded to three
    characters, so the 17th digit is always even (the value that
    check_id_for_hpv accepts).
    """
    return "%03d" % random.randrange(0, 999, 2)


def digit_18(id):
    """
    Compute the check character from the first 17 digits of an ID number.

    Only the first 17 characters of *id* are used. Raises ValueError when
    fewer than 17 characters are supplied or when any of them is not a digit.
    """
    body = str(id)[:17]
    if len(body) < 17 or not _is_digits(body):
        raise ValueError("expected at least 17 leading digits, got %r" % (id,))
    total = sum(int(ch) * weight for ch, weight in zip(body, _WEIGHTS))
    return _CHECK_CHARS[total % 11]


def simulate_ids(num):
    """
    Generate *num* simulated ID numbers.

    Returns a list of 18-character strings, or False when *num* is not
    positive.
    """
    if num <= 0:
        return False
    ids = []
    for _ in range(num):
        year = digit_7to10()
        # Month/day are drawn inside the chosen year so the date is valid.
        id_raw = (digit_1to6() + year + digit_11to14(year=int(year))
                  + digit_15to17())
        ids.append(id_raw + digit_18(id_raw))
    return ids


def check_id_for_hpv(id):
    """
    Check whether an ID number meets the vaccination requirements.

    Returns True when the 17th digit is even and the birth year (digits
    7-10) satisfies 1992 < year < 2010. An odd 17th digit prints a notice and
    returns False. Input with fewer than 17 leading digits returns False. The
    check character is not verified here; use check_last_num for that.
    """
    text = str(id).strip()
    if not _is_digits(text[:17]) or len(text) < 17:
        return False
    if int(text[16]) % 2 != 0:
        print("Only Female available")
        return False
    birth = int(text[6:10])
    # NOTE(review): the original comment gives the range as 1993~2010, but
    # the comparison has always excluded 2010; behaviour is kept as is.
    return 1992 < birth < 2010


def main(argv=None):
    """
    Run the lottery: load or simulate IDs, draw a sample, keep eligible ones.

    *argv* is the argument list to parse; None means sys.argv[1:]. An input
    file, when given, takes precedence over simulation. Simulated IDs are
    written to ids.txt and the selected IDs to last_ids.txt.
    """
    args = parser.parse_args(argv)

    if args.input:
        with open(args.input, "r") as f:
            ids = [line.strip() for line in f if line.strip()]
    elif args.sim > 0:
        ids = simulate_ids(args.sim)
        with open("ids.txt", "w") as f:
            f.write("\n".join(ids))
    else:
        ids = []

    if not ids:
        print("please type correct simulate id number")
        return

    # Not strictly needed before random.sample; kept as a shuffle exercise.
    random.shuffle(ids)

    # Never ask for more items than are available.
    sample_size = max(0, min(args.extract, len(ids)))
    result_ids = random.sample(ids, sample_size)

    last_ids = []
    for candidate in result_ids:
        if len(last_ids) >= args.pick:
            break
        if check_last_num(candidate) and check_id_for_hpv(candidate):
            print(candidate + "\n")
            last_ids.append(candidate)

    print("total right id number is %d" % len(last_ids))

    with open("last_ids.txt", "w") as f:
        f.write("\n".join(last_ids))


if __name__ == "__main__":
    main()