├── .gitignore ├── LICENSE ├── README.md ├── bin ├── activate ├── activate.csh ├── activate.fish ├── activate_this.py ├── easy_install ├── easy_install-2.7 ├── pip ├── pip2 ├── pip2.7 ├── python ├── python2 ├── python2.7 ├── runxlrd.py └── wheel ├── data.json ├── include └── python2.7 ├── pip-selfcheck.json ├── read_xls.py └── university.xls /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 victor 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # university-data 2 | 国内省-市-高校数据,解析教育部下载的全国普通高等学校名单 3 | -------------------------------------------------------------------------------- /bin/activate: -------------------------------------------------------------------------------- 1 | # This file must be used with "source bin/activate" *from bash* 2 | # you cannot run it directly 3 | 4 | deactivate () { 5 | unset pydoc 6 | 7 | # reset old environment variables 8 | if [ -n "${_OLD_VIRTUAL_PATH-}" ] ; then 9 | PATH="$_OLD_VIRTUAL_PATH" 10 | export PATH 11 | unset _OLD_VIRTUAL_PATH 12 | fi 13 | if [ -n "${_OLD_VIRTUAL_PYTHONHOME-}" ] ; then 14 | PYTHONHOME="$_OLD_VIRTUAL_PYTHONHOME" 15 | export PYTHONHOME 16 | unset _OLD_VIRTUAL_PYTHONHOME 17 | fi 18 | 19 | # This should detect bash and zsh, which have a hash command that must 20 | # be called to get it to forget past commands. Without forgetting 21 | # past commands the $PATH changes we made may not be respected 22 | if [ -n "${BASH-}" -o -n "${ZSH_VERSION-}" ] ; then 23 | hash -r 2>/dev/null 24 | fi 25 | 26 | if [ -n "${_OLD_VIRTUAL_PS1-}" ] ; then 27 | PS1="$_OLD_VIRTUAL_PS1" 28 | export PS1 29 | unset _OLD_VIRTUAL_PS1 30 | fi 31 | 32 | unset VIRTUAL_ENV 33 | if [ ! "${1-}" = "nondestructive" ] ; then 34 | # Self destruct! 35 | unset -f deactivate 36 | fi 37 | } 38 | 39 | # unset irrelevant variables 40 | deactivate nondestructive 41 | 42 | VIRTUAL_ENV="/Users/victorli/open/university-data" 43 | export VIRTUAL_ENV 44 | 45 | _OLD_VIRTUAL_PATH="$PATH" 46 | PATH="$VIRTUAL_ENV/bin:$PATH" 47 | export PATH 48 | 49 | # unset PYTHONHOME if set 50 | # this will fail if PYTHONHOME is set to the empty string (which is bad anyway) 51 | # could use `if (set -u; : $PYTHONHOME) ;` in bash 52 | if [ -n "${PYTHONHOME-}" ] ; then 53 | _OLD_VIRTUAL_PYTHONHOME="$PYTHONHOME" 54 | unset PYTHONHOME 55 | fi 56 | 57 | if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT-}" ] ; then 58 | _OLD_VIRTUAL_PS1="$PS1" 59 | if [ "x" != x ] ; then 60 | PS1="$PS1" 61 | else 62 | if [ "`basename \"$VIRTUAL_ENV\"`" = "__" ] ; then 63 | # special case for Aspen magic directories 64 | # see http://www.zetadev.com/software/aspen/ 65 | PS1="[`basename \`dirname \"$VIRTUAL_ENV\"\``] $PS1" 66 | else 67 | PS1="(`basename \"$VIRTUAL_ENV\"`)$PS1" 68 | fi 69 | fi 70 | export PS1 71 | fi 72 | 73 | alias pydoc="python -m pydoc" 74 | 75 | # This should detect bash and zsh, which have a hash command that must 76 | # be called to get it to forget past commands. Without forgetting 77 | # past commands the $PATH changes we made may not be respected 78 | if [ -n "${BASH-}" -o -n "${ZSH_VERSION-}" ] ; then 79 | hash -r 2>/dev/null 80 | fi 81 | -------------------------------------------------------------------------------- /bin/activate.csh: -------------------------------------------------------------------------------- 1 | # This file must be used with "source bin/activate.csh" *from csh*. 2 | # You cannot run it directly. 3 | # Created by Davide Di Blasi . 4 | 5 | alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PATH" && unset _OLD_VIRTUAL_PATH; rehash; test $?_OLD_VIRTUAL_PROMPT != 0 && set prompt="$_OLD_VIRTUAL_PROMPT" && unset _OLD_VIRTUAL_PROMPT; unsetenv VIRTUAL_ENV; test "\!:*" != "nondestructive" && unalias deactivate && unalias pydoc' 6 | 7 | # Unset irrelevant variables. 8 | deactivate nondestructive 9 | 10 | setenv VIRTUAL_ENV "/Users/victorli/open/university-data" 11 | 12 | set _OLD_VIRTUAL_PATH="$PATH" 13 | setenv PATH "$VIRTUAL_ENV/bin:$PATH" 14 | 15 | 16 | 17 | if ("" != "") then 18 | set env_name = "" 19 | else 20 | if (`basename "$VIRTUAL_ENV"` == "__") then 21 | # special case for Aspen magic directories 22 | # see http://www.zetadev.com/software/aspen/ 23 | set env_name = `basename \`dirname "$VIRTUAL_ENV"\`` 24 | else 25 | set env_name = `basename "$VIRTUAL_ENV"` 26 | endif 27 | endif 28 | 29 | # Could be in a non-interactive environment, 30 | # in which case, $prompt is undefined and we wouldn't 31 | # care about the prompt anyway. 32 | if ( $?prompt ) then 33 | set _OLD_VIRTUAL_PROMPT="$prompt" 34 | set prompt = "[$env_name] $prompt" 35 | endif 36 | 37 | unset env_name 38 | 39 | alias pydoc python -m pydoc 40 | 41 | rehash 42 | 43 | -------------------------------------------------------------------------------- /bin/activate.fish: -------------------------------------------------------------------------------- 1 | # This file must be used with "source bin/activate.fish" *from fish* (http://fishshell.com) 2 | # you cannot run it directly 3 | 4 | function deactivate -d "Exit virtualenv and return to normal shell environment" 5 | # reset old environment variables 6 | if test -n "$_OLD_VIRTUAL_PATH" 7 | set -gx PATH $_OLD_VIRTUAL_PATH 8 | set -e _OLD_VIRTUAL_PATH 9 | end 10 | if test -n "$_OLD_VIRTUAL_PYTHONHOME" 11 | set -gx PYTHONHOME $_OLD_VIRTUAL_PYTHONHOME 12 | set -e _OLD_VIRTUAL_PYTHONHOME 13 | end 14 | 15 | if test -n "$_OLD_FISH_PROMPT_OVERRIDE" 16 | # set an empty local fish_function_path, so fish_prompt doesn't automatically reload 17 | set -l fish_function_path 18 | # erase the virtualenv's fish_prompt function, and restore the original 19 | functions -e fish_prompt 20 | functions -c _old_fish_prompt fish_prompt 21 | functions -e _old_fish_prompt 22 | set -e _OLD_FISH_PROMPT_OVERRIDE 23 | end 24 | 25 | set -e VIRTUAL_ENV 26 | if test "$argv[1]" != "nondestructive" 27 | # Self destruct! 28 | functions -e deactivate 29 | end 30 | end 31 | 32 | # unset irrelevant variables 33 | deactivate nondestructive 34 | 35 | set -gx VIRTUAL_ENV "/Users/victorli/open/university-data" 36 | 37 | set -gx _OLD_VIRTUAL_PATH $PATH 38 | set -gx PATH "$VIRTUAL_ENV/bin" $PATH 39 | 40 | # unset PYTHONHOME if set 41 | if set -q PYTHONHOME 42 | set -gx _OLD_VIRTUAL_PYTHONHOME $PYTHONHOME 43 | set -e PYTHONHOME 44 | end 45 | 46 | if test -z "$VIRTUAL_ENV_DISABLE_PROMPT" 47 | # fish uses a function instead of an env var to generate the prompt. 48 | 49 | # copy the current fish_prompt function as the function _old_fish_prompt 50 | functions -c fish_prompt _old_fish_prompt 51 | 52 | # with the original prompt function copied, we can override with our own. 53 | function fish_prompt 54 | # Prompt override? 55 | if test -n "" 56 | printf "%s%s" "" (set_color normal) 57 | _old_fish_prompt 58 | return 59 | end 60 | # ...Otherwise, prepend env 61 | set -l _checkbase (basename "$VIRTUAL_ENV") 62 | if test $_checkbase = "__" 63 | # special case for Aspen magic directories 64 | # see http://www.zetadev.com/software/aspen/ 65 | printf "%s[%s]%s " (set_color -b blue white) (basename (dirname "$VIRTUAL_ENV")) (set_color normal) 66 | _old_fish_prompt 67 | else 68 | printf "%s(%s)%s" (set_color -b blue white) (basename "$VIRTUAL_ENV") (set_color normal) 69 | _old_fish_prompt 70 | end 71 | end 72 | 73 | set -gx _OLD_FISH_PROMPT_OVERRIDE "$VIRTUAL_ENV" 74 | end 75 | -------------------------------------------------------------------------------- /bin/activate_this.py: -------------------------------------------------------------------------------- 1 | """By using execfile(this_file, dict(__file__=this_file)) you will 2 | activate this virtualenv environment. 3 | 4 | This can be used when you must use an existing Python interpreter, not 5 | the virtualenv bin/python 6 | """ 7 | 8 | try: 9 | __file__ 10 | except NameError: 11 | raise AssertionError( 12 | "You must run this like execfile('path/to/activate_this.py', dict(__file__='path/to/activate_this.py'))") 13 | import sys 14 | import os 15 | 16 | old_os_path = os.environ.get('PATH', '') 17 | os.environ['PATH'] = os.path.dirname(os.path.abspath(__file__)) + os.pathsep + old_os_path 18 | base = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 19 | if sys.platform == 'win32': 20 | site_packages = os.path.join(base, 'Lib', 'site-packages') 21 | else: 22 | site_packages = os.path.join(base, 'lib', 'python%s' % sys.version[:3], 'site-packages') 23 | prev_sys_path = list(sys.path) 24 | import site 25 | site.addsitedir(site_packages) 26 | sys.real_prefix = sys.prefix 27 | sys.prefix = base 28 | # Move the added items to the front of the path: 29 | new_sys_path = [] 30 | for item in list(sys.path): 31 | if item not in prev_sys_path: 32 | new_sys_path.append(item) 33 | sys.path.remove(item) 34 | sys.path[:0] = new_sys_path 35 | -------------------------------------------------------------------------------- /bin/easy_install: -------------------------------------------------------------------------------- 1 | #!/Users/victorli/open/university-data/bin/python 2 | 3 | # -*- coding: utf-8 -*- 4 | import re 5 | import sys 6 | 7 | from setuptools.command.easy_install import main 8 | 9 | if __name__ == '__main__': 10 | sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) 11 | sys.exit(main()) 12 | -------------------------------------------------------------------------------- /bin/easy_install-2.7: -------------------------------------------------------------------------------- 1 | #!/Users/victorli/open/university-data/bin/python 2 | 3 | # -*- coding: utf-8 -*- 4 | import re 5 | import sys 6 | 7 | from setuptools.command.easy_install import main 8 | 9 | if __name__ == '__main__': 10 | sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) 11 | sys.exit(main()) 12 | -------------------------------------------------------------------------------- /bin/pip: -------------------------------------------------------------------------------- 1 | #!/Users/victorli/open/university-data/bin/python 2 | 3 | # -*- coding: utf-8 -*- 4 | import re 5 | import sys 6 | 7 | from pip import main 8 | 9 | if __name__ == '__main__': 10 | sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) 11 | sys.exit(main()) 12 | -------------------------------------------------------------------------------- /bin/pip2: -------------------------------------------------------------------------------- 1 | #!/Users/victorli/open/university-data/bin/python 2 | 3 | # -*- coding: utf-8 -*- 4 | import re 5 | import sys 6 | 7 | from pip import main 8 | 9 | if __name__ == '__main__': 10 | sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) 11 | sys.exit(main()) 12 | -------------------------------------------------------------------------------- /bin/pip2.7: -------------------------------------------------------------------------------- 1 | #!/Users/victorli/open/university-data/bin/python 2 | 3 | # -*- coding: utf-8 -*- 4 | import re 5 | import sys 6 | 7 | from pip import main 8 | 9 | if __name__ == '__main__': 10 | sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) 11 | sys.exit(main()) 12 | -------------------------------------------------------------------------------- /bin/python: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hugg95/university-data/d06ae8908bcee571e9dbf4d06cf3a2ee4cd5f298/bin/python -------------------------------------------------------------------------------- /bin/python2: -------------------------------------------------------------------------------- 1 | python -------------------------------------------------------------------------------- /bin/python2.7: -------------------------------------------------------------------------------- 1 | python -------------------------------------------------------------------------------- /bin/runxlrd.py: -------------------------------------------------------------------------------- 1 | #!/Users/victorli/open/university-data/bin/python 2 | # Copyright (c) 2005-2012 Stephen John Machin, Lingfo Pty Ltd 3 | # This script is part of the xlrd package, which is released under a 4 | # BSD-style licence. 5 | 6 | from __future__ import print_function 7 | 8 | cmd_doc = """ 9 | Commands: 10 | 11 | 2rows Print the contents of first and last row in each sheet 12 | 3rows Print the contents of first, second and last row in each sheet 13 | bench Same as "show", but doesn't print -- for profiling 14 | biff_count[1] Print a count of each type of BIFF record in the file 15 | biff_dump[1] Print a dump (char and hex) of the BIFF records in the file 16 | fonts hdr + print a dump of all font objects 17 | hdr Mini-overview of file (no per-sheet information) 18 | hotshot Do a hotshot profile run e.g. ... -f1 hotshot bench bigfile*.xls 19 | labels Dump of sheet.col_label_ranges and ...row... for each sheet 20 | name_dump Dump of each object in book.name_obj_list 21 | names Print brief information for each NAME record 22 | ov Overview of file 23 | profile Like "hotshot", but uses cProfile 24 | show Print the contents of all rows in each sheet 25 | version[0] Print versions of xlrd and Python and exit 26 | xfc Print "XF counts" and cell-type counts -- see code for details 27 | 28 | [0] means no file arg 29 | [1] means only one file arg i.e. no glob.glob pattern 30 | """ 31 | 32 | options = None 33 | if __name__ == "__main__": 34 | 35 | PSYCO = 0 36 | 37 | import xlrd 38 | import sys, time, glob, traceback, gc 39 | 40 | from xlrd.timemachine import xrange, REPR 41 | 42 | 43 | class LogHandler(object): 44 | 45 | def __init__(self, logfileobj): 46 | self.logfileobj = logfileobj 47 | self.fileheading = None 48 | self.shown = 0 49 | 50 | def setfileheading(self, fileheading): 51 | self.fileheading = fileheading 52 | self.shown = 0 53 | 54 | def write(self, text): 55 | if self.fileheading and not self.shown: 56 | self.logfileobj.write(self.fileheading) 57 | self.shown = 1 58 | self.logfileobj.write(text) 59 | 60 | null_cell = xlrd.empty_cell 61 | 62 | def show_row(bk, sh, rowx, colrange, printit): 63 | if bk.ragged_rows: 64 | colrange = range(sh.row_len(rowx)) 65 | if not colrange: return 66 | if printit: print() 67 | if bk.formatting_info: 68 | for colx, ty, val, cxfx in get_row_data(bk, sh, rowx, colrange): 69 | if printit: 70 | print("cell %s%d: type=%d, data: %r, xfx: %s" 71 | % (xlrd.colname(colx), rowx+1, ty, val, cxfx)) 72 | else: 73 | for colx, ty, val, _unused in get_row_data(bk, sh, rowx, colrange): 74 | if printit: 75 | print("cell %s%d: type=%d, data: %r" % (xlrd.colname(colx), rowx+1, ty, val)) 76 | 77 | def get_row_data(bk, sh, rowx, colrange): 78 | result = [] 79 | dmode = bk.datemode 80 | ctys = sh.row_types(rowx) 81 | cvals = sh.row_values(rowx) 82 | for colx in colrange: 83 | cty = ctys[colx] 84 | cval = cvals[colx] 85 | if bk.formatting_info: 86 | cxfx = str(sh.cell_xf_index(rowx, colx)) 87 | else: 88 | cxfx = '' 89 | if cty == xlrd.XL_CELL_DATE: 90 | try: 91 | showval = xlrd.xldate_as_tuple(cval, dmode) 92 | except xlrd.XLDateError as e: 93 | showval = "%s:%s" % (type(e).__name__, e) 94 | cty = xlrd.XL_CELL_ERROR 95 | elif cty == xlrd.XL_CELL_ERROR: 96 | showval = xlrd.error_text_from_code.get(cval, '' % cval) 97 | else: 98 | showval = cval 99 | result.append((colx, cty, showval, cxfx)) 100 | return result 101 | 102 | def bk_header(bk): 103 | print() 104 | print("BIFF version: %s; datemode: %s" 105 | % (xlrd.biff_text_from_num[bk.biff_version], bk.datemode)) 106 | print("codepage: %r (encoding: %s); countries: %r" 107 | % (bk.codepage, bk.encoding, bk.countries)) 108 | print("Last saved by: %r" % bk.user_name) 109 | print("Number of data sheets: %d" % bk.nsheets) 110 | print("Use mmap: %d; Formatting: %d; On demand: %d" 111 | % (bk.use_mmap, bk.formatting_info, bk.on_demand)) 112 | print("Ragged rows: %d" % bk.ragged_rows) 113 | if bk.formatting_info: 114 | print("FORMATs: %d, FONTs: %d, XFs: %d" 115 | % (len(bk.format_list), len(bk.font_list), len(bk.xf_list))) 116 | if not options.suppress_timing: 117 | print("Load time: %.2f seconds (stage 1) %.2f seconds (stage 2)" 118 | % (bk.load_time_stage_1, bk.load_time_stage_2)) 119 | print() 120 | 121 | def show_fonts(bk): 122 | print("Fonts:") 123 | for x in xrange(len(bk.font_list)): 124 | font = bk.font_list[x] 125 | font.dump(header='== Index %d ==' % x, indent=4) 126 | 127 | def show_names(bk, dump=0): 128 | bk_header(bk) 129 | if bk.biff_version < 50: 130 | print("Names not extracted in this BIFF version") 131 | return 132 | nlist = bk.name_obj_list 133 | print("Name list: %d entries" % len(nlist)) 134 | for nobj in nlist: 135 | if dump: 136 | nobj.dump(sys.stdout, 137 | header="\n=== Dump of name_obj_list[%d] ===" % nobj.name_index) 138 | else: 139 | print("[%d]\tName:%r macro:%r scope:%d\n\tresult:%r\n" 140 | % (nobj.name_index, nobj.name, nobj.macro, nobj.scope, nobj.result)) 141 | 142 | def print_labels(sh, labs, title): 143 | if not labs:return 144 | for rlo, rhi, clo, chi in labs: 145 | print("%s label range %s:%s contains:" 146 | % (title, xlrd.cellname(rlo, clo), xlrd.cellname(rhi-1, chi-1))) 147 | for rx in xrange(rlo, rhi): 148 | for cx in xrange(clo, chi): 149 | print(" %s: %r" % (xlrd.cellname(rx, cx), sh.cell_value(rx, cx))) 150 | 151 | def show_labels(bk): 152 | # bk_header(bk) 153 | hdr = 0 154 | for shx in range(bk.nsheets): 155 | sh = bk.sheet_by_index(shx) 156 | clabs = sh.col_label_ranges 157 | rlabs = sh.row_label_ranges 158 | if clabs or rlabs: 159 | if not hdr: 160 | bk_header(bk) 161 | hdr = 1 162 | print("sheet %d: name = %r; nrows = %d; ncols = %d" % 163 | (shx, sh.name, sh.nrows, sh.ncols)) 164 | print_labels(sh, clabs, 'Col') 165 | print_labels(sh, rlabs, 'Row') 166 | if bk.on_demand: bk.unload_sheet(shx) 167 | 168 | def show(bk, nshow=65535, printit=1): 169 | bk_header(bk) 170 | if 0: 171 | rclist = xlrd.sheet.rc_stats.items() 172 | rclist = sorted(rclist) 173 | print("rc stats") 174 | for k, v in rclist: 175 | print("0x%04x %7d" % (k, v)) 176 | if options.onesheet: 177 | try: 178 | shx = int(options.onesheet) 179 | except ValueError: 180 | shx = bk.sheet_by_name(options.onesheet).number 181 | shxrange = [shx] 182 | else: 183 | shxrange = range(bk.nsheets) 184 | # print("shxrange", list(shxrange)) 185 | for shx in shxrange: 186 | sh = bk.sheet_by_index(shx) 187 | nrows, ncols = sh.nrows, sh.ncols 188 | colrange = range(ncols) 189 | anshow = min(nshow, nrows) 190 | print("sheet %d: name = %s; nrows = %d; ncols = %d" % 191 | (shx, REPR(sh.name), sh.nrows, sh.ncols)) 192 | if nrows and ncols: 193 | # Beat the bounds 194 | for rowx in xrange(nrows): 195 | nc = sh.row_len(rowx) 196 | if nc: 197 | _junk = sh.row_types(rowx)[nc-1] 198 | _junk = sh.row_values(rowx)[nc-1] 199 | _junk = sh.cell(rowx, nc-1) 200 | for rowx in xrange(anshow-1): 201 | if not printit and rowx % 10000 == 1 and rowx > 1: 202 | print("done %d rows" % (rowx-1,)) 203 | show_row(bk, sh, rowx, colrange, printit) 204 | if anshow and nrows: 205 | show_row(bk, sh, nrows-1, colrange, printit) 206 | print() 207 | if bk.on_demand: bk.unload_sheet(shx) 208 | 209 | def count_xfs(bk): 210 | bk_header(bk) 211 | for shx in range(bk.nsheets): 212 | sh = bk.sheet_by_index(shx) 213 | nrows, ncols = sh.nrows, sh.ncols 214 | print("sheet %d: name = %r; nrows = %d; ncols = %d" % 215 | (shx, sh.name, sh.nrows, sh.ncols)) 216 | # Access all xfindexes to force gathering stats 217 | type_stats = [0, 0, 0, 0, 0, 0, 0] 218 | for rowx in xrange(nrows): 219 | for colx in xrange(sh.row_len(rowx)): 220 | xfx = sh.cell_xf_index(rowx, colx) 221 | assert xfx >= 0 222 | cty = sh.cell_type(rowx, colx) 223 | type_stats[cty] += 1 224 | print("XF stats", sh._xf_index_stats) 225 | print("type stats", type_stats) 226 | print() 227 | if bk.on_demand: bk.unload_sheet(shx) 228 | 229 | def main(cmd_args): 230 | import optparse 231 | global options, PSYCO 232 | usage = "\n%prog [options] command [input-file-patterns]\n" + cmd_doc 233 | oparser = optparse.OptionParser(usage) 234 | oparser.add_option( 235 | "-l", "--logfilename", 236 | default="", 237 | help="contains error messages") 238 | oparser.add_option( 239 | "-v", "--verbosity", 240 | type="int", default=0, 241 | help="level of information and diagnostics provided") 242 | oparser.add_option( 243 | "-m", "--mmap", 244 | type="int", default=-1, 245 | help="1: use mmap; 0: don't use mmap; -1: accept heuristic") 246 | oparser.add_option( 247 | "-e", "--encoding", 248 | default="", 249 | help="encoding override") 250 | oparser.add_option( 251 | "-f", "--formatting", 252 | type="int", default=0, 253 | help="0 (default): no fmt info\n" 254 | "1: fmt info (all cells)\n" 255 | ) 256 | oparser.add_option( 257 | "-g", "--gc", 258 | type="int", default=0, 259 | help="0: auto gc enabled; 1: auto gc disabled, manual collect after each file; 2: no gc") 260 | oparser.add_option( 261 | "-s", "--onesheet", 262 | default="", 263 | help="restrict output to this sheet (name or index)") 264 | oparser.add_option( 265 | "-u", "--unnumbered", 266 | action="store_true", default=0, 267 | help="omit line numbers or offsets in biff_dump") 268 | oparser.add_option( 269 | "-d", "--on-demand", 270 | action="store_true", default=0, 271 | help="load sheets on demand instead of all at once") 272 | oparser.add_option( 273 | "-t", "--suppress-timing", 274 | action="store_true", default=0, 275 | help="don't print timings (diffs are less messy)") 276 | oparser.add_option( 277 | "-r", "--ragged-rows", 278 | action="store_true", default=0, 279 | help="open_workbook(..., ragged_rows=True)") 280 | options, args = oparser.parse_args(cmd_args) 281 | if len(args) == 1 and args[0] in ("version", ): 282 | pass 283 | elif len(args) < 2: 284 | oparser.error("Expected at least 2 args, found %d" % len(args)) 285 | cmd = args[0] 286 | xlrd_version = getattr(xlrd, "__VERSION__", "unknown; before 0.5") 287 | if cmd == 'biff_dump': 288 | xlrd.dump(args[1], unnumbered=options.unnumbered) 289 | sys.exit(0) 290 | if cmd == 'biff_count': 291 | xlrd.count_records(args[1]) 292 | sys.exit(0) 293 | if cmd == 'version': 294 | print("xlrd: %s, from %s" % (xlrd_version, xlrd.__file__)) 295 | print("Python:", sys.version) 296 | sys.exit(0) 297 | if options.logfilename: 298 | logfile = LogHandler(open(options.logfilename, 'w')) 299 | else: 300 | logfile = sys.stdout 301 | mmap_opt = options.mmap 302 | mmap_arg = xlrd.USE_MMAP 303 | if mmap_opt in (1, 0): 304 | mmap_arg = mmap_opt 305 | elif mmap_opt != -1: 306 | print('Unexpected value (%r) for mmap option -- assuming default' % mmap_opt) 307 | fmt_opt = options.formatting | (cmd in ('xfc', )) 308 | gc_mode = options.gc 309 | if gc_mode: 310 | gc.disable() 311 | for pattern in args[1:]: 312 | for fname in glob.glob(pattern): 313 | print("\n=== File: %s ===" % fname) 314 | if logfile != sys.stdout: 315 | logfile.setfileheading("\n=== File: %s ===\n" % fname) 316 | if gc_mode == 1: 317 | n_unreachable = gc.collect() 318 | if n_unreachable: 319 | print("GC before open:", n_unreachable, "unreachable objects") 320 | if PSYCO: 321 | import psyco 322 | psyco.full() 323 | PSYCO = 0 324 | try: 325 | t0 = time.time() 326 | bk = xlrd.open_workbook(fname, 327 | verbosity=options.verbosity, logfile=logfile, 328 | use_mmap=mmap_arg, 329 | encoding_override=options.encoding, 330 | formatting_info=fmt_opt, 331 | on_demand=options.on_demand, 332 | ragged_rows=options.ragged_rows, 333 | ) 334 | t1 = time.time() 335 | if not options.suppress_timing: 336 | print("Open took %.2f seconds" % (t1-t0,)) 337 | except xlrd.XLRDError as e: 338 | print("*** Open failed: %s: %s" % (type(e).__name__, e)) 339 | continue 340 | except KeyboardInterrupt: 341 | print("*** KeyboardInterrupt ***") 342 | traceback.print_exc(file=sys.stdout) 343 | sys.exit(1) 344 | except BaseException as e: 345 | print("*** Open failed: %s: %s" % (type(e).__name__, e)) 346 | traceback.print_exc(file=sys.stdout) 347 | continue 348 | t0 = time.time() 349 | if cmd == 'hdr': 350 | bk_header(bk) 351 | elif cmd == 'ov': # OverView 352 | show(bk, 0) 353 | elif cmd == 'show': # all rows 354 | show(bk) 355 | elif cmd == '2rows': # first row and last row 356 | show(bk, 2) 357 | elif cmd == '3rows': # first row, 2nd row and last row 358 | show(bk, 3) 359 | elif cmd == 'bench': 360 | show(bk, printit=0) 361 | elif cmd == 'fonts': 362 | bk_header(bk) 363 | show_fonts(bk) 364 | elif cmd == 'names': # named reference list 365 | show_names(bk) 366 | elif cmd == 'name_dump': # named reference list 367 | show_names(bk, dump=1) 368 | elif cmd == 'labels': 369 | show_labels(bk) 370 | elif cmd == 'xfc': 371 | count_xfs(bk) 372 | else: 373 | print("*** Unknown command <%s>" % cmd) 374 | sys.exit(1) 375 | del bk 376 | if gc_mode == 1: 377 | n_unreachable = gc.collect() 378 | if n_unreachable: 379 | print("GC post cmd:", fname, "->", n_unreachable, "unreachable objects") 380 | if not options.suppress_timing: 381 | t1 = time.time() 382 | print("\ncommand took %.2f seconds\n" % (t1-t0,)) 383 | 384 | return None 385 | 386 | av = sys.argv[1:] 387 | if not av: 388 | main(av) 389 | firstarg = av[0].lower() 390 | if firstarg == "hotshot": 391 | import hotshot, hotshot.stats 392 | av = av[1:] 393 | prof_log_name = "XXXX.prof" 394 | prof = hotshot.Profile(prof_log_name) 395 | # benchtime, result = prof.runcall(main, *av) 396 | result = prof.runcall(main, *(av, )) 397 | print("result", repr(result)) 398 | prof.close() 399 | stats = hotshot.stats.load(prof_log_name) 400 | stats.strip_dirs() 401 | stats.sort_stats('time', 'calls') 402 | stats.print_stats(20) 403 | elif firstarg == "profile": 404 | import cProfile 405 | av = av[1:] 406 | cProfile.run('main(av)', 'YYYY.prof') 407 | import pstats 408 | p = pstats.Stats('YYYY.prof') 409 | p.strip_dirs().sort_stats('cumulative').print_stats(30) 410 | elif firstarg == "psyco": 411 | PSYCO = 1 412 | main(av[1:]) 413 | else: 414 | main(av) 415 | -------------------------------------------------------------------------------- /bin/wheel: -------------------------------------------------------------------------------- 1 | #!/Users/victorli/open/university-data/bin/python 2 | 3 | # -*- coding: utf-8 -*- 4 | import re 5 | import sys 6 | 7 | from wheel.tool import main 8 | 9 | if __name__ == '__main__': 10 | sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) 11 | sys.exit(main()) 12 | -------------------------------------------------------------------------------- /include/python2.7: -------------------------------------------------------------------------------- 1 | /System/Library/Frameworks/Python.framework/Versions/2.7/include/python2.7 -------------------------------------------------------------------------------- /pip-selfcheck.json: -------------------------------------------------------------------------------- 1 | {"last_check":"2017-06-19T06:52:53Z","pypi_version":"9.0.1"} -------------------------------------------------------------------------------- /read_xls.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | """ 4 | 读取高校xls文件,转换成json数据 5 | """ 6 | 7 | import xlrd 8 | import json 9 | import re 10 | 11 | def convert(filename="./university.xls"): 12 | book = xlrd.open_workbook(filename) 13 | sheets = book.sheets() 14 | result = { 15 | 'province': [], 16 | 'city': [], 17 | 'university': [] 18 | } 19 | for sheet in sheets: 20 | province_id = None 21 | city_id = None 22 | cities = [] 23 | for row in range(0, sheet.nrows): 24 | if row < 3: 25 | continue 26 | for col in range(0, sheet.ncols): 27 | cell = sheet.cell(row, col) 28 | if col == 0 and cell.ctype == 1: 29 | # 省份格式如: 河北省(121所) 30 | if re.search(ur"^\W+(\d+所)$", cell.value): 31 | if province_id is None: 32 | province_id = 1 33 | else: 34 | province_id += 1 35 | province_name = re.sub(ur"(\d+所)", u'', cell.value) 36 | province_obj = { 37 | 'id': province_id, 38 | 'name': province_name 39 | } 40 | if province_obj not in result['province']: 41 | result['province'].append(province_obj) 42 | 43 | if col == 4 and cell.ctype == 1: 44 | cid = None 45 | if cell.value not in cities: 46 | if len(cities) == 0: 47 | city_id = 1 48 | else: 49 | city_id += 1 50 | cities.append(cell.value) 51 | city_obj = { 52 | 'id': city_id, 53 | 'name': cell.value, 54 | 'pid': province_id 55 | } 56 | result['city'].append(city_obj) 57 | cid = city_id 58 | else: 59 | cid = cities.index(cell.value) + 1 60 | 61 | result['university'].append({ 62 | 'id': sheet.cell(row, 2).value, 63 | 'name': sheet.cell(row, 1).value, 64 | 'level': sheet.cell(row, 5).value, 65 | 'type': sheet.cell(row, 6).value or u'公办', 66 | 'cid': cid 67 | }) 68 | 69 | return result 70 | 71 | def save_json(json_data={}): 72 | if json_data: 73 | with open('./data.json', 'w') as f: 74 | f.write(json.dumps(json_data)) 75 | 76 | def main(): 77 | json_data = convert() 78 | save_json(json_data) 79 | 80 | if __name__ == '__main__': 81 | main() 82 | -------------------------------------------------------------------------------- /university.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hugg95/university-data/d06ae8908bcee571e9dbf4d06cf3a2ee4cd5f298/university.xls --------------------------------------------------------------------------------