├── Data_Analysis.ipynb ├── Data_Cleaning.ipynb ├── Lagou_Spider.ipynb ├── Lagou_Spider.py ├── README.md ├── data └── Lagou_Data.csv ├── stopwords.txt ├── synonyms.txt ├── userdict.txt ├── 全国数据分析岗位地域分布可视化.html └── 全国数据分析岗位地域分布可视化_pyecharts.ipynb /Data_Cleaning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "**载入数据**" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "import numpy as np\n", 19 | "import pandas as pd\n", 20 | "from sqlalchemy import create_engine" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 2, 26 | "metadata": { 27 | "collapsed": true 28 | }, 29 | "outputs": [], 30 | "source": [ 31 | "engine = create_engine('postgresql://postgres:947172@localhost:5432/postgres')\n", 32 | "sql = 'SELECT * FROM \"Lagou_DA\"'\n", 33 | "df = pd.read_sql(sql,con = engine)" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 3, 39 | "metadata": { 40 | "collapsed": true 41 | }, 42 | "outputs": [ 43 | { 44 | "data": { 45 | "text/html": [ 46 | "
\n", 47 | "\n", 60 | "\n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | "
01234
adWord00000
appShow00000
approve11111
businessZones{石牌,岗顶,龙口}None{鲁班路,五里桥,鲁班路,五里桥,鲁班路,五里桥}None{西溪,古墩路,古墩路}
city广州北京上海上海杭州
companyFullName广州易幻网络科技有限公司人人贷商务顾问(北京)有限公司鼎必程(上海)信息技术有限公司唯品会(中国)有限公司上海依图信息技术有限公司
companyId61386678151036476068457
companyLabelList{年底双薪,绩效奖金,年度旅游,六险一金}{绩效奖金,年底双薪,五险一金,交通补助}{年底双薪,带薪年假,定期体检,交通补助}{技能培训,节日礼物,绩效奖金,岗位晋升}{技能培训,带薪年假,绩效奖金,扁平管理}
companyLogoimage1/M00/09/66/CgYXBlTdds2ASqwFAAByli6IhmM19...i/image3/M00/15/02/Cgq2xlpxe9OAPFtoAAAk0nW9FGM...i/image/M00/66/5B/Cgp3O1gJ7_aAU0uCAAAbCfDZO380...i/image/M00/5D/6A/CgpEMlmK5h6AGsPEAAAkuPM6On47...image2/M00/04/68/CgpzWlXxWBGALWQtAAApEXlSpBM72...
companyShortName易幻网络人人贷群星金融唯品会依图科技
companySize150-500人500-2000人50-150人2000人以上150-500人
content岗位职责1.挖掘及分析游戏项目数据2.管理及建立数据模型3.游戏用户行为分析4.定期编写相关...工作职责:1.建立相关统计模型,对用户交易数据进行挖掘,对公司战略决策、业务优化需求提供支撑...岗位职责:1)负责应收账款的基础数据分析,包括资产透析,可视化渲染以及最终分析报告;2)深入...岗位职责:1、对业务部门进行数据需求支持,挖掘分析主题,开展数据分析工作,基于数据分析成果,...全职/实习/兼职皆可职责定义:•活跃度运营:调动标注人员的积极性,维护合作关系;面向标注人员...
createTime2018-04-12 15:18:312018-04-12 15:16:472018-04-12 15:15:442018-04-12 15:15:072018-04-12 15:12:52
deliver00000
district天河区海淀区浦东新区闵行区西湖区
education本科本科本科本科本科
explainNoneNoneNoneNoneNone
financeStage上市公司A轮B轮上市公司C轮
firstType产品/需求/项目类开发/测试/运维类产品/需求/项目类产品/需求/项目类产品/需求/项目类
formatCreateTime15:18发布15:16发布15:15发布15:15发布15:12发布
gradeDescriptionNoneNoneNoneNoneNone
hitagsNoneNoneNoneNoneNone
imStatetodaytodaytodaydisabledtoday
industryField游戏金融金融电子商务数据服务
industryLables{游戏,建模,数值}{金融,SQL}{}{}{}
isSchoolJob00000
jobNature全职全职全职全职全职
lastLogin1.52352e+121.52352e+121.5235e+121.52344e+121.5235e+12
latitude23.136739.99471431.1838980.030.270038
linestaion1号线_体育中心;3号线_石牌桥;3号线_岗顶;3号线_华师4号线大兴线_北京大学东门;13号线_五道口;15号线_清华东路西口7号线_长清路;7号线_后滩;13号线_世博大道NoneNone
longitude113.33955116.330778121.4752150.0120.100748
pcShow00000
plusNoneNoneNoneNoneNone
positionAdvantage氛围好,牛人多,年终奖14薪,每月补助,年轻团队,升职空间发展空间大,弹性工作,免费班车,无限零食前景好,福利优包三餐,牛人多,工作氛围佳,有前景
positionId43622284321741392887441186164264098
positionLables{游戏,建模,数值}{金融,SQL}{资深,大数据,数据挖掘}{大数据,数据挖掘}{助理,风控,质检}
positionName数据分析专员数据分析师数据分析师资深数据分析师(数据产品)数据分析师(儿科方向)
promotionScoreExplainNoneNoneNoneNoneNone
publisherId621549300618925594342687098923501
resumeProcessDay11110
resumeProcessRate34100320
salary5k-8k10k-16k10k-20k25k-50k1k-2k
score00000
secondType数据分析数据开发数据分析数据分析数据分析
stationname石牌桥五道口长清路NoneNone
subwayline3号线15号线7号线NoneNone
workYear1-3年1-3年1-3年5-10年不限
\n", 450 | "
" 451 | ], 452 | "text/plain": [ 453 | " 0 \\\n", 454 | "adWord 0 \n", 455 | "appShow 0 \n", 456 | "approve 1 \n", 457 | "businessZones {石牌,岗顶,龙口} \n", 458 | "city 广州 \n", 459 | "companyFullName 广州易幻网络科技有限公司 \n", 460 | "companyId 6138 \n", 461 | "companyLabelList {年底双薪,绩效奖金,年度旅游,六险一金} \n", 462 | "companyLogo image1/M00/09/66/CgYXBlTdds2ASqwFAAByli6IhmM19... \n", 463 | "companyShortName 易幻网络 \n", 464 | "companySize 150-500人 \n", 465 | "content 岗位职责1.挖掘及分析游戏项目数据2.管理及建立数据模型3.游戏用户行为分析4.定期编写相关... \n", 466 | "createTime 2018-04-12 15:18:31 \n", 467 | "deliver 0 \n", 468 | "district 天河区 \n", 469 | "education 本科 \n", 470 | "explain None \n", 471 | "financeStage 上市公司 \n", 472 | "firstType 产品/需求/项目类 \n", 473 | "formatCreateTime 15:18发布 \n", 474 | "gradeDescription None \n", 475 | "hitags None \n", 476 | "imState today \n", 477 | "industryField 游戏 \n", 478 | "industryLables {游戏,建模,数值} \n", 479 | "isSchoolJob 0 \n", 480 | "jobNature 全职 \n", 481 | "lastLogin 1.52352e+12 \n", 482 | "latitude 23.1367 \n", 483 | "linestaion 1号线_体育中心;3号线_石牌桥;3号线_岗顶;3号线_华师 \n", 484 | "longitude 113.33955 \n", 485 | "pcShow 0 \n", 486 | "plus None \n", 487 | "positionAdvantage 氛围好,牛人多,年终奖 \n", 488 | "positionId 4362228 \n", 489 | "positionLables {游戏,建模,数值} \n", 490 | "positionName 数据分析专员 \n", 491 | "promotionScoreExplain None \n", 492 | "publisherId 62154 \n", 493 | "resumeProcessDay 1 \n", 494 | "resumeProcessRate 34 \n", 495 | "salary 5k-8k \n", 496 | "score 0 \n", 497 | "secondType 数据分析 \n", 498 | "stationname 石牌桥 \n", 499 | "subwayline 3号线 \n", 500 | "workYear 1-3年 \n", 501 | "\n", 502 | " 1 \\\n", 503 | "adWord 0 \n", 504 | "appShow 0 \n", 505 | "approve 1 \n", 506 | "businessZones None \n", 507 | "city 北京 \n", 508 | "companyFullName 人人贷商务顾问(北京)有限公司 \n", 509 | "companyId 6678 \n", 510 | "companyLabelList {绩效奖金,年底双薪,五险一金,交通补助} \n", 511 | "companyLogo i/image3/M00/15/02/Cgq2xlpxe9OAPFtoAAAk0nW9FGM... \n", 512 | "companyShortName 人人贷 \n", 513 | "companySize 500-2000人 \n", 514 | "content 工作职责:1.建立相关统计模型,对用户交易数据进行挖掘,对公司战略决策、业务优化需求提供支撑... \n", 515 | "createTime 2018-04-12 15:16:47 \n", 516 | "deliver 0 \n", 517 | "district 海淀区 \n", 518 | "education 本科 \n", 519 | "explain None \n", 520 | "financeStage A轮 \n", 521 | "firstType 开发/测试/运维类 \n", 522 | "formatCreateTime 15:16发布 \n", 523 | "gradeDescription None \n", 524 | "hitags None \n", 525 | "imState today \n", 526 | "industryField 金融 \n", 527 | "industryLables {金融,SQL} \n", 528 | "isSchoolJob 0 \n", 529 | "jobNature 全职 \n", 530 | "lastLogin 1.52352e+12 \n", 531 | "latitude 39.994714 \n", 532 | "linestaion 4号线大兴线_北京大学东门;13号线_五道口;15号线_清华东路西口 \n", 533 | "longitude 116.330778 \n", 534 | "pcShow 0 \n", 535 | "plus None \n", 536 | "positionAdvantage 14薪,每月补助,年轻团队,升职空间 \n", 537 | "positionId 4321741 \n", 538 | "positionLables {金融,SQL} \n", 539 | "positionName 数据分析师 \n", 540 | "promotionScoreExplain None \n", 541 | "publisherId 9300618 \n", 542 | "resumeProcessDay 1 \n", 543 | "resumeProcessRate 100 \n", 544 | "salary 10k-16k \n", 545 | "score 0 \n", 546 | "secondType 数据开发 \n", 547 | "stationname 五道口 \n", 548 | "subwayline 15号线 \n", 549 | "workYear 1-3年 \n", 550 | "\n", 551 | " 2 \\\n", 552 | "adWord 0 \n", 553 | "appShow 0 \n", 554 | "approve 1 \n", 555 | "businessZones {鲁班路,五里桥,鲁班路,五里桥,鲁班路,五里桥} \n", 556 | "city 上海 \n", 557 | "companyFullName 鼎必程(上海)信息技术有限公司 \n", 558 | "companyId 151036 \n", 559 | "companyLabelList {年底双薪,带薪年假,定期体检,交通补助} \n", 560 | "companyLogo i/image/M00/66/5B/Cgp3O1gJ7_aAU0uCAAAbCfDZO380... \n", 561 | "companyShortName 群星金融 \n", 562 | "companySize 50-150人 \n", 563 | "content 岗位职责:1)负责应收账款的基础数据分析,包括资产透析,可视化渲染以及最终分析报告;2)深入... \n", 564 | "createTime 2018-04-12 15:15:44 \n", 565 | "deliver 0 \n", 566 | "district 浦东新区 \n", 567 | "education 本科 \n", 568 | "explain None \n", 569 | "financeStage B轮 \n", 570 | "firstType 产品/需求/项目类 \n", 571 | "formatCreateTime 15:15发布 \n", 572 | "gradeDescription None \n", 573 | "hitags None \n", 574 | "imState today \n", 575 | "industryField 金融 \n", 576 | "industryLables {} \n", 577 | "isSchoolJob 0 \n", 578 | "jobNature 全职 \n", 579 | "lastLogin 1.5235e+12 \n", 580 | "latitude 31.183898 \n", 581 | "linestaion 7号线_长清路;7号线_后滩;13号线_世博大道 \n", 582 | "longitude 121.475215 \n", 583 | "pcShow 0 \n", 584 | "plus None \n", 585 | "positionAdvantage 发展空间大,弹性工作,免费班车,无限零食 \n", 586 | "positionId 3928874 \n", 587 | "positionLables {资深,大数据,数据挖掘} \n", 588 | "positionName 数据分析师 \n", 589 | "promotionScoreExplain None \n", 590 | "publisherId 9255943 \n", 591 | "resumeProcessDay 1 \n", 592 | "resumeProcessRate 3 \n", 593 | "salary 10k-20k \n", 594 | "score 0 \n", 595 | "secondType 数据分析 \n", 596 | "stationname 长清路 \n", 597 | "subwayline 7号线 \n", 598 | "workYear 1-3年 \n", 599 | "\n", 600 | " 3 \\\n", 601 | "adWord 0 \n", 602 | "appShow 0 \n", 603 | "approve 1 \n", 604 | "businessZones None \n", 605 | "city 上海 \n", 606 | "companyFullName 唯品会(中国)有限公司 \n", 607 | "companyId 4760 \n", 608 | "companyLabelList {技能培训,节日礼物,绩效奖金,岗位晋升} \n", 609 | "companyLogo i/image/M00/5D/6A/CgpEMlmK5h6AGsPEAAAkuPM6On47... \n", 610 | "companyShortName 唯品会 \n", 611 | "companySize 2000人以上 \n", 612 | "content 岗位职责:1、对业务部门进行数据需求支持,挖掘分析主题,开展数据分析工作,基于数据分析成果,... \n", 613 | "createTime 2018-04-12 15:15:07 \n", 614 | "deliver 0 \n", 615 | "district 闵行区 \n", 616 | "education 本科 \n", 617 | "explain None \n", 618 | "financeStage 上市公司 \n", 619 | "firstType 产品/需求/项目类 \n", 620 | "formatCreateTime 15:15发布 \n", 621 | "gradeDescription None \n", 622 | "hitags None \n", 623 | "imState disabled \n", 624 | "industryField 电子商务 \n", 625 | "industryLables {} \n", 626 | "isSchoolJob 0 \n", 627 | "jobNature 全职 \n", 628 | "lastLogin 1.52344e+12 \n", 629 | "latitude 0.0 \n", 630 | "linestaion None \n", 631 | "longitude 0.0 \n", 632 | "pcShow 0 \n", 633 | "plus None \n", 634 | "positionAdvantage 前景好,福利优 \n", 635 | "positionId 4118616 \n", 636 | "positionLables {大数据,数据挖掘} \n", 637 | "positionName 资深数据分析师(数据产品) \n", 638 | "promotionScoreExplain None \n", 639 | "publisherId 4268709 \n", 640 | "resumeProcessDay 1 \n", 641 | "resumeProcessRate 2 \n", 642 | "salary 25k-50k \n", 643 | "score 0 \n", 644 | "secondType 数据分析 \n", 645 | "stationname None \n", 646 | "subwayline None \n", 647 | "workYear 5-10年 \n", 648 | "\n", 649 | " 4 \n", 650 | "adWord 0 \n", 651 | "appShow 0 \n", 652 | "approve 1 \n", 653 | "businessZones {西溪,古墩路,古墩路} \n", 654 | "city 杭州 \n", 655 | "companyFullName 上海依图信息技术有限公司 \n", 656 | "companyId 68457 \n", 657 | "companyLabelList {技能培训,带薪年假,绩效奖金,扁平管理} \n", 658 | "companyLogo image2/M00/04/68/CgpzWlXxWBGALWQtAAApEXlSpBM72... \n", 659 | "companyShortName 依图科技 \n", 660 | "companySize 150-500人 \n", 661 | "content 全职/实习/兼职皆可职责定义:•活跃度运营:调动标注人员的积极性,维护合作关系;面向标注人员... \n", 662 | "createTime 2018-04-12 15:12:52 \n", 663 | "deliver 0 \n", 664 | "district 西湖区 \n", 665 | "education 本科 \n", 666 | "explain None \n", 667 | "financeStage C轮 \n", 668 | "firstType 产品/需求/项目类 \n", 669 | "formatCreateTime 15:12发布 \n", 670 | "gradeDescription None \n", 671 | "hitags None \n", 672 | "imState today \n", 673 | "industryField 数据服务 \n", 674 | "industryLables {} \n", 675 | "isSchoolJob 0 \n", 676 | "jobNature 全职 \n", 677 | "lastLogin 1.5235e+12 \n", 678 | "latitude 30.270038 \n", 679 | "linestaion None \n", 680 | "longitude 120.100748 \n", 681 | "pcShow 0 \n", 682 | "plus None \n", 683 | "positionAdvantage 包三餐,牛人多,工作氛围佳,有前景 \n", 684 | "positionId 4264098 \n", 685 | "positionLables {助理,风控,质检} \n", 686 | "positionName 数据分析师(儿科方向) \n", 687 | "promotionScoreExplain None \n", 688 | "publisherId 8923501 \n", 689 | "resumeProcessDay 0 \n", 690 | "resumeProcessRate 0 \n", 691 | "salary 1k-2k \n", 692 | "score 0 \n", 693 | "secondType 数据分析 \n", 694 | "stationname None \n", 695 | "subwayline None \n", 696 | "workYear 不限 " 697 | ] 698 | }, 699 | "execution_count": 3, 700 | "metadata": {}, 701 | "output_type": "execute_result" 702 | } 703 | ], 704 | "source": [ 705 | "df.head().sort_index(axis=1).T" 706 | ] 707 | }, 708 | { 709 | "cell_type": "code", 710 | "execution_count": 4, 711 | "metadata": { 712 | "collapsed": true 713 | }, 714 | "outputs": [ 715 | { 716 | "data": { 717 | "text/html": [ 718 | "
\n", 719 | "\n", 732 | "\n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | " \n", 1073 | " \n", 1074 | " \n", 1075 | " \n", 1076 | " \n", 1077 | " \n", 1078 | " \n", 1079 | " \n", 1080 | " \n", 1081 | " \n", 1082 | " \n", 1083 | " \n", 1084 | " \n", 1085 | " \n", 1086 | " \n", 1087 | " \n", 1088 | " \n", 1089 | " \n", 1090 | " \n", 1091 | " \n", 1092 | " \n", 1093 | " \n", 1094 | " \n", 1095 | " \n", 1096 | " \n", 1097 | " \n", 1098 | " \n", 1099 | " \n", 1100 | " \n", 1101 | " \n", 1102 | " \n", 1103 | " \n", 1104 | " \n", 1105 | " \n", 1106 | " \n", 1107 | " \n", 1108 | " \n", 1109 | " \n", 1110 | " \n", 1111 | " \n", 1112 | " \n", 1113 | " \n", 1114 | " \n", 1115 | " \n", 1116 | " \n", 1117 | " \n", 1118 | " \n", 1119 | " \n", 1120 | " \n", 1121 | "
31083109311031113112
adWord00000
appShow00000
approve11111
businessZones{西北旺,上地,马连洼}{望京,来广营,望京,来广营}{兴隆,沙洲}NoneNone
city北京北京南京深圳厦门
companyFullName北京嘀嘀无限科技发展有限公司北京三快在线科技有限公司通联数据股份公司厦门美图之家科技有限公司厦门美图之家科技有限公司
companyId247450702212182329123291
companyLabelList{弹性工作,节日礼物,岗位晋升,技能培训}{技能培训,绩效奖金,岗位晋升,领导好}{五险一金,通讯津贴,带薪年假,节日礼物}{节日礼物,与大牛共事,福利健全,五险一金}{节日礼物,与大牛共事,福利健全,五险一金}
companyLogoi/image/M00/00/20/Cgp3O1YtmkSAZjX1AAA78knErXc2...i/image/M00/6A/05/Cgp3O1gW8zSAUwUsAABMptH-XY08...image1/M00/00/27/CgYXBlTUWIeABhHDAAA3B5zOAKk17...image1/M00/00/2D/Cgo8PFTUXHmAQ6K2AAA-xp7F-7c90...image1/M00/00/2D/Cgo8PFTUXHmAQ6K2AAA-xp7F-7c90...
companyShortName滴滴出行美团点评通联数据美图公司美图公司
companySize2000人以上2000人以上150-500人500-2000人500-2000人
content职位描述:1.基于业务逻辑,建立业务数据模型,为团队设计反映产品和运营状况的数据报表;2.持...岗位职责:1.以数据分析为基础,形成明确且详实的方案及策略输出,为公司管理层提供有质量的决策...岗位职责:从行业研究角度,规划与收集投研需要的关键行业和公司数据。任职要求:本科学历有会计及...职位描述1:负责美图公司整体商业&行业&战略方向的分析,为各业务线提供策略分析建议;2:从行...岗位职责:1.建立和完善产品的数据体系、如数据收集、数据模型、数据关键指标等;2.负责产品的...
createTime2018-03-14 11:29:572018-03-14 11:29:492018-03-15 11:12:262018-03-13 16:01:222018-03-12 10:37:50
deliver00000
district海淀区朝阳区建邺区南山区思明区
education本科本科本科本科本科
explainNoneNoneNoneNoneNone
financeStage不需要融资D轮及以上不需要融资上市公司上市公司
firstType开发/测试/运维类产品/需求/项目类产品/需求/项目类产品/需求/项目类开发/测试/运维类
formatCreateTime2018-03-142018-03-142018-03-152018-03-132018-03-12
gradeDescriptionNoneNoneNoneNoneNone
hitagsNone{早九晚六,免费体检,地铁周边,生日聚会}NoneNoneNone
imStatedisableddisabledoverSevenDaystodaytoday
industryField移动互联网移动互联网,O2O金融移动互联网,硬件移动互联网,硬件
industryLables{}{旅游,电商,本地生活,SPSS,sas}{}{}{}
isSchoolJob00000
jobNature全职全职全职全职全职
lastLogin1.52352e+121.52353e+121.5218e+121.52351e+121.52351e+12
latitude40.04345440.00616631.994276116.331398116.331398
linestaionNone15号线_望京东NoneNoneNone
longitude116.290146116.487528118.9934439.89744539.897445
pcShow00000
plusNoneNoneNoneNoneNone
positionAdvantage核心部门,15薪,技术驱动免费班车,员工福利,发展空间大,大平台团队氛围好,福利待遇好十亿级用户,火箭式成长,海量数据,上市公司海量数据,牛人团队
positionId29407583604528266699740526633844460
positionLables{资深,高级,大数据,数据挖掘,SPSS}{旅游,电商,本地生活,SPSS,sas}{}{大数据,BI,业务运营,行业研究}{}
positionName数据分析师数据分析行业数据分析师(财务数据方向)数据分析师(商业&行业&战略方向)数据分析师-厦门
promotionScoreExplainNoneNoneNoneNoneNone
publisherId75016718147880321284651158445115844
resumeProcessDay10011
resumeProcessRate1900100100
salary20k-40k8k-12k5k-10k18k-36k10k-20k
score00000
secondType数据开发数据分析数据分析数据分析后端开发
stationnameNone望京东NoneNoneNone
subwaylineNone15号线NoneNoneNone
workYear3-5年1-3年不限3-5年不限
\n", 1122 | "
" 1123 | ], 1124 | "text/plain": [ 1125 | " 3108 \\\n", 1126 | "adWord 0 \n", 1127 | "appShow 0 \n", 1128 | "approve 1 \n", 1129 | "businessZones {西北旺,上地,马连洼} \n", 1130 | "city 北京 \n", 1131 | "companyFullName 北京嘀嘀无限科技发展有限公司 \n", 1132 | "companyId 2474 \n", 1133 | "companyLabelList {弹性工作,节日礼物,岗位晋升,技能培训} \n", 1134 | "companyLogo i/image/M00/00/20/Cgp3O1YtmkSAZjX1AAA78knErXc2... \n", 1135 | "companyShortName 滴滴出行 \n", 1136 | "companySize 2000人以上 \n", 1137 | "content 职位描述:1.基于业务逻辑,建立业务数据模型,为团队设计反映产品和运营状况的数据报表;2.持... \n", 1138 | "createTime 2018-03-14 11:29:57 \n", 1139 | "deliver 0 \n", 1140 | "district 海淀区 \n", 1141 | "education 本科 \n", 1142 | "explain None \n", 1143 | "financeStage 不需要融资 \n", 1144 | "firstType 开发/测试/运维类 \n", 1145 | "formatCreateTime 2018-03-14 \n", 1146 | "gradeDescription None \n", 1147 | "hitags None \n", 1148 | "imState disabled \n", 1149 | "industryField 移动互联网 \n", 1150 | "industryLables {} \n", 1151 | "isSchoolJob 0 \n", 1152 | "jobNature 全职 \n", 1153 | "lastLogin 1.52352e+12 \n", 1154 | "latitude 40.043454 \n", 1155 | "linestaion None \n", 1156 | "longitude 116.290146 \n", 1157 | "pcShow 0 \n", 1158 | "plus None \n", 1159 | "positionAdvantage 核心部门,15薪,技术驱动 \n", 1160 | "positionId 2940758 \n", 1161 | "positionLables {资深,高级,大数据,数据挖掘,SPSS} \n", 1162 | "positionName 数据分析师 \n", 1163 | "promotionScoreExplain None \n", 1164 | "publisherId 7501671 \n", 1165 | "resumeProcessDay 1 \n", 1166 | "resumeProcessRate 19 \n", 1167 | "salary 20k-40k \n", 1168 | "score 0 \n", 1169 | "secondType 数据开发 \n", 1170 | "stationname None \n", 1171 | "subwayline None \n", 1172 | "workYear 3-5年 \n", 1173 | "\n", 1174 | " 3109 \\\n", 1175 | "adWord 0 \n", 1176 | "appShow 0 \n", 1177 | "approve 1 \n", 1178 | "businessZones {望京,来广营,望京,来广营} \n", 1179 | "city 北京 \n", 1180 | "companyFullName 北京三快在线科技有限公司 \n", 1181 | "companyId 50702 \n", 1182 | "companyLabelList {技能培训,绩效奖金,岗位晋升,领导好} \n", 1183 | "companyLogo i/image/M00/6A/05/Cgp3O1gW8zSAUwUsAABMptH-XY08... \n", 1184 | "companyShortName 美团点评 \n", 1185 | "companySize 2000人以上 \n", 1186 | "content 岗位职责:1.以数据分析为基础,形成明确且详实的方案及策略输出,为公司管理层提供有质量的决策... \n", 1187 | "createTime 2018-03-14 11:29:49 \n", 1188 | "deliver 0 \n", 1189 | "district 朝阳区 \n", 1190 | "education 本科 \n", 1191 | "explain None \n", 1192 | "financeStage D轮及以上 \n", 1193 | "firstType 产品/需求/项目类 \n", 1194 | "formatCreateTime 2018-03-14 \n", 1195 | "gradeDescription None \n", 1196 | "hitags {早九晚六,免费体检,地铁周边,生日聚会} \n", 1197 | "imState disabled \n", 1198 | "industryField 移动互联网,O2O \n", 1199 | "industryLables {旅游,电商,本地生活,SPSS,sas} \n", 1200 | "isSchoolJob 0 \n", 1201 | "jobNature 全职 \n", 1202 | "lastLogin 1.52353e+12 \n", 1203 | "latitude 40.006166 \n", 1204 | "linestaion 15号线_望京东 \n", 1205 | "longitude 116.487528 \n", 1206 | "pcShow 0 \n", 1207 | "plus None \n", 1208 | "positionAdvantage 免费班车,员工福利,发展空间大,大平台 \n", 1209 | "positionId 3604528 \n", 1210 | "positionLables {旅游,电商,本地生活,SPSS,sas} \n", 1211 | "positionName 数据分析 \n", 1212 | "promotionScoreExplain None \n", 1213 | "publisherId 8147880 \n", 1214 | "resumeProcessDay 0 \n", 1215 | "resumeProcessRate 0 \n", 1216 | "salary 8k-12k \n", 1217 | "score 0 \n", 1218 | "secondType 数据分析 \n", 1219 | "stationname 望京东 \n", 1220 | "subwayline 15号线 \n", 1221 | "workYear 1-3年 \n", 1222 | "\n", 1223 | " 3110 \\\n", 1224 | "adWord 0 \n", 1225 | "appShow 0 \n", 1226 | "approve 1 \n", 1227 | "businessZones {兴隆,沙洲} \n", 1228 | "city 南京 \n", 1229 | "companyFullName 通联数据股份公司 \n", 1230 | "companyId 21218 \n", 1231 | "companyLabelList {五险一金,通讯津贴,带薪年假,节日礼物} \n", 1232 | "companyLogo image1/M00/00/27/CgYXBlTUWIeABhHDAAA3B5zOAKk17... \n", 1233 | "companyShortName 通联数据 \n", 1234 | "companySize 150-500人 \n", 1235 | "content 岗位职责:从行业研究角度,规划与收集投研需要的关键行业和公司数据。任职要求:本科学历有会计及... \n", 1236 | "createTime 2018-03-15 11:12:26 \n", 1237 | "deliver 0 \n", 1238 | "district 建邺区 \n", 1239 | "education 本科 \n", 1240 | "explain None \n", 1241 | "financeStage 不需要融资 \n", 1242 | "firstType 产品/需求/项目类 \n", 1243 | "formatCreateTime 2018-03-15 \n", 1244 | "gradeDescription None \n", 1245 | "hitags None \n", 1246 | "imState overSevenDays \n", 1247 | "industryField 金融 \n", 1248 | "industryLables {} \n", 1249 | "isSchoolJob 0 \n", 1250 | "jobNature 全职 \n", 1251 | "lastLogin 1.5218e+12 \n", 1252 | "latitude 31.994276 \n", 1253 | "linestaion None \n", 1254 | "longitude 118.99344 \n", 1255 | "pcShow 0 \n", 1256 | "plus None \n", 1257 | "positionAdvantage 团队氛围好,福利待遇好 \n", 1258 | "positionId 2666997 \n", 1259 | "positionLables {} \n", 1260 | "positionName 行业数据分析师(财务数据方向) \n", 1261 | "promotionScoreExplain None \n", 1262 | "publisherId 3212846 \n", 1263 | "resumeProcessDay 0 \n", 1264 | "resumeProcessRate 0 \n", 1265 | "salary 5k-10k \n", 1266 | "score 0 \n", 1267 | "secondType 数据分析 \n", 1268 | "stationname None \n", 1269 | "subwayline None \n", 1270 | "workYear 不限 \n", 1271 | "\n", 1272 | " 3111 \\\n", 1273 | "adWord 0 \n", 1274 | "appShow 0 \n", 1275 | "approve 1 \n", 1276 | "businessZones None \n", 1277 | "city 深圳 \n", 1278 | "companyFullName 厦门美图之家科技有限公司 \n", 1279 | "companyId 23291 \n", 1280 | "companyLabelList {节日礼物,与大牛共事,福利健全,五险一金} \n", 1281 | "companyLogo image1/M00/00/2D/Cgo8PFTUXHmAQ6K2AAA-xp7F-7c90... \n", 1282 | "companyShortName 美图公司 \n", 1283 | "companySize 500-2000人 \n", 1284 | "content 职位描述1:负责美图公司整体商业&行业&战略方向的分析,为各业务线提供策略分析建议;2:从行... \n", 1285 | "createTime 2018-03-13 16:01:22 \n", 1286 | "deliver 0 \n", 1287 | "district 南山区 \n", 1288 | "education 本科 \n", 1289 | "explain None \n", 1290 | "financeStage 上市公司 \n", 1291 | "firstType 产品/需求/项目类 \n", 1292 | "formatCreateTime 2018-03-13 \n", 1293 | "gradeDescription None \n", 1294 | "hitags None \n", 1295 | "imState today \n", 1296 | "industryField 移动互联网,硬件 \n", 1297 | "industryLables {} \n", 1298 | "isSchoolJob 0 \n", 1299 | "jobNature 全职 \n", 1300 | "lastLogin 1.52351e+12 \n", 1301 | "latitude 116.331398 \n", 1302 | "linestaion None \n", 1303 | "longitude 39.897445 \n", 1304 | "pcShow 0 \n", 1305 | "plus None \n", 1306 | "positionAdvantage 十亿级用户,火箭式成长,海量数据,上市公司 \n", 1307 | "positionId 4052663 \n", 1308 | "positionLables {大数据,BI,业务运营,行业研究} \n", 1309 | "positionName 数据分析师(商业&行业&战略方向) \n", 1310 | "promotionScoreExplain None \n", 1311 | "publisherId 5115844 \n", 1312 | "resumeProcessDay 1 \n", 1313 | "resumeProcessRate 100 \n", 1314 | "salary 18k-36k \n", 1315 | "score 0 \n", 1316 | "secondType 数据分析 \n", 1317 | "stationname None \n", 1318 | "subwayline None \n", 1319 | "workYear 3-5年 \n", 1320 | "\n", 1321 | " 3112 \n", 1322 | "adWord 0 \n", 1323 | "appShow 0 \n", 1324 | "approve 1 \n", 1325 | "businessZones None \n", 1326 | "city 厦门 \n", 1327 | "companyFullName 厦门美图之家科技有限公司 \n", 1328 | "companyId 23291 \n", 1329 | "companyLabelList {节日礼物,与大牛共事,福利健全,五险一金} \n", 1330 | "companyLogo image1/M00/00/2D/Cgo8PFTUXHmAQ6K2AAA-xp7F-7c90... \n", 1331 | "companyShortName 美图公司 \n", 1332 | "companySize 500-2000人 \n", 1333 | "content 岗位职责:1.建立和完善产品的数据体系、如数据收集、数据模型、数据关键指标等;2.负责产品的... \n", 1334 | "createTime 2018-03-12 10:37:50 \n", 1335 | "deliver 0 \n", 1336 | "district 思明区 \n", 1337 | "education 本科 \n", 1338 | "explain None \n", 1339 | "financeStage 上市公司 \n", 1340 | "firstType 开发/测试/运维类 \n", 1341 | "formatCreateTime 2018-03-12 \n", 1342 | "gradeDescription None \n", 1343 | "hitags None \n", 1344 | "imState today \n", 1345 | "industryField 移动互联网,硬件 \n", 1346 | "industryLables {} \n", 1347 | "isSchoolJob 0 \n", 1348 | "jobNature 全职 \n", 1349 | "lastLogin 1.52351e+12 \n", 1350 | "latitude 116.331398 \n", 1351 | "linestaion None \n", 1352 | "longitude 39.897445 \n", 1353 | "pcShow 0 \n", 1354 | "plus None \n", 1355 | "positionAdvantage 海量数据,牛人团队 \n", 1356 | "positionId 3844460 \n", 1357 | "positionLables {} \n", 1358 | "positionName 数据分析师-厦门 \n", 1359 | "promotionScoreExplain None \n", 1360 | "publisherId 5115844 \n", 1361 | "resumeProcessDay 1 \n", 1362 | "resumeProcessRate 100 \n", 1363 | "salary 10k-20k \n", 1364 | "score 0 \n", 1365 | "secondType 后端开发 \n", 1366 | "stationname None \n", 1367 | "subwayline None \n", 1368 | "workYear 不限 " 1369 | ] 1370 | }, 1371 | "execution_count": 4, 1372 | "metadata": {}, 1373 | "output_type": "execute_result" 1374 | } 1375 | ], 1376 | "source": [ 1377 | "df.tail().sort_index(axis=1).T" 1378 | ] 1379 | }, 1380 | { 1381 | "cell_type": "code", 1382 | "execution_count": 5, 1383 | "metadata": { 1384 | "collapsed": true 1385 | }, 1386 | "outputs": [ 1387 | { 1388 | "name": "stdout", 1389 | "output_type": "stream", 1390 | "text": [ 1391 | "\n", 1392 | "RangeIndex: 3113 entries, 0 to 3112\n", 1393 | "Data columns (total 47 columns):\n", 1394 | "companyId 3113 non-null int64\n", 1395 | "positionLables 3113 non-null object\n", 1396 | "industryLables 3113 non-null object\n", 1397 | "businessZones 2172 non-null object\n", 1398 | "score 3113 non-null int64\n", 1399 | "positionId 3113 non-null int64\n", 1400 | "positionName 3113 non-null object\n", 1401 | "createTime 3113 non-null object\n", 1402 | "positionAdvantage 3113 non-null object\n", 1403 | "salary 3113 non-null object\n", 1404 | "approve 3113 non-null int64\n", 1405 | "workYear 3113 non-null object\n", 1406 | "education 3113 non-null object\n", 1407 | "city 3113 non-null object\n", 1408 | "companyLogo 3113 non-null object\n", 1409 | "jobNature 3113 non-null object\n", 1410 | "industryField 3112 non-null object\n", 1411 | "companyShortName 3113 non-null object\n", 1412 | "companySize 3112 non-null object\n", 1413 | "financeStage 3112 non-null object\n", 1414 | "companyLabelList 3113 non-null object\n", 1415 | "publisherId 3113 non-null int64\n", 1416 | "district 3097 non-null object\n", 1417 | "hitags 275 non-null object\n", 1418 | "formatCreateTime 3113 non-null object\n", 1419 | "companyFullName 3113 non-null object\n", 1420 | "adWord 3113 non-null int64\n", 1421 | "resumeProcessRate 3113 non-null int64\n", 1422 | "resumeProcessDay 3113 non-null int64\n", 1423 | "imState 3113 non-null object\n", 1424 | "lastLogin 3112 non-null float64\n", 1425 | "explain 0 non-null object\n", 1426 | "plus 0 non-null object\n", 1427 | "pcShow 3113 non-null int64\n", 1428 | "appShow 3113 non-null int64\n", 1429 | "deliver 3113 non-null int64\n", 1430 | "gradeDescription 0 non-null object\n", 1431 | "promotionScoreExplain 0 non-null object\n", 1432 | "firstType 3113 non-null object\n", 1433 | "secondType 3113 non-null object\n", 1434 | "isSchoolJob 3113 non-null int64\n", 1435 | "subwayline 2120 non-null object\n", 1436 | "stationname 2120 non-null object\n", 1437 | "linestaion 2120 non-null object\n", 1438 | "longitude 3081 non-null object\n", 1439 | "latitude 3081 non-null object\n", 1440 | "content 3113 non-null object\n", 1441 | "dtypes: float64(1), int64(12), object(34)\n", 1442 | "memory usage: 1.1+ MB\n" 1443 | ] 1444 | } 1445 | ], 1446 | "source": [ 1447 | "df.info()" 1448 | ] 1449 | }, 1450 | { 1451 | "cell_type": "markdown", 1452 | "metadata": {}, 1453 | "source": [ 1454 | "**字段含义**" 1455 | ] 1456 | }, 1457 | { 1458 | "cell_type": "markdown", 1459 | "metadata": { 1460 | "collapsed": true 1461 | }, 1462 | "source": [ 1463 | "|字段名|含义|\n", 1464 | "|:-|:-|\n", 1465 | "|companyId|公司id|\n", 1466 | "|positionLables|职位标签|\n", 1467 | "|industryLables|行业标签|\n", 1468 | "|businessZones|商业区|\n", 1469 | "|score|得分?|\n", 1470 | "|positionId|职位id|\n", 1471 | "|positionName|职位名称|\n", 1472 | "|createTime|创建时间|\n", 1473 | "|positionAdvantage|职位诱惑|\n", 1474 | "|salary|薪资|\n", 1475 | "|approve|通过?|\n", 1476 | "|workYear|经验要求|\n", 1477 | "|education|学历|\n", 1478 | "|city|城市|\n", 1479 | "|companyLogo|公司logo|\n", 1480 | "|jobNature|全职or兼职|\n", 1481 | "|industryField|行业领域|\n", 1482 | "|companyShortName|公司简称|\n", 1483 | "|companySize|公司规模|\n", 1484 | "|financeStage|融资阶段|\n", 1485 | "|companyLabelList|公司标签|\n", 1486 | "|publisherId|发布者id|\n", 1487 | "|district|公司所属区|\n", 1488 | "|hitags|?|\n", 1489 | "|formatCreateTime|相对发布|\n", 1490 | "|companyFullName|公司全称|\n", 1491 | "|adWord|?|\n", 1492 | "|resumeProcessRate|回复率|\n", 1493 | "|resumeProcessDay|回复时间|\n", 1494 | "|imState|?|\n", 1495 | "|lastLogin|最后登入|\n", 1496 | "|explain|?|\n", 1497 | "|plus|?|\n", 1498 | "|pcShow|?|\n", 1499 | "|appShow|?|\n", 1500 | "|deliver|?|\n", 1501 | "|gradeDescription|\n", 1502 | "|promotionScoreExplain|\n", 1503 | "|firstType|第一类别|\n", 1504 | "|secondType|第二类别|\n", 1505 | "|isSchoolJob|校园|\n", 1506 | "|subwayline|地铁线路||\n", 1507 | "|stationname|站名|\n", 1508 | "|linestaion|线站|\n", 1509 | "|longitude|经度|\n", 1510 | "|latitude|纬度|\n", 1511 | "|content|jd|\n", 1512 | "\n", 1513 | "**爬虫内容包含许多无意义字段,后续进行删除处理**" 1514 | ] 1515 | }, 1516 | { 1517 | "cell_type": "markdown", 1518 | "metadata": {}, 1519 | "source": [ 1520 | "**去除无用字段**" 1521 | ] 1522 | }, 1523 | { 1524 | "cell_type": "code", 1525 | "execution_count": 6, 1526 | "metadata": {}, 1527 | "outputs": [ 1528 | { 1529 | "data": { 1530 | "text/html": [ 1531 | "
\n", 1532 | "\n", 1545 | "\n", 1546 | " \n", 1547 | " \n", 1548 | " \n", 1549 | " \n", 1550 | " \n", 1551 | " \n", 1552 | " \n", 1553 | " \n", 1554 | " \n", 1555 | " \n", 1556 | " \n", 1557 | " \n", 1558 | " \n", 1559 | " \n", 1560 | " \n", 1561 | " \n", 1562 | " \n", 1563 | " \n", 1564 | " \n", 1565 | " \n", 1566 | " \n", 1567 | " \n", 1568 | " \n", 1569 | " \n", 1570 | " \n", 1571 | " \n", 1572 | " \n", 1573 | " \n", 1574 | " \n", 1575 | " \n", 1576 | " \n", 1577 | " \n", 1578 | " \n", 1579 | " \n", 1580 | " \n", 1581 | " \n", 1582 | " \n", 1583 | " \n", 1584 | " \n", 1585 | " \n", 1586 | " \n", 1587 | " \n", 1588 | " \n", 1589 | " \n", 1590 | " \n", 1591 | " \n", 1592 | " \n", 1593 | " \n", 1594 | " \n", 1595 | " \n", 1596 | " \n", 1597 | " \n", 1598 | " \n", 1599 | " \n", 1600 | " \n", 1601 | " \n", 1602 | " \n", 1603 | " \n", 1604 | " \n", 1605 | " \n", 1606 | " \n", 1607 | " \n", 1608 | " \n", 1609 | " \n", 1610 | " \n", 1611 | " \n", 1612 | " \n", 1613 | " \n", 1614 | " \n", 1615 | " \n", 1616 | " \n", 1617 | " \n", 1618 | " \n", 1619 | " \n", 1620 | " \n", 1621 | " \n", 1622 | " \n", 1623 | " \n", 1624 | " \n", 1625 | " \n", 1626 | " \n", 1627 | " \n", 1628 | " \n", 1629 | " \n", 1630 | " \n", 1631 | " \n", 1632 | " \n", 1633 | " \n", 1634 | " \n", 1635 | " \n", 1636 | " \n", 1637 | " \n", 1638 | " \n", 1639 | " \n", 1640 | " \n", 1641 | " \n", 1642 | " \n", 1643 | " \n", 1644 | " \n", 1645 | " \n", 1646 | " \n", 1647 | " \n", 1648 | " \n", 1649 | " \n", 1650 | " \n", 1651 | " \n", 1652 | " \n", 1653 | " \n", 1654 | " \n", 1655 | " \n", 1656 | " \n", 1657 | " \n", 1658 | " \n", 1659 | " \n", 1660 | " \n", 1661 | " \n", 1662 | " \n", 1663 | " \n", 1664 | " \n", 1665 | " \n", 1666 | " \n", 1667 | " \n", 1668 | " \n", 1669 | " \n", 1670 | " \n", 1671 | " \n", 1672 | " \n", 1673 | " \n", 1674 | " \n", 1675 | " \n", 1676 | " \n", 1677 | " \n", 1678 | " \n", 1679 | " \n", 1680 | " \n", 1681 | " \n", 1682 | " \n", 1683 | " \n", 1684 | " \n", 1685 | " \n", 1686 | " \n", 1687 | " \n", 1688 | " \n", 1689 | " \n", 1690 | " \n", 1691 | " \n", 1692 | " \n", 1693 | " \n", 1694 | " \n", 1695 | " \n", 1696 | " \n", 1697 | " \n", 1698 | " \n", 1699 | " \n", 1700 | " \n", 1701 | " \n", 1702 | " \n", 1703 | " \n", 1704 | " \n", 1705 | " \n", 1706 | " \n", 1707 | " \n", 1708 | " \n", 1709 | " \n", 1710 | " \n", 1711 | " \n", 1712 | " \n", 1713 | " \n", 1714 | " \n", 1715 | " \n", 1716 | " \n", 1717 | " \n", 1718 | " \n", 1719 | " \n", 1720 | " \n", 1721 | " \n", 1722 | " \n", 1723 | " \n", 1724 | " \n", 1725 | " \n", 1726 | " \n", 1727 | " \n", 1728 | " \n", 1729 | " \n", 1730 | " \n", 1731 | " \n", 1732 | " \n", 1733 | " \n", 1734 | " \n", 1735 | " \n", 1736 | " \n", 1737 | " \n", 1738 | " \n", 1739 | " \n", 1740 | " \n", 1741 | " \n", 1742 | " \n", 1743 | " \n", 1744 | " \n", 1745 | " \n", 1746 | " \n", 1747 | " \n", 1748 | " \n", 1749 | " \n", 1750 | " \n", 1751 | " \n", 1752 | " \n", 1753 | " \n", 1754 | " \n", 1755 | " \n", 1756 | " \n", 1757 | " \n", 1758 | " \n", 1759 | " \n", 1760 | " \n", 1761 | " \n", 1762 | " \n", 1763 | " \n", 1764 | " \n", 1765 | " \n", 1766 | " \n", 1767 | " \n", 1768 | " \n", 1769 | " \n", 1770 | " \n", 1771 | " \n", 1772 | " \n", 1773 | " \n", 1774 | " \n", 1775 | " \n", 1776 | " \n", 1777 | " \n", 1778 | " \n", 1779 | " \n", 1780 | " \n", 1781 | " \n", 1782 | " \n", 1783 | " \n", 1784 | " \n", 1785 | " \n", 1786 | " \n", 1787 | " \n", 1788 | " \n", 1789 | " \n", 1790 | " \n", 1791 | " \n", 1792 | " \n", 1793 | " \n", 1794 | " \n", 1795 | " \n", 1796 | " \n", 1797 | " \n", 1798 | " \n", 1799 | " \n", 1800 | " \n", 1801 | " \n", 1802 | " \n", 1803 | " \n", 1804 | " \n", 1805 | " \n", 1806 | " \n", 1807 | " \n", 1808 | " \n", 1809 | " \n", 1810 | " \n", 1811 | " \n", 1812 | " \n", 1813 | " \n", 1814 | " \n", 1815 | " \n", 1816 | " \n", 1817 | " \n", 1818 | " \n", 1819 | " \n", 1820 | " \n", 1821 | " \n", 1822 | " \n", 1823 | " \n", 1824 | " \n", 1825 | " \n", 1826 | " \n", 1827 | " \n", 1828 | " \n", 1829 | " \n", 1830 | " \n", 1831 | " \n", 1832 | " \n", 1833 | " \n", 1834 | " \n", 1835 | " \n", 1836 | " \n", 1837 | " \n", 1838 | " \n", 1839 | " \n", 1840 | " \n", 1841 | " \n", 1842 | " \n", 1843 | " \n", 1844 | " \n", 1845 | " \n", 1846 | " \n", 1847 | " \n", 1848 | " \n", 1849 | " \n", 1850 | " \n", 1851 | " \n", 1852 | " \n", 1853 | " \n", 1854 | " \n", 1855 | " \n", 1856 | " \n", 1857 | " \n", 1858 | " \n", 1859 | " \n", 1860 | " \n", 1861 | " \n", 1862 | " \n", 1863 | " \n", 1864 | " \n", 1865 | " \n", 1866 | " \n", 1867 | " \n", 1868 | " \n", 1869 | " \n", 1870 | " \n", 1871 | " \n", 1872 | " \n", 1873 | " \n", 1874 | " \n", 1875 | " \n", 1876 | " \n", 1877 | " \n", 1878 | " \n", 1879 | " \n", 1880 | " \n", 1881 | " \n", 1882 | " \n", 1883 | " \n", 1884 | " \n", 1885 | " \n", 1886 | " \n", 1887 | " \n", 1888 | " \n", 1889 | " \n", 1890 | " \n", 1891 | " \n", 1892 | " \n", 1893 | " \n", 1894 | " \n", 1895 | " \n", 1896 | " \n", 1897 | " \n", 1898 | " \n", 1899 | " \n", 1900 | " \n", 1901 | " \n", 1902 | " \n", 1903 | " \n", 1904 | " \n", 1905 | " \n", 1906 | " \n", 1907 | " \n", 1908 | " \n", 1909 | " \n", 1910 | " \n", 1911 | " \n", 1912 | " \n", 1913 | " \n", 1914 | " \n", 1915 | " \n", 1916 | " \n", 1917 | " \n", 1918 | " \n", 1919 | " \n", 1920 | " \n", 1921 | " \n", 1922 | " \n", 1923 | " \n", 1924 | " \n", 1925 | " \n", 1926 | " \n", 1927 | " \n", 1928 | " \n", 1929 | " \n", 1930 | " \n", 1931 | " \n", 1932 | " \n", 1933 | " \n", 1934 | " \n", 1935 | " \n", 1936 | " \n", 1937 | " \n", 1938 | " \n", 1939 | " \n", 1940 | " \n", 1941 | " \n", 1942 | " \n", 1943 | " \n", 1944 | " \n", 1945 | " \n", 1946 | " \n", 1947 | " \n", 1948 | " \n", 1949 | " \n", 1950 | " \n", 1951 | " \n", 1952 | " \n", 1953 | " \n", 1954 | " \n", 1955 | " \n", 1956 | " \n", 1957 | " \n", 1958 | " \n", 1959 | " \n", 1960 | " \n", 1961 | " \n", 1962 | " \n", 1963 | " \n", 1964 | " \n", 1965 | " \n", 1966 | " \n", 1967 | " \n", 1968 | " \n", 1969 | " \n", 1970 | " \n", 1971 | " \n", 1972 | " \n", 1973 | " \n", 1974 | " \n", 1975 | " \n", 1976 | " \n", 1977 | " \n", 1978 | " \n", 1979 | " \n", 1980 | " \n", 1981 | " \n", 1982 | " \n", 1983 | " \n", 1984 | " \n", 1985 | " \n", 1986 | " \n", 1987 | " \n", 1988 | " \n", 1989 | " \n", 1990 | " \n", 1991 | " \n", 1992 | " \n", 1993 | " \n", 1994 | " \n", 1995 | " \n", 1996 | " \n", 1997 | " \n", 1998 | " \n", 1999 | " \n", 2000 | " \n", 2001 | " \n", 2002 | " \n", 2003 | " \n", 2004 | " \n", 2005 | " \n", 2006 | " \n", 2007 | " \n", 2008 | " \n", 2009 | " \n", 2010 | " \n", 2011 | " \n", 2012 | " \n", 2013 | " \n", 2014 | " \n", 2015 | " \n", 2016 | " \n", 2017 | " \n", 2018 | " \n", 2019 | " \n", 2020 | " \n", 2021 | " \n", 2022 | " \n", 2023 | " \n", 2024 | " \n", 2025 | " \n", 2026 | " \n", 2027 | " \n", 2028 | " \n", 2029 | " \n", 2030 | " \n", 2031 | " \n", 2032 | " \n", 2033 | " \n", 2034 | " \n", 2035 | " \n", 2036 | " \n", 2037 | " \n", 2038 | " \n", 2039 | " \n", 2040 | " \n", 2041 | " \n", 2042 | " \n", 2043 | " \n", 2044 | " \n", 2045 | " \n", 2046 | " \n", 2047 | " \n", 2048 | " \n", 2049 | " \n", 2050 | " \n", 2051 | " \n", 2052 | " \n", 2053 | " \n", 2054 | " \n", 2055 | " \n", 2056 | " \n", 2057 | " \n", 2058 | " \n", 2059 | " \n", 2060 | " \n", 2061 | " \n", 2062 | " \n", 2063 | " \n", 2064 | " \n", 2065 | " \n", 2066 | " \n", 2067 | " \n", 2068 | " \n", 2069 | " \n", 2070 | " \n", 2071 | " \n", 2072 | " \n", 2073 | " \n", 2074 | " \n", 2075 | " \n", 2076 | " \n", 2077 | " \n", 2078 | " \n", 2079 | " \n", 2080 | " \n", 2081 | " \n", 2082 | " \n", 2083 | " \n", 2084 | " \n", 2085 | " \n", 2086 | " \n", 2087 | " \n", 2088 | " \n", 2089 | " \n", 2090 | " \n", 2091 | " \n", 2092 | " \n", 2093 | " \n", 2094 | " \n", 2095 | " \n", 2096 | " \n", 2097 | " \n", 2098 | " \n", 2099 | " \n", 2100 | " \n", 2101 | " \n", 2102 | " \n", 2103 | " \n", 2104 | " \n", 2105 | " \n", 2106 | " \n", 2107 | " \n", 2108 | " \n", 2109 | " \n", 2110 | " \n", 2111 | " \n", 2112 | " \n", 2113 | " \n", 2114 | " \n", 2115 | " \n", 2116 | " \n", 2117 | " \n", 2118 | " \n", 2119 | " \n", 2120 | " \n", 2121 | " \n", 2122 | " \n", 2123 | " \n", 2124 | " \n", 2125 | " \n", 2126 | " \n", 2127 | " \n", 2128 | " \n", 2129 | " \n", 2130 | " \n", 2131 | " \n", 2132 | " \n", 2133 | " \n", 2134 | " \n", 2135 | " \n", 2136 | " \n", 2137 | " \n", 2138 | " \n", 2139 | " \n", 2140 | " \n", 2141 | " \n", 2142 | " \n", 2143 | " \n", 2144 | " \n", 2145 | " \n", 2146 | " \n", 2147 | " \n", 2148 | " \n", 2149 | " \n", 2150 | " \n", 2151 | " \n", 2152 | " \n", 2153 | " \n", 2154 | " \n", 2155 | " \n", 2156 | " \n", 2157 | " \n", 2158 | " \n", 2159 | " \n", 2160 | " \n", 2161 | " \n", 2162 | " \n", 2163 | " \n", 2164 | " \n", 2165 | " \n", 2166 | " \n", 2167 | " \n", 2168 | " \n", 2169 | " \n", 2170 | " \n", 2171 | " \n", 2172 | " \n", 2173 | " \n", 2174 | " \n", 2175 | " \n", 2176 | " \n", 2177 | " \n", 2178 | " \n", 2179 | " \n", 2180 | " \n", 2181 | " \n", 2182 | " \n", 2183 | " \n", 2184 | " \n", 2185 | " \n", 2186 | " \n", 2187 | " \n", 2188 | " \n", 2189 | " \n", 2190 | " \n", 2191 | " \n", 2192 | " \n", 2193 | " \n", 2194 | " \n", 2195 | " \n", 2196 | " \n", 2197 | " \n", 2198 | " \n", 2199 | " \n", 2200 | " \n", 2201 | " \n", 2202 | " \n", 2203 | " \n", 2204 | " \n", 2205 | " \n", 2206 | " \n", 2207 | " \n", 2208 | " \n", 2209 | " \n", 2210 | " \n", 2211 | " \n", 2212 | " \n", 2213 | " \n", 2214 | " \n", 2215 | " \n", 2216 | " \n", 2217 | " \n", 2218 | " \n", 2219 | " \n", 2220 | " \n", 2221 | " \n", 2222 | " \n", 2223 | " \n", 2224 | " \n", 2225 | " \n", 2226 | " \n", 2227 | " \n", 2228 | " \n", 2229 | " \n", 2230 | " \n", 2231 | " \n", 2232 | " \n", 2233 | " \n", 2234 | " \n", 2235 | " \n", 2236 | " \n", 2237 | " \n", 2238 | " \n", 2239 | " \n", 2240 | " \n", 2241 | " \n", 2242 | " \n", 2243 | " \n", 2244 | " \n", 2245 | " \n", 2246 | " \n", 2247 | " \n", 2248 | " \n", 2249 | " \n", 2250 | " \n", 2251 | " \n", 2252 | " \n", 2253 | " \n", 2254 | " \n", 2255 | " \n", 2256 | " \n", 2257 | " \n", 2258 | " \n", 2259 | " \n", 2260 | " \n", 2261 | " \n", 2262 | " \n", 2263 | " \n", 2264 | " \n", 2265 | " \n", 2266 | " \n", 2267 | " \n", 2268 | " \n", 2269 | " \n", 2270 | " \n", 2271 | " \n", 2272 | " \n", 2273 | " \n", 2274 | " \n", 2275 | " \n", 2276 | " \n", 2277 | " \n", 2278 | " \n", 2279 | " \n", 2280 | " \n", 2281 | " \n", 2282 | " \n", 2283 | " \n", 2284 | " \n", 2285 | " \n", 2286 | " \n", 2287 | " \n", 2288 | " \n", 2289 | " \n", 2290 | " \n", 2291 | " \n", 2292 | " \n", 2293 | " \n", 2294 | " \n", 2295 | " \n", 2296 | " \n", 2297 | " \n", 2298 | " \n", 2299 | " \n", 2300 | " \n", 2301 | " \n", 2302 | " \n", 2303 | " \n", 2304 | " \n", 2305 | " \n", 2306 | " \n", 2307 | " \n", 2308 | " \n", 2309 | " \n", 2310 | " \n", 2311 | " \n", 2312 | " \n", 2313 | " \n", 2314 | " \n", 2315 | " \n", 2316 | " \n", 2317 | " \n", 2318 | " \n", 2319 | " \n", 2320 | " \n", 2321 | " \n", 2322 | " \n", 2323 | " \n", 2324 | " \n", 2325 | " \n", 2326 | " \n", 2327 | " \n", 2328 | " \n", 2329 | " \n", 2330 | " \n", 2331 | " \n", 2332 | " \n", 2333 | " \n", 2334 | " \n", 2335 | " \n", 2336 | " \n", 2337 | " \n", 2338 | " \n", 2339 | " \n", 2340 | " \n", 2341 | " \n", 2342 | " \n", 2343 | " \n", 2344 | " \n", 2345 | " \n", 2346 | " \n", 2347 | " \n", 2348 | " \n", 2349 | " \n", 2350 | " \n", 2351 | " \n", 2352 | " \n", 2353 | " \n", 2354 | " \n", 2355 | " \n", 2356 | " \n", 2357 | " \n", 2358 | " \n", 2359 | " \n", 2360 | " \n", 2361 | " \n", 2362 | " \n", 2363 | " \n", 2364 | " \n", 2365 | " \n", 2366 | " \n", 2367 | " \n", 2368 | " \n", 2369 | " \n", 2370 | " \n", 2371 | " \n", 2372 | " \n", 2373 | " \n", 2374 | " \n", 2375 | " \n", 2376 | " \n", 2377 | " \n", 2378 | " \n", 2379 | " \n", 2380 | " \n", 2381 | " \n", 2382 | " \n", 2383 | " \n", 2384 | " \n", 2385 | " \n", 2386 | " \n", 2387 | " \n", 2388 | " \n", 2389 | " \n", 2390 | " \n", 2391 | " \n", 2392 | " \n", 2393 | " \n", 2394 | " \n", 2395 | " \n", 2396 | " \n", 2397 | " \n", 2398 | " \n", 2399 | " \n", 2400 | " \n", 2401 | " \n", 2402 | " \n", 2403 | " \n", 2404 | " \n", 2405 | " \n", 2406 | " \n", 2407 | " \n", 2408 | " \n", 2409 | " \n", 2410 | " \n", 2411 | " \n", 2412 | " \n", 2413 | " \n", 2414 | " \n", 2415 | " \n", 2416 | " \n", 2417 | " \n", 2418 | " \n", 2419 | " \n", 2420 | " \n", 2421 | " \n", 2422 | " \n", 2423 | " \n", 2424 | " \n", 2425 | " \n", 2426 | " \n", 2427 | " \n", 2428 | " \n", 2429 | " \n", 2430 | " \n", 2431 | " \n", 2432 | " \n", 2433 | " \n", 2434 | " \n", 2435 | " \n", 2436 | " \n", 2437 | " \n", 2438 | " \n", 2439 | " \n", 2440 | " \n", 2441 | " \n", 2442 | " \n", 2443 | " \n", 2444 | " \n", 2445 | " \n", 2446 | " \n", 2447 | " \n", 2448 | " \n", 2449 | " \n", 2450 | " \n", 2451 | " \n", 2452 | " \n", 2453 | " \n", 2454 | " \n", 2455 | " \n", 2456 | " \n", 2457 | " \n", 2458 | " \n", 2459 | " \n", 2460 | " \n", 2461 | " \n", 2462 | " \n", 2463 | " \n", 2464 | " \n", 2465 | " \n", 2466 | " \n", 2467 | " \n", 2468 | " \n", 2469 | " \n", 2470 | " \n", 2471 | " \n", 2472 | " \n", 2473 | " \n", 2474 | " \n", 2475 | " \n", 2476 | " \n", 2477 | " \n", 2478 | " \n", 2479 | " \n", 2480 | " \n", 2481 | " \n", 2482 | " \n", 2483 | " \n", 2484 | " \n", 2485 | " \n", 2486 | " \n", 2487 | " \n", 2488 | " \n", 2489 | " \n", 2490 | " \n", 2491 | " \n", 2492 | " \n", 2493 | " \n", 2494 | " \n", 2495 | " \n", 2496 | " \n", 2497 | " \n", 2498 | " \n", 2499 | " \n", 2500 | " \n", 2501 | " \n", 2502 | " \n", 2503 | " \n", 2504 | " \n", 2505 | " \n", 2506 | " \n", 2507 | " \n", 2508 | " \n", 2509 | " \n", 2510 | " \n", 2511 | " \n", 2512 | " \n", 2513 | " \n", 2514 | " \n", 2515 | " \n", 2516 | " \n", 2517 | " \n", 2518 | " \n", 2519 | " \n", 2520 | " \n", 2521 | " \n", 2522 | " \n", 2523 | " \n", 2524 | " \n", 2525 | " \n", 2526 | " \n", 2527 | " \n", 2528 | " \n", 2529 | " \n", 2530 | " \n", 2531 | " \n", 2532 | " \n", 2533 | " \n", 2534 | " \n", 2535 | " \n", 2536 | " \n", 2537 | " \n", 2538 | " \n", 2539 | " \n", 2540 | " \n", 2541 | " \n", 2542 | " \n", 2543 | " \n", 2544 | " \n", 2545 | " \n", 2546 | " \n", 2547 | " \n", 2548 | " \n", 2549 | " \n", 2550 | " \n", 2551 | " \n", 2552 | " \n", 2553 | " \n", 2554 | " \n", 2555 | " \n", 2556 | " \n", 2557 | " \n", 2558 | " \n", 2559 | " \n", 2560 | " \n", 2561 | " \n", 2562 | " \n", 2563 | " \n", 2564 | " \n", 2565 | " \n", 2566 | " \n", 2567 | " \n", 2568 | " \n", 2569 | " \n", 2570 | " \n", 2571 | " \n", 2572 | " \n", 2573 | " \n", 2574 | " \n", 2575 | " \n", 2576 | " \n", 2577 | " \n", 2578 | " \n", 2579 | " \n", 2580 | " \n", 2581 | " \n", 2582 | " \n", 2583 | " \n", 2584 | " \n", 2585 | " \n", 2586 | " \n", 2587 | " \n", 2588 | " \n", 2589 | " \n", 2590 | " \n", 2591 | " \n", 2592 | " \n", 2593 | " \n", 2594 | " \n", 2595 | " \n", 2596 | " \n", 2597 | " \n", 2598 | " \n", 2599 | " \n", 2600 | " \n", 2601 | " \n", 2602 | " \n", 2603 | " \n", 2604 | " \n", 2605 | " \n", 2606 | " \n", 2607 | " \n", 2608 | " \n", 2609 | " \n", 2610 | " \n", 2611 | " \n", 2612 | " \n", 2613 | " \n", 2614 | " \n", 2615 | " \n", 2616 | " \n", 2617 | " \n", 2618 | " \n", 2619 | " \n", 2620 | " \n", 2621 | " \n", 2622 | " \n", 2623 | " \n", 2624 | " \n", 2625 | " \n", 2626 | " \n", 2627 | " \n", 2628 | " \n", 2629 | " \n", 2630 | " \n", 2631 | " \n", 2632 | " \n", 2633 | " \n", 2634 | " \n", 2635 | " \n", 2636 | " \n", 2637 | " \n", 2638 | " \n", 2639 | " \n", 2640 | " \n", 2641 | " \n", 2642 | " \n", 2643 | " \n", 2644 | " \n", 2645 | " \n", 2646 | " \n", 2647 | " \n", 2648 | " \n", 2649 | " \n", 2650 | " \n", 2651 | " \n", 2652 | " \n", 2653 | " \n", 2654 | " \n", 2655 | " \n", 2656 | " \n", 2657 | " \n", 2658 | " \n", 2659 | " \n", 2660 | " \n", 2661 | " \n", 2662 | " \n", 2663 | " \n", 2664 | " \n", 2665 | " \n", 2666 | " \n", 2667 | " \n", 2668 | " \n", 2669 | " \n", 2670 | " \n", 2671 | " \n", 2672 | " \n", 2673 | " \n", 2674 | " \n", 2675 | " \n", 2676 | " \n", 2677 | " \n", 2678 | " \n", 2679 | " \n", 2680 | " \n", 2681 | " \n", 2682 | " \n", 2683 | " \n", 2684 | " \n", 2685 | " \n", 2686 | " \n", 2687 | " \n", 2688 | " \n", 2689 | " \n", 2690 | " \n", 2691 | " \n", 2692 | " \n", 2693 | " \n", 2694 | " \n", 2695 | " \n", 2696 | " \n", 2697 | " \n", 2698 | " \n", 2699 | " \n", 2700 | " \n", 2701 | " \n", 2702 | " \n", 2703 | " \n", 2704 | " \n", 2705 | " \n", 2706 | " \n", 2707 | " \n", 2708 | " \n", 2709 | " \n", 2710 | " \n", 2711 | " \n", 2712 | " \n", 2713 | " \n", 2714 | " \n", 2715 | " \n", 2716 | " \n", 2717 | " \n", 2718 | " \n", 2719 | " \n", 2720 | " \n", 2721 | " \n", 2722 | " \n", 2723 | " \n", 2724 | " \n", 2725 | " \n", 2726 | " \n", 2727 | " \n", 2728 | " \n", 2729 | " \n", 2730 | " \n", 2731 | " \n", 2732 | " \n", 2733 | " \n", 2734 | " \n", 2735 | " \n", 2736 | " \n", 2737 | " \n", 2738 | " \n", 2739 | " \n", 2740 | " \n", 2741 | " \n", 2742 | " \n", 2743 | " \n", 2744 | " \n", 2745 | " \n", 2746 | " \n", 2747 | " \n", 2748 | " \n", 2749 | " \n", 2750 | " \n", 2751 | " \n", 2752 | " \n", 2753 | " \n", 2754 | " \n", 2755 | " \n", 2756 | " \n", 2757 | " \n", 2758 | " \n", 2759 | " \n", 2760 | " \n", 2761 | " \n", 2762 | " \n", 2763 | " \n", 2764 | " \n", 2765 | " \n", 2766 | " \n", 2767 | " \n", 2768 | " \n", 2769 | " \n", 2770 | " \n", 2771 | " \n", 2772 | " \n", 2773 | " \n", 2774 | " \n", 2775 | " \n", 2776 | " \n", 2777 | " \n", 2778 | " \n", 2779 | " \n", 2780 | " \n", 2781 | " \n", 2782 | " \n", 2783 | " \n", 2784 | " \n", 2785 | " \n", 2786 | " \n", 2787 | " \n", 2788 | " \n", 2789 | " \n", 2790 | " \n", 2791 | " \n", 2792 | " \n", 2793 | " \n", 2794 | " \n", 2795 | " \n", 2796 | " \n", 2797 | " \n", 2798 | " \n", 2799 | " \n", 2800 | " \n", 2801 | " \n", 2802 | " \n", 2803 | " \n", 2804 | " \n", 2805 | " \n", 2806 | " \n", 2807 | " \n", 2808 | " \n", 2809 | " \n", 2810 | " \n", 2811 | " \n", 2812 | " \n", 2813 | " \n", 2814 | " \n", 2815 | " \n", 2816 | " \n", 2817 | " \n", 2818 | " \n", 2819 | " \n", 2820 | " \n", 2821 | " \n", 2822 | " \n", 2823 | " \n", 2824 | " \n", 2825 | " \n", 2826 | " \n", 2827 | " \n", 2828 | " \n", 2829 | " \n", 2830 | " \n", 2831 | " \n", 2832 | " \n", 2833 | " \n", 2834 | " \n", 2835 | " \n", 2836 | " \n", 2837 | " \n", 2838 | " \n", 2839 | " \n", 2840 | " \n", 2841 | " \n", 2842 | " \n", 2843 | " \n", 2844 | " \n", 2845 | " \n", 2846 | " \n", 2847 | " \n", 2848 | " \n", 2849 | " \n", 2850 | " \n", 2851 | " \n", 2852 | " \n", 2853 | " \n", 2854 | " \n", 2855 | " \n", 2856 | " \n", 2857 | " \n", 2858 | " \n", 2859 | " \n", 2860 | " \n", 2861 | " \n", 2862 | " \n", 2863 | " \n", 2864 | " \n", 2865 | " \n", 2866 | " \n", 2867 | " \n", 2868 | " \n", 2869 | " \n", 2870 | " \n", 2871 | " \n", 2872 | " \n", 2873 | " \n", 2874 | " \n", 2875 | " \n", 2876 | " \n", 2877 | " \n", 2878 | " \n", 2879 | " \n", 2880 | " \n", 2881 | " \n", 2882 | " \n", 2883 | " \n", 2884 | " \n", 2885 | " \n", 2886 | " \n", 2887 | " \n", 2888 | " \n", 2889 | " \n", 2890 | " \n", 2891 | " \n", 2892 | " \n", 2893 | " \n", 2894 | " \n", 2895 | " \n", 2896 | " \n", 2897 | " \n", 2898 | " \n", 2899 | " \n", 2900 | " \n", 2901 | " \n", 2902 | " \n", 2903 | " \n", 2904 | " \n", 2905 | " \n", 2906 | " \n", 2907 | " \n", 2908 | " \n", 2909 | " \n", 2910 | " \n", 2911 | " \n", 2912 | " \n", 2913 | " \n", 2914 | " \n", 2915 | " \n", 2916 | " \n", 2917 | " \n", 2918 | " \n", 2919 | " \n", 2920 | " \n", 2921 | " \n", 2922 | " \n", 2923 | " \n", 2924 | " \n", 2925 | " \n", 2926 | " \n", 2927 | " \n", 2928 | " \n", 2929 | " \n", 2930 | " \n", 2931 | " \n", 2932 | " \n", 2933 | " \n", 2934 | " \n", 2935 | " \n", 2936 | " \n", 2937 | " \n", 2938 | " \n", 2939 | " \n", 2940 | " \n", 2941 | " \n", 2942 | " \n", 2943 | " \n", 2944 | " \n", 2945 | " \n", 2946 | " \n", 2947 | " \n", 2948 | " \n", 2949 | " \n", 2950 | " \n", 2951 | " \n", 2952 | " \n", 2953 | " \n", 2954 | " \n", 2955 | " \n", 2956 | " \n", 2957 | " \n", 2958 | " \n", 2959 | " \n", 2960 | " \n", 2961 | " \n", 2962 | " \n", 2963 | " \n", 2964 | " \n", 2965 | " \n", 2966 | " \n", 2967 | " \n", 2968 | " \n", 2969 | " \n", 2970 | " \n", 2971 | " \n", 2972 | " \n", 2973 | " \n", 2974 | " \n", 2975 | " \n", 2976 | " \n", 2977 | " \n", 2978 | " \n", 2979 | " \n", 2980 | " \n", 2981 | " \n", 2982 | " \n", 2983 | " \n", 2984 | " \n", 2985 | " \n", 2986 | " \n", 2987 | " \n", 2988 | " \n", 2989 | " \n", 2990 | " \n", 2991 | " \n", 2992 | " \n", 2993 | " \n", 2994 | " \n", 2995 | " \n", 2996 | " \n", 2997 | " \n", 2998 | " \n", 2999 | " \n", 3000 | " \n", 3001 | " \n", 3002 | " \n", 3003 | " \n", 3004 | " \n", 3005 | " \n", 3006 | " \n", 3007 | " \n", 3008 | " \n", 3009 | " \n", 3010 | " \n", 3011 | " \n", 3012 | " \n", 3013 | " \n", 3014 | " \n", 3015 | " \n", 3016 | " \n", 3017 | " \n", 3018 | " \n", 3019 | " \n", 3020 | " \n", 3021 | " \n", 3022 | " \n", 3023 | " \n", 3024 | " \n", 3025 | " \n", 3026 | " \n", 3027 | " \n", 3028 | " \n", 3029 | " \n", 3030 | " \n", 3031 | " \n", 3032 | " \n", 3033 | " \n", 3034 | " \n", 3035 | " \n", 3036 | " \n", 3037 | " \n", 3038 | "
companyIdpositionLablesindustryLablesbusinessZonespositionIdpositionNamecreateTimepositionAdvantagesalaryworkYear...companyFullNamefirstTypesecondTypeisSchoolJobsubwaylinestationnamelinestaionlongitudelatitudecontent
06138{游戏,建模,数值}{游戏,建模,数值}{石牌,岗顶,龙口}4362228数据分析专员2018-04-12 15:18:31氛围好,牛人多,年终奖5k-8k1-3年...广州易幻网络科技有限公司产品/需求/项目类数据分析03号线石牌桥1号线_体育中心;3号线_石牌桥;3号线_岗顶;3号线_华师113.3395523.1367岗位职责1.挖掘及分析游戏项目数据2.管理及建立数据模型3.游戏用户行为分析4.定期编写相关...
16678{金融,SQL}{金融,SQL}None4321741数据分析师2018-04-12 15:16:4714薪,每月补助,年轻团队,升职空间10k-16k1-3年...人人贷商务顾问(北京)有限公司开发/测试/运维类数据开发015号线五道口4号线大兴线_北京大学东门;13号线_五道口;15号线_清华东路西口116.33077839.994714工作职责:1.建立相关统计模型,对用户交易数据进行挖掘,对公司战略决策、业务优化需求提供支撑...
2151036{资深,大数据,数据挖掘}{}{鲁班路,五里桥,鲁班路,五里桥,鲁班路,五里桥}3928874数据分析师2018-04-12 15:15:44发展空间大,弹性工作,免费班车,无限零食10k-20k1-3年...鼎必程(上海)信息技术有限公司产品/需求/项目类数据分析07号线长清路7号线_长清路;7号线_后滩;13号线_世博大道121.47521531.183898岗位职责:1)负责应收账款的基础数据分析,包括资产透析,可视化渲染以及最终分析报告;2)深入...
34760{大数据,数据挖掘}{}None4118616资深数据分析师(数据产品)2018-04-12 15:15:07前景好,福利优25k-50k5-10年...唯品会(中国)有限公司产品/需求/项目类数据分析0NoneNoneNone0.00.0岗位职责:1、对业务部门进行数据需求支持,挖掘分析主题,开展数据分析工作,基于数据分析成果,...
468457{助理,风控,质检}{}{西溪,古墩路,古墩路}4264098数据分析师(儿科方向)2018-04-12 15:12:52包三餐,牛人多,工作氛围佳,有前景1k-2k不限...上海依图信息技术有限公司产品/需求/项目类数据分析0NoneNoneNone120.10074830.270038全职/实习/兼职皆可职责定义:•活跃度运营:调动标注人员的积极性,维护合作关系;面向标注人员...
5210490{金融,数据挖掘,大数据,数据库}{金融,数据挖掘,大数据,数据库}{科技园}4055455BI高级数据分析师2018-04-12 15:09:51年轻团队,公司福利好,发展平台大12k-20k3-5年...深圳汇商通盈科技有限公司开发/测试/运维类数据开发02号线/蛇口线深大2号线/蛇口线_科苑;1号线/罗宝线_高新园;1号线/罗宝线_深大113.95242922.537847岗位职责:•与数据架构师紧密合作,用各种数据库、数据仓库、BI工具和必要的编程工具实现数据体...
651480{资深,大数据,可视化,商业,业务运营}{}{长征,金沙江路,曹杨}3700674业务分析专家、数据分析专家2018-04-12 15:07:29平台好、发展空间大、扁平管理、弹性工作20k-40k3-5年...拉扎斯网络科技(上海)有限公司产品/需求/项目类数据分析013号线祁连山南路13号线_祁连山南路;13号线_真北路;13号线_大渡河路121.37994531.231299【岗位职责】1、负责外卖配送业务流程、产品规则模块分析等工作;2、建立业务运营分析体系,推动...
757400{}{}None1845914Data Analyst 数据分析师2018-04-12 15:06:55周末双休 五险一金 带薪年假 年终多薪8k-15k1-3年...昊盈信息技术(成都)有限公司市场与销售市场/营销0NoneNoneNoneNoneNonePLEASEAPPLYTHROUGHTHISLINK:https://nexa-corp.w...
8117203{}{}None2353221数据分析员2018-04-12 15:06:52十三薪;五险一金;出国机会;弹性工作4k-8k1-3年...西安亚购信息科技有限公司产品产品经理0NoneNoneNone108.87703734.190449职位描述:1、实时响应各业务部门的数据需求,并总结需求,制定报表及系统开发计划;2、负责网站...
9183985{}{}{西丽}4287427数据分析专员/助理2018-04-12 14:35:37五险一金,节日福利,晋升空间,扁平管理4k-8k1-3年...深圳市华商生物科技有限公司产品/需求/项目类数据分析05号线/环中线留仙洞5号线/环中线_留仙洞113.93976322.567951岗位职责:1、负责运营中心每日数据收集、审核、整理、分析、呈现工作;2、根据公司要求,将搜集...
1067342{算法,数据挖掘,大数据,建模}{}None4411559数据分析师2018-04-12 15:04:51股权期权,六险一金,同事棒,等你来13k-25k1-3年...浙江互联网金融资产交易中心股份有限公司开发/测试/运维类数据开发0NoneNoneNone120.16566730.185677岗位职责:1.深入了解公司业务,为业务团队提供数据分析及大数据预测,对业务发展提供决策建议;...
1131173{信息安全,电商,广告营销,大数据,数据可视化}{信息安全,电商,广告营销,大数据,数据可视化}{国贸,呼家楼}2744665BI 高级数据产品经理 高级数据分析产品经理2018-04-12 15:03:27五险一金,海外年会,扁平管理,期权15k-30k3-5年...悠易互通(北京)广告有限公司产品/需求/项目类产品设计/需求分析010号线国贸1号线_永安里;1号线_国贸;6号线_呼家楼;6号线_东大桥;10号线_呼家楼;10号线_金...116.45929839.918784高级数据产品经理岗位描述:1.规划营销大数据平台软件产品(DMP),设计产品功能,协调开发资...
12146945{算法,机器学习,深度学习,数据挖掘}{}None2716397大数据分析师2018-04-12 15:02:54大数据分析,数据挖掘,机器学习,人工智能10k-20k3-5年...武汉智领云科技有限公司开发/测试/运维类数据开发0NoneNoneNone114.41056430.457982智领云的云计算大数据平台研发团队聚集了硅谷和本土的优秀创业人才,团队成员中有的在硅谷著名科技...
1356521{金融,游戏,广告营销,初级,数据挖掘}{金融,游戏,广告营销,初级,数据挖掘}{科技园,科技园,大冲,大冲}3586368数据分析师(数学相关专业应届)2018-04-12 15:02:24大平台5k-7k应届毕业生...北京思恩客广告有限公司开发/测试/运维类数据开发11号线/罗宝线深大1号线/罗宝线_高新园;1号线/罗宝线_深大113.94355122.5481791、负责日常广告投放业务的数据分析工作;2、基于数据分析得到有价值信息,为业务优化提供策略和...
14196263{游戏,SPSS}{游戏,SPSS}{科技园}4252645游戏数据分析师2018-04-12 15:00:11福利好,待遇棒,补贴多,老板好5k-10k1-3年...深圳市叁亿广告有限公司产品/需求/项目类数据分析01号线/罗宝线深大1号线/罗宝线_深大113.93750322.543597岗位要求:1、数据逻辑清晰,数据敏感度高;计算机专业、统计学专业优先考虑,,MMO游戏或SL...
15280497{股票,证券,期货,外汇,操盘手,经纪人}{股票,证券,期货,外汇,操盘手,经纪人}{香蜜湖,车公庙,下沙,车公庙,车公庙,香蜜湖,香蜜湖,下沙,下沙}3800615金融(数据分析师)助理2018-04-12 14:54:15绩效奖金,带薪年假,美女多多,员工旅游6k-8k不限...深圳市名瑞德国际金融服务有限公司金融类证券/期货07号线农林1号线/罗宝线_香蜜湖;1号线/罗宝线_车公庙;1号线/罗宝线_竹子林;11号线/机场线_车...114.02681622.535326岗位职责:1.负责公司指定客户账户或配资账户的资金运作;2.分析行情,做出每日交易计划;3....
1615265{金融,大数据,商业,业务运营,数据挖掘}{金融,大数据,商业,业务运营,数据挖掘}{后海,南油,蛇口}3998833高级业务数据分析师2018-04-12 10:52:30B+轮,团队牛,培训多,假期长15k-25k5-10年...深圳卫盈智信科技有限公司产品/需求/项目类数据分析02号线/蛇口线海月2号线/蛇口线_海月;2号线/蛇口线_登良;2号线/蛇口线_后海;11号线/机场线_后海113.9421222.50907职责描述:1负责指定业务的数据分析工作,梳理并持续优化业务流程,为业务运营提升提供决策分析和...
176138{游戏,广告营销,专员,运营,广告投放}{游戏,广告营销,专员,运营,广告投放}{石牌,岗顶,龙口}4090039数据分析师(市场方向)2018-04-12 15:18:31餐饮补贴,六险一金,团队氛围好,成长速度快6k-8k1-3年...广州易幻网络科技有限公司产品/需求/项目类数据分析03号线石牌桥1号线_体育中心;3号线_石牌桥;3号线_岗顶;3号线_华师113.3395523.1367岗位职责:1、学习广告投放、市场运营规律,将游戏产品在Facebook和GoogleAdWo...
186678{金融,大数据,数据挖掘,数据仓库,SPSS}{金融,大数据,数据挖掘,数据仓库,SPSS}{五道口,清华大学,中关村}4007760数据分析师2018-04-12 15:16:4714薪,每月补助,团队年轻,节假双休16k-25k3-5年...人人贷商务顾问(北京)有限公司产品/需求/项目类数据分析015号线五道口4号线大兴线_北京大学东门;13号线_五道口;15号线_清华东路西口116.33077839.994714工作职责:1.深度学习了解业务,参与数据仓库构建,数据产品开发等。2.建立完善数据分析体系,...
19151036{数据挖掘}{}{鲁班路}3358144数据分析2018-04-12 15:15:44发展空间,弹性工作10k-20k1-3年...鼎必程(上海)信息技术有限公司产品/需求/项目类数据分析07号线长清路7号线_长清路;7号线_后滩;13号线_世博大道121.47521531.183898工作内容:1)负责海量数据监控和分析,结合数据产品或者业务的规划,设计主题和策略,从数据层面...
2057400{初级}{}{红星路,东门大桥,春熙路}2610548数据分析师-Data Analyst2018-04-12 15:06:55带薪年假,通讯补贴,五险一金5k-7k不限...昊盈信息技术(成都)有限公司产品/需求/项目类数据分析04号线锦江宾馆1号线_锦江宾馆;1号线_天府广场;2号线_天府广场;2号线_春熙路;2号线_东门大桥;3号...104.0802830.65452986Description:●Involvedincompanydatawarehouseset...
21169524{电商,大数据,BI,数据挖掘}{电商,大数据,BI,数据挖掘}None4316043数据分析工程师【跨境电商】2018-04-12 14:05:38数据分析,爬虫工程师,BI10k-20k1-3年...深圳市萨拉摩尔电子商务有限公司产品/需求/项目类数据分析0NoneNoneNone114.059222.611772【职位描述】1、参与电商业务数据清洗、模型探索、算法构建;2、通过专题研究,对业务数据和财务...
2256521{游戏,广告营销,数据挖掘,SPSS}{游戏,广告营销,数据挖掘,SPSS}{科技园,南山医院,大冲}3646985数据分析经理2018-04-12 15:02:234A公司,平台好,晋升快9k-13k3-5年...北京思恩客广告有限公司开发/测试/运维类数据开发01号线/罗宝线深大1号线/罗宝线_高新园;1号线/罗宝线_深大113.944622.548341工作职责:a、基于用户互联网行为特征和游戏互联网数据,为客户提供数据分析服务;b、负责业务数...
23280497{股票,证券,期货,操盘手,经纪人}{股票,证券,期货,操盘手,经纪人}{车公庙,香蜜湖,下沙,车公庙,香蜜湖,下沙,车公庙,香蜜湖,下沙}3806048金融外汇数据分析师2018-04-12 14:54:15绩效奖金,带薪年假,弹性工作,美女多多6k-8k不限...深圳市名瑞德国际金融服务有限公司金融类证券/期货07号线农林1号线/罗宝线_香蜜湖;1号线/罗宝线_车公庙;1号线/罗宝线_竹子林;11号线/机场线_车...114.02681622.535326【申请条件】希望您是专科、本科毕业生,最好是金融专业。希望您是一位有眼光及远大志向,致力于在...
24340169{金融,中级,算法,数据挖掘,大数据}{金融,中级,算法,数据挖掘,大数据}{江南}4273040数据分析师2018-04-12 14:52:40五险一金,员工旅游,年终奖,免费班车15k-30k3-5年...信雅达系统工程股份有限公司开发/测试/运维类数据开发0NoneNoneNone120.16113930.185305岗位职责:1、负责运用大数据金融风控模型的建立、优化和完善;2、负责金融海量数据的分析和挖掘...
25312419{}{}None4185934数据分析师2018-04-12 14:52:04从0-120k-40k3-5年...北京吾好科技发展有限公司运营/编辑/客服运营0NoneNoneNone0.00.0岗位职责:·深入了解互联网业务,建立基于业务场景的数据分析需求,解决各类数据分析问题;·对业...
2634683{大数据,SQL}{}None4373739BI数据分析师2018-04-12 14:50:43数据大牛,发展空间,福利丰富10k-13k1-3年...上海秦苍信息科技有限公司产品/需求/项目类数据分析04号线塘桥2号线_上海科技馆;2号线_世纪大道;4号线_世纪大道;4号线_浦电路(4号线);4号线_蓝...121.53134931.217794工作内容:1.负责日常的数据运营分析,整理需求,协助业务分析问题;2.数据的梳理与清洗,ET...
27109625{金融,业务运营,大数据,数据挖掘,商业分析}{金融,业务运营,大数据,数据挖掘,商业分析}{张江}4367503数据分析师2018-04-12 13:53:52移动金融,未来金矿,精英团队,助力成长10k-15k1-3年...上海数禾信息科技有限公司产品/需求/项目类数据分析02号线张江高科2号线_金科路;2号线_张江高科121.60093331.206393职位描述:数禾科技,上市公司分众传媒(股票代码002027)旗下互联网金融子公司,团队成员来...
2831505{大数据,SPSS,sas}{}None3507244数据分析2018-04-12 14:49:53周末双休,五险一金,年终奖8k-12k3-5年...深圳市轻松行商旅网有限公司产品/需求/项目类数据分析07号线车公庙1号线/罗宝线_香蜜湖;1号线/罗宝线_车公庙;11号线/机场线_车公庙;7号线_车公庙;7...114.03153722.534054岗位职责:1)负责业务数据收集整理,对多种数据源的数据进行挖掘和深度分析;2)根据需求,完成...
293422{大数据,数据挖掘,SPSS}{}{长阳路,周家嘴路,平凉路}4246467数据分析师2018-04-12 14:49:37免费三餐,弹性工作,硅谷氛围,带薪年假6k-8k1-3年...上海流利说信息技术有限公司开发/测试/运维类数据开发08号线黄兴路8号线_黄兴路;12号线_江浦公园;12号线_宁国路;12号线_隆昌路121.53350131.271454数据分析师工作职责:1.日常数据监控以及数据预警,异常数据解读挖掘,并与业务同事沟通解决方案...
..................................................................
308350702{大数据,数据挖掘,数据管理}{}None4282351高级数据分析师(BI业务方向)2018-03-19 12:16:22晋升空间大,团队牛人多,六险一金,15薪15k-30k3-5年...北京三快在线科技有限公司开发/测试/运维类数据开发0NoneNoneNone0.00.0岗位职责:数据报表维护:按时准确更新手工周期性报告(日,周,月等);运维已有线上报表(排故和...
308450702{统计,数据管理}{}None4282383高级销售数据分析师(绩效方向)2018-03-19 12:16:19晋升空间大,团队牛人多,六险一金,15薪15k-30k3-5年...北京三快在线科技有限公司产品/需求/项目类数据分析0NoneNoneNone0.00.0岗位职责:1、按时、准确的提供各指标的日数据报表,针对相关联报表进行有效整合;2、针对每日追...
308550702{旅游,商业,SQL,SPSS,sas}{旅游,商业,SQL,SPSS,sas}{望京,望京,来广营,来广营}3167634高级数据分析师2018-03-19 11:20:47平台好,发展空间大20k-30k3-5年...北京三快在线科技有限公司产品/需求/项目类数据分析015号线望京东15号线_望京东116.4886440.007096工作职责:1.旅游业务日常系统数据经营分析,通过数据监控快速精准地发现问题,透过数据现象定位...
308650702{旅游,商业}{旅游,商业}{望京,望京,来广营,来广营}2766824数据分析-业务分析方向2018-03-19 11:20:46平台好,发展空间大15k-25k1-3年...北京三快在线科技有限公司产品/需求/项目类数据分析015号线望京东15号线_望京东116.4886440.007096工作内容1.负责旅游业务相关的数据及分析需求,推动该业务系统性分析框架的构建;2.通过数据监...
308750702{大数据,建模,SPSS}{}None4275374高星酒店业务数据分析师2018-03-16 14:32:32大平台,福利好,晋升空间15k-20k3-5年...北京三快在线科技有限公司产品/需求/项目类数据分析0NoneNoneNone0.00.0岗位职责:负责监控高星酒店业务的核心KPI完成情况,以及行业走势,并对数据进行分析,逐层剖...
308850702{旅游,资深}{旅游,资深}{望京,来广营,望京,来广营}2728418数据分析师2018-03-19 11:20:46互联网公司,大平台,待遇好15k-25k1-3年...北京三快在线科技有限公司开发/测试/运维类数据开发015号线望京东15号线_望京东116.4886440.007096工作内容1.负责旅游业务相关的数据及分析需求,推动该业务系统性分析框架的构建;2.通过数据监...
308950702{大数据,SPSS,sas}{}{望京,来广营,望京,来广营,望京,来广营}3900137高级数据分析师2018-03-19 11:20:44平台大,福利好,发展好20k-30k3-5年...北京三快在线科技有限公司产品/需求/项目类数据分析015号线望京东15号线_望京东116.4886440.007096工作职责:1.日常经营数据分析,通过数据分析快速精准发现问题,指导业务优化改进。2.对业务数...
309023291{业务运营,大数据,数据挖掘}{}None3973930数据分析师-美拍2018-03-19 09:30:15上市公司15k-30k1-3年...厦门美图之家科技有限公司产品/需求/项目类数据分析0NoneNoneNone39.897445116.331398岗位职责:1.整体负责美图公司旗下美拍产品的数据分析、挖掘工作。2.建设完整的数据体系,对产...
30912474{滴滴}{}None3777055数据分析高级专家(运营分析方向)(J171031012)2018-03-19 11:51:58广阔平台 诱惑福利 急速成长30k-60k5-10年...北京嘀嘀无限科技发展有限公司产品设计|需求分析[产品设计|需求分析]0NoneNoneNone40.0468340.04683岗位职责1.与业务方一起回答重要的商业问题和产品问题,高效的拆析业务当前的核心痛点和机会。2...
309213163{企业服务,BI}{企业服务,BI}{火车站,火车站,天目西路,天目西路,闸北公园,闸北公园}2547094商业数据分析2018-03-16 14:13:39互联网独角兽公司,海量数据,福利好12k-18k不限...浙江每日互动网络科技股份有限公司市场与销售数据分析04号线上海火车站1号线_新闸路;1号线_汉中路;1号线_上海火车站;3号线_上海火车站;4号线_上海火车站;...121.45536331.244656Responsibilities:·Innovate,experiment,prototyp...
309313163{旅游,大数据,数据挖掘}{旅游,大数据,数据挖掘}{古墩路,文一路,翠苑}3131711数据分析(实习生)2018-03-16 14:13:51互联网独角兽 发展平台佳4k-5k应届毕业生...浙江每日互动网络科技股份有限公司产品/需求/项目类数据分析12号线古翠路2号线_古翠路120.111570130.29005073个旅是个推旗下的旅游大数据分析平台。个推作为独立的全领域移动大数据服务商,在消息推送领域已深...
309413163{大数据,数据挖掘}{}{翠苑,文一路,古墩路}4017275数据分析2018-03-16 14:13:5016-18薪 互联网独角兽10k-15k1-3年...浙江每日互动网络科技股份有限公司产品/需求/项目类数据分析02号线古翠路2号线_古翠路120.1145597930.2903098职位描述:1、负责日常运营数据的收集和分析,通过数据反馈发现改进点(挖掘算法优化空间和流程优...
309513163{搜索,深度学习,人工智能,大数据,数据挖掘}{}{翠苑,文一路,古墩路}3694231数据分析师(校招岗位)2018-03-16 14:13:4616-18薪 带薪年假 五险一金 生日Party 餐补10k-15k应届毕业生...浙江每日互动网络科技股份有限公司开发/测试/运维类数据开发12号线古翠路2号线_古翠路120.1145597930.2903098数据分析师(校招岗位)岗位职责:1.通过海量数据的分析和挖掘,为业务运营、产品咨询提供决策支...
309613163{专家,软件开发,大数据,数据管理}{}{古墩路,文一路,翠苑}3116205数据分析师(咨询部)2018-03-16 14:13:4616-18薪,生日Par,餐补10k-15k不限...浙江每日互动网络科技股份有限公司开发/测试/运维类软件开发02号线古翠路2号线_古翠路120.111570130.29005073工作职责:1、整理将各部门的数据需求,并将其模板化;2、通过数据分析,发现营销策略的问题并提...
309713163{大数据,数据挖掘}{}{翠苑,文一路,古墩路}3863554数据分析师(校招岗位)2018-03-16 14:13:40扁平化管理,每月餐补10k-15k应届毕业生...浙江每日互动网络科技股份有限公司开发/测试/运维类数据开发12号线古翠路2号线_古翠路120.1145597930.29030981.通过海量数据的分析和挖掘,为业务运营、产品咨询提供决策支持、解决方案等。主要职责包括:1...
309813163{中级,数据挖掘}{}{翠苑,文一路,古墩路}4065238大数据分析工程师2018-03-16 14:13:3616-18薪 互联网独角兽 发展前景佳10k-15k1-3年...浙江每日互动网络科技股份有限公司开发/测试/运维类软件开发02号线古翠路2号线_古翠路120.1145597930.2903098岗位职责:-通过海量数据的分析和挖掘,为业务运营、产品咨询提供决策支持、解决方案等。主要职责...
309950702{数据挖掘,SPSS}{}{虹桥,中山公园,天山路,中山公园,天山路}3763178​数据分析高级主管(配送运营支持方向)2018-03-16 14:12:44平台大,氛围好,各种大牛,福利待遇优10k-20k3-5年...北京三快在线科技有限公司产品/需求/项目类数据分析0NoneNoneNone121.41796931.217836数据分析高级主管(配送运营支持方向)1、推动数据产品的建设(BI系统、报表、监控预警类产品)...
31002474{滴滴}{}None4270916高级数据分析师-EMC(J170525238)2018-03-15 17:57:12产品 核心业务20k-40k3-5年...北京嘀嘀无限科技发展有限公司产品设计|需求分析[产品设计|需求分析]0NoneNoneNone40.0468340.04683岗位职责1.深刻理解数据本质,挖掘不同业务场景下用户体验的数据洞察,驱动公司运营和产品核心策...
31012474{滴滴}{}None4266963数据分析师(J180312020)2018-03-15 12:02:19互联网独角兽 薪资福利好 五险一金 奖金20k-25k5-10年...北京嘀嘀无限科技发展有限公司数据分析[数据分析]0NoneNoneNone40.0468340.04683岗位职责1、梳理运营与推广的业务需求,制定数据报表;2、挖掘数据背后的规律、短板,为业务提供...
310250702{数据挖掘,BI}{}None4178412资深数据分析师2018-03-15 11:20:47大平台25k-35k5-10年...北京三快在线科技有限公司产品/需求/项目类数据分析015号线望京东15号线_望京东116.4877640.008007岗位职责:1、运用互联网思维开展日常经营分析工作。2、重点数据分析报告制作。3、数据报表以及...
310350702{电商,本地生活,SPSS,sas}{电商,本地生活,SPSS,sas}{望京,来广营}4178481资深数据分析师2018-03-15 11:20:46大平台25k-35k3-5年...北京三快在线科技有限公司产品/需求/项目类数据分析015号线望京东15号线_望京东116.4886440.007096岗位职责:1.以数据分析为基础,形成明确且详实的方案及策略输出,为公司管理层提供有质量的决策...
310450702{大数据,数据挖掘,SPSS}{}{望京,来广营}4228595资深数据分析师/高级数据分析师2018-03-15 11:20:45大平台25k-35k5-10年...北京三快在线科技有限公司产品/需求/项目类数据分析015号线望京东15号线_望京东116.4886440.007096岗位职责:1、运用互联网思维开展日常经营分析工作2、重点数据分析报告制作。3、数据报表以及数...
310550702{大数据,数据管理}{}{望京,来广营}4228628资深/高级数据分析师2018-03-15 11:20:44大平台20k-35k3-5年...北京三快在线科技有限公司产品/需求/项目类数据分析015号线望京东15号线_望京东116.4886440.007096岗位职责:1、战区内日常数据分析工作,日月周报的制作。2、数据产品设计规划对接产品开发。3、...
310650702{高级,专员,数据挖掘,SPSS}{}{望京,来广营}3755288销售运营(数据分析)2018-03-15 10:42:20免费班车,员工福利,发展空间大,弹性工作10k-16k3-5年...北京三快在线科技有限公司运营/编辑/客服运营015号线望京东15号线_望京东116.4886440.007096岗位职责:1、深入理解公司运营方向和战略的基础,与业务团队密切合作,对业务运营数据进行深入思...
310750702{}{}{望京,来广营,望京,来广营}3433995数据分析2018-03-14 11:37:37五险一金,补充医疗,年终奖金,节日福利10k-15k1-3年...北京三快在线科技有限公司产品/需求/项目类数据分析015号线望京东15号线_望京东116.48752840.006166岗位职责:1.以数据分析为基础,形成明确且详实的方案及策略输出,为公司管理层提供有质量的决策...
31082474{资深,高级,大数据,数据挖掘,SPSS}{}{西北旺,上地,马连洼}2940758数据分析师2018-03-14 11:29:57核心部门,15薪,技术驱动20k-40k3-5年...北京嘀嘀无限科技发展有限公司开发/测试/运维类数据开发0NoneNoneNone116.29014640.043454职位描述:1.基于业务逻辑,建立业务数据模型,为团队设计反映产品和运营状况的数据报表;2.持...
310950702{旅游,电商,本地生活,SPSS,sas}{旅游,电商,本地生活,SPSS,sas}{望京,来广营,望京,来广营}3604528数据分析2018-03-14 11:29:49免费班车,员工福利,发展空间大,大平台8k-12k1-3年...北京三快在线科技有限公司产品/需求/项目类数据分析015号线望京东15号线_望京东116.48752840.006166岗位职责:1.以数据分析为基础,形成明确且详实的方案及策略输出,为公司管理层提供有质量的决策...
311021218{}{}{兴隆,沙洲}2666997行业数据分析师(财务数据方向)2018-03-15 11:12:26团队氛围好,福利待遇好5k-10k不限...通联数据股份公司产品/需求/项目类数据分析0NoneNoneNone118.9934431.994276岗位职责:从行业研究角度,规划与收集投研需要的关键行业和公司数据。任职要求:本科学历有会计及...
311123291{大数据,BI,业务运营,行业研究}{}None4052663数据分析师(商业&行业&战略方向)2018-03-13 16:01:22十亿级用户,火箭式成长,海量数据,上市公司18k-36k3-5年...厦门美图之家科技有限公司产品/需求/项目类数据分析0NoneNoneNone39.897445116.331398职位描述1:负责美图公司整体商业&行业&战略方向的分析,为各业务线提供策略分析建议;2:从行...
311223291{}{}None3844460数据分析师-厦门2018-03-12 10:37:50海量数据,牛人团队10k-20k不限...厦门美图之家科技有限公司开发/测试/运维类后端开发0NoneNoneNone39.897445116.331398岗位职责:1.建立和完善产品的数据体系、如数据收集、数据模型、数据关键指标等;2.负责产品的...
\n", 3039 | "

3113 rows × 29 columns

\n", 3040 | "
" 3041 | ], 3042 | "text/plain": [ 3043 | " companyId positionLables industryLables \\\n", 3044 | "0 6138 {游戏,建模,数值} {游戏,建模,数值} \n", 3045 | "1 6678 {金融,SQL} {金融,SQL} \n", 3046 | "2 151036 {资深,大数据,数据挖掘} {} \n", 3047 | "3 4760 {大数据,数据挖掘} {} \n", 3048 | "4 68457 {助理,风控,质检} {} \n", 3049 | "5 210490 {金融,数据挖掘,大数据,数据库} {金融,数据挖掘,大数据,数据库} \n", 3050 | "6 51480 {资深,大数据,可视化,商业,业务运营} {} \n", 3051 | "7 57400 {} {} \n", 3052 | "8 117203 {} {} \n", 3053 | "9 183985 {} {} \n", 3054 | "10 67342 {算法,数据挖掘,大数据,建模} {} \n", 3055 | "11 31173 {信息安全,电商,广告营销,大数据,数据可视化} {信息安全,电商,广告营销,大数据,数据可视化} \n", 3056 | "12 146945 {算法,机器学习,深度学习,数据挖掘} {} \n", 3057 | "13 56521 {金融,游戏,广告营销,初级,数据挖掘} {金融,游戏,广告营销,初级,数据挖掘} \n", 3058 | "14 196263 {游戏,SPSS} {游戏,SPSS} \n", 3059 | "15 280497 {股票,证券,期货,外汇,操盘手,经纪人} {股票,证券,期货,外汇,操盘手,经纪人} \n", 3060 | "16 15265 {金融,大数据,商业,业务运营,数据挖掘} {金融,大数据,商业,业务运营,数据挖掘} \n", 3061 | "17 6138 {游戏,广告营销,专员,运营,广告投放} {游戏,广告营销,专员,运营,广告投放} \n", 3062 | "18 6678 {金融,大数据,数据挖掘,数据仓库,SPSS} {金融,大数据,数据挖掘,数据仓库,SPSS} \n", 3063 | "19 151036 {数据挖掘} {} \n", 3064 | "20 57400 {初级} {} \n", 3065 | "21 169524 {电商,大数据,BI,数据挖掘} {电商,大数据,BI,数据挖掘} \n", 3066 | "22 56521 {游戏,广告营销,数据挖掘,SPSS} {游戏,广告营销,数据挖掘,SPSS} \n", 3067 | "23 280497 {股票,证券,期货,操盘手,经纪人} {股票,证券,期货,操盘手,经纪人} \n", 3068 | "24 340169 {金融,中级,算法,数据挖掘,大数据} {金融,中级,算法,数据挖掘,大数据} \n", 3069 | "25 312419 {} {} \n", 3070 | "26 34683 {大数据,SQL} {} \n", 3071 | "27 109625 {金融,业务运营,大数据,数据挖掘,商业分析} {金融,业务运营,大数据,数据挖掘,商业分析} \n", 3072 | "28 31505 {大数据,SPSS,sas} {} \n", 3073 | "29 3422 {大数据,数据挖掘,SPSS} {} \n", 3074 | "... ... ... ... \n", 3075 | "3083 50702 {大数据,数据挖掘,数据管理} {} \n", 3076 | "3084 50702 {统计,数据管理} {} \n", 3077 | "3085 50702 {旅游,商业,SQL,SPSS,sas} {旅游,商业,SQL,SPSS,sas} \n", 3078 | "3086 50702 {旅游,商业} {旅游,商业} \n", 3079 | "3087 50702 {大数据,建模,SPSS} {} \n", 3080 | "3088 50702 {旅游,资深} {旅游,资深} \n", 3081 | "3089 50702 {大数据,SPSS,sas} {} \n", 3082 | "3090 23291 {业务运营,大数据,数据挖掘} {} \n", 3083 | "3091 2474 {滴滴} {} \n", 3084 | "3092 13163 {企业服务,BI} {企业服务,BI} \n", 3085 | "3093 13163 {旅游,大数据,数据挖掘} {旅游,大数据,数据挖掘} \n", 3086 | "3094 13163 {大数据,数据挖掘} {} \n", 3087 | "3095 13163 {搜索,深度学习,人工智能,大数据,数据挖掘} {} \n", 3088 | "3096 13163 {专家,软件开发,大数据,数据管理} {} \n", 3089 | "3097 13163 {大数据,数据挖掘} {} \n", 3090 | "3098 13163 {中级,数据挖掘} {} \n", 3091 | "3099 50702 {数据挖掘,SPSS} {} \n", 3092 | "3100 2474 {滴滴} {} \n", 3093 | "3101 2474 {滴滴} {} \n", 3094 | "3102 50702 {数据挖掘,BI} {} \n", 3095 | "3103 50702 {电商,本地生活,SPSS,sas} {电商,本地生活,SPSS,sas} \n", 3096 | "3104 50702 {大数据,数据挖掘,SPSS} {} \n", 3097 | "3105 50702 {大数据,数据管理} {} \n", 3098 | "3106 50702 {高级,专员,数据挖掘,SPSS} {} \n", 3099 | "3107 50702 {} {} \n", 3100 | "3108 2474 {资深,高级,大数据,数据挖掘,SPSS} {} \n", 3101 | "3109 50702 {旅游,电商,本地生活,SPSS,sas} {旅游,电商,本地生活,SPSS,sas} \n", 3102 | "3110 21218 {} {} \n", 3103 | "3111 23291 {大数据,BI,业务运营,行业研究} {} \n", 3104 | "3112 23291 {} {} \n", 3105 | "\n", 3106 | " businessZones positionId \\\n", 3107 | "0 {石牌,岗顶,龙口} 4362228 \n", 3108 | "1 None 4321741 \n", 3109 | "2 {鲁班路,五里桥,鲁班路,五里桥,鲁班路,五里桥} 3928874 \n", 3110 | "3 None 4118616 \n", 3111 | "4 {西溪,古墩路,古墩路} 4264098 \n", 3112 | "5 {科技园} 4055455 \n", 3113 | "6 {长征,金沙江路,曹杨} 3700674 \n", 3114 | "7 None 1845914 \n", 3115 | "8 None 2353221 \n", 3116 | "9 {西丽} 4287427 \n", 3117 | "10 None 4411559 \n", 3118 | "11 {国贸,呼家楼} 2744665 \n", 3119 | "12 None 2716397 \n", 3120 | "13 {科技园,科技园,大冲,大冲} 3586368 \n", 3121 | "14 {科技园} 4252645 \n", 3122 | "15 {香蜜湖,车公庙,下沙,车公庙,车公庙,香蜜湖,香蜜湖,下沙,下沙} 3800615 \n", 3123 | "16 {后海,南油,蛇口} 3998833 \n", 3124 | "17 {石牌,岗顶,龙口} 4090039 \n", 3125 | "18 {五道口,清华大学,中关村} 4007760 \n", 3126 | "19 {鲁班路} 3358144 \n", 3127 | "20 {红星路,东门大桥,春熙路} 2610548 \n", 3128 | "21 None 4316043 \n", 3129 | "22 {科技园,南山医院,大冲} 3646985 \n", 3130 | "23 {车公庙,香蜜湖,下沙,车公庙,香蜜湖,下沙,车公庙,香蜜湖,下沙} 3806048 \n", 3131 | "24 {江南} 4273040 \n", 3132 | "25 None 4185934 \n", 3133 | "26 None 4373739 \n", 3134 | "27 {张江} 4367503 \n", 3135 | "28 None 3507244 \n", 3136 | "29 {长阳路,周家嘴路,平凉路} 4246467 \n", 3137 | "... ... ... \n", 3138 | "3083 None 4282351 \n", 3139 | "3084 None 4282383 \n", 3140 | "3085 {望京,望京,来广营,来广营} 3167634 \n", 3141 | "3086 {望京,望京,来广营,来广营} 2766824 \n", 3142 | "3087 None 4275374 \n", 3143 | "3088 {望京,来广营,望京,来广营} 2728418 \n", 3144 | "3089 {望京,来广营,望京,来广营,望京,来广营} 3900137 \n", 3145 | "3090 None 3973930 \n", 3146 | "3091 None 3777055 \n", 3147 | "3092 {火车站,火车站,天目西路,天目西路,闸北公园,闸北公园} 2547094 \n", 3148 | "3093 {古墩路,文一路,翠苑} 3131711 \n", 3149 | "3094 {翠苑,文一路,古墩路} 4017275 \n", 3150 | "3095 {翠苑,文一路,古墩路} 3694231 \n", 3151 | "3096 {古墩路,文一路,翠苑} 3116205 \n", 3152 | "3097 {翠苑,文一路,古墩路} 3863554 \n", 3153 | "3098 {翠苑,文一路,古墩路} 4065238 \n", 3154 | "3099 {虹桥,中山公园,天山路,中山公园,天山路} 3763178 \n", 3155 | "3100 None 4270916 \n", 3156 | "3101 None 4266963 \n", 3157 | "3102 None 4178412 \n", 3158 | "3103 {望京,来广营} 4178481 \n", 3159 | "3104 {望京,来广营} 4228595 \n", 3160 | "3105 {望京,来广营} 4228628 \n", 3161 | "3106 {望京,来广营} 3755288 \n", 3162 | "3107 {望京,来广营,望京,来广营} 3433995 \n", 3163 | "3108 {西北旺,上地,马连洼} 2940758 \n", 3164 | "3109 {望京,来广营,望京,来广营} 3604528 \n", 3165 | "3110 {兴隆,沙洲} 2666997 \n", 3166 | "3111 None 4052663 \n", 3167 | "3112 None 3844460 \n", 3168 | "\n", 3169 | " positionName createTime \\\n", 3170 | "0 数据分析专员 2018-04-12 15:18:31 \n", 3171 | "1 数据分析师 2018-04-12 15:16:47 \n", 3172 | "2 数据分析师 2018-04-12 15:15:44 \n", 3173 | "3 资深数据分析师(数据产品) 2018-04-12 15:15:07 \n", 3174 | "4 数据分析师(儿科方向) 2018-04-12 15:12:52 \n", 3175 | "5 BI高级数据分析师 2018-04-12 15:09:51 \n", 3176 | "6 业务分析专家、数据分析专家 2018-04-12 15:07:29 \n", 3177 | "7 Data Analyst 数据分析师 2018-04-12 15:06:55 \n", 3178 | "8 数据分析员 2018-04-12 15:06:52 \n", 3179 | "9 数据分析专员/助理 2018-04-12 14:35:37 \n", 3180 | "10 数据分析师 2018-04-12 15:04:51 \n", 3181 | "11 BI 高级数据产品经理 高级数据分析产品经理 2018-04-12 15:03:27 \n", 3182 | "12 大数据分析师 2018-04-12 15:02:54 \n", 3183 | "13 数据分析师(数学相关专业应届) 2018-04-12 15:02:24 \n", 3184 | "14 游戏数据分析师 2018-04-12 15:00:11 \n", 3185 | "15 金融(数据分析师)助理 2018-04-12 14:54:15 \n", 3186 | "16 高级业务数据分析师 2018-04-12 10:52:30 \n", 3187 | "17 数据分析师(市场方向) 2018-04-12 15:18:31 \n", 3188 | "18 数据分析师 2018-04-12 15:16:47 \n", 3189 | "19 数据分析 2018-04-12 15:15:44 \n", 3190 | "20 数据分析师-Data Analyst 2018-04-12 15:06:55 \n", 3191 | "21 数据分析工程师【跨境电商】 2018-04-12 14:05:38 \n", 3192 | "22 数据分析经理 2018-04-12 15:02:23 \n", 3193 | "23 金融外汇数据分析师 2018-04-12 14:54:15 \n", 3194 | "24 数据分析师 2018-04-12 14:52:40 \n", 3195 | "25 数据分析师 2018-04-12 14:52:04 \n", 3196 | "26 BI数据分析师 2018-04-12 14:50:43 \n", 3197 | "27 数据分析师 2018-04-12 13:53:52 \n", 3198 | "28 数据分析 2018-04-12 14:49:53 \n", 3199 | "29 数据分析师 2018-04-12 14:49:37 \n", 3200 | "... ... ... \n", 3201 | "3083 高级数据分析师(BI业务方向) 2018-03-19 12:16:22 \n", 3202 | "3084 高级销售数据分析师(绩效方向) 2018-03-19 12:16:19 \n", 3203 | "3085 高级数据分析师 2018-03-19 11:20:47 \n", 3204 | "3086 数据分析-业务分析方向 2018-03-19 11:20:46 \n", 3205 | "3087 高星酒店业务数据分析师 2018-03-16 14:32:32 \n", 3206 | "3088 数据分析师 2018-03-19 11:20:46 \n", 3207 | "3089 高级数据分析师 2018-03-19 11:20:44 \n", 3208 | "3090 数据分析师-美拍 2018-03-19 09:30:15 \n", 3209 | "3091 数据分析高级专家(运营分析方向)(J171031012) 2018-03-19 11:51:58 \n", 3210 | "3092 商业数据分析 2018-03-16 14:13:39 \n", 3211 | "3093 数据分析(实习生) 2018-03-16 14:13:51 \n", 3212 | "3094 数据分析 2018-03-16 14:13:50 \n", 3213 | "3095 数据分析师(校招岗位) 2018-03-16 14:13:46 \n", 3214 | "3096 数据分析师(咨询部) 2018-03-16 14:13:46 \n", 3215 | "3097 数据分析师(校招岗位) 2018-03-16 14:13:40 \n", 3216 | "3098 大数据分析工程师 2018-03-16 14:13:36 \n", 3217 | "3099 ​数据分析高级主管(配送运营支持方向) 2018-03-16 14:12:44 \n", 3218 | "3100 高级数据分析师-EMC(J170525238) 2018-03-15 17:57:12 \n", 3219 | "3101 数据分析师(J180312020) 2018-03-15 12:02:19 \n", 3220 | "3102 资深数据分析师 2018-03-15 11:20:47 \n", 3221 | "3103 资深数据分析师 2018-03-15 11:20:46 \n", 3222 | "3104 资深数据分析师/高级数据分析师 2018-03-15 11:20:45 \n", 3223 | "3105 资深/高级数据分析师 2018-03-15 11:20:44 \n", 3224 | "3106 销售运营(数据分析) 2018-03-15 10:42:20 \n", 3225 | "3107 数据分析 2018-03-14 11:37:37 \n", 3226 | "3108 数据分析师 2018-03-14 11:29:57 \n", 3227 | "3109 数据分析 2018-03-14 11:29:49 \n", 3228 | "3110 行业数据分析师(财务数据方向) 2018-03-15 11:12:26 \n", 3229 | "3111 数据分析师(商业&行业&战略方向) 2018-03-13 16:01:22 \n", 3230 | "3112 数据分析师-厦门 2018-03-12 10:37:50 \n", 3231 | "\n", 3232 | " positionAdvantage salary workYear \\\n", 3233 | "0 氛围好,牛人多,年终奖 5k-8k 1-3年 \n", 3234 | "1 14薪,每月补助,年轻团队,升职空间 10k-16k 1-3年 \n", 3235 | "2 发展空间大,弹性工作,免费班车,无限零食 10k-20k 1-3年 \n", 3236 | "3 前景好,福利优 25k-50k 5-10年 \n", 3237 | "4 包三餐,牛人多,工作氛围佳,有前景 1k-2k 不限 \n", 3238 | "5 年轻团队,公司福利好,发展平台大 12k-20k 3-5年 \n", 3239 | "6 平台好、发展空间大、扁平管理、弹性工作 20k-40k 3-5年 \n", 3240 | "7 周末双休 五险一金 带薪年假 年终多薪 8k-15k 1-3年 \n", 3241 | "8 十三薪;五险一金;出国机会;弹性工作 4k-8k 1-3年 \n", 3242 | "9 五险一金,节日福利,晋升空间,扁平管理 4k-8k 1-3年 \n", 3243 | "10 股权期权,六险一金,同事棒,等你来 13k-25k 1-3年 \n", 3244 | "11 五险一金,海外年会,扁平管理,期权 15k-30k 3-5年 \n", 3245 | "12 大数据分析,数据挖掘,机器学习,人工智能 10k-20k 3-5年 \n", 3246 | "13 大平台 5k-7k 应届毕业生 \n", 3247 | "14 福利好,待遇棒,补贴多,老板好 5k-10k 1-3年 \n", 3248 | "15 绩效奖金,带薪年假,美女多多,员工旅游 6k-8k 不限 \n", 3249 | "16 B+轮,团队牛,培训多,假期长 15k-25k 5-10年 \n", 3250 | "17 餐饮补贴,六险一金,团队氛围好,成长速度快 6k-8k 1-3年 \n", 3251 | "18 14薪,每月补助,团队年轻,节假双休 16k-25k 3-5年 \n", 3252 | "19 发展空间,弹性工作 10k-20k 1-3年 \n", 3253 | "20 带薪年假,通讯补贴,五险一金 5k-7k 不限 \n", 3254 | "21 数据分析,爬虫工程师,BI 10k-20k 1-3年 \n", 3255 | "22 4A公司,平台好,晋升快 9k-13k 3-5年 \n", 3256 | "23 绩效奖金,带薪年假,弹性工作,美女多多 6k-8k 不限 \n", 3257 | "24 五险一金,员工旅游,年终奖,免费班车 15k-30k 3-5年 \n", 3258 | "25 从0-1 20k-40k 3-5年 \n", 3259 | "26 数据大牛,发展空间,福利丰富 10k-13k 1-3年 \n", 3260 | "27 移动金融,未来金矿,精英团队,助力成长 10k-15k 1-3年 \n", 3261 | "28 周末双休,五险一金,年终奖 8k-12k 3-5年 \n", 3262 | "29 免费三餐,弹性工作,硅谷氛围,带薪年假 6k-8k 1-3年 \n", 3263 | "... ... ... ... \n", 3264 | "3083 晋升空间大,团队牛人多,六险一金,15薪 15k-30k 3-5年 \n", 3265 | "3084 晋升空间大,团队牛人多,六险一金,15薪 15k-30k 3-5年 \n", 3266 | "3085 平台好,发展空间大 20k-30k 3-5年 \n", 3267 | "3086 平台好,发展空间大 15k-25k 1-3年 \n", 3268 | "3087 大平台,福利好,晋升空间 15k-20k 3-5年 \n", 3269 | "3088 互联网公司,大平台,待遇好 15k-25k 1-3年 \n", 3270 | "3089 平台大,福利好,发展好 20k-30k 3-5年 \n", 3271 | "3090 上市公司 15k-30k 1-3年 \n", 3272 | "3091 广阔平台 诱惑福利 急速成长 30k-60k 5-10年 \n", 3273 | "3092 互联网独角兽公司,海量数据,福利好 12k-18k 不限 \n", 3274 | "3093 互联网独角兽 发展平台佳 4k-5k 应届毕业生 \n", 3275 | "3094 16-18薪 互联网独角兽 10k-15k 1-3年 \n", 3276 | "3095 16-18薪 带薪年假 五险一金 生日Party 餐补 10k-15k 应届毕业生 \n", 3277 | "3096 16-18薪,生日Par,餐补 10k-15k 不限 \n", 3278 | "3097 扁平化管理,每月餐补 10k-15k 应届毕业生 \n", 3279 | "3098 16-18薪 互联网独角兽 发展前景佳 10k-15k 1-3年 \n", 3280 | "3099 平台大,氛围好,各种大牛,福利待遇优 10k-20k 3-5年 \n", 3281 | "3100 产品 核心业务 20k-40k 3-5年 \n", 3282 | "3101 互联网独角兽 薪资福利好 五险一金 奖金 20k-25k 5-10年 \n", 3283 | "3102 大平台 25k-35k 5-10年 \n", 3284 | "3103 大平台 25k-35k 3-5年 \n", 3285 | "3104 大平台 25k-35k 5-10年 \n", 3286 | "3105 大平台 20k-35k 3-5年 \n", 3287 | "3106 免费班车,员工福利,发展空间大,弹性工作 10k-16k 3-5年 \n", 3288 | "3107 五险一金,补充医疗,年终奖金,节日福利 10k-15k 1-3年 \n", 3289 | "3108 核心部门,15薪,技术驱动 20k-40k 3-5年 \n", 3290 | "3109 免费班车,员工福利,发展空间大,大平台 8k-12k 1-3年 \n", 3291 | "3110 团队氛围好,福利待遇好 5k-10k 不限 \n", 3292 | "3111 十亿级用户,火箭式成长,海量数据,上市公司 18k-36k 3-5年 \n", 3293 | "3112 海量数据,牛人团队 10k-20k 不限 \n", 3294 | "\n", 3295 | " ... companyFullName \\\n", 3296 | "0 ... 广州易幻网络科技有限公司 \n", 3297 | "1 ... 人人贷商务顾问(北京)有限公司 \n", 3298 | "2 ... 鼎必程(上海)信息技术有限公司 \n", 3299 | "3 ... 唯品会(中国)有限公司 \n", 3300 | "4 ... 上海依图信息技术有限公司 \n", 3301 | "5 ... 深圳汇商通盈科技有限公司 \n", 3302 | "6 ... 拉扎斯网络科技(上海)有限公司 \n", 3303 | "7 ... 昊盈信息技术(成都)有限公司 \n", 3304 | "8 ... 西安亚购信息科技有限公司 \n", 3305 | "9 ... 深圳市华商生物科技有限公司 \n", 3306 | "10 ... 浙江互联网金融资产交易中心股份有限公司 \n", 3307 | "11 ... 悠易互通(北京)广告有限公司 \n", 3308 | "12 ... 武汉智领云科技有限公司 \n", 3309 | "13 ... 北京思恩客广告有限公司 \n", 3310 | "14 ... 深圳市叁亿广告有限公司 \n", 3311 | "15 ... 深圳市名瑞德国际金融服务有限公司 \n", 3312 | "16 ... 深圳卫盈智信科技有限公司 \n", 3313 | "17 ... 广州易幻网络科技有限公司 \n", 3314 | "18 ... 人人贷商务顾问(北京)有限公司 \n", 3315 | "19 ... 鼎必程(上海)信息技术有限公司 \n", 3316 | "20 ... 昊盈信息技术(成都)有限公司 \n", 3317 | "21 ... 深圳市萨拉摩尔电子商务有限公司 \n", 3318 | "22 ... 北京思恩客广告有限公司 \n", 3319 | "23 ... 深圳市名瑞德国际金融服务有限公司 \n", 3320 | "24 ... 信雅达系统工程股份有限公司 \n", 3321 | "25 ... 北京吾好科技发展有限公司 \n", 3322 | "26 ... 上海秦苍信息科技有限公司 \n", 3323 | "27 ... 上海数禾信息科技有限公司 \n", 3324 | "28 ... 深圳市轻松行商旅网有限公司 \n", 3325 | "29 ... 上海流利说信息技术有限公司 \n", 3326 | "... ... ... \n", 3327 | "3083 ... 北京三快在线科技有限公司 \n", 3328 | "3084 ... 北京三快在线科技有限公司 \n", 3329 | "3085 ... 北京三快在线科技有限公司 \n", 3330 | "3086 ... 北京三快在线科技有限公司 \n", 3331 | "3087 ... 北京三快在线科技有限公司 \n", 3332 | "3088 ... 北京三快在线科技有限公司 \n", 3333 | "3089 ... 北京三快在线科技有限公司 \n", 3334 | "3090 ... 厦门美图之家科技有限公司 \n", 3335 | "3091 ... 北京嘀嘀无限科技发展有限公司 \n", 3336 | "3092 ... 浙江每日互动网络科技股份有限公司 \n", 3337 | "3093 ... 浙江每日互动网络科技股份有限公司 \n", 3338 | "3094 ... 浙江每日互动网络科技股份有限公司 \n", 3339 | "3095 ... 浙江每日互动网络科技股份有限公司 \n", 3340 | "3096 ... 浙江每日互动网络科技股份有限公司 \n", 3341 | "3097 ... 浙江每日互动网络科技股份有限公司 \n", 3342 | "3098 ... 浙江每日互动网络科技股份有限公司 \n", 3343 | "3099 ... 北京三快在线科技有限公司 \n", 3344 | "3100 ... 北京嘀嘀无限科技发展有限公司 \n", 3345 | "3101 ... 北京嘀嘀无限科技发展有限公司 \n", 3346 | "3102 ... 北京三快在线科技有限公司 \n", 3347 | "3103 ... 北京三快在线科技有限公司 \n", 3348 | "3104 ... 北京三快在线科技有限公司 \n", 3349 | "3105 ... 北京三快在线科技有限公司 \n", 3350 | "3106 ... 北京三快在线科技有限公司 \n", 3351 | "3107 ... 北京三快在线科技有限公司 \n", 3352 | "3108 ... 北京嘀嘀无限科技发展有限公司 \n", 3353 | "3109 ... 北京三快在线科技有限公司 \n", 3354 | "3110 ... 通联数据股份公司 \n", 3355 | "3111 ... 厦门美图之家科技有限公司 \n", 3356 | "3112 ... 厦门美图之家科技有限公司 \n", 3357 | "\n", 3358 | " firstType secondType isSchoolJob subwayline stationname \\\n", 3359 | "0 产品/需求/项目类 数据分析 0 3号线 石牌桥 \n", 3360 | "1 开发/测试/运维类 数据开发 0 15号线 五道口 \n", 3361 | "2 产品/需求/项目类 数据分析 0 7号线 长清路 \n", 3362 | "3 产品/需求/项目类 数据分析 0 None None \n", 3363 | "4 产品/需求/项目类 数据分析 0 None None \n", 3364 | "5 开发/测试/运维类 数据开发 0 2号线/蛇口线 深大 \n", 3365 | "6 产品/需求/项目类 数据分析 0 13号线 祁连山南路 \n", 3366 | "7 市场与销售 市场/营销 0 None None \n", 3367 | "8 产品 产品经理 0 None None \n", 3368 | "9 产品/需求/项目类 数据分析 0 5号线/环中线 留仙洞 \n", 3369 | "10 开发/测试/运维类 数据开发 0 None None \n", 3370 | "11 产品/需求/项目类 产品设计/需求分析 0 10号线 国贸 \n", 3371 | "12 开发/测试/运维类 数据开发 0 None None \n", 3372 | "13 开发/测试/运维类 数据开发 1 1号线/罗宝线 深大 \n", 3373 | "14 产品/需求/项目类 数据分析 0 1号线/罗宝线 深大 \n", 3374 | "15 金融类 证券/期货 0 7号线 农林 \n", 3375 | "16 产品/需求/项目类 数据分析 0 2号线/蛇口线 海月 \n", 3376 | "17 产品/需求/项目类 数据分析 0 3号线 石牌桥 \n", 3377 | "18 产品/需求/项目类 数据分析 0 15号线 五道口 \n", 3378 | "19 产品/需求/项目类 数据分析 0 7号线 长清路 \n", 3379 | "20 产品/需求/项目类 数据分析 0 4号线 锦江宾馆 \n", 3380 | "21 产品/需求/项目类 数据分析 0 None None \n", 3381 | "22 开发/测试/运维类 数据开发 0 1号线/罗宝线 深大 \n", 3382 | "23 金融类 证券/期货 0 7号线 农林 \n", 3383 | "24 开发/测试/运维类 数据开发 0 None None \n", 3384 | "25 运营/编辑/客服 运营 0 None None \n", 3385 | "26 产品/需求/项目类 数据分析 0 4号线 塘桥 \n", 3386 | "27 产品/需求/项目类 数据分析 0 2号线 张江高科 \n", 3387 | "28 产品/需求/项目类 数据分析 0 7号线 车公庙 \n", 3388 | "29 开发/测试/运维类 数据开发 0 8号线 黄兴路 \n", 3389 | "... ... ... ... ... ... \n", 3390 | "3083 开发/测试/运维类 数据开发 0 None None \n", 3391 | "3084 产品/需求/项目类 数据分析 0 None None \n", 3392 | "3085 产品/需求/项目类 数据分析 0 15号线 望京东 \n", 3393 | "3086 产品/需求/项目类 数据分析 0 15号线 望京东 \n", 3394 | "3087 产品/需求/项目类 数据分析 0 None None \n", 3395 | "3088 开发/测试/运维类 数据开发 0 15号线 望京东 \n", 3396 | "3089 产品/需求/项目类 数据分析 0 15号线 望京东 \n", 3397 | "3090 产品/需求/项目类 数据分析 0 None None \n", 3398 | "3091 产品设计|需求分析 [产品设计|需求分析] 0 None None \n", 3399 | "3092 市场与销售 数据分析 0 4号线 上海火车站 \n", 3400 | "3093 产品/需求/项目类 数据分析 1 2号线 古翠路 \n", 3401 | "3094 产品/需求/项目类 数据分析 0 2号线 古翠路 \n", 3402 | "3095 开发/测试/运维类 数据开发 1 2号线 古翠路 \n", 3403 | "3096 开发/测试/运维类 软件开发 0 2号线 古翠路 \n", 3404 | "3097 开发/测试/运维类 数据开发 1 2号线 古翠路 \n", 3405 | "3098 开发/测试/运维类 软件开发 0 2号线 古翠路 \n", 3406 | "3099 产品/需求/项目类 数据分析 0 None None \n", 3407 | "3100 产品设计|需求分析 [产品设计|需求分析] 0 None None \n", 3408 | "3101 数据分析 [数据分析] 0 None None \n", 3409 | "3102 产品/需求/项目类 数据分析 0 15号线 望京东 \n", 3410 | "3103 产品/需求/项目类 数据分析 0 15号线 望京东 \n", 3411 | "3104 产品/需求/项目类 数据分析 0 15号线 望京东 \n", 3412 | "3105 产品/需求/项目类 数据分析 0 15号线 望京东 \n", 3413 | "3106 运营/编辑/客服 运营 0 15号线 望京东 \n", 3414 | "3107 产品/需求/项目类 数据分析 0 15号线 望京东 \n", 3415 | "3108 开发/测试/运维类 数据开发 0 None None \n", 3416 | "3109 产品/需求/项目类 数据分析 0 15号线 望京东 \n", 3417 | "3110 产品/需求/项目类 数据分析 0 None None \n", 3418 | "3111 产品/需求/项目类 数据分析 0 None None \n", 3419 | "3112 开发/测试/运维类 后端开发 0 None None \n", 3420 | "\n", 3421 | " linestaion longitude \\\n", 3422 | "0 1号线_体育中心;3号线_石牌桥;3号线_岗顶;3号线_华师 113.33955 \n", 3423 | "1 4号线大兴线_北京大学东门;13号线_五道口;15号线_清华东路西口 116.330778 \n", 3424 | "2 7号线_长清路;7号线_后滩;13号线_世博大道 121.475215 \n", 3425 | "3 None 0.0 \n", 3426 | "4 None 120.100748 \n", 3427 | "5 2号线/蛇口线_科苑;1号线/罗宝线_高新园;1号线/罗宝线_深大 113.952429 \n", 3428 | "6 13号线_祁连山南路;13号线_真北路;13号线_大渡河路 121.379945 \n", 3429 | "7 None None \n", 3430 | "8 None 108.877037 \n", 3431 | "9 5号线/环中线_留仙洞 113.939763 \n", 3432 | "10 None 120.165667 \n", 3433 | "11 1号线_永安里;1号线_国贸;6号线_呼家楼;6号线_东大桥;10号线_呼家楼;10号线_金... 116.459298 \n", 3434 | "12 None 114.410564 \n", 3435 | "13 1号线/罗宝线_高新园;1号线/罗宝线_深大 113.943551 \n", 3436 | "14 1号线/罗宝线_深大 113.937503 \n", 3437 | "15 1号线/罗宝线_香蜜湖;1号线/罗宝线_车公庙;1号线/罗宝线_竹子林;11号线/机场线_车... 114.026816 \n", 3438 | "16 2号线/蛇口线_海月;2号线/蛇口线_登良;2号线/蛇口线_后海;11号线/机场线_后海 113.94212 \n", 3439 | "17 1号线_体育中心;3号线_石牌桥;3号线_岗顶;3号线_华师 113.33955 \n", 3440 | "18 4号线大兴线_北京大学东门;13号线_五道口;15号线_清华东路西口 116.330778 \n", 3441 | "19 7号线_长清路;7号线_后滩;13号线_世博大道 121.475215 \n", 3442 | "20 1号线_锦江宾馆;1号线_天府广场;2号线_天府广场;2号线_春熙路;2号线_东门大桥;3号... 104.08028 \n", 3443 | "21 None 114.0592 \n", 3444 | "22 1号线/罗宝线_高新园;1号线/罗宝线_深大 113.9446 \n", 3445 | "23 1号线/罗宝线_香蜜湖;1号线/罗宝线_车公庙;1号线/罗宝线_竹子林;11号线/机场线_车... 114.026816 \n", 3446 | "24 None 120.161139 \n", 3447 | "25 None 0.0 \n", 3448 | "26 2号线_上海科技馆;2号线_世纪大道;4号线_世纪大道;4号线_浦电路(4号线);4号线_蓝... 121.531349 \n", 3449 | "27 2号线_金科路;2号线_张江高科 121.600933 \n", 3450 | "28 1号线/罗宝线_香蜜湖;1号线/罗宝线_车公庙;11号线/机场线_车公庙;7号线_车公庙;7... 114.031537 \n", 3451 | "29 8号线_黄兴路;12号线_江浦公园;12号线_宁国路;12号线_隆昌路 121.533501 \n", 3452 | "... ... ... \n", 3453 | "3083 None 0.0 \n", 3454 | "3084 None 0.0 \n", 3455 | "3085 15号线_望京东 116.48864 \n", 3456 | "3086 15号线_望京东 116.48864 \n", 3457 | "3087 None 0.0 \n", 3458 | "3088 15号线_望京东 116.48864 \n", 3459 | "3089 15号线_望京东 116.48864 \n", 3460 | "3090 None 39.897445 \n", 3461 | "3091 None 40.04683 \n", 3462 | "3092 1号线_新闸路;1号线_汉中路;1号线_上海火车站;3号线_上海火车站;4号线_上海火车站;... 121.455363 \n", 3463 | "3093 2号线_古翠路 120.1115701 \n", 3464 | "3094 2号线_古翠路 120.11455979 \n", 3465 | "3095 2号线_古翠路 120.11455979 \n", 3466 | "3096 2号线_古翠路 120.1115701 \n", 3467 | "3097 2号线_古翠路 120.11455979 \n", 3468 | "3098 2号线_古翠路 120.11455979 \n", 3469 | "3099 None 121.417969 \n", 3470 | "3100 None 40.04683 \n", 3471 | "3101 None 40.04683 \n", 3472 | "3102 15号线_望京东 116.48776 \n", 3473 | "3103 15号线_望京东 116.48864 \n", 3474 | "3104 15号线_望京东 116.48864 \n", 3475 | "3105 15号线_望京东 116.48864 \n", 3476 | "3106 15号线_望京东 116.48864 \n", 3477 | "3107 15号线_望京东 116.487528 \n", 3478 | "3108 None 116.290146 \n", 3479 | "3109 15号线_望京东 116.487528 \n", 3480 | "3110 None 118.99344 \n", 3481 | "3111 None 39.897445 \n", 3482 | "3112 None 39.897445 \n", 3483 | "\n", 3484 | " latitude content \n", 3485 | "0 23.1367 岗位职责1.挖掘及分析游戏项目数据2.管理及建立数据模型3.游戏用户行为分析4.定期编写相关... \n", 3486 | "1 39.994714 工作职责:1.建立相关统计模型,对用户交易数据进行挖掘,对公司战略决策、业务优化需求提供支撑... \n", 3487 | "2 31.183898 岗位职责:1)负责应收账款的基础数据分析,包括资产透析,可视化渲染以及最终分析报告;2)深入... \n", 3488 | "3 0.0 岗位职责:1、对业务部门进行数据需求支持,挖掘分析主题,开展数据分析工作,基于数据分析成果,... \n", 3489 | "4 30.270038 全职/实习/兼职皆可职责定义:•活跃度运营:调动标注人员的积极性,维护合作关系;面向标注人员... \n", 3490 | "5 22.537847 岗位职责:•与数据架构师紧密合作,用各种数据库、数据仓库、BI工具和必要的编程工具实现数据体... \n", 3491 | "6 31.231299 【岗位职责】1、负责外卖配送业务流程、产品规则模块分析等工作;2、建立业务运营分析体系,推动... \n", 3492 | "7 None PLEASEAPPLYTHROUGHTHISLINK:https://nexa-corp.w... \n", 3493 | "8 34.190449 职位描述:1、实时响应各业务部门的数据需求,并总结需求,制定报表及系统开发计划;2、负责网站... \n", 3494 | "9 22.567951 岗位职责:1、负责运营中心每日数据收集、审核、整理、分析、呈现工作;2、根据公司要求,将搜集... \n", 3495 | "10 30.185677 岗位职责:1.深入了解公司业务,为业务团队提供数据分析及大数据预测,对业务发展提供决策建议;... \n", 3496 | "11 39.918784 高级数据产品经理岗位描述:1.规划营销大数据平台软件产品(DMP),设计产品功能,协调开发资... \n", 3497 | "12 30.457982 智领云的云计算大数据平台研发团队聚集了硅谷和本土的优秀创业人才,团队成员中有的在硅谷著名科技... \n", 3498 | "13 22.548179 1、负责日常广告投放业务的数据分析工作;2、基于数据分析得到有价值信息,为业务优化提供策略和... \n", 3499 | "14 22.543597 岗位要求:1、数据逻辑清晰,数据敏感度高;计算机专业、统计学专业优先考虑,,MMO游戏或SL... \n", 3500 | "15 22.535326 岗位职责:1.负责公司指定客户账户或配资账户的资金运作;2.分析行情,做出每日交易计划;3.... \n", 3501 | "16 22.50907 职责描述:1负责指定业务的数据分析工作,梳理并持续优化业务流程,为业务运营提升提供决策分析和... \n", 3502 | "17 23.1367 岗位职责:1、学习广告投放、市场运营规律,将游戏产品在Facebook和GoogleAdWo... \n", 3503 | "18 39.994714 工作职责:1.深度学习了解业务,参与数据仓库构建,数据产品开发等。2.建立完善数据分析体系,... \n", 3504 | "19 31.183898 工作内容:1)负责海量数据监控和分析,结合数据产品或者业务的规划,设计主题和策略,从数据层面... \n", 3505 | "20 30.65452986 Description:●Involvedincompanydatawarehouseset... \n", 3506 | "21 22.611772 【职位描述】1、参与电商业务数据清洗、模型探索、算法构建;2、通过专题研究,对业务数据和财务... \n", 3507 | "22 22.548341 工作职责:a、基于用户互联网行为特征和游戏互联网数据,为客户提供数据分析服务;b、负责业务数... \n", 3508 | "23 22.535326 【申请条件】希望您是专科、本科毕业生,最好是金融专业。希望您是一位有眼光及远大志向,致力于在... \n", 3509 | "24 30.185305 岗位职责:1、负责运用大数据金融风控模型的建立、优化和完善;2、负责金融海量数据的分析和挖掘... \n", 3510 | "25 0.0 岗位职责:·深入了解互联网业务,建立基于业务场景的数据分析需求,解决各类数据分析问题;·对业... \n", 3511 | "26 31.217794 工作内容:1.负责日常的数据运营分析,整理需求,协助业务分析问题;2.数据的梳理与清洗,ET... \n", 3512 | "27 31.206393 职位描述:数禾科技,上市公司分众传媒(股票代码002027)旗下互联网金融子公司,团队成员来... \n", 3513 | "28 22.534054 岗位职责:1)负责业务数据收集整理,对多种数据源的数据进行挖掘和深度分析;2)根据需求,完成... \n", 3514 | "29 31.271454 数据分析师工作职责:1.日常数据监控以及数据预警,异常数据解读挖掘,并与业务同事沟通解决方案... \n", 3515 | "... ... ... \n", 3516 | "3083 0.0 岗位职责:数据报表维护:按时准确更新手工周期性报告(日,周,月等);运维已有线上报表(排故和... \n", 3517 | "3084 0.0 岗位职责:1、按时、准确的提供各指标的日数据报表,针对相关联报表进行有效整合;2、针对每日追... \n", 3518 | "3085 40.007096 工作职责:1.旅游业务日常系统数据经营分析,通过数据监控快速精准地发现问题,透过数据现象定位... \n", 3519 | "3086 40.007096 工作内容1.负责旅游业务相关的数据及分析需求,推动该业务系统性分析框架的构建;2.通过数据监... \n", 3520 | "3087 0.0 岗位职责:负责监控高星酒店业务的核心KPI完成情况,以及行业走势,并对数据进行分析,逐层剖... \n", 3521 | "3088 40.007096 工作内容1.负责旅游业务相关的数据及分析需求,推动该业务系统性分析框架的构建;2.通过数据监... \n", 3522 | "3089 40.007096 工作职责:1.日常经营数据分析,通过数据分析快速精准发现问题,指导业务优化改进。2.对业务数... \n", 3523 | "3090 116.331398 岗位职责:1.整体负责美图公司旗下美拍产品的数据分析、挖掘工作。2.建设完整的数据体系,对产... \n", 3524 | "3091 40.04683 岗位职责1.与业务方一起回答重要的商业问题和产品问题,高效的拆析业务当前的核心痛点和机会。2... \n", 3525 | "3092 31.244656 Responsibilities:·Innovate,experiment,prototyp... \n", 3526 | "3093 30.29005073 个旅是个推旗下的旅游大数据分析平台。个推作为独立的全领域移动大数据服务商,在消息推送领域已深... \n", 3527 | "3094 30.2903098 职位描述:1、负责日常运营数据的收集和分析,通过数据反馈发现改进点(挖掘算法优化空间和流程优... \n", 3528 | "3095 30.2903098 数据分析师(校招岗位)岗位职责:1.通过海量数据的分析和挖掘,为业务运营、产品咨询提供决策支... \n", 3529 | "3096 30.29005073 工作职责:1、整理将各部门的数据需求,并将其模板化;2、通过数据分析,发现营销策略的问题并提... \n", 3530 | "3097 30.2903098 1.通过海量数据的分析和挖掘,为业务运营、产品咨询提供决策支持、解决方案等。主要职责包括:1... \n", 3531 | "3098 30.2903098 岗位职责:-通过海量数据的分析和挖掘,为业务运营、产品咨询提供决策支持、解决方案等。主要职责... \n", 3532 | "3099 31.217836 数据分析高级主管(配送运营支持方向)1、推动数据产品的建设(BI系统、报表、监控预警类产品)... \n", 3533 | "3100 40.04683 岗位职责1.深刻理解数据本质,挖掘不同业务场景下用户体验的数据洞察,驱动公司运营和产品核心策... \n", 3534 | "3101 40.04683 岗位职责1、梳理运营与推广的业务需求,制定数据报表;2、挖掘数据背后的规律、短板,为业务提供... \n", 3535 | "3102 40.008007 岗位职责:1、运用互联网思维开展日常经营分析工作。2、重点数据分析报告制作。3、数据报表以及... \n", 3536 | "3103 40.007096 岗位职责:1.以数据分析为基础,形成明确且详实的方案及策略输出,为公司管理层提供有质量的决策... \n", 3537 | "3104 40.007096 岗位职责:1、运用互联网思维开展日常经营分析工作2、重点数据分析报告制作。3、数据报表以及数... \n", 3538 | "3105 40.007096 岗位职责:1、战区内日常数据分析工作,日月周报的制作。2、数据产品设计规划对接产品开发。3、... \n", 3539 | "3106 40.007096 岗位职责:1、深入理解公司运营方向和战略的基础,与业务团队密切合作,对业务运营数据进行深入思... \n", 3540 | "3107 40.006166 岗位职责:1.以数据分析为基础,形成明确且详实的方案及策略输出,为公司管理层提供有质量的决策... \n", 3541 | "3108 40.043454 职位描述:1.基于业务逻辑,建立业务数据模型,为团队设计反映产品和运营状况的数据报表;2.持... \n", 3542 | "3109 40.006166 岗位职责:1.以数据分析为基础,形成明确且详实的方案及策略输出,为公司管理层提供有质量的决策... \n", 3543 | "3110 31.994276 岗位职责:从行业研究角度,规划与收集投研需要的关键行业和公司数据。任职要求:本科学历有会计及... \n", 3544 | "3111 116.331398 职位描述1:负责美图公司整体商业&行业&战略方向的分析,为各业务线提供策略分析建议;2:从行... \n", 3545 | "3112 116.331398 岗位职责:1.建立和完善产品的数据体系、如数据收集、数据模型、数据关键指标等;2.负责产品的... \n", 3546 | "\n", 3547 | "[3113 rows x 29 columns]" 3548 | ] 3549 | }, 3550 | "execution_count": 6, 3551 | "metadata": {}, 3552 | "output_type": "execute_result" 3553 | } 3554 | ], 3555 | "source": [ 3556 | "to_drop = ['pcShow','appShow','deliver','score','approve','companyLogo','publisherId','hitags','formatCreateTime','adWord','resumeProcessRate','resumeProcessDay','imState','lastLogin','explain','plus','pcShow','appShow','deliver','gradeDescription','promotionScoreExplain']\n", 3557 | "df_noNeed = df.drop(to_drop,axis=1)\n", 3558 | "df_noNeed" 3559 | ] 3560 | }, 3561 | { 3562 | "cell_type": "markdown", 3563 | "metadata": {}, 3564 | "source": [ 3565 | "**去除重复值**" 3566 | ] 3567 | }, 3568 | { 3569 | "cell_type": "code", 3570 | "execution_count": 7, 3571 | "metadata": {}, 3572 | "outputs": [ 3573 | { 3574 | "data": { 3575 | "text/plain": [ 3576 | "2983" 3577 | ] 3578 | }, 3579 | "execution_count": 7, 3580 | "metadata": {}, 3581 | "output_type": "execute_result" 3582 | } 3583 | ], 3584 | "source": [ 3585 | "#positionId 为唯一值\n", 3586 | "len(df_noNeed['positionId'].unique())" 3587 | ] 3588 | }, 3589 | { 3590 | "cell_type": "code", 3591 | "execution_count": 8, 3592 | "metadata": { 3593 | "collapsed": true 3594 | }, 3595 | "outputs": [], 3596 | "source": [ 3597 | "df_noDup = df_noNeed.drop_duplicates(subset = 'positionId')" 3598 | ] 3599 | }, 3600 | { 3601 | "cell_type": "markdown", 3602 | "metadata": {}, 3603 | "source": [ 3604 | "**salary字段处理**" 3605 | ] 3606 | }, 3607 | { 3608 | "cell_type": "code", 3609 | "execution_count": 9, 3610 | "metadata": {}, 3611 | "outputs": [ 3612 | { 3613 | "data": { 3614 | "text/plain": [ 3615 | "array(['5k-8k', '10k-16k', '10k-20k', '25k-50k', '1k-2k', '12k-20k',\n", 3616 | " '20k-40k', '8k-15k', '4k-8k', '13k-25k', '15k-30k', '5k-7k',\n", 3617 | " '5k-10k', '6k-8k', '15k-25k', '16k-25k', '9k-13k', '10k-13k',\n", 3618 | " '10k-15k', '8k-12k', '7k-12k', '7k-14k', '13k-18k', '3k-4k',\n", 3619 | " '17k-32k', '18k-36k', '15k-22k', '4k-6k', '13k-23k', '8k-14k',\n", 3620 | " '25k-35k', '25k-40k', '2k-4k', '6k-12k', '8k-16k', '12k-18k',\n", 3621 | " '7k-9k', '20k-30k', '18K-25K', '20k-39k', '6k-10k', '2k-3k',\n", 3622 | " '3k-5k', '20k-35k', '24k-40k', '9k-16k', '10k-14k', '8K-15K',\n", 3623 | " '8k-10k', '13k-20k', '25k-45k', '16k-30k', '12k-24k', '20k-25k',\n", 3624 | " '15k-20k', '7k-8k', '8k-11k', '14k-21k', '30k-45k', '7k-13k',\n", 3625 | " '4k-7k', '4k-5k', '18k-35k', '30k-50k', '35k-45k', '10k-18k',\n", 3626 | " '30k-40k', '20K-40K', '16k-32k', '18k-30k', '38k-65k', '12k-15k',\n", 3627 | " '11k-22k', '7k-10k', '13k-26k', '18k-25k', '8k-9k', '9k-18k',\n", 3628 | " '14k-18k', '22k-35k', '30k-35k', '25k-30k', '50k-100k', '9k-14k',\n", 3629 | " '3k-6k', '18k-24k', '21k-33k', '15k-28k', '15k-23k', '15k-29k',\n", 3630 | " '6k-9k', '20K-30K', '10k-19k', '15K-30K', '30k-60k', '3K-5K',\n", 3631 | " '10k-12k', '10K-15K', '10K-20K', '12k-16k', '15k-24k', '12k-22k',\n", 3632 | " '35k-50k', '13K-25K', '15K-25K', '4K-6K', '5k-9k', '18k-28k',\n", 3633 | " '9k-12k', '15k-26k', '8k-13k', '15k-21k', '5K-8K', '25k-38k',\n", 3634 | " '40k-80k', '6K-10K', '9k-15k', '15k以上', '1K-2K', '6K-8K', '16K-32K',\n", 3635 | " '20K-35K', '10k-17k', '6K-12K', '5k-6k', '6k-7k', '18k-22k',\n", 3636 | " '4K-8K', '14k-28k', '13K-18K', '5K-10K', '24k-36k', '14k-22k',\n", 3637 | " '17k-25k', '25K-35K', '9k-17k', '15k-18k', '20k-28k', '16k-23k',\n", 3638 | " '35k-60k', '20k-22k', '17K-33K', '8K-16K', '15K-20K', '17k-22k',\n", 3639 | " '7K-10K', '2K-4K', '12K-20K', '7k-11k', '8K-12K', '45k-90k',\n", 3640 | " '14k-20k', '19K-30K', '17k-34k', '7K-14K', '4K-7K', '19k-38k',\n", 3641 | " '50k-60k', '2K-3K', '50k-80k', '30k-32k', '14k-25k', '30K-50K',\n", 3642 | " '13k-17k', '22k-42k', '12K-24K', '35k-65k'], dtype=object)" 3643 | ] 3644 | }, 3645 | "execution_count": 9, 3646 | "metadata": {}, 3647 | "output_type": "execute_result" 3648 | } 3649 | ], 3650 | "source": [ 3651 | "df_noDup['salary'].unique()" 3652 | ] 3653 | }, 3654 | { 3655 | "cell_type": "code", 3656 | "execution_count": 10, 3657 | "metadata": { 3658 | "collapsed": true 3659 | }, 3660 | "outputs": [], 3661 | "source": [ 3662 | "def cut_salary(salary,method):\n", 3663 | " try:\n", 3664 | " salary_list = salary.split('-')\n", 3665 | " bottomSalary = int(salary_list[0][:-1])\n", 3666 | " topSalary = int(salary_list[1][:-1])\n", 3667 | " except:\n", 3668 | " bottomSalary = int(salary.upper().split('K')[0])\n", 3669 | " topSalary = bottomSalary \n", 3670 | " if method == 'bottom':\n", 3671 | " return bottomSalary\n", 3672 | " else:\n", 3673 | " return topSalary" 3674 | ] 3675 | }, 3676 | { 3677 | "cell_type": "code", 3678 | "execution_count": 11, 3679 | "metadata": {}, 3680 | "outputs": [ 3681 | { 3682 | "name": "stderr", 3683 | "output_type": "stream", 3684 | "text": [ 3685 | "C:\\Users\\Weizh\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: SettingWithCopyWarning: \n", 3686 | "A value is trying to be set on a copy of a slice from a DataFrame.\n", 3687 | "Try using .loc[row_indexer,col_indexer] = value instead\n", 3688 | "\n", 3689 | "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", 3690 | " \"\"\"Entry point for launching an IPython kernel.\n" 3691 | ] 3692 | } 3693 | ], 3694 | "source": [ 3695 | "df_noDup['topSalary'] = df_noDup['salary'].apply(cut_salary,method = 'top')" 3696 | ] 3697 | }, 3698 | { 3699 | "cell_type": "code", 3700 | "execution_count": 12, 3701 | "metadata": {}, 3702 | "outputs": [ 3703 | { 3704 | "name": "stderr", 3705 | "output_type": "stream", 3706 | "text": [ 3707 | "C:\\Users\\Weizh\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: SettingWithCopyWarning: \n", 3708 | "A value is trying to be set on a copy of a slice from a DataFrame.\n", 3709 | "Try using .loc[row_indexer,col_indexer] = value instead\n", 3710 | "\n", 3711 | "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", 3712 | " \"\"\"Entry point for launching an IPython kernel.\n" 3713 | ] 3714 | } 3715 | ], 3716 | "source": [ 3717 | "df_noDup['bottomSalary'] = df_noDup['salary'].apply(cut_salary,method = 'bottom')" 3718 | ] 3719 | }, 3720 | { 3721 | "cell_type": "code", 3722 | "execution_count": 13, 3723 | "metadata": {}, 3724 | "outputs": [ 3725 | { 3726 | "name": "stderr", 3727 | "output_type": "stream", 3728 | "text": [ 3729 | "C:\\Users\\Weizh\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: SettingWithCopyWarning: \n", 3730 | "A value is trying to be set on a copy of a slice from a DataFrame.\n", 3731 | "Try using .loc[row_indexer,col_indexer] = value instead\n", 3732 | "\n", 3733 | "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", 3734 | " \"\"\"Entry point for launching an IPython kernel.\n" 3735 | ] 3736 | } 3737 | ], 3738 | "source": [ 3739 | "df_noDup['avgSalary'] = df_noDup.apply(lambda x: (x.topSalary + x.bottomSalary)/2,axis=1)" 3740 | ] 3741 | }, 3742 | { 3743 | "cell_type": "code", 3744 | "execution_count": 14, 3745 | "metadata": {}, 3746 | "outputs": [ 3747 | { 3748 | "name": "stdout", 3749 | "output_type": "stream", 3750 | "text": [ 3751 | "2983\n", 3752 | "2018-04-12 16:11:13\n", 3753 | "2018-03-12 09:34:09\n" 3754 | ] 3755 | }, 3756 | { 3757 | "name": "stderr", 3758 | "output_type": "stream", 3759 | "text": [ 3760 | "C:\\Users\\Weizh\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: SettingWithCopyWarning: \n", 3761 | "A value is trying to be set on a copy of a slice from a DataFrame.\n", 3762 | "Try using .loc[row_indexer,col_indexer] = value instead\n", 3763 | "\n", 3764 | "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", 3765 | " \"\"\"Entry point for launching an IPython kernel.\n" 3766 | ] 3767 | } 3768 | ], 3769 | "source": [ 3770 | "df_noDup['crateTime'] = pd.to_datetime(df_noDup['createTime'])\n", 3771 | "print(len(df_noDup))\n", 3772 | "print(df_noDup['crateTime'].max())\n", 3773 | "print(df_noDup['crateTime'].min())" 3774 | ] 3775 | }, 3776 | { 3777 | "cell_type": "markdown", 3778 | "metadata": {}, 3779 | "source": [ 3780 | "**数据存储**" 3781 | ] 3782 | }, 3783 | { 3784 | "cell_type": "code", 3785 | "execution_count": 15, 3786 | "metadata": { 3787 | "collapsed": true 3788 | }, 3789 | "outputs": [], 3790 | "source": [ 3791 | "df_noDup.to_csv('.\\data\\Lagou_Data.csv',index = False)" 3792 | ] 3793 | } 3794 | ], 3795 | "metadata": { 3796 | "kernelspec": { 3797 | "display_name": "Python 3", 3798 | "language": "python", 3799 | "name": "python3" 3800 | }, 3801 | "language_info": { 3802 | "codemirror_mode": { 3803 | "name": "ipython", 3804 | "version": 3 3805 | }, 3806 | "file_extension": ".py", 3807 | "mimetype": "text/x-python", 3808 | "name": "python", 3809 | "nbconvert_exporter": "python", 3810 | "pygments_lexer": "ipython3", 3811 | "version": "3.6.2" 3812 | } 3813 | }, 3814 | "nbformat": 4, 3815 | "nbformat_minor": 2 3816 | } 3817 | -------------------------------------------------------------------------------- /Lagou_Spider.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import time\n", 12 | "from urllib.parse import quote\n", 13 | "import requests\n", 14 | "from bs4 import BeautifulSoup\n", 15 | "import pandas as pd\n", 16 | "from sqlalchemy import create_engine\n", 17 | "import math" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": { 24 | "collapsed": true 25 | }, 26 | "outputs": [], 27 | "source": [ 28 | "#header生成函数\n", 29 | "def generate_header(key = '数据分析', city = '全国'):\n", 30 | " header = {'Host': 'www.lagou.com',\n", 31 | " 'Origin': 'https://www.lagou.com',\n", 32 | " 'Referer': 'https://www.lagou.com/jobs/list_{}?px=new&city={}'.format(quote(key),quote(city)),\n", 33 | " 'Cookie' :'user_trace_token=20180314163431-b3a5992f-95ac-4cdb-84bd-8f86a5e084ad; _ga=GA1.2.521321231.1521016472; LGUID=20180314163432-84e6251a-2762-11e8-b1e3-5254005c3644; index_location_city=%E5%85%A8%E5%9B%BD; JSESSIONID=ABAAABAAAGGABCBD6CF3DF428E3511D25B9B7FB773F7B0E; X_HTTP_TOKEN=b9e62565f65403103924971a79b26efc; LGSID=20180412142155-cc1251f6-3e19-11e8-b747-5254005c3644; LGRID=20180412142622-6b0de44c-3e1a-11e8-ba22-525400f775ce; TG-TRACK-CODE=search_code; SEARCH_ID=231099152a164535b75106a7c3de1c7f',\n", 34 | " 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36'\n", 35 | " }\n", 36 | " return header" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 3, 42 | "metadata": { 43 | "collapsed": true 44 | }, 45 | "outputs": [], 46 | "source": [ 47 | "#列表页信息获取函数\n", 48 | "def request_url(offset):\n", 49 | " #网页列表页由Ajax异步加载\n", 50 | " #https://www.lagou.com/jobs/positionAjax.json?px=new&city=%E4%B8%8A%E6%B5%B7&needAddtionalResult=false\n", 51 | " url = 'https://www.lagou.com/jobs/positionAjax.json'\n", 52 | " payload = {'px': 'new',\n", 53 | " 'needAddtionalResult': 'false',\n", 54 | " 'city':'全国'\n", 55 | " }\n", 56 | " formdata = {'first': 'false',\n", 57 | " 'pn': offset,\n", 58 | " 'kd': '数据分析'\n", 59 | " }\n", 60 | " header = generate_header()\n", 61 | " try:\n", 62 | " response = requests.request('POST', url, data=formdata, params=payload, headers=header,timeout=10)\n", 63 | " if response.status_code == 200:\n", 64 | " return response.json()\n", 65 | " except Exception as e:\n", 66 | " print('Brief Info Exception: {}'.format(e))\n" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 4, 72 | "metadata": { 73 | "collapsed": true 74 | }, 75 | "outputs": [], 76 | "source": [ 77 | "#提取职位jd函数\n", 78 | "def get_descriptions(positionid):\n", 79 | " #根据从列表页获取的positionId爬取jd\n", 80 | " url = 'https://www.lagou.com/jobs/{}.html'.format(positionid)\n", 81 | " header = generate_header()\n", 82 | " response = requests.get(url, headers=header,timeout=10)\n", 83 | " try:\n", 84 | " if response.status_code == 200:\n", 85 | " html_code = response.text\n", 86 | " soup = BeautifulSoup(html_code, 'lxml')\n", 87 | " jd = soup.select('#job_detail > dd.job_bt > div',) \n", 88 | " content = jd[0].get_text()\n", 89 | " content_clean = ''.join(content.split())\n", 90 | " except Exception as e:\n", 91 | " print('Jd Exception: {}'.format(e))\n", 92 | " return content_clean" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 5, 98 | "metadata": { 99 | "collapsed": true 100 | }, 101 | "outputs": [], 102 | "source": [ 103 | "#职位所有信息提取函数\n", 104 | "def getdetail(result): \n", 105 | " #信息提取\n", 106 | " df = pd.DataFrame()\n", 107 | " data = result['content']['positionResult'] \n", 108 | " infos = data['result'] \n", 109 | " for info in infos: \n", 110 | " #列表页信息整理为DataFrame\n", 111 | " df_item = pd.DataFrame.from_dict(info, orient='index').transpose()\n", 112 | " #获取jd\n", 113 | " df_item['content'] = get_descriptions(info['positionId'])\n", 114 | " df =df.append(df_item) \n", 115 | " time.sleep(3)\n", 116 | " return df" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 6, 122 | "metadata": { 123 | "collapsed": true 124 | }, 125 | "outputs": [], 126 | "source": [ 127 | "#存取到数据库\n", 128 | "def save_sql(df,tablename):\n", 129 | " #打开数据库存储数据\n", 130 | " #dialect[+driver]://user:password@host/dbname[?key=value..]\n", 131 | " engine = create_engine('postgresql://postgres:947172@localhost:5432/postgres') \n", 132 | " try:\n", 133 | " df.to_sql(tablename,engine,index=False,if_exists='append')\n", 134 | " print ('数据存储成功' ) \n", 135 | " except Exception as e:\n", 136 | " print('Sql Exception: {}'.format(e))" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 7, 142 | "metadata": { 143 | "collapsed": true 144 | }, 145 | "outputs": [], 146 | "source": [ 147 | "def main(offset):\n", 148 | " positioninfo = getdetail(request_url(offset))\n", 149 | " print('第{}页爬取成功'.format(offset))\n", 150 | " save_sql(positioninfo,'Lagou_DA')\n", 151 | " time.sleep(3)" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 8, 157 | "metadata": { 158 | "collapsed": true 159 | }, 160 | "outputs": [ 161 | { 162 | "ename": "KeyboardInterrupt", 163 | "evalue": "", 164 | "output_type": "error", 165 | "traceback": [ 166 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 167 | "\u001b[1;31mWantReadError\u001b[0m Traceback (most recent call last)", 168 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\requests\\packages\\urllib3\\contrib\\pyopenssl.py\u001b[0m in \u001b[0;36mwrap_socket\u001b[1;34m(self, sock, server_side, do_handshake_on_connect, suppress_ragged_eofs, server_hostname)\u001b[0m\n\u001b[0;32m 437\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 438\u001b[1;33m \u001b[0mcnx\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdo_handshake\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 439\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mOpenSSL\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mSSL\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mWantReadError\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 169 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\OpenSSL\\SSL.py\u001b[0m in \u001b[0;36mdo_handshake\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 1715\u001b[0m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_lib\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mSSL_do_handshake\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_ssl\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1716\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_raise_ssl_error\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_ssl\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mresult\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1717\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", 170 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\OpenSSL\\SSL.py\u001b[0m in \u001b[0;36m_raise_ssl_error\u001b[1;34m(self, ssl, result)\u001b[0m\n\u001b[0;32m 1430\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0merror\u001b[0m \u001b[1;33m==\u001b[0m \u001b[0m_lib\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mSSL_ERROR_WANT_READ\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1431\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mWantReadError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1432\u001b[0m \u001b[1;32melif\u001b[0m \u001b[0merror\u001b[0m \u001b[1;33m==\u001b[0m \u001b[0m_lib\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mSSL_ERROR_WANT_WRITE\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 171 | "\u001b[1;31mWantReadError\u001b[0m: ", 172 | "\nDuring handling of the above exception, another exception occurred:\n", 173 | "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", 174 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0m__name__\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34m'__main__'\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[0mpageCount\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmath\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mceil\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mrequest_url\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'content'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'positionResult'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'totalCount'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m/\u001b[0m\u001b[1;36m15\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 4\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'共{0}页'\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpageCount\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mpage\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mpageCount\u001b[0m\u001b[1;33m+\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 175 | "\u001b[1;32m\u001b[0m in \u001b[0;36mrequest_url\u001b[1;34m(offset)\u001b[0m\n\u001b[0;32m 14\u001b[0m \u001b[0mheader\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mgenerate_header\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 15\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 16\u001b[1;33m \u001b[0mresponse\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mrequests\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"POST\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0murl\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mformdata\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mpayload\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mheader\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m10\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 17\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mresponse\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstatus_code\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;36m200\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 18\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mresponse\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mjson\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 176 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\requests\\api.py\u001b[0m in \u001b[0;36mrequest\u001b[1;34m(method, url, **kwargs)\u001b[0m\n\u001b[0;32m 56\u001b[0m \u001b[1;31m# cases, and look like a memory leak in others.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 57\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0msessions\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mSession\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0msession\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 58\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0msession\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmethod\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mmethod\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0murl\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0murl\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 59\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 60\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", 177 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\requests\\sessions.py\u001b[0m in \u001b[0;36mrequest\u001b[1;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[0;32m 516\u001b[0m }\n\u001b[0;32m 517\u001b[0m \u001b[0msend_kwargs\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msettings\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 518\u001b[1;33m \u001b[0mresp\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mprep\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0msend_kwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 519\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 520\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mresp\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 178 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\requests\\sessions.py\u001b[0m in \u001b[0;36msend\u001b[1;34m(self, request, **kwargs)\u001b[0m\n\u001b[0;32m 637\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 638\u001b[0m \u001b[1;31m# Send the request\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 639\u001b[1;33m \u001b[0mr\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0madapter\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mrequest\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 640\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 641\u001b[0m \u001b[1;31m# Total elapsed time of the request (approximately)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 179 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\requests\\adapters.py\u001b[0m in \u001b[0;36msend\u001b[1;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[0;32m 436\u001b[0m \u001b[0mdecode_content\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 437\u001b[0m \u001b[0mretries\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmax_retries\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 438\u001b[1;33m \u001b[0mtimeout\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 439\u001b[0m )\n\u001b[0;32m 440\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", 180 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\requests\\packages\\urllib3\\connectionpool.py\u001b[0m in \u001b[0;36murlopen\u001b[1;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)\u001b[0m\n\u001b[0;32m 592\u001b[0m \u001b[0mis_new_proxy_conn\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mproxy\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m \u001b[1;32mand\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mconn\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'sock'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 593\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mis_new_proxy_conn\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 594\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_prepare_proxy\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mconn\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 595\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 596\u001b[0m \u001b[1;31m# Make the request on the httplib connection object.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 181 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\requests\\packages\\urllib3\\connectionpool.py\u001b[0m in \u001b[0;36m_prepare_proxy\u001b[1;34m(self, conn)\u001b[0m\n\u001b[0;32m 808\u001b[0m \u001b[0mset_tunnel\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mhost\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mport\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mproxy_headers\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 809\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 810\u001b[1;33m \u001b[0mconn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mconnect\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 811\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 812\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_new_conn\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 182 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\requests\\packages\\urllib3\\connection.py\u001b[0m in \u001b[0;36mconnect\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 324\u001b[0m \u001b[0mca_cert_dir\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mca_cert_dir\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 325\u001b[0m \u001b[0mserver_hostname\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mhostname\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 326\u001b[1;33m ssl_context=context)\n\u001b[0m\u001b[0;32m 327\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 328\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0massert_fingerprint\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 183 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\requests\\packages\\urllib3\\util\\ssl_.py\u001b[0m in \u001b[0;36mssl_wrap_socket\u001b[1;34m(sock, keyfile, certfile, cert_reqs, ca_certs, server_hostname, ssl_version, ciphers, ssl_context, ca_cert_dir)\u001b[0m\n\u001b[0;32m 323\u001b[0m \u001b[0mcontext\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mload_cert_chain\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcertfile\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkeyfile\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 324\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mHAS_SNI\u001b[0m\u001b[1;33m:\u001b[0m \u001b[1;31m# Platform-specific: OpenSSL with enabled SNI\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 325\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mcontext\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwrap_socket\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msock\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mserver_hostname\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mserver_hostname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 326\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 327\u001b[0m warnings.warn(\n", 184 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\requests\\packages\\urllib3\\contrib\\pyopenssl.py\u001b[0m in \u001b[0;36mwrap_socket\u001b[1;34m(self, sock, server_side, do_handshake_on_connect, suppress_ragged_eofs, server_hostname)\u001b[0m\n\u001b[0;32m 438\u001b[0m \u001b[0mcnx\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdo_handshake\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 439\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mOpenSSL\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mSSL\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mWantReadError\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 440\u001b[1;33m \u001b[0mrd\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mutil\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwait_for_read\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msock\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msock\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgettimeout\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 441\u001b[0m \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mrd\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 442\u001b[0m \u001b[1;32mraise\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'select timed out'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 185 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\requests\\packages\\urllib3\\util\\wait.py\u001b[0m in \u001b[0;36mwait_for_read\u001b[1;34m(socks, timeout)\u001b[0m\n\u001b[0;32m 31\u001b[0m \u001b[1;32mor\u001b[0m \u001b[0moptionally\u001b[0m \u001b[0ma\u001b[0m \u001b[0msingle\u001b[0m \u001b[0msocket\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mpassed\u001b[0m \u001b[1;32min\u001b[0m\u001b[1;33m.\u001b[0m \u001b[0mReturns\u001b[0m \u001b[0ma\u001b[0m \u001b[0mlist\u001b[0m \u001b[0mof\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 32\u001b[0m sockets that can be read from immediately. \"\"\"\n\u001b[1;32m---> 33\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0m_wait_for_io_events\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msocks\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mEVENT_READ\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 34\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 35\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", 186 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\requests\\packages\\urllib3\\util\\wait.py\u001b[0m in \u001b[0;36m_wait_for_io_events\u001b[1;34m(socks, events, timeout)\u001b[0m\n\u001b[0;32m 24\u001b[0m \u001b[0mselector\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mregister\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msock\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mevents\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 25\u001b[0m return [key[0].fileobj for key in\n\u001b[1;32m---> 26\u001b[1;33m selector.select(timeout) if key[1] & events]\n\u001b[0m\u001b[0;32m 27\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 28\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", 187 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\requests\\packages\\urllib3\\util\\selectors.py\u001b[0m in \u001b[0;36mselect\u001b[1;34m(self, timeout)\u001b[0m\n\u001b[0;32m 318\u001b[0m \u001b[0mready\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 319\u001b[0m r, w, _ = _syscall_wrapper(self._select, True, self._readers,\n\u001b[1;32m--> 320\u001b[1;33m self._writers, timeout)\n\u001b[0m\u001b[0;32m 321\u001b[0m \u001b[0mr\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mset\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mr\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 322\u001b[0m \u001b[0mw\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mset\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mw\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 188 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\requests\\packages\\urllib3\\util\\selectors.py\u001b[0m in \u001b[0;36m_syscall_wrapper\u001b[1;34m(func, _, *args, **kwargs)\u001b[0m\n\u001b[0;32m 62\u001b[0m and recalculate their timeouts. \"\"\"\n\u001b[0;32m 63\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 64\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 65\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mOSError\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mIOError\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mselect\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0merror\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 66\u001b[0m \u001b[0merrcode\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 189 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\requests\\packages\\urllib3\\util\\selectors.py\u001b[0m in \u001b[0;36m_select\u001b[1;34m(self, r, w, timeout)\u001b[0m\n\u001b[0;32m 308\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_select\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mr\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mw\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 309\u001b[0m \u001b[1;34m\"\"\" Wrapper for select.select because timeout is a positional arg \"\"\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 310\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mselect\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mselect\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mr\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mw\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 311\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 312\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mselect\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 190 | "\u001b[1;31mKeyboardInterrupt\u001b[0m: " 191 | ] 192 | } 193 | ], 194 | "source": [ 195 | "if __name__ == '__main__':\n", 196 | " try:\n", 197 | " pageCount = math.ceil(request_url(1)['content']['positionResult']['totalCount']/15)\n", 198 | " print('共{0}页'.format(pageCount))\n", 199 | " for page in range(1,pageCount+1):\n", 200 | " main(page)\n", 201 | " except Exception as e:\n", 202 | " print('Page Exception: {}'.format(e))\n", 203 | " \n" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "metadata": { 210 | "collapsed": true 211 | }, 212 | "outputs": [], 213 | "source": [] 214 | } 215 | ], 216 | "metadata": { 217 | "kernelspec": { 218 | "display_name": "Python 3", 219 | "language": "python", 220 | "name": "python3" 221 | }, 222 | "language_info": { 223 | "codemirror_mode": { 224 | "name": "ipython", 225 | "version": 3 226 | }, 227 | "file_extension": ".py", 228 | "mimetype": "text/x-python", 229 | "name": "python", 230 | "nbconvert_exporter": "python", 231 | "pygments_lexer": "ipython3", 232 | "version": "3.6.2" 233 | } 234 | }, 235 | "nbformat": 4, 236 | "nbformat_minor": 2 237 | } 238 | -------------------------------------------------------------------------------- /Lagou_Spider.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Thu Apr 12 20:22:36 2018 4 | 5 | @author: Weizh 6 | """ 7 | 8 | import time 9 | from urllib.parse import quote 10 | import requests 11 | from bs4 import BeautifulSoup 12 | import pandas as pd 13 | from sqlalchemy import create_engine 14 | import math 15 | 16 | #header生成函数 17 | def generate_header(key = '数据分析', city = '全国'): 18 | header = {'Host': 'www.lagou.com', 19 | 'Origin': 'https://www.lagou.com', 20 | #https://www.lagou.com/jobs/list_%E6%95%B0%E6%8D%AE%E5%88%86%E6%9E%90?px=new&city=%E5%85%A8%E5%9B%BD 21 | 'Referer': 'https://www.lagou.com/jobs/list_{}?px=new&city={}'.format(quote(key),quote(city)), 22 | 'Cookie' :'user_trace_token=20180314163431-b3a5992f-95ac-4cdb-84bd-8f86a5e084ad; _ga=GA1.2.521321231.1521016472; LGUID=20180314163432-84e6251a-2762-11e8-b1e3-5254005c3644; index_location_city=%E5%85%A8%E5%9B%BD; JSESSIONID=ABAAABAAAGGABCBD6CF3DF428E3511D25B9B7FB773F7B0E; X_HTTP_TOKEN=b9e62565f65403103924971a79b26efc; LGSID=20180412142155-cc1251f6-3e19-11e8-b747-5254005c3644; LGRID=20180412142622-6b0de44c-3e1a-11e8-ba22-525400f775ce; TG-TRACK-CODE=search_code; SEARCH_ID=231099152a164535b75106a7c3de1c7f', 23 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36' 24 | } 25 | return header 26 | 27 | #列表页信息获取函数 28 | def request_url(offset): 29 | #网页列表页由Ajax异步加载 30 | #https://www.lagou.com/jobs/positionAjax.json?px=new&city=%E4%B8%8A%E6%B5%B7&needAddtionalResult=false 31 | url = "https://www.lagou.com/jobs/positionAjax.json" 32 | payload = {'px': 'new', 33 | 'needAddtionalResult': 'false', 34 | 'city':'全国' 35 | } 36 | formdata = {'first': 'false', 37 | 'pn': offset, 38 | 'kd': '数据分析' 39 | } 40 | header = generate_header() 41 | try: 42 | response = requests.request("POST", url, data=formdata, params=payload, headers=header,timeout=10) 43 | if response.status_code == 200: 44 | return response.json() 45 | except Exception as e: 46 | print("Brief Info Exception: {}".format(e)) 47 | 48 | #提取职位jd函数 49 | def get_descriptions(positionid): 50 | #根据从列表页获取的positionId爬取jd 51 | url = 'https://www.lagou.com/jobs/{}.html'.format(positionid) 52 | header = generate_header() 53 | response = requests.get(url, headers=header,timeout=10) 54 | try: 55 | if response.status_code == 200: 56 | html_code = response.text 57 | soup = BeautifulSoup(html_code, 'lxml') 58 | #jd = job_detail = soup.select('#job_detail > dd.job_bt ',) 59 | jd = soup.select('#job_detail > dd.job_bt > div',) 60 | content = jd[0].get_text() 61 | content_clean = "".join(content.split()) 62 | except Exception as e: 63 | print("Jd Exception: {}".format(e)) 64 | return content_clean 65 | 66 | #职位所有信息提取函数 67 | def getdetail(result): 68 | #信息提取 69 | df = pd.DataFrame() 70 | data = result['content']['positionResult'] 71 | infos = data['result'] 72 | for info in infos: 73 | #列表页信息整理为DataFrame 74 | df_item = pd.DataFrame.from_dict(info, orient='index').transpose() 75 | #获取jd 76 | df_item['content'] = get_descriptions(info['positionId']) 77 | df =df.append(df_item) 78 | time.sleep(3) 79 | return df 80 | 81 | #存取到数据库 82 | def save_sql(df,tablename): 83 | #打开数据库存储数据 84 | #dialect[+driver]://user:password@host/dbname[?key=value..] 85 | engine = create_engine('postgresql://postgres:947172@localhost:5432/postgres') 86 | 87 | try: 88 | df.to_sql(tablename,engine,index=False,if_exists='append') 89 | print ('数据存储成功' ) 90 | except Exception as e: 91 | print("Sql Exception: {}".format(e)) 92 | 93 | def main(offset): 94 | positioninfo = getdetail(request_url(offset)) 95 | print('第{}页爬取成功'.format(offset)) 96 | save_sql(positioninfo,"Lagou_DA") 97 | time.sleep(3) 98 | 99 | if __name__ == '__main__': 100 | try: 101 | pageCount = math.ceil(request_url(1)['content']['positionResult']['totalCount']/15) 102 | print('共{0}页'.format(pageCount)) 103 | for page in range(1,pageCount+1): 104 | main(page) 105 | except Exception as e: 106 | print("Page Exception: {}".format(e)) 107 | 108 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Lagou_Position_Spider_And_Data_Analysis 2 | 爬取拉勾网数据分析岗位招聘数据以及可视化展示。 3 | 4 | -------------------------------------------------------------------------------- /data/Lagou_Data.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weizhuang1113/Lagou_Spider_And_Data_Analysis/7e6ef994390b3940bfca7adaa0e6308084128e0d/data/Lagou_Data.csv -------------------------------------------------------------------------------- /stopwords.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weizhuang1113/Lagou_Spider_And_Data_Analysis/7e6ef994390b3940bfca7adaa0e6308084128e0d/stopwords.txt -------------------------------------------------------------------------------- /synonyms.txt: -------------------------------------------------------------------------------- 1 | sql mysql pgsql postgresql 2 | ppt powerpoint -------------------------------------------------------------------------------- /userdict.txt: -------------------------------------------------------------------------------- 1 | hadoop 2 | hive 3 | mapreduce 4 | hbase 5 | sas 6 | sql 7 | spss 8 | nonsql 9 | r 10 | excel 11 | powerpoint 12 | ppt 13 | office 14 | mysql 15 | pgsql 16 | postgresql 17 | bi 18 | python 19 | shell 20 | 21 | --------------------------------------------------------------------------------