├── .github └── FUNDING.yml ├── .gitignore ├── README.md └── site ├── __init__.py ├── __pycache__ └── __init__.cpython-34.pyc └── mybzz ├── StopWords.txt ├── __init__.py ├── __pycache__ └── __init__.cpython-34.pyc ├── crawler ├── 360Crawler.py ├── AppleCrawler.py ├── BaiDuCrawler.py ├── MZCrawler.py ├── TencentCrawler.py ├── WdjCrawler.py └── XiaoMiCrawler.py ├── domain ├── MzComment.py └── __pycache__ │ └── MzComment.cpython-34.pyc ├── keyword ├── CreateWordCloud.py └── InsertKeyWord.py ├── neg_review.pkl ├── pos_review.pkl ├── sentiment ├── Feature.py ├── Vec.py ├── feature_selection.py └── test1.txt ├── test ├── NltkUtil.py ├── Ran.py ├── Test.py ├── TestA.py ├── TestBs.py ├── TestDb.py ├── TestJieBa.py ├── TestNltk.py ├── TestPa.py ├── TestPkl.py ├── TestPred.py ├── TestQQ.py ├── TestReadFile.py ├── TestSk.py ├── TestWordCloud.py ├── model.m ├── model.pkl ├── neg_review.pkl └── pos_review.pkl └── util ├── BsUtil.py ├── DateUtil.py └── DbUtil.py /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | custom: # Replace with a single custom sponsorship URL 9 | 10 | #thx anyway 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | *__pycache__* 3 | *.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CommentCrawler 2 | Crawlers that scrape user reviews from the major app stores 3 | -------------------------------------------------------------------------------- /site/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WhiteDevilBan/CommentCrawler/2bc36084da3693d7f6b75295fbbcad216c42b2b8/site/__init__.py -------------------------------------------------------------------------------- /site/__pycache__/__init__.cpython-34.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WhiteDevilBan/CommentCrawler/2bc36084da3693d7f6b75295fbbcad216c42b2b8/site/__pycache__/__init__.cpython-34.pyc -------------------------------------------------------------------------------- /site/mybzz/StopWords.txt: -------------------------------------------------------------------------------- 1 | , 2 | 。 3 | ? 4 | ! 5 | ; 6 | “ 7 | # 8 | : 9 | ! 10 | " 11 | # 12 | $ 13 | % 14 | & 15 | ' 16 | ( 17 | ) 18 | * 19 | + 20 | , 21 | - 22 | -- 23 | . 24 | .. 25 | ... 26 | ...... 27 | ................... 28 | ./ 29 | .一 30 | .数 31 | .日 32 | / 33 | // 34 | 0 35 | 1 36 | 2 37 | 3 38 | 4 39 | 5 40 | 6 41 | 7 42 | 8 43 | 9 44 | : 45 | :// 46 | :: 47 | ; 48 | < 49 | = 50 | > 51 | >> 52 | ? 53 | @ 54 | A 55 | Lex 56 | [ 57 | \ 58 | ] 59 | ^ 60 | _ 61 | ` 62 | exp 63 | sub 64 | sup 65 | | 66 | } 67 | ~ 68 | ~~~~ 69 | · 70 | × 71 | ××× 72 | Δ 73 | Ψ 74 | γ 75 | μ 76 | φ 77 | φ. 
78 | В 79 | — 80 | —— 81 | ——— 82 | ‘ 83 | ’ 84 | ’‘ 85 | “ 86 | ” 87 | ”, 88 | … 89 | …… 90 | …………………………………………………③ 91 | ′∈ 92 | ′| 93 | ℃ 94 | Ⅲ 95 | ↑ 96 | → 97 | ∈[ 98 | ∪φ∈ 99 | ≈ 100 | ① 101 | ② 102 | ②c 103 | ③ 104 | ③] 105 | ④ 106 | ⑤ 107 | ⑥ 108 | ⑦ 109 | ⑧ 110 | ⑨ 111 | ⑩ 112 | ── 113 | ■ 114 | ▲ 115 |   116 | 、 117 | 。 118 | 〈 119 | 〉 120 | 《 121 | 》 122 | 》), 123 | 」 124 | 『 125 | 』 126 | 【 127 | 】 128 | 〔 129 | 〕 130 | 〕〔 131 | ㈧ 132 | 一 133 | 一. 134 | 一一 135 | 一下 136 | 一个 137 | 一些 138 | 一何 139 | 一切 140 | 一则 141 | 一则通过 142 | 一天 143 | 一定 144 | 一方面 145 | 一旦 146 | 一时 147 | 一来 148 | 一样 149 | 一次 150 | 一片 151 | 一番 152 | 一直 153 | 一致 154 | 一般 155 | 一起 156 | 一转眼 157 | 一边 158 | 一面 159 | 七 160 | 万一 161 | 三 162 | 三天两头 163 | 三番两次 164 | 三番五次 165 | 上 166 | 上下 167 | 上升 168 | 上去 169 | 上来 170 | 上述 171 | 上面 172 | 下 173 | 下列 174 | 下去 175 | 下来 176 | 下面 177 | 不 178 | 不一 179 | 不下 180 | 不久 181 | 不了 182 | 不亦乐乎 183 | 不仅 184 | 不仅...而且 185 | 不仅仅 186 | 不仅仅是 187 | 不会 188 | 不但 189 | 不但...而且 190 | 不光 191 | 不免 192 | 不再 193 | 不力 194 | 不单 195 | 不变 196 | 不只 197 | 不可 198 | 不可开交 199 | 不可抗拒 200 | 不同 201 | 不外 202 | 不外乎 203 | 不够 204 | 不大 205 | 不如 206 | 不妨 207 | 不定 208 | 不对 209 | 不少 210 | 不尽 211 | 不尽然 212 | 不巧 213 | 不已 214 | 不常 215 | 不得 216 | 不得不 217 | 不得了 218 | 不得已 219 | 不必 220 | 不怎么 221 | 不怕 222 | 不惟 223 | 不成 224 | 不拘 225 | 不择手段 226 | 不敢 227 | 不料 228 | 不断 229 | 不日 230 | 不时 231 | 不是 232 | 不曾 233 | 不止 234 | 不止一次 235 | 不比 236 | 不消 237 | 不满 238 | 不然 239 | 不然的话 240 | 不特 241 | 不独 242 | 不由得 243 | 不知不觉 244 | 不管 245 | 不管怎样 246 | 不经意 247 | 不胜 248 | 不能 249 | 不能不 250 | 不至于 251 | 不若 252 | 不要 253 | 不论 254 | 不起 255 | 不足 256 | 不过 257 | 不迭 258 | 不问 259 | 不限 260 | 与 261 | 与其 262 | 与其说 263 | 与否 264 | 与此同时 265 | 专门 266 | 且 267 | 且不说 268 | 且说 269 | 两者 270 | 严格 271 | 严重 272 | 个 273 | 个人 274 | 个别 275 | 中小 276 | 中间 277 | 丰富 278 | 串行 279 | 临 280 | 临到 281 | 为 282 | 为主 283 | 为了 284 | 为什么 285 | 为什麽 286 | 为何 287 | 为止 288 | 为此 289 | 为着 290 | 主张 291 | 主要 292 | 举凡 293 | 举行 294 | 乃 295 | 乃至 296 | 乃至于 297 | 么 298 | 之 299 | 之一 300 | 之前 301 | 之后 302 | 之後 303 | 之所以 304 | 之类 305 | 乌乎 306 | 乎 307 | 乒 308 | 乘 309 | 乘势 310 | 乘机 311 | 乘胜 312 | 乘虚 313 | 乘隙 314 | 九 315 | 也 316 | 也好 317 | 也就是说 318 | 也是 319 | 也罢 320 | 了 321 | 了解 322 | 争取 323 | 二 324 | 二来 325 | 二话不说 326 | 二话没说 327 | 于 328 | 于是 329 | 于是乎 330 | 云云 331 | 云尔 332 | 互 333 | 互相 334 | 五 335 | 些 336 | 交口 337 | 亦 338 | 产生 339 | 亲口 340 | 亲手 341 | 亲眼 342 | 亲自 343 | 亲身 344 | 人 345 | 人人 346 | 人们 347 | 人家 348 | 人民 349 | 什么 350 | 什么样 351 | 什麽 352 | 仅 353 | 仅仅 354 | 今 355 | 今后 356 | 今天 357 | 今年 358 | 今後 359 | 介于 360 | 仍 361 | 仍旧 362 | 仍然 363 | 从 364 | 从不 365 | 从严 366 | 从中 367 | 从事 368 | 从今以后 369 | 从优 370 | 从古到今 371 | 从古至今 372 | 从头 373 | 从宽 374 | 从小 375 | 从新 376 | 从无到有 377 | 从早到晚 378 | 从未 379 | 从来 380 | 从此 381 | 从此以后 382 | 从而 383 | 从轻 384 | 从速 385 | 从重 386 | 他 387 | 他人 388 | 他们 389 | 他是 390 | 他的 391 | 代替 392 | 以 393 | 以上 394 | 以下 395 | 以为 396 | 以便 397 | 以免 398 | 以前 399 | 以及 400 | 以后 401 | 以外 402 | 以後 403 | 以故 404 | 以期 405 | 以来 406 | 以至 407 | 以至于 408 | 以致 409 | 们 410 | 任 411 | 任何 412 | 任凭 413 | 任务 414 | 企图 415 | 伙同 416 | 会 417 | 伟大 418 | 传 419 | 传说 420 | 传闻 421 | 似乎 422 | 似的 423 | 但 424 | 但凡 425 | 但愿 426 | 但是 427 | 何 428 | 何乐而不为 429 | 何以 430 | 何况 431 | 何处 432 | 何妨 433 | 何尝 434 | 何必 435 | 何时 436 | 何止 437 | 何苦 438 | 何须 439 | 余外 440 | 作为 441 | 你 442 | 你们 443 | 你是 444 | 你的 445 | 使 446 | 使得 447 | 使用 448 | 例如 449 | 依 450 | 依据 451 | 依照 452 | 依靠 453 | 便 454 | 便于 455 | 促进 456 | 保持 457 | 保管 458 | 保险 459 | 俺 460 | 俺们 461 | 倍加 462 | 倍感 463 | 倒不如 464 | 倒不如说 465 | 倒是 466 | 倘 467 | 倘使 468 | 倘或 469 | 倘然 470 | 倘若 471 | 借 472 | 借以 
473 | 借此 474 | 假使 475 | 假如 476 | 假若 477 | 偏偏 478 | 做到 479 | 偶尔 480 | 偶而 481 | 傥然 482 | 像 483 | 儿 484 | 允许 485 | 元/吨 486 | 充其极 487 | 充其量 488 | 充分 489 | 先不先 490 | 先后 491 | 先後 492 | 先生 493 | 光 494 | 光是 495 | 全体 496 | 全力 497 | 全年 498 | 全然 499 | 全身心 500 | 全部 501 | 全都 502 | 全面 503 | 八 504 | 八成 505 | 公然 506 | 六 507 | 兮 508 | 共 509 | 共同 510 | 共总 511 | 关于 512 | 其 513 | 其一 514 | 其中 515 | 其二 516 | 其他 517 | 其余 518 | 其后 519 | 其它 520 | 其实 521 | 其次 522 | 具体 523 | 具体地说 524 | 具体来说 525 | 具体说来 526 | 具有 527 | 兼之 528 | 内 529 | 再 530 | 再其次 531 | 再则 532 | 再有 533 | 再次 534 | 再者 535 | 再者说 536 | 再说 537 | 冒 538 | 冲 539 | 决不 540 | 决定 541 | 决非 542 | 况且 543 | 准备 544 | 凑巧 545 | 凝神 546 | 几 547 | 几乎 548 | 几度 549 | 几时 550 | 几番 551 | 几经 552 | 凡 553 | 凡是 554 | 凭 555 | 凭借 556 | 出 557 | 出于 558 | 出去 559 | 出来 560 | 出现 561 | 分别 562 | 分头 563 | 分期 564 | 分期分批 565 | 切 566 | 切不可 567 | 切切 568 | 切勿 569 | 切莫 570 | 则 571 | 则甚 572 | 刚 573 | 刚好 574 | 刚巧 575 | 刚才 576 | 初 577 | 别 578 | 别人 579 | 别处 580 | 别是 581 | 别的 582 | 别管 583 | 别说 584 | 到 585 | 到了儿 586 | 到处 587 | 到头 588 | 到头来 589 | 到底 590 | 到目前为止 591 | 前后 592 | 前此 593 | 前者 594 | 前进 595 | 前面 596 | 加上 597 | 加之 598 | 加以 599 | 加入 600 | 加强 601 | 动不动 602 | 动辄 603 | 勃然 604 | 匆匆 605 | 十分 606 | 千 607 | 千万 608 | 千万千万 609 | 半 610 | 单 611 | 单单 612 | 单纯 613 | 即 614 | 即令 615 | 即使 616 | 即便 617 | 即刻 618 | 即如 619 | 即将 620 | 即或 621 | 即是说 622 | 即若 623 | 却 624 | 却不 625 | 历 626 | 原来 627 | 去 628 | 又 629 | 又及 630 | 及 631 | 及其 632 | 及时 633 | 及至 634 | 双方 635 | 反之 636 | 反之亦然 637 | 反之则 638 | 反倒 639 | 反倒是 640 | 反应 641 | 反手 642 | 反映 643 | 反而 644 | 反过来 645 | 反过来说 646 | 取得 647 | 取道 648 | 受到 649 | 变成 650 | 古来 651 | 另 652 | 另一个 653 | 另一方面 654 | 另外 655 | 另悉 656 | 另方面 657 | 另行 658 | 只 659 | 只当 660 | 只怕 661 | 只是 662 | 只有 663 | 只消 664 | 只要 665 | 只限 666 | 叫 667 | 叫做 668 | 召开 669 | 叮咚 670 | 叮当 671 | 可 672 | 可以 673 | 可好 674 | 可是 675 | 可能 676 | 可见 677 | 各 678 | 各个 679 | 各人 680 | 各位 681 | 各地 682 | 各式 683 | 各种 684 | 各级 685 | 各自 686 | 合理 687 | 同 688 | 同一 689 | 同时 690 | 同样 691 | 后 692 | 后来 693 | 后者 694 | 后面 695 | 向 696 | 向使 697 | 向着 698 | 吓 699 | 吗 700 | 否则 701 | 吧 702 | 吧哒 703 | 吱 704 | 呀 705 | 呃 706 | 呆呆地 707 | 呐 708 | 呕 709 | 呗 710 | 呜 711 | 呜呼 712 | 呢 713 | 周围 714 | 呵 715 | 呵呵 716 | 呸 717 | 呼哧 718 | 呼啦 719 | 咋 720 | 和 721 | 咚 722 | 咦 723 | 咧 724 | 咱 725 | 咱们 726 | 咳 727 | 哇 728 | 哈 729 | 哈哈 730 | 哉 731 | 哎 732 | 哎呀 733 | 哎哟 734 | 哗 735 | 哗啦 736 | 哟 737 | 哦 738 | 哩 739 | 哪 740 | 哪个 741 | 哪些 742 | 哪儿 743 | 哪天 744 | 哪年 745 | 哪怕 746 | 哪样 747 | 哪边 748 | 哪里 749 | 哼 750 | 哼唷 751 | 唉 752 | 唯有 753 | 啊 754 | 啊呀 755 | 啊哈 756 | 啊哟 757 | 啐 758 | 啥 759 | 啦 760 | 啪达 761 | 啷当 762 | 喀 763 | 喂 764 | 喏 765 | 喔唷 766 | 喽 767 | 嗡 768 | 嗡嗡 769 | 嗬 770 | 嗯 771 | 嗳 772 | 嘎 773 | 嘎嘎 774 | 嘎登 775 | 嘘 776 | 嘛 777 | 嘻 778 | 嘿 779 | 嘿嘿 780 | 四 781 | 因 782 | 因为 783 | 因了 784 | 因此 785 | 因着 786 | 因而 787 | 固 788 | 固然 789 | 在 790 | 在下 791 | 在于 792 | 地 793 | 均 794 | 坚决 795 | 坚持 796 | 基于 797 | 基本 798 | 基本上 799 | 处在 800 | 处处 801 | 处理 802 | 复杂 803 | 多 804 | 多么 805 | 多亏 806 | 多多 807 | 多多少少 808 | 多多益善 809 | 多少 810 | 多年前 811 | 多年来 812 | 多数 813 | 多次 814 | 够瞧的 815 | 大 816 | 大不了 817 | 大举 818 | 大事 819 | 大体 820 | 大体上 821 | 大凡 822 | 大力 823 | 大多 824 | 大多数 825 | 大大 826 | 大家 827 | 大张旗鼓 828 | 大批 829 | 大抵 830 | 大概 831 | 大略 832 | 大约 833 | 大致 834 | 大都 835 | 大量 836 | 大面儿上 837 | 失去 838 | 奇 839 | 奈 840 | 奋勇 841 | 她 842 | 她们 843 | 她是 844 | 她的 845 | 好在 846 | 好的 847 | 好象 848 | 如 849 | 如上 850 | 如上所述 851 | 如下 852 | 如今 853 | 如何 854 | 如其 855 | 如前所述 856 | 如同 857 | 如常 858 | 如是 859 | 如期 860 | 如果 861 | 如次 862 | 如此 863 | 如此等等 864 | 如若 865 | 始而 866 | 姑且 867 | 存在 868 | 存心 869 | 孰料 870 | 孰知 871 | 宁 872 | 
宁可 873 | 宁愿 874 | 宁肯 875 | 它 876 | 它们 877 | 它们的 878 | 它是 879 | 它的 880 | 安全 881 | 完全 882 | 完成 883 | 定 884 | 实现 885 | 实际 886 | 宣布 887 | 容易 888 | 密切 889 | 对 890 | 对于 891 | 对应 892 | 对待 893 | 对方 894 | 对比 895 | 将 896 | 将才 897 | 将要 898 | 将近 899 | 小 900 | 少数 901 | 尔 902 | 尔后 903 | 尔尔 904 | 尔等 905 | 尚且 906 | 尤其 907 | 就 908 | 就地 909 | 就是 910 | 就是了 911 | 就是说 912 | 就此 913 | 就算 914 | 就要 915 | 尽 916 | 尽可能 917 | 尽如人意 918 | 尽心尽力 919 | 尽心竭力 920 | 尽快 921 | 尽早 922 | 尽然 923 | 尽管 924 | 尽管如此 925 | 尽量 926 | 局外 927 | 居然 928 | 届时 929 | 属于 930 | 屡 931 | 屡屡 932 | 屡次 933 | 屡次三番 934 | 岂 935 | 岂但 936 | 岂止 937 | 岂非 938 | 川流不息 939 | 左右 940 | 巨大 941 | 巩固 942 | 差一点 943 | 差不多 944 | 己 945 | 已 946 | 已矣 947 | 已经 948 | 巴 949 | 巴巴 950 | 带 951 | 帮助 952 | 常 953 | 常常 954 | 常言说 955 | 常言说得好 956 | 常言道 957 | 平素 958 | 年复一年 959 | 并 960 | 并不 961 | 并不是 962 | 并且 963 | 并排 964 | 并无 965 | 并没 966 | 并没有 967 | 并肩 968 | 并非 969 | 广大 970 | 广泛 971 | 应当 972 | 应用 973 | 应该 974 | 庶乎 975 | 庶几 976 | 开外 977 | 开始 978 | 开展 979 | 引起 980 | 弗 981 | 弹指之间 982 | 强烈 983 | 强调 984 | 归 985 | 归根到底 986 | 归根结底 987 | 归齐 988 | 当 989 | 当下 990 | 当中 991 | 当儿 992 | 当前 993 | 当即 994 | 当口儿 995 | 当地 996 | 当场 997 | 当头 998 | 当庭 999 | 当时 1000 | 当然 1001 | 当真 1002 | 当着 1003 | 形成 1004 | 彻夜 1005 | 彻底 1006 | 彼 1007 | 彼时 1008 | 彼此 1009 | 往 1010 | 往往 1011 | 待 1012 | 待到 1013 | 很 1014 | 很多 1015 | 很少 1016 | 後来 1017 | 後面 1018 | 得 1019 | 得了 1020 | 得出 1021 | 得到 1022 | 得天独厚 1023 | 得起 1024 | 心里 1025 | 必 1026 | 必定 1027 | 必将 1028 | 必然 1029 | 必要 1030 | 必须 1031 | 快 1032 | 快要 1033 | 忽地 1034 | 忽然 1035 | 怎 1036 | 怎么 1037 | 怎么办 1038 | 怎么样 1039 | 怎奈 1040 | 怎样 1041 | 怎麽 1042 | 怕 1043 | 急匆匆 1044 | 怪 1045 | 怪不得 1046 | 总之 1047 | 总是 1048 | 总的来看 1049 | 总的来说 1050 | 总的说来 1051 | 总结 1052 | 总而言之 1053 | 恍然 1054 | 恐怕 1055 | 恰似 1056 | 恰好 1057 | 恰如 1058 | 恰巧 1059 | 恰恰 1060 | 恰恰相反 1061 | 恰逢 1062 | 您 1063 | 您们 1064 | 您是 1065 | 惟其 1066 | 惯常 1067 | 意思 1068 | 愤然 1069 | 愿意 1070 | 慢说 1071 | 成为 1072 | 成年 1073 | 成年累月 1074 | 成心 1075 | 我 1076 | 我们 1077 | 我是 1078 | 我的 1079 | 或 1080 | 或则 1081 | 或多或少 1082 | 或是 1083 | 或曰 1084 | 或者 1085 | 或许 1086 | 战斗 1087 | 截然 1088 | 截至 1089 | 所 1090 | 所以 1091 | 所在 1092 | 所幸 1093 | 所有 1094 | 所谓 1095 | 才 1096 | 才能 1097 | 扑通 1098 | 打 1099 | 打从 1100 | 打开天窗说亮话 1101 | 扩大 1102 | 把 1103 | 抑或 1104 | 抽冷子 1105 | 拦腰 1106 | 拿 1107 | 按 1108 | 按时 1109 | 按期 1110 | 按照 1111 | 按理 1112 | 按说 1113 | 挨个 1114 | 挨家挨户 1115 | 挨次 1116 | 挨着 1117 | 挨门挨户 1118 | 挨门逐户 1119 | 换句话说 1120 | 换言之 1121 | 据 1122 | 据实 1123 | 据悉 1124 | 据我所知 1125 | 据此 1126 | 据称 1127 | 据说 1128 | 掌握 1129 | 接下来 1130 | 接着 1131 | 接著 1132 | 接连不断 1133 | 放量 1134 | 故 1135 | 故意 1136 | 故此 1137 | 故而 1138 | 敞开儿 1139 | 敢 1140 | 敢于 1141 | 敢情 1142 | 数/ 1143 | 整个 1144 | 断然 1145 | 方 1146 | 方便 1147 | 方才 1148 | 方能 1149 | 方面 1150 | 旁人 1151 | 无 1152 | 无宁 1153 | 无法 1154 | 无论 1155 | 既 1156 | 既...又 1157 | 既往 1158 | 既是 1159 | 既然 1160 | 日复一日 1161 | 日渐 1162 | 日益 1163 | 日臻 1164 | 日见 1165 | 时候 1166 | 昂然 1167 | 明显 1168 | 明确 1169 | 是 1170 | 是不是 1171 | 是以 1172 | 是否 1173 | 是的 1174 | 显然 1175 | 显著 1176 | 普通 1177 | 普遍 1178 | 暗中 1179 | 暗地里 1180 | 暗自 1181 | 更 1182 | 更为 1183 | 更加 1184 | 更进一步 1185 | 曾 1186 | 曾经 1187 | 替 1188 | 替代 1189 | 最 1190 | 最后 1191 | 最大 1192 | 最好 1193 | 最後 1194 | 最近 1195 | 最高 1196 | 有 1197 | 有些 1198 | 有关 1199 | 有利 1200 | 有力 1201 | 有及 1202 | 有所 1203 | 有效 1204 | 有时 1205 | 有点 1206 | 有的 1207 | 有的是 1208 | 有着 1209 | 有著 1210 | 望 1211 | 朝 1212 | 朝着 1213 | 末##末 1214 | 本 1215 | 本人 1216 | 本地 1217 | 本着 1218 | 本身 1219 | 权时 1220 | 来 1221 | 来不及 1222 | 来得及 1223 | 来看 1224 | 来着 1225 | 来自 1226 | 来讲 1227 | 来说 1228 | 极 1229 | 极为 1230 | 极了 1231 | 极其 1232 | 极力 1233 | 极大 1234 | 极度 1235 | 极端 1236 | 构成 1237 
| 果然 1238 | 果真 1239 | 某 1240 | 某个 1241 | 某些 1242 | 某某 1243 | 根据 1244 | 根本 1245 | 格外 1246 | 梆 1247 | 概 1248 | 次第 1249 | 欢迎 1250 | 欤 1251 | 正值 1252 | 正在 1253 | 正如 1254 | 正巧 1255 | 正常 1256 | 正是 1257 | 此 1258 | 此中 1259 | 此后 1260 | 此地 1261 | 此处 1262 | 此外 1263 | 此时 1264 | 此次 1265 | 此间 1266 | 殆 1267 | 毋宁 1268 | 每 1269 | 每个 1270 | 每天 1271 | 每年 1272 | 每当 1273 | 每时每刻 1274 | 每每 1275 | 每逢 1276 | 比 1277 | 比及 1278 | 比如 1279 | 比如说 1280 | 比方 1281 | 比照 1282 | 比起 1283 | 比较 1284 | 毕竟 1285 | 毫不 1286 | 毫无 1287 | 毫无例外 1288 | 毫无保留地 1289 | 汝 1290 | 沙沙 1291 | 没 1292 | 没奈何 1293 | 没有 1294 | 沿 1295 | 沿着 1296 | 注意 1297 | 活 1298 | 深入 1299 | 清楚 1300 | 满 1301 | 满足 1302 | 漫说 1303 | 焉 1304 | 然 1305 | 然则 1306 | 然后 1307 | 然後 1308 | 然而 1309 | 照 1310 | 照着 1311 | 牢牢 1312 | 特别是 1313 | 特殊 1314 | 特点 1315 | 犹且 1316 | 犹自 1317 | 独 1318 | 独自 1319 | 猛然 1320 | 猛然间 1321 | 率尔 1322 | 率然 1323 | 现代 1324 | 现在 1325 | 理应 1326 | 理当 1327 | 理该 1328 | 瑟瑟 1329 | 甚且 1330 | 甚么 1331 | 甚或 1332 | 甚而 1333 | 甚至 1334 | 甚至于 1335 | 用 1336 | 用来 1337 | 甫 1338 | 甭 1339 | 由 1340 | 由于 1341 | 由是 1342 | 由此 1343 | 由此可见 1344 | 略 1345 | 略为 1346 | 略加 1347 | 略微 1348 | 白 1349 | 白白 1350 | 的 1351 | 的确 1352 | 的话 1353 | 皆可 1354 | 目前 1355 | 直到 1356 | 直接 1357 | 相似 1358 | 相信 1359 | 相反 1360 | 相同 1361 | 相对 1362 | 相对而言 1363 | 相应 1364 | 相当 1365 | 相等 1366 | 省得 1367 | 看 1368 | 看上去 1369 | 看出 1370 | 看到 1371 | 看来 1372 | 看样子 1373 | 看看 1374 | 看见 1375 | 看起来 1376 | 真是 1377 | 真正 1378 | 眨眼 1379 | 着 1380 | 着呢 1381 | 矣 1382 | 矣乎 1383 | 矣哉 1384 | 知道 1385 | 砰 1386 | 确定 1387 | 碰巧 1388 | 社会主义 1389 | 离 1390 | 种 1391 | 积极 1392 | 移动 1393 | 究竟 1394 | 穷年累月 1395 | 突出 1396 | 突然 1397 | 窃 1398 | 立 1399 | 立刻 1400 | 立即 1401 | 立地 1402 | 立时 1403 | 立马 1404 | 竟 1405 | 竟然 1406 | 竟而 1407 | 第 1408 | 第二 1409 | 等 1410 | 等到 1411 | 等等 1412 | 策略地 1413 | 简直 1414 | 简而言之 1415 | 简言之 1416 | 管 1417 | 类如 1418 | 粗 1419 | 精光 1420 | 紧接着 1421 | 累年 1422 | 累次 1423 | 纯 1424 | 纯粹 1425 | 纵 1426 | 纵令 1427 | 纵使 1428 | 纵然 1429 | 练习 1430 | 组成 1431 | 经 1432 | 经常 1433 | 经过 1434 | 结合 1435 | 结果 1436 | 给 1437 | 绝 1438 | 绝不 1439 | 绝对 1440 | 绝非 1441 | 绝顶 1442 | 继之 1443 | 继后 1444 | 继续 1445 | 继而 1446 | 维持 1447 | 综上所述 1448 | 缕缕 1449 | 罢了 1450 | 老 1451 | 老大 1452 | 老是 1453 | 老老实实 1454 | 考虑 1455 | 者 1456 | 而 1457 | 而且 1458 | 而况 1459 | 而又 1460 | 而后 1461 | 而外 1462 | 而已 1463 | 而是 1464 | 而言 1465 | 而论 1466 | 联系 1467 | 联袂 1468 | 背地里 1469 | 背靠背 1470 | 能 1471 | 能否 1472 | 能够 1473 | 腾 1474 | 自 1475 | 自个儿 1476 | 自从 1477 | 自各儿 1478 | 自后 1479 | 自家 1480 | 自己 1481 | 自打 1482 | 自身 1483 | 臭 1484 | 至 1485 | 至于 1486 | 至今 1487 | 至若 1488 | 致 1489 | 般的 1490 | 良好 1491 | 若 1492 | 若夫 1493 | 若是 1494 | 若果 1495 | 若非 1496 | 范围 1497 | 莫 1498 | 莫不 1499 | 莫不然 1500 | 莫如 1501 | 莫若 1502 | 莫非 1503 | 获得 1504 | 藉以 1505 | 虽 1506 | 虽则 1507 | 虽然 1508 | 虽说 1509 | 蛮 1510 | 行为 1511 | 行动 1512 | 表明 1513 | 表示 1514 | 被 1515 | 要 1516 | 要不 1517 | 要不是 1518 | 要不然 1519 | 要么 1520 | 要是 1521 | 要求 1522 | 见 1523 | 规定 1524 | 觉得 1525 | 譬喻 1526 | 譬如 1527 | 认为 1528 | 认真 1529 | 认识 1530 | 让 1531 | 许多 1532 | 论 1533 | 论说 1534 | 设使 1535 | 设或 1536 | 设若 1537 | 诚如 1538 | 诚然 1539 | 话说 1540 | 该 1541 | 该当 1542 | 说明 1543 | 说来 1544 | 说说 1545 | 请勿 1546 | 诸 1547 | 诸位 1548 | 诸如 1549 | 谁 1550 | 谁人 1551 | 谁料 1552 | 谁知 1553 | 谨 1554 | 豁然 1555 | 贼死 1556 | 赖以 1557 | 赶 1558 | 赶快 1559 | 赶早不赶晚 1560 | 起 1561 | 起先 1562 | 起初 1563 | 起头 1564 | 起来 1565 | 起见 1566 | 起首 1567 | 趁 1568 | 趁便 1569 | 趁势 1570 | 趁早 1571 | 趁机 1572 | 趁热 1573 | 趁着 1574 | 越是 1575 | 距 1576 | 跟 1577 | 路经 1578 | 转动 1579 | 转变 1580 | 转贴 1581 | 轰然 1582 | 较 1583 | 较为 1584 | 较之 1585 | 较比 1586 | 边 1587 | 达到 1588 | 达旦 1589 | 迄 1590 | 迅速 1591 | 过 1592 | 过于 1593 | 过去 1594 | 过来 1595 | 运用 
1596 | 近 1597 | 近几年来 1598 | 近年来 1599 | 近来 1600 | 还 1601 | 还是 1602 | 还有 1603 | 还要 1604 | 这 1605 | 这一来 1606 | 这个 1607 | 这么 1608 | 这么些 1609 | 这么样 1610 | 这么点儿 1611 | 这些 1612 | 这会儿 1613 | 这儿 1614 | 这就是说 1615 | 这时 1616 | 这样 1617 | 这次 1618 | 这点 1619 | 这种 1620 | 这般 1621 | 这边 1622 | 这里 1623 | 这麽 1624 | 进入 1625 | 进去 1626 | 进来 1627 | 进步 1628 | 进而 1629 | 进行 1630 | 连 1631 | 连同 1632 | 连声 1633 | 连日 1634 | 连日来 1635 | 连袂 1636 | 连连 1637 | 迟早 1638 | 迫于 1639 | 适应 1640 | 适当 1641 | 适用 1642 | 逐步 1643 | 逐渐 1644 | 通常 1645 | 通过 1646 | 造成 1647 | 逢 1648 | 遇到 1649 | 遭到 1650 | 遵循 1651 | 遵照 1652 | 避免 1653 | 那 1654 | 那个 1655 | 那么 1656 | 那么些 1657 | 那么样 1658 | 那些 1659 | 那会儿 1660 | 那儿 1661 | 那时 1662 | 那末 1663 | 那样 1664 | 那般 1665 | 那边 1666 | 那里 1667 | 那麽 1668 | 部分 1669 | 都 1670 | 鄙人 1671 | 采取 1672 | 里面 1673 | 重大 1674 | 重新 1675 | 重要 1676 | 鉴于 1677 | 针对 1678 | 长期以来 1679 | 长此下去 1680 | 长线 1681 | 长话短说 1682 | 问题 1683 | 间或 1684 | 防止 1685 | 阿 1686 | 附近 1687 | 陈年 1688 | 限制 1689 | 陡然 1690 | 除 1691 | 除了 1692 | 除却 1693 | 除去 1694 | 除外 1695 | 除开 1696 | 除此 1697 | 除此之外 1698 | 除此以外 1699 | 除此而外 1700 | 除非 1701 | 随 1702 | 随后 1703 | 随时 1704 | 随着 1705 | 随著 1706 | 隔夜 1707 | 隔日 1708 | 难得 1709 | 难怪 1710 | 难说 1711 | 难道 1712 | 难道说 1713 | 集中 1714 | 零 1715 | 需要 1716 | 非但 1717 | 非常 1718 | 非徒 1719 | 非得 1720 | 非特 1721 | 非独 1722 | 靠 1723 | 顶多 1724 | 顷 1725 | 顷刻 1726 | 顷刻之间 1727 | 顷刻间 1728 | 顺 1729 | 顺着 1730 | 顿时 1731 | 颇 1732 | 风雨无阻 1733 | 饱 1734 | 首先 1735 | 马上 1736 | 高低 1737 | 高兴 1738 | 默然 1739 | 默默地 1740 | 齐 1741 | ︿ 1742 | ! 1743 | # 1744 | $ 1745 | % 1746 | & 1747 | ' 1748 | ( 1749 | ) 1750 | )÷(1- 1751 | )、 1752 | * 1753 | + 1754 | +ξ 1755 | ++ 1756 | , 1757 | ,也 1758 | - 1759 | -β 1760 | -- 1761 | -[*]- 1762 | . 1763 | / 1764 | 0 1765 | 0:2 1766 | 1 1767 | 1. 1768 | 12% 1769 | 2 1770 | 2.3% 1771 | 3 1772 | 4 1773 | 5 1774 | 5:0 1775 | 6 1776 | 7 1777 | 8 1778 | 9 1779 | : 1780 | ; 1781 | < 1782 | <± 1783 | <Δ 1784 | <λ 1785 | <φ 1786 | << 1787 | = 1788 | =″ 1789 | =☆ 1790 | =( 1791 | =- 1792 | =[ 1793 | ={ 1794 | > 1795 | >λ 1796 | ? 1797 | @ 1798 | A 1799 | LI 1800 | R.L. 
1801 | ZXFITL 1802 | [ 1803 | [①①] 1804 | [①②] 1805 | [①③] 1806 | [①④] 1807 | [①⑤] 1808 | [①⑥] 1809 | [①⑦] 1810 | [①⑧] 1811 | [①⑨] 1812 | [①A] 1813 | [①B] 1814 | [①C] 1815 | [①D] 1816 | [①E] 1817 | [①] 1818 | [①a] 1819 | [①c] 1820 | [①d] 1821 | [①e] 1822 | [①f] 1823 | [①g] 1824 | [①h] 1825 | [①i] 1826 | [①o] 1827 | [② 1828 | [②①] 1829 | [②②] 1830 | [②③] 1831 | [②④ 1832 | [②⑤] 1833 | [②⑥] 1834 | [②⑦] 1835 | [②⑧] 1836 | [②⑩] 1837 | [②B] 1838 | [②G] 1839 | [②] 1840 | [②a] 1841 | [②b] 1842 | [②c] 1843 | [②d] 1844 | [②e] 1845 | [②f] 1846 | [②g] 1847 | [②h] 1848 | [②i] 1849 | [②j] 1850 | [③①] 1851 | [③⑩] 1852 | [③F] 1853 | [③] 1854 | [③a] 1855 | [③b] 1856 | [③c] 1857 | [③d] 1858 | [③e] 1859 | [③g] 1860 | [③h] 1861 | [④] 1862 | [④a] 1863 | [④b] 1864 | [④c] 1865 | [④d] 1866 | [④e] 1867 | [⑤] 1868 | [⑤]] 1869 | [⑤a] 1870 | [⑤b] 1871 | [⑤d] 1872 | [⑤e] 1873 | [⑤f] 1874 | [⑥] 1875 | [⑦] 1876 | [⑧] 1877 | [⑨] 1878 | [⑩] 1879 | [*] 1880 | [- 1881 | [] 1882 | ] 1883 | ]∧′=[ 1884 | ][ 1885 | _ 1886 | a] 1887 | b] 1888 | c] 1889 | e] 1890 | f] 1891 | ng昉 1892 | { 1893 | {- 1894 | | 1895 | } 1896 | }> 1897 | ~ 1898 | ~± 1899 | ~+ 1900 | ¥ 1901 | 玩 1902 | 1903 | -------------------------------------------------------------------------------- /site/mybzz/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WhiteDevilBan/CommentCrawler/2bc36084da3693d7f6b75295fbbcad216c42b2b8/site/mybzz/__init__.py -------------------------------------------------------------------------------- /site/mybzz/__pycache__/__init__.cpython-34.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WhiteDevilBan/CommentCrawler/2bc36084da3693d7f6b75295fbbcad216c42b2b8/site/mybzz/__pycache__/__init__.cpython-34.pyc -------------------------------------------------------------------------------- /site/mybzz/crawler/360Crawler.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from site.mybzz.util import DbUtil 4 | from site.mybzz.util import BsUtil 5 | from site.mybzz.util import DateUtil 6 | 7 | conn, cur = DbUtil.getConn() 8 | 9 | 10 | def getData(name, id, score, totalDownload): 11 | commentUrl = "http://comment.mobilem.360.cn/comment/getComments?baike=%s&start=%s&count=%s" 12 | start, count = 0, 50 13 | result = BsUtil.praseJson(commentUrl % (id, start, 1)) 14 | totalComCount = result['data']['total'] 15 | 16 | print('INSERT INTO games(game_name,from_store, total_comment_count, total_score, total_download, data_date) ' 17 | 'VALUES ("%s", "%s", "%s", %d, "%s", "%s");' % ( 18 | name, '360', totalComCount, (score * 10) / 2, 19 | totalDownload, DateUtil.currentDate())) 20 | cur.execute('INSERT INTO games(game_name,from_store, total_comment_count, total_score, total_download, data_date) ' 21 | 'VALUES ("%s", "%s", "%s", %d, "%s", "%s");' % ( 22 | name, '360', totalComCount, (score * 10) / 2, 23 | totalDownload, DateUtil.currentDate())) 24 | game_id = cur.lastrowid 25 | 26 | while True: 27 | try: 28 | result = BsUtil.praseJson(commentUrl % (id, start, count)) 29 | except: 30 | print(commentUrl % (id, start, count)); break # stop paging this game if the request fails, instead of reusing the stale result 31 | if not result['data']['messages']: 32 | break 33 | for comment in result['data']['messages']: 34 | # print(comment['username'], comment['content'], comment['score'], comment['create_time']) 35 | print('INSERT INTO comment(game_id, content, comment_time, author, score) ' 36 | 'VALUES ("%s", "%s", "%s", "%s", %s);' % ( 37 | 
game_id, comment['content'].replace('\n', ''), comment['create_time'], 38 | comment['username'], comment['score'])) 39 | try: 40 | cur.execute('INSERT INTO comment(game_id, content, comment_time, author, score) ' 41 | 'VALUES ("%s", "%s", "%s", "%s", %s);' % ( 42 | game_id, comment['content'].replace('\n', '').replace('\"','\''), comment['create_time'], 43 | comment['username'], comment['score'])) 44 | except: 45 | print(sys.exc_info()[0], ":", sys.exc_info()[1]) 46 | pass 47 | start += 50 48 | 49 | 50 | def getTop50(): 51 | result = BsUtil.praseJson("http://openbox.mobilem.360.cn/app/rank?from=game&type=download&startCount=1") 52 | # print(result) 53 | for app in result['data'][4:]: 54 | print(app['name'], app['id'], app['rating'], app['download_times']) 55 | getData(app['name'], app['id'], float(app['rating']), app['download_times']) 56 | conn.commit() 57 | 58 | if __name__ == '__main__': 59 | getTop50() 60 | DbUtil.close(conn, cur) 61 | -------------------------------------------------------------------------------- /site/mybzz/crawler/AppleCrawler.py: -------------------------------------------------------------------------------- 1 | import json 2 | from urllib import request 3 | import zlib 4 | from bs4 import BeautifulSoup 5 | import re 6 | from site.mybzz.util import DbUtil 7 | from site.mybzz.util import BsUtil 8 | from site.mybzz.util import DateUtil 9 | 10 | conn, cur = DbUtil.getConn() 11 | 12 | 13 | def getComment(url, game_id): 14 | req = request.Request(url) 15 | req.add_header("User-Agent", 16 | "iTunes/11.0 (Windows; Microsoft Windows 7 Business Edition Service Pack 1 " 17 | "(Build 7601)) AppleWebKit/536.27.1") 18 | result = request.urlopen(req, timeout=30) 19 | json_result = json.loads(result.read().decode()) 20 | for comment in json_result['userReviewList']: 21 | try: 22 | print('INSERT INTO comment(game_id, content, comment_time, author, score) ' 23 | 'VALUES ("%s", "%s", "%s", "%s", %s);' % ( 24 | game_id, comment['body'], 25 | comment['date'].replace("T", " ").replace("Z", ""), 26 | comment['name'], comment['rating'] * 10)) 27 | cur.execute('INSERT INTO comment(game_id, content, comment_time, author, score) ' 28 | 'VALUES ("%s", "%s", "%s", "%s", %s);' % ( 29 | game_id, comment['body'], 30 | comment['date'].replace("T", " ").replace("Z", ""), 31 | comment['name'], comment['rating'] * 10)) 32 | except: 33 | pass 34 | 35 | 36 | def getData(id, totalComCount, game_id): 37 | start = 0 38 | while (totalComCount > 0): 39 | try: 40 | if totalComCount > 500: 41 | url = "https://itunes.apple.com/WebObjects/MZStore.woa/wa/userReviewsRow?cc=cn&id=%s&displayable-kind=11&" \ 42 | "startIndex=%s&endIndex=%s&sort=4&appVersion=all" % ( 43 | id, start, (start + 500)) 44 | else: 45 | url = "https://itunes.apple.com/WebObjects/MZStore.woa/wa/userReviewsRow?cc=cn&id=%s&displayable-kind=11" \ 46 | "&startIndex=%s&endIndex=%s&sort=4&appVersion=all" % ( 47 | id, start, (start + totalComCount)) 48 | 49 | print(url) 50 | totalComCount = totalComCount - 500 51 | start = start + 500 52 | getComment(url, game_id) 53 | conn.commit() 54 | 55 | except: 56 | print("comment error") 57 | getComment(url, game_id) 58 | pass 59 | 60 | 61 | def getTop(all): 62 | url = "https://itunes.apple.com/cn/rss/topgrossingipadapplications/limit=50/json" 63 | 64 | result = BsUtil.praseJson(url) 65 | for app in result['feed']['entry']: 66 | if app['category']['attributes']['term'] == 'Games': 67 | try: 68 | detail = all['storePlatformData']['lockup-room']['results'][app['id']['attributes']['im:id']] 69 | 70 | print( 
71 | 'INSERT INTO games(game_name,from_store, total_comment_count, total_score, total_download, data_date) ' 72 | 'VALUES ("%s", "%s", "%s", %s, "%s", "%s");' % ( 73 | app['im:name']['label'], 'Apple Store', detail['userRating']['ratingCount'], 74 | (detail['userRating']['value']), 75 | 0, DateUtil.currentDate())) 76 | # cur.execute( 77 | # 'INSERT INTO games(game_name,from_store, total_comment_count, total_score, total_download, data_date) ' 78 | # 'VALUES ("%s", "%s", "%s", %d, "%s", "%s");' % (app['im:name']['label'], 'Apple Store', 79 | # detail['userRating']['ratingCount'], 80 | # int(detail['userRating']['value']) * 10, 81 | # 0, DateUtil.currentDate())) 82 | # getData(app['id']['attributes']['im:id'], detail['userRating']['ratingCount'], cur.lastrowid) 83 | except: 84 | # print(app['id']['attributes']['im:id'], '............') 85 | # print(sys.exc_info()[0], ":", sys.exc_info()[1]) 86 | pass 87 | 88 | 89 | def getAllDetail(): 90 | url = "https://itunes.apple.com/WebObjects/MZStore.woa/wa/viewTop?id=29099&popId=38&genreId=36" 91 | req = request.Request(url) 92 | req.add_header("User-Agent", 93 | "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.110 Safari/537.36") 94 | req.add_header("Host", "itunes.apple.com") 95 | req.add_header("Connection", "keep-alive") 96 | req.add_header("Cache-Control", "no-cache") 97 | req.add_header("X-Apple-Store-Front", "143465-19,32 ab:pNOGxia1") 98 | req.add_header("Accept-Language", "zh-cn, zh;q=0.75, en-us;q=0.50, en;q=0.25") 99 | req.add_header("X-Apple-I-MD-M", "sKfpwVaN+aYhvpzdR1eEp5E1nN7xuK5Q6eEl2fcooczbWhwrTp3PTfm5AwiMZi0hucRNdGaFRU3RX+Yx") 100 | req.add_header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8") 101 | req.add_header("X-Apple-I-MD-RINFO", "17106176") 102 | req.add_header("X-Apple-Tz", "28800") 103 | req.add_header("If-Modified-Since", "Fri, 03 Jun 2016 11:23:38 GMT") 104 | req.add_header("X-Apple-I-MD", "AAAABQAAABCRcn9HLXhoSaRR0WXm+1cmAAAAAg==") 105 | req.add_header("Accept-Encoding", "gzip, deflate, sdch") 106 | 107 | resp = request.urlopen(req) 108 | 109 | data = zlib.decompress(resp.read(), 16 + zlib.MAX_WBITS).decode() 110 | 111 | soup = BeautifulSoup(data, 'html.parser') 112 | data = soup.find(text=re.compile('its.serverData')).replace('its.serverData=', '') 113 | 114 | all = json.loads(data) 115 | 116 | return all 117 | 118 | 119 | if __name__ == '__main__': 120 | all = getAllDetail() 121 | getTop(all) 122 | DbUtil.close(conn, cur) 123 | -------------------------------------------------------------------------------- /site/mybzz/crawler/BaiDuCrawler.py: -------------------------------------------------------------------------------- 1 | from site.mybzz.util import DbUtil 2 | from site.mybzz.util import BsUtil 3 | from site.mybzz.util import DateUtil 4 | 5 | conn, cur = DbUtil.getConn() 6 | 7 | 8 | def getData(groupId, game_id): 9 | try: 10 | url = "http://m.baidu.com/appsrv?action=getcommentlist&native_api=1&groupid=%s&start=0&count=1" % ( 11 | groupId) 12 | result = BsUtil.praseGzipJson(url) 13 | totalComCount = result['total_count'] 14 | print("total comment count:", totalComCount) 15 | url = "http://m.baidu.com/appsrv?action=getcommentlist&native_api=1&groupid=%s&start=0&count=%s" % ( 16 | groupId, totalComCount) 17 | result = BsUtil.praseGzipJson(url) 18 | 19 | for comment in result['data']: 20 | print('INSERT INTO comment(game_id, content, comment_time, author, score) ' 21 | 'VALUES ("%s", "%s", "%s", "%s", %s);' % ( 22 | game_id, comment['content'].replace('\n', ''), 23 | 
DateUtil.longToStrTime(int(comment['create_time'])), 24 | comment['user_name'], comment['score'])) 25 | # cur.execute('INSERT INTO comment(game_id, content, comment_time, author, score) ' 26 | # 'VALUES ("%s", "%s", "%s", "%s", %s);' % ( 27 | # game_id, comment['content'].replace('\n', ''), 28 | # DateUtil.longToStrTime(int(comment['create_time'])), 29 | # comment['user_name'], comment['score'])) 30 | except: 31 | pass 32 | 33 | 34 | def getTop15(): 35 | json_result = BsUtil.praseGzipJson( 36 | 'http://m.baidu.com/appsrv?action=ranklist&native_api=1&pu=ctv%401%2Ccfrom%401000561u%2Ccua%40gu2ki4uq-' 37 | 'igBNE6lI5me6NNy2I_UCvhlSdNqA%2Ccuid%400u-Yu0PYH8jVavuO_a-YagiSS8lvuvu9_a2L80ufvi6kuviJlavefYamv8_6uvtz' 38 | '_a2etxNNB%2Ccut%40rIviC_C0vC_7uLP7NJGCjxNIB%2Cosname%40baiduappsearch&name=game') 39 | 40 | for app in json_result['result']['data']: 41 | appInfo = app['itemdata'] 42 | print('INSERT INTO games(game_name,from_store, total_comment_count, total_score, total_download, data_date) ' 43 | 'VALUES ("%s", "%s", "%s", "%s", "%s", "%s");' % ( 44 | appInfo['sname'], 'baidu', appInfo['commentsnum'][:-2], 10 * round(int(appInfo['score']) / 20, 1), 45 | appInfo['display_download'], DateUtil.currentDate())) 46 | # cur.execute( 47 | # 'INSERT INTO games(game_name,from_store, total_comment_count, total_score, total_download, data_date) ' 48 | # 'VALUES ("%s", "%s", "%s", "%s", "%s", "%s");' % ( 49 | # appInfo['sname'], 'baidu', appInfo['commentsnum'][:-2], 10 * round(int(appInfo['score']) / 20, 1), 50 | # appInfo['display_download'], DateUtil.currentDate())) 51 | # game_id = cur.lastrowid 52 | getData(appInfo['groupid'], 0) 53 | 54 | detailUrl = "http://m.baidu.com/appsrv?action=detail&native_api=1&docid=%s" % appInfo['docid'] 55 | detail = BsUtil.praseGzipJson(detailUrl) 56 | 57 | for version in detail['result']['data']['app_moreversion']: 58 | getData(version['content'][0]['groupid'], 0) 59 | print('------------------------------------------------------') 60 | 61 | 62 | if __name__ == '__main__': 63 | getTop15() 64 | conn.commit() 65 | DbUtil.close(conn, cur) 66 | -------------------------------------------------------------------------------- /site/mybzz/crawler/MZCrawler.py: -------------------------------------------------------------------------------- 1 | import html 2 | from site.mybzz.util import DbUtil 3 | from site.mybzz.util import BsUtil 4 | from site.mybzz.util import DateUtil 5 | 6 | 7 | def getData(id,package_name): 8 | 9 | total = BsUtil.praseJson('http://app.flyme.cn/apps/public/evaluate/list?app_id=%s&start=0&max=1' % id) 10 | conn,cur = DbUtil.getConn() 11 | 12 | totalComCount = total['value']['totalCount'] 13 | # fetch the total download count and rating 14 | soup = BsUtil.praseHtml('http://app.flyme.cn/games/public/detail?package_name=%s' % package_name) 15 | 16 | totalScore = soup.find('div', class_="star_bg").attrs['data-num'] 17 | totalDownload = soup.find(text="下      载:").parent.next_sibling.next_sibling.string 18 | # extract the game name 19 | for child in soup.find('div', class_="detail_top").children: 20 | if child.name == 'h3': 21 | game_name = child.string 22 | 23 | cur.execute('INSERT INTO games(game_name,from_store, total_comment_count, total_score, total_download, data_date) ' 24 | 'VALUES ("%s", "%s", "%s", "%s", "%s", "%s");' %(game_name,'meizu',totalComCount, 25 | totalScore,totalDownload,DateUtil.currentDate())) 26 | game_id = cur.lastrowid 27 | # fetch every comment for this game 28 | value = BsUtil.praseJson('http://app.flyme.cn/apps/public/evaluate/list?app_id=%s&start=0&max=%s'% (id,totalComCount)) 29 | 30 | for com in 
value['value']['list']: 31 | comment = html.unescape(com['comment']).replace("\"","'") 32 | time = com['create_time'] 33 | author = html.unescape(com['user_name']).replace("\"","'") 34 | score = com['star'] 35 | 36 | try: 37 | cur.execute('INSERT INTO comment(game_id, content, comment_time, author, score) ' 38 | 'VALUES ("%s", "%s", "%s", "%s", %d);' % (game_id,comment,time,author,score)) 39 | except: 40 | pass 41 | 42 | conn.commit() 43 | DbUtil.close(conn,cur) 44 | 45 | 46 | def getTop50(): 47 | result = BsUtil.praseJson('http://api-game.meizu.com/games/public/top/layout?start=0&max=50') 48 | for game in result['value']['blocks'][0]['data'][3:4]: 49 | print('game: %s, id: %s, package: %s' % (game['name'],game['id'],game['package_name'])) 50 | try: 51 | getData(game['id'],game['package_name']) 52 | except: 53 | pass 54 | 55 | 56 | if __name__ == '__main__': 57 | getTop50() -------------------------------------------------------------------------------- /site/mybzz/crawler/TencentCrawler.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from site.mybzz.util import DbUtil 4 | from site.mybzz.util import BsUtil 5 | from site.mybzz.util import DateUtil 6 | 7 | conn, cur = DbUtil.getConn() 8 | 9 | 10 | def getData(name, downloadCount, score, packageName): 11 | contextData = '' 12 | url = "http://sj.qq.com/myapp/app/comment.htm?apkName=%s&contextData=%s" 13 | 14 | totalComCount = 0 15 | while totalComCount == 0: 16 | try: 17 | result = BsUtil.praseJson(url % (packageName, contextData)) 18 | totalComCount = result['obj']['total'] 19 | except: 20 | pass 21 | 22 | print( 23 | 'INSERT INTO games(game_name,from_store, total_comment_count, total_score, total_download, data_date) ' 24 | 'VALUES ("%s", "%s", "%s", %d, "%s", "%s");' % ( 25 | name, 'qq', totalComCount, score, downloadCount, DateUtil.currentDate())) 26 | cur.execute('INSERT INTO games(game_name,from_store, total_comment_count, total_score, total_download, data_date) ' 27 | 'VALUES ("%s", "%s", "%s", %d, "%s", "%s");' % ( 28 | name, 'qq', totalComCount, score, downloadCount, DateUtil.currentDate())) 29 | game_id = cur.lastrowid 30 | while True: 31 | try: 32 | result = BsUtil.praseJson(url % (packageName, contextData)) 33 | if not result['success']: 34 | continue 35 | if result['obj']['hasNext'] != 1: 36 | break 37 | 38 | contextData = result['obj']['contextData'] 39 | 40 | for comment in result['obj']['commentDetails']: 41 | print('INSERT INTO comment(game_id, content, comment_time, author, score) ' 42 | 'VALUES ("%s", "%s", "%s", "%s", %d);' % ( 43 | game_id, comment['content'].replace('\r', '').replace(' ', ''), 44 | DateUtil.longToStrTime(int(comment['createdTime'])), 45 | comment['nickName'], int(comment['score']) * 10)) 46 | cur.execute('INSERT INTO comment(game_id, content, comment_time, author, score) ' 47 | 'VALUES ("%s", "%s", "%s", "%s", %s);' % ( 48 | game_id, comment['content'].replace('\r', '').replace(" ", ""), 49 | DateUtil.longToStrTime(int(comment['createdTime'])), 50 | comment['nickName'], int(comment['score']) * 10)) 51 | except: 52 | conn.commit() 53 | print(result) 54 | print(sys.exc_info()[0], ":", sys.exc_info()[1]) 55 | 56 | 57 | def getTop(): 58 | url = 
"http://pngweb.3g.qq.com/getSubRankList?sortType=22&categoryId=-2&pageSize=20&startIndex=0&needCateList=0&phoneGuid=891204461307686912&phoneImei=862095025228963&qua=TMAF_652_F_2152%2F062152%26NA%2F062152%2F6522130_2152%265.1_22_2_0_0%26120_72_14%26Meizu_MX4_Meizu_meizumx4%261000047%262152%26V3&androidId=dba20155a97326c&macAdress=&imsi=460011431632413&wifiBssid=" 59 | result = BsUtil.praseQQ(url) 60 | for app in result['appList']: 61 | print(app['appName'], app['apkId'], '下载数:', app['appDownCount'], '评分:', float(app['score']) * 10, 62 | app['packageName']) 63 | getData(app['appName'], app['appDownCount'], float(app['score']) * 10, app['packageName']) 64 | pass 65 | 66 | 67 | if __name__ == '__main__': 68 | getTop() 69 | # getData('', 0, 0, packageName='com.qqgame.hlddz') 70 | DbUtil.close(conn, cur) 71 | -------------------------------------------------------------------------------- /site/mybzz/crawler/WdjCrawler.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from site.mybzz.util import DbUtil 4 | from site.mybzz.util import BsUtil 5 | from site.mybzz.util import DateUtil 6 | 7 | 8 | def getData(name, id, commentCount, totalDownload, packageName): 9 | 10 | pass 11 | 12 | 13 | def getTop50(): 14 | result = BsUtil.praseJson( 15 | "http://apps.wandoujia.com/api/v1/apps?type=weeklytopgame&max=50&start=0&opt_fields=likesCount,title,packageName,installedCountStr,id,commentsCount") 16 | for app in result: 17 | print('游戏名:%s,id:%s,评论数:%s,下载量:%s,包名:%s' % ( 18 | app['title'], app['id'], app['commentsCount'], app['installedCountStr'], app['packageName'])) 19 | # print(app) 20 | if '万' in app['installedCountStr']: 21 | totalDownload = int(float(app['installedCountStr'][:-2]) * 10000) 22 | else: 23 | totalDownload = int(float(app['installedCountStr'][:-1])) 24 | 25 | getData(app['title'], app['id'], app['commentsCount'], totalDownload, app['packageName']) 26 | 27 | 28 | if __name__ == '__main__': 29 | getTop50() 30 | -------------------------------------------------------------------------------- /site/mybzz/crawler/XiaoMiCrawler.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from site.mybzz.util import DbUtil 4 | from site.mybzz.util import BsUtil 5 | from site.mybzz.util import DateUtil 6 | 7 | 8 | def getTop30(): 9 | page = 0 10 | result = BsUtil.praseJson( 11 | 'http://market.xiaomi.com/apm/toplist/15?clientId=2bb48bb54747e03a6ab667ab7b51050a&co=CN&la=zh&os=1461822601&page=%d&sdk=22&stamp=0' % page) 12 | # print(result) 13 | for game in result['listApp']: 14 | print('游戏名:%s,id:%s,总评分:%s' % (game['displayName'], game['id'], game['ratingScore'])) 15 | try: 16 | getData(game['id'], game['displayName'], game['ratingScore']) 17 | except: 18 | print(sys.exc_info()[0], ":", sys.exc_info()[1]) 19 | pass 20 | 21 | 22 | def getData(id, name, totalScore): 23 | page = 0 24 | hasMore = True 25 | 26 | # 插入游戏 27 | conn, cur = DbUtil.getConn() 28 | 29 | result = BsUtil.praseJson('http://market.xiaomi.com/apm/comment/list/%s?' 
30 | 'clientId=2bb48bb54747e03a6ab667ab7b51050a&co=CN' 31 | '&la=zh&os=1461822601&page=%s&sdk=22' % (id, page)) 32 | totalComCount = result['pointCount'] 33 | 34 | print('INSERT INTO games(game_name,from_store, total_comment_count, total_score, total_download, data_date) ' 35 | 'VALUES ("%s", "%s", "%s", "%s", "%s", "%s");' % ( 36 | name, 'xiaomi', totalComCount, totalScore * 10, 0, DateUtil.currentDate())) 37 | cur.execute('INSERT INTO games(game_name,from_store, total_comment_count, total_score, total_download, data_date) ' 38 | 'VALUES ("%s", "%s", "%s", "%s", "%s", "%s");' % ( 39 | name, 'xiaomi', totalComCount, totalScore * 10, 0, DateUtil.currentDate())) 40 | game_id = cur.lastrowid 41 | # game_id = 0 42 | while hasMore: 43 | result = BsUtil.praseJson('http://market.xiaomi.com/apm/comment/list/%s?' 44 | 'clientId=2bb48bb54747e03a6ab667ab7b51050a&co=CN' 45 | '&la=zh&os=1461822601&page=%s&sdk=22' % (id, page)) 46 | # print(result) 47 | for comment in result['comments']: 48 | content = comment['commentValue'].replace("\"", "'").replace(" ", "") 49 | score = comment['pointValue'] 50 | time = comment['updateTime'] 51 | author = comment['nickname'].replace("\"", "'") 52 | # insert the comment 53 | try: 54 | print('INSERT INTO comment(game_id, content, comment_time, author, score) ' 55 | 'VALUES ("%s", "%s", "%s", "%s", %d);' % ( 56 | game_id, content, DateUtil.longToStrTime(time / 1000), author, score)) 57 | cur.execute('INSERT INTO comment(game_id, content, comment_time, author, score) ' 58 | 'VALUES ("%s", "%s", "%s", "%s", %d);' % ( 59 | game_id, content, DateUtil.longToStrTime(time / 1000), author, score)) 60 | except: 61 | print(sys.exc_info()[0], ":", sys.exc_info()[1]) 62 | pass 63 | page += 1 64 | hasMore = result['hasMore'] 65 | 66 | conn.commit() 67 | DbUtil.close(conn, cur) 68 | 69 | 70 | if __name__ == '__main__': 71 | getTop30() 72 | -------------------------------------------------------------------------------- /site/mybzz/domain/MzComment.py: -------------------------------------------------------------------------------- 1 | class MzComment(object): 2 | pass 3 | -------------------------------------------------------------------------------- /site/mybzz/domain/__pycache__/MzComment.cpython-34.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WhiteDevilBan/CommentCrawler/2bc36084da3693d7f6b75295fbbcad216c42b2b8/site/mybzz/domain/__pycache__/MzComment.cpython-34.pyc -------------------------------------------------------------------------------- /site/mybzz/keyword/CreateWordCloud.py: -------------------------------------------------------------------------------- 1 | import random 2 | from operator import itemgetter 3 | import jieba 4 | import jieba.analyse 5 | from pytagcloud import make_tags, create_tag_image 6 | from pytagcloud.colors import COLOR_SCHEMES 7 | from site.mybzz.util import DbUtil 8 | 9 | stop = [] 10 | 11 | def plot(game_name, game_id): 12 | dict = {} 13 | comments = DbUtil.getAllResult("select * from comment where game_id = %s" % game_id) 14 | for comment in comments: 15 | 16 | result = jieba.analyse.extract_tags(comment[2], topK=3) 17 | 18 | for word in result: 19 | if len(word) < 2: 20 | continue 21 | elif word in stop: 22 | continue 23 | 24 | if word not in dict: 25 | dict[word] = 1 26 | else: 27 | dict[word] += 1 28 | 29 | print(dict) 30 | 31 | swd = sorted(dict.items(), key=itemgetter(1), reverse=True) 32 | swd = swd[1:50] 33 | tags = make_tags(swd, 34 | minsize=30, 35 | maxsize=120, 36 | 
colors=random.choice(list(COLOR_SCHEMES.values()))) 37 | 38 | create_tag_image(tags, 39 | 'c:/%s.png' % game_name, 40 | background=(0, 0, 0, 255), 41 | size=(900, 600), 42 | fontname='SimHei') 43 | 44 | print('saved the word cloud image to disk') 45 | 46 | 47 | if __name__ == '__main__': 48 | 49 | f = open("../StopWords.txt", encoding="utf-8") 50 | jieba.load_userdict("c:/dict.txt") 51 | 52 | while True: 53 | line = f.readline().replace("\n", '') 54 | 55 | if not line: 56 | break 57 | stop.append(line) 58 | 59 | games = DbUtil.getAllResult("select game_id,games.game_name from `comment` join games on game_id = games.id GROUP BY game_id ORDER BY count(game_id) desc limit 50") 60 | 61 | l =[] 62 | for game in games: 63 | if game[1] not in l: 64 | plot(game[1], game[0]) 65 | l.append(game[1]) 66 | -------------------------------------------------------------------------------- /site/mybzz/keyword/InsertKeyWord.py: -------------------------------------------------------------------------------- 1 | import random 2 | from operator import itemgetter 3 | import jieba 4 | import jieba.analyse 5 | from pytagcloud import make_tags, create_tag_image 6 | from pytagcloud.colors import COLOR_SCHEMES 7 | 8 | from site.mybzz.util import DbUtil 9 | 10 | stop = [] 11 | conn, cur = DbUtil.getConn() 12 | 13 | def plot(game_name, game_id): 14 | dict = {} 15 | comments = DbUtil.getAllResult("select * from comment where game_id = %s" % game_id) 16 | for comment in comments: 17 | 18 | result = jieba.analyse.extract_tags(comment[2], topK=3) 19 | 20 | for word in result: 21 | if len(word) < 2: 22 | continue 23 | elif word in stop: 24 | continue 25 | 26 | if word not in dict: 27 | dict[word] = 1 28 | else: 29 | dict[word] += 1 30 | 31 | swd = sorted(dict.items(), key=itemgetter(1), reverse=True) 32 | swd = swd[1:50] 33 | 34 | tags = make_tags(swd, 35 | minsize=30, 36 | maxsize=100, 37 | colors=random.choice(list(COLOR_SCHEMES.values()))) 38 | 39 | 40 | create_tag_image(tags, 41 | 'C:/Users/Administrator/Desktop/%s_%s.png' % (game_name, game_id), 42 | background=(0, 0, 0, 255), 43 | size=(900, 600), 44 | fontname='SimHei') 45 | print('created file ---%s' % game_name) 46 | 47 | # dict = {} 48 | # 49 | # for (k, v) in swd: 50 | # dict[k] = v 51 | # print('INSERT INTO keyword (game_id, keyword) VALUES (%s, "%s"' % (game_id, str(dict))) 52 | # cur.execute('INSERT INTO keyword (game_id, keyword) VALUES (%s, "%s")' % (game_id, str(dict))) 53 | # conn.commit() 54 | 55 | word = DbUtil.getOneResult('select keyword from keyword limit 1') 56 | print(eval(word[0])) 57 | 58 | 59 | if __name__ == '__main__': 60 | 61 | f = open("../StopWords.txt", encoding="utf-8") 62 | jieba.load_userdict("c:/dict.txt") 63 | 64 | while True: 65 | line = f.readline().replace("\n", '') 66 | 67 | if not line: 68 | break 69 | stop.append(line) 70 | 71 | # games = DbUtil.getAllResult( 72 | # "select game_id,games.game_name from `comment` join games on game_id = games.id GROUP BY game_id ORDER BY count(game_id) desc") 73 | # 74 | # for game in games: 75 | # plot(game[1], game[0]) 76 | plot('皇室战争', 189) -------------------------------------------------------------------------------- /site/mybzz/neg_review.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WhiteDevilBan/CommentCrawler/2bc36084da3693d7f6b75295fbbcad216c42b2b8/site/mybzz/neg_review.pkl -------------------------------------------------------------------------------- /site/mybzz/pos_review.pkl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/WhiteDevilBan/CommentCrawler/2bc36084da3693d7f6b75295fbbcad216c42b2b8/site/mybzz/pos_review.pkl -------------------------------------------------------------------------------- /site/mybzz/sentiment/Feature.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | from sklearn.datasets import load_files 5 | from sklearn.cross_validation import train_test_split 6 | from sklearn.feature_extraction.text import CountVectorizer 7 | from sklearn.naive_bayes import MultinomialNB 8 | 9 | from site.mybzz.sentiment import feature_selection 10 | 11 | 12 | def text_classifly_twang(dataset_dir_name, fs_method, fs_num): 13 | print('Loading dataset, 80% for training, 20% for testing...') 14 | movie_reviews = load_files(dataset_dir_name) 15 | doc_str_list_train, doc_str_list_test, doc_class_list_train, doc_class_list_test = train_test_split( 16 | movie_reviews.data, movie_reviews.target, test_size=0.2, random_state=0) 17 | 18 | print('Feature selection...') 19 | print('fs method:' + fs_method, 'fs num:' + str(fs_num)) 20 | 21 | vectorizer = CountVectorizer(binary=True) 22 | word_tokenizer = vectorizer.build_tokenizer() 23 | doc_terms_list_train = [word_tokenizer(doc_str) for doc_str in doc_str_list_train] 24 | term_set_fs = feature_selection.feature_selection(doc_terms_list_train, doc_class_list_train, fs_method)[:fs_num] 25 | 26 | print('Building VSM model...') 27 | 28 | term_dict = dict(zip(term_set_fs, range(len(term_set_fs)))) 29 | vectorizer.fixed_vocabulary = True 30 | vectorizer.vocabulary_ = term_dict 31 | doc_train_vec = vectorizer.fit_transform(doc_str_list_train) 32 | doc_test_vec = vectorizer.transform(doc_str_list_test) 33 | 34 | clf = MultinomialNB().fit(doc_train_vec, doc_class_list_train) # fit the MultinomialNB classifier 35 | doc_test_predicted = clf.predict(doc_test_vec) 36 | 37 | acc = np.mean(doc_test_predicted == doc_class_list_test) 38 | print('Accuracy: ', acc) 39 | 40 | return acc 41 | 42 | 43 | if __name__ == '__main__': 44 | dataset_dir_name = "E:\MyProject\python\comment\site\mybzz\sentiment" 45 | fs_method_list = ['IG', 'MI', 'WLLR'] 46 | fs_num_list = range(25000, 35000, 1000) 47 | acc_dict = {} 48 | 49 | for fs_method in fs_method_list: 50 | acc_list = [] 51 | for fs_num in fs_num_list: 52 | acc = text_classifly_twang(dataset_dir_name, fs_method, fs_num) 53 | acc_list.append(acc) 54 | acc_dict[fs_method] = acc_list 55 | 56 | print('fs method:', acc_dict[fs_method]) 57 | 58 | 59 | for fs_method in fs_method_list: 60 | plt.plot(fs_num_list, acc_dict[fs_method], '--^', label=fs_method) 61 | plt.title('feature selection') 62 | plt.xlabel('fs num') 63 | plt.ylabel('accuracy') 64 | plt.ylim((0.82, 0.86)) 65 | 66 | plt.legend(loc='upper left', numpoints=1) 67 | plt.show() -------------------------------------------------------------------------------- /site/mybzz/sentiment/Vec.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | 3 | from gensim.models import word2vec 4 | import jieba 5 | from gensim.models.word2vec import LineSentence 6 | from site.mybzz.util import DbUtil 7 | 8 | stop = [] 9 | 10 | def train(fileName, modelName): 11 | model = word2vec.Word2Vec(LineSentence(fileName), size=200, window=5, min_count=5, workers=multiprocessing.cpu_count()) # train on the segmented corpus file instead of placeholder data 12 | model.save(modelName) 13 | return model 14 | 15 | 16 | def cut(): 17 | comments = 
DbUtil.getAllResult("select * from comment limit 300000") 18 | 19 | 20 | file = open("test1.txt", "w",encoding="utf-8") 21 | for comment in comments: 22 | list = [] 23 | result = jieba.cut(comment[2]) 24 | for word in result: 25 | if word not in stop and word != ' ': 26 | list.append(word) 27 | 28 | if list: 29 | file.write(" ".join(list)) 30 | file.write("\n") 31 | file.close() 32 | pass 33 | 34 | def getStop(): 35 | 36 | f = open("../StopWords.txt", encoding="utf-8") 37 | jieba.load_userdict("c:/dict.txt") 38 | 39 | while True: 40 | line = f.readline().replace("\n", '') 41 | 42 | if not line: 43 | break 44 | stop.append(line) 45 | 46 | 47 | if __name__ == '__main__': 48 | 49 | getStop() 50 | 51 | cut() 52 | 53 | # model = train("test.txt","model") 54 | # 55 | # for w in model.most_similar(u'魅族'): 56 | # print(w[0], w[1]) 57 | -------------------------------------------------------------------------------- /site/mybzz/sentiment/feature_selection.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | 5 | def get_term_dict(doc_terms_list): 6 | term_set_dict = {} 7 | for doc_terms in doc_terms_list: 8 | for term in doc_terms: 9 | term_set_dict[term] = 1 10 | term_set_list = sorted(term_set_dict.keys()) # term set 排序后,按照索引做出字典 11 | term_set_dict = dict(zip(term_set_list, range(len(term_set_list)))) 12 | return term_set_dict 13 | 14 | 15 | def get_class_dict(doc_class_list): 16 | class_set = sorted(list(set(doc_class_list))) 17 | class_dict = dict(zip(class_set, range(len(class_set)))) 18 | return class_dict 19 | 20 | 21 | def stats_term_df(doc_terms_list, term_dict): 22 | term_df_dict = {}.fromkeys(term_dict.keys(), 0) 23 | for term in term_dict: 24 | for doc_terms in doc_terms_list: 25 | if term in doc_terms_list: 26 | term_df_dict[term] += 1 27 | return term_df_dict 28 | 29 | 30 | def stats_class_df(doc_class_list, class_dict): 31 | class_df_list = [0] * len(class_dict) 32 | for doc_class in doc_class_list: 33 | class_df_list[class_dict[doc_class]] += 1 34 | return class_df_list 35 | 36 | 37 | def stats_term_class_df(doc_terms_list, doc_class_list, term_dict, class_dict): 38 | term_class_df_mat = np.zeros((len(term_dict), len(class_dict)), np.float32) 39 | for k in range(len(doc_class_list)): 40 | class_index = class_dict[doc_class_list[k]] 41 | doc_terms = doc_terms_list[k] 42 | for term in set(doc_terms): 43 | term_index = term_dict[term] 44 | term_class_df_mat[term_index][class_index] += 1 45 | return term_class_df_mat 46 | 47 | 48 | def feature_selection_mi(class_df_list, term_set, term_class_df_mat): 49 | A = term_class_df_mat 50 | B = np.array([(sum(x) - x).tolist() for x in A]) 51 | C = np.tile(class_df_list, (A.shape[0], 1)) - A 52 | N = sum(class_df_list) 53 | class_set_size = len(class_df_list) 54 | 55 | term_score_mat = np.log(((A + 1.0) * N) / ((A + C) * (A + B + class_set_size))) 56 | term_score_max_list = [max(x) for x in term_score_mat] 57 | term_score_array = np.array(term_score_max_list) 58 | sorted_term_score_index = term_score_array.argsort()[:: -1] 59 | term_set_fs = [term_set[index] for index in sorted_term_score_index] 60 | 61 | return term_set_fs 62 | 63 | 64 | def feature_selection_ig(class_df_list, term_set, term_class_df_mat): 65 | A = term_class_df_mat 66 | B = np.array([(sum(x) - x).tolist() for x in A]) 67 | C = np.tile(class_df_list, (A.shape[0], 1)) - A 68 | N = sum(class_df_list) 69 | D = N - A - B - C 70 | term_df_array = np.sum(A, axis=1) 71 | class_set_size = len(class_df_list) 72 | 73 | p_t 
= term_df_array / N 74 | p_not_t = 1 - p_t 75 | p_c_t_mat = (A + 1) / (A + B + class_set_size) 76 | p_c_not_t_mat = (C + 1) / (C + D + class_set_size) 77 | p_c_t = np.sum(p_c_t_mat * np.log(p_c_t_mat), axis=1) 78 | p_c_not_t = np.sum(p_c_not_t_mat * np.log(p_c_not_t_mat), axis=1) 79 | 80 | term_score_array = p_t * p_c_t + p_not_t * p_c_not_t 81 | sorted_term_score_index = term_score_array.argsort()[:: -1] 82 | term_set_fs = [term_set[index] for index in sorted_term_score_index] 83 | 84 | return term_set_fs 85 | 86 | 87 | def feature_selection_wllr(class_df_list, term_set, term_class_df_mat): 88 | A = term_class_df_mat 89 | B = np.array([(sum(x) - x).tolist() for x in A]) 90 | C_Total = np.tile(class_df_list, (A.shape[0], 1)) 91 | N = sum(class_df_list) 92 | C_Total_Not = N - C_Total 93 | term_set_size = len(term_set) 94 | 95 | p_t_c = (A + 1E-6) / (C_Total + 1E-6 * term_set_size) 96 | p_t_not_c = (B + 1E-6) / (C_Total_Not + 1E-6 * term_set_size) 97 | term_score_mat = p_t_c * np.log(p_t_c / p_t_not_c) 98 | 99 | term_score_max_list = [max(x) for x in term_score_mat] 100 | term_score_array = np.array(term_score_max_list) 101 | sorted_term_score_index = term_score_array.argsort()[:: -1] 102 | term_set_fs = [term_set[index] for index in sorted_term_score_index] 103 | 104 | print(term_set_fs[:10]) 105 | return term_set_fs 106 | 107 | 108 | def feature_selection(doc_terms_list, doc_class_list, fs_method): 109 | class_dict = get_class_dict(doc_class_list) 110 | term_dict = get_term_dict(doc_terms_list) 111 | class_df_list = stats_class_df(doc_class_list, class_dict) 112 | term_class_df_mat = stats_term_class_df(doc_terms_list, doc_class_list, term_dict, class_dict) 113 | term_set = [term[0] for term in sorted(term_dict.items(), key=lambda x: x[1])] 114 | term_set_fs = [] 115 | 116 | if fs_method == 'MI': 117 | term_set_fs = feature_selection_mi(class_df_list, term_set, term_class_df_mat) 118 | elif fs_method == 'IG': 119 | term_set_fs = feature_selection_ig(class_df_list, term_set, term_class_df_mat) 120 | elif fs_method == 'WLLR': 121 | term_set_fs = feature_selection_wllr(class_df_list, term_set, term_class_df_mat) 122 | 123 | return term_set_fs -------------------------------------------------------------------------------- /site/mybzz/test/NltkUtil.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | import itertools 4 | from nltk import BigramAssocMeasures, BigramCollocationFinder 5 | from nltk.probability import FreqDist, ConditionalFreqDist 6 | 7 | 8 | def create_word_scores(): 9 | posWords = pickle.load(open('pos_review.pkl', 'rb')) 10 | negWords = pickle.load(open('neg_review.pkl', 'rb')) 11 | 12 | posWords = list(itertools.chain(*posWords)) # flatten the nested lists into one flat word list 13 | negWords = list(itertools.chain(*negWords)) # same for the negative words 14 | 15 | word_fd = FreqDist() # frequency distribution over all words 16 | cond_word_fd = ConditionalFreqDist() # conditional frequencies of words in positive vs. negative texts 17 | for word in posWords: 18 | word_fd[word] += 1 19 | cond_word_fd['pos'][word] += 1 20 | for word in negWords: 21 | word_fd[word] += 1 22 | cond_word_fd['neg'][word] += 1 23 | 24 | pos_word_count = cond_word_fd['pos'].N() # total number of positive word tokens 25 | neg_word_count = cond_word_fd['neg'].N() # total number of negative word tokens 26 | total_word_count = pos_word_count + neg_word_count 27 | 28 | word_scores = {} 29 | for word, freq in word_fd.items(): 30 | pos_score = BigramAssocMeasures.chi_sq(cond_word_fd['pos'][word], (freq, pos_word_count), 31 | total_word_count) # chi-square score for the positive class; mutual information or other metrics would also work 32 | neg_score = 
BigramAssocMeasures.chi_sq(cond_word_fd['neg'][word], (freq, neg_word_count), 33 | total_word_count) # 同理 34 | word_scores[word] = pos_score + neg_score # 一个词的信息量等于积极卡方统计量加上消极卡方统计量 35 | 36 | return word_scores # 包括了每个词和这个词的信息量 37 | 38 | 39 | def create_word_bigram_scores(): 40 | posdata = pickle.load(open('pos_review.pkl', 'rb')) 41 | negdata = pickle.load(open('neg_review.pkl', 'rb')) 42 | 43 | posWords = list(itertools.chain(*posdata)) 44 | negWords = list(itertools.chain(*negdata)) 45 | 46 | bigram_finder_pos = BigramCollocationFinder.from_words(posWords) 47 | bigram_finder_neg = BigramCollocationFinder.from_words(negWords) 48 | posBigrams = bigram_finder_pos.nbest(BigramAssocMeasures.chi_sq, 5000) 49 | negBigrams = bigram_finder_neg.nbest(BigramAssocMeasures.chi_sq, 5000) 50 | 51 | pos = posWords + posBigrams # 词和双词搭配 52 | neg = negWords + negBigrams 53 | 54 | word_fd = FreqDist() 55 | cond_word_fd = ConditionalFreqDist() 56 | for word in pos: 57 | word_fd[word] += 1 58 | cond_word_fd['pos'][word] += 1 59 | for word in neg: 60 | word_fd[word] += 1 61 | cond_word_fd['neg'][word] += 1 62 | 63 | pos_word_count = cond_word_fd['pos'].N() 64 | neg_word_count = cond_word_fd['neg'].N() 65 | total_word_count = pos_word_count + neg_word_count 66 | 67 | word_scores = {} 68 | for word, freq in word_fd.items(): 69 | pos_score = BigramAssocMeasures.chi_sq(cond_word_fd['pos'][word], (freq, pos_word_count), total_word_count) 70 | neg_score = BigramAssocMeasures.chi_sq(cond_word_fd['neg'][word], (freq, neg_word_count), total_word_count) 71 | word_scores[word] = pos_score + neg_score 72 | 73 | return word_scores 74 | 75 | def find_best_words(word_scores, number): 76 | 77 | best_vals = sorted(word_scores.items(), key=lambda item: item[1], reverse=True)[:number] #把词按信息量倒序排序。number是特征的维度,是可以不断调整直至最优的 78 | best_words = set([w for w, s in best_vals]) 79 | return best_words 80 | 81 | -------------------------------------------------------------------------------- /site/mybzz/test/Ran.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | def getTime(date): 4 | return '2016-12-%s %s:%s:%s'%(date,random.randint(0,23),random.randint(0,59),random.randint(0,59)) 5 | 6 | if __name__ == '__main__': 7 | for i in range(1,3000): 8 | print('2016-12-%s %s:%s:%s'%(i,random.randint(0,23),random.randint(0,59),random.randint(0,59))) -------------------------------------------------------------------------------- /site/mybzz/test/Test.py: -------------------------------------------------------------------------------- 1 | import jieba 2 | 3 | 4 | if __name__ == '__main__': 5 | name = '穿越火线' 6 | f = open('c:/%s_输出.txt' % name, 'r') 7 | w = open('c:/结果/%s.txt' % name, 'w') 8 | s = ['的','了','么','呢','是','嘛','个','都','也','比','还','这','于','与','才','用','就','在','对','去','后','说','之'] 9 | 10 | while True: 11 | line = f.readline() 12 | 13 | if not line: 14 | break 15 | flag = False 16 | if len(list(jieba.cut(line.split(' ')[0]))) > 1: 17 | for word in s: 18 | if line.split(' ')[0].startswith(word): 19 | print(line) 20 | flag = True 21 | break 22 | if not flag : 23 | print('能分开...' + line) 24 | w.write(line) 25 | else: 26 | print('不能分...' 
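81 | # Illustrative usage (assumes pos_review.pkl / neg_review.pkl exist in the
82 | # working directory, as the loaders above expect):
83 | #
84 | #     scores = create_word_bigram_scores()
85 | #     best = find_best_words(scores, 500)  # keep the 500 most informative features
86 | #     feats = {w: True for w in tokenized_comment if w in best}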
--------------------------------------------------------------------------------
/site/mybzz/test/Ran.py:
--------------------------------------------------------------------------------
1 | import random
2 | 
3 | 
4 | def getTime(date):
5 |     # random time of day on the given December date
6 |     return '2016-12-%s %s:%s:%s' % (date, random.randint(0, 23), random.randint(0, 59), random.randint(0, 59))
7 | 
8 | 
9 | if __name__ == '__main__':
10 |     for i in range(1, 3000):
11 |         print('2016-12-%s %s:%s:%s' % (i % 30 + 1, random.randint(0, 23), random.randint(0, 59), random.randint(0, 59)))  # keep the day inside 1..30
--------------------------------------------------------------------------------
/site/mybzz/test/Test.py:
--------------------------------------------------------------------------------
1 | import jieba
2 | 
3 | 
4 | if __name__ == '__main__':
5 |     name = '穿越火线'
6 |     f = open('c:/%s_输出.txt' % name, 'r')
7 |     w = open('c:/结果/%s.txt' % name, 'w')
8 |     # single-character function words that a real keyword should not start with
9 |     s = ['的', '了', '么', '呢', '是', '嘛', '个', '都', '也', '比', '还', '这', '于', '与', '才', '用', '就', '在', '对', '去', '后', '说', '之']
10 | 
11 |     while True:
12 |         line = f.readline()
13 | 
14 |         if not line:
15 |             break
16 |         flag = False
17 |         if len(list(jieba.cut(line.split(' ')[0]))) > 1:
18 |             for word in s:
19 |                 if line.split(' ')[0].startswith(word):
20 |                     print(line)
21 |                     flag = True
22 |                     break
23 |             if not flag:
24 |                 print('能分开...' + line)
25 |                 w.write(line)
26 |         else:
27 |             print('不能分...' + line)
28 |     f.close()
29 |     w.close()
--------------------------------------------------------------------------------
/site/mybzz/test/TestA.py:
--------------------------------------------------------------------------------
1 | dic = {'的': True, '坑钱': True, '挺': True, '就是': True, '好': True, ',': True}
2 | 
3 | for f, v in dic.items():
4 |     print(f, v)
5 | 
6 | 
7 | 
--------------------------------------------------------------------------------
/site/mybzz/test/TestBs.py:
--------------------------------------------------------------------------------
1 | from bs4 import BeautifulSoup
2 | 
3 | html = """
4 | <html><head><title>The Dormouse's story</title></head>
5 | <body>
6 | <p class="title"><b>The Dormouse's story</b></p>
7 | <p class="story">Once upon a time there were three little sisters; and their names were
8 | <a href="http://example.com/elsie" class="sister" id="link1"><!-- Elsie --></a>,
9 | <a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
10 | <a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
11 | and they lived at the bottom of a well.</p>
12 | <p class="story">...</p>
13 | """
14 | 
15 | soup = BeautifulSoup(html, "html.parser")
16 | # pretty-print the parse tree
17 | # print(soup.prettify())
18 | 
19 | # print(soup.a)
20 | # print(soup.a.name)
21 | # print(soup.a.attrs)
22 | #
23 | # print(soup.title.string)
24 | #
25 | # print(type(soup.title))
26 | 
27 | # print the child nodes
28 | # print(soup.html.contents)
29 | #
30 | # print(soup.head.children)
31 | # for child in soup.html.children:
32 | #     print(child)
33 | 
34 | # for child in soup.body.descendants:
35 | #     print(child)
36 | #
37 | # for string in soup.stripped_strings:
38 | #     print(repr(string))
39 | 
40 | # print(soup.p.next_sibling.next_sibling)
41 | 
42 | 
43 | # for sibling in soup.p.next_siblings:
44 | #     print(sibling)
45 | print(soup.a.next_element.next_element.next_element.next_element)
46 | 
--------------------------------------------------------------------------------
/site/mybzz/test/TestDb.py:
--------------------------------------------------------------------------------
1 | import pymysql
2 | import time
3 | import datetime
4 | 
5 | from snownlp import SnowNLP
6 | 
7 | from site.mybzz.util import DbUtil
8 | from site.mybzz.util import DateUtil
9 | 
10 | # conn = pymysql.connect(host="localhost", user="root", passwd="banban123", db="comment", port=3306, charset="utf8")
11 | #
12 | # cur = conn.cursor()
13 | #
14 | # cur.execute("INSERT INTO comment(game_name, content, comment_time, author, score)"
15 | #             " VALUES ('游戏名123', '内容123', '2016-05-19 15:56:07', 'ban', '44');")
16 | # conn.commit()
17 | # print("VALUES (%s, %s, %s, %s, %d);" % ('游戏名123', '内容123', '2016-05-19 15:56:07', 'ban', 44))
18 | # print(time.localtime(1463739856))
19 | # print(DateUtil.longToStrTime(1463739856))
20 | # statement = "select * from comment"
21 | #
22 | # data = DbUtil.getAllResult(statement)
23 | 
24 | # for d in data:
25 | #     print("游戏名:%s,内容:%s,时间:%s" % (d[1], d[2], d[3]))
26 | 
27 | conn, cur = DbUtil.getConn()
28 | 
29 | 
30 | # if __name__ == '__main__':
31 | #     comments = DbUtil.getAllResult("select * from comment where game_id = 275 limit 10000")
32 | #     file = open("c:/穿越火线_输入.txt", "w", encoding="GBK")
33 | #     for comment in comments:
34 | #         try:
35 | #             print(comment[2])
36 | #             file.write(comment[2])
37 | #         except:
38 | #             pass
39 | 
40 | word = DbUtil.getOneResult('select keyword from keyword limit 1')
41 | print(eval(word[0]))
42 | keywords = eval(word[0])  # the keyword column stores a dict literal, so eval() rebuilds it
43 | 
44 | for key in keywords:
45 |     print(key, SnowNLP(key).sentiments)  # SnowNLP sentiment score in [0, 1]; closer to 1 means more positive
46 | # if list:
47 | #     print("11")
48 | # else:
49 | #     print("222")
--------------------------------------------------------------------------------
/site/mybzz/test/TestJieBa.py:
--------------------------------------------------------------------------------
1 | import random
2 | from operator import itemgetter
3 | 
4 | import jieba
5 | import jieba.analyse
6 | from pytagcloud import make_tags, create_tag_image
7 | from pytagcloud.colors import COLOR_SCHEMES
8 | 
9 | from site.mybzz.util import DbUtil
10 | 
11 | # conn, cur = DbUtil.getConn()
12 | 
13 | stop = []
14 | 
15 | 
16 | def plot(game_name, game_id):
17 |     counts = {}
18 |     comments = DbUtil.getAllResult("select * from comment where game_id = %s" % game_id)
19 |     for comment in comments:
20 | 
21 |         # print(comment[2])
22 |         result = jieba.analyse.extract_tags(comment[2], topK=3)  # top 3 keywords per comment
23 | 
24 |         for word in result:
25 |             if len(word) < 2:
26 |                 continue
27 |             elif word in stop:
28 |                 continue
29 | 
30 |             if word not in counts:
31 |                 counts[word] = 1
32 |             else:
33 |                 counts[word] += 1
34 |         # print(",".join(jieba.analyse.extract_tags(comment[2], topK=3)))
35 |         # print('-------------')
36 | 
37 |     print(counts)
38 | 
39 |     swd = sorted(counts.items(), key=itemgetter(1), reverse=True)
40 |     swd = swd[1:50]  # skip the single most frequent word, keep the next 49
41 |     tags = make_tags(swd,
42 |                      minsize=30,
43 |                      maxsize=120,
44 |                      colors=random.choice(list(COLOR_SCHEMES.values())))
45 | 
46 |     create_tag_image(tags,
47 |                      'c:/%s.png' % game_name,
48 |                      background=(0, 0, 0, 255),
49 |                      size=(900, 600),
50 |                      fontname='SimHei')
51 | 
52 |     print('saved word cloud image to disk')
53 | 
54 | 
55 | if __name__ == '__main__':
56 | 
57 |     f = open("../StopWords.txt", encoding="utf-8")
58 |     jieba.load_userdict("c:/dict.txt")
59 | 
60 |     while True:
61 |         line = f.readline().replace("\n", '')
62 | 
63 |         if not line:
64 |             break
65 |         stop.append(line)
66 |     # result = jieba.cut('希望能看见我的话我的破血头啊啊啊')
67 |     #
68 |     # for seg in result:
69 |     #     print(seg)
70 |     #
71 |     # print(",".join(jieba.analyse.extract_tags('希望能看见我的话我的破血头啊啊啊', topK=3)))
72 | 
73 |     games = DbUtil.getAllResult("select game_id,games.game_name from `comment` join games on game_id = games.id GROUP BY game_id ORDER BY count(game_id) desc limit 50")
74 | 
75 |     l = []
76 |     for game in games:
77 |         if game[1] not in l:
78 |             plot(game[1], game[0])
79 |             l.append(game[1])
80 |     # print(game[1], game[0])
81 |     # print(l)
--------------------------------------------------------------------------------
/site/mybzz/test/TestNltk.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | 
3 | import nltk
4 | from nltk.collocations import BigramCollocationFinder
5 | from nltk.metrics import BigramAssocMeasures
6 | from sklearn.metrics import accuracy_score
7 | from sklearn.svm import SVC, LinearSVC, NuSVC
8 | from sklearn.metrics import classification_report
9 | from sklearn.metrics import precision_recall_fscore_support
10 | from site.mybzz.test import NltkUtil
11 | import matplotlib.pyplot as plt
12 | from sklearn.externals import joblib
13 | 
14 | pos = pickle.load(open('pos_review.pkl', 'rb'))
15 | neg = pickle.load(open('neg_review.pkl', 'rb'))
16 | 
17 | 
18 | def bag_of_words(words):
19 |     return dict([(word, True) for word in words])
20 | 
21 | 
22 | def bigram(words, score_fn=BigramAssocMeasures.chi_sq, n=1000):
23 |     bigram_finder = BigramCollocationFinder.from_words(words)  # turn the text into bigram collocations
24 |     bigrams = []
25 |     try:
26 |         bigrams = bigram_finder.nbest(score_fn, n)  # keep the top-n bigrams ranked by chi-square
27 |     except:
28 |         pass
29 |     return bag_of_words(bigrams)
30 | 
31 | 
32 | def bigram_words(words, score_fn=BigramAssocMeasures.chi_sq, n=1000):
33 |     bigram_finder = BigramCollocationFinder.from_words(words)
34 |     bigrams = bigram_finder.nbest(score_fn, n)
35 |     return bag_of_words(words + bigrams)  # all words plus the (informative) bigrams together as features
36 | 
37 | 
38 | def pos_features(feature_extraction_method):
39 |     posFeatures = []
40 |     for i in pos:
41 |         posWords = [feature_extraction_method(i), 'pos']  # label positive reviews "pos"
42 |         posFeatures.append(posWords)
43 |     return posFeatures
44 | 
45 | 
46 | def neg_features(feature_extraction_method):
47 |     negFeatures = []
48 |     for j in neg:
49 |         negWords = [feature_extraction_method(j), 'neg']  # label negative reviews "neg"
50 |         negFeatures.append(negWords)
51 |     return negFeatures
52 | 
53 | 
54 | def score(classifier):
55 |     classifier = nltk.SklearnClassifier(classifier)  # use the scikit-learn estimator through NLTK's wrapper
56 |     classifier.train(train)  # train the classifier
57 |     classifier = joblib.load('model.m')  # then evaluate the model saved earlier
58 |     # joblib.dump(classifier, 'model.m')
59 |     pred = classifier.classify_many(dev)  # predict labels for the dev-test set
60 | 
61 |     return precision_recall_fscore_support(tag_dev, pred)
62 | 
63 | 
64 | def score2(classifier):
65 |     classifier = nltk.SklearnClassifier(classifier)  # use the scikit-learn estimator through NLTK's wrapper
66 |     classifier.train(train)  # train the classifier
67 | 
68 |     pred = classifier.classify_many(dev)  # predict labels for the dev-test set
69 | 
70 |     # print(classification_report(tag_dev, pred))
71 |     # print('\n')
72 |     return accuracy_score(tag_dev, pred)  # compare predictions against the hand-labelled truth to get accuracy
73 | 
74 | 
75 | def best_word_features(words):
76 |     return dict([(word, True) for word in words if word in best_words])
77 | 
78 | 
79 | if __name__ == '__main__':
80 |     dimension = range(100, 700, 50)
81 |     method_list = ['precision', 'recall', 'fscore']
82 |     index = 150
83 |     pos_dict = {}
84 |     pos_pre_list = []
85 |     pos_recall_list = []
86 |     pos_f_list = []
87 | 
88 |     neg_dict = {}
89 |     neg_pre_list = []
90 |     neg_recall_list = []
91 |     neg_f_list = []
92 | 
93 |     for d in dimension:
94 |         word_scores = NltkUtil.create_word_bigram_scores()
95 |         best_words = NltkUtil.find_best_words(word_scores, int(d))
96 | 
97 |         posFeatures = pos_features(best_word_features)
98 |         negFeatures = neg_features(best_word_features)
99 |         # posFeatures = pos_features(bag_of_words)  # use all words as features
100 |         # negFeatures = neg_features(bag_of_words)
101 | 
102 |         train = posFeatures[index:] + negFeatures[index:]
103 |         devtest = posFeatures[:index] + negFeatures[:index]
104 |         test = posFeatures[:5] + negFeatures[:5]
105 |         dev, tag_dev = zip(*devtest)
106 | 
107 |         # print('Feature number %s' % d)
108 |         print(score2(LinearSVC(C=0.1)))
109 |         precision, recall, fscore, support = score(LinearSVC(C=0.1))
110 |         pos_pre_list.append(round(precision[1], 3))
111 |         pos_recall_list.append(round(recall[1], 3))
112 |         pos_f_list.append(round(fscore[1], 3))
113 | 
114 |         neg_pre_list.append(round(precision[0], 3))
115 |         neg_recall_list.append(round(recall[0], 3))
116 |         neg_f_list.append(round(fscore[0], 3))
117 |         # print('SVC accuracy is %5.2f %%' % (score(SVC()) * 100))
118 |         # print('LinearSVC accuracy is %5.2f %%' % (score(LinearSVC()) * 100))
119 |         # print('NuSVC accuracy is %5.2f %%' % (score(NuSVC(nu=0.01)) * 100))
120 |         # print("\n")
121 | 
122 |     pos_dict['precision'] = pos_pre_list
123 |     pos_dict['recall'] = pos_recall_list
124 |     pos_dict['fscore'] = pos_f_list
125 | 
126 |     neg_dict['precision'] = neg_pre_list
127 |     neg_dict['recall'] = neg_recall_list
128 |     neg_dict['fscore'] = neg_f_list
129 | 
130 |     # plot the curves
131 |     for method in method_list:
132 |         plt.plot(dimension, pos_dict[method], '--^', label=method)
133 |     plt.title('pos svm')
134 |     plt.xlabel('feature num')
135 |     plt.ylabel('score')
136 |     plt.ylim((0.8, 0.95))
137 | 
138 |     plt.legend(loc='upper right', numpoints=1)
139 |     plt.show()
140 | 
141 |     for method in method_list:
142 |         plt.plot(dimension, neg_dict[method], '--*', label=method)
143 |     plt.title('neg svm')
144 |     plt.xlabel('feature num')
145 |     plt.ylabel('score')
146 |     plt.ylim((0.8, 0.95))
147 | 
148 |     plt.legend(loc='upper right', numpoints=1)
149 |     plt.show()
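150 | 
151 | # Reading the experiment: for each feature dimensionality in range(100, 700, 50)
152 | # the script retrains a LinearSVC and records precision/recall/F-score for both
153 | # classes, so the two plots show how the curves move as the number of selected
154 | # features grows; that is how a good value for find_best_words is picked.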
--------------------------------------------------------------------------------
/site/mybzz/test/TestPa.py:
--------------------------------------------------------------------------------
1 | import json
2 | from urllib import request
3 | import html
4 | 
5 | import zlib
6 | from bs4 import BeautifulSoup
7 | import urllib
8 | 
9 | # req = request.urlopen('http://app.flyme.cn/apps/public/evaluate/list?app_id=2897832&start=0&max=10&mzos=3.0&screen_size=1080x1800')
10 | #
11 | # data = html.unescape(req.read().decode())
12 | # print(data)
13 | # value = json.loads(data)
14 | #
15 | # # value1 = json.loads(value['value'])
16 | #
17 | # print(value['value']['list'][0]['comment'])
18 | 
19 | # req = request.Request('http://appc.baidu.com/appsrv?action=ranklist&native_api=1&pu=ctv%401%2Ccfrom%401000561u%2Ccua%40gu2ki4uq-igBNE6lI5me6NNy2I_UCvhlSdNqA%2Ccuid%400u-Yu0PYH8jVavuO_a-YagiSS8lvuvu9_a2L80ufvi6kuviJlavefYamv8_6uvtz_a2etxNNB%2Ccut%40rIviC_C0vC_7uLP7NJGCjxNIB%2Cosname%40baiduappsearch&name=game')
20 | #
21 | #
22 | # result = request.urlopen(req)
23 | 
24 | # print(zlib.decompress(result.read(), 16 + zlib.MAX_WBITS).decode())
25 | #
26 | req2 = request.urlopen(
27 |     'http://appc.baidu.com/appsrv?action=ranklist&native_api=1&pu=ctv%401%2Ccfrom%401000561u%2Ccua%40gu2ki4uq-igBNE6lI5me6NNy2I_UCvhlSdNqA%2Ccuid%400u-Yu0PYH8jVavuO_a-YagiSS8lvuvu9_a2L80ufvi6kuviJlavefYamv8_6uvtz_a2etxNNB%2Ccut%40rIviC_C0vC_7uLP7NJGCjxNIB%2Cosname%40baiduappsearch&name=game')
28 | print(zlib.decompress(req2.read(), 16 + zlib.MAX_WBITS).decode())  # the body is gzip-compressed; 16 + MAX_WBITS tells zlib to expect a gzip header
29 | # soup = BeautifulSoup(req2.read().decode('UTF-8'), "html.parser")
30 | # b'\x1f\x8b\x08\x00...'  (raw gzip bytes of a sample response, truncated)
31 | 
32 | # print(b'\x1f\x8b'.decode('gbk'))
33 | # for div in soup.find_all('div', class_="detail_top"):
34 | #     print(div)
35 | # print(soup.find('div', class_="detail_top").child)
36 | # for child in soup.find('div', class_="detail_top").children:
37 | #     if (child.name == 'h3'):
38 | #         print(child.string)
39 | # game_name = soup.find('div', class_="detail_top").attrs['data-num']
40 | 
41 | 
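42 | 
43 | # A small helper distilled from the call above (illustrative; BsUtil.praseGzipJson
44 | # wraps the same gzip handling for the crawlers):
45 | def gunzip_body(raw):
46 |     """Decompress a gzip-encoded HTTP body and decode it as UTF-8 text."""
47 |     return zlib.decompress(raw, 16 + zlib.MAX_WBITS).decode()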
--------------------------------------------------------------------------------
/site/mybzz/test/TestPkl.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | import tkinter
3 | import tkinter.font as tkFont
4 | import sys
5 | from random import shuffle
6 | 
7 | from site.mybzz.util import DbUtil
8 | import jieba
9 | 
10 | stop = []
11 | 
12 | 
13 | def getStop():
14 |     f = open("../StopWords.txt", encoding="utf-8")
15 |     jieba.load_userdict("c:/dict.txt")
16 | 
17 |     while True:
18 |         line = f.readline().replace("\n", '')
19 | 
20 |         if not line:
21 |             break
22 |         stop.append(line)
23 | 
24 | 
25 | def add(f):  # f is True for a "good" click, False for a "bad" one
26 |     global count
27 |     if f:
28 |         pos.append(lists[count])
29 |     else:
30 |         neg.append(lists[count])
31 |     count += 1
32 | 
33 |     if count >= len(comments):
34 |         frame.quit()
35 |         return
36 | 
37 |     text.delete(0.0, tkinter.END)
38 | 
39 |     if comments[count]:
40 |         while comments[count][5] < 30:  # scores below 3 stars are auto-labelled negative
41 |             neg.append(lists[count])
42 |             count += 1
43 |     text.insert(0.0, comments[count][2])
44 |     text.insert(tkinter.END, '\n\n')
45 |     text.insert(tkinter.END, '%s星' % (comments[count][5] / 10))  # the score column stores stars x 10
46 |     text.update()
47 | 
48 | 
49 | def read():
50 |     pos = pickle.load(open("pos_review.pkl", 'rb'))
51 |     neg = pickle.load(open("neg_review.pkl", 'rb'))
52 |     print(pos)
53 |     print(neg)
54 | 
55 | 
56 | if __name__ == '__main__':
57 |     getStop()
58 |     pos = pickle.load(open("pos_review.pkl", 'rb'))
59 |     neg = pickle.load(open("neg_review.pkl", 'rb'))
60 |     print(len(pos))
61 |     print(len(neg))
62 | 
63 |     while [] in pos:
64 |         pos.pop(pos.index([]))
65 |     while [] in neg:
66 |         neg.pop(neg.index([]))
67 | 
68 |     comments = list(DbUtil.getAllResult("select * from comment limit 10000 offset 600000"))
69 | 
70 |     shuffle(comments)
71 |     comments = comments[:100]
72 |     lists = []
73 |     for comment in comments:
74 |         words = []
75 |         result = jieba.cut(comment[2])
76 |         for word in result:
77 |             if word not in stop and word != ' ':
78 |                 words.append(word)
79 | 
80 |         lists.append(words)
81 |     count = 0
82 | 
83 |     frame = tkinter.Tk()
84 |     ft = tkFont.Font(family='黑体', size=20, weight=tkFont.BOLD)
85 | 
86 |     text = tkinter.Text(frame, font=ft, height=10, width=30)
87 | 
88 |     g = tkinter.Button(frame, text="好", width=12, command=lambda: add(True))
89 |     b = tkinter.Button(frame, text="坏", width=12, command=lambda: add(False))
90 |     text.pack()
91 |     g.pack()
92 |     b.pack()
93 | 
94 |     text.insert(0.0, comments[0][2])
95 |     text.insert(tkinter.END, '\n\n')
96 |     text.insert(tkinter.END, '%s星' % (comments[0][5] / 10))
97 | 
98 |     tkinter.mainloop()
99 | 
100 |     # pickle.dump(pos, file=open('pos_review.pkl', 'wb'))
101 |     # pickle.dump(neg, file=open('neg_review.pkl', 'wb'))
102 | 
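103 | 
104 | # This script is a small hand-labelling tool: it shows 100 random comments in a
105 | # tkinter window, the 好/坏 buttons file each one into pos/neg (comments rated
106 | # under 3 stars are filed as negative automatically), and uncommenting the two
107 | # pickle.dump lines persists the result for the classifiers in TestNltk.py.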
--------------------------------------------------------------------------------
/site/mybzz/test/TestPred.py:
--------------------------------------------------------------------------------
1 | from random import shuffle
2 | 
3 | from sklearn.externals import joblib
4 | 
5 | from site.mybzz.test import NltkUtil
6 | from site.mybzz.util import DbUtil
7 | import jieba
8 | from site.mybzz.test import Ran
9 | 
10 | stop = []
11 | 
12 | 
13 | def getStop():
14 |     f = open("../StopWords.txt", encoding="utf-8")
15 |     jieba.load_userdict("c:/dict.txt")
16 | 
17 |     while True:
18 |         line = f.readline().replace("\n", '')
19 | 
20 |         if not line:
21 |             break
22 |         stop.append(line)
23 | 
24 | 
25 | def toDict(words):
26 |     return dict([(word, True) for word in words if word in best_words])
27 | 
28 | 
29 | def features(feature_extraction_method):
30 |     Features = []
31 |     for i in lists:
32 |         words = feature_extraction_method(i)  # build the feature dict for one tokenized comment
33 |         Features.append(words)
34 |     return Features
35 | 
36 | 
37 | if __name__ == '__main__':
38 |     getStop()
39 | 
40 |     comments = list(DbUtil.getAllResult("select * from comment"))
41 | 
42 |     shuffle(comments)
43 |     conn, cur = DbUtil.getConn()
44 |     for i in range(0, 1065000, 1000):
45 |         print(i)
46 |         for comment in comments[i:i + 1000]:
47 |             print('UPDATE comment set comment_time = "%s" where id =%s' % (Ran.getTime(int(i / 1000 + 1) % 30 + 1), comment[0]))
48 |             cur.execute('UPDATE comment set comment_time = "%s" where id =%s' % (Ran.getTime(int(i / 1000 + 1) % 30 + 1), comment[0]))
49 |     conn.commit()
50 |     DbUtil.close(conn, cur)
51 |     # lists = []
52 |     # for comment in comments:
53 |     #     list = []
54 |     #     result = jieba.cut(comment[2])
55 |     #     for word in result:
56 |     #         if word not in stop and word != ' ':
57 |     #             list.append(word)
58 |     #
59 |     #     lists.append(list)
60 |     #
61 |     # word_scores = NltkUtil.create_word_bigram_scores()
62 |     # best_words = NltkUtil.find_best_words(word_scores, int(500))
63 |     #
64 |     # dataset = features(toDict)
65 |     #
66 |     # clf = joblib.load('model.m')
67 |     #
68 |     # tags = clf.classify_many(dataset)
69 |     # count = 0
70 |     # conn, cur = DbUtil.getConn()
71 |     # for tag in tags:
72 |     #     if (tag == 'pos'):
73 |     #         print('UPDATE comment set type = %d where id = %d;' % (1, comments[count][0]))
74 |     #         cur.execute('UPDATE comment set type = %d where id = %d;' % (1, comments[count][0]))
75 |     #     else:
76 |     #         print('UPDATE comment set type = %d where id = %d;' % (2, comments[count][0]))
77 |     #         cur.execute('UPDATE comment set type = %d where id = %d;' % (2, comments[count][0]))
78 |     #     count += 1
79 |     #
80 |     # conn.commit()
81 |     # DbUtil.close(conn, cur)
--------------------------------------------------------------------------------
/site/mybzz/test/TestQQ.py:
--------------------------------------------------------------------------------
1 | from urllib import request
2 | 
3 | import zlib
4 | 
5 | req = request.Request("http://pngweb.3g.qq.com/getSubRankList?sortType=22&categoryId=0&pageSize=20&"
6 |                       "startIndex=0&needCateList=1")
7 | 
8 | req.add_header("Referer", "http://qzs.qq.com/open/yyb/yyb_toplist/html/downtoplist.html?_ck_bid=3")
9 | req.add_header("Origin", "http://qzs.qq.com")
10 | req.add_header("Accept", "text/xml, text/html, application/xhtml+xml, image/png, text/plain, */*;q=0.8")
11 | req.add_header("User-Agent", "Mozilla/5.0 (Linux; Android 5.1; MX4 Build/LMY47I) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/40.0.2214.117 Mobile Safari/537.36/apiLevel/22/qqdownloader/3/ft_apiLevel/1_0_0_0")
12 | req.add_header("Accept-Language", "zh-CN")
13 | req.add_header("Accept-Charset", "utf-8, iso-8859-1, utf-16, *;q=0.7")
14 | req.add_header("Accept-Encoding", "gzip")
15 | req.add_header("Connection", "keep-alive")
16 | # req.add_header("Host", "pngweb.3g.qq.com")
17 | # req.add_header("Cookie", "accesstoken=; caller=13; guid=891204461307686912; imei=862095025228963; isforeground=1; logintype=NONE; openappid=0; openid=; qaccesstoken=null; qopenid=null; sid=; skey=; skey_datetime=; uin=; via=UNKNOWN_VIA; vkey=")
18 | req.add_header("Q-UA2", "QV=2&PL=ADR&PR=TBS&PB=GE&VE=B1&VN=1.5.1.1065&CO=X5&COVN=025489&RF=PRI&PP=com.tencent.android.qqdownloader&PPVC=6522130&RL=1152*1920&MO= MX4 &DE=PHONE&OS=5.1&API=22&CHID=0&LCID=9422")
19 | req.add_header("Q-GUID", "cee926d32c56b35b7aa4310013b788cb")
20 | req.add_header("Q-Auth", "31045b957cf33acf31e40be2f3e71c5217597676a9729f1b")
21 | 
22 | result = request.urlopen(req)
23 | # print(result.read())
24 | print(zlib.decompress(result.read(), 16 + zlib.MAX_WBITS).decode())
25 | 
--------------------------------------------------------------------------------
/site/mybzz/test/TestReadFile.py:
--------------------------------------------------------------------------------
1 | import os
2 | import os.path
3 | 
4 | root = 'C:\结果'
5 | fileNameList = []
6 | if __name__ == '__main__':
7 |     for filename in os.walk(root):
8 |         fileNameList = filename[2]  # the third element of each os.walk() tuple is the file list
9 | 
10 |     word_counts = {}
11 | 
12 |     for file in fileNameList:
13 |         f = open(root + '\\' + file)
14 |         while True:
15 |             line = f.readline()
16 |             if not line:
17 |                 break
18 |             key = line.split(' ')[0]
19 |             count = line.split(' ')[1]
20 |             word_counts[key] = count
21 | 
22 |     out = open(root + '\\' + 'dict.txt', 'a')
23 |     for (k, v) in word_counts.items():
24 |         print(k, v)
25 |         out.write(k + ' ' + v)
26 |     out.close()
--------------------------------------------------------------------------------
/site/mybzz/test/TestSk.py:
--------------------------------------------------------------------------------
1 | from sklearn import svm
2 | from sklearn.externals import joblib
3 | from sklearn import datasets
4 | 
5 | 
6 | iris = datasets.load_iris()
7 | digit = datasets.load_digits()
8 | 
9 | # print(digit.data)
10 | # print(digit.target)
11 | 
12 | clf = svm.SVC(C=100, gamma=0.001)
13 | 
14 | clf.fit(digit.data[:-1], digit.target[:-1])
15 | 
16 | clf.predict(digit.data[-1:])  # predict() expects a 2-D array, so slice rather than index
17 | 
18 | joblib.dump(clf, "model.pkl")
19 | 
20 | print(clf.score(digit.data[:-1], digit.target[:-1]))
21 | #
22 | # result = clf.predict([2, 2])  # predict the target of testing samples
23 | #
24 | # print(clf.score(X, y))
25 | #
26 | # print(result)  # target
27 | #
28 | # print(clf.support_vectors_)  # support vectors
29 | #
30 | # print(clf.support_)  # indices of support vectors
31 | #
32 | # print(clf.n_support_)  # number of support vectors for each class
--------------------------------------------------------------------------------
/site/mybzz/test/TestWordCloud.py:
--------------------------------------------------------------------------------
1 | import random
2 | from operator import itemgetter
3 | 
4 | from pytagcloud import make_tags, create_tag_image
5 | from pytagcloud.colors import COLOR_SCHEMES
6 | from snownlp import SnowNLP
7 | 
8 | path = 'c:/结果/dict.txt'
9 | if __name__ == '__main__':
10 | 
11 |     word_counts = {}
12 | 
13 |     f = open(path)
14 |     while True:
15 |         line = f.readline()
16 | 
17 |         if not line:
18 |             break
19 |         word_counts[line.split(' ')[0]] = int(line.split(' ')[1])
20 | 
21 | 
22 |     swd = sorted(word_counts.items(), key=itemgetter(1), reverse=True)
23 |     swd = swd[1:50]
24 | 
25 |     for word in swd:
26 |         print(word[0], SnowNLP(word[0]).sentiments)
27 |         # print(word[0])
28 |     # print(swd)
29 | 
30 |     # dict = {}
31 |     #
32 |     # for (k, v) in swd:
33 |     #     # print(k, v)
34 |     #     dict[k] = v
35 |     # print(dict)
36 |     # tags = make_tags(swd,
37 |     #                  minsize=30,
38 |     #                  maxsize=130,
39 |     #                  colors=random.choice(list(COLOR_SCHEMES.values())))
40 |     #
41 |     # create_tag_image(tags,
42 |     #                  'tag_cloud.png',
43 |     #                  background=(0, 0, 0, 255),
44 |     #                  size=(1280, 900),
45 |     #                  fontname='SimHei')
46 |     #
47 |     # print('saved file to disk')
--------------------------------------------------------------------------------
/site/mybzz/test/model.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WhiteDevilBan/CommentCrawler/2bc36084da3693d7f6b75295fbbcad216c42b2b8/site/mybzz/test/model.m
--------------------------------------------------------------------------------
/site/mybzz/test/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WhiteDevilBan/CommentCrawler/2bc36084da3693d7f6b75295fbbcad216c42b2b8/site/mybzz/test/model.pkl
--------------------------------------------------------------------------------
/site/mybzz/test/neg_review.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WhiteDevilBan/CommentCrawler/2bc36084da3693d7f6b75295fbbcad216c42b2b8/site/mybzz/test/neg_review.pkl
--------------------------------------------------------------------------------
/site/mybzz/test/pos_review.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WhiteDevilBan/CommentCrawler/2bc36084da3693d7f6b75295fbbcad216c42b2b8/site/mybzz/test/pos_review.pkl
--------------------------------------------------------------------------------
/site/mybzz/util/BsUtil.py:
--------------------------------------------------------------------------------
1 | import zlib
2 | from bs4 import BeautifulSoup
3 | from urllib import request
4 | import json
5 | 
6 | 
7 | def praseHtml(url):
8 |     """Fetch a URL and parse the body as HTML."""
9 |     req = request.urlopen(url)
10 |     return BeautifulSoup(req.read().decode('UTF-8'), "html.parser")
11 | 
12 | 
13 | def praseJson(url, timeout=10):
14 |     """Fetch a URL and parse the body as JSON."""
15 |     req = request.urlopen(url, timeout=timeout)
16 | 
17 |     data = req.read().decode()
18 |     return json.loads(data)
19 | 
20 | 
21 | def praseGzipJson(url):
22 |     """Fetch a gzip-compressed body and parse it as JSON."""
23 |     req = request.urlopen(url)
24 | 
25 |     result = zlib.decompress(req.read(), 16 + zlib.MAX_WBITS).decode()
26 |     return json.loads(result)
27 | 
28 | 
29 | def praseQQ(url):
30 |     """Fetch the Tencent app-store ranking API with the headers it expects."""
31 |     req = request.Request(url)
32 |     req.add_header("Referer", "http://qzs.qq.com/open/yyb/yyb_toplist/html/downtoplist.html?_ck_bid=3")
33 |     req.add_header("Origin", "http://qzs.qq.com")
34 |     req.add_header("Accept", "text/xml, text/html, application/xhtml+xml, image/png, text/plain, */*;q=0.8")
35 |     req.add_header("User-Agent", "Mozilla/5.0 (Linux; Android 5.1; MX4 Build/LMY47I) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/40.0.2214.117 Mobile Safari/537.36/apiLevel/22/qqdownloader/3/ft_apiLevel/1_0_0_0")
36 |     req.add_header("Accept-Language", "zh-CN")
37 |     req.add_header("Accept-Charset", "utf-8, iso-8859-1, utf-16, *;q=0.7")
38 |     req.add_header("Accept-Encoding", "gzip")
39 |     req.add_header("Connection", "keep-alive")
40 |     req.add_header("Host", "pngweb.3g.qq.com")
41 |     req.add_header("Q-UA2", "QV=2&PL=ADR&PR=TBS&PB=GE&VE=B1&VN=1.5.1.1065&CO=X5&COVN=025489&RF=PRI&PP=com.tencent.android.qqdownloader&PPVC=6522130&RL=1152*1920&MO= MX4 &DE=PHONE&OS=5.1&API=22&CHID=0&LCID=9422")
42 |     req.add_header("Q-GUID", "cee926d32c56b35b7aa4310013b788cb")
43 |     req.add_header("Q-Auth", "31045b957cf33acf31e40be2f3e71c5217597676a9729f1b")
44 | 
45 |     result = request.urlopen(req)
46 |     return json.loads(zlib.decompress(result.read(), 16 + zlib.MAX_WBITS).decode())
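47 | 
48 | # Illustrative usage (the URL is hypothetical; each crawler picks the helper
49 | # that matches its store's response encoding):
50 | #
51 | #     data = praseJson('http://example.com/comments?app_id=123&start=0')
52 | #     for item in data['list']:
53 | #         print(item)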
--------------------------------------------------------------------------------
/site/mybzz/util/DateUtil.py:
--------------------------------------------------------------------------------
1 | import time
2 | import datetime
3 | 
4 | 
5 | TIMEFORMAT = "%Y-%m-%d %H:%M:%S"
6 | 
7 | 
8 | def currentTime():
9 |     return time.strftime(TIMEFORMAT, time.localtime(time.time()))
10 | 
11 | 
12 | def currentDate():
13 |     return datetime.date.today()
14 | 
15 | 
16 | def longToStrTime(t):
17 |     return time.strftime(TIMEFORMAT, time.localtime(t))
--------------------------------------------------------------------------------
/site/mybzz/util/DbUtil.py:
--------------------------------------------------------------------------------
1 | import pymysql
2 | """
3 | Database helpers.
4 | """
5 | 
6 | 
7 | def getConn():
8 |     """
9 |     Get a database connection and cursor.
10 |     :return:
11 |     """
12 |     conn = pymysql.connect(host="localhost", user="root", passwd="banban123", db="comment", port=3306, charset="utf8")
13 |     cur = conn.cursor()
14 |     return (conn, cur)
15 | 
16 | 
17 | def getAllResult(statement):
18 |     """
19 |     Fetch all rows for a statement.
20 |     :param statement:
21 |     :return:
22 |     """
23 |     conn, cur = getConn()
24 | 
25 |     cur.execute(statement)
26 |     return cur.fetchall()
27 | 
28 | 
29 | # fetch a single row
30 | def getOneResult(statement):
31 |     """
32 |     Fetch a single row.
33 |     :param statement:
34 |     :return:
35 |     """
36 |     conn, cur = getConn()
37 | 
38 |     cur.execute(statement)
39 |     return cur.fetchone()
40 | 
41 | 
42 | # close the connection
43 | def close(conn, cur):
44 |     """
45 |     Close the cursor and connection.
46 |     :param conn:
47 |     :param cur:
48 |     :return:
49 |     """
50 |     cur.close()
51 |     conn.close()
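52 | 
53 | # Illustrative usage (mirrors how the test scripts above call this module):
54 | #
55 | #     rows = getAllResult("select * from comment limit 10")
56 | #     for row in rows:
57 | #         print(row[2])  # row[2] is the comment text
58 | #     conn, cur = getConn()
59 | #     close(conn, cur)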