├── images
    ├── __init__.py
    ├── migrations
    │   ├── __init__.py
    │   └── 0001_initial.py
    ├── admin.py
    ├── apps.py
    ├── models.py
    └── views.py
├── silumz
    ├── __init__.py
    ├── wsgi.py
    ├── urls.py
    └── settings.py
├── static
    ├── hot.png
    ├── logo.png
    ├── zde
    │   ├── 1.png
    │   ├── 2.png
    │   ├── 3.png
    │   ├── 4.png
    │   ├── 5.png
    │   ├── hot.png
    │   ├── next.png
    │   ├── timg.gif
    │   ├── reward.png
    │   ├── loading.gif
    │   ├── css
    │   │   └── fonts
    │   │   │   ├── iconfont.eot
    │   │   │   ├── iconfont.ttf
    │   │   │   ├── iconfont.woff
    │   │   │   └── iconfont.svg
    │   └── js
    │   │   ├── html5.js
    │   │   └── flutter-hearts-zmt.js
    ├── bg-ico.png
    ├── favicon.ico
    └── v_back.jpg
├── .gitignore
├── hyimg
    ├── 4a526605037c6.4tteiq5psd80.png
    └── 2c1185b91e0c4bb399334aeab5f6a011.4lxc1r4h9hg0.gif
├── requirements.txt
├── uwsgi.ini
├── templates
    └── zde
    │   ├── 404.html
    │   ├── tag.html
    │   ├── pagination.html
    │   ├── base.html
    │   ├── index.html
    │   ├── category.html
    │   ├── sort.html
    │   ├── video.html
    │   ├── page.html
    │   └── mVideo.html
├── config.py
├── manage.py
├── start.sh
├── crawler
    ├── delete_img.py
    ├── Compress.py
    ├── down_img.py
    ├── AutoPost.py
    ├── crawler_xmt.py
    ├── crawle_mzt.py
    ├── crawler_mtl.py
    ├── crawler_ytu.py
    ├── crawler_mmjpg.py
    ├── crawler_mm131.py
    ├── crawler_amn.py
    └── crawler_nsg.py
├── readme.md
├── 94imm.sql
└── install.sh


/images/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/silumz/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/images/migrations/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/static/hot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zephyr-y/94imm/HEAD/static/hot.png


--------------------------------------------------------------------------------
/static/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zephyr-y/94imm/HEAD/static/logo.png


--------------------------------------------------------------------------------
/static/zde/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zephyr-y/94imm/HEAD/static/zde/1.png


--------------------------------------------------------------------------------
/static/zde/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zephyr-y/94imm/HEAD/static/zde/2.png


--------------------------------------------------------------------------------
/static/zde/3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zephyr-y/94imm/HEAD/static/zde/3.png


--------------------------------------------------------------------------------
/static/zde/4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zephyr-y/94imm/HEAD/static/zde/4.png


--------------------------------------------------------------------------------
/static/zde/5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zephyr-y/94imm/HEAD/static/zde/5.png


--------------------------------------------------------------------------------
/static/bg-ico.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zephyr-y/94imm/HEAD/static/bg-ico.png


--------------------------------------------------------------------------------
/static/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zephyr-y/94imm/HEAD/static/favicon.ico


--------------------------------------------------------------------------------
/static/v_back.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zephyr-y/94imm/HEAD/static/v_back.jpg


--------------------------------------------------------------------------------
/static/zde/hot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zephyr-y/94imm/HEAD/static/zde/hot.png


--------------------------------------------------------------------------------
/static/zde/next.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zephyr-y/94imm/HEAD/static/zde/next.png


--------------------------------------------------------------------------------
/static/zde/timg.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zephyr-y/94imm/HEAD/static/zde/timg.gif


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /.idea/*
2 | /__pycache__/*
3 | /static/images/*
4 | **/__pycache__/*
5 | test.py


--------------------------------------------------------------------------------
/images/admin.py:
--------------------------------------------------------------------------------
1 | from django.contrib import admin
2 | 
3 | # Register your models here.
4 | 


--------------------------------------------------------------------------------
/static/zde/reward.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zephyr-y/94imm/HEAD/static/zde/reward.png


--------------------------------------------------------------------------------
/static/zde/loading.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zephyr-y/94imm/HEAD/static/zde/loading.gif


--------------------------------------------------------------------------------
/static/zde/css/fonts/iconfont.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zephyr-y/94imm/HEAD/static/zde/css/fonts/iconfont.eot


--------------------------------------------------------------------------------
/static/zde/css/fonts/iconfont.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zephyr-y/94imm/HEAD/static/zde/css/fonts/iconfont.ttf


--------------------------------------------------------------------------------
/static/zde/css/fonts/iconfont.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zephyr-y/94imm/HEAD/static/zde/css/fonts/iconfont.woff


--------------------------------------------------------------------------------
/hyimg/4a526605037c6.4tteiq5psd80.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zephyr-y/94imm/HEAD/hyimg/4a526605037c6.4tteiq5psd80.png


--------------------------------------------------------------------------------
/images/apps.py:
--------------------------------------------------------------------------------
1 | from django.apps import AppConfig
2 | 
3 | 
4 | class ImagesConfig(AppConfig):
5 |     name = 'images'
6 | 


--------------------------------------------------------------------------------
/hyimg/2c1185b91e0c4bb399334aeab5f6a011.4lxc1r4h9hg0.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zephyr-y/94imm/HEAD/hyimg/2c1185b91e0c4bb399334aeab5f6a011.4lxc1r4h9hg0.gif


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | asn1crypto==0.24.0
 2 | beautifulsoup4==4.6.0
 3 | certifi==2018.11.29
 4 | cffi==1.11.5
 5 | chardet==3.0.4
 6 | cryptography==2.4.2
 7 | dj-pagination==2.4.0
 8 | Django==1.10.6
 9 | idna==2.6
10 | mysqlclient==1.3.14
11 | pycparser==2.19
12 | PyMySQL==0.9.2
13 | requests==2.18.4
14 | six==1.12.0
15 | urllib3==1.22
16 | uWSGI==2.0.17.1
17 | 


--------------------------------------------------------------------------------
/silumz/wsgi.py:
--------------------------------------------------------------------------------
 1 | """
 2 | WSGI config for silumz project.
 3 | 
 4 | It exposes the WSGI callable as a module-level variable named ``application``.
 5 | 
 6 | For more information on this file, see
 7 | https://docs.djangoproject.com/en/1.10/howto/deployment/wsgi/
 8 | """
 9 | 
10 | import os
11 | 
12 | from django.core.wsgi import get_wsgi_application
13 | 
14 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "silumz.settings")
15 | 
16 | application = get_wsgi_application()
17 | 


--------------------------------------------------------------------------------
/uwsgi.ini:
--------------------------------------------------------------------------------
 1 | # uwsig使用配置文件启动
 2 | #[uwsgi]
 3 | # 项目目录
 4 | #chdir=/root/94imm/
 5 | # 指定项目的application
 6 | #file=/root/94imm/silumz/wsgi.py
 7 | # 指定sock的文件路径       
 8 | #socket=/root/94imm/uwsgi.sock
 9 | # 进程个数       
10 | #workers=2
11 | #pidfile=/root/94imm/uwsgi.pid
12 | # 指定IP端口       
13 | #http=127.0.0.1:8000
14 | # 指定静态文件
15 | #static-map=/static=/root/94imm/static
16 | # 启动uwsgi的用户名和用户组
17 | #uid=root
18 | #gid=root
19 | # 启用主进程
20 | #master=true
21 | # 自动移除unix Socket和pid文件当服务停止的时候
22 | #vacuum=true
23 | # 序列化接受的内容，如果可能的话
24 | #thunder-lock=true
25 | # 启用线程
26 | #enable-threads=true
27 | # 设置自中断时间
28 | #harakiri=30
29 | # 设置缓冲
30 | #post-buffering=4096
31 | # 设置日志目录
32 | #daemonize=/root/94imm/uwsgi.log
33 | # 通过该端口可查看uwsgi负载情况
34 | #stats=8001
35 | 


--------------------------------------------------------------------------------
/templates/zde/404.html:
--------------------------------------------------------------------------------
 1 | <!doctype html>
 2 | <html>
 3 | <head>
 4 | <meta charset="utf-8">
 5 | <meta http-equiv="X-UA-Compatible" content="IE=edge">
 6 | <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1, user-scalable=no">
 7 | <title>404</title>
 8 | <style>
 9 | 	body{
10 | 		background-color:#fff;
11 | 		font-size:14px;
12 | 	}
13 | 	h3{
14 | 		font-size:60px;
15 | 		color:#444;
16 | 		text-align:center;
17 | 		padding-top:30px;
18 | 		font-weight:normal;
19 | 	}
20 | 	h2{
21 | 		font-size:40px;
22 | 		text-align:center;
23 | 		padding-top:30px;
24 | 		font-weight:normal;
25 | 	}
26 | 	a{
27 | 		color:#ff6651;
28 | 	}
29 | </style>
30 | </head>
31 | 
32 | <body>
33 | <h3>404，您请求的文件不存在!</h3>
34 | <h2><a href="/">返回首页</a></h2>
35 | </body>
36 | </html>
37 | 


--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
 1 | # 数据库信息，一键脚本自动添加
 2 | mysql_config = {
 3 |         'ENGINE': 'django.db.backends.mysql',
 4 |         'NAME': '94imm',
 5 |         'USER': '94imm',
 6 |         'PASSWORD': '94imm',
 7 |         'HOST': '127.0.0.1',
 8 |         'PORT': '3306',
 9 |     }
10 | # 数组形式，可以添加多个域名
11 | allow_url=["www.94imm.com","94imm.com"]
12 | # 缓存超时时间，服务器性能好可缩短此时
13 | cache_time=300
14 | # 使用的模板（暂时开放一个）
15 | templates="zde"
16 | # 网站名
17 | site_name="94iMM"
18 | # 一键脚本自动添加
19 | site_url = "https://www.94imm.com"
20 | # 网站关键词
21 | key_word = "关键词1,关键词2,关键词3"
22 | # 网站说明
23 | description = "这是一个高质量的自动爬虫"
24 | # 底部联系邮箱
25 | email = "admin@94imm.com"
26 | # 网站调试模式
27 | debug = False
28 | # 友联
29 | friendly_link = [{"name":"94imm","link":"https://www.94imm.com"},{"name":"获取源码","link":"https://github.com/Turnright-git/94imm.git"}]


--------------------------------------------------------------------------------
/manage.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import os
 3 | import sys
 4 | 
 5 | if __name__ == "__main__":
 6 |     os.environ.setdefault("DJANGO_SETTINGS_MODULE", "silumz.settings")
 7 |     try:
 8 |         from django.core.management import execute_from_command_line
 9 |     except ImportError:
10 |         # The above import may fail for some other reason. Ensure that the
11 |         # issue is really that Django is missing to avoid masking other
12 |         # exceptions on Python 2.
13 |         try:
14 |             import django
15 |         except ImportError:
16 |             raise ImportError(
17 |                 "Couldn't import Django. Are you sure it's installed and "
18 |                 "available on your PYTHONPATH environment variable? Did you "
19 |                 "forget to activate a virtual environment?"
20 |             )
21 |         raise
22 |     execute_from_command_line(sys.argv)
23 | 


--------------------------------------------------------------------------------
/start.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | NAME="uwsgi"
 3 | 
 4 | function run_start(){
 5 | uwsgi --ini uwsgi.ini
 6 | }
 7 | 
 8 | function run_stop(){
 9 | if [ ! -n "$NAME" ];then
10 |     echo "no arguments"
11 |     exit;
12 | fi
13 | ID=`ps -ef | grep "$NAME" | grep -v "$0" | grep -v "grep" | awk '{print $2}'`
14 | for id in $ID
15 | do
16 | kill -9 $id
17 | done
18 | }
19 | 
20 | function run_clear(){
21 | rm -rf cache/*
22 | }
23 | 
24 | case "$1" in "start"|"s"|"S")
25 |     run_start
26 | 	echo "website run successfully"
27 | 	;;
28 | 	"restart"|"r"|"S")
29 | 	run_stop
30 | 	run_clear
31 | 	run_start
32 | 	echo "website restart successfully"
33 | 	;;
34 | 	"clear"|"c"|"C")
35 | 	run_clear
36 | 	echo "cache cleared"
37 | 	;;
38 | 	"stop")
39 | 	run_stop
40 | 	echo "website closed"
41 | 	;;
42 | 	*)
43 | 	echo -e "Use command to:\n-s   start website\n-r   restart website\n-c   clear cache\n-stop   stop website"
44 | 	;;
45 | 	esac
46 | 	


--------------------------------------------------------------------------------
/images/models.py:
--------------------------------------------------------------------------------
 1 | from django.db import models
 2 | 
 3 | # Create your models here.
 4 | class Page(models.Model):
 5 |     typeid = models.IntegerField()
 6 |     sendtime=models.DateField()
 7 |     title=models.CharField(max_length=200)
 8 |     firstimg=models.CharField(max_length=200)
 9 |     tagid=models.CharField(max_length=200)
10 |     hot=models.IntegerField()
11 | 
12 | class Image(models.Model):
13 |     pageid=models.IntegerField()
14 |     imageurl=models.URLField()
15 | 
16 | class Type(models.Model):
17 |     type=models.CharField(max_length=200)
18 | 
19 | class Tag(models.Model):
20 |     tag = models.CharField(max_length=200)
21 | 
22 | 
23 | class Video(models.Model):
24 |     url = models.CharField(max_length=500)
25 |     user_id = models.CharField(max_length=15)
26 |     date_time = models.CharField(max_length=30)
27 |     v_name = models.CharField(max_length=255)
28 |     v_path = models.CharField(max_length=50)
29 |     source = models.CharField(max_length=10)


--------------------------------------------------------------------------------
/silumz/urls.py:
--------------------------------------------------------------------------------
 1 | """silumz URL Configuration
 2 | 
 3 | The `urlpatterns` list routes URLs to views. For more information please see:
 4 |     https://docs.djangoproject.com/en/1.10/topics/http/urls/
 5 | Examples:
 6 | Function views
 7 |     1. Add an import:  from my_app import views
 8 |     2. Add a URL to urlpatterns:  url(r'^$', views.home, name='home')
 9 | Class-based views
10 |     1. Add an import:  from other_app.views import Home
11 |     2. Add a URL to urlpatterns:  url(r'^$', Home.as_view(), name='home')
12 | Including another URLconf
13 |     1. Import the include() function: from django.conf.urls import url, include
14 |     2. Add a URL to urlpatterns:  url(r'^blog/', include('blog.urls'))
15 | """
16 | from django.conf.urls import url
17 | from django.contrib import admin
18 | from images import views
19 | 
20 | urlpatterns = [
21 |     url(r'^$', views.index),
22 |     url(r'^article/(?P<i_id>\d+)/$', views.page, name='article'),
23 |     url(r'^tag/(?P<tid>\d+)/$', views.tag, name='tag'),
24 |     url(r'^type/(?P<typeid>\d+)/$', views.type, name='type'),
25 |     url(r'^search/', views.search),
26 |     url(r'^get_video/', views.getVideo),
27 |     url(r'^video/', views.pVideo),
28 |     url(r'^mvideo/', views.mVideo),
29 |     url(r'^tag/', views.HotTag),
30 |     url(r'^sort/(?P<method>\w+)/$', views.SortBy, name='sort'),
31 | ]
32 | 


--------------------------------------------------------------------------------
/images/migrations/0001_initial.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Generated by Django 1.10.6 on 2018-12-09 04:41
 3 | from __future__ import unicode_literals
 4 | 
 5 | from django.db import migrations, models
 6 | 
 7 | 
 8 | class Migration(migrations.Migration):
 9 | 
10 |     initial = True
11 | 
12 |     dependencies = [
13 |     ]
14 | 
15 |     operations = [
16 |         migrations.CreateModel(
17 |             name='Image',
18 |             fields=[
19 |                 ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
20 |                 ('imageid', models.IntegerField()),
21 |                 ('pageid', models.IntegerField()),
22 |                 ('imageurl', models.URLField()),
23 |             ],
24 |         ),
25 |         migrations.CreateModel(
26 |             name='Page',
27 |             fields=[
28 |                 ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
29 |                 ('pageid', models.IntegerField()),
30 |                 ('typeid', models.IntegerField()),
31 |                 ('pageurl', models.URLField()),
32 |                 ('time', models.DateField()),
33 |                 ('title', models.CharField(max_length=200)),
34 |                 ('numbs', models.IntegerField()),
35 |                 ('firstimg', models.IntegerField()),
36 |                 ('otherimg', models.IntegerField()),
37 |             ],
38 |         ),
39 |     ]
40 | 


--------------------------------------------------------------------------------
/crawler/delete_img.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | sys.path.append('../')
 4 | import pymysql,os
 5 | from config import mysql_config
 6 | 
 7 | dbhost = {
 8 |     "host": mysql_config['HOST'],
 9 |     "dbname": mysql_config['NAME'],
10 |     "user": mysql_config['USER'],
11 |     "password": mysql_config['PASSWORD']
12 | }
13 | 
14 | db = pymysql.connect(dbhost.get("host"), dbhost.get("user"), dbhost.get("password"), dbhost.get("dbname"))
15 | cursor = db.cursor()
16 | 
17 | def del_page(id):
18 |     cursor.execute("Delete FROM images_page WHERE id=" + "'" + id + "'")
19 |     cursor.execute("SELECT imageurl FROM images_image WHERE pageid =" + "'" + id + "'")
20 |     for img in cursor.fetchall():
21 |         try:
22 |             os.remove(img[0])
23 |         except FileNotFoundError:
24 |             print("***************")
25 |             print("图片不存在或未下载")
26 |             pass
27 |         except Exception as e:
28 |             print("***************")
29 |             print("图片删除失败，错误信息：",e)
30 |     cursor.execute("Delete FROM images_image WHERE pageid=" + "'" + id + "'")
31 |     print("***************")
32 |     print("图片删除成功")
33 |     print("***************")
34 |     print("采集数据删除成功")
35 |     try:
36 |         ls = os.listdir('../cache')
37 |         for i in ls:
38 |             os.remove("../cache/"+i)
39 |         os.system("sh ../restart.sh")
40 |         print("***************")
41 |         print("缓存更新成功")
42 |     except Exception as e:
43 |         print("***************")
44 |         print("缓存更新失败，错误信息：",e)
45 | 
46 | if __name__=="__main__":
47 |     print("请输入要删除的图集ID")
48 |     id=input()
49 |     del_page(id)


--------------------------------------------------------------------------------
/templates/zde/tag.html:
--------------------------------------------------------------------------------
 1 | {% extends 'base.html' %}
 2 | 
 3 | <!-- header start -->
 4 | {% block title %}标签云 - {{ siteName }}{% endblock title %}
 5 | {% block keywords %}{% for t in keyword %}{{ t.tag }},{% endfor %}{% endblock keywords %}
 6 | {% block description %}{{ description }}{% endblock description %}
 7 | <!-- header end -->
 8 | {% block focusbox %}
 9 | <div class="focusbox"><div class="container"><h1 class="focusbox-title">热门标签</h1></div></div>
10 | {% endblock focusbox %}
11 | <!-- menu start-->
12 | {% block menu %}
13 |     <ul>
14 | 				<li><a href="/">随便看看</a></li>
15 | 				<li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-has-children menu-item-50716"><a>妹子图</a>
16 | 				<ul class="sub-menu">
17 |                 {% for types in typelist %}
18 |                     <li menu-item menu-item-type-post_type menu-item-object-page menu-item-18><a href="{% url 'type' types.type_id %}">{{ types.type }}</a></li>
19 |                 {% endfor %}
20 | 				</ul>
21 | 				<li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-has-children menu-item-50716"><a>排行榜</a>
22 | 				<ul class="sub-menu">
23 | 				<li class="menu-item menu-item-type-post_type menu-item-object-page menu-item-18"><a href="/sort/new">最新发布</a></li>
24 | 				<li class="menu-item menu-item-type-post_type menu-item-object-page menu-item-18"><a href="/sort/hot">最受欢迎</a></li>
25 | 				</ul>
26 | 				</li>
27 | 				<li class="menu-item menu-item-type-taxonomy menu-item-object-category current-menu-item menu-item-28"><a href="/tag/">标签云</a></li>
28 | 				<li><a href="/video">视频小姐姐</a></li>
29 | 			</ul>
30 | {% endblock menu %}
31 | <!-- menu end-->
32 | <!-- section start-->
33 | {% block section %}
34 |     <div class="tagslist">
35 | 		<ul>
36 | 		{% for t in data %}
37 | 			<li><a class="name" href="{% url 'tag' t.tid %}">{{ t.tag }}</a><small>&times;{{t.viwe}}</small></li>
38 |         {% endfor %}
39 |         </ul>
40 | 
41 | 	</div>
42 | {% endblock section %}
43 | <!-- section end-->
44 | 


--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
 1 | # 安装说明：
 2 | ## 自动安装：
 3 | ```
 4 | wget -O install.sh https://github.com/Turnright-git/94imm/blob/master/install.sh && chmod +x install.sh &&./install.sh
 5 | ```
 6 | 安装过程中需要输入
 7 | ```
 8 | > * allow url:www.94imm.com   # 防盗链允许的使用的域名
 9 | > * site_name:94imm   # 网站名称，将显示在网站底部网站title和底部
10 | > * site_url:www.94imm.com   # 网站url
11 | > * Create databases:94imm   # 添加数据库
12 | > * Create databases password:   # 数据库账号(root)如未安装mysql，此处将设置为root密码
13 | > * Password for root:   # 如本机以安装mysql，此处需输入root密码
14 | ```
15 | 参数配置
16 | > 配置文件为根目录下的config.py
17 | ```python
18 | # 数据库信息，一键脚本自动添加
19 | mysql_config = {
20 |         'ENGINE': 'django.db.backends.mysql',
21 |         'NAME': '94imm',
22 |         'USER': '94imm',
23 |         'PASSWORD': '94imm',
24 |         'HOST': '127.0.0.1',
25 |         'PORT': '3306',
26 |     }
27 | # 数组形式，可以添加多个域名
28 | allow_url=["www.94imm.com","94imm.com"]
29 | # 缓存超时时间，服务器性能好可缩短此时
30 | cache_time=300
31 | # 使用的模板（暂时开放一个）
32 | templates="zde"
33 | # 网站名
34 | site_name="94iMM"
35 | # 一键脚本自动添加
36 | site_url = "https://www.94imm.com"
37 | # 网站关键词
38 | key_word = "关键词1,关键词2,关键词3"
39 | # 网站说明
40 | description = "这是一个高质量的自动爬虫"
41 | # 底部联系邮箱
42 | email = "admin@94imm.com"
43 | # 网站调试模式
44 | debug = False
45 | # 页面底部友情链接
46 | friendly_link = [{"name":"94imm","link":"https://www.94imm.com"},{"name":"获取源码","link":"https://github.com/Turnright-git/94imm.git"}]
47 | 
48 | ```
49 | 
50 | 使用说明
51 | > 进入项目根目录
52 | ```shell
53 | 启动网站
54 | ./start s
55 | 关闭网站
56 | ./start stop
57 | 重启网站
58 | ./start r
59 | 清空网站缓存（使所做的修改立即生效）
60 | ./start c
61 | ```
62 | > 项目模板目录templates，base.html中可直接添加统计代码
63 | 
64 | 手动安装说明
65 | > 项目依赖python3.6 mysql5.6
66 | 
67 | ```
68 | git clone https://github.com/Turnright-git/94imm.git
69 | cd 94imm
70 | vi config.py  #参照配置说明修改
71 | vi uwsgi.ini  #修改uwsgi配置
72 | ```
73 | 如需使用反向代理，在nginx.conf中添加如下server段
74 | ```
75 | server {
76 |         listen       80;
77 |         server_name  localhost; # 网站域名
78 | 
79 |         location / {
80 |             proxy_pass http://127.0.0.1:8000;
81 |         }
82 | ```
83 | 


--------------------------------------------------------------------------------
/templates/zde/pagination.html:
--------------------------------------------------------------------------------
 1 | {% if is_paginated %}
 2 | {% load i18n %}
 3 | <ul>
 4 |   {% block previouslink %}
 5 |   {% if page_obj.has_previous %}
 6 |   {% if disable_link_for_first_page and page_obj.previous_page_number == 1 %}
 7 |    <li class="prev-page"><a href="{{ request.path }}{% if getvars %}?{{ getvars|slice:"1:" }}{% endif %}">{{ previous_link_decorator|safe }}{% trans "" %}</a></li>
 8 |   {% else %}
 9 |   <li class="prev-page"><a href="?page{{ page_suffix }}={{ page_obj.previous_page_number }}{{ getvars }}">{{ previous_link_decorator|safe }}{% trans "" %}</a></li>
10 |   {% endif %}
11 |   {% else %}
12 |   {% if display_disabled_previous_link %}
13 |   <li class="prev-page">{{ previous_link_decorator|safe }}{% trans "" %}</li>
14 |   {% endif %}
15 |   {% endif %}
16 |   {% endblock previouslink %}
17 |   {% block pagelinks %}
18 |   {% if display_page_links %}
19 |   {% for page in pages %}
20 |   {% if page %}
21 |   {% ifequal page page_obj.number %}
22 |   <li><a href="?page{{ page_suffix }}={{ page }}{{ getvars }}" style="background-color:#FF6651;color:#fff" >{{ page }}</a></li>
23 |   {% else %}
24 |   {% if disable_link_for_first_page and page == 1 %}
25 |   <li><a href="{{ request.path }}{% if getvars %}?{{ getvars|slice:"1:" }}{% endif %}">{{ page }}</a></li>
26 |   {% else %}
27 |   <li><a href="?page{{ page_suffix }}={{ page }}{{ getvars }}" >{{ page }}</a></li>
28 |   {% endif %}
29 |   {% endifequal %}
30 |   {% else %}
31 |   <li><span>...</span></li>
32 |   {% endif %}
33 |   {% endfor %}
34 |   {% endif %}
35 |   {% endblock pagelinks %}
36 |   {% block nextlink %}
37 |   {% if page_obj.has_next %}
38 |   <li class="next-page"><a href="?page{{ page_suffix }}={{ page_obj.next_page_number }}{{ getvars }}">{% trans "" %}{{ next_link_decorator|safe }}</a></li>
39 |   {% else %}
40 |   {% if display_disabled_next_link %}
41 |   <li class="next-page"><span class="disabled next">{% trans "" %}{{ next_link_decorator|safe }}</span></li>
42 |   {% endif %}
43 |   {% endif %}
44 |   {% endblock nextlink %}
45 | </ol>
46 | {% endif %}
47 | 


--------------------------------------------------------------------------------
/static/zde/js/html5.js:
--------------------------------------------------------------------------------
1 | (function(a,b){function h(a,b){var c=a.createElement("p"),d=a.getElementsByTagName("head")[0]||a.documentElement;return c.innerHTML="x<style>"+b+"</style>",d.insertBefore(c.lastChild,d.firstChild)}function i(){var a=l.elements;return typeof a=="string"?a.split(" "):a}function j(a){var b={},c=a.createElement,f=a.createDocumentFragment,g=f();a.createElement=function(a){if(!l.shivMethods)return c(a);var f;return b[a]?f=b[a].cloneNode():e.test(a)?f=(b[a]=c(a)).cloneNode():f=c(a),f.canHaveChildren&&!d.test(a)?g.appendChild(f):f},a.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+i().join().replace(/\w+/g,function(a){return c(a),g.createElement(a),'c("'+a+'")'})+");return n}")(l,g)}function k(a){var b;return a.documentShived?a:(l.shivCSS&&!f&&(b=!!h(a,"article,aside,details,figcaption,figure,footer,header,hgroup,nav,section{display:block}audio{display:none}canvas,video{display:inline-block;*display:inline;*zoom:1}[hidden]{display:none}audio[controls]{display:inline-block;*display:inline;*zoom:1}mark{background:#FF0;color:#000}")),g||(b=!j(a)),b&&(a.documentShived=b),a)}var c=a.html5||{},d=/^<|^(?:button|form|map|select|textarea|object|iframe|option|optgroup)$/i,e=/^<|^(?:a|b|button|code|div|fieldset|form|h1|h2|h3|h4|h5|h6|i|iframe|img|input|label|li|link|ol|option|p|param|q|script|select|span|strong|style|table|tbody|td|textarea|tfoot|th|thead|tr|ul)$/i,f,g;(function(){var c=b.createElement("a");c.innerHTML="<xyz></xyz>",f="hidden"in c,f&&typeof injectElementWithStyles=="function"&&injectElementWithStyles("#modernizr{}",function(b){b.hidden=!0,f=(a.getComputedStyle?getComputedStyle(b,null):b.currentStyle).display=="none"}),g=c.childNodes.length==1||function(){try{b.createElement("a")}catch(a){return!0}var c=b.createDocumentFragment();return typeof c.cloneNode=="undefined"||typeof c.createDocumentFragment=="undefined"||typeof c.createElement=="undefined"}()})();var l={elements:c.elements||"abbr article aside audio bdi canvas data datalist details figcaption figure footer header hgroup mark meter nav output progress section summary time video",shivCSS:c.shivCSS!==!1,shivMethods:c.shivMethods!==!1,type:"default",shivDocument:k};a.html5=l,k(b)})(this,document)


--------------------------------------------------------------------------------
/crawler/Compress.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import sys
 3 | 
 4 | sys.path.append('../')
 5 | from  PIL import Image as Img
 6 | import os,threading,platform
 7 | 
 8 | class Compress():
 9 |     file_list=[]
10 |     rlock = threading.RLock()
11 |     def __init__(self,file_dir,new_dir=None,th_num=10):
12 |         self.file_dir=file_dir
13 |         self.new_dir=new_dir
14 |         self.th_num=th_num
15 | 
16 |     sysstr = platform.system()
17 |     if sysstr == "Windows":
18 |         p="\\"
19 |     else:
20 |         p="/"
21 | 
22 |     def get_file_name(self):
23 |         for files in os.walk(self.file_dir):
24 |             for name in files[2]:
25 |                 file=files[0]+self.p+name
26 |                 size=int(os.path.getsize(file) / 1024)
27 |                 if size>400:
28 |                     self.file_list.append(file)
29 | 
30 |     def pl_compress(self,file_path):
31 |         path = file_path.split(self.p)
32 |         name = path[-1]
33 |         image = Img.open(file_path)
34 |         image.save("/".join(path[0:-1])+self.p+name, quality=85)
35 |         print("压缩完成：" + file_path)
36 | 
37 |     def pl_compress_new(self,file_path):
38 |         path = file_path.split(self.p)
39 |         name = path[-1]
40 |         image = Img.open(file_path)
41 |         new_name=self.new_dir+self.p+"/".join(path[-2:-1])+self.p
42 |         is_ex=os.path.exists(new_name)
43 |         if not is_ex:
44 |             os.makedirs(new_name)
45 |         image.save(new_name+name, quality=85)
46 |         print("压缩完成：" + file_path)
47 | 
48 |     def do_work(self):
49 |         while True:
50 |             Compress.rlock.acquire()
51 |             if len(Compress.file_list) == 0:
52 |                 Compress.rlock.release()
53 |                 break
54 |             else:
55 |                 file_path = Compress.file_list.pop()
56 |                 Compress.rlock.release()
57 |                 try:
58 |                     if new_dir == None:
59 |                         self.pl_compress(file_path)
60 |                     else:
61 |                         self.pl_compress_new(file_path)
62 |                 except Exception as e:
63 |                     pass
64 | 
65 | 
66 |     def run(self):
67 |         for i in range(self.th_num):
68 |             download_t = threading.Thread(target=self.do_work)
69 |             download_t.start()
70 | 
71 | 
72 | if __name__ == "__main__":
73 |     print("输入源图片所在路径")
74 |     dir_name = input("")
75 |     print("1.覆盖原图片，2.压缩到新路径")
76 |     in_num=input("")
77 |     new_dir=None
78 |     if in_num=="2":
79 |         print("输入保存路径")
80 |         new_dir = input("")
81 |     compress=Compress(dir_name,new_dir,10)
82 |     compress.get_file_name()
83 |     compress.run()


--------------------------------------------------------------------------------
/templates/zde/base.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE HTML>
 2 | <html>
 3 | <head>
 4 |     <meta charset="UTF-8">
 5 |     <meta http-equiv="X-UA-Compatible" content="IE=edge">
 6 |     <meta name="viewport"
 7 |           content="width=device-width, initial-scale=1.0, user-scalable=0, minimum-scale=1.0, maximum-scale=1.0">
 8 |     <meta name="apple-mobile-web-app-capable" content="yes">
 9 |     <meta name="apple-mobile-web-app-status-bar-style" content="black">
10 |     <meta http-equiv="cache-control" content="no-siteapp">
11 |     <title>{% block title %} - {% endblock title %} </title>
12 |     <link rel='dns-prefetch' href='//s.w.org'/>
13 |     <link rel='stylesheet' id='main-css' href="/static/zde/css/style.css" type='text/css' media='all'/>
14 |     <script type='text/javascript' src="/static/zde/js/jquery.js"></script>
15 | 	<script type='text/javascript' src="/static/zde/js/main.js"></script>
16 |     <meta name="keywords" content="{% block keywords %}{% endblock keywords %} ">
17 |     <meta name="description" content="{% block description %}{% endblock description %} ">
18 |     <style type="text/css">.recentcomments a {
19 |         display: inline !important;
20 |         padding: 0 !important;
21 |         margin: 0 !important;
22 |     }</style>
23 | 	{% block js_head %}{% endblock js_head %}
24 |     <link rel="icon" href="/static/favicon.ico"/>
25 |     <!--[if lt IE 9]>
26 |     <script src="/static/zde/js/html5.js"></script><![endif]-->
27 | </head>
28 | <body class="home blog list-thumb-hover-action">
29 | <header class="header">
30 |     <div class="container">
31 |         <h1 class="logo"><a href="/" title="{{ siteName }}"><img src="/static/logo.png"><span>{{ siteName }}</span></a></h1>
32 |         <div class="sitenav">
33 |             {% block menu %}{% endblock menu %}
34 |         </div>
35 |         <span class="sitenav-on"><i class="fa"></i></span>
36 |         <span class="sitenav-mask"></span>
37 |         <span class="searchstart-on"><i class="fa">&#xe600;</i></span>
38 |         <span class="searchstart-off"><i class="fa">&#xe606;</i></span>
39 |         <form method="get" class="searchform" action="/search/">
40 |             <button tabindex="3" class="sbtn" type="submit"><i class="fa">&#xe600;</i></button>
41 |             <input tabindex="2" class="sinput" name="s" type="text" placeholder="输入关键字" value="">
42 |         </form>
43 |     </div>
44 | </header>
45 | {% block focusbox %}{% endblock focusbox %}
46 | <section class="container">{% block section %}{% endblock section %}</section>
47 | {% block recommend %}{% endblock recommend %}
48 | <footer class="footer">
49 | 	    <p>&copy; 2020 <a href="/">{{ siteName }}</a></p>
50 | 		<h1>美图分享,每日更新.</h1>
51 | 		<ul>友情链接：
52 | 		{% for links in friendly_link %}
53 | 		    <li><a href="{{ links.link }}">{{ links.name }}</a></li>
54 | 			{% endfor %}
55 | 		</ul>
56 | 		<p>POWERED BY <a href="https://github.com/Turnright-git/94imm.git">94iMM</a> | 本站数据来自网络，如侵害您的权利请联系本站删除相关信息，邮箱：{{ email }}</p>
57 | </footer>
58 | 
59 | {% block js %}{% endblock js %}
60 | <!-- 统计代码 -->
61 | 
62 | </body>
63 | </html>


--------------------------------------------------------------------------------
/static/zde/js/flutter-hearts-zmt.js:
--------------------------------------------------------------------------------
1 | !function(t,e){"object"==typeof exports&&"undefined"!=typeof module?module.exports=e():"function"==typeof define&&define.amd?define(e):t.BubbleHearts=e()}(this,function(){"use strict";function t(t,n,r){var i=e.uniformDiscrete(89,91)/100,o=1-i,u=(e.uniformDiscrete(45,60)+e.uniformDiscrete(45,60))/100,a=function(t){return t>i?Math.max(((1-t)/o).toFixed(2),.1)*u:u},c=e.uniformDiscrete(-30,30),f=function(t){return c},h=10,s=n.width/2+e.uniformDiscrete(-h,h),d=(n.width-Math.sqrt(Math.pow(t.width,2)+Math.pow(t.height,2)))/2-h,l=e.uniformDiscrete(.8*d,d)*(e.uniformDiscrete(0,1)?1:-1),m=e.uniformDiscrete(250,400),w=function(t){return t>i?s:s+l*Math.sin(m*(i-t)*Math.PI/180)},v=function(e){return t.height/2+(n.height-t.height/2)*e},p=e.uniformDiscrete(14,18)/100,g=function(t){return t>p?1:1-((p-t)/p).toFixed(2)};return function(e){if(!(e>=0))return!0;r.save();var n=a(e),i=f(e),o=w(e),u=v(e);r.translate(o,u),r.scale(n,n),r.rotate(i*Math.PI/180),r.globalAlpha=g(e),r.drawImage(t,-t.width/2,-t.height/2,t.width,t.height),r.restore()}}
2 | var e=function(t){var e=t,n=Math.floor,r=Math.random;return t.uniform=function(t,e){return t+(e-t)*r()},t.uniformDiscrete=function(t,r){return t+n((r-t+1)*e.uniform(0,1))},t}({}),n=function(t,e){if(!(t instanceof e))throw new TypeError("Cannot call a class as a function")},r=function(){function t(t,e){for(var n=0;n<e.length;n++){var r=e[n];r.enumerable=r.enumerable||!1,r.configurable=!0,"value"in r&&(r.writable=!0),Object.defineProperty(t,r.key,r)}}
3 | return function(e,n,r){return n&&t(e.prototype,n),r&&t(e,r),e}}(),i=window.requestAnimationFrame||window.webkitRequestAnimationFrame||function(t){return setTimeout(t,16)},o=function(){function o(){n(this,o);var t=this.canvas=document.createElement("canvas"),e=this.context=t.getContext("2d"),r=this._children=[],u=function n(){i(n),e.clearRect(0,0,t.width,t.height);for(var o=0,u=r.length;o<u;){var a=r[o];a.render.call(null,(a.timestamp+a.duration-new Date)/a.duration)?(r.splice(o,1),u--):o++}};i(u)}
4 | return r(o,[{key:"bubble",value:function(n){var r=arguments.length>1&&void 0!==arguments[1]?arguments[1]:e.uniformDiscrete(1200,2400),i=arguments.length>2&&void 0!==arguments[2]?arguments[2]:t(n,this.canvas,this.context);return this._children.push({render:i,duration:r,timestamp:+new Date}),this}}]),o}();return o});var assets=['/static/zde/1.png','/static/zde/2.png','/static/zde/3.png','/static/zde/4.png','/static/zde/5.png',];assets.forEach(function(src,index){assets[index]=new Promise(function(resolve){var img=new Image;img.onload=resolve.bind(null,img);img.src=src;});});Promise.all(assets).then(function(images){var random={uniform:function(min,max){return min+(max-min)*Math.random();},uniformDiscrete:function(i,j){return i+Math.floor((j-i+1)*random.uniform(0,1));},};var stage=new BubbleHearts();var canvas=stage.canvas;canvas.width=170;canvas.height=300;canvas.style['width']='170px';canvas.style['height']='300px';document.body.appendChild(canvas);document.getElementsByClassName('journal-reward')[0].addEventListener('click',function(){stage.bubble(images[random.uniformDiscrete(0,images.length-1)]);},false);});


--------------------------------------------------------------------------------
/templates/zde/index.html:
--------------------------------------------------------------------------------
 1 | {% extends 'base.html' %}
 2 | 
 3 | <!-- header start -->
 4 | {% block title %}{{ siteName }}{% endblock title %}
 5 | {% block keywords %}{{ keyWord }}{% endblock keywords %}
 6 | {% block description %}{{ description }}{% endblock description %}
 7 | <!-- header end -->
 8 | <!-- menu start-->
 9 | {% block menu %}
10 | <ul>
11 |     <li class="menu-item menu-item-type-taxonomy menu-item-object-category current-menu-item menu-item-28"><a href="/">随便看看</a>
12 |     </li>
13 |     <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-has-children menu-item-50716"><a>妹子图</a>
14 |         <ul class="sub-menu">
15 |             {% for types in typelist %}
16 |                 <li class="menu-item menu-item-type-post_type menu-item-object-page menu-item-18"><a
17 |                         href="{% url 'type' types.type_id %}">{{ types.type }}</a></li>
18 |             {% endfor %}
19 |         </ul>
20 |     </li>
21 |     <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-has-children menu-item-50716"><a>排行榜</a>
22 |         <ul class="sub-menu">
23 |             <li class="menu-item menu-item-type-post_type menu-item-object-page menu-item-18"><a
24 |                     href="/sort/new">最新发布</a></li>
25 |             <li class="menu-item menu-item-type-post_type menu-item-object-page menu-item-18"><a
26 |                     href="/sort/hot">最受欢迎</a></li>
27 |         </ul>
28 |     </li>
29 |     <li><a href="/tag">标签云</a></li>
30 |     <li><a href="/video">视频小姐姐</a></li>
31 | </ul>
32 | {% endblock menu %}
33 | <!-- menu end-->
34 | <!-- section start-->
35 | {% block section %}
36 | <div class="excerpts-wrapper">
37 |         <div class="excerpts">
38 |             {% load pagination_tags %}
39 |             {% autopaginate data 10 %}
40 |             {% for imglist in data %}
41 |                 <article class="excerpt excerpt-c5">
42 |                     <div class="thumbnail">
43 | 					<a href="/type/{{ imglist.type_id }}" class="imgbox-a"> {{ imglist.type }}</a>
44 |                         <div class="imgbox" style="background-image:url({{ imglist.firstimg }})" onclick="window.location.href= '{% url 'article' imglist.pid %}';return false">  
45 |                     </div>
46 |                     <h2><a href="{% url 'article' imglist.pid %}">{{ imglist.title }}</a></h2>
47 |                     <footer>
48 |                         <hot>
49 |                             <img src="/static/zde/hot.png">{{ imglist.hot }}
50 |                         </hot>
51 |                         <a class="post-like">
52 |                             <time>{{ imglist.sendtime |date:'Y-m-d' }}</time></a>
53 |                     </footer>
54 |                 </article>
55 |             {% endfor %}
56 |         </div>
57 |     </div>
58 | <div class="pagination pagination-multi"> {% paginate %} </div>
59 | {% endblock section%}
60 | <!-- section end-->
61 | <!-- js start -->
62 | {% block js %}
63 | <script>
64 |     window.TBUI = {
65 |         ajaxpager: '3',
66 |         pagenum: '3',
67 |         shareimage: '',
68 |         shareimagethumb: '1',
69 |         fullgallery: '0',
70 |         fullimage: '0'
71 |     }
72 | </script>
73 | {% endblock js %}
74 | <!--js end-->


--------------------------------------------------------------------------------
/crawler/down_img.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | sys.path.append('../')
 4 | import threading, pymysql,os,requests
 5 | from config import mysql_config
 6 | 
 7 | 
 8 | url_list=[]
 9 | origin_list=[]
10 | 
11 | 
12 | rlock = threading.RLock()
13 | dbhost = {
14 |     "host": mysql_config['HOST'],
15 |     "dbname": mysql_config['NAME'],
16 |     "user": mysql_config['USER'],
17 |     "password": mysql_config['PASSWORD']
18 | }
19 | db = pymysql.connect(dbhost.get("host"), dbhost.get("user"), dbhost.get("password"), dbhost.get("dbname"))
20 | cursor = db.cursor()
21 | 
22 | def del_page():
23 |     cursor.execute("Delete FROM images_page WHERE firstimg='1'")
24 | 
25 | def img_url():
26 |     cursor.execute("SELECT * FROM images_image")
27 |     for img_url in cursor.fetchall():
28 |         isExiststag=os.path.exists(".."+img_url[2])
29 |         host_sql = "SELECT crawler FROM images_page WHERE id =" + "'" + str(img_url[1]) + "'"
30 |         cursor.execute(host_sql)
31 |         if not isExiststag:
32 |             print("添加图片：" + img_url[-1])
33 |             url_list.append({"img_path": img_url[-2], "origin_url": img_url[-1], "host": cursor.fetchone()[0]})
34 |         elif os.path.getsize(".."+img_url[2])==0:
35 |             os.remove(".."+img_url[2])
36 |             print("添加图片：" + img_url[-1])
37 |             url_list.append({"img_path": img_url[-2], "origin_url": img_url[-1], "host": cursor.fetchone()[0]})
38 |         elif "http://www.nvshenge.com" in img_url[-1]:
39 |             image_path=img_url[-2].split("/")[-1][0:8]
40 |             origin_url="http://nvshenge.com/uploads/image/"+image_path+"/"+img_url[-2].split("/")[-1]
41 |             print("添加图片：" + origin_url)
42 |             url_list.append({"img_path": img_url[-2], "origin_url": origin_url, "host": cursor.fetchone()[0]})
43 |         else:
44 |             continue
45 | 
46 | 
47 | def down_img():
48 |     while True:
49 |         rlock.acquire()
50 |         if len(url_list) == 0:
51 |             rlock.release()
52 |             break
53 |         else:
54 |             img_url = url_list.pop()
55 |             rlock.release()
56 |             try:
57 |                 headers = {
58 |                     "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36",
59 |                     "Referer": img_url.get("host")
60 |                 }
61 |                 img_path=img_url.get("img_path")
62 |                 path = "/".join(img_path.split("/")[1:-1])
63 |                 is_path=os.path.exists("../"+path)
64 |                 if "http://nvshenge.com" == img_url.get("origin_url")[0:19]:
65 |                     continue
66 |                 if not is_path:
67 |                     os.makedirs("../" + path)
68 |                 origin_url=img_url.get("origin_url")
69 |                 with open (".."+img_path,"wb") as w:
70 |                     w.write(requests.get(origin_url, headers=headers,verify=False,timeout=5).content)
71 |                 print("下载完成："+origin_url)
72 |             except Exception as e:
73 |                 # url_list.append({"img_path":img_url.get("img_path"),"origin_url":img_url.get("origin_url"),"host":img_url.get("host")})
74 |                 # print("下载失败，重新添加到列表中")
75 |                 pass
76 | 
77 | if __name__ == "__main__":
78 |     del_page()
79 |     try:
80 |         img_url()
81 |     except Exception as e:
82 |         print(e)
83 |     for i in range(10):
84 |         t2 = threading.Thread(target=down_img)
85 |         t2.start()


--------------------------------------------------------------------------------
/templates/zde/category.html:
--------------------------------------------------------------------------------
 1 | {% extends 'base.html' %}
 2 | 
 3 | <!-- header start -->
 4 | {% block title %}妹子图 - {{ siteName }}{% endblock title %}
 5 | {% block keywords %}{{ keyWord }}{% endblock keywords %}
 6 | {% block description %}{{ description }}{% endblock description %}
 7 | <!-- header end -->
 8 | <!-- menu start-->
 9 | {% block menu %}
10 |     <ul>
11 |         <li><a href="/">随便看看</a></li>
12 |         <li class="menu-item menu-item-type-post_type menu-item-object-page current-menu-ancestor current-menu-parent current_page_parent current_page_ancestor menu-item-has-children menu-item-17">
13 |             <a>妹子图</a>
14 |             <ul class="sub-menu">
15 |                 {% for types in typelist %}
16 |                     {% ifequal  typeid types.type_id %}
17 |                         <li class="menu-item menu-item-type-taxonomy menu-item-object-category current-menu-item menu-item-28">
18 |                             <a href="{% url 'type' types.type_id %} ">{{ types.type }}</a></li>
19 |                     {% else %}
20 |                         <li><a href="{% url 'type' types.type_id %}">{{ types.type }}</a></li>
21 |                     {% endifequal %}
22 |                 {% endfor %}
23 |             </ul>
24 |         </li>
25 |         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-has-children menu-item-50716">
26 |             <a>排行榜</a>
27 |             <ul class="sub-menu">
28 |                 <li class="menu-item menu-item-type-post_type menu-item-object-page menu-item-18"><a href="/sort/new">最新发布</a>
29 |                 </li>
30 |                 <li class="menu-item menu-item-type-post_type menu-item-object-page menu-item-18"><a href="/sort/hot">最受欢迎</a>
31 |                 </li>
32 |             </ul>
33 |         </li>
34 |         <li><a href="/tag">标签云</a></li>
35 |         <li><a href="/video">视频小姐姐</a></li>
36 |     </ul>
37 | {% endblock menu %}
38 | <!-- menu end-->
39 | <!-- section start-->
40 | {% block section %}
41 |     <div class="excerpts-wrapper">
42 |         <div class="excerpts">
43 |             {% load pagination_tags %}
44 |             {% autopaginate data 10 %}
45 |             {% for imglist in data %}
46 |                 <article class="excerpt excerpt-c5">
47 |                     <div class="thumbnail">
48 | 					<a href="/type/{{ imglist.type_id }}" class="imgbox-a"> {{ imglist.type }}</a>
49 |                         <div class="imgbox" style="background-image:url({{ imglist.firstimg }})" onclick="window.location.href= '{% url 'article' imglist.pid %}';return false">  
50 |                     </div>
51 |                     <h2><a href="{% url 'article' imglist.pid %}">{{ imglist.title }}</a></h2>
52 |                     <footer>
53 |                         <hot>
54 |                             <img src="/static/zde/hot.png">{{ imglist.hot }}
55 |                         </hot>
56 |                         <a class="post-like">
57 |                             <time>{{ imglist.sendtime |date:'Y-m-d' }}</time></a>
58 |                     </footer>
59 |                 </article>
60 |             {% endfor %}
61 |         </div>
62 |     </div>
63 | 	<div class="pagination pagination-multi"> {% paginate %} </div>
64 | {% endblock section %}
65 | <!-- section end-->
66 | <!-- js start -->
67 | {% block js %}
68 |     <script>
69 |     window.TBUI = {
70 |         ajaxpager: '3',
71 |         pagenum: '3',
72 |         shareimage: '',
73 |         shareimagethumb: '1',
74 |         fullgallery: '0',
75 |         fullimage: '0'
76 |     }
77 | </script>
78 | {% endblock js %}
79 | <!--js end-->


--------------------------------------------------------------------------------
/crawler/AutoPost.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import sys
 3 | 
 4 | sys.path.append('../')
 5 | import pymysql,time,os,random,shutil,platform
 6 | from config import mysql_config
 7 | 
 8 | dbhost = {
 9 |     "host": mysql_config['HOST'],
10 |     "dbname": mysql_config['NAME'],
11 |     "user": mysql_config['USER'],
12 |     "password": mysql_config['PASSWORD']
13 | }
14 | 
15 | def do_post(file_dir,sleep_time="0"):
16 |     db = pymysql.connect(dbhost.get("host"),dbhost.get("user"), dbhost.get("password"),dbhost.get("dbname"))
17 |     cursor = db.cursor()
18 |     for files in os.walk(file_dir):
19 |         tagidlist = []
20 |         sysstr = platform.system()
21 |         if sysstr == "Windows":
22 |             title=files[0].split("\\")[-1]
23 |             os_path=file_dir.split("\\")[-1]
24 |         elif sysstr == "Linux":
25 |             title = files[0].split("/")[-1]
26 |             os_path = file_dir.split("/")[-1]
27 |         if title != os_path:
28 |             tags=['cosplay','萝莉','美腿','丝袜','少女']
29 |             isExists = cursor.execute("SELECT * FROM images_page WHERE title =" + "'" + title + "'" + " limit 1;")
30 |             if isExists != 0:
31 |                 print("已存在：" + title)
32 |             else:
33 |                 for tag in tags:
34 |                     sqltag = "SELECT * FROM images_tag WHERE tag =" + "'" + tag + "'" + " limit 1;"
35 |                     isExiststag = cursor.execute(sqltag)
36 |                     if isExiststag != 1:
37 |                         cursor.execute("INSERT INTO images_tag (tag) VALUES (%s)", tag)
38 |                     cursor.execute("SELECT id FROM images_tag WHERE tag =" + "'" + tag + "'")
39 |                     for id in cursor.fetchall():
40 |                         tagidlist.append(id[0])
41 |                 p = (title, str(tagidlist), time.strftime('%Y-%m-%d', time.localtime(time.time())), "1", "1")
42 |                 cursor.execute("INSERT INTO images_page (title,tagid,sendtime,typeid,firstimg) VALUES (%s,%s,%s,%s,%s)",
43 |                                    p)
44 |                 pageid = cursor.lastrowid
45 |                 rpath = "".join(random.sample('abcdefghijklmnopqrstuvwxyz', 7))
46 |                 count = 1
47 |                 for name in files[2]:
48 |                     path=files[0]+"/"+name
49 |                     rename=str(count)+"."+name.split(".")[-1]
50 |                     path_isExists=os.path.exists("../static/images/"+rpath)
51 |                     if not path_isExists:
52 |                         os.makedirs("../static/images/"+rpath)
53 |                     try:
54 |                         shutil.move(path, "../static/images/"+rpath+"/"+rename)
55 |                         imgp = "/static/images/" + rpath+"/"+rename
56 |                         if count==1:
57 |                             cursor.execute(
58 |                                 "UPDATE images_page SET firstimg = %s WHERE id=%s",(imgp,pageid))
59 |                         cursor.execute("INSERT INTO images_image (pageid,imageurl) VALUES (%s,%s)", (pageid,imgp))
60 | 
61 |                     except Exception as e:
62 |                         print(e)
63 |                         break
64 |                     count+=1
65 |                 try:
66 |                     os.removedirs(files[0])
67 |                 except:
68 |                     print("目录不为空，无法删除")
69 |             print("发布完成：" + title)
70 |         time.sleep(int(sleep_time))
71 | 
72 | # do_post("输入图片所在目录","发布间隔时间，默认0，单位秒")
73 | if __name__ == "__main__":
74 |     print("图片所在目录：")
75 |     path=input("")
76 |     print("自动发布间隔，0为全部发布，单位秒")
77 |     send_time=input("")
78 |     do_post(path,send_time)
79 | 
80 | 


--------------------------------------------------------------------------------
/templates/zde/sort.html:
--------------------------------------------------------------------------------
 1 | {% extends 'base.html' %}
 2 | 
 3 | <!-- header start -->
 4 | {% block title %}{% ifequal  typeid "new" %}最新发布 - {{ siteName }}{{ else }}最受欢迎- {{ siteName }}{% endifequal %}{% endblock title %}
 5 | {% block keywords %}{{ keyWord }}{% endblock keywords %}
 6 | {% block description %}{{ description }}{% endblock description %}
 7 | <!-- header end -->
 8 | <!-- menu start-->
 9 | {% block menu %}
10 |     <ul>
11 |         <li><a href="/">随便看看</a></li>
12 |         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-has-children menu-item-50716">
13 |             <a>妹子图</a>
14 |             <ul class="sub-menu">
15 |                 {% for types in typelist %}
16 |                     <li menu-item menu-item-type-post_type menu-item-object-page menu-item-18><a
17 |                             href="{% url 'type' types.type_id %}">{{ types.type }}</a></li>
18 |                 {% endfor %}
19 |             </ul>
20 |         </li>
21 |         <li class="menu-item menu-item-type-post_type menu-item-object-page current-menu-ancestor current-menu-parent current_page_parent current_page_ancestor menu-item-has-children menu-item-17">
22 |             <a>排行榜</a>
23 |             <ul class="sub-menu">
24 |                 {% ifequal  method "new" %}
25 |                     <li class="menu-item menu-item-type-taxonomy menu-item-object-category current-menu-item menu-item-28">
26 |                         <a href="/sort/new">最新发布</a></li>
27 |                     <li>
28 |                     <a href="/sort/hot">最受欢迎</a></li>
29 |                 {% else %}
30 |                     <li><a href="/sort/new">最新发布</a></li>
31 |                     <li class="menu-item menu-item-type-taxonomy menu-item-object-category current-menu-item menu-item-28">
32 |                         <a href="/sort/hot">最受欢迎</a></li>
33 |                 {% endifequal %}
34 |             </ul>
35 |         </li>
36 |         <li><a href="/tag">标签云</a></li>
37 |         <li><a href="/video">视频小姐姐</a></li>
38 |     </ul>
39 | {% endblock menu %}
40 | <!-- menu end-->
41 | <!-- section start-->
42 | {% block section %}
43 |     <div class="excerpts-wrapper">
44 |         <div class="excerpts">
45 |             {% load pagination_tags %}
46 |             {% autopaginate data 10 %}
47 |             {% for imglist in data %}
48 |                 <article class="excerpt excerpt-c5">
49 |                     <div class="thumbnail">
50 | 					<a href="/type/{{ imglist.type_id }}" class="imgbox-a"> {{ imglist.type }}</a>
51 |                         <div class="imgbox" style="background-image:url({{ imglist.firstimg }})" onclick="window.location.href= '{% url 'article' imglist.pid %}';return false">  
52 |                     </div>
53 |                     <h2><a href="{% url 'article' imglist.pid %}">{{ imglist.title }}</a></h2>
54 |                     <footer>
55 |                         <hot>
56 |                             <img src="/static/zde/hot.png">{{ imglist.hot }}
57 |                         </hot>
58 |                         <a class="post-like">
59 |                             <time>{{ imglist.sendtime |date:'Y-m-d' }}</time></a>
60 |                     </footer>
61 |                 </article>
62 |             {% endfor %}
63 |         </div>
64 |     </div>
65 |     <div class="pagination pagination-multi"> {% paginate %} </div>
66 | {% endblock section %}
67 | <!-- section end-->
68 | <!-- js start -->
69 | {% block js %}
70 |     <script>
71 |         window.TBUI = {
72 |             ajaxpager: '3',
73 |             pagenum: '3',
74 |             shareimage: '',
75 |             shareimagethumb: '1',
76 |             fullgallery: '0',
77 |             fullimage: '0'
78 |         }
79 |     </script>
80 | {% endblock js %}
81 | <!--js end-->


--------------------------------------------------------------------------------
/94imm.sql:
--------------------------------------------------------------------------------
 1 | /*
 2 |  Navicat Premium Data Transfer
 3 | 
 4 |  Source Server         : localhost
 5 |  Source Server Type    : MySQL
 6 |  Source Server Version : 50629
 7 |  Source Host           : localhost:3306
 8 |  Source Schema         : 94imm
 9 | 
10 |  Target Server Type    : MySQL
11 |  Target Server Version : 50629
12 |  File Encoding         : 65001
13 | 
14 |  Date: 15/04/2020 20:13:30
15 | */
16 | 
17 | SET NAMES utf8mb4;
18 | SET FOREIGN_KEY_CHECKS = 0;
19 | 
20 | -- ----------------------------
21 | -- Table structure for django_migrations
22 | -- ----------------------------
23 | DROP TABLE IF EXISTS `django_migrations`;
24 | CREATE TABLE `django_migrations`  (
25 |   `id` int(11) NOT NULL AUTO_INCREMENT,
26 |   `app` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,
27 |   `name` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,
28 |   `applied` datetime(0) NOT NULL,
29 |   PRIMARY KEY (`id`) USING BTREE
30 | ) ENGINE = MyISAM AUTO_INCREMENT = 1 CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Dynamic;
31 | 
32 | -- ----------------------------
33 | -- Table structure for images_image
34 | -- ----------------------------
35 | DROP TABLE IF EXISTS `images_image`;
36 | CREATE TABLE `images_image`  (
37 |   `id` int(11) NOT NULL AUTO_INCREMENT,
38 |   `pageid` int(11) NOT NULL,
39 |   `imageurl` varchar(200) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,
40 |   `originurl` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
41 |   PRIMARY KEY (`id`) USING BTREE
42 | ) ENGINE = MyISAM AUTO_INCREMENT = 1 CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Dynamic;
43 | 
44 | -- ----------------------------
45 | -- Table structure for images_page
46 | -- ----------------------------
47 | DROP TABLE IF EXISTS `images_page`;
48 | CREATE TABLE `images_page`  (
49 |   `id` int(11) NOT NULL AUTO_INCREMENT,
50 |   `typeid` int(11) NOT NULL,
51 |   `sendtime` date NOT NULL,
52 |   `title` varchar(200) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,
53 |   `firstimg` varchar(200) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,
54 |   `tagid` varchar(200) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,
55 |   `crawler` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
56 |   `hot` int(10) NULL DEFAULT 0,
57 |   PRIMARY KEY (`id`) USING BTREE
58 | ) ENGINE = MyISAM AUTO_INCREMENT = 1 CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Dynamic;
59 | 
60 | -- ----------------------------
61 | -- Table structure for images_tag
62 | -- ----------------------------
63 | DROP TABLE IF EXISTS `images_tag`;
64 | CREATE TABLE `images_tag`  (
65 |   `id` int(4) NOT NULL AUTO_INCREMENT,
66 |   `tag` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
67 |   `uid` varchar(50) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
68 |   `date` varchar(50) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
69 |   PRIMARY KEY (`id`) USING BTREE
70 | ) ENGINE = MyISAM AUTO_INCREMENT = 1 CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Dynamic;
71 | 
72 | -- ----------------------------
73 | -- Table structure for images_type
74 | -- ----------------------------
75 | DROP TABLE IF EXISTS `images_type`;
76 | CREATE TABLE `images_type`  (
77 |   `id` int(11) NOT NULL AUTO_INCREMENT,
78 |   `type` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
79 |   PRIMARY KEY (`id`) USING BTREE
80 | ) ENGINE = MyISAM AUTO_INCREMENT = 7 CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Dynamic;
81 | 
82 | -- ----------------------------
83 | -- Table structure for images_video
84 | -- ----------------------------
85 | DROP TABLE IF EXISTS `images_video`;
86 | CREATE TABLE `images_video`  (
87 |   `id` int(11) NOT NULL AUTO_INCREMENT,
88 |   `url` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL,
89 |   `user_id` varchar(15) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL,
90 |   `date_time` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL,
91 |   `v_name` varchar(255) CHARACTER SET latin1 COLLATE latin1_swedish_ci NULL DEFAULT NULL,
92 |   `v_path` varchar(50) CHARACTER SET latin1 COLLATE latin1_swedish_ci NULL DEFAULT NULL,
93 |   `source` varchar(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL,
94 |   PRIMARY KEY (`id`) USING BTREE
95 | ) ENGINE = InnoDB AUTO_INCREMENT = 1 CHARACTER SET = latin1 COLLATE = latin1_swedish_ci ROW_FORMAT = Compact;
96 | 
97 | SET FOREIGN_KEY_CHECKS = 1;
98 | 


--------------------------------------------------------------------------------
/silumz/settings.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Django settings for silumz project.
  3 | 
  4 | Generated by 'django-admin startproject' using Django 1.10.6.
  5 | 
  6 | For more information on this file, see
  7 | https://docs.djangoproject.com/en/1.10/topics/settings/
  8 | 
  9 | For the full list of settings and their values, see
 10 | https://docs.djangoproject.com/en/1.10/ref/settings/
 11 | """
 12 | 
 13 | import os
 14 | from config import mysql_config,allow_url,cache_time,templates,debug
 15 | 
 16 | # Build paths inside the project like this: os.path.join(BASE_DIR, ...)
 17 | BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 18 | 
 19 | 
 20 | # Quick-start development settings - unsuitable for production
 21 | # See https://docs.djangoproject.com/en/1.10/howto/deployment/checklist/
 22 | 
 23 | # SECURITY WARNING: keep the secret key used in production secret!
 24 | SECRET_KEY = 'ge)(a+37gny_zn9c(+(kq+^yqw!jvblb67ck5allkpgv6(wi@^'
 25 | 
 26 | # SECURITY WARNING: don't run with debug turned on in production!
 27 | DEBUG = debug
 28 | 
 29 | ALLOWED_HOSTS = allow_url
 30 | 
 31 | 
 32 | # Application definition
 33 | 
 34 | INSTALLED_APPS = [
 35 |     'django.contrib.admin',
 36 |     'django.contrib.auth',
 37 |     'django.contrib.contenttypes',
 38 |     'django.contrib.sessions',
 39 |     'django.contrib.messages',
 40 |     'django.contrib.staticfiles',
 41 |     'images',
 42 |     'dj_pagination'
 43 | ]
 44 | 
 45 | MIDDLEWARE = [
 46 |     'django.middleware.cache.UpdateCacheMiddleware',
 47 |     'django.middleware.security.SecurityMiddleware',
 48 | 	'django.middleware.gzip.GZipMiddleware',
 49 |     'django.contrib.sessions.middleware.SessionMiddleware',
 50 |     'django.middleware.common.CommonMiddleware',
 51 |     # 'django.middleware.csrf.CsrfViewMiddleware',
 52 |     'django.contrib.auth.middleware.AuthenticationMiddleware',
 53 |     'django.contrib.messages.middleware.MessageMiddleware',
 54 |     'django.middleware.clickjacking.XFrameOptionsMiddleware',
 55 |     'dj_pagination.middleware.PaginationMiddleware',
 56 |     'django.middleware.cache.FetchFromCacheMiddleware',
 57 | ]
 58 | 
 59 | CACHES = {
 60 |     'default': {
 61 |         'BACKEND': 'django.core.cache.backends.filebased.FileBasedCache',
 62 |         'LOCATION': 'cache', #设置缓存文件的目录
 63 | 		'TIMEOUT':cache_time,
 64 | 		'OPTIONS':{
 65 | 			'MAX_ENTRIES': 300,
 66 | 			'CULL_FREQUENCY': 3,         
 67 |   }  
 68 |     }
 69 | }
 70 | 
 71 | ROOT_URLCONF = 'silumz.urls'
 72 | 
 73 | TEMPLATES = [
 74 |     {
 75 |         'BACKEND': 'django.template.backends.django.DjangoTemplates',
 76 |         'DIRS': [os.path.join(BASE_DIR, 'templates'+"/"+templates)]
 77 |         ,
 78 |         'APP_DIRS': True,
 79 |         'OPTIONS': {
 80 |             'context_processors': [
 81 |                 'django.template.context_processors.debug',
 82 |                 'django.template.context_processors.request',
 83 |                 'django.contrib.auth.context_processors.auth',
 84 |                 'django.contrib.messages.context_processors.messages',
 85 |                 'django.template.context_processors.i18n',
 86 |                 'django.template.context_processors.media',
 87 |                 'django.contrib.auth.context_processors.auth',
 88 |             ],
 89 |         },
 90 |     },
 91 | ]
 92 | 
 93 | WSGI_APPLICATION = 'silumz.wsgi.application'
 94 | 
 95 | 
 96 | # Database
 97 | # https://docs.djangoproject.com/en/1.10/ref/settings/#databases
 98 | 
 99 | DATABASES = {
100 |     'default': {
101 |         'ENGINE': 'django.db.backends.mysql',
102 |         'NAME': mysql_config['NAME'],
103 |         'USER': mysql_config['USER'],
104 |         'PASSWORD': mysql_config['PASSWORD'],
105 |         'HOST': mysql_config['HOST'],
106 |         'PORT': mysql_config['PORT'],
107 |     }
108 | }
109 | 
110 | 
111 | # Password validation
112 | # https://docs.djangoproject.com/en/1.10/ref/settings/#auth-password-validators
113 | 
114 | AUTH_PASSWORD_VALIDATORS = [
115 |     {
116 |         'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
117 |     },
118 |     {
119 |         'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
120 |     },
121 |     {
122 |         'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
123 |     },
124 |     {
125 |         'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
126 |     },
127 | ]
128 | 
129 | 
130 | # Internationalization
131 | # https://docs.djangoproject.com/en/1.10/topics/i18n/
132 | 
133 | LANGUAGE_CODE = 'en-us'
134 | 
135 | TIME_ZONE = 'UTC'
136 | 
137 | USE_I18N = True
138 | 
139 | USE_L10N = True
140 | 
141 | USE_TZ = True
142 | 
143 | 
144 | #= Static files (CSS, JavaScript, Images)
145 | # https://docs.djangoproject.com/en/1.10/howto/static-files/
146 | 
147 | STATIC_URL = '/static/'
148 | STATICFILES_DIRS=(
149 |     os.path.join(BASE_DIR,'static'),
150 | )
151 | 
152 | # dj_pagination
153 | PAGINATION_DEFAULT_WINDOW=1
154 | 


--------------------------------------------------------------------------------
/templates/zde/video.html:
--------------------------------------------------------------------------------
  1 | {% extends 'base.html' %}
  2 | 
  3 | <!-- header start -->
  4 | {% block title %}视频小姐姐 - {{ siteName }}{% endblock title %}
  5 | {% block keywords %}{% for t in keyword %}{{ t.tag }},{% endfor %}{% endblock keywords %}
  6 | {% block description %}{{ description }}{% endblock description %}
  7 | <!-- header end -->
  8 | {% block js_head %}
  9 | <script>if(/Android|webOS|iPhone|iPod|BlackBerry/i.test(navigator.userAgent)) {window.location.href = "/mvideo";}else{/**/}</script>
 10 | {% endblock js_head %}
 11 | {% block focusbox %}
 12 |     <div class="focusbox">
 13 |         <div class="container"><h1 class="focusbox-title">小姐姐福利视频</h1>
 14 |             <div class="article-tags">
 15 |                 <a href="">美女短视频</a>
 16 |                 <a href="">{{ source }}短视频</a>
 17 |                 <a href="">小姐姐福利视频</a>
 18 |             </div>
 19 |             <div style="padding-top: 5px;font-size:12px;/* margin-top: 0px; */font-family: monospace;color: #978883;">
 20 |                 点击用户ID可以关注小姐姐哦~！如查询结果为空请尝试刷新查询页面
 21 |             </div>
 22 |         </div>
 23 |     </div>
 24 | {% endblock focusbox %}
 25 | <!-- menu start-->
 26 | {% block menu %}
 27 |     <ul>
 28 |         <li><a href="/">随便看看</a></li>
 29 |         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-has-children menu-item-50716">
 30 |             <a>妹子图</a>
 31 |             <ul class="sub-menu">
 32 |                 {% for types in typelist %}
 33 |                     <li menu-item menu-item-type-post_type menu-item-object-page menu-item-18><a
 34 |                             href="{% url 'type' types.type_id %}">{{ types.type }}</a></li>
 35 |                 {% endfor %}
 36 |             </ul>
 37 |         </li>
 38 |         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-has-children menu-item-50716">
 39 |             <a>排行榜</a>
 40 |             <ul class="sub-menu">
 41 |                 <li class="menu-item menu-item-type-post_type menu-item-object-page menu-item-18"><a href="/sort/new">最新发布</a>
 42 |                 </li>
 43 |                 <li class="menu-item menu-item-type-post_type menu-item-object-page menu-item-18"><a href="/sort/hot">最受欢迎</a>
 44 |                 </li>
 45 |             </ul>
 46 |         </li>
 47 |         <li><a href="/tag">标签云</a></li>
 48 |         <li class="menu-item menu-item-type-taxonomy menu-item-object-category current-menu-item menu-item-28"><a
 49 |                 href="/video">视频小姐姐</a></li>
 50 |     </ul>
 51 | {% endblock menu %}
 52 | <!-- menu end-->
 53 | <!-- section start-->
 54 | {% block section %}
 55 | 
 56 |     <div style="width: 30%;position: relative;top: 50%;left: 50%;transform: translate(-50%);">
 57 |         <video id="player" src="{{ url }}" webkit-playsinline playsinline loop autoplay="autoplay"
 58 |                style="height: auto; max-width: 100%;" poster="/static/v_back.jpg"></video>
 59 |         <span style="position:absolute;top:15px;left:5px;color:#fff;">视频来源:[<span id="source"
 60 |                                                                                  style="color: #FF4040;">{{ source }}</span>]</span>
 61 |         <span style="position:absolute;top:30px;left:5px;color:#fff;">用户ID:[<span
 62 |                 style="color: #4682B4; cursor: pointer;" id="check">{{ user_id }}</span>]</span>
 63 |         <input id="next" type="button"
 64 |                style=" display: block; width: 100px;height: 100px;position: absolute;bottom: 5px;right: 3px; background:url(/static/zde/next.png) no-repeat left top"/>
 65 |     </div>
 66 | 
 67 | {% endblock section %}
 68 | <!-- section end-->
 69 | {% block js %}
 70 | 
 71 |     <script>
 72 |         (function (window, document) {
 73 |             var user_id ={{ user_id }};
 74 |             if (top != self) {
 75 |                 window.top.location.replace(self.location.href);
 76 |             }
 77 |             var get = function (id) {
 78 |                 return document.getElementById(id);
 79 |             }
 80 |             var bind = function (element, event, callback) {
 81 |                 return element.addEventListener(event, callback);
 82 |             }
 83 |             var randomm = function () {
 84 |                 var ajax = new XMLHttpRequest();
 85 |                 ajax.open("get", "/get_video?tem=" + Math.random());
 86 |                 ajax.send()
 87 |                 ajax.onreadystatechange = function () {
 88 |                     if (ajax.readyState == 4 && ajax.status == 200) {
 89 |                         var obj = JSON.parse(ajax.responseText);
 90 |                         var uid = document.getElementById("check");
 91 |                         var source = document.getElementById("source");
 92 |                         uid.innerText = obj.user_id;
 93 |                         source.innerText = obj.source;
 94 |                         player.src = obj.url;
 95 |                         user_id = obj.user_id;
 96 |                         player.play()
 97 |                     }
 98 |                 }
 99 |                 //location.replace("?tmp="+Math.random());
100 |             }
101 | 
102 |             bind(get('next'), 'click', randomm);
103 |             bind(player, 'error', function () {
104 |                 randomm();
105 |             });
106 |             bind(get('check'), 'click', function () {
107 | 
108 |                 window.open("https://live.kuaishou.com/search/?keyword=" + user_id);
109 |             });
110 |             var video = document.getElementById("player");
111 |             video.onclick = function () {
112 |                 if (video.paused) {
113 |                     video.play();
114 |                 } else {
115 |                     video.pause();
116 |                 }
117 |             }
118 |             bind(player, 'ended', function () {
119 |                 if (auto) randomm();
120 |             });
121 |         })(window, document);
122 |     </script>
123 | {% endblock js %}


--------------------------------------------------------------------------------
/crawler/crawler_xmt.py:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | import sys
  3 | 
  4 | sys.path.append('../')
  5 | from bs4 import BeautifulSoup
  6 | import threading, pymysql, time, requests, os, urllib3, re
  7 | from config import mysql_config
  8 | 
  9 | requests.packages.urllib3.disable_warnings()
 10 | # 数据库连接信息
 11 | dbhost = {
 12 |     "host": mysql_config['HOST'],
 13 |     "dbname": mysql_config['NAME'],
 14 |     "user": mysql_config['USER'],
 15 |     "password": mysql_config['PASSWORD']
 16 | }
 17 | 
 18 | 
 19 | class Spider():
 20 |     headers = {
 21 |         'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) '
 22 |                       'Chrome/65.0.3325.181 Safari/537.36',
 23 |         'Referer': "http://www.xgmmtk.com/"
 24 |     }
 25 |     page_url_list = []
 26 |     img_url_list = []
 27 |     rlock = threading.RLock()
 28 |     # s = requests.session()
 29 | 
 30 |     def __init__(self, img_path='imgdir', thread_number=5):
 31 |         self.spider_url = 'http://www.xgmmtk.com/'
 32 |         self.img_path = img_path
 33 |         self.thread_num = thread_number
 34 | 
 35 |     def get_url(self):
 36 |         page = requests.get("http://www.xgmmtk.com/")
 37 |         soup = BeautifulSoup(page.text, "html.parser")
 38 |         a_soup = soup.find_all("a")
 39 |         for a in a_soup:
 40 |             url = "http://www.xgmmtk.com/" + a.get("href")
 41 |             self.page_url_list.append(url)
 42 | 
 43 |     def get_img(self):
 44 |         db = pymysql.connect(dbhost.get("host"), dbhost.get("user"), dbhost.get("password"), dbhost.get("dbname"))
 45 |         cursor = db.cursor()
 46 |         while True:
 47 |             self.rlock.acquire()
 48 |             if len(self.page_url_list) == 0:
 49 |                 self.rlock.release()
 50 |                 break
 51 |             else:
 52 |                 page_url = self.page_url_list.pop()
 53 |                 self.rlock.release()
 54 |                 page = requests.get(page_url)
 55 |                 soup=BeautifulSoup(page.text,"html.parser")
 56 |                 title=soup.title.string.replace("�","")
 57 |                 isExists = cursor.execute(
 58 |                     "SELECT title FROM images_page WHERE title =" + "'" + title + "'" + " limit 1;")
 59 |                 if isExists == 0:
 60 |                     print("添加采集：",title)
 61 |                     if "袜" in title or "丝" in title or "腿" in title:
 62 |                         type_id = 2
 63 |                         tagidlist=[3679,3700,3719,3628]
 64 |                     elif "青春" in title or "清纯" in title or "萝莉" in title:
 65 |                         tagidlist=[3694,3627,3635]
 66 |                         type_id = 3
 67 |                     else:
 68 |                         tagidlist=[3630,3623,3618,3642]
 69 |                         type_id = 1
 70 |                     p = (
 71 |                         title, str(tagidlist), time.strftime('%Y-%m-%d', time.localtime(time.time())), type_id,
 72 |                         "1",
 73 |                         page_url)
 74 |                     cursor.execute(
 75 |                         "INSERT INTO images_page (title,tagid,sendtime,typeid,firstimg,crawler) VALUES (%s,%s,%s,%s,%s,%s)",
 76 |                         p)
 77 |                     pageid = cursor.lastrowid
 78 |                     img = soup.find_all("img")
 79 |                     i=0
 80 |                     page_id=page_url[page_url.find("?id=")+4:-1]
 81 |                     img_path = self.img_path + time.strftime('%Y%m%d', time.localtime(
 82 |                         time.time())) + "/" +page_id + "/"
 83 |                     for imgurl in img:
 84 |                         imgsrc = "http://www.xgmmtk.com/" + imgurl.get("src")
 85 |                         self.img_url_list.append(
 86 |                             {"img_url": imgsrc, "Referer": page_url,
 87 |                              "id": page_id})
 88 |                         if i==0:
 89 |                             cursor.execute(
 90 |                                 "UPDATE images_page SET firstimg = " + "'" + img_path+imgsrc.split("/")[-1] + "'" + " WHERE id=" + "'" + str(
 91 |                                     pageid) + "'")
 92 |                         i+=1
 93 |                 else:
 94 |                     print("已采集")
 95 |                     pass
 96 |     def down_img(self,imgsrc,Referer,id):
 97 |         headers = {
 98 |             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36",
 99 |             "Referer": Referer
100 |         }
101 |         path = self.img_path + time.strftime('%Y%m%d', time.localtime(time.time())) + "/"
102 |         page_id = id
103 |         isdata = os.path.exists("../" + path + page_id)
104 |         if not isdata:
105 |             os.makedirs("../" + path + page_id)
106 |         with open("../" + path + page_id + "/" + imgsrc.split("/")[-1], "wb") as f:
107 |             print("已保存：" ,imgsrc)
108 |             f.write(requests.get(imgsrc, headers=headers,verify=False).content)
109 | 
110 | 
111 | 
112 |     def run_img(self):
113 |         while True:
114 |             Spider.rlock.acquire()
115 |             if len(self.img_url_list) == 0 :
116 |                 Spider.rlock.release()
117 |                 continue
118 |             else:
119 |                 urls = self.img_url_list.pop()
120 |                 url = urls.get("img_url")
121 |                 Referer = urls.get("Referer")
122 |                 id = urls.get("id")
123 |                 Spider.rlock.release()
124 |                 try:
125 |                     self.down_img(url, Referer, id)
126 |                 except Exception as e:
127 |                     pass
128 | 
129 |     def run(self):
130 |         # 启动thread_num个来下载图片
131 |         for img_th in range(self.thread_num):
132 |             download_t = threading.Thread(target=self.run_img)
133 |             download_t.start()
134 | 
135 |         for img_th in range(self.thread_num):
136 |             run_t = threading.Thread(target=self.get_img)
137 |             run_t.start()
138 | 
139 | if __name__ == "__main__":
140 |     spider=Spider(img_path='/static/images/',thread_number=10)
141 |     spider.get_url()
142 |     spider.run()
143 | 


--------------------------------------------------------------------------------
/templates/zde/page.html:
--------------------------------------------------------------------------------
  1 | {% extends 'base.html' %}
  2 | 
  3 | <!-- header start -->
  4 | {% block title %}{{ title }} - {{ siteName }}{% endblock title %}
  5 | {% block keywords %}{% for t in tag %}{{ t.tname }},{% endfor %}{% endblock keywords %}
  6 | {% block description %}由{{ siteName }}为您整理的高清《{{ title }}》图集{% endblock description %}
  7 | <!-- header end -->
  8 | {% block focusbox %}
  9 | <div class="focusbox">
 10 |     <div class="container"><h1 class="focusbox-title">{{ title }}</h1>
 11 |         <div class="article-tags">
 12 |             {% for t in tag %}
 13 |                 <a href="{% url 'tag' t.tid %}">{{ t.tname }}</a>
 14 |             {% endfor %}
 15 |         </div>
 16 |         <div style="padding-top: 5px;font-size:12px;/* margin-top: 0px; */font-family: monospace;color: #978883;">
 17 |             点击图片自动播放
 18 |         </div>
 19 |     </div>
 20 | </div>
 21 | {% endblock focusbox %}
 22 | <!-- menu start-->
 23 | {% block menu %}
 24 |     <ul>
 25 |         <li><a href="/">随便看看</a></li>
 26 |         <li class="menu-item menu-item-type-post_type menu-item-object-page current-menu-ancestor current-menu-parent current_page_parent current_page_ancestor menu-item-has-children menu-item-17">
 27 |             <a>妹子图</a>
 28 |             <ul class="sub-menu">
 29 |                 {% for types in typelist %}
 30 |                     {% ifequal typeid types.type_id %}
 31 |                         <li class="menu-item menu-item-type-taxonomy menu-item-object-category current-menu-item menu-item-28">
 32 |                             <a href="{% url 'type' types.type_id %} ">{{ types.type }}</a></li>
 33 |                     {% else %}
 34 |                         <li><a href="{% url 'type' types.type_id %}">{{ types.type }}</a></li>
 35 |                     {% endifequal %}
 36 |                 {% endfor %}
 37 |             </ul>
 38 |         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-has-children menu-item-50716">
 39 |             <a>排行榜</a>
 40 |             <ul class="sub-menu">
 41 |                 <li class="menu-item menu-item-type-post_type menu-item-object-page menu-item-18"><a href="/sort/new">最新发布</a>
 42 |                 </li>
 43 |                 <li class="menu-item menu-item-type-post_type menu-item-object-page menu-item-18"><a href="/sort/hot">最受欢迎</a>
 44 |                 </li>
 45 |             </ul>
 46 |         </li>
 47 |         <li><a href="/tag">标签云</a></li>
 48 |         <li><a href="/video">视频小姐姐</a></li>
 49 |     </ul>
 50 | {% endblock menu %}
 51 | <!-- menu end-->
 52 | <!-- section start-->
 53 | {% block section %}
 54 | 
 55 | 
 56 | 	<article class="article-content" id="img-box">
 57 | 	{% for img in data %}
 58 | 		<p><img class="alignnone size-medium wp-image-42" src="/static/zde/timg.gif" data-src="{{ img }}" alt="{{ title }}" style="width:800px;hight:auto" />
 59 | 		<!--<p><img class="alignnone size-medium wp-image-42" src="{{ img }}" alt="{{ title }}" style="width:800px;hight:auto" />-->
 60 | 		{% endfor %}
 61 | 	</p>
 62 | 	</article>
 63 | {% endblock section %}
 64 | <!-- section end-->
 65 | {% block recommend %}
 66 |     <div class="line_03"><b></b><span>相关推荐</span><b></b></div>
 67 | <div class="excerpts-article">
 68 |     <div class="excerpts">
 69 |         {% for items in similar %}
 70 |             <article class="excerpt excerpt-c5">
 71 |                 <div class="thumbnail">
 72 | 				<a href="/type/{{ items.type_id }}" class="imgbox-a"> {{ items.type }}</a>
 73 |                     <div class="imgbox" style="background-image:url({{ items.firstimg }})" onclick="window.location.href= '{% url 'article' items.pid %}';return false">  
 74 |                     </div>
 75 |                 </div>
 76 |                 <h2><a href="{% url 'article' items.pid %}">{{ items.stitle }}</a></h2>
 77 |                 <footer>
 78 |                     <hot>
 79 |                         <img src="/static/zde/hot.png">{{ items.hot }}
 80 |                     </hot>
 81 |                     <a class="post-like">
 82 |                         <time>{{ items.sendtime |date:'Y-m-d' }}</time>
 83 |                     </a>
 84 |                 </footer>
 85 |             </article>
 86 |         {% endfor %}
 87 |     </div>
 88 | </div>
 89 | {% endblock recommend %}
 90 | 
 91 | {% block js %}
 92 | <script>
 93 |     var num = document.getElementsByTagName('img').length;
 94 |     var img = document.getElementsByClassName("alignnone size-medium wp-image-42");
 95 |     var n = 0; //存储图片加载到的位置，避免每次都从第一张图片开始遍历
 96 |     lazyload(); //页面载入完毕加载可是区域内的图片
 97 |     //window.onscroll = lazyload;
 98 |     function lazyload() { //监听页面滚动事件
 99 |         var seeHeight = document.documentElement.clientHeight; //可见区域高度
100 |         var scrollTop = document.documentElement.scrollTop || document.body.scrollTop; //滚动条距离顶部高度
101 |         for (var i = n; i < num; i++) {
102 | 		    if (typeof(img[i])=="undefined" || img[i]=='' || img[i]==null){
103 | 			        break; 
104 | 			}else{
105 |             if (img[i].offsetTop < seeHeight + scrollTop) {
106 |                 if (img[i].getAttribute("src") == "/static/zde/timg.gif") {
107 |                     img[i].src = img[i].getAttribute("data-src");
108 |                 }
109 |                 n = i + 1;
110 |             }
111 |         }
112 |     }
113 | 	}
114 | 	
115 | 	
116 | 	
117 | // 简单的节流函数
118 | //fun 要执行的函数
119 | //delay 延迟
120 | //time  在time时间内必须执行一次
121 | function throttle(fun, delay, time) {
122 |     var timeout,
123 |         startTime = new Date();
124 |     return function() {
125 |         var context = this,
126 |             args = arguments,
127 |             curTime = new Date();
128 |         clearTimeout(timeout);
129 |         // 如果达到了规定的触发时间间隔，触发 handler
130 |         if (curTime - startTime >= time) {
131 |             fun.apply(context, args);
132 |             startTime = curTime;
133 |             // 没达到触发间隔，重新设定定时器
134 |         } else {
135 |             timeout = setTimeout(function(){
136 | 	            fun.apply(context, args);
137 |             }, delay);
138 |         }
139 |     };
140 | };
141 | // 实际想绑定在 scroll 事件上的 handler
142 | //function lazyload(event) {}
143 | // 采用了节流函数
144 | window.addEventListener('scroll',throttle(lazyload,200,500));
145 |  
146 | </script>
147 | {% endblock js %}
148 | 


--------------------------------------------------------------------------------
/crawler/crawle_mzt.py:
--------------------------------------------------------------------------------
  1 | # coding='UTF-8'
  2 | import sys
  3 | 
  4 | sys.path.append('../')
  5 | from bs4 import BeautifulSoup
  6 | import threading, pymysql, time, requests, os, urllib3
  7 | from config import mysql_config
  8 | 
  9 | requests.packages.urllib3.disable_warnings()
 10 | 
 11 | 
 12 | class Spider():
 13 |     headers = {
 14 |         'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) '
 15 |                       'Chrome/65.0.3325.181 Safari/537.36',
 16 |         'Referer': "https://www.mzitu.com"
 17 |     }
 18 |     page_url_list = []
 19 |     img_url_list = []
 20 |     rlock = threading.RLock()
 21 |     s = requests.session()
 22 |     s.keep_alive = False
 23 |     dbhost = {
 24 |         "host": mysql_config['HOST'],
 25 |         "dbname": mysql_config['NAME'],
 26 |         "user": mysql_config['USER'],
 27 |         "password": mysql_config['PASSWORD']
 28 |     }
 29 | 
 30 |     def __init__(self, page_num=10, img_path='imgdir', thread_num=5, type="xinggan", type_id=1):
 31 |         self.spider_url = 'https://www.mzitu.com/'
 32 |         self.page_number = int(page_num)
 33 |         self.img_path = img_path
 34 |         self.thread_num = thread_num
 35 |         self.type = type
 36 |         self.type_id = type_id
 37 | 
 38 |     def get_url(self):
 39 |         for i in range(1, self.page_number + 1):
 40 |             if i ==1:
 41 |                 page = self.s.get(self.spider_url +"/"+self.type ,verify=False).text
 42 |             page=self.s.get(self.spider_url +"/"+self.type+"/page/"+str(i),verify=False).text
 43 |             soup = BeautifulSoup(page, "html.parser")
 44 |             page_base_url = soup.find("div",class_="postlist").find_all("li")
 45 |             for page_url in page_base_url:
 46 |                 url = page_url.find("a").get("href")
 47 |                 self.page_url_list.append(url)
 48 |             i = i + 1
 49 | 
 50 |     def get_img_url(self):
 51 |         db = pymysql.connect(self.dbhost.get("host"), self.dbhost.get("user"), self.dbhost.get("password"),
 52 |                              self.dbhost.get("dbname"))
 53 |         cursor = db.cursor()
 54 |         for img_base_url in self.page_url_list:
 55 |             tagidlist = []
 56 |             img_soup = BeautifulSoup(self.s.get(img_base_url,verify=False).text, "html.parser")
 57 |             img_num = img_soup.find("div", class_="pagenavi").text.split("…")[-1][0:-5]
 58 |             img_url = img_soup.find("div", class_="main-image").find("img").get("src").split("/")[0:-1]
 59 |             img_surl = "/".join(img_url)
 60 |             title = img_soup.find("h2", class_="main-title").text
 61 |             isExists = cursor.execute("SELECT * FROM images_page WHERE title =" + "'" + title + "'" + " limit 1;")
 62 |             tag_list = img_soup.find("div", class_="main-tags").find_all("a")
 63 |             if isExists == 1:
 64 |                 print("已采集：" + title)
 65 |             else:
 66 |                 for tags in tag_list:
 67 |                     tag=tags.text
 68 |                     print(tag)
 69 |                     sqltag = "SELECT * FROM images_tag WHERE tag =" + "'" + tag + "'" + " limit 1;"
 70 |                     isExiststag = cursor.execute(sqltag)
 71 |                     if isExiststag != 1:
 72 |                         cursor.execute("INSERT INTO images_tag (tag) VALUES (%s)", tag)
 73 |                     cursor.execute("SELECT id FROM images_tag WHERE tag =" + "'" + tag + "'")
 74 |                     for id in cursor.fetchall():
 75 |                         tagidlist.append(id[0])
 76 |                 p = (title, str(tagidlist), time.strftime('%Y-%m-%d', time.localtime(time.time())), self.type_id, "1")
 77 |                 cursor.execute("INSERT INTO images_page (title,tagid,sendtime,typeid,firstimg) VALUES (%s,%s,%s,%s,%s)",
 78 |                                p)
 79 |                 print("开始采集：" + title)
 80 |                 pageid = cursor.lastrowid
 81 |                 for i in range(1, int(img_num)):
 82 |                     temp_url = img_soup.find("div", class_="main-image").find("img").get("src").split("/")
 83 |                     path = temp_url[-1][0:3]
 84 |                     new_url = img_surl + "/" + path + str("%02d" % i) + ".jpg"
 85 |                     img_src = temp_url[-3] + "/" + temp_url[-2] + "/" + path + str("%02d" % i) + ".jpg"
 86 |                     imgp = pageid, self.img_path + img_src
 87 |                     cursor.execute("INSERT INTO images_image (pageid,imageurl) VALUES (%s,%s)", imgp)
 88 |                     if i == 1:
 89 |                         cursor.execute(
 90 |                             "UPDATE images_page SET firstimg = " + "'" + self.img_path + img_src + "'" + " WHERE title=" + "'" + title + "'")
 91 |                     self.img_url_list.append(new_url)
 92 |                     i = i + 1
 93 |         db.close()
 94 | 
 95 |     def down_img(self, imgsrc):
 96 |         path = imgsrc.split("/")[-3] + "/" + imgsrc.split("/")[-2]
 97 |         isdata = os.path.exists("../" + self.img_path + path)
 98 |         if isdata == False:
 99 |             os.makedirs("../" + self.img_path + path)
100 |         with open("../" + self.img_path + path + "/" + imgsrc.split("/")[-1], "wb")as f:
101 |             print("下载图片：" + self.img_path + path + "/" + imgsrc.split("/")[-1])
102 |             f.write(requests.get(imgsrc, headers=self.headers, verify=False).content)
103 | 
104 |     def down_url(self):
105 |         while True:
106 |             Spider.rlock.acquire()
107 |             if len(Spider.img_url_list) == 0:
108 |                 Spider.rlock.release()
109 |                 break
110 |             else:
111 |                 img_url = Spider.img_url_list.pop()
112 |                 Spider.rlock.release()
113 |                 try:
114 |                     self.down_img(img_url)
115 |                 except Exception as e:
116 |                     pass
117 | 
118 |     def run(self):
119 |         # 启动thread_num个来下载图片
120 |         for img_th in range(self.thread_num):
121 |             download_t = threading.Thread(target=self.down_url)
122 |             download_t.start()
123 | 
124 | 
125 | if __name__ == '__main__':
126 |     for i in [{"page": 1, "type": "xinggan", "type_id": 1},]:
127 |         spider = Spider(page_num=i.get("page"), img_path='/static/images/', thread_num=10, type_id=i.get("type_id"),
128 |                         type=i.get("type"))
129 |         spider.get_url()
130 |         spider.get_img_url()
131 |         spider.run()
132 | 


--------------------------------------------------------------------------------
/crawler/crawler_mtl.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python3
  2 | import sys
  3 | 
  4 | sys.path.append('../')
  5 | from bs4 import BeautifulSoup
  6 | import threading,pymysql,time,requests,os,urllib3
  7 | from config import mysql_config
  8 | requests.packages.urllib3.disable_warnings()
  9 | requests.adapters.DEFAULT_RETRIES = 5
 10 | 
 11 | class Spider():
 12 |     headers = {
 13 |         'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) '
 14 |                       'Chrome/65.0.3325.181 Safari/537.36',
 15 |         'Referer': "https://www.meitulu.com"
 16 |     }
 17 |     page_url_list = []
 18 |     img_url_list = []
 19 |     rlock = threading.RLock()
 20 |     s=requests.session()
 21 |     s.keep_alive = False
 22 |     dbhost = {
 23 |         "host": mysql_config['HOST'],
 24 |         "dbname": mysql_config['NAME'],
 25 |         "user": mysql_config['USER'],
 26 |         "password": mysql_config['PASSWORD']
 27 |     }
 28 | 
 29 |     def __init__(self,page_number=10,img_path='imgdir',thread_number=5,type='xinggan',type_id=1):
 30 |         self.spider_url = 'https://www.meitulu.com/t/'+type
 31 |         self.page_number = int(page_number)
 32 |         self.img_path = img_path
 33 |         self.thread_num = thread_number
 34 |         self.type_id = type_id
 35 | 
 36 |     def get_url(self):
 37 |         db = pymysql.connect(self.dbhost.get("host"), self.dbhost.get("user"), self.dbhost.get("password"),
 38 |                              self.dbhost.get("dbname"))
 39 |         cursor = db.cursor()
 40 |         for i in range(1, self.page_number+1):
 41 |             page_base_url = BeautifulSoup(requests.get(self.spider_url + "/" + str(i) + ".html").content.decode("utf-8"),
 42 |                                           "html.parser")
 43 |             if i == 1:
 44 |                 page_base_url = BeautifulSoup(requests.get(self.spider_url).content.decode("utf-8"), "html.parser")
 45 |             img_ul = page_base_url.find("ul", class_="img").find_all("li")
 46 |             for img_li in img_ul:
 47 |                 page_url = img_li.find("p", class_="p_title").find("a").get("href")
 48 |                 self.page_url_list.append(page_url)
 49 |         db.close()
 50 | 
 51 |     def get_img_url(self):
 52 |         db = pymysql.connect(self.dbhost.get("host"), self.dbhost.get("user"), self.dbhost.get("password"), self.dbhost.get("dbname"))
 53 |         cursor = db.cursor()
 54 |         for page_url in reversed(self.page_url_list):
 55 |             tagidlist = []
 56 |             img_div_soup = BeautifulSoup(requests.get(page_url).content.decode("utf-8"), "html.parser")
 57 |             img_base_url = img_div_soup.find("img", class_="content_img").get("src").split("/")
 58 |             img_url = "/".join(img_base_url[0:-1])
 59 |             title = img_div_soup.find("div", class_="weizhi").find("h1").text.replace(" 萝莉丝袜写真套图","")
 60 |             isExists =cursor.execute("SELECT * FROM images_page WHERE title =" + "'" + title + "'" + " limit 1;")
 61 |             if isExists != 0:
 62 |                 print ("已采集:"+title)
 63 |             else:
 64 |                 tag_list = img_div_soup.find("div", class_="fenxiang_l").find_all("a")
 65 |                 for tag in tag_list:
 66 |                     sqltag = "SELECT * FROM images_tag WHERE tag =" + "'" + tag.text + "'" + " limit 1;"
 67 |                     isExiststag = cursor.execute(sqltag)
 68 |                     if isExiststag == 0:
 69 |                         cursor.execute("INSERT INTO images_tag (tag) VALUES (%s)", tag.text)
 70 |                     cursor.execute("SELECT id FROM images_tag WHERE tag =" + "'" + tag.text + "'")
 71 |                     for id in cursor.fetchall():
 72 |                         tagidlist.append(id[0])
 73 |                 p = (
 74 |                 title, str(tagidlist), time.strftime('%Y-%m-%d', time.localtime(time.time())), self.type_id, "1", page_url)
 75 |                 cursor.execute(
 76 |                     "INSERT INTO images_page (title,tagid,sendtime,typeid,firstimg,crawler) VALUES (%s,%s,%s,%s,%s,%s)",
 77 |                     p)
 78 |                 pageid =cursor.lastrowid
 79 |                 ima_num_tem = img_div_soup.find("div", id="pages").text
 80 |                 img_num = ima_num_tem[-6:-4]
 81 |                 i = 1
 82 |                 for i in range(1, int(img_num)):
 83 |                     img_src = img_url + "/" + str(i) + "." + img_base_url[-1].split(".")[-1]
 84 |                     img_loc_path = self.img_path + img_base_url[-2]+"/"+ str(i) + "." + img_base_url[-1].split(".")[-1]
 85 |                     imgp = pageid, img_loc_path,img_src
 86 |                     if i == 1:
 87 |                         cursor.execute(
 88 |                             "UPDATE images_page SET firstimg = " + "'" + img_loc_path + "'" + " WHERE title=" + "'" + title + "'")
 89 |                     i = i + 1
 90 |                     cursor.execute("INSERT INTO images_image (pageid,imageurl,originurl) VALUES (%s,%s,%s)", imgp)
 91 |                     self.img_url_list.append(img_src)
 92 |                 print("添加："+title)
 93 |         db.close()
 94 | 
 95 |     def down_img(self,imgsrc):
 96 |         path = imgsrc.split("/")[-2]
 97 |         isdata = os.path.exists("../" + self.img_path + path)
 98 |         if isdata == False:
 99 |             os.makedirs("../" + self.img_path + path)
100 |         with open("../" + self.img_path + path + "/" + imgsrc.split("/")[-1], "wb")as f:
101 |             f.write(requests.get(imgsrc, headers=self.headers, verify=False).content)
102 | 
103 |     def down_url(self):
104 |         while True:
105 |             Spider.rlock.acquire()
106 |             if len(Spider.img_url_list) == 0:
107 |                 Spider.rlock.release()
108 |                 break
109 |             else:
110 |                 img_url = Spider.img_url_list.pop()
111 |                 Spider.rlock.release()
112 |                 try:
113 |                     self.down_img(img_url)
114 |                 except Exception as e:
115 |                     pass
116 | 
117 | 
118 |     def run(self):
119 |         # print("开始下载")
120 |         # 启动thread_num个进程来爬去具体的img url 链接
121 |         for th in range(self.thread_num):
122 |             add_pic_t = threading.Thread(target=self.get_img_url)
123 |             add_pic_t.start()
124 | 
125 |         # 启动thread_num个来下载图片
126 |         for img_th in range(self.thread_num):
127 |             download_t = threading.Thread(target=self.down_url)
128 |             download_t.start()
129 | 
130 | 
131 | if __name__ == '__main__':
132 |     for i in [{"page": 3, "type": "1290", "type_id": 4}]:
133 |         spider = Spider(page_number=i.get("page"), img_path='/static/images/', thread_number=10,type=i.get("type"),type_id=i.get("type_id"))
134 |         spider.get_url()
135 |         spider.get_img_url()
136 |         spider.run()


--------------------------------------------------------------------------------
/install.sh:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env bash
  2 | 
  3 | function install_mysql(){
  4 |     cd ~/$tmp
  5 | 	yum install ncurses-devel libaio-devel cmake gcc gcc-c++ make autoconf -y
  6 | 	wget http://dev.mysql.com/get/Downloads/MySQL-5.6/mysql-5.6.21.tar.gz
  7 | 	tar -zxvf mysql-5.6.21.tar.gz
  8 | 	cd mysql-5.6.21
  9 | 
 10 | 	cmake .
 11 | 	make
 12 | 	sudo make install
 13 | 
 14 | 
 15 | 	sudo groupadd mysql
 16 | 	sudo useradd -r -g mysql mysql
 17 | 
 18 | 	cd /usr/local/mysql/
 19 | 	sudo chown -R root .
 20 | 	sudo chown -R mysql data
 21 | 
 22 | 
 23 | 	sudo yum install perl-Data-Dumper -y
 24 | 
 25 | 	sudo scripts/mysql_install_db --user=mysql
 26 | 	sudo cp support-files/my-default.cnf /etc/my.cnf
 27 | 
 28 | 	sudo cp support-files/mysql.server /etc/init.d/mysql
 29 | 	sudo chmod u+x /etc/init.d/mysql
 30 | 	sudo chkconfig --add mysql
 31 | 	# MySQL 环境变量
 32 | 	cd ~
 33 | 	echo 'if [ -d "/usr/local/mysql/bin" ] ; then
 34 | 		PATH=$PATH:/usr/local/mysql/bin
 35 | 		export PATH
 36 | 	fi' > env_mysql.sh
 37 | 	sudo cp env_mysql.sh /etc/profile.d/env_mysql.sh
 38 | 	touch /usr/local/vagrant.mysql.lock
 39 | 	ln -s /usr/local/mysql/bin/mysql /usr/bin
 40 | 	systemctl start mysql
 41 | 	mysql -uroot -e "CREATE DATABASE $db_name;" 
 42 | 	echo "Mysql install successful"
 43 | }
 44 | 
 45 | function install_python(){
 46 | 	# the version of python
 47 | 	version="3.8.0"
 48 | 	# the installation directory of python
 49 | 	python3_install_dir="/usr/local/python3"
 50 | 	cd ~/$tmp
 51 | 	file_name="Python-$version.tgz"
 52 | 	sudo yum -y install zlib-devel bzip2-devel openssl-devel ncurses-devel sqlite-devel readline-devel tk-devel gcc make libffi-devel
 53 | 	rm `pwd`"/$file_name"
 54 | 	wget "https://www.python.org/ftp/python/$version/$file_name"
 55 | 	mkdir $tmp
 56 | 	tar -xf $file_name -C $tmp
 57 | 	make_dir="$tmp/Python-$version"
 58 | 	cd $make_dir
 59 | 	mkdir -p $python3_install_dir
 60 | 	./configure --prefix=$python3_install_dir --with-ssl
 61 | 	sudo make
 62 | 	sudo make install
 63 | 	ln -s /usr/local/python3/bin/python3 /usr/bin/python3
 64 | 	cd ~/tmp
 65 | 	wget --no-check-certificate  https://pypi.python.org/packages/source/s/setuptools/setuptools-19.6.tar.gz
 66 | 	tar -zxvf setuptools-19.6.tar.gz
 67 | 	cd setuptools-19.6
 68 | 	python3 setup.py build
 69 | 	python3 setup.py install
 70 | 	ln -s /usr/local/python3/bin/pip3 /usr/bin/pip3
 71 | 	rm -rf ~/$tmp
 72 | 	echo "all in well !"
 73 | }
 74 | 
 75 | # -----------------------    安装mysql    ------------------------------
 76 | # 编译MySQL时间比较长，需要等很长时间
 77 | read -p "Allow Url: " allow_url
 78 | read -p "Site Name: " site_name
 79 | read -p "Site Url: " site_url
 80 | yum install wget git -y
 81 | git clone https://github.com/Turnright-git/94imm.git
 82 | yum install gcc mariadb-devel -y
 83 | cd "94imm"
 84 | path=$(pwd)
 85 | yum install yum install -y python3-devel
 86 | tmp="tmp"
 87 | mkdir ~/$tmp
 88 | if ! [ -x "$(command -v python3)" ]; then
 89 |     echo "Start the Python3 installation process"
 90 |     install_python
 91 | fi
 92 | if ! [ -x "$(command -v mysql)" ]; then
 93 |     echo "编译MySQL时间比较长，需要等很长时间,可自安装。行输入n退出"
 94 | 	read -p "(y , n):" isinstallmysql56
 95 | 	case "$isinstallmysql56" in
 96 | 	n|N|No|NO)
 97 |     exit
 98 | 	;;
 99 | 	*)
100 | 	esac
101 | 	echo "Start the MySQL installation process"
102 | 	install_mysql
103 | 	systemctl start mysql
104 | 	read -p "Create databases : " db_name
105 | 	read -p "Create databases password: " db_pass
106 | 	create_db_sql="create database IF NOT EXISTS ${db_name} DEFAULT CHARACTER SET utf8 COLLATE utf8_general_ci;"
107 | 	create_user="update mysql.user set password=password('${db_pass}') where user='root';"
108 | 	mysql -uroot -e "${create_db_sql}"
109 | 	mysql -uroot -e "${create_user}"
110 | 	mysql -uroot -e "${grant_user}"
111 | 	mysql -uroot -e "flush privileges;"
112 | else
113 |     read -p "Create databases : " db_name
114 | 	read -p "Password for root: " db_pass
115 | 	create_db_sql="create database IF NOT EXISTS ${db_name} DEFAULT CHARACTER SET utf8 COLLATE utf8_general_ci;"
116 | 	create_user="update mysql.user set password=password('${db_pass}') where user='root';"
117 | 	mysql -uroot -p$db_pass -e "${create_db_sql}"
118 | fi
119 | if ! [ -x "$(command -v nginx)" ]; then
120 |     cd ~/$tmp
121 |     wget https://nginx.org/download/nginx-1.16.0.tar.gz
122 | 	tar zxvf nginx-1.16.0.tar.gz
123 | 	cd nginx-1.16.0
124 | 	./configure --user=nobody --group=nobody --prefix=/usr/local/nginx --with-http_stub_status_module --with-http_gzip_static_module --with-http_realip_module --with-http_sub_module --with-http_ssl_module
125 | 	make && make install
126 | cd $path
127 | cat>/lib/systemd/system/nginx.service<<EOF
128 | [Unit]
129 | Description=nginx
130 | After=network.target
131 | 
132 | [Service]
133 | Type=forking
134 | ExecStart=/usr/local/nginx/sbin/nginx
135 | ExecReload=/usr/local/nginx/sbin/nginx -s reload
136 | ExecStop=/usr/local/nginx/sbin/nginx -s quit
137 | PrivateTmp=true
138 | 
139 | [Install]
140 | WantedBy=multi-user.target
141 | EOF
142 | 
143 | cat>/usr/local/nginx/conf/nginx.conf<<EOF
144 | #user  nobody;
145 | worker_processes  1;
146 | events {
147 |     worker_connections  1024;
148 | }
149 | 
150 | 
151 | http {
152 |     include       mime.types;
153 | 	include       /usr/local/nginx/conf/conf.d/*.conf;
154 |     default_type  application/octet-stream;
155 | 
156 | 
157 |     sendfile        on;
158 |     keepalive_timeout  65;
159 | 
160 |     server {
161 |         listen       80;
162 |         server_name  $site_url;
163 | 
164 |         #charset koi8-r;
165 | 
166 |         #access_log  logs/host.access.log  main;
167 | 
168 |         location / {
169 | 		proxy_pass    http://127.0.0.1:8000;
170 | 		index index.html index.htm;
171 | 		}
172 | 
173 |         
174 |         error_page   500 502 503 504  /50x.html;
175 |         location = /50x.html {
176 |             root   html;
177 |         }
178 |     }
179 | }
180 | EOF
181 | /usr/local/nginx/sbin/nginx
182 | fi	
183 | mysql -uroot -p$db_pass $db_name < $path/94imm.sql
184 | cat>"$path/uwsgi.ini"<<EOF
185 | [uwsgi]
186 | chdir=$path/
187 | file=$path/silumz/wsgi.py      
188 | socket=$path/uwsgi.sock     
189 | workers=2
190 | pidfile=$path/uwsgi.pid   
191 | http=127.0.0.1:8000
192 | static-map=/static=$path/static
193 | uid=root
194 | gid=root
195 | master=true
196 | vacuum=true
197 | thunder-lock=true
198 | enable-threads=true
199 | harakiri=30
200 | post-buffering=4096
201 | daemonize=$path/uwsgi.log
202 | stats=8001
203 | EOF
204 | cat>"$path/config.py"<<EOF
205 | mysql_config = {
206 |         'ENGINE': 'django.db.backends.mysql',
207 |         'NAME': '$db_name',
208 |         'USER': 'root',
209 |         'PASSWORD': '$db_name',
210 |         'HOST': '127.0.0.1',
211 |         'PORT': '3306',
212 |     }
213 | allow_url=['$allow_url','127.0.0.1']
214 | cache_time=300
215 | templates="zde"
216 | site_name="$site_name"
217 | site_url = "http://$site_url/"
218 | key_word = "关键词1,关键词2,关键词3"
219 | description = "这是一个高质量的自动爬虫"
220 | email = "amdin@$site_name"
221 | EOF
222 | cd $path
223 | rm -f /usr/local/lib/python3.6/site-packages/dj_pagination/templates/pagination/pagination.html
224 | cp templates/zde/pagination.html /usr/local/lib/python3.6/site-packages/dj_pagination/templates/pagination
225 | pip3 install -r requirements.txt
226 | uwsgi --ini uwsgi.ini
227 | rm -rf ../tmp


--------------------------------------------------------------------------------
/crawler/crawler_ytu.py:
--------------------------------------------------------------------------------
  1 | # coding='UTF-8'
  2 | 
  3 | import sys
  4 | 
  5 | sys.path.append('../')
  6 | from bs4 import BeautifulSoup
  7 | import threading, pymysql, time, requests, os, re
  8 | from config import mysql_config
  9 | 
 10 | requests.packages.urllib3.disable_warnings()
 11 | requests.adapters.DEFAULT_RETRIES = 5
 12 | s = requests.session()
 13 | s.keep_alive = False
 14 | # 数据库连接信息
 15 | dbhost = {
 16 |     "host": mysql_config['HOST'],
 17 |     "dbname": mysql_config['NAME'],
 18 |     "user": mysql_config['USER'],
 19 |     "password": mysql_config['PASSWORD']
 20 | }
 21 | 
 22 | base_url="https://www.yeitu.com/meinv/"
 23 | img_path='/static/images/'
 24 | 
 25 | class Spider():
 26 |     page_url_list = []
 27 |     img_url_list = []
 28 |     rlock = threading.RLock()
 29 |     exit = threading.Event()
 30 |     def __init__(self, start_page_num,end_page_num, img_path, thread_num, type="home",type_id='1'):
 31 |         self.start_page_num = start_page_num
 32 |         self.end_page_num = end_page_num
 33 |         self.img_path = img_path
 34 |         self.thread_num = thread_num
 35 |         self.type = type
 36 |         self.type_id=type_id
 37 | 
 38 |     def get_url(self):
 39 |         for i in range(self.start_page_num, self.end_page_num + 1):
 40 |             if i==1:
 41 |                 page_url=base_url + self.type
 42 |             else:
 43 |                 page_url=base_url + self.type+"/"+str(i)+".html"
 44 |             page = s.get(page_url, verify=False,timeout=5).text
 45 |             soup = BeautifulSoup(page, "html.parser")
 46 |             page_base_url = soup.find_all("li",class_="image-box")
 47 |             for page_url in page_base_url:
 48 |                 url = page_url.find("a").get("href")
 49 |                 self.page_url_list.append(url)
 50 | 
 51 |     def get_img(self, url):
 52 |         tagidlist = []
 53 |         db = pymysql.connect(dbhost.get("host"), dbhost.get("user"), dbhost.get("password"), dbhost.get("dbname"))
 54 |         cursor = db.cursor()
 55 |         page = s.get(url, verify=False,timeout=5)
 56 |         page.encoding="utf-8"
 57 |         soup = BeautifulSoup(page.text, "html.parser")
 58 |         title = soup.title.string.replace(" - 美女 - 亿图全景图库", "").replace(" ","")
 59 |         isExists = cursor.execute("SELECT title FROM images_page WHERE title =" + "'" + title + "'" + " limit 1;")
 60 |         if isExists != 0:
 61 |             print("已采集：" , title)
 62 |         else:
 63 |             print("正在采集：", title)
 64 |             tags = soup.find("div",class_="related_tag box").find("p").find_all("a")
 65 |             for tag_a in tags:
 66 |                 tag=tag_a.text
 67 |                 sqltag = "SELECT * FROM images_tag WHERE tag =" + "'" + tag + "'" + " limit 1;"
 68 |                 isExiststag = cursor.execute(sqltag)
 69 |                 if isExiststag == 0:
 70 |                     cursor.execute("INSERT INTO images_tag (tag) VALUES (%s)", tag)
 71 |                 cursor.execute("SELECT id FROM images_tag WHERE tag =" + "'" + tag + "'")
 72 |                 for id in cursor.fetchall():
 73 |                     tagidlist.append(id[0])
 74 |             p = (title, str(tagidlist), time.strftime('%Y-%m-%d', time.localtime(time.time())), self.type_id, "1",url)
 75 |             cursor.execute("INSERT INTO images_page (title,tagid,sendtime,typeid,firstimg,crawler) VALUES (%s,%s,%s,%s,%s,%s)", p)
 76 |             pageid = cursor.lastrowid
 77 |             img_num=soup.find("div",id="pages").text.replace("上一页","").replace("下一页","").split("..")[-1].replace("  ","")
 78 |             for i in range(1, int(img_num) + 1):
 79 |                 img_p_url=".".join(url.split(".")[0:-1])+"_"+str(i)+".html"
 80 |                 img_p_page=s.get(img_p_url, verify=False)
 81 |                 if i==1:
 82 |                     img_p = soup.find("div", class_="img_box").find("img").get("src")
 83 |                 else:
 84 |                     img_p_soup=BeautifulSoup(img_p_page.text,"html.parser")
 85 |                     img_p=img_p_soup.find("div",class_="img_box").find("img").get("src")
 86 |                 img_name = img_p.split("/")[-1]
 87 |                 img_base = img_name[8:12]
 88 |                 img_path = self.img_path + time.strftime('%Y%m%d', time.localtime(
 89 |                     time.time())) + "/" + img_base + "/"
 90 |                 imgp = pageid, img_path+img_name,img_p
 91 |                 cursor.execute("INSERT INTO images_image (pageid,imageurl,originurl) VALUES (%s,%s,%s)", imgp)
 92 |                 if i == 1:
 93 |                     cursor.execute(
 94 |                         "UPDATE images_page SET firstimg = " + "'" + img_path+img_name + "'" + " WHERE id=" + "'" + str(
 95 |                             pageid) + "'")
 96 |                 self.img_url_list.append({"img_url": img_p, "Referer": url,"id": img_base})
 97 | 
 98 | 
 99 |     def down_img(self,imgsrc,Referer,id):
100 |         headers = {
101 |             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36",
102 |             "Referer": Referer
103 |         }
104 |         path = self.img_path + time.strftime('%Y%m%d', time.localtime(time.time())) + "/"
105 |         page_id = id
106 |         isdata = os.path.exists("../" + path + page_id)
107 |         if not isdata:
108 |             os.makedirs("../" + path + page_id)
109 |         with open("../" + path + page_id + "/" + imgsrc.split("/")[-1].split(".")[0] + ".jpg", "wb") as f:
110 |             print("已保存：" + path + page_id + "/" + imgsrc.split("/")[-1].split(".")[0] + ".jpg")
111 |             f.write(s.get(imgsrc, headers=headers,verify=False,timeout=5).content)
112 | 
113 | 
114 |     def run_page(self):
115 |         while True:
116 |             Spider.rlock.acquire()
117 |             if len(self.page_url_list) == 0:
118 |                 Spider.rlock.release()
119 |                 break
120 |             else:
121 |                 page_url = self.page_url_list.pop()
122 |                 Spider.rlock.release()
123 |                 try:
124 |                     self.get_img(page_url)
125 |                 except Exception as e:
126 |                     pass
127 | 
128 |     def run_img(self):
129 |         while True:
130 |             Spider.rlock.acquire()
131 |             if len(self.img_url_list) == 0 :
132 |                 Spider.rlock.release()
133 |                 break
134 |             else:
135 |                 urls = self.img_url_list.pop()
136 |                 url = urls.get("img_url")
137 |                 Referer = urls.get("Referer")
138 |                 id = urls.get("id")
139 |                 Spider.rlock.release()
140 |                 try:
141 |                     self.down_img(url, Referer, id)
142 |                 except Exception as e:
143 |                     pass
144 | 
145 |     def run_1(self):
146 |         # 启动thread_num个进程来爬去具体的img url 链接
147 |         url_threa_list=[]
148 |         for th in range(self.thread_num):
149 |             add_pic_t = threading.Thread(target=self.run_page)
150 |             url_threa_list.append(add_pic_t)
151 | 
152 |         for t in url_threa_list:
153 |             t.setDaemon(True)
154 |             t.start()
155 | 
156 |         for t in url_threa_list:
157 |             t.join()
158 | 
159 |     def run_2(self):
160 |         # 启动thread_num个来下载图片
161 |         for img_th in range(self.thread_num):
162 |             download_t = threading.Thread(target=self.run_img)
163 |             download_t.start()
164 | 
165 | 
166 | # type是源站分类，type_id是本站分类id，start_page是开始页，end_page是结束页
167 | if __name__ == "__main__":
168 |     for i in [{"start_page": 1,"end_page":1, "type": "xinggan","type_id":"1"},{"start_page": 1,"end_page":1, "type": "wangluomeinv","type_id":"3"},{"start_page": 1,"end_page":1, "type": "siwameitui","type_id":"2"}]:
169 |         spider = Spider(start_page_num=i.get("start_page"),end_page_num=i.get("end_page"), img_path='/static/images/', thread_num=10,
170 |                         type=i.get("type"))
171 |         spider.get_url()
172 |         spider.run_1()
173 |         spider.run_2()


--------------------------------------------------------------------------------
/templates/zde/mVideo.html:
--------------------------------------------------------------------------------
  1 | <!doctype html>
  2 | <html lang="en">
  3 | <head>
  4 | <meta charset="UTF-8">
  5 | <link rel="icon" type="image/x-icon" href="favicon.ico" />
  6 | <meta name="viewport" content="width=device-width,initial-scale=1,minimum-scale=1,maximum-scale=1,user-scalable=no" />
  7 | <meta http-equiv="pragma" content="no-cache">
  8 | <meta http-equiv="Cache-Control" content="no-cache, must-revalidate">
  9 | <meta name="apple-touch-fullscreen" content="yes"/>
 10 | <meta name="screen-orientation" content="portrait"><!-- uc强制竖屏 -->
 11 | <meta name="x5-orientation" content="portrait"><!-- QQ强制竖屏 -->
 12 | <meta name="full-screen" content="yes"><!-- UC强制全屏 -->
 13 | <meta name="x5-fullscreen" content="true"><!-- QQ强制全屏 -->
 14 | <meta name="browsermode" content="application"><!-- UC应用模式 -->
 15 | <meta name="x5-page-mode" content="app"><!-- QQ应用模式 -->
 16 | <meta http-equiv="expires" content="0">
 17 | <script src="https://apps.bdimg.com/libs/jquery/2.1.4/jquery.min.js"></script>
 18 | <title>视频小姐姐-手机版-94iMM</title>
 19 | <style type="text/css">
 20 | *{
 21 | 	margin:0px; 
 22 | 	padding:0px;
 23 | 	touch-action: pan-y;
 24 | 	text-align: center;
 25 | }
 26 | .app{
 27 | 	width: 100%; 
 28 | 	height:100%; 
 29 | 	position: absolute; 
 30 | 	top:0px; 
 31 | 	right:0px; 
 32 | 	bottom: 0px; 
 33 | 	left:0px; 
 34 | 	overflow: hidden;
 35 | 	background-color: #ffffff;
 36 | }
 37 | .video{
 38 | 	width: 100%; 
 39 | 	height: 100%;
 40 | }
 41 | #my-video{
 42 | 	object-fit: cover; 
 43 | 	object-position: center center;
 44 | }
 45 | canvas {
 46 | 	display: block;
 47 | 	position: absolute;
 48 | 	bottom: 100px;
 49 | 	right: -24px;
 50 | 	z-index: 20;
 51 | 	cursor: pointer;
 52 | 	-webkit-tap-highlight-color: rgba(0, 0, 0, 0);
 53 | }
 54 | .journal-reward {
 55 | 	position: absolute;
 56 | 	bottom: 20px;
 57 | 	right: 20px;
 58 | 	height: 80px;
 59 | 	width: 80px;
 60 | 	display: block;
 61 | 	z-index: 21;
 62 | }
 63 | .m {
 64 | 	width: 240px;
 65 | 	height: 320px;
 66 | 	margin-left: auto;
 67 | 	margin-right: auto;
 68 | }
 69 | </style>
 70 | <script>if(/Android|webOS|iPhone|iPod|BlackBerry/i.test(navigator.userAgent)) {/*无动作*/}else{window.location.href = "/video";}</script>
 71 | </head>
 72 | <script> var url= '{{ url }}';</script><body>
 73 | <div id="app" class="app">
 74 | <video preload='auto' id='my-video' ref="video" :src="videoUrl" @click="player" loop autoplay="autoplay" webkit-playsinline='true' playsinline='true' x-webkit-airplay='true' x5-video-player-type='h5' x5-video-player-fullscreen='true' x5-video-ignore-metadata='true' width='100%' height='100%'><p>不支持video</p></video>
 75 | </div>
 76 | <span style="position:absolute;top:5px;left:5px;color:#fff;">视频来源:[<span style="color: #FF4040;" id ="source">{{ source }}</span>]</span>
 77 | <span style="position:absolute;top:25px;left:10px;color:#fff;">用户ID:[<span style="color: #4682B4;" id = "check">{{ user_id}}</span>]</span>
 78 | <span style="position:absolute;top:5px;right:10px;color:#fff;">←左滑切换视频</span>
 79 | <span style="position:absolute;top:25px;right:10px;color:#fff;">点击播放/暂停</span>
 80 | <span style="position:absolute;bottom:10px;left:10px;color:#fff;">视频采集自网络 | <a target="_blank" href="https://www.94imm.com/">返回94imm</a></span>
 81 | <div class="m"> <img src="/static/zde/reward.png" class="journal-reward" /> </div>
 82 | <script src="/static/zde/js/flutter-hearts-zmt.js" type="text/javascript" charset="utf-8"></script>
 83 | </body>
 84 | <script src="https://cdn.jsdelivr.net/npm/vue@2.6.11"></script>
 85 | <script type="text/javascript">
 86 |     // vue 解析
 87 |     var Application = new Vue({
 88 |         el: "#app",
 89 |         data: {
 90 |             videoUrl:'',
 91 |             video:null,
 92 |         },
 93 |         mounted: function(){
 94 |             this.videoUrl = url;
 95 |             this.video = this.$refs.video;
 96 |         },
 97 |         methods: {
 98 |             player:function(){
 99 |                 if(this.video.paused){
100 |                     // 播放
101 |                     this.video.play();
102 |                 }else{
103 |                     // 暂停
104 |                     this.video.pause()
105 |                 };            
106 |             }
107 |         }
108 |     });
109 | 
110 | var startx, starty;
111 |  
112 | //获得角度
113 | function getAngle(angx, angy) {
114 |     return Math.atan2(angy, angx) * 180 / Math.PI;
115 | };
116 |  
117 | //根据起点终点返回方向 1向上滑动 2向下滑动 3向左滑动 4向右滑动 0点击事件
118 | function getDirection(startx, starty, endx, endy) {
119 |     var angx = endx - startx;
120 |     var angy = endy - starty;
121 |     var result = 0;
122 |  
123 |     //如果滑动距离太短
124 |     if (Math.abs(angx) < 2 && Math.abs(angy) < 2) {
125 |         return result;
126 |     }
127 |  
128 |     var angle = getAngle(angx, angy);
129 |     if (angle >= -135 && angle <= -45) {
130 |         result = 1;
131 |     } else if (angle > 45 && angle < 135) {
132 |         result = 2;
133 |     } else if ((angle >= 135 && angle <= 180) || (angle >= -180 && angle < -135)) {
134 |         result = 3;
135 |     } else if (angle >= -45 && angle <= 45) {
136 |         result = 4;
137 |     }
138 |     return result;
139 | }
140 |  
141 | //手指接触屏幕
142 | document.addEventListener("touchstart", function(e){
143 |     startx = e.touches[0].pageX;
144 |     starty = e.touches[0].pageY;
145 | }, false);
146 |  
147 | //手指离开屏幕
148 | document.addEventListener("touchend", function(e) {
149 |     var endx, endy;
150 |     endx = e.changedTouches[0].pageX;
151 |     endy = e.changedTouches[0].pageY;
152 |     var direction = getDirection(startx, starty, endx, endy);
153 | 	
154 |     switch (direction) {
155 |         case 3:
156 | 		    var ajax = new XMLHttpRequest();
157 | 			ajax.open("get","/get_video?tem="+Math.random());
158 | 			ajax.send()
159 | 			ajax.onreadystatechange = function(){
160 | 			if(ajax.readyState == 4 && ajax.status == 200){
161 | 			    var player= document.getElementById("my-video");
162 | 				var obj = JSON.parse(ajax.responseText);  
163 |                 var uid=document.getElementById("check");
164 | 				var source=document.getElementById("source");
165 | 				uid.innerText = obj.user_id;
166 | 				source.innerText = obj.source;
167 | 				player.src = obj.url;
168 | 				player.play()
169 | 				}
170 | 			}
171 |             
172 |             break;
173 |         }
174 | }, false);
175 | 
176 | 
177 | </script>
178 | <script type="text/javascript">
179 |         /*7Core-CN - 网页鼠标点击特效（爱心）*/
180 |         !function (e, t, a) {function r() {for (var e = 0; e < s.length; e++) s[e].alpha <= 0 ? (t.body.removeChild(s[e].el), s.splice(e, 1)) : (s[e].y--, s[e].scale += .004, s[e].alpha -= .013, s[e].el.style.cssText = "left:" + s[e].x + "px;top:" + s[e].y + "px;opacity:" + s[e].alpha + ";transform:scale(" + s[e].scale + "," + s[e].scale + ") rotate(45deg);background:" + s[e].color + ";z-index:99999");requestAnimationFrame(r)}function n() {var t = "function" == typeof e.onclick && e.onclick;e.onclick = function (e) {t && t(), o(e)}}function o(e) {var a = t.createElement("div");a.className = "heart", s.push({el: a,x: e.clientX - 5,y: e.clientY - 5,scale: 1,alpha: 1,color: c()}), t.body.appendChild(a)}function i(e) {var a = t.createElement("style");a.type = "text/css";try {a.appendChild(t.createTextNode(e))} catch (t) {a.styleSheet.cssText = e}t.getElementsByTagName("head")[0].appendChild(a)}function c() {return "rgb(" + ~~(255 * Math.random()) + "," + ~~(255 * Math.random()) + "," + ~~(255 * Math.random()) + ")"}var s = [];e.requestAnimationFrame = e.requestAnimationFrame || e.webkitRequestAnimationFrame || e.mozRequestAnimationFrame || e.oRequestAnimationFrame || e.msRequestAnimationFrame || function (e) {setTimeout(e, 1e3 / 60)}, i(".heart{width: 10px;height: 10px;position: fixed;background: #f00;transform: rotate(45deg);-webkit-transform: rotate(45deg);-moz-transform: rotate(45deg);}.heart:after,.heart:before{content: '';width: inherit;height: inherit;background: inherit;border-radius: 50%;-webkit-border-radius: 50%;-moz-border-radius: 50%;position: fixed;}.heart:after{top: -5px;}.heart:before{left: -5px;}"), n(), r()}(window, document);
181 | </script>
182 | <script async src="https://www.googletagmanager.com/gtag/js?id=UA-92272800-2"></script>
183 | <script>
184 |     window.dataLayer = window.dataLayer || [];
185 | 
186 |     function gtag() {
187 |         dataLayer.push(arguments);
188 |     }
189 | 
190 |     gtag('js', new Date());
191 | 
192 |     gtag('config', 'UA-92272800-2');
193 | </script>
194 | </html>


--------------------------------------------------------------------------------
/crawler/crawler_mmjpg.py:
--------------------------------------------------------------------------------
  1 | #coding='UTF-8'
  2 | 
  3 | import sys
  4 | 
  5 | sys.path.append('../')
  6 | from bs4 import BeautifulSoup
  7 | from requests.adapters import HTTPAdapter
  8 | import threading,pymysql,time,requests,os,urllib3,re,random
  9 | from config import mysql_config
 10 | 
 11 | requests.packages.urllib3.disable_warnings()
 12 | requests.adapters.DEFAULT_RETRIES = 5
 13 | s = requests.session()
 14 | s.keep_alive = False
 15 | s.mount('http://', HTTPAdapter(max_retries=3))
 16 | # 数据库连接信息
 17 | dbhost = {
 18 |     "host": mysql_config['HOST'],
 19 |     "dbname": mysql_config['NAME'],
 20 |     "user": mysql_config['USER'],
 21 |     "password": mysql_config['PASSWORD']
 22 | }
 23 | 
 24 | class Spider():
 25 |     rlock = threading.RLock()
 26 |     page_url_list=[]
 27 |     img_url_list=[]
 28 |     proxy_dict = ""
 29 |     base_url="http://www.mmmjpg.com/"
 30 |     def __init__(self,start_page_num,end_page_num,img_path,thread_num,type):
 31 |         self.start_page_num=start_page_num
 32 |         self.end_page_num=end_page_num
 33 |         self.img_path=img_path
 34 |         self.thread_num=thread_num
 35 |         self.type=type
 36 | 
 37 |     def get_url(self):
 38 |         for i in range(self.start_page_num,self.end_page_num+1):
 39 |             if i==0:
 40 |                 page=s.get(self.base_url)
 41 |             else:
 42 |                 page=s.get(self.base_url+self.type+"/"+str(i))
 43 |             soup=BeautifulSoup(page.text, "html.parser")
 44 |             url_soup=soup.find("div",class_="pic").find("ul").find_all("li")
 45 |             for li in url_soup:
 46 |                 url=li.find("a").get("href")
 47 |                 self.page_url_list.append(url)
 48 | 
 49 |     def get_img(self,url):
 50 |         db = pymysql.connect(dbhost.get("host"), dbhost.get("user"), dbhost.get("password"),dbhost.get("dbname"))
 51 |         cursor = db.cursor()
 52 |         tagidlist=[]
 53 |         page_id = url.split("/")[-1]
 54 |         page_url=self.base_url+"mm/"+page_id
 55 |         headers = {
 56 |             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36",
 57 |             "Referer": page_url
 58 |         }
 59 |         info_page = s.get(self.base_url+"mm/" + page_id,headers=headers)
 60 |         info_page.encoding="utf-8"
 61 |         info_soup = BeautifulSoup(info_page.text,"html.parser")
 62 |         title=info_soup.find("div",class_="article").find("h1").text
 63 |         if "袜" in title or "丝" in title or "腿" in title:
 64 |             type_id = 2
 65 |         elif "青春" in title or "清纯" in title:
 66 |             type_id = 3
 67 |         elif "萝莉" in title:
 68 |             type_id = 4
 69 |         else:
 70 |             type_id = 1
 71 |         isExists = cursor.execute("SELECT title FROM images_page WHERE title =" + "'" + title + "'" + " limit 1;")
 72 |         img_m_src=info_soup.find("div",class_="content").find("a").find("img").get("src").split("/")[-3]
 73 |         if isExists != 0:
 74 |             print("已采集：" + title)
 75 |         else:
 76 |             tags=info_soup.find("div",class_="tags").find_all("a")
 77 |             for tag_soup in tags:
 78 |                 tag=tag_soup.text
 79 |                 sqltag = "SELECT * FROM images_tag WHERE tag =" + "'" + tag + "'" + " limit 1;"
 80 |                 isExiststag = cursor.execute(sqltag)
 81 |                 if isExiststag == 0:
 82 |                     cursor.execute("INSERT INTO images_tag (tag) VALUES (%s)", tag)
 83 |                 cursor.execute("SELECT id FROM images_tag WHERE tag =" + "'" + tag + "'")
 84 |                 for id in cursor.fetchall():
 85 |                     tagidlist.append(id[0])
 86 |             p = (title, str(tagidlist), time.strftime('%Y-%m-%d', time.localtime(time.time())), self.type_id, "1",page_url)
 87 |             cursor.execute("INSERT INTO images_page (title,tagid,sendtime,typeid,firstimg,crawler) VALUES (%s,%s,%s,%s,%s,%s)", p)
 88 |             print("开始采集："+title)
 89 |             pageid = cursor.lastrowid
 90 |             page=s.get(page_url,headers=headers)
 91 |             soup=BeautifulSoup(page.text,"html.parser")
 92 |             img_base=soup.find("div",class_="content").find("img").get("src").split("/")
 93 |             img_base_url="http://"+img_base[2]+"/"
 94 |             img_num=soup.find("div",class_="page").text.replace("全部图片下一页","").split("...")[-1]
 95 |             img_path = self.img_path + time.strftime('%Y%m%d', time.localtime(
 96 |                 time.time())) + "/" + img_base[-2] +"/"
 97 |             for i in range(1,int(img_num)):
 98 |                 img_loc_path=img_path+str(i)+".jpg"
 99 |                 imgp = pageid, img_loc_path, img_base_url+img_base[-2]+"/"+str(i)+".jpg"
100 |                 cursor.execute("INSERT INTO images_image (pageid,imageurl,originurl) VALUES (%s,%s,%s)", imgp)
101 |                 if i == 1:
102 |                     cursor.execute(
103 |                         "UPDATE images_page SET firstimg = " + "'" + img_loc_path + "'" + " WHERE id=" + "'" + str(
104 |                             pageid) + "'")
105 |                 self.img_url_list.append({"img_url": img_base_url+img_base[-2]+"/"+str(i)+".jpg", "Referer": url, "id": img_base[-2]})
106 |                 # print({"img_url": img_base_url+img_path+str(i)+".jpg", "Referer": img_base_url+img_base[-2], "id": img_base[-2]})
107 | 
108 | 
109 | 
110 |     def down_img(self,imgsrc,Referer,id):
111 |         headers = {
112 |             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36",
113 |             "Referer": Referer
114 |         }
115 |         path = self.img_path + time.strftime('%Y%m%d', time.localtime(time.time())) + "/"
116 |         page_id = id
117 |         isdata = os.path.exists("../" + path + page_id)
118 |         if not isdata:
119 |             os.makedirs("../" + path + page_id)
120 |         with open("../" + path + page_id + "/" + imgsrc.split("/")[-1].split(".")[0] + ".jpg", "wb") as f:
121 |             print("已保存：" + path + page_id + "/" + imgsrc.split("/")[-1].split(".")[0] + ".jpg")
122 |             f.write(s.get(imgsrc, headers=headers,verify=False).content)
123 | 
124 |     def run_page(self):
125 |         while True:
126 |             Spider.rlock.acquire()
127 |             if len(self.page_url_list) == 0:
128 |                 Spider.rlock.release()
129 |                 break
130 |             else:
131 |                 page_url = self.page_url_list.pop()
132 |                 Spider.rlock.release()
133 |                 try:
134 |                     self.get_img(page_url)
135 |                 except Exception as e:
136 |                     pass
137 | 
138 |     def run_img(self):
139 |         while True:
140 |             Spider.rlock.acquire()
141 |             if len(self.img_url_list) == 0 :
142 |                 Spider.rlock.release()
143 |                 break
144 |             else:
145 |                 urls = self.img_url_list.pop()
146 |                 url = urls.get("img_url")
147 |                 Referer = urls.get("Referer")
148 |                 id = urls.get("id")
149 |                 Spider.rlock.release()
150 |                 try:
151 |                     self.down_img(url, Referer, id)
152 |                 except Exception as e:
153 |                     pass
154 | 
155 |     def run_1(self):
156 |         # 启动thread_num个进程来爬去具体的img url 链接
157 |         url_threa_list=[]
158 |         for th in range(self.thread_num):
159 |             add_pic_t = threading.Thread(target=self.run_page)
160 |             url_threa_list.append(add_pic_t)
161 | 
162 |         for t in url_threa_list:
163 |             t.setDaemon(True)
164 |             t.start()
165 | 
166 |         for t in url_threa_list:
167 |             t.join()
168 | 
169 |     def run_2(self):
170 |         # 启动thread_num个来下载图片
171 |         for img_th in range(self.thread_num):
172 |             download_t = threading.Thread(target=self.run_img)
173 |             download_t.start()
174 | 
175 | # start_page是采集开始也，end是采集结束页，type不用修改，自动分类
176 | if __name__ == "__main__":
177 |     for i in [{"start_page": 1,"end_page":1, "type": "home"}]:
178 |         spider=Spider(start_page_num=i.get("start_page"),end_page_num=i.get("end_page"),img_path='/static/images/',thread_num=10,type=i.get("type"))
179 |         spider.get_url()
180 |         spider.run_1()
181 |         spider.run_2()


--------------------------------------------------------------------------------
/crawler/crawler_mm131.py:
--------------------------------------------------------------------------------
  1 | # coding='UTF-8'
  2 | 
  3 | import sys
  4 | 
  5 | sys.path.append('../')
  6 | from bs4 import BeautifulSoup
  7 | import threading, pymysql, time, requests, os, urllib3, re,random
  8 | from config import mysql_config
  9 | 
 10 | requests.packages.urllib3.disable_warnings()
 11 | requests.adapters.DEFAULT_RETRIES = 5
 12 | s = requests.session()
 13 | s.keep_alive = False
 14 | # 数据库连接信息
 15 | dbhost = {
 16 |     "host": mysql_config['HOST'],
 17 |     "dbname": mysql_config['NAME'],
 18 |     "user": mysql_config['USER'],
 19 |     "password": mysql_config['PASSWORD']
 20 | }
 21 | base_url="https://mm131.pro"
 22 | 
 23 | class Spider():
 24 |     rlock = threading.RLock()
 25 |     page_url_list = []
 26 |     img_url_list = []
 27 |     headers = {
 28 |         "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36",
 29 |         "Referer": base_url
 30 |     }
 31 | 
 32 |     def __init__(self, page_num, img_path, thread_num, type_id=1, type="home",tagslist=["性感美女","诱惑美女","大胸美女","萌妹子"]):
 33 |         self.page_num = page_num
 34 |         self.img_path = img_path
 35 |         self.thread_num = thread_num
 36 |         self.type_id = type_id
 37 |         self.type = type
 38 |         self.tagslist= tagslist
 39 | 
 40 |     def get_url(self):
 41 |         for i in range(self.page_num):
 42 |             page = s.get(base_url+"/e/action/ListInfo/?classid="+str(self.type_id), headers=self.headers,verify=False)
 43 |             soup = BeautifulSoup(page.text, "html.parser")
 44 |             try:
 45 |                 page_div = soup.find("dl", class_="list-left public-box").find_all("dd")
 46 |             except:
 47 |                 print("采集错误，跳过本条")
 48 |                 continue
 49 |             del page_div[-1]
 50 |             for dd in page_div:
 51 |                 url = dd.find("a").get("href")
 52 |                 self.page_url_list.append(base_url+url)
 53 | 
 54 |     def get_img(self,url):
 55 |         db = pymysql.connect(dbhost.get("host"), dbhost.get("user"), dbhost.get("password"), dbhost.get("dbname"))
 56 |         cursor = db.cursor()
 57 |         tagidlist = []
 58 |         page = s.get(url, headers=self.headers)
 59 |         page.encoding='UTF-8'
 60 |         soup = BeautifulSoup(page.text, "html.parser")
 61 |         # page_div = soup.find("div", class_="content-pic")
 62 |         title = soup.title.string.replace("_znns.com宅男钕神",'')
 63 |         isExists = cursor.execute("SELECT title FROM images_page WHERE title =" + "'" + title + "'" + " limit 1;")
 64 |         if isExists != 0:
 65 |             print("isExists:" + title)
 66 |         else:
 67 |             tagslist = re.findall('<meta name="keywords" content="(.*?)" />', page.text)
 68 |             for tags in tagslist:
 69 |                 for tag in tags.split(","):
 70 |                     sqltag = "SELECT * FROM images_tag WHERE tag =" + "'" + tag + "'" + " limit 1;"
 71 |                     isExiststag = cursor.execute(sqltag)
 72 |                     if isExiststag == 0:
 73 |                         cursor.execute("INSERT INTO images_tag (tag) VALUES (%s)", tag)
 74 |                     cursor.execute("SELECT id FROM images_tag WHERE tag =" + "'" + tag + "'")
 75 |                     for id in cursor.fetchall():
 76 |                         tagidlist.append(id[0])
 77 |             p = (
 78 |             title, str(tagidlist), time.strftime('%Y-%m-%d', time.localtime(time.time())), self.type_id, "1", url)
 79 |             cursor.execute(
 80 |                 "INSERT INTO images_page (title,tagid,sendtime,typeid,firstimg,crawler) VALUES (%s,%s,%s,%s,%s,%s)", p)
 81 |             print("down：" + title)
 82 |             pageid = cursor.lastrowid
 83 |             img_num_soup = soup.find("div", class_="content-page").find("span").text
 84 |             img_num = "".join(re.findall(r"\d", img_num_soup))
 85 |             for i in range(1, int(img_num)):
 86 |                 headers = self.headers.copy()
 87 |                 headers.update({"Referer":url})
 88 |                 id = url.split("/")[-1].split(".")[0]
 89 |                 if i==1:
 90 |                     img_page_url=url
 91 |                 else:
 92 |                     img_page_url = "/".join(url.split("/")[0:-1]) + "/" + id + "_" + str(i) + ".html"
 93 |                 img_page=s.get(img_page_url,headers=headers,verify=False)
 94 |                 # page.encoding = 'utf-8'
 95 |                 img_soup=BeautifulSoup(img_page.text,"html.parser")
 96 |                 img_url = img_soup.find("div",class_="content-pic").find("img").get("src")
 97 |                 img_name =img_url.split("/")[-1]
 98 |                 id=url.split("/")[-1].split(".")[0]
 99 |                 img_loc_path = self.img_path + time.strftime('%Y%m%d', time.localtime(
100 |                     time.time())) + "/" + id + "/" +img_name
101 |                 if i == 1:
102 |                     cursor.execute(
103 |                         "UPDATE images_page SET firstimg = " + "'" + img_loc_path + "'" + " WHERE id=" + "'" + str(
104 |                             pageid) + "'")
105 |                 imgp = pageid, img_loc_path,img_url
106 |                 cursor.execute("INSERT INTO images_image (pageid,imageurl,originurl) VALUES (%s,%s,%s)", imgp)
107 |                 i += 1
108 |                 data={"img_url":img_url,"Referer":url,"id":id}
109 |                 if data in self.img_url_list:
110 |                     continue
111 |                 else:
112 |                     self.img_url_list.append(data)
113 | 
114 |     def down_img(self,imgsrc,Referer,id):
115 |         headers = {
116 |             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36",
117 |             "Referer": Referer
118 |         }
119 |         path = self.img_path + time.strftime('%Y%m%d', time.localtime(time.time())) + "/"
120 |         page_id = id
121 |         isdata = os.path.exists("../" + path + page_id)
122 |         if not isdata:
123 |             os.makedirs("../" + path + page_id)
124 |         with open("../" + path + page_id + "/" + imgsrc.split("/")[-1].split(".")[0] + ".jpg", "wb") as f:
125 |             print("已保存：" + path + page_id + "/" + imgsrc.split("/")[-1].split(".")[0] + ".jpg")
126 |             f.write(s.get(imgsrc, headers=headers,verify=False).content)
127 | 
128 |     def run_page(self):
129 |         while True:
130 |             Spider.rlock.acquire()
131 |             if len(self.page_url_list) == 0:
132 |                 Spider.rlock.release()
133 |                 break
134 |             else:
135 |                 try:
136 |                     page_url = self.page_url_list.pop()
137 |                 except Exception as e:
138 |                     print(e)
139 |                     pass
140 |                 Spider.rlock.release()
141 |                 try:
142 |                     self.get_img(page_url)
143 |                 except Exception as e:
144 |                     print(e)
145 |                     pass
146 | 
147 |     def run_img(self):
148 |         while True:
149 |             Spider.rlock.acquire()
150 |             if len(self.img_url_list) == 0 :
151 |                 Spider.rlock.release()
152 |                 break
153 |             else:
154 |                 urls = self.img_url_list.pop()
155 |                 url = urls.get("img_url")
156 |                 Referer = urls.get("Referer")
157 |                 id = urls.get("id")
158 |                 Spider.rlock.release()
159 |                 try:
160 |                     self.down_img(url, Referer, id)
161 |                 except Exception as e:
162 |                     print(e)
163 |                     pass
164 | 
165 |     def run_1(self):
166 |         # 启动thread_num个进程来爬去具体的img url 链接
167 |         url_threa_list=[]
168 |         for th in range(self.thread_num):
169 |             add_pic_t = threading.Thread(target=self.run_page)
170 |             url_threa_list.append(add_pic_t)
171 | 
172 |         for t in url_threa_list:
173 |             t.setDaemon(True)
174 |             t.start()
175 | 
176 |         for t in url_threa_list:
177 |             t.join()
178 | 
179 |     def run_2(self):
180 |         # 启动thread_num个来下载图片
181 |         for img_th in range(self.thread_num):
182 |             download_t = threading.Thread(target=self.run_img)
183 |             download_t.start()
184 | 
185 | 
186 | # page是采集深度，从1开始，采集第一页即采集最新发布。type是源站分类，type_id是对应本站分类的id
187 | if __name__ == "__main__":
188 |     for i in [{"page": 1, "type": "xinggan", "type_id": 1},{"page":1,"type":"qingchun","type_id": 2}]:
189 |         spider = Spider(page_num=i.get("page"), img_path='/static/images/', thread_num=10, type_id=i.get("type_id"),
190 |                         type=i.get("type"),tagslist=["性感美女","诱惑美女","大胸美女","萌妹子"])
191 |         spider.get_url()
192 |         spider.run_1()
193 |         spider.run_2()


--------------------------------------------------------------------------------
/crawler/crawler_amn.py:
--------------------------------------------------------------------------------
  1 | # coding='UTF-8'
  2 | import sys
  3 | sys.path.append('../')
  4 | from bs4 import BeautifulSoup
  5 | from requests.adapters import HTTPAdapter
  6 | import threading,pymysql,time,requests,os,urllib3,re,random
  7 | from config import mysql_config
  8 | 
  9 | requests.packages.urllib3.disable_warnings()
 10 | requests.adapters.DEFAULT_RETRIES = 5
 11 | s = requests.session()
 12 | s.keep_alive = False
 13 | s.mount('http://', HTTPAdapter(max_retries=3))
 14 | # 数据库连接信息
 15 | dbhost = {
 16 |     "host": mysql_config['HOST'],
 17 |     "dbname": mysql_config['NAME'],
 18 |     "user": mysql_config['USER'],
 19 |     "password": mysql_config['PASSWORD']
 20 | }
 21 | 
 22 | base_url="https://www.2meinv.com/"
 23 | tag_url="https://www.2meinv.com/tags-{}-{}.html"
 24 | index_url="https://www.2meinv.com/index-1.html"
 25 | img_path='/static/images/'
 26 | 
 27 | class Spider():
 28 |     page_url_list = []
 29 |     img_url_list = []
 30 |     rlock = threading.RLock()
 31 |     proxy_dict = ""
 32 |     def __init__(self, start_page_num, end_page_num,img_path, thread_num, type="home",type_id=0):
 33 |         self.start_page_num = start_page_num
 34 |         self.end_page_num=end_page_num
 35 |         self.img_path = img_path
 36 |         self.thread_num = thread_num
 37 |         self.type = type
 38 |         self.type_id=type_id
 39 | 
 40 |     def get_url(self):
 41 |         for i in range(self.start_page_num, self.end_page_num):
 42 |             if self.type_id==0:
 43 |                 page = s.get(index_url.format(str(i)), verify=False).text
 44 |             else:
 45 |                 page = s.get(tag_url.format(self.type,str(i)), verify=False).text
 46 |             # page = s.get(base_url + self.type+"-"+str(i)+".html", verify=False).text
 47 |             soup = BeautifulSoup(page, "html.parser")
 48 |             page_base_url = soup.find("ul", class_="detail-list").find_all("li")
 49 |             for page_url in page_base_url:
 50 |                 url = page_url.find("a",class_="dl-pic").get("href")
 51 |                 self.page_url_list.append(url)
 52 | 
 53 |     def get_img(self,url):
 54 |         tagidlist=[]
 55 |         db = pymysql.connect(dbhost.get("host"), dbhost.get("user"), dbhost.get("password"), dbhost.get("dbname"))
 56 |         cursor = db.cursor()
 57 |         page = s.get(url,verify=False)
 58 |         soup = BeautifulSoup(page.text, "html.parser")
 59 |         title=soup.title.string.replace("_爱美女","")
 60 |         if self.type_id == 0:
 61 |             if "袜" in title or "丝" in title or "腿" in title:
 62 |                 self.type_id = 2
 63 |             elif "青春" in title or "清纯" in title:
 64 |                 self.type_id = 3
 65 |             elif "萝莉" in title:
 66 |                 self.type_id = 4
 67 |             else:
 68 |                 self.type_id = 1
 69 |         isExists = cursor.execute("SELECT title FROM images_page WHERE title =" + "'" + title + "'" + " limit 1;")
 70 |         if isExists != 0:
 71 |             print("已采集：" , title)
 72 |         else:
 73 |             print("正在采集：", title)
 74 |             tags=soup.find(attrs={"name":"Keywords"})['content'].split(",")
 75 |             for tag in tags:
 76 |                 sqltag = "SELECT * FROM images_tag WHERE tag =" + "'" + tag + "'" + " limit 1;"
 77 |                 isExiststag = cursor.execute(sqltag)
 78 |                 if isExiststag == 0:
 79 |                     cursor.execute("INSERT INTO images_tag (tag) VALUES (%s)", tag)
 80 |                 cursor.execute("SELECT id FROM images_tag WHERE tag =" + "'" + tag + "'")
 81 |                 for id in cursor.fetchall():
 82 |                     tagidlist.append(id[0])
 83 |             p = (title, str(tagidlist), time.strftime('%Y-%m-%d', time.localtime(time.time())), self.type_id, "1",url)
 84 |             cursor.execute("INSERT INTO images_page (title,tagid,sendtime,typeid,firstimg,crawler) VALUES (%s,%s,%s,%s,%s,%s)", p)
 85 |             pageid = cursor.lastrowid
 86 |             img_soup=soup.find("div",class_="page-show").text
 87 |             img_nums=re.sub("\D", "", img_soup)
 88 |             if len(img_nums)==6:
 89 |                 img_num=img_nums[-2:]
 90 |             elif len(img_nums)<6:
 91 |                 img_num = img_nums[-1]
 92 |             elif len(img_nums)>6:
 93 |                 img_num = img_nums[-3:]
 94 |             id=url.split("-")[-1].split(".")[0]
 95 |             for i in range(1,int(img_num)+1):
 96 |                 img_page_url=base_url+"article-"+id+"-"+str(i)+".html"
 97 |                 img_page=s.get(img_page_url)
 98 |                 img_soup=BeautifulSoup(img_page.text, "html.parser")
 99 |                 img_url=img_soup.find("div",class_="pp hh").find("img").get("src")
100 |                 img_name = img_url.split("/")[-1]
101 |                 img_loc_path = self.img_path + time.strftime('%Y%m%d', time.localtime(
102 |                     time.time())) + "/" + id + "/" + img_name
103 |                 imgp = pageid, img_loc_path,img_url
104 |                 cursor.execute("INSERT INTO images_image (pageid,imageurl,originurl) VALUES (%s,%s,%s)", imgp)
105 |                 if i==1:
106 |                     cursor.execute(
107 |                         "UPDATE images_page SET firstimg = " + "'" + img_loc_path + "'" + " WHERE id=" + "'" + str(
108 |                             pageid) + "'")
109 |                 self.img_url_list.append({"img_url":img_url,"Referer":url,"id":id})
110 | 
111 |     def down_img(self,imgsrc,Referer,id):
112 |         headers = {
113 |             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36",
114 |             "Referer": Referer
115 |         }
116 |         path = img_path + time.strftime('%Y%m%d', time.localtime(time.time())) + "/"
117 |         page_id = id
118 |         isdata = os.path.exists("../" + path + page_id)
119 |         if not isdata:
120 |             os.makedirs("../" + path + page_id)
121 |         with open("../" + path + page_id + "/" + imgsrc.split("/")[-1].split(".")[0] + ".jpg", "wb") as f:
122 |             print("已保存：" + path + page_id + "/" + imgsrc.split("/")[-1].split(".")[0] + ".jpg")
123 |             f.write(s.get(imgsrc, headers=headers,verify=False).content)
124 | 
125 |     def run_page(self):
126 |         while True:
127 |             Spider.rlock.acquire()
128 |             if len(self.page_url_list) == 0:
129 |                 Spider.rlock.release()
130 |                 break
131 |             else:
132 |                 page_url = self.page_url_list.pop()
133 |                 Spider.rlock.release()
134 |                 try:
135 |                     self.get_img(page_url)
136 |                 except Exception as e:
137 |                     print(e)
138 |                     pass
139 | 
140 |     def run_img(self):
141 |         while True:
142 |             Spider.rlock.acquire()
143 |             if len(self.img_url_list) == 0 :
144 |                 Spider.rlock.release()
145 |                 break
146 |             else:
147 |                 urls = self.img_url_list.pop()
148 |                 url = urls.get("img_url")
149 |                 Referer = urls.get("Referer")
150 |                 id = urls.get("id")
151 |                 Spider.rlock.release()
152 |                 try:
153 |                     self.down_img(url, Referer, id)
154 |                 except Exception as e:
155 |                     print(e)
156 |                     pass
157 | 
158 |     def run_1(self):
159 |         # 启动thread_num个进程来爬去具体的img url 链接
160 |         url_threa_list=[]
161 |         for th in range(self.thread_num):
162 |             add_pic_t = threading.Thread(target=self.run_page)
163 |             url_threa_list.append(add_pic_t)
164 | 
165 |         for t in url_threa_list:
166 |             t.setDaemon(True)
167 |             t.start()
168 | 
169 |         for t in url_threa_list:
170 |             t.join()
171 | 
172 |     def run_2(self):
173 |         # 启动thread_num个来下载图片
174 |         for img_th in range(self.thread_num):
175 |             download_t = threading.Thread(target=self.run_img)
176 |             download_t.start()
177 | 
178 | # start_page是采集开始也，end是采集结束页，type不用修改，自动分类，起始页为1
179 | if __name__ == "__main__":
180 |     cl_list=[{"start_page": 1,"end_page":17, "type": "Cosplay", "type_id":6},
181 |              {"start_page": 1,"end_page":17, "type": "性感", "type_id":1},
182 |              {"start_page": 1, "end_page": 17, "type": "丝袜", "type_id": 2},
183 |              {"start_page": 1, "end_page": 17, "type": "美腿", "type_id": 2},
184 |              {"start_page": 1, "end_page": 17, "type": "美胸", "type_id": 1},
185 |              {"start_page": 1, "end_page": 17, "type": "制服诱惑", "type_id": 2}
186 |              ]
187 | 
188 | 
189 |     for i in cl_list:
190 |         spider = Spider(start_page_num=i.get("start_page"),end_page_num=i.get("end_page"), img_path='/static/images/', thread_num=10,
191 |                         type=i.get("type"),type_id=i.get("type_id"))
192 |         spider.get_url()
193 |         spider.run_1()
194 |         spider.run_2()


--------------------------------------------------------------------------------
/crawler/crawler_nsg.py:
--------------------------------------------------------------------------------
  1 | # coding='UTF-8'
  2 | 
  3 | import sys
  4 | 
  5 | sys.path.append('../')
  6 | from bs4 import BeautifulSoup
  7 | from requests.adapters import HTTPAdapter
  8 | import threading,pymysql,time,requests,os,urllib3,re,random
  9 | from config import mysql_config
 10 | 
 11 | requests.packages.urllib3.disable_warnings()
 12 | requests.adapters.DEFAULT_RETRIES = 5
 13 | s = requests.session()
 14 | s.keep_alive = False
 15 | s.mount('http://', HTTPAdapter(max_retries=3))
 16 | # 数据库连接信息
 17 | dbhost = {
 18 |     "host": mysql_config['HOST'],
 19 |     "dbname": mysql_config['NAME'],
 20 |     "user": mysql_config['USER'],
 21 |     "password": mysql_config['PASSWORD']
 22 | }
 23 | 
 24 | # dbhost = {
 25 | #     "host": "192.168.1.67",
 26 | #     "dbname": "silumz",
 27 | #     "user": "silumz",
 28 | #     "password": "fendou2009"
 29 | # }
 30 | 
 31 | base_url="http://www.nvshenge.com/mntp/"
 32 | img_path='/static/images/'
 33 | 
 34 | class Spider():
 35 |     page_url_list = []
 36 |     img_url_list = []
 37 |     rlock = threading.RLock()
 38 |     proxy_dict = ""
 39 |     def __init__(self, start_page_num, end_page_num,img_path, thread_num, type="home"):
 40 |         self.start_page_num = start_page_num
 41 |         self.end_page_num=end_page_num
 42 |         self.img_path = img_path
 43 |         self.thread_num = thread_num
 44 |         self.type = type
 45 | 
 46 |     def get_url(self):
 47 |         for i in range(self.start_page_num -1, self.end_page_num -1):
 48 |             if i==0:
 49 |                 page=s.get(base_url, verify=False).text
 50 |             else:
 51 |                 page = s.get(base_url + "list_"+str(i)+".html", verify=False).text
 52 |             soup = BeautifulSoup(page, "html.parser")
 53 |             all_list = soup.find_all("a", class_="PicTxt")
 54 |             i = 0
 55 |             for info_soup in all_list:
 56 |                 url=info_soup.get("href")
 57 |                 title=info_soup.text
 58 |                 self.page_url_list.append({"url":url,"title":title})
 59 |                 i += 1
 60 | 
 61 | 
 62 |     def get_img(self):
 63 |         db = pymysql.connect(dbhost.get("host"), dbhost.get("user"), dbhost.get("password"),
 64 |                              dbhost.get("dbname"))
 65 |         cursor = db.cursor()
 66 |         while True:
 67 |             self.rlock.acquire()
 68 |             if len(self.page_url_list) == 0:
 69 |                 self.rlock.release()
 70 |                 break
 71 |             else:
 72 |                 page_info= self.page_url_list.pop()
 73 |                 page_url = page_info.get("url")
 74 |                 title = page_info.get("title")
 75 |                 if "袜" in title or "丝" in title or "腿" in title:
 76 |                     type_id = 2
 77 |                 elif "青春" in title or "清纯" in title:
 78 |                     type_id = 3
 79 |                 elif "萝莉" in title:
 80 |                     type_id = 4
 81 |                 else:
 82 |                     type_id = 1
 83 |                 self.rlock.release()
 84 |                 try:
 85 |                     tagidlist = []
 86 |                     page = s.get(page_url, verify=False).text
 87 |                     soup = BeautifulSoup(page, "html.parser")
 88 |                     img_num_soup = soup.find("div", class_="articleTop yh").find("h1").text
 89 |                     img_num = int(img_num_soup[img_num_soup.find("(1/") + 3:img_num_soup.find("）")])
 90 |                     isExists = cursor.execute(
 91 |                         "SELECT title FROM images_page WHERE title =" + "'" + title + "'" + " limit 1;")
 92 |                     if isExists != 0:
 93 |                         print("已采集：" + title)
 94 |                     else:
 95 |                         taglist = soup.find("div",class_="articleTag l").find_all("dd")
 96 |                         for tag_soup in taglist:
 97 |                             tag=tag_soup.text
 98 |                             sqltag = "SELECT * FROM images_tag WHERE tag =" + "'" + tag + "'" + " limit 1;"
 99 |                             isExiststag = cursor.execute(sqltag)
100 |                             if isExiststag == 0:
101 |                                 cursor.execute("INSERT INTO images_tag (tag) VALUES (%s)", tag)
102 |                             cursor.execute("SELECT id FROM images_tag WHERE tag =" + "'" + tag + "'")
103 |                             for id in cursor.fetchall():
104 |                                 tagidlist.append(id[0])
105 |                         p = (
106 |                         title, str(tagidlist), time.strftime('%Y-%m-%d', time.localtime(time.time())), type_id,
107 |                         "1", page_url)
108 |                         cursor.execute(
109 |                             "INSERT INTO images_page (title,tagid,sendtime,typeid,firstimg,crawler) VALUES (%s,%s,%s,%s,%s,%s)",
110 |                             p)
111 |                         print("开始采集：" + title)
112 |                         pageid = cursor.lastrowid
113 |                         for i in range(0, int(img_num)):
114 |                             img_id = page_url.split("/")[-1].split(".")[0]
115 |                             if i==0:
116 |                                 url=page_url
117 |                             else:
118 |                                 url = "/".join(page_url.split("/")[0:-1])+"/"+img_id+"_"+str(i)+".html"
119 |                             img_page=s.get(url, verify=False).text
120 |                             img_soup= BeautifulSoup(img_page, "html.parser")
121 |                             img_src=img_soup.find("div",id="ArticlePicBox1").find("img").get("src")
122 |                             img_loc_path = self.img_path + time.strftime('%Y%m%d', time.localtime(
123 |                                 time.time())) + "/"+img_id+"/"+img_src.split("/")[-1]
124 |                             if i == 0:
125 |                                 cursor.execute(
126 |                                     "UPDATE images_page SET firstimg = " + "'" + img_loc_path + "'" + " WHERE title=" + "'" + title + "'")
127 |                             imgp = pageid, img_loc_path, img_src
128 |                             cursor.execute("INSERT INTO images_image (pageid,imageurl,originurl) VALUES (%s,%s,%s)",
129 |                                            imgp)
130 |                             self.img_url_list.append({"url": img_src, "path": img_loc_path, "referer": page_url})
131 |                 except Exception as e:
132 |                     cursor.execute("Delete FROM images_page WHERE title=" + "'" + title + "'")
133 |                     print("采集失败(已删除)：",title)
134 |                     print("连接地址：", page_url)
135 |                     print("错误信息：", e)
136 |         db.close()
137 | 
138 |     def down_img(self, imgsrc, imgpath, referer):
139 |         headers = {
140 |             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36",
141 |             "Referer": referer
142 |         }
143 |         isdata = os.path.exists(".." +"/".join(imgpath.split("/")[0:-1]))
144 |         if not isdata:
145 |             os.makedirs(".."+"/".join(imgpath.split("/")[0:-1]))
146 |         with open(".."+ imgpath, "wb")as f:
147 |             f.write(requests.get(imgsrc, headers=headers, verify=False).content)
148 |             print("下载图片：" + imgpath)
149 | 
150 |     def down_url(self):
151 |         while True:
152 |             Spider.rlock.acquire()
153 |             if len(Spider.img_url_list) == 0:
154 |                 Spider.rlock.release()
155 |                 break
156 |             else:
157 |                 img_url = Spider.img_url_list.pop()
158 |                 Spider.rlock.release()
159 |                 try:
160 |                     url = img_url.get("url")
161 |                     path = img_url.get("path")
162 |                     referer = img_url.get("referer")
163 |                     self.down_img(url, path, referer)
164 |                 except Exception as e:
165 |                     print(e)
166 |                     self.img_url_list.append(
167 |                         {"url": img_url.get("url"), "path": img_url.get("path"), "referer": img_url.get("referer")})
168 |                     pass
169 | 
170 |     def run_1(self):
171 |         # 启动thread_num个进程来爬去具体的img url 链接
172 |         url_threa_list = []
173 |         for th in range(self.thread_num):
174 |             add_pic_t = threading.Thread(target=self.get_img)
175 |             url_threa_list.append(add_pic_t)
176 | 
177 |         for t in url_threa_list:
178 |             t.setDaemon(True)
179 |             t.start()
180 | 
181 |         for t in url_threa_list:
182 |             t.join()
183 | 
184 |     def run_2(self):
185 |         # 启动thread_num个来下载图片
186 |         for img_th in range(self.thread_num):
187 |             download_t = threading.Thread(target=self.down_url)
188 |             download_t.start()
189 | 
190 | 
191 | # start_page是采集开始也，end是采集结束页，type不用修改，自动分类，起始页为1
192 | if __name__ == "__main__":
193 |     for i in [{"start_page": 1,"end_page":2, "type": "index"}]:
194 |         spider = Spider(start_page_num=i.get("start_page"),end_page_num=i.get("end_page"), img_path='/static/images/', thread_num=10,
195 |                         type=i.get("type"))
196 |         spider.get_url()
197 |         spider.run_1()
198 |         spider.run_2()


--------------------------------------------------------------------------------
/static/zde/css/fonts/iconfont.svg:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" standalone="no"?>
 2 | <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd" >
 3 | <svg xmlns="http://www.w3.org/2000/svg">
 4 | <metadata>
 5 | Created by FontForge Q2wB39 20120731 at Fri Nov 18 13:25:10 2016
 6 |  By admin
 7 | </metadata>
 8 | <defs>
 9 | <font id="iconfont" horiz-adv-x="1024" >
10 |   <font-face 
11 |     font-family="iconfont"
12 |     font-weight="500"
13 |     font-stretch="normal"
14 |     units-per-em="1024"
15 |     panose-1="2 0 6 3 0 0 0 0 0 0"
16 |     ascent="896"
17 |     descent="-128"
18 |     x-height="792"
19 |     bbox="20 -161 1004 867"
20 |     underline-thickness="0"
21 |     underline-position="0"
22 |     unicode-range="U+0078-E615"
23 |   />
24 | <missing-glyph 
25 |  />
26 |     <glyph glyph-name=".notdef" 
27 |  />
28 |     <glyph glyph-name=".notdef" 
29 |  />
30 |     <glyph glyph-name=".null" horiz-adv-x="0" 
31 |  />
32 |     <glyph glyph-name="nonmarkingreturn" horiz-adv-x="341" 
33 |  />
34 |     <glyph glyph-name="x" unicode="x" horiz-adv-x="1001" 
35 | d="M281 543q-27 -1 -53 -1h-83q-18 0 -36.5 -6t-32.5 -18.5t-23 -32t-9 -45.5v-76h912v41q0 16 -0.5 30t-0.5 18q0 13 -5 29t-17 29.5t-31.5 22.5t-49.5 9h-133v-97h-438v97zM955 310v-52q0 -23 0.5 -52t0.5 -58t-10.5 -47.5t-26 -30t-33 -16t-31.5 -4.5q-14 -1 -29.5 -0.5
36 | t-29.5 0.5h-32l-45 128h-439l-44 -128h-29h-34q-20 0 -45 1q-25 0 -41 9.5t-25.5 23t-13.5 29.5t-4 30v167h911zM163 247q-12 0 -21 -8.5t-9 -21.5t9 -21.5t21 -8.5q13 0 22 8.5t9 21.5t-9 21.5t-22 8.5zM316 123q-8 -26 -14 -48q-5 -19 -10.5 -37t-7.5 -25t-3 -15t1 -14.5
37 | t9.5 -10.5t21.5 -4h37h67h81h80h64h36q23 0 34 12t2 38q-5 13 -9.5 30.5t-9.5 34.5q-5 19 -11 39h-368zM336 498v228q0 11 2.5 23t10 21.5t20.5 15.5t34 6h188q31 0 51.5 -14.5t20.5 -52.5v-227h-327z" />
38 |     <glyph glyph-name="sousuo1" unicode="&#xe600;" horiz-adv-x="1000" 
39 | d="M734 156q-14 -19 -33 -37l220 -216l36 34zM812 389q0 -104 -49 -190.5t-133.5 -135t-185.5 -48.5q-105 2 -189.5 52.5t-131.5 135.5t-47 186q0 97 49.5 180.5t134.5 132.5t184 49t184.5 -49.5t134.5 -133t49 -179.5zM442 702q-131 0 -224 -93t-93 -226t93 -226.5
40 | t224 -93.5q86 0 160 43t117.5 116.5t43.5 160.5t-43.5 160.5t-117.5 116t-160 42.5z" />
41 |     <glyph glyph-name="tengxunkongjian" unicode="&#xe607;" 
42 | d="M960 455q0 6 -0.5 6.5t-3.5 0.5h-6h-99h-169l-39 71l-110 210h-19l-100 -210l-39 -71h-169h-142v-5l56 -39l157 -139l-36 -318q0 -8 3.5 -9t18.5 -1l259 159l259 -159q8 0 10.5 1t9.5 9l-49 318l149 127zM305 150l288 235l-268 46h407l-248 -225l258 -56h-437z" />
43 |     <glyph glyph-name="weibo" unicode="&#xe608;" 
44 | d="M743 310q-17 4 -21.5 10.5t-1.5 11.5l4 6q1 2 3.5 6.5t6 17t4.5 25t-4 28t-18 27.5q-31 26 -72 22.5t-77.5 -15.5t-36.5 -14q-10 -3 -17.5 -4.5t-11.5 0t-6 3.5t-1.5 8t1 10t3 13.5t3.5 15.5q0 16 -2.5 30t-12 29.5t-25 23.5t-44 7.5t-66.5 -13.5q-37 -13 -76.5 -39
45 | t-69 -54t-54 -54t-36.5 -43l-13 -17q-22 -30 -37.5 -60.5t-21.5 -52.5t-8.5 -40t-2.5 -27l1 -9q6 -53 31.5 -94.5t61 -67.5t84 -44t93 -26.5t94.5 -12.5q81 -6 168 13.5t162 66.5t106 113q19 39 19 73t-12 55.5t-31 38t-36 24t-30 9.5zM408 -31q-118 -6 -201 46t-83 131
46 | q0 78 82.5 134.5t201.5 61.5q120 6 202.5 -39.5t82.5 -122.5q0 -80 -84 -143t-201 -68zM377 273q-32 -4 -57.5 -15t-39.5 -26t-24 -32.5t-13.5 -33.5t-5 -30t-1 -22l0.5 -8v-5t2.5 -12.5t6 -18t12.5 -18.5t22 -17q71 -35 134 -21t101 64q15 18 20 45.5t-2.5 55.5t-24.5 51.5
47 | t-51.5 35.5t-79.5 7zM337 64.5q-22 -2.5 -37.5 9t-15.5 29.5q0 19 14.5 35t35.5 19q25 2 40.5 -10t15.5 -31t-15.5 -34t-37.5 -17.5zM430.5 144.5q-7.5 -5.5 -15.5 -5t-12 7.5q-5 6 -3 15.5t9 14.5q9 6 17 5t12 -7.5t2 -15.5t-9.5 -14.5zM820 375q9 0 16.5 6t8.5 15v3
48 | q4 36 -1.5 63t-18 42t-28 25t-32 12.5t-30.5 3.5t-23 -1l-9 -1q-10 0 -18 -7.5t-8 -18.5t8 -18.5t18 -7.5q86 19 93 -55q1 -19 -2 -36q0 -10 7.5 -17.5t18.5 -7.5zM801 678q-55 13 -129 -1q-1 0 -2 -1t-2 -1h-1q-12 -4 -19.5 -14t-7.5 -23q0 -15 11 -26.5t26 -11.5
49 | q21 3 34 8q6 1 19.5 2.5t36 -4.5t45 -14.5t46.5 -28.5t40 -47q29 -65 12 -126q-8 -18 -8 -36q0 -15 11 -24.5t27 -9.5t24.5 6t11.5 26v0q20 61 15.5 110.5t-21.5 86.5t-46.5 63.5t-60 42.5t-62.5 23z" />
50 |     <glyph glyph-name="video" unicode="&#xe615;" 
51 | d="M393 737q94 25 196.5 7.5t180.5 -75.5q60 -41 103.5 -101.5t64.5 -129t21 -143t-24 -142.5q-30 -87 -94.5 -157t-149.5 -106q-71 -32 -151.5 -36.5t-155 16.5t-140 67.5t-108.5 111.5q-55 80 -70.5 182.5t11.5 196.5q31 112 117.5 196t198.5 113zM498 709
52 | q-97 -2 -184.5 -51t-143.5 -132.5t-65 -179.5q-11 -79 14 -158t74 -140t121 -102.5t152 -48.5q80 -11 160 15t141.5 77t102 125t45.5 155q9 85 -22 168t-89 144t-139.5 96.5t-166.5 31.5zM387 515q237 -141 352 -214q-55 -34 -172 -104t-176 -108q-4 62 -4.5 134.5
53 | t0.5 156.5t0 135z" />
54 |     <glyph glyph-name="qq" unicode="&#xe609;" 
55 | d="M116 218q-25 -59 -28.5 -108.5t16.5 -59.5q14 -7 34.5 7.5t40.5 45.5q16 -66 76 -116q-31 -12 -48.5 -29t-17.5 -38q0 -33 46 -57t112 -24q58 0 102.5 19.5t53.5 48.5h9h10q9 -29 53 -48.5t103 -19.5q65 0 111 24t46 57q0 21 -17.5 38t-48.5 29q60 50 76 116
56 | q21 -31 41 -45.5t34 -7.5q20 10 16.5 59.5t-28.5 108.5q-20 46 -44.5 78t-45.5 40q1 6 1 12q0 36 -18 64v4q0 17 -7 31q-5 125 -81.5 207t-200.5 82t-200.5 -82t-81.5 -207q-7 -14 -7 -31v-4q-18 -28 -18 -64q0 -6 1 -12q-21 -8 -45.5 -40t-44.5 -78v0z" />
57 |     <glyph glyph-name="renren" unicode="&#xe60a;" 
58 | d="M439 748v-286q0 -149 -74.5 -271t-195.5 -181q-107 127 -107 292q0 110 49.5 206.5t135.5 158.5q87 64 192 81v0zM731 -94q-101 -56 -217 -56t-217 56q82 51 139.5 124t77.5 158q20 -85 78 -158t139 -124v0zM969 302q0 -165 -107 -292q-120 58 -195 181q-74 122 -74 271
59 | v286q105 -18 191 -81t135.5 -159t49.5 -206v0zM969 302z" />
60 |     <glyph glyph-name="douban" unicode="&#xe60b;" 
61 | d="M90 727h845v-95h-845v95zM861 160v373h-698v-373h698zM264 437h497v-181h-497v181zM710 -54q46 71 86 162l-102 37q-40 -109 -92 -199h-177q-44 117 -99 199l-94 -37q58 -87 94 -162h-260v-94h893v94h-26h-62.5h-73h-61.5h-26z" />
62 |     <glyph glyph-name="tengxunweibo" unicode="&#xe60c;" 
63 | d="M285.5 676q-109.5 0 -187.5 -76.5t-78 -184.5q0 -63 30 -119q4 -8 13 -11t18 1q9 5 12 14t-2 17q-24 46 -24 98q0 89 64 152t154 63t154 -63t64 -151.5t-64 -151.5t-154 -63q-25 0 -49 5q-9 2 -17.5 -3t-10.5 -14q-2 -10 3 -18t15 -10q29 -6 59 -6q110 0 187.5 76.5
64 | t77.5 184t-77.5 184t-187 76.5zM239 349q23 -15 50 -15q37 0 62.5 25t25.5 61t-25.5 61.5t-62 25.5t-62.5 -25.5t-26 -61.5q0 -18 8 -34q-32 -34 -60 -74q-50 -74 -81.5 -185.5t-19.5 -245.5q1 -11 8 -23t15 -12h3q8 0 15 14t6 25q-21 225 92 392q40 60 52 72zM777 485
65 | q-29 -2 -57 11q-46 22 -63 69.5t5 92.5q22 46 70 63t94 -5t63 -69.5t-5 -92.5q-6 -13 -14.5 -23.5t2.5 -18.5t19 2q9 12 18 28q26 56 5.5 113.5t-76.5 84t-115 6t-85.5 -76t-5.5 -113t77 -84.5q32 -15 69 -14q5 0 9 4t3.5 9.5t-4.5 9.5t-9 4zM995 384q-119 44 -178 141
66 | q-12 19 -21 42q14 7 20 21q9 18 2 37.5t-25.5 28t-38 2t-28 -25t-1.5 -37.5t25 -28q11 -5 20 -5q10 -25 23 -48q64 -106 193 -152q11 -4 16 6l0.5 0.5l0.5 0.5q4 13 -8 17z" />
67 |     <glyph glyph-name="zan" unicode="&#xe60d;" 
68 | d="M853 -31q0 -38 -26.5 -64.5t-64.5 -26.5h-364v0h-113h-1h-159v501h159q2 0 3 -0.5t3 -0.5h3q65 188 85 233q18 43 37.5 61.5t50.5 24.5q17 4 43 -6.5t52 -35.5t37 -66.5t-2 -100.5q-5 -17 -16 -64h251q35 0 51.5 -25t16.5 -66l-46 -364v0zM262 333h-91v-410l113 1v405
69 | l1 3v0q-2 1 -23 1zM808 379h-285l35 136q12 55 -17 94.5t-52 42.5q-14 1 -35 -9t-33 -37q-13 -30 -92 -248l0.5 -1l0.5 -1v-432l432 -1q19 0 32.5 13.5t13.5 32.5l45 364q0 19 -13 32.5t-32 13.5z" />
70 |     <glyph glyph-name="weixin" unicode="&#xe60e;" 
71 | d="M684 448q14 0 32 -2q-22 99 -115.5 164t-214.5 65q-88 0 -163 -36.5t-119 -100.5t-44 -140q0 -129 131 -221l-33 -98l114 58q84 -17 114 -17q17 0 31 2q-10 33 -10 68q0 107 80.5 182.5t196.5 75.5zM509 536q18 0 29 -11t11 -29.5t-11 -29.5t-29 -11q-20 0 -35 11.5
72 | t-15 29.5q1 17 16 28.5t34 11.5zM280 455q-19 0 -34 11.5t-15 29t15 29t34 11.5t30 -11t11 -29q0 -19 -11 -30t-30 -11zM982 194q0 97 -83 166.5t-194 69.5q-116 0 -197 -69t-81 -167q0 -99 81 -168t197 -69q31 0 98 16l89 -49l-24 82q114 86 114 188zM615 234
73 | q-12 0 -22.5 10.5t-10.5 22.5t10.5 22t22.5 10q17 0 29 -9.5t12 -22.5t-12 -23t-29 -10zM794 234q-12 0 -22 10.5t-10 22.5t10 22t22 10q18 0 29.5 -9.5t11.5 -22.5t-11.5 -23t-29.5 -10z" />
74 |     <glyph glyph-name="qian" unicode="&#xe601;" 
75 | d="M761 637h-93l-105 -193q-42 -79 -52 -105h-2q-27 63 -156 298h-99l184 -316h-151v-69h177v-97h-177v-70h177v-140h87v140h171v70h-171v97h171v69h-146z" />
76 |     <glyph glyph-name="pinglun" unicode="&#xe602;" 
77 | d="M806 103v0q-156 -120 -294 -225q-24 -18 -50.5 -22.5t-44 5.5t-17.5 31v156q-91 0 -168.5 46.5t-122.5 127t-45 175.5t45 175.5t122.5 127.5t168.5 47h224q91 0 168.5 -47t122.5 -127.5t45 -175.5q0 -91 -42 -168.5t-112 -125.5zM610 691h-196q-122 0 -208 -86t-86 -208
78 | t86 -208t208 -86h42v-142q0 -14 7 -20.5t14 -5.5h7l280 210h-3q65 39 104 106t39 146q0 122 -86 208t-208 86v0z" />
79 |     <glyph glyph-name="sanheng" unicode="&#xe605;" 
80 | d="M942 271h-860q-12 0 -20.5 8.5t-8.5 20.5t8.5 20.5t20.5 8.5h860q12 0 20.5 -8.5t8.5 -20.5t-8.5 -20.5t-20.5 -8.5v0zM942 271h-860q-12 0 -20.5 8.5t-8.5 20.5t8.5 20.5t20.5 8.5h860q12 0 20.5 -8.5t8.5 -20.5t-8.5 -20.5t-20.5 -8.5v0zM942 -52h-860q-12 0 -20.5 8.5
81 | t-8.5 21t8.5 21t20.5 8.5h860q12 0 20.5 -8.5t8.5 -21t-8.5 -21t-20.5 -8.5v0zM942 -52h-860q-12 0 -20.5 8.5t-8.5 21t8.5 21t20.5 8.5h860q12 0 20.5 -8.5t8.5 -21t-8.5 -21t-20.5 -8.5v0zM942 593h-860q-12 0 -20.5 8.5t-8.5 21t8.5 21t20.5 8.5h860q12 0 20.5 -8.5
82 | t8.5 -21t-8.5 -21t-20.5 -8.5v0zM942 593h-860q-12 0 -20.5 8.5t-8.5 21t8.5 21t20.5 8.5h860q12 0 20.5 -8.5t8.5 -21t-8.5 -21t-20.5 -8.5v0z" />
83 |     <glyph glyph-name="guanbi" unicode="&#xe606;" 
84 | d="M555 384l299 299q9 9 9 22t-9 21q-9 9 -22 9t-21 -9l-299 -299l-299 299q-9 9 -22 9t-21 -9q-9 -9 -9 -22t9 -21l299 -299l-299 -299q-9 -9 -9 -22t9 -21q9 -9 21.5 -9t21.5 9l299 299l299 -299q9 -9 21.5 -9t21.5 9t9 22t-9 21z" />
85 |     <glyph glyph-name="jiantou-copy" unicode="&#xe610;" 
86 | d="M630 867q9 0 17 -6q9 -7 11 -18.5t-5 -21.5l-297 -418l297 -417q7 -10 5 -21.5t-11.5 -18.5t-21.5 -5t-19 11l-309 435q-12 16 0 33l309 434q9 13 24 13z" />
87 |     <glyph glyph-name="icon1460281299377" unicode="&#xe604;" 
88 | d="M966 122q0 -11 -8.5 -19.5t-20 -8.5t-19.5 9l-402 401l-402 -401q-8 -9 -19.5 -9t-20 8.5t-8.5 20t9 19.5l421 422q8 8 20 8t20 -8l422 -422q8 -8 8 -20v0v0z" />
89 |     <glyph glyph-name="jiantou-copy-copy" unicode="&#xe603;" 
90 | d="M394 -77q-9 0 -17 5q-9 7 -11 19t5 22l297 417l-297 417q-7 10 -5 22t11.5 19t21.5 5t19 -12l309 -434q12 -17 0 -34l-309 -434q-9 -12 -24 -12z" />
91 |   </font>
92 | </defs></svg>
93 | 


--------------------------------------------------------------------------------
/images/views.py:
--------------------------------------------------------------------------------
  1 | from django.shortcuts import render
  2 | from images.models import *
  3 | import random, json
  4 | from django.http import HttpResponse
  5 | from config import site_name, site_url, key_word, description, email,friendly_link
  6 | 
  7 | 
  8 | 
  9 | def index(request):
 10 |     if request.method == "GET":
 11 |         imgs = []
 12 |         page_list = Page.objects.all().order_by('?')[:50]
 13 |         typedict, typelist = type_list()
 14 |         for pid in page_list:
 15 |             id = pid.id
 16 |             title = pid.title
 17 |             firstimg = pid.firstimg
 18 |             sendtime = pid.sendtime
 19 |             hot = pid.hot
 20 |             type_id = pid.typeid
 21 |             imgs.append({"pid": id, "firstimg": firstimg, "title": title, "sendtime": sendtime, "hot": hot,
 22 |                          "type": typedict[type_id], "type_id": type_id})
 23 |         return render(request, 'index.html',
 24 |                       {"data": imgs, "typelist": typelist, "siteName": site_name, "keyWord": key_word,
 25 |                        "description": description, "siteUrl": site_url, "email": email})
 26 | 
 27 | 
 28 | def page(request, i_id):
 29 |     # try:
 30 |     page_arr = Page.objects.get(id=i_id)
 31 |     imgs = []
 32 |     tags = []
 33 |     typedict, typelist = type_list()
 34 |     page_hot = page_arr.hot
 35 |     page_arr.hot = page_hot + 1
 36 |     page_arr.save()
 37 |     time = page_arr.sendtime
 38 |     typeid = page_arr.typeid
 39 |     pagetype = Type.objects.get(id=typeid).type
 40 |     title = page_arr.title
 41 |     taglist = page_arr.tagid
 42 |     tag_arr = taglist.replace("[", "").replace("]", "").split(",")
 43 |     for t_id in tag_arr:
 44 |         tagid = t_id.strip(" ")
 45 |         tag = Tag.objects.get(id=tagid).tag
 46 |         tags.append({"tname": tag, "tid": tagid})
 47 |     imglist = Image.objects.filter(pageid=i_id)
 48 |     for img_arr in imglist:
 49 |         img = img_arr.imageurl
 50 |         imgs.append(img)
 51 |     if len(tags) > 4:
 52 |         tags = random.sample(tags, 4)
 53 |     typename = typedict[typeid]
 54 |     return render(request, 'page.html',
 55 |                   {"data": imgs, "tag": tags, "title": title, "type": pagetype, "typeid": str(typeid), "time": time,
 56 |                    "similar": page_similar(typeid), "typelist": typelist, "pageid": i_id, "siteName": site_name,
 57 |                    "keyWord": key_word, "description": description, "typeName": typename, "siteUrl": site_url,
 58 |                    "email": email,"friendly_link":friendly_link})
 59 | 
 60 | 
 61 | def tag(request, tid):
 62 |     if request.method == "GET":
 63 |         imgs = []
 64 |         page_list = Page.objects.all().order_by("-id")
 65 |         typedict, typelist = type_list()
 66 |         for pid in page_list:
 67 |             if tid in pid.tagid:
 68 |                 id = pid.id
 69 |                 title = pid.title
 70 |                 firstimg = pid.firstimg
 71 |                 type_id = pid.typeid
 72 |                 sendtime = pid.sendtime
 73 |                 hot = pid.hot
 74 |                 imgs.append({"pid": id, "firstimg": firstimg, "title": title, "sendtime": sendtime, "hot": hot,
 75 |                              "type": typedict[type_id], "type_id": type_id})
 76 |         return render(request, 'index.html',
 77 |                       {"data": imgs, "typelist": typelist, "siteName": site_name, "keyWord": key_word,
 78 |                        "description": description, "siteUrl": site_url, "email": email,"friendly_link":friendly_link})
 79 | 
 80 | 
 81 | def type(request, typeid):
 82 |     if request.method == "GET":
 83 |         imgs = []
 84 |         typedict, typelist = type_list()
 85 |         page_list = Page.objects.filter(typeid=typeid).order_by("-id")
 86 |         for pid in page_list:
 87 |             title = pid.title
 88 |             firstimg = pid.firstimg
 89 |             id = pid.id
 90 |             hot = pid.hot
 91 |             type_id = pid.typeid
 92 |             sendtime = pid.sendtime
 93 |             imgs.append({"pid": id, "firstimg": firstimg, "title": title, "sendtime": sendtime, "hot": hot,
 94 |                          "type": typedict[type_id], "type_id": type_id})
 95 |         return render(request, 'category.html',
 96 |                       {"data": imgs, "typelist": typelist, "typeid": str(typeid), "siteName": site_name,
 97 |                        "keyWord": key_word, "description": description, "siteUrl": site_url, "email": email,"friendly_link":friendly_link})
 98 | 
 99 | 
100 | def page_similar(id):
101 |     similarlist = []
102 |     sidlist = Page.objects.filter(typeid=id).order_by("?")
103 |     type = Type.objects.get(id=id).type
104 |     i = 0
105 |     for s in sidlist:
106 |         if i < 20:
107 |             stitle = s.title
108 |             pid = s.id
109 |             tid = s.typeid
110 |             firstimg = s.firstimg
111 |             sendtime = s.sendtime
112 |             hot = s.hot
113 |             if pid != id:
114 |                 similarlist.append(
115 |                     {"stitle": stitle, "tid": tid, "pid": pid, "firstimg": firstimg, "sendtime": sendtime, "hot": hot,
116 |                      "type": type, "type_id": tid
117 |                      })
118 |                 i += 1
119 |     return similarlist
120 | 
121 | 
122 | def search(request):
123 |     if "s" in request.GET:
124 |         imgs = []
125 |         typedict, typelist = type_list()
126 |         context = request.GET['s']
127 |         pagelist = Page.objects.filter(title__contains=context).order_by("-id")
128 |         for pid in pagelist:
129 |             title = pid.title
130 |             firstimg = pid.firstimg
131 |             id = pid.id
132 |             hot = pid.hot
133 |             type_id = pid.typeid
134 |             sendtime = pid.sendtime
135 |             imgs.append({"pid": id, "firstimg": firstimg, "title": title, "sendtime": sendtime, "hot": hot,
136 |                          "type": typedict[type_id], "type_id": type_id})
137 |         return render(request, 'index.html',
138 |                       {"data": imgs, "typelist": typelist, "siteName": site_name, "keyWord": key_word,
139 |                        "description": description, "siteUrl": site_url, "email": email,"friendly_link":friendly_link})
140 | 
141 | 
142 | def HotTag(request):
143 |     tag_sql = Tag.objects.all().order_by("?")
144 |     tag_dict = {}
145 |     tag_id_list = []
146 |     page_sql = Page.objects.all()
147 |     page_dict = {}
148 |     return_list = []
149 |     typedict, typelist = type_list()
150 |     for alltag in tag_sql:
151 |         tag_dict.update({str(alltag.id).strip(): alltag.tag})
152 |     for page in page_sql:
153 |         title = page.title
154 |         pid = page.id
155 |         tag_id = page.tagid.replace("[", "").replace("]", "").split(",")
156 |         for t in tag_id:
157 |             if str(t).strip() == '':
158 |                 pass
159 |             else:
160 |                 if str(t).strip() not in tag_id_list:
161 |                     page_dict.update({str(t).strip(): 1})
162 |                     tag_id_list.append(str(t).strip())
163 |                 else:
164 |                     view = page_dict[str(t).strip()]
165 |                     page_dict.update({str(t).strip(): view + 1})
166 | 
167 |     page_dict_sort = sorted(page_dict.items(), key=lambda d: d[1], reverse=True)
168 |     for i in page_dict_sort:
169 |         if page_dict[str(i[0])] > 20:
170 |             return_list.append(
171 |                 {"tid": i[0], "tag": tag_dict[str(i[0]).strip()], "viwe": page_dict[str(i[0].strip())]}
172 |             )
173 |     return render(request, 'tag.html',
174 |                   {"data": return_list, "typelist": typelist, "keyword": return_list[0:10], "siteName": site_name,
175 |                    "keyWord": key_word, "description": description, "siteUrl": site_url, "email": email,"friendly_link":friendly_link})
176 | 
177 | 
178 | def SortBy(request, method):
179 |     if request.method == "GET":
180 |         if method == "new":
181 |             page_list = Page.objects.all().order_by("-id")[:100]
182 |         else:
183 |             page_list = Page.objects.all().order_by("-hot")[:100]
184 |         imgs = []
185 |         type_dict, typelist = type_list()
186 |         for pid in page_list:
187 |             title = pid.title
188 |             firstimg = pid.firstimg
189 |             id = pid.id
190 |             hot = pid.hot
191 |             type_id = pid.typeid
192 |             sendtime = pid.sendtime
193 |             imgs.append({"pid": id, "firstimg": firstimg, "title": title, "sendtime": sendtime, "hot": hot,
194 |                          "type": type_dict[type_id], "type_id": type_id})
195 | 
196 |         return render(request, 'sort.html',
197 |                       {"data": imgs, "typelist": typelist, "method": method, "siteName": site_name, "keyWord": key_word,
198 |                        "description": description, "siteUrl": site_url, "email": email,"friendly_link":friendly_link})
199 | 
200 | 
201 | def getVideo(request):
202 |     count = Video.objects.count()
203 |     video_info = ''
204 |     while True:
205 |         vid = random.randint(1, count)
206 |         try:
207 |             video_info = Video.objects.get(id=vid)
208 |             break
209 |         except:
210 |             continue
211 |     url = video_info.url
212 |     user_id = video_info.user_id
213 |     source = video_info.source
214 |     return HttpResponse(json.dumps({"url": url, "user_id": user_id, "source": source}))
215 | 
216 | 
217 | def mVideo(request):
218 |     if request.method == "GET":
219 |         count = Video.objects.count()
220 |         video_info = ''
221 |         while True:
222 |             vid = random.randint(1, count)
223 |             try:
224 |                 video_info = Video.objects.get(id=vid)
225 |                 break
226 |             except:
227 |                 continue
228 |         url = "https:"+video_info.url
229 |         return render(request, 'mVideo.html', {
230 |             "url": url,
231 |             "user_id": video_info.user_id,
232 |             "date_time": video_info.date_time,
233 |             "v_name": video_info.v_name,
234 |             "source": video_info.source, "siteName": site_name, "keyWord": key_word, "description": description,
235 |             "siteUrl": site_url, "email": email,"friendly_link":friendly_link})
236 | 
237 | 
238 | def pVideo(request):
239 |     if request.method == "GET":
240 |         typedict, typelist = type_list()
241 |         count = Video.objects.count()
242 |         video_info = ''
243 |         while True:
244 |             vid = random.randint(1, count)
245 |             try:
246 |                 video_info = Video.objects.get(id=vid)
247 |                 break
248 |             except:
249 |                 continue
250 |         url="https:"+video_info.url
251 |         return render(request, 'video.html', {
252 |             "url": url,
253 |             "user_id": video_info.user_id,
254 |             "date_time": video_info.date_time,
255 |             "v_name": video_info.v_name,
256 |             "source": video_info.source,
257 |             "typelist": typelist, "siteName": site_name, "keyWord": key_word, "description": description,
258 |             "siteUrl": site_url, "email": email,"friendly_link":friendly_link})
259 | 
260 | 
261 | def type_list():
262 |     typelist = []
263 |     type_list = Type.objects.all().order_by("id")
264 |     type_dict = {}
265 |     for type_arr in type_list:
266 |         type = type_arr.type
267 |         type_id = type_arr.id
268 |         typelist.append({"type": type, "type_id": str(type_id)})
269 |         type_dict.update({type_id: type})
270 |     return type_dict, typelist
271 | 


--------------------------------------------------------------------------------