├── Makefile.am
├── Makefile.in
├── README.md
├── aclocal.m4
├── aclocal
├── ax_boost.m4
├── ax_libevent.m4
├── ax_log4cplus.m4
├── ax_mongo_client.m4
├── ax_qt5.m4
└── ax_thrift.m4
├── conf.xml.demo
├── config.guess
├── config.sub
├── configure
├── configure.ac
├── create.sql
├── depcomp
├── install-sh
├── log.properties.demo
├── lspider.thrift
├── maindomain.list.demo
├── missing
├── readme.txt
├── src
├── CrawlService.cpp
├── CrawlService.h
├── DoubleList.h
├── atomic.h
├── cmd_ctrler.cpp
├── cmd_ctrler.h
├── conf.cpp
├── conf.h
├── controllable.cpp
├── controllable.h
├── crawl_listen_handler.cpp
├── crawl_listen_handler.h
├── defines.h
├── extractor.cpp
├── extractor.h
├── extractor_worker_view.cpp
├── extractor_worker_view.h
├── http_event_engine.cpp
├── http_event_engine.h
├── http_processor.cpp
├── http_processor.h
├── http_protocol.cpp
├── http_protocol.h
├── keyed_queue.h
├── link.h
├── link_scheduler.cpp
├── link_scheduler.h
├── link_table.cpp
├── link_table.h
├── locked_map.h
├── locked_queue.h
├── logger_container.cpp
├── logger_container.h
├── lspider_client.cpp
├── lspider_constants.cpp
├── lspider_constants.h
├── lspider_types.cpp
├── lspider_types.h
├── lthread.cpp
├── lthread.h
├── main.cpp
├── moc_extractor_worker_view.cpp
├── moc_link_scheduler.cpp
├── moc_mysql_dumper.cpp
├── moc_mysql_selector.cpp
├── mongo_dumper.cpp
├── mongo_dumper.h
├── mutex_lock.h
├── mysql_base.cpp
├── mysql_base.h
├── mysql_dumper.cpp
├── mysql_dumper.h
├── mysql_selector.cpp
├── mysql_selector.h
├── parse_url.cpp
├── request_recv.cpp
├── request_recv.h
├── singleton.h
├── synced_queue.h
├── url_context.cpp
├── url_context.h
├── url_tools.cpp
├── url_tools.h
├── util.cpp
└── util.h
├── stamp-h1
└── test
├── Makefile
├── test_backtrace.cpp
├── test_bson.cpp
├── test_fetchmaindomain.cpp
├── test_keyedqueue.cpp
├── test_log.cpp
├── test_mongo.cpp
├── test_mysql.cpp
├── test_priorityqueue.cpp
├── test_qtextcodec.cpp
├── test_qweb.cpp
├── test_urlnormalize.cpp
└── test_util.cpp
/Makefile.am:
--------------------------------------------------------------------------------
1 | # Automake input: builds the three programs lspider, lspider_client and parse_url.
2 | AUTOMAKE_OPTIONS=foreign
3 |
4 | # Programs to build and install.
5 | bin_PROGRAMS=lspider lspider_client parse_url
6 |
7 | # Sources of the lspider program. The src/moc_*.cpp files are produced from the
8 | # matching headers with moc-qt5 (the commands are listed in readme.txt).
9 | lspider_SOURCES=src/main.cpp src/conf.cpp src/extractor_worker_view.cpp src/lspider_constants.cpp src/logger_container.cpp src/moc_link_scheduler.cpp src/mysql_base.cpp src/url_context.cpp \
10 | src/crawl_listen_handler.cpp src/http_event_engine.cpp src/lspider_types.cpp src/lthread.cpp src/moc_mysql_dumper.cpp src/mysql_dumper.cpp src/url_tools.cpp \
11 | src/CrawlService.cpp src/http_processor.cpp src/link_scheduler.cpp src/moc_mysql_selector.cpp src/mysql_selector.cpp src/util.cpp \
12 | src/extractor.cpp src/http_protocol.cpp src/link_table.cpp src/moc_extractor_worker_view.cpp src/mongo_dumper.cpp src/request_recv.cpp \
13 | src/cmd_ctrler.cpp src/controllable.cpp
14 | # Include paths come from the variables substituted by configure.
15 | # $(QT_CPPFLAGS)/QtWebKit appends a subdirectory to the single -I path held in
16 | # QT_CPPFLAGS, so it only works while that variable contains exactly one -I flag.
17 | lspider_CPPFLAGS=$(QT_CPPFLAGS) $(MONGO_CLIENT_CPPFLAGS) \
18 | $(BOOST_CPPFLAGS) $(LOG4CPLUS_CPPFLAGS) $(LIBEVENT_CPPFLAGS) \
19 | $(QT_CPPFLAGS)/QtWebKit $(THRIFT_CPPFLAGS) -fPIC
20 |
21 | # Sources and preprocessor flags of the lspider_client program.
22 | lspider_client_SOURCES=src/lspider_client.cpp src/lspider_constants.cpp src/CrawlService.cpp src/lspider_types.cpp src/url_context.cpp src/logger_container.cpp src/url_tools.cpp src/util.cpp
23 | lspider_client_CPPFLAGS=$(QT_CPPFLAGS) $(MONGO_CLIENT_CPPFLAGS) \
24 | $(BOOST_CPPFLAGS) $(LOG4CPLUS_CPPFLAGS) $(LIBEVENT_CPPFLAGS) \
25 | $(QT_CPPFLAGS)/QtWebKit $(THRIFT_CPPFLAGS) -fPIC
26 |
27 | # parse_url is built from two sources and has no per-program flags.
28 | parse_url_SOURCES=src/parse_url.cpp src/url_tools.cpp
29 |
30 | # Library search paths and libraries. LIBS is not per-program, so all three
31 | # programs get the same list. $(QT_LDFLAGS)/ relies on QT_LDFLAGS holding a
32 | # single -L path.
33 | LIBS=$(QT_LDFLAGS)/ -lQt5Core -lQt5Widgets -lQt5WebKit -lQt5WebKitWidgets -lQt5Network -lQt5Sql \
34 | $(MONGO_CLIENT_LDFLAGS) -lmongoclient $(LIBEVENT_LDFLAGS) -levent -levent_core -levent_extra -levent_pthreads \
35 | $(BOOST_LDFLAGS) -lboost_system -lboost_thread-mt -lboost_filesystem -lboost_program_options -lboost_regex $(LOG4CPLUS_LDFLAGS) -llog4cplus \
36 | $(THRIFT_LDFLAGS) -lthrift -lz -lpthread -lreadline
37 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | lspider
2 | =====
3 | Linux下的轻量级网页抓取系统
4 |
5 | [下载地址](https://github.com/warmheartli/lspider/archive/master.zip)
6 |
7 | ### 功能特性
8 | - 将整套spider系统功能浓缩到一个二进制中,部署轻便
9 | - 多线程和并发网络通信,性能高,资源利用充分,单机可同时抓取数百站点并能保持cpu低占用
10 | - 简易配置,指定待抓取站点列表和高质量种子链接即可自适应抓取扩散至整站,参考链接深度的广度优先遍历同时保证时新性
11 | - 通过配置不同站点ip的抓取压力,全异步的压力控制和链接调度,优雅抓取,避免封禁
12 | - 基于WebKit的可定制的页面渲染和链接提取,便于性能和效果之间的权衡
13 | - mysql作为链接存储介质,更高效;mongo作为网页存储介质,便于字段扩展和快速查询
14 | - 天然支持分布式部署(基于站点划分),可横向扩展
15 |
16 | ### 安装依赖
17 | - libevent (>=2.0.22)
18 | - thrift (>=0.9.2)
19 | - qt5(含 QtWebKit 模块)
20 | - mongo-cxx-driver-legacy (>=1.0.2)
21 | - log4cplus (>=1.2.0)
22 | - boost (>=1.58.0)
23 |
24 | ### 安装方法(RHEL6)
25 |
26 | ===================================================================================================================
27 |
28 | ## 一、安装依赖库(包括libevent、boost、mongo-client、qt、thrift)
29 |
30 | ### 1、安装libevent
31 |
32 | 在http://libevent.org/ 中找最新版(如2.0.22)
33 |
34 | `wget https://sourceforge.net/projects/levent/files/libevent/libevent-2.0/libevent-2.0.22-stable.tar.gz`
35 |
36 | 解压后执行
37 |
38 | `./configure --prefix=${HOME}/libevent`
39 |
40 | `make`
41 |
42 | `make install`
43 |
44 | ### 2、安装boost
45 |
46 | `yum install boost-devel`
47 |
48 | ### 3、安装scons(因为下面编译mongoclient要用到)
49 |
50 | `yum install scons`
51 |
52 | ### 4、安装mongo-client
53 |
54 | 在https://github.com/mongodb/mongo-cxx-driver/releases/ 中找最新版(如1.0.2)
55 |
56 | `wget https://github.com/mongodb/mongo-cxx-driver/archive/legacy-1.0.2.tar.gz`
57 |
58 | 解压后执行
59 |
60 | `cd mongo-cxx-driver-legacy-1.0.2`
61 |
62 | `scons --prefix=$HOME/mongo-client install`
63 |
64 | ### 5、安装log4cplus
65 |
66 | `yum install log4cplus-devel`
67 |
68 | ### 6、安装qt5
69 |
70 | `yum install qt5-qtbase-devel`
71 |
72 | `yum install qt5-qtwebkit-devel`
73 |
74 | ### 7、安装bison(后面编译安装thrift会用到)
75 |
76 | 从http://ftp.gnu.org/gnu/bison/bison-2.7.tar.gz 下载2.5以上版本的bison,解压后执行
77 |
78 | `cd bison-2.7`
79 |
80 | `./configure`
81 |
82 | `make`
83 |
84 | `make install`
85 |
86 | ### 8、安装thrift
87 |
88 | 从http://thrift.apache.org/download 下载最新版thrift的源代码(如:http://apache.fayea.com/thrift/0.9.0/thrift-0.9.0.tar.gz),解压后执行
89 |
90 | `cd thrift-0.9.0`
91 |
92 | `./configure --prefix=${HOME}/thrift --without-csharp --without-java --without-erlang --without-nodejs --without-lua --without-python --without-perl --without-ruby --without-haskell --without-go --without-d`
93 |
94 | `make`
95 |
96 | `make install`
97 |
98 | ### 9、安装高版本autoconf
99 |
100 | 从ftp://ftp.gnu.org/gnu/autoconf/ 下载最新版autoconf的源代码(如:ftp://ftp.gnu.org/gnu/autoconf/autoconf-2.69.tar.gz),解压后执行
101 |
102 | `./configure`
103 |
104 | `make`
105 |
106 | `make install`
107 |
108 | ### 10、安装高版本automake
109 |
110 | 从ftp://ftp.gnu.org/gnu/automake/ 下载最新版automake的源代码(如:ftp://ftp.gnu.org/gnu/automake/automake-1.13.tar.gz),解压后执行
111 |
112 | `./configure`
113 |
114 | `make`
115 |
116 | `make install`
117 |
118 | ## 二、安装lspider
119 |
120 | ### 1.从github下载源码
121 |
122 | `git clone https://github.com/warmheartli/lspider.git`
123 |
124 | ### 2.编译安装
125 |
126 | `./configure --prefix=${HOME}/lspider --with-libevent-include=${HOME}/libevent/include/ --with-libevent-libdir=${HOME}/libevent/lib/ --with-mongo-client-include=${HOME}/mongo-client/include/ --with-mongo-client-libdir=${HOME}/mongo-client/lib/ --with-thrift-include=${HOME}/thrift/include/ --with-thrift-libdir=${HOME}/thrift/lib/`
127 |
128 | `make`
129 |
130 | `make install`
131 |
132 | ### 3.配置
133 |
134 | `mkdir -p ${HOME}/lspider/conf ${HOME}/lspider/log`
135 |
136 | `cp conf.xml.demo ${HOME}/lspider/conf/conf.xml`
137 |
138 | `cp log.properties.demo ${HOME}/lspider/conf/log.properties`
139 |
140 | `cp maindomain.list.demo ${HOME}/lspider/conf/maindomain.list`
141 |
142 | `export LD_LIBRARY_PATH=${HOME}/libevent/lib/:${HOME}/thrift/lib/:${LD_LIBRARY_PATH}`
143 |
144 | ### 4.安装xvfb
145 |
146 | `yum install xorg-x11-server-Xvfb`
147 |
148 | ## 三、运行lspider
149 |
150 | ### 1.确定mysql和mongo已经启动并执行
151 |
152 | `xvfb-run --server-args="-screen 0, 1024x768x24" ./bin/lspider`
153 |
--------------------------------------------------------------------------------
/aclocal/ax_libevent.m4:
--------------------------------------------------------------------------------
1 | dnl AX_LIBEVENT
2 | dnl Locate the libevent headers and libraries for the build.
3 | dnl
4 | dnl Options:
5 | dnl   --with-libevent-include=INCLUDE_DIR  header directory; defaults to /usr/include/
6 | dnl   --with-libevent-libdir=LIB_DIR       library directory; defaults to the first
7 | dnl                                        /usr/<libsubdir>/ that contains libevent*
8 | dnl Results:
9 | dnl   LIBEVENT_CPPFLAGS, LIBEVENT_LDFLAGS  substituted into the generated files
10 | dnl   succeeded                            shell variable, yes or no, tested by the caller
11 | AC_DEFUN([AX_LIBEVENT],
12 | [
13 |     AC_ARG_WITH([libevent-include],
14 |         [AS_HELP_STRING([--with-libevent-include=INCLUDE_DIR],
15 |             [use libevent])],
16 |         [
17 |             if test -d "$withval"
18 |             then
19 |                 ac_libevent_include_path="$withval"
20 |             else
21 |                 AC_MSG_ERROR([--with-libevent-include expected directory name])
22 |             fi
23 |         ],
24 |         [ac_libevent_include_path=""]
25 |     )
26 |
27 |     AC_ARG_WITH([libevent-libdir],
28 |         [AS_HELP_STRING([--with-libevent-libdir=LIB_DIR],
29 |             [Force given directory for libevent libraries. Note that this will override library path detection, so use this parameter only if default library detection fails and you know exactly where your libevent libraries are located.])],
30 |         [
31 |             if test -d "$withval"
32 |             then
33 |                 ac_libevent_lib_path="$withval"
34 |             else
35 |                 AC_MSG_ERROR([--with-libevent-libdir expected directory name])
36 |             fi
37 |         ],
38 |         [ac_libevent_lib_path=""]
39 |     )
40 |
41 |     # Subdirectories of /usr searched for the library, in order.
42 |     libsubdirs="lib64 libx32 lib lib64"
43 |
44 |     # Fall back to the default header directory when none was given.
45 |     if test "$ac_libevent_include_path" = ""; then
46 |         ac_libevent_include_path="/usr/include/"
47 |     fi
48 |     LIBEVENT_CPPFLAGS="-I$ac_libevent_include_path"
49 |
50 |     if test "$ac_libevent_lib_path" = ""; then
51 |         # Probe each /usr/<libsubdir>/ and stop at the first one that holds
52 |         # the library. The glob stays outside the quotes so the shell expands
53 |         # it. If nothing matches, the last candidate is kept and the check
54 |         # below leaves succeeded set to no.
55 |         for libsubdir in $libsubdirs ; do
56 |             if ls "/usr/$libsubdir/libevent"* >/dev/null 2>&1 ; then break; fi
57 |         done
58 |         ac_libevent_lib_path="/usr/$libsubdir/"
59 |     fi
60 |     LIBEVENT_LDFLAGS="-L$ac_libevent_lib_path"
61 |
62 |     # Both the header and at least one library file must exist.
63 |     succeeded=no
64 |     if ls "$ac_libevent_include_path/event.h" >/dev/null 2>&1 ; then
65 |         if ls "$ac_libevent_lib_path/libevent"* >/dev/null 2>&1 ; then
66 |             succeeded=yes
67 |         fi
68 |     fi
69 |
70 |     AC_SUBST(LIBEVENT_CPPFLAGS)
71 |     AC_SUBST(LIBEVENT_LDFLAGS)
72 |
73 | ])
74 |
--------------------------------------------------------------------------------
/aclocal/ax_log4cplus.m4:
--------------------------------------------------------------------------------
1 | dnl AX_LOG4CPLUS
2 | dnl Locate the log4cplus headers and libraries for the build.
3 | dnl
4 | dnl Options:
5 | dnl   --with-log4cplus-include=INCLUDE_DIR  header directory; defaults to /usr/include/
6 | dnl   --with-log4cplus-libdir=LIB_DIR       library directory; defaults to the first
7 | dnl                                         /usr/<libsubdir>/ that contains liblog4cplus*
8 | dnl Results:
9 | dnl   LOG4CPLUS_CPPFLAGS, LOG4CPLUS_LDFLAGS  substituted into the generated files
10 | dnl   succeeded                              shell variable, yes or no, tested by the caller
11 | AC_DEFUN([AX_LOG4CPLUS],
12 | [
13 |     AC_ARG_WITH([log4cplus-include],
14 |         [AS_HELP_STRING([--with-log4cplus-include=INCLUDE_DIR],
15 |             [use log4cplus])],
16 |         [
17 |             if test -d "$withval"
18 |             then
19 |                 ac_log4cplus_include_path="$withval"
20 |             else
21 |                 AC_MSG_ERROR([--with-log4cplus-include expected directory name])
22 |             fi
23 |         ],
24 |         [ac_log4cplus_include_path=""]
25 |     )
26 |
27 |     AC_ARG_WITH([log4cplus-libdir],
28 |         [AS_HELP_STRING([--with-log4cplus-libdir=LIB_DIR],
29 |             [Force given directory for log4cplus libraries. Note that this will override library path detection, so use this parameter only if default library detection fails and you know exactly where your log4cplus libraries are located.])],
30 |         [
31 |             if test -d "$withval"
32 |             then
33 |                 ac_log4cplus_lib_path="$withval"
34 |             else
35 |                 AC_MSG_ERROR([--with-log4cplus-libdir expected directory name])
36 |             fi
37 |         ],
38 |         [ac_log4cplus_lib_path=""]
39 |     )
40 |
41 |     # Subdirectories of /usr searched for the library, in order.
42 |     libsubdirs="lib64 libx32 lib lib64"
43 |
44 |     # Fall back to the default header directory when none was given.
45 |     if test "$ac_log4cplus_include_path" = ""; then
46 |         ac_log4cplus_include_path="/usr/include/"
47 |     fi
48 |     LOG4CPLUS_CPPFLAGS="-I$ac_log4cplus_include_path"
49 |
50 |     if test "$ac_log4cplus_lib_path" = ""; then
51 |         # Probe each /usr/<libsubdir>/ and stop at the first one that holds
52 |         # the library. The glob stays outside the quotes so the shell expands
53 |         # it. If nothing matches, the last candidate is kept and the check
54 |         # below leaves succeeded set to no.
55 |         for libsubdir in $libsubdirs ; do
56 |             if ls "/usr/$libsubdir/liblog4cplus"* >/dev/null 2>&1 ; then break; fi
57 |         done
58 |         ac_log4cplus_lib_path="/usr/$libsubdir/"
59 |     fi
60 |     LOG4CPLUS_LDFLAGS="-L$ac_log4cplus_lib_path"
61 |
62 |     # Both the log4cplus header entry and at least one library file must exist.
63 |     succeeded=no
64 |     if ls "$ac_log4cplus_include_path/log4cplus" >/dev/null 2>&1 ; then
65 |         if ls "$ac_log4cplus_lib_path/liblog4cplus"* >/dev/null 2>&1 ; then
66 |             succeeded=yes
67 |         fi
68 |     fi
69 |
70 |     AC_SUBST(LOG4CPLUS_CPPFLAGS)
71 |     AC_SUBST(LOG4CPLUS_LDFLAGS)
72 |
73 | ])
74 |
--------------------------------------------------------------------------------
/aclocal/ax_mongo_client.m4:
--------------------------------------------------------------------------------
1 | dnl AX_MONGO_CLIENT
2 | dnl Locate the mongo-client headers and libraries for the build.
3 | dnl
4 | dnl Options:
5 | dnl   --with-mongo-client-include=INCLUDE_DIR  header directory; defaults to
6 | dnl                                            /usr/include/mongo-client/
7 | dnl   --with-mongo-client-libdir=LIB_DIR       library directory; defaults to the first
8 | dnl                                            /usr/<libsubdir>/ that contains libmongoclient*
9 | dnl Results:
10 | dnl   MONGO_CLIENT_CPPFLAGS, MONGO_CLIENT_LDFLAGS  substituted into the generated files
11 | dnl   succeeded                                    shell variable, yes or no, tested by the caller
12 | AC_DEFUN([AX_MONGO_CLIENT],
13 | [
14 |     AC_ARG_WITH([mongo-client-include],
15 |         [AS_HELP_STRING([--with-mongo-client-include=INCLUDE_DIR],
16 |             [use mongo-client])],
17 |         [
18 |             if test -d "$withval"
19 |             then
20 |                 ac_mongo_client_include_path="$withval"
21 |             else
22 |                 AC_MSG_ERROR([--with-mongo-client-include expected directory name])
23 |             fi
24 |         ],
25 |         [ac_mongo_client_include_path=""]
26 |     )
27 |
28 |     AC_ARG_WITH([mongo-client-libdir],
29 |         [AS_HELP_STRING([--with-mongo-client-libdir=LIB_DIR],
30 |             [Force given directory for mongo-client libraries. Note that this will override library path detection, so use this parameter only if default library detection fails and you know exactly where your mongo-client libraries are located.])],
31 |         [
32 |             if test -d "$withval"
33 |             then
34 |                 ac_mongo_client_lib_path="$withval"
35 |             else
36 |                 AC_MSG_ERROR([--with-mongo-client-libdir expected directory name])
37 |             fi
38 |         ],
39 |         [ac_mongo_client_lib_path=""]
40 |     )
41 |
42 |     # Subdirectories of /usr searched for the library, in order.
43 |     libsubdirs="lib64 libx32 lib lib64"
44 |
45 |     # Fall back to the default header directory when none was given.
46 |     if test "$ac_mongo_client_include_path" = ""; then
47 |         ac_mongo_client_include_path="/usr/include/mongo-client/"
48 |     fi
49 |     MONGO_CLIENT_CPPFLAGS="-I$ac_mongo_client_include_path"
50 |
51 |     if test "$ac_mongo_client_lib_path" = ""; then
52 |         # Probe each /usr/<libsubdir>/ and stop at the first one that holds
53 |         # the library. The glob stays outside the quotes so the shell expands
54 |         # it. If nothing matches, the last candidate is kept and the check
55 |         # below leaves succeeded set to no.
56 |         for libsubdir in $libsubdirs ; do
57 |             if ls "/usr/$libsubdir/libmongoclient"* >/dev/null 2>&1 ; then break; fi
58 |         done
59 |         ac_mongo_client_lib_path="/usr/$libsubdir/"
60 |     fi
61 |     MONGO_CLIENT_LDFLAGS="-L$ac_mongo_client_lib_path"
62 |
63 |     # Both the mongo header entry and at least one library file must exist.
64 |     succeeded=no
65 |     if ls "$ac_mongo_client_include_path/mongo" >/dev/null 2>&1 ; then
66 |         if ls "$ac_mongo_client_lib_path/libmongoclient"* >/dev/null 2>&1 ; then
67 |             succeeded=yes
68 |         fi
69 |     fi
70 |
71 |     AC_SUBST(MONGO_CLIENT_CPPFLAGS)
72 |     AC_SUBST(MONGO_CLIENT_LDFLAGS)
73 |
74 | ])
75 |
--------------------------------------------------------------------------------
/aclocal/ax_qt5.m4:
--------------------------------------------------------------------------------
1 | dnl AX_QT
2 | dnl Locate the Qt headers and libraries for the build.
3 | dnl
4 | dnl Options:
5 | dnl   --with-qt-include=INCLUDE_DIR  header directory; defaults to /usr/include/qt5/
6 | dnl   --with-qt-libdir=LIB_DIR       library directory; defaults to the first
7 | dnl                                  /usr/<libsubdir>/ that contains libQt*
8 | dnl Results:
9 | dnl   QT_CPPFLAGS, QT_LDFLAGS  substituted into the generated files
10 | dnl   succeeded                shell variable, yes or no, tested by the caller
11 | AC_DEFUN([AX_QT],
12 | [
13 |     AC_ARG_WITH([qt-include],
14 |         [AS_HELP_STRING([--with-qt-include=INCLUDE_DIR],
15 |             [use qt])],
16 |         [
17 |             if test -d "$withval"
18 |             then
19 |                 ac_qt_include_path="$withval"
20 |             else
21 |                 AC_MSG_ERROR([--with-qt-include expected directory name])
22 |             fi
23 |         ],
24 |         [ac_qt_include_path=""]
25 |     )
26 |
27 |     AC_ARG_WITH([qt-libdir],
28 |         [AS_HELP_STRING([--with-qt-libdir=LIB_DIR],
29 |             [Force given directory for qt libraries. Note that this will override library path detection, so use this parameter only if default library detection fails and you know exactly where your qt libraries are located.])],
30 |         [
31 |             if test -d "$withval"
32 |             then
33 |                 ac_qt_lib_path="$withval"
34 |             else
35 |                 AC_MSG_ERROR([--with-qt-libdir expected directory name])
36 |             fi
37 |         ],
38 |         [ac_qt_lib_path=""]
39 |     )
40 |
41 |     # Subdirectories of /usr searched for the library, in order.
42 |     libsubdirs="lib64 libx32 lib lib64"
43 |
44 |     # Fall back to the default header directory when none was given.
45 |     if test "$ac_qt_include_path" = ""; then
46 |         ac_qt_include_path="/usr/include/qt5/"
47 |     fi
48 |     QT_CPPFLAGS="-I$ac_qt_include_path"
49 |
50 |     if test "$ac_qt_lib_path" = ""; then
51 |         # Probe each /usr/<libsubdir>/ and stop at the first one that holds
52 |         # the library. The glob stays outside the quotes so the shell expands
53 |         # it. If nothing matches, the last candidate is kept and the check
54 |         # below leaves succeeded set to no.
55 |         for libsubdir in $libsubdirs ; do
56 |             if ls "/usr/$libsubdir/libQt"* >/dev/null 2>&1 ; then break; fi
57 |         done
58 |         ac_qt_lib_path="/usr/$libsubdir/"
59 |     fi
60 |     QT_LDFLAGS="-L$ac_qt_lib_path"
61 |
62 |     # Both the QtGui header entry and at least one library file must exist.
63 |     succeeded=no
64 |     if ls "$ac_qt_include_path/QtGui" >/dev/null 2>&1 ; then
65 |         if ls "$ac_qt_lib_path/libQt"* >/dev/null 2>&1 ; then
66 |             succeeded=yes
67 |         fi
68 |     fi
69 |
70 |     AC_SUBST(QT_CPPFLAGS)
71 |     AC_SUBST(QT_LDFLAGS)
72 |
73 | ])
74 |
--------------------------------------------------------------------------------
/aclocal/ax_thrift.m4:
--------------------------------------------------------------------------------
1 | dnl AX_THRIFT
2 | dnl Locate the thrift headers and libraries for the build.
3 | dnl
4 | dnl Options:
5 | dnl   --with-thrift-include=INCLUDE_DIR  header directory; defaults to /usr/include/
6 | dnl   --with-thrift-libdir=LIB_DIR       library directory; defaults to the first
7 | dnl                                      /usr/<libsubdir>/ that contains libthrift*
8 | dnl Results:
9 | dnl   THRIFT_CPPFLAGS, THRIFT_LDFLAGS  substituted into the generated files
10 | dnl   succeeded                        shell variable, yes or no, tested by the caller
11 | AC_DEFUN([AX_THRIFT],
12 | [
13 |     AC_ARG_WITH([thrift-include],
14 |         [AS_HELP_STRING([--with-thrift-include=INCLUDE_DIR],
15 |             [use thrift])],
16 |         [
17 |             if test -d "$withval"
18 |             then
19 |                 ac_thrift_include_path="$withval"
20 |             else
21 |                 AC_MSG_ERROR([--with-thrift-include expected directory name])
22 |             fi
23 |         ],
24 |         [ac_thrift_include_path=""]
25 |     )
26 |
27 |     AC_ARG_WITH([thrift-libdir],
28 |         [AS_HELP_STRING([--with-thrift-libdir=LIB_DIR],
29 |             [Force given directory for thrift libraries. Note that this will override library path detection, so use this parameter only if default library detection fails and you know exactly where your thrift libraries are located.])],
30 |         [
31 |             if test -d "$withval"
32 |             then
33 |                 ac_thrift_lib_path="$withval"
34 |             else
35 |                 AC_MSG_ERROR([--with-thrift-libdir expected directory name])
36 |             fi
37 |         ],
38 |         [ac_thrift_lib_path=""]
39 |     )
40 |
41 |     # Subdirectories of /usr searched for the library, in order.
42 |     libsubdirs="lib64 libx32 lib lib64"
43 |
44 |     # Fall back to the default header directory when none was given.
45 |     if test "$ac_thrift_include_path" = ""; then
46 |         ac_thrift_include_path="/usr/include/"
47 |     fi
48 |     THRIFT_CPPFLAGS="-I$ac_thrift_include_path"
49 |
50 |     if test "$ac_thrift_lib_path" = ""; then
51 |         # Probe each /usr/<libsubdir>/ and stop at the first one that holds
52 |         # the library. The glob stays outside the quotes so the shell expands
53 |         # it. If nothing matches, the last candidate is kept and the check
54 |         # below leaves succeeded set to no.
55 |         for libsubdir in $libsubdirs ; do
56 |             if ls "/usr/$libsubdir/libthrift"* >/dev/null 2>&1 ; then break; fi
57 |         done
58 |         ac_thrift_lib_path="/usr/$libsubdir/"
59 |     fi
60 |     THRIFT_LDFLAGS="-L$ac_thrift_lib_path"
61 |
62 |     # Both the thrift header entry and at least one library file must exist.
63 |     succeeded=no
64 |     if ls "$ac_thrift_include_path/thrift" >/dev/null 2>&1 ; then
65 |         if ls "$ac_thrift_lib_path/libthrift"* >/dev/null 2>&1 ; then
66 |             succeeded=yes
67 |         fi
68 |     fi
69 |
70 |     AC_SUBST(THRIFT_CPPFLAGS)
71 |     AC_SUBST(THRIFT_LDFLAGS)
72 |
73 | ])
74 |
--------------------------------------------------------------------------------
/conf.xml.demo:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | localhost
6 | 3306
7 | root
8 |
9 | lspider
10 | link
11 |
12 |
13 | on
14 | 10
15 | 1
16 | ./data/maindomain.list
17 | 5
18 |
19 |
20 | 20
21 | 20
22 |
23 |
24 |
25 |
26 | 5
27 | 20
28 | 1
29 | 5
30 | 2
31 | 512
32 | 5
33 | 524288
34 | 1000
35 |
36 |
37 |
38 | 5
39 | 20
40 | 5
41 |
42 |
43 |
44 | localhost
45 |
46 |
47 |
48 | 300
49 | 10000>
50 |
51 |
52 |
53 |
--------------------------------------------------------------------------------
/configure.ac:
--------------------------------------------------------------------------------
1 | # -*- Autoconf -*-
2 | # Process this file with autoconf to produce a configure script.
3 |
4 | AC_PREREQ([2.69])
5 | AC_INIT([lspider], [0.1], [whlichuang@126.com])
6 | AC_CONFIG_SRCDIR([src/main.cpp])
7 | AM_INIT_AUTOMAKE
8 |
9 | # Checks for programs.
10 | AC_PROG_CXX
11 | AC_PROG_CC
12 |
13 | # Checks for libraries.
14 | # Every library check below is followed by a test of the shell variable
15 | # "succeeded"; configure aborts as soon as one of them is not "yes".
16 | AX_QT
17 | if test "x$succeeded" != "xyes";then
18 |     AC_MSG_ERROR([checking qt fail])
19 | fi
20 |
21 | AX_MONGO_CLIENT
22 | if test "x$succeeded" != "xyes";then
23 |     AC_MSG_ERROR([checking mongo-client fail])
24 | fi
25 |
26 | AX_LIBEVENT
27 | if test "x$succeeded" != "xyes";then
28 |     AC_MSG_ERROR([checking libevent fail])
29 | fi
30 |
31 | AX_THRIFT
32 | if test "x$succeeded" != "xyes";then
33 |     AC_MSG_ERROR([checking thrift fail])
34 | fi
35 |
36 | AX_BOOST
37 | if test "x$succeeded" != "xyes";then
38 |     AC_MSG_ERROR([checking boost fail])
39 | fi
40 |
41 | AX_LOG4CPLUS
42 | if test "x$succeeded" != "xyes";then
43 |     AC_MSG_ERROR([checking log4cplus fail])
44 | fi
45 |
46 | #if test "x$succeeded" = "xno" ; then
47 | # AC_MSG_ERROR(["Error: thrift required"])
48 | #fi
49 |
50 | # Checks for header files.
51 | AC_CHECK_HEADERS([arpa/inet.h fcntl.h netdb.h netinet/in.h stdint.h stdlib.h string.h sys/socket.h sys/time.h unistd.h])
52 |
53 | # Checks for typedefs, structures, and compiler characteristics.
54 | AC_CHECK_HEADER_STDBOOL
55 | AC_C_INLINE
56 | AC_TYPE_INT16_T
57 | AC_TYPE_INT32_T
58 | AC_TYPE_INT64_T
59 | AC_TYPE_SIZE_T
60 | AC_TYPE_UINT32_T
61 | AC_TYPE_UINT64_T
62 | AC_TYPE_UINT8_T
63 |
64 | # Checks for library functions.
65 | AC_FUNC_MALLOC
66 | AC_CHECK_FUNCS([bzero gettimeofday memset select socket strchr strcspn strerror strncasecmp])
67 |
68 | # Files to generate. Passing the file list to the output macro itself is the
69 | # obsolete form, so the list is declared separately.
70 | AC_CONFIG_FILES([Makefile])
71 | AC_OUTPUT
72 |
--------------------------------------------------------------------------------
/create.sql:
--------------------------------------------------------------------------------
1 | -- Schema for the lspider link store.
2 | -- The script can be run again on an existing installation: the database and
3 | -- the table are only created when they do not exist yet.
4 | CREATE DATABASE IF NOT EXISTS lspider;
5 | USE lspider;
6 |
7 | -- One row per link; `sign` is the primary key.
8 | CREATE TABLE IF NOT EXISTS link (
9 |     `sign` VARCHAR(24) NOT NULL,
10 |     `url` VARCHAR(1024) NOT NULL DEFAULT '',
11 |     `maindomain` VARCHAR(80) DEFAULT '',
12 |     `ip` VARCHAR(16) DEFAULT '',
13 |     `prelink` VARCHAR(1024) DEFAULT '',
14 |     `preanchor` VARCHAR(1024) DEFAULT '',
15 |     `weight` INT(10) unsigned NOT NULL DEFAULT 1,
16 |     `linkdepth` INT(10) unsigned NOT NULL DEFAULT 0,
17 |     `crawlstate` INT(10) NOT NULL DEFAULT 0,
18 |     `crawlretry` INT(10) NOT NULL DEFAULT 0,
19 |     `hub` BOOL NOT NULL DEFAULT FALSE,
20 |     `fresh` BOOL NOT NULL DEFAULT FALSE,
21 |     `updatetime` TIMESTAMP,
22 |     `foundtime` TIMESTAMP,
23 |     `crawledtime` TIMESTAMP,
24 |     PRIMARY KEY (`sign`)
25 | ) ENGINE=MyISAM DEFAULT character set utf8;
26 |
--------------------------------------------------------------------------------
/log.properties.demo:
--------------------------------------------------------------------------------
1 | # http://log4cplus.sourceforge.net/docs/html/classlog4cplus_1_1PropertyConfigurator.html
2 | # http://log4cplus.sourceforge.net/docs/html/classlog4cplus_1_1PatternLayout.html
3 | # %p event level
4 | # %D{%Y-%m-%d %H:%M:%S} 2015-05-16 21:33:43
5 | # %t thread name
6 | # %l test.cpp:12
7 | # %M __FUNCTION__
8 | # %m message
9 | # %n new line
10 |
11 | log4cplus.rootLogger=TRACE, normal_appender, wf_appender
12 |
13 | log4cplus.appender.normal_appender=log4cplus::RollingFileAppender
14 | log4cplus.appender.normal_appender.File=log/lspider.log
15 | log4cplus.appender.normal_appender.MaxFileSize=500MB
16 | log4cplus.appender.normal_appender.MaxBackupIndex=10
17 | log4cplus.appender.normal_appender.layout=log4cplus::PatternLayout
18 | log4cplus.appender.normal_appender.layout.ConversionPattern=%-5p: %D{%Y-%m-%d %H:%M:%S} %t %l %m%n
19 | #log4cplus.appender.normal_appender.filters.1=log4cplus::spi::LogLevelMatchFilter
20 | #log4cplus.appender.normal_appender.filters.1.LogLevelToMatch=TRACE
21 | log4cplus.appender.normal_appender.filters.1=log4cplus::spi::LogLevelRangeFilter
22 | log4cplus.appender.normal_appender.filters.1.LogLevelMin=TRACE
23 | #log4cplus.appender.normal_appender.filters.1.LogLevelMin=INFO
24 | log4cplus.appender.normal_appender.filters.1.LogLevelMax=INFO
25 | log4cplus.appender.normal_appender.filters.1.AcceptOnMatch=true
26 | log4cplus.appender.normal_appender.filters.2=log4cplus::spi::DenyAllFilter
27 |
28 | log4cplus.appender.wf_appender=log4cplus::RollingFileAppender
29 | log4cplus.appender.wf_appender.File=log/lspider.log.wf
30 | log4cplus.appender.wf_appender.MaxFileSize=500MB
31 | log4cplus.appender.wf_appender.MaxBackupIndex=10
32 | log4cplus.appender.wf_appender.layout=log4cplus::PatternLayout
33 | #log4cplus.appender.wf_appender.layout.ConversionPattern=%-5p: %D{%Y-%m-%d %H:%M:%S} %t %l [%M] %m%n
34 | log4cplus.appender.wf_appender.layout.ConversionPattern=%-5p: %D{%Y-%m-%d %H:%M:%S} %t %l %m%n
35 | #log4cplus.appender.wf_appender.filters.1=log4cplus::spi::LogLevelMatchFilter
36 | #log4cplus.appender.wf_appender.filters.1.LogLevelToMatch=TRACE
37 | log4cplus.appender.wf_appender.filters.1=log4cplus::spi::LogLevelRangeFilter
38 | log4cplus.appender.wf_appender.filters.1.LogLevelMin=WARN
39 | log4cplus.appender.wf_appender.filters.1.LogLevelMax=FATAL
40 | log4cplus.appender.wf_appender.filters.1.AcceptOnMatch=true
41 | log4cplus.appender.wf_appender.filters.2=log4cplus::spi::DenyAllFilter
42 |
--------------------------------------------------------------------------------
/lspider.thrift:
--------------------------------------------------------------------------------
1 | // Thrift interface definition for lspider. The C++ code is generated with
2 | // the thrift command listed in readme.txt and lives in namespace lspider.
3 | namespace cpp lspider
4 |
5 | // Argument type of CrawlService.request.
6 | // NOTE(review): str presumably holds the URL text and ip the host address - confirm.
7 | struct IUrl {
8 | 1: string str,
9 | 2: string ip,
10 | 3: bool hub = false, // false unless the sender sets it
11 | }
12 |
13 | // RPC interface with two calls.
14 | service CrawlService {
15 | // Takes one IUrl and returns nothing.
16 | void request(1: IUrl u),
17 | // Takes a command string and returns a string.
18 | string exec_cmd(1: string cmd),
19 | }
20 |
--------------------------------------------------------------------------------
/maindomain.list.demo:
--------------------------------------------------------------------------------
1 | shareditor.com
2 |
--------------------------------------------------------------------------------
/missing:
--------------------------------------------------------------------------------
1 | #! /bin/sh
2 | # Common wrapper for a few potentially missing GNU programs.
3 |
4 | scriptversion=2012-06-26.16; # UTC
5 |
6 | # Copyright (C) 1996-2013 Free Software Foundation, Inc.
7 | # Originally written by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996.
8 |
9 | # This program is free software; you can redistribute it and/or modify
10 | # it under the terms of the GNU General Public License as published by
11 | # the Free Software Foundation; either version 2, or (at your option)
12 | # any later version.
13 |
14 | # This program is distributed in the hope that it will be useful,
15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 | # GNU General Public License for more details.
18 |
19 | # You should have received a copy of the GNU General Public License
20 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 |
22 | # As a special exception to the GNU General Public License, if you
23 | # distribute this file as part of a program that contains a
24 | # configuration script generated by Autoconf, you may include it under
25 | # the same distribution terms that you use for the rest of that program.
26 |
27 | if test $# -eq 0; then
28 | echo 1>&2 "Try '$0 --help' for more information"
29 | exit 1
30 | fi
31 |
32 | case $1 in
33 |
34 | --is-lightweight)
35 | # Used by our autoconf macros to check whether the available missing
36 | # script is modern enough.
37 | exit 0
38 | ;;
39 |
40 | --run)
41 | # Back-compat with the calling convention used by older automake.
42 | shift
43 | ;;
44 |
45 | -h|--h|--he|--hel|--help)
46 | echo "\
47 | $0 [OPTION]... PROGRAM [ARGUMENT]...
48 |
49 | Run 'PROGRAM [ARGUMENT]...', returning a proper advice when this fails due
50 | to PROGRAM being missing or too old.
51 |
52 | Options:
53 | -h, --help display this help and exit
54 | -v, --version output version information and exit
55 |
56 | Supported PROGRAM values:
57 | aclocal autoconf autoheader autom4te automake makeinfo
58 | bison yacc flex lex help2man
59 |
60 | Version suffixes to PROGRAM as well as the prefixes 'gnu-', 'gnu', and
61 | 'g' are ignored when checking the name.
62 |
63 | Send bug reports to ."
64 | exit $?
65 | ;;
66 |
67 | -v|--v|--ve|--ver|--vers|--versi|--versio|--version)
68 | echo "missing $scriptversion (GNU Automake)"
69 | exit $?
70 | ;;
71 |
72 | -*)
73 | echo 1>&2 "$0: unknown '$1' option"
74 | echo 1>&2 "Try '$0 --help' for more information"
75 | exit 1
76 | ;;
77 |
78 | esac
79 |
80 | # Run the given program, remember its exit status.
81 | "$@"; st=$?
82 |
83 | # If it succeeded, we are done.
84 | test $st -eq 0 && exit 0
85 |
86 | # Also exit now if we it failed (or wasn't found), and '--version' was
87 | # passed; such an option is passed most likely to detect whether the
88 | # program is present and works.
89 | case $2 in --version|--help) exit $st;; esac
90 |
91 | # Exit code 63 means version mismatch. This often happens when the user
92 | # tries to use an ancient version of a tool on a file that requires a
93 | # minimum version.
94 | if test $st -eq 63; then
95 | msg="probably too old"
96 | elif test $st -eq 127; then
97 | # Program was missing.
98 | msg="missing on your system"
99 | else
100 | # Program was found and executed, but failed. Give up.
101 | exit $st
102 | fi
103 |
104 | perl_URL=http://www.perl.org/
105 | flex_URL=http://flex.sourceforge.net/
106 | gnu_software_URL=http://www.gnu.org/software
107 |
108 | program_details ()
109 | {
110 | case $1 in
111 | aclocal|automake)
112 | echo "The '$1' program is part of the GNU Automake package:"
113 | echo "<$gnu_software_URL/automake>"
114 | echo "It also requires GNU Autoconf, GNU m4 and Perl in order to run:"
115 | echo "<$gnu_software_URL/autoconf>"
116 | echo "<$gnu_software_URL/m4/>"
117 | echo "<$perl_URL>"
118 | ;;
119 | autoconf|autom4te|autoheader)
120 | echo "The '$1' program is part of the GNU Autoconf package:"
121 | echo "<$gnu_software_URL/autoconf/>"
122 | echo "It also requires GNU m4 and Perl in order to run:"
123 | echo "<$gnu_software_URL/m4/>"
124 | echo "<$perl_URL>"
125 | ;;
126 | esac
127 | }
128 |
129 | give_advice ()
130 | {
131 | # Normalize program name to check for.
132 | normalized_program=`echo "$1" | sed '
133 | s/^gnu-//; t
134 | s/^gnu//; t
135 | s/^g//; t'`
136 |
137 | printf '%s\n' "'$1' is $msg."
138 |
139 | configure_deps="'configure.ac' or m4 files included by 'configure.ac'"
140 | case $normalized_program in
141 | autoconf*)
142 | echo "You should only need it if you modified 'configure.ac',"
143 | echo "or m4 files included by it."
144 | program_details 'autoconf'
145 | ;;
146 | autoheader*)
147 | echo "You should only need it if you modified 'acconfig.h' or"
148 | echo "$configure_deps."
149 | program_details 'autoheader'
150 | ;;
151 | automake*)
152 | echo "You should only need it if you modified 'Makefile.am' or"
153 | echo "$configure_deps."
154 | program_details 'automake'
155 | ;;
156 | aclocal*)
157 | echo "You should only need it if you modified 'acinclude.m4' or"
158 | echo "$configure_deps."
159 | program_details 'aclocal'
160 | ;;
161 | autom4te*)
162 | echo "You might have modified some maintainer files that require"
163 | echo "the 'automa4te' program to be rebuilt."
164 | program_details 'autom4te'
165 | ;;
166 | bison*|yacc*)
167 | echo "You should only need it if you modified a '.y' file."
168 | echo "You may want to install the GNU Bison package:"
169 | echo "<$gnu_software_URL/bison/>"
170 | ;;
171 | lex*|flex*)
172 | echo "You should only need it if you modified a '.l' file."
173 | echo "You may want to install the Fast Lexical Analyzer package:"
174 | echo "<$flex_URL>"
175 | ;;
176 | help2man*)
177 | echo "You should only need it if you modified a dependency" \
178 | "of a man page."
179 | echo "You may want to install the GNU Help2man package:"
180 | echo "<$gnu_software_URL/help2man/>"
181 | ;;
182 | makeinfo*)
183 | echo "You should only need it if you modified a '.texi' file, or"
184 | echo "any other file indirectly affecting the aspect of the manual."
185 | echo "You might want to install the Texinfo package:"
186 | echo "<$gnu_software_URL/texinfo/>"
187 | echo "The spurious makeinfo call might also be the consequence of"
188 | echo "using a buggy 'make' (AIX, DU, IRIX), in which case you might"
189 | echo "want to install GNU make:"
190 | echo "<$gnu_software_URL/make/>"
191 | ;;
192 | *)
193 | echo "You might have modified some files without having the proper"
194 | echo "tools for further handling them. Check the 'README' file, it"
195 | echo "often tells you about the needed prerequisites for installing"
196 | echo "this package. You may also peek at any GNU archive site, in"
197 | echo "case some other package contains this missing '$1' program."
198 | ;;
199 | esac
200 | }
201 |
202 | give_advice "$1" | sed -e '1s/^/WARNING: /' \
203 | -e '2,$s/^/ /' >&2
204 |
205 | # Propagate the correct exit status (expected to be 127 for a program
206 | # not found, 63 for a program that failed due to version mismatch).
207 | exit $st
208 |
209 | # Local variables:
210 | # eval: (add-hook 'write-file-hooks 'time-stamp)
211 | # time-stamp-start: "scriptversion="
212 | # time-stamp-format: "%:y-%02m-%02d.%02H"
213 | # time-stamp-time-zone: "UTC"
214 | # time-stamp-end: "; # UTC"
215 | # End:
216 |
--------------------------------------------------------------------------------
/readme.txt:
--------------------------------------------------------------------------------
1 | ~/thrift/bin/thrift -r -out src/ --gen cpp lspider.thrift
2 |
3 | moc-qt5 src/extractor_worker_view.h -o src/moc_extractor_worker_view.cpp
5 | moc-qt5 src/link_scheduler.h -o src/moc_link_scheduler.cpp
6 | moc-qt5 src/mysql_dumper.h -o src/moc_mysql_dumper.cpp
7 | moc-qt5 src/mysql_selector.h -o src/moc_mysql_selector.cpp
8 |
--------------------------------------------------------------------------------
/src/DoubleList.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Doubly Linked List. NOT thread safe! Refer to SyncedQ.h for a locked wrapper.
3 | * by Shiding Lin
4 | *
5 | * READ THIS BEFORE YOU USE:
6 | * To prevent a node from being inserted into a list twice, or into
7 | * multiple lists at the same time, we check whether the link is
8 | * initialized when you call the insert-related interfaces. So you must
9 | * initialize the link when you create a node.
10 | *
11 | *
12 | * TElement must contain the following fields:
13 | * DLINK link;
14 | */
15 | #ifndef _DOUBLE_LINK_LIST_H_
16 | #define _DOUBLE_LINK_LIST_H_
17 |
18 | #include
19 | #include "defines.h"
20 | #include "link.h"
21 |
22 | typedef void* ptr;
23 |
24 | template
25 | static inline
26 | bool dlink_contains(TElement* pHead, TElement* pNode)
27 | {
28 | ASSERT(NULL != pHead);
29 | ASSERT(NULL != pNode);
30 | TElement* pTmp = pHead;
31 | do {
32 | if (pTmp == pNode)
33 | return true;
34 | if (pTmp && pTmp->link._next) {
35 | pTmp = CONTAINING_RECORD(pTmp->link._next, TElement, link);
36 | }
37 | else {
38 | break;
39 | }
40 | } while (NULL!=pTmp && pHead!=pTmp);
41 | return false;
42 | }
43 |
44 | // return next
45 | template
46 | static inline
47 | TElement* dlink_pop_self(TElement* pE)
48 | {
49 | ASSERT(NULL != pE);
50 | DLINK* p = &pE->link;
51 | if (p!=p->_next && NULL!=p->_next) {
52 | pE = CONTAINING_RECORD(p->_next, TElement, link);
53 | DLINK_REMOVE(p);
54 | DLINK_INITIALIZE(p);
55 | return pE;
56 | }
57 | return NULL;
58 | }
59 |
60 | template
61 | static inline
62 | TElement* dlink_get_next(TElement* pE)
63 | {
64 | ASSERT(NULL != pE);
65 | DLINK* p = pE->link._next;
66 | if (NULL != p) {
67 | pE = CONTAINING_RECORD(p, TElement, link);
68 | return pE;
69 | }
70 | return NULL;
71 | }
72 |
73 | template
74 | static inline
75 | TElement* dlink_get_prev(TElement* pE)
76 | {
77 | ASSERT(NULL != pE);
78 | DLINK* p = pE->link._prev;
79 | if (NULL != p) {
80 | pE = CONTAINING_RECORD(p, TElement, link);
81 | return pE;
82 | }
83 | return NULL;
84 | }
85 |
86 | template
87 | static inline
88 | TElement* dlink_get_container(DLINK* pLink)
89 | {
90 | ASSERT(NULL != pLink);
91 | return CONTAINING_RECORD(pLink, TElement, link);
92 | }
93 |
94 | template
95 | class TLinkedList
96 | {
97 | protected:
98 | int m_count;
99 | DLINK m_head;
100 | public:
101 | typedef bool (*ENUMERATOR)(TElement* pE, ptr pData); // return false to stop the enumeration
102 |
103 | TLinkedList() { Init(); }
104 | ~TLinkedList() {}
105 |
106 | void Init() {
107 | DLINK_INITIALIZE(&m_head);
108 | m_count = 0;
109 | }
110 |
111 | void Assert() const {
112 | int n = 0;
113 | DLINK* p = m_head._next;
114 |
115 | ASSERT_EQUAL(p->_prev->_next, p);
116 | while (p != &m_head) {
117 | ASSERT_EQUAL(p->_prev->_next, p);
118 | p = p->_next;
119 | n ++;
120 | }
121 | ASSERT_EQUAL(m_count, n);
122 | }
123 |
124 | void Print() const {
125 | const DLINK *p = &m_head;
126 |
127 | do {
128 | p = p->_next;
129 | } while (p != &m_head);
130 | }
131 |
132 | // Notice that it is O(n), be careful!
133 | bool contains(TElement* pNode) const {
134 | ASSERT(NULL != pNode);
135 | #ifndef _UNITTEST
136 | return true;
137 | #endif
138 | for (DLINK* p=m_head._next; p && p!=&m_head; p=p->_next) {
139 | if (p == &pNode->link)
140 | return true;
141 | }
142 | return false;
143 | }
144 |
145 | int size() const {
146 | return m_count;
147 | }
148 |
149 | void merge(TLinkedList* pList) {
150 | m_head._prev->_next = pList->m_head._next;
151 | pList->m_head._next->_prev = m_head._prev;
152 | m_head._prev = pList->m_head._prev;
153 | pList->m_head._prev->_next = &m_head;
154 | DLINK_INITIALIZE(&pList->m_head);
155 | m_count += pList->m_count;
156 | pList->m_count = 0;
157 | }
158 |
159 | int push_back(TElement* e) {
160 | ASSERT(NULL != e);
161 | ASSERT(DLINK_IS_STANDALONE(&e->link));
162 | DLINK_INSERT_PREV(&m_head, &e->link);
163 | return ++ m_count;
164 | }
165 |
166 | TElement* pop_back() {
167 | DLINK* p = m_head._prev;
168 | if (p != &m_head) {
169 | DLINK_REMOVE(p);
170 | DLINK_INITIALIZE(p);
171 | m_count --;
172 | return CONTAINING_RECORD(p, TElement, link);
173 | }
174 | ASSERT(0 == m_count);
175 | return NULL;
176 | }
177 |
178 | TElement* get_back() {
179 | DLINK* p = m_head._prev;
180 | if (p != &m_head) {
181 | return CONTAINING_RECORD(p, TElement, link);
182 | }
183 | ASSERT(0 == m_count);
184 | return NULL;
185 | }
186 |
187 | int push_front(TElement* e) {
188 | ASSERT(NULL != e);
189 |
190 | ASSERT(DLINK_IS_STANDALONE(&e->link));
191 | DLINK_INSERT_NEXT(&m_head, &e->link);
192 | return ++ m_count;
193 | }
194 |
195 | TElement* pop_front() {
196 | DLINK* p = m_head._next;
197 | if (p != &m_head) {
198 | DLINK_REMOVE(p);
199 | DLINK_INITIALIZE(p);
200 | m_count --;
201 | return CONTAINING_RECORD(p, TElement, link);
202 | }
203 | ASSERT_EQUAL(0, m_count);
204 | return NULL;
205 | }
206 |
207 | TElement* get_front() {
208 | DLINK* p = m_head._next;
209 | if (p != &m_head) {
210 | ASSERT(0 < m_count);
211 | return CONTAINING_RECORD(p, TElement, link);
212 | }
213 | ASSERT_EQUAL(0, m_count);
214 | return NULL;
215 | }
216 |
217 | TElement* get_next(TElement* pElement) {
218 | ASSERT(contains(pElement));
219 | DLINK* p = pElement->link._next;
220 | if (p != &m_head) {
221 | return CONTAINING_RECORD(p, TElement, link);
222 | }
223 | return NULL;
224 | }
225 |
226 | TElement* get_prev(TElement* pElement) {
227 | ASSERT(contains(pElement));
228 | DLINK* p = pElement->link._prev;
229 | if (p != &m_head) {
230 | return CONTAINING_RECORD(p, TElement, link);
231 | }
232 | return NULL;
233 | }
234 |
235 | int remove(TElement* pElement) {
236 | ASSERT(contains(pElement));
237 | DLINK_REMOVE(&pElement->link);
238 | DLINK_INITIALIZE(&pElement->link);
239 | return -- m_count;
240 | }
241 |
242 | bool Enumerate(ENUMERATOR f, ptr pData) {
243 | TElement* pE;
244 | DLINK* p = m_head._next;
245 | while (p != &m_head) {
246 | pE = CONTAINING_RECORD(p, TElement, link);
247 | p = p->_next;
248 | if (!f(pE, pData))
249 | return false;
250 | }
251 | return true;
252 | }
253 |
254 | bool Eat(ENUMERATOR f, ptr pData) {
255 | TElement* pE;
256 |
257 | while (NULL != (pE=pop_front())) {
258 | if (!f(pE, pData)) {
259 | push_front(pE);
260 | return false;
261 | }
262 | }
263 | return true;
264 | }
265 |
266 | TElement* FindIf(ENUMERATOR f, ptr pData) {
267 | TElement* pE;
268 | DLINK* p = m_head._next;
269 | while (p != &m_head) {
270 | pE = CONTAINING_RECORD(p, TElement, link);
271 | p = p->_next;
272 | if (f(pE, pData))
273 | return pE;
274 | }
275 | return NULL;
276 | }
277 | };
278 |
279 | #endif
280 |
--------------------------------------------------------------------------------
/src/atomic.h:
--------------------------------------------------------------------------------
1 | /*************************************************************************
2 | * Copyright (c) 2015, LiChuang. All rights reserved.
3 | * Author: lichuang(whlichuang@126.com)
4 | * Created Time: Fri May 15 00:25:46 2015
5 | * Description:
6 | ************************************************************************/
7 |
8 | #ifndef __ATOMIC_H__
9 | #define __ATOMIC_H__
10 |
/**
 * Atomically adds `add` to `*count`.
 *
 * Uses the compiler's __sync_fetch_and_add builtin (a full memory barrier)
 * instead of hand-written x86 "lock xadd", so the header also builds on
 * non-x86 targets.
 *
 * @param count word to modify
 * @param add   amount to add (may be negative)
 * @return the value `*count` held immediately before the addition
 */
static inline int atomic_add(volatile int *count, int add)
{
    return __sync_fetch_and_add(count, add);
}
21 |
// Atomically adds `add` to the 64-bit integer that `mem` points to.
// The old value is discarded.
//
// Implemented with __sync_fetch_and_add rather than "lock addq": the asm
// version accepted immediates that do not fit addq's 32-bit field and
// produced an invalid instruction when `add` was a 32-bit variable.
#define __atomic_add64__(mem, add)                  \
    do {                                            \
        (void)__sync_fetch_and_add((mem), (add));   \
    } while (0)

// Atomically subtracts `sub` from the 64-bit integer that `mem` points to.
// The old value is discarded.
#define __atomic_sub64__(mem, sub)                  \
    do {                                            \
        (void)__sync_fetch_and_sub((mem), (sub));   \
    } while (0)
35 |
/**
 * Atomically stores `value` into the 32-bit word at `lockword`.
 *
 * @param lockword address of the word to exchange
 * @param value    value to store
 * @return the value the word held before the exchange
 */
static inline int
atomic_swap(volatile void *lockword, int value)
{
    int previous;
    // xchg leaves the old memory contents in the register that carried the
    // new value; both are pinned to eax by the "a" constraints.
    __asm__ __volatile__(
        "lock xchg %0, (%1);"
        : "=a"(previous)
        : "r"(lockword), "a"(value)
        : "memory"
    );
    return previous;
}
47 |
/**
 * Atomic 32-bit compare-and-swap.
 *
 * If the word at `lockword` equals `comperand`, it is replaced by
 * `exchange`; otherwise it is left unchanged.
 *
 * Uses __sync_val_compare_and_swap, which is a full memory barrier. The
 * previous inline asm had neither a "memory" clobber nor a memory operand,
 * so the compiler was not told that *lockword is read and written.
 *
 * @param lockword  address of the word to update
 * @param exchange  value to store on a match
 * @param comperand value the word is expected to hold
 * @return the value the word held before the call (equal to `comperand`
 *         exactly when the swap happened)
 */
static inline int // return old value
atomic_comp_swap(volatile void *lockword,
                 int exchange,
                 int comperand)
{
    return __sync_val_compare_and_swap(
        static_cast<volatile int *>(lockword), comperand, exchange);
}
60 |
/**
 * Atomic 64-bit compare-and-swap.
 *
 * If the 64-bit word at `lockword` equals `comperand`, it is replaced by
 * `exchange`; otherwise it is left unchanged.
 *
 * The return type is int64_t: the earlier `int` return silently dropped
 * the upper 32 bits of the old value. Callers that store the result in an
 * int keep compiling and see the same truncated value as before.
 *
 * Uses __sync_val_compare_and_swap, which is a full memory barrier; the
 * previous inline asm had no "memory" clobber.
 *
 * @param lockword  address of the 64-bit word to update
 * @param exchange  value to store on a match
 * @param comperand value the word is expected to hold
 * @return the value the word held before the call (equal to `comperand`
 *         exactly when the swap happened)
 */
static inline int64_t // return old value
atomic_comp_swap64(volatile void *lockword,
                   int64_t exchange,
                   int64_t comperand)
{
    return __sync_val_compare_and_swap(
        static_cast<volatile int64_t *>(lockword), comperand, exchange);
}
73 |
// Spin-wait hint: emits the x86 "pause" instruction.
#define nop() __asm__ ("pause" )
// Full memory barrier. The earlier "lock; addl $0,0(%esp)" form addressed
// the stack through the 32-bit esp register, which is not a valid stack
// address in a 64-bit process; the builtin emits the right fence for the
// target.
#define sfence() __sync_synchronize()

// Reads x atomically: a compare-and-swap of 0 with 0 never changes the
// stored value and returns what was there.
#define AtomicGetValue(x) (atomic_comp_swap(&(x), 0, 0))
// Stores v into x and yields the previous value.
#define AtomicSetValue(x, v) (atomic_swap(&(x), (v)))
// Stores v into x only if x currently equals ifn; yields the previous value.
#define AtomicSetValueIf(x, v, ifn) (atomic_comp_swap(&(x), (v), ifn))

// return new value
#define AtomicDec(c) (atomic_add(&(c), -1) - 1)
#define AtomicInc(c) (atomic_add(&(c), 1) + 1)
84 |
85 | #endif //__ATOMIC_H__
86 |
--------------------------------------------------------------------------------
/src/cmd_ctrler.cpp:
--------------------------------------------------------------------------------
1 | /*************************************************************************
2 | * Copyright (c) 2015, LiChuang. All rights reserved.
3 | * Author: lichuang(whlichuang@126.com)
4 | * Created Time: Mon 29 Jun 2015 11:26:40 AM CST
5 | * Description:
6 | ************************************************************************/
7 |
8 | #include
9 | #include "logger_container.h"
10 | #include "mysql_selector.h"
11 | #include "mysql_dumper.h"
12 | #include "mongo_dumper.h"
13 | #include "link_scheduler.h"
14 | #include "http_processor.h"
15 | #include "extractor.h"
16 | #include "cmd_ctrler.h"
17 |
18 | using std::istringstream;
19 |
20 | CmdCtrler::CmdCtrler(QApplication *app)
21 | :_app(app)
22 | {
23 | }
24 |
25 | CmdCtrler::~CmdCtrler()
26 | {
27 | }
28 |
29 | void CmdCtrler::control(string& response, const string& cmd)
30 | {
31 | istringstream sscmd(cmd);
32 | string level1cmd;
33 | string level2cmd;
34 | string level3cmd;
35 | sscmd >> level1cmd;
36 | sscmd >> level2cmd;
37 | sscmd >> level3cmd;
38 |
39 | if ("help" == level1cmd) {
40 | if ("" == level2cmd) {
41 | response = "command: help|show";
42 | }
43 | } else if ("show" == level1cmd) {
44 | if ("RequestRecv" == level2cmd) {
45 | _handlers["RequestRecv"]->control(response, level3cmd);
46 | } else if ("MySqlSelector" == level2cmd) {
47 | _handlers["MySqlSelector"]->control(response, level3cmd);
48 | } else if ("HttpProcessor" == level2cmd) {
49 | _handlers["HttpProcessor"]->control(response, level3cmd);
50 | } else if ("LinkScheduler" == level2cmd) {
51 | _handlers["LinkScheduler"]->control(response, level3cmd);
52 | } else if ("MongoDumper" == level2cmd) {
53 | _handlers["MongoDumper"]->control(response, level3cmd);
54 | } else if ("MySqlDumper" == level2cmd) {
55 | _handlers["MySqlDumper"]->control(response, level3cmd);
56 | } else if ("Extractor" == level2cmd) {
57 | _handlers["Extractor"]->control(response, level3cmd);
58 | } else {
59 | response = "must specific modle: RequestRecv|MySqlSelector|HttpProcessor|LinkScheduler|MongoDumper|MySqlDumper|Extractor";
60 | }
61 | }
62 | }
63 |
64 | void CmdCtrler::addHandler(const string name, Controllable* controllable)
65 | {
66 | LOG_F(DEBUG, "addHandler %s %p", name.c_str(), controllable);
67 | _handlers[name] = controllable;
68 | }
69 |
--------------------------------------------------------------------------------
/src/cmd_ctrler.h:
--------------------------------------------------------------------------------
1 | /*************************************************************************
2 | * Copyright (c) 2015, LiChuang. All rights reserved.
3 | * Author: lichuang(whlichuang@126.com)
4 | * Created Time: Mon 29 Jun 2015 11:26:13 AM CST
5 | * Description:
6 | ************************************************************************/
7 |
8 | #ifndef __CMD_CTRLER_H__
9 | #define __CMD_CTRLER_H__
10 |
11 | #include
12 | #include