├── INSTALL ├── src ├── Makefile.in ├── watchd-gen-service ├── sample.conf └── watchd.c ├── watchd.spec ├── README └── make.sh /INSTALL: -------------------------------------------------------------------------------- 1 | 2 | #step 1. download libfastcommon source codes from github and install it, 3 | the github address: 4 | https://github.com/happyfish100/libfastcommon.git 5 | 6 | #step 2. download watched source codes, the github address: 7 | https://github.com/happyfish100/watchd.git 8 | 9 | #step 3. enter the watchd dir 10 | cd watchd 11 | 12 | #step 4. execute: 13 | ./make.sh && sudo ./make.sh install 14 | -------------------------------------------------------------------------------- /src/Makefile.in: -------------------------------------------------------------------------------- 1 | .SUFFIXES: .c .o 2 | 3 | COMPILE = $(CC) $(CFLAGS) 4 | INC_PATH = -I/usr/local/include 5 | LIB_PATH = $(LIBS) -lfastcommon 6 | TARGET_PATH = $(TARGET_PREFIX)/bin 7 | CONFIG_PATH = $(TARGET_CONF_PATH) 8 | INIT_PATH = $(TARGET_INIT_PATH) 9 | 10 | ALL_OBJS = watchd.o 11 | ALL_PRGS = $(ALL_OBJS:.o=) 12 | 13 | all: $(ALL_PRGS) 14 | 15 | $(ALL_PRGS): $(ALL_OBJS) 16 | 17 | .o: 18 | $(COMPILE) -o $@ $< $(LIB_PATH) $(INC_PATH) 19 | .c: 20 | $(COMPILE) -o $@ $< $(ALL_OBJS) $(LIB_PATH) $(INC_PATH) 21 | .c.o: 22 | $(COMPILE) -c -o $@ $< $(INC_PATH) 23 | 24 | install: 25 | mkdir -p $(TARGET_PATH) 26 | cp -f $(ALL_PRGS) $(TARGET_PATH) 27 | install -m 755 watchd-gen-service $(TARGET_PATH) 28 | clean: 29 | rm -f *.o $(ALL_OBJS) $(ALL_PRGS) 30 | -------------------------------------------------------------------------------- /watchd.spec: -------------------------------------------------------------------------------- 1 | %define CommitVersion %(echo $COMMIT_VERSION) 2 | 3 | Name: watchd 4 | Version: 1.0.10 5 | Release: 1%{?dist} 6 | Summary: No Summary 7 | License: GPL 8 | Group: Arch/Tech 9 | URL: http://github.com/happyfish100/watchd/ 10 | Source: http://github.com/happyfish100/watchd/%{name}-%{version}.tar.gz 11 | 12 | BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) 13 | 14 | BuildRequires: libfastcommon-devel >= 1.0.38 15 | Requires: %__cp %__mv %__chmod %__grep %__mkdir %__install %__id 16 | Requires: libfastcommon >= 1.0.38 17 | 18 | %description 19 | watch daemon 20 | commit version: %{CommitVersion} 21 | 22 | %prep 23 | %setup -q 24 | 25 | %build 26 | ./make.sh 27 | 28 | %install 29 | rm -rf %{buildroot} 30 | DESTDIR=$RPM_BUILD_ROOT ./make.sh install 31 | 32 | %post 33 | 34 | %preun 35 | 36 | %postun 37 | 38 | %clean 39 | rm -rf %{buildroot} 40 | 41 | %files 42 | %defattr(-,root,root,-) 43 | /usr/bin/* 44 | 45 | %changelog 46 | * Mon Jun 23 2014 47 | - first RPM release (1.0) 48 | -------------------------------------------------------------------------------- /src/watchd-gen-service: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #set -x 3 | 4 | if [ `whoami` != root ]; then 5 | echo 'permission denied. should be root' 6 | exit 1 7 | fi 8 | 9 | if [ x$2 = x ] || ( [ x$1 != xadd ] && [ x$1 != xdel ] ) ; then 10 | 11 | echo "usage $0 add " 12 | echo " $0 del " 13 | exit 1 14 | fi 15 | 16 | sname=$2 17 | fname=watchd-$sname 18 | svcfile=/etc/init.d/$fname 19 | 20 | if [ x$1 = xdel ]; then 21 | chkconfig --del $fname 22 | rm -f $svcfile 23 | exit 0 24 | fi 25 | 26 | if [ -e $svcfile ]; then 27 | echo "service $fname exists. please del first" 28 | exit 1 29 | fi 30 | 31 | touch $svcfile 32 | chmod 755 $svcfile 33 | 34 | CONFIGFILE=/etc/watchd/$fname.conf 35 | OPTIONS=" $CONFIGFILE" 36 | prog=/usr/bin/watchd 37 | 38 | cat > /etc/init.d/$fname << EOF 39 | #!/bin/bash 40 | 41 | # chkconfig: 2345 99 15 42 | # description: php service framework $sname 43 | # processname: $fname 44 | # config: /etc/watchd/$fname.conf 45 | 46 | # Source function library. 47 | [ -e /etc/init.d/functions ] && source /etc/init.d/functions 48 | 49 | # Source networking configuration. 50 | [ -e /etc/sysconfig/network ] && source /etc/sysconfig/network 51 | 52 | CONFIGFILE=" /etc/watchd/$fname.conf" 53 | OPTIONS=" $CONFIGFILE" 54 | prog=/usr/bin/watchd 55 | 56 | start() { 57 | [ "\$NETWORKING" = "no" ] && exit 1 58 | 59 | # Start daemons. 60 | echo -n $"Starting $prog for $sname: " 61 | $prog $OPTIONS start 62 | RETVAL=\$? 63 | [ \$RETVAL -eq 0 ] && echo "OK" 64 | return \$RETVAL 65 | } 66 | 67 | stop() { 68 | echo -n $"Shutting down $prog for $sname: " 69 | $prog $OPTIONS stop 70 | RETVAL=\$? 71 | [ \$RETVAL -eq 0 ] && echo "OK" 72 | return \$RETVAL 73 | } 74 | 75 | restart() { 76 | echo -n $"Restarting $prog for $sname: " 77 | $prog $OPTIONS restart 78 | echo 79 | RETVAL=\$? 80 | [ \$RETVAL -eq 0 ] && echo "OK" 81 | return \$RETVAL 82 | } 83 | 84 | # See how we were called. 85 | case "\$1" in 86 | start) 87 | start 88 | ;; 89 | stop) 90 | stop 91 | ;; 92 | restart) 93 | restart 94 | ;; 95 | status) 96 | status -p /home/y/var/watchd/$fname.pid 97 | ;; 98 | *) 99 | echo $"Usage: $fname {start|stop|restart|status}" 100 | exit 2 101 | esac 102 | 103 | EOF 104 | 105 | chkconfig --add $fname 106 | chkconfig $fname on 107 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | 2 | watchd is a process watch dog to keep the watched processes alive. 3 | 4 | the sample config file please see src/sample.conf. 5 | 6 | config fragment in src/sample.conf: 7 | 8 | # one section for a group sub processes 9 | # you can config more than one sections 10 | [test1] 11 | 12 | # the type: daemon or cron (for crontab) 13 | # default: daemon 14 | type = daemon 15 | 16 | # the mode: all or failover 17 | ## all: run all subprocess_command 18 | ## failover: run the first command, 19 | ## run the second command when the first exit, 20 | ## run the first command again when the last exit, 21 | ## and so on. 22 | # default: all 23 | mode = failover 24 | 25 | # if run by shell as: sh -c command 26 | ## true or 1: run with sh -c command 27 | ## false or 0: run command directly 28 | ## auto: auto detect if need run by sh -c 29 | # default auto 30 | run_by_sh = auto 31 | 32 | # force restart interval in seconds 33 | # this parameter only for daemon 34 | # default 0 for never force restart 35 | force_restart_interval = 86400 36 | 37 | # set environment variable 38 | # can ocur more than once 39 | set_env = OMP_NUM_THREADS=4 40 | 41 | # the command line of sub process 42 | # can be a simple command line or a shell command as following four formats: 43 | ## 1. (command) 44 | ## 2. command > output_filename or command >> output_filename 45 | ## 3. command1 | command2 ... 46 | ## 4. command & 47 | ## the shell command line while be exec as: sh -c command_line 48 | subprocess_command = /bin/echo OMP_NUM_THREADS=$OMP_NUM_THREADS $host_index >> /tmp/echo.log 49 | 50 | # check sub process alive interval in seconds 51 | # 0 for never check 52 | check_alive_interval = 10 53 | 54 | # retry threshold of check alive 55 | # kill the sub process when the check fail count exceeds this parameter 56 | # default: 3 57 | check_alive_retry_threshold = 3 58 | 59 | # check_alive_command can be a command or a library whose filename ends with .so 60 | ## the check command output OK for check passed, others for fail 61 | # 62 | ## the library must export c function: 63 | ## int check_alive(int argc, char **argv); 64 | ## argv[0] is the library filename, argv[1] is the first parameter and so on. 65 | ## return 0 for success, != 0 for fail. 66 | ## 67 | ## for example: 68 | ## #@function REPLACE_VARS 69 | ## check_alive_command = /usr/local/lib/libdfscheckalive.so %{encoder_port} 2 30 70 | 71 | #@function REPLACE_VARS 72 | check_alive_command = echo OK 73 | 74 | 75 | [test2] 76 | type = cron 77 | subprocess_command = ls -l / >> /tmp/ls.log 78 | 79 | # the time base to schedule 80 | # this parameter only for cron 81 | time_base = 00:00 82 | 83 | # repeat interval in seconds 84 | # this parameter only for cron 85 | repeat_interval = 60 86 | 87 | -------------------------------------------------------------------------------- /make.sh: -------------------------------------------------------------------------------- 1 | ENABLE_STATIC_LIB=0 2 | ENABLE_SHARED_LIB=1 3 | TARGET_PREFIX=$DESTDIR/usr 4 | 5 | #WITH_LINUX_SERVICE=1 6 | 7 | DEBUG_FLAG=1 8 | 9 | CFLAGS="$CFLAGS -Wall -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE" 10 | if [ "$DEBUG_FLAG" = "1" ]; then 11 | CFLAGS="$CFLAGS -g -O2 -DDEBUG_FLAG" 12 | else 13 | CFLAGS="$CFLAGS -O3" 14 | fi 15 | 16 | LIBS='' 17 | uname=$(uname) 18 | if [ "$uname" = "Linux" ]; then 19 | CFLAGS="$CFLAGS" 20 | elif [ "$uname" = "FreeBSD" ] || [ "$uname" = "Darwin" ]; then 21 | CFLAGS="$CFLAGS" 22 | elif [ "$uname" = "SunOS" ]; then 23 | CFLAGS="$CFLAGS -D_THREAD_SAFE" 24 | LIBS="$LIBS -lsocket -lnsl -lresolv" 25 | export CC=gcc 26 | elif [ "$uname" = "AIX" ]; then 27 | CFLAGS="$CFLAGS -D_THREAD_SAFE" 28 | export CC=gcc 29 | elif [ "$uname" = "HP-UX" ]; then 30 | CFLAGS="$CFLAGS" 31 | fi 32 | 33 | have_pthread=0 34 | if [ -f /usr/lib/libpthread.so ] || [ -f /usr/local/lib/libpthread.so ] || [ -f /lib64/libpthread.so ] || [ -f /usr/lib64/libpthread.so ] || [ -f /usr/lib/libpthread.a ] || [ -f /usr/local/lib/libpthread.a ] || [ -f /lib64/libpthread.a ] || [ -f /usr/lib64/libpthread.a ]; then 35 | LIBS="$LIBS -lpthread" 36 | have_pthread=1 37 | elif [ "$uname" = "HP-UX" ]; then 38 | lib_path="/usr/lib/hpux$OS_BITS" 39 | if [ -f $lib_path/libpthread.so ]; then 40 | LIBS="-L$lib_path -lpthread" 41 | have_pthread=1 42 | fi 43 | elif [ "$uname" = "FreeBSD" ]; then 44 | if [ -f /usr/lib/libc_r.so ]; then 45 | line=$(nm -D /usr/lib/libc_r.so | grep pthread_create | grep -w T) 46 | if [ $? -eq 0 ]; then 47 | LIBS="$LIBS -lc_r" 48 | have_pthread=1 49 | fi 50 | elif [ -f /lib64/libc_r.so ]; then 51 | line=$(nm -D /lib64/libc_r.so | grep pthread_create | grep -w T) 52 | if [ $? -eq 0 ]; then 53 | LIBS="$LIBS -lc_r" 54 | have_pthread=1 55 | fi 56 | elif [ -f /usr/lib64/libc_r.so ]; then 57 | line=$(nm -D /usr/lib64/libc_r.so | grep pthread_create | grep -w T) 58 | if [ $? -eq 0 ]; then 59 | LIBS="$LIBS -lc_r" 60 | have_pthread=1 61 | fi 62 | fi 63 | fi 64 | 65 | if [ $have_pthread -eq 0 ] && [ "$uname" = "Linux" ]; then 66 | /sbin/ldconfig -p | fgrep libpthread.so > /dev/null 67 | if [ $? -eq 0 ]; then 68 | LIBS="$LIBS -lpthread" 69 | else 70 | echo -E 'Require pthread lib, please check!' 71 | exit 2 72 | fi 73 | fi 74 | 75 | if [ "$DEBUG_FLAG" = "1" ]; then 76 | if [ "$uname" = "Linux" ]; then 77 | LIBS="$LIBS -ldl -rdynamic" 78 | fi 79 | fi 80 | 81 | sed_replace() 82 | { 83 | sed_cmd=$1 84 | filename=$2 85 | if [ "$uname" = "FreeBSD" ] || [ "$uname" = "Darwin" ]; then 86 | sed -i "" "$sed_cmd" $filename 87 | else 88 | sed -i "$sed_cmd" $filename 89 | fi 90 | } 91 | 92 | cd src 93 | cp Makefile.in Makefile 94 | sed_replace "s#\$(CFLAGS)#$CFLAGS#g" Makefile 95 | sed_replace "s#\$(LIBS)#$LIBS#g" Makefile 96 | sed_replace "s#\$(TARGET_PREFIX)#$TARGET_PREFIX#g" Makefile 97 | sed_replace "s#\$(ENABLE_STATIC_LIB)#$ENABLE_STATIC_LIB#g" Makefile 98 | sed_replace "s#\$(ENABLE_SHARED_LIB)#$ENABLE_SHARED_LIB#g" Makefile 99 | make $1 $2 100 | 101 | -------------------------------------------------------------------------------- /src/sample.conf: -------------------------------------------------------------------------------- 1 | #standard log level as syslog, case insensitive, value list: 2 | ### emerg for emergency 3 | ### alert 4 | ### crit for critical 5 | ### error 6 | ### warn for warning 7 | ### notice 8 | ### info 9 | ### debug 10 | log_level = debug 11 | 12 | # the base path to store data and log files 13 | # the subdir logs for log files 14 | base_path = /tmp 15 | 16 | # the service name for current watchd instance 17 | # the log filename as: watchd-$service_name.log, such as watchd-sample.log 18 | service_name = sample 19 | 20 | # how many days the log files should be kept 21 | # default: 0 22 | log_file_keep_days = 30 23 | 24 | # unix group name to run this program, 25 | # not set (empty) means run by the group of current user 26 | run_by_group= 27 | 28 | # unix user name to run this program, 29 | # not set (empty) means run by current user 30 | run_by_user= 31 | 32 | # timeout for waiting the sub process exit, in milliseconds 33 | # default: 300 ms 34 | wait_subprocess_ms = 1000 35 | 36 | # restart interval of the sub process, in milliseconds 37 | # default: 1000 ms 38 | restart_interval_ms = 10000 39 | 40 | # check sub process alive interval in seconds 41 | # 0 for never check 42 | # default: 0 (never check) 43 | check_alive_interval = 300 44 | 45 | # retry threshold of check alive 46 | # kill the sub process when the check fail count exceeds this parameter 47 | # default: 3 48 | check_alive_retry_threshold = 3 49 | 50 | # if takeover the stdout of the sub process 51 | # default: true 52 | takeover_stdout = true 53 | 54 | # if takeover the stderr of the sub process 55 | # default: true 56 | takeover_stderr = true 57 | 58 | # subprocess number for the command line 59 | # default: 1 60 | subprocess_number = 1 61 | 62 | # the extended parameter can be referred by subprocess_command 63 | # the multi values splited by comma or use range format as: [start-end] 64 | # following is an example: 65 | # host_index = 0,1,2 66 | # also can be configed like this: 67 | host_index = [0-2] 68 | 69 | #@set encoder_filename=/usr/local/etc/encoder.conf 70 | 71 | #@set encoder_port = $(grep ^inner_port %{encoder_filename} | awk -F '=' '{print $2;}') 72 | 73 | # one section for a group sub processes 74 | # you can config more than one sections 75 | [test1] 76 | 77 | # the type: daemon or cron (for crontab) 78 | # default: daemon 79 | type = daemon 80 | 81 | # the mode: all or failover 82 | ## all: run all subprocess_command 83 | ## failover: run the first command, 84 | ## run the second command when the first exit, 85 | ## run the first command again when the last exit, 86 | ## and so on. 87 | # default: all 88 | mode = failover 89 | 90 | # if run by shell as: sh -c command 91 | ## true or 1: run with sh -c command 92 | ## false or 0: run command directly 93 | ## auto: auto detect if need run by sh -c 94 | # default auto 95 | run_by_sh = auto 96 | 97 | # force restart interval in seconds 98 | # this parameter only for daemon 99 | # default 0 for never force restart 100 | force_restart_interval = 600 101 | 102 | # set environment variable 103 | # can ocur more than once 104 | set_env = OMP_NUM_THREADS=4 105 | 106 | # the command line of sub process 107 | # can be a simple command line or a shell command as following three formats: 108 | ## 1. (command) 109 | ## 2. command > output_filename or command >> output_filename 110 | ## 3. command & 111 | ## the shell command line while be exec as: sh -c command_line 112 | #subprocess_command = /bin/echo OMP_NUM_THREADS=$OMP_NUM_THREADS $host_index | awk '{print $1;}' 113 | subprocess_command = /usr/local/bin/dfs_encoderd /usr/local/etc/encoder.conf --no-daemon restart 114 | 115 | check_alive_interval = 10 116 | 117 | check_alive_retry_threshold = 3 118 | 119 | # check_alive_command can be a command or a library whose filename ends with .so) 120 | ## the check command output OK for check passed, others for fail 121 | # 122 | ## the library must export c function: 123 | ## int check_alive(int argc, char **argv); 124 | ## argv[0] is the library filename, argv[1] is the first parameter and so on. 125 | ## return 0 for success, != 0 for fail. 126 | ## 127 | ## for example: 128 | ## #@function REPLACE_VARS 129 | ## check_alive_command = /usr/local/lib/libdfscheckalive.so %{encoder_port} 2 30 130 | 131 | #check_alive_command = echo OK 132 | 133 | #@function REPLACE_VARS 134 | check_alive_command = /usr/local/lib/libdfscheckalive.so %{encoder_port} 2 30 135 | 136 | 137 | [test2] 138 | type = cron 139 | subprocess_command = ls -l / >> /tmp/ls.log 140 | 141 | # the time base to schedule 142 | # this parameter only for cron 143 | time_base = 00:00 144 | 145 | # repeat interval in seconds 146 | # this parameter only for cron 147 | repeat_interval = 60 148 | 149 | takeover_stdout = false 150 | takeover_stderr = false 151 | -------------------------------------------------------------------------------- /src/watchd.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include "fastcommon/logger.h" 16 | #include "fastcommon/shared_func.h" 17 | #include "fastcommon/sched_thread.h" 18 | #include "fastcommon/process_ctrl.h" 19 | #include "fastcommon/ini_file_reader.h" 20 | #include "fastcommon/pthread_func.h" 21 | #include "fastcommon/common_blocked_queue.h" 22 | 23 | #define MAX_CRON_PROCESS_PER_ENTRY 64 24 | #define DEFAULT_WAIT_SUBPROCESS 300 25 | #define DEFAULT_RESTART_INTERVAL 1000 26 | #define DEFAULT_CHECK_ALIVE_INTERVAL 0 27 | #define DEFAULT_CHECK_ALIVE_RETRY_THRESHOLD 3 28 | #define MAX_NAME_SIZE 64 29 | #define MAX_PARAM_COUNT 64 30 | #define CHECK_ALIVE_THREAD_STACK_SIZE (64 * 1024) 31 | 32 | #define MODE_FAILOVER 'f' 33 | #define MODE_ALL 'a' 34 | 35 | static char base_path[MAX_PATH_SIZE]={0}; 36 | static const char* action = "start"; 37 | static const char* configfile = NULL; 38 | static char run_by_user[MAX_NAME_SIZE]={0}; 39 | static char run_by_group[MAX_NAME_SIZE]={0}; 40 | static char service_name[MAX_NAME_SIZE]={0}; 41 | static const char* program = NULL; 42 | static int subprocess_number = 1; 43 | static int wait_subprocess_ms = DEFAULT_WAIT_SUBPROCESS; 44 | static int restart_interval_ms = DEFAULT_RESTART_INTERVAL; 45 | static int check_alive_interval = DEFAULT_CHECK_ALIVE_INTERVAL; 46 | static int check_alive_retry_threshold = DEFAULT_CHECK_ALIVE_RETRY_THRESHOLD; 47 | static time_t last_check_alive_time; 48 | static bool enable_access_log = false; 49 | static bool takeover_stdout = true; 50 | static bool takeover_stderr = true; 51 | static bool continue_flag = true; 52 | static volatile bool restart_subprocess = false; 53 | static pthread_t schedule_tid; 54 | static int log_file_keep_days = 0; 55 | 56 | static char pidfile[MAX_PATH_SIZE]; 57 | static char logfile[MAX_PATH_SIZE]; 58 | static char logpath[MAX_PATH_SIZE]; 59 | 60 | static int parse_args(int argc, char* argv[]); 61 | static int setup_sig_handlers(); 62 | static int setup_schedule_tasks(); 63 | static int make_dir(const char* dirname); 64 | static int load_from_conf_file(const char* filename); 65 | 66 | typedef enum { hc_type_none=0, hc_type_kill, 67 | hc_type_exec, hc_type_library } HealthCheckType; 68 | 69 | typedef enum { 70 | spt_stop_all, 71 | spt_stop_none_standalone, 72 | spt_stop_force_restart 73 | } StopProcessType; 74 | 75 | typedef enum { rbst_true, rbst_false, rbst_auto} RunByShType; 76 | 77 | typedef int (*health_check_func)(int argc, char **argv); 78 | 79 | typedef struct env_entry { 80 | char *name; 81 | char *value; 82 | } EnvEntry; 83 | 84 | typedef struct env_array { 85 | EnvEntry *entries; 86 | int alloc_size; 87 | int count; 88 | } EnvArray; 89 | 90 | typedef struct command_params { 91 | bool run_by_sh; 92 | char *cmd; //command line 93 | int argc; 94 | char **argv; 95 | } CommandParams; 96 | 97 | typedef struct command_entry { 98 | CommandParams command; //exec command 99 | struct health_check_entry { 100 | CommandParams command; 101 | HealthCheckType type; 102 | health_check_func func; 103 | int fail_count; 104 | } health_check; 105 | } CommandEntry; 106 | 107 | typedef struct child_process_info { 108 | int pid; 109 | bool running; 110 | char mode; //run mode 111 | bool enable_access_log; 112 | bool takeover_stdout; 113 | bool takeover_stderr; 114 | bool force_restart_flag; 115 | int64_t last_start_time_ms; 116 | int last_check_alive_time; 117 | int restart_interval_ms; 118 | int check_alive_interval; 119 | int check_alive_retry_threshold; 120 | int force_restart_interval; 121 | uint32_t run_count; //run times counter 122 | char *logfile; 123 | char *acclog; 124 | struct command_array { 125 | int alloc; //alloc count 126 | int count; //item count 127 | int index; //current index 128 | CommandEntry *list; 129 | } commands; 130 | EnvArray envs; 131 | } ChildProcessInfo; 132 | 133 | typedef struct cron_entry { 134 | TimeInfo time_base; 135 | int interval; 136 | } CronEntry; 137 | 138 | typedef struct child_process_array { 139 | ChildProcessInfo **processes; 140 | int alloc_size; 141 | int count; 142 | } ChildProcessArray; 143 | 144 | static struct fast_mblock_man process_mblock; 145 | static ChildProcessArray child_proc_array = {NULL, 0, 0}; 146 | static struct { 147 | int total; 148 | int standalone; 149 | int force_restart; 150 | } child_running = {0, 0, 0}; 151 | 152 | static struct common_blocked_queue force_restart_queue; 153 | static time_t last_deal_force_restart_time; 154 | 155 | static ChildProcessArray cron_proc_array = {NULL, 0, 0}; 156 | 157 | typedef char MqPath[MAX_PATH_SIZE]; 158 | static MqPath* logfiles_all = NULL; 159 | static MqPath* acclogs_all = NULL; 160 | 161 | static int logfiles_count = 0; 162 | 163 | static IniContext ini_context; 164 | IniContext *iniContext = &ini_context; 165 | static int cron_entry_alloc_size = 0; 166 | static int cron_entry_count = 0; 167 | static CronEntry *cron_entries = NULL; 168 | 169 | typedef ChildProcessInfo* (*malloc_process_func)(); 170 | 171 | static int expand_cmd(ChildProcessInfo *cpro, malloc_process_func malloc_func, 172 | ChildProcessInfo **processes, int *pnum, const int max_count); 173 | static int set_command_params(CommandParams *command, const bool enable_access_log, 174 | char *acclog); 175 | static int process_set_command_params(ChildProcessInfo* cpro); 176 | 177 | static int update_process(int pid, const int status); 178 | static int check_all_processes(); 179 | static int run_process(ChildProcessInfo *process, 180 | CommandParams *command, pid_t *pid); 181 | static int start_all_processes(); 182 | static void stop_processes(const StopProcessType stop_type); 183 | static int rotate_logs(); 184 | static void check_subproccess_alive(); 185 | static int start_process(ChildProcessInfo *process); 186 | static int add_force_restart_shedule_entries(); 187 | static int add_cron_shedule_entries(); 188 | static void *check_alive_entrance(void *args); 189 | static int start_health_check_threads(); 190 | static void deal_force_restart_queue(); 191 | 192 | #define stop_all_processes() stop_processes(spt_stop_all) 193 | #define stop_none_standalone_processes() stop_processes(spt_stop_none_standalone) 194 | #define stop_force_restart_processes() stop_processes(spt_stop_force_restart) 195 | 196 | static void usage(const char* program) 197 | { 198 | printf("usage: %s [start|stop|restart]\n", program); 199 | } 200 | 201 | static inline bool is_standalone(ChildProcessInfo *child) 202 | { 203 | return !(child->takeover_stdout || child->takeover_stderr); 204 | } 205 | 206 | int main(int argc, char* argv[]) 207 | { 208 | int result; 209 | bool stop = false; 210 | 211 | if ((result=parse_args(argc, argv)) != 0) { 212 | return result; 213 | } 214 | 215 | g_current_time = time(NULL); 216 | log_init2(); 217 | log_set_fd_flags(&g_log_context, O_CLOEXEC); 218 | log_set_rotate_time_format(&g_log_context, "%Y%m%d"); 219 | 220 | if ((result=fast_mblock_init_ex(&process_mblock, 221 | sizeof(ChildProcessInfo), 256, 222 | 0, NULL, NULL, false)) != 0) 223 | { 224 | return result; 225 | } 226 | if ((result=common_blocked_queue_init_ex(&force_restart_queue, 128)) != 0) { 227 | return result; 228 | } 229 | 230 | result = load_from_conf_file(configfile); 231 | if (result) { 232 | logCrit("file: "__FILE__", line: %d, " 233 | "load from conf file fail, " 234 | "errno: %d, error info: %s", 235 | __LINE__, result, strerror(result)); 236 | return result; 237 | } 238 | 239 | log_set_keep_days(&g_log_context, log_file_keep_days); 240 | 241 | umask(0); 242 | if ((result = make_dir(logpath)) != 0) { 243 | logError("file: "__FILE__", line: %d, mkdir %s fail, " 244 | "errno: %d, error info: %s", __LINE__, 245 | logpath, result, strerror(result)); 246 | return result; 247 | } 248 | 249 | if ((result=process_action(pidfile, action, &stop)) != 0) { 250 | if (result == EINVAL) { 251 | usage(argv[0]); 252 | } 253 | log_destroy(); 254 | return result; 255 | } 256 | if (stop) { 257 | log_destroy(); 258 | return 0; 259 | } 260 | 261 | if ((result=set_run_by(run_by_group, run_by_user)) != 0) { 262 | logCrit("file: "__FILE__", line: %d, " 263 | "call set set_run_by fail, exit!", __LINE__); 264 | return result; 265 | } 266 | 267 | daemon_init(false); 268 | umask(0); 269 | 270 | log_set_use_file_write_lock(true); 271 | if ((result=log_set_filename(logfile)) != 0) { 272 | if (result == EAGAIN || result == EACCES) { 273 | logCrit("file: "__FILE__", line: %d, " 274 | "the process already running, " 275 | "please kill the old process first!", __LINE__); 276 | } else { 277 | logCrit("file: "__FILE__", line: %d, " 278 | "call set log_set_filename fail, exit!", __LINE__); 279 | } 280 | return result; 281 | } 282 | close(0); 283 | 284 | if ((result=write_to_pid_file(pidfile)) != 0) { 285 | log_destroy(); 286 | return result; 287 | } 288 | setup_sig_handlers(); 289 | setup_schedule_tasks(); 290 | 291 | if ((result=add_force_restart_shedule_entries()) != 0) { 292 | return result; 293 | } 294 | if ((result=add_cron_shedule_entries()) != 0) { 295 | return result; 296 | } 297 | if ((result = start_all_processes()) != 0) { 298 | return result; 299 | } 300 | 301 | iniFreeContext(iniContext); 302 | last_check_alive_time = g_current_time; 303 | logInfo("file: "__FILE__", line: %d, %s started, " 304 | "running processes count: %d, running standalone count: %d", 305 | __LINE__, program, child_running.total, child_running.standalone); 306 | 307 | if (g_log_context.log_level >= LOG_DEBUG) { 308 | sched_print_all_entries(); 309 | } 310 | if ((result=start_health_check_threads()) != 0) { 311 | continue_flag = false; 312 | } 313 | 314 | last_deal_force_restart_time = g_current_time; 315 | 316 | while (continue_flag) { 317 | if (restart_subprocess) { 318 | restart_subprocess = false; 319 | stop_none_standalone_processes(); 320 | } 321 | deal_force_restart_queue(); 322 | 323 | if ((result = check_all_processes()) != 0) { 324 | return result; 325 | } 326 | check_subproccess_alive(); 327 | 328 | if (child_running.total < child_proc_array.count && 329 | (result = start_all_processes()) != 0) 330 | { 331 | return result; 332 | } 333 | usleep(10*1000); 334 | } 335 | 336 | stop_all_processes(); 337 | delete_pid_file(pidfile); 338 | logInfo("file: "__FILE__", line: %d, %s exited normally", 339 | __LINE__, program); 340 | log_destroy(); 341 | return 0; 342 | } 343 | 344 | static void sigChildHandler(int sig) 345 | { 346 | } 347 | 348 | static void sigQuitHandler(int sig) 349 | { 350 | continue_flag = false; 351 | } 352 | 353 | static int setup_sig_handlers() 354 | { 355 | struct sigaction act; 356 | memset(&act, 0, sizeof(act)); 357 | sigemptyset(&act.sa_mask); 358 | 359 | act.sa_handler = sigChildHandler; 360 | if(sigaction(SIGCHLD, &act, NULL) < 0) { 361 | logCrit("file: "__FILE__", line: %d, " 362 | "call sigaction fail, errno: %d, error info: %s", 363 | __LINE__, errno, strerror(errno)); 364 | return errno; 365 | } 366 | 367 | act.sa_handler = SIG_IGN; 368 | if(sigaction(SIGPIPE, &act, NULL) < 0 || 369 | sigaction(SIGHUP, &act, NULL) < 0) 370 | { 371 | logCrit("file: "__FILE__", line: %d, " 372 | "call sigaction fail, errno: %d, error info: %s", 373 | __LINE__, errno, strerror(errno)); 374 | return errno; 375 | } 376 | 377 | act.sa_handler = sigQuitHandler; 378 | if(sigaction(SIGINT, &act, NULL) < 0 || 379 | sigaction(SIGTERM, &act, NULL) < 0 || 380 | sigaction(SIGQUIT, &act, NULL) < 0) 381 | { 382 | logCrit("file: "__FILE__", line: %d, " 383 | "call sigaction fail, errno: %d, error info: %s", 384 | __LINE__, errno, strerror(errno)); 385 | return errno; 386 | } 387 | 388 | return 0; 389 | } 390 | 391 | static inline CommandEntry *get_current_command_entry(ChildProcessInfo* proc) 392 | { 393 | return proc->commands.list + proc->commands.index; 394 | } 395 | 396 | static inline char *get_current_command(ChildProcessInfo* proc) 397 | { 398 | return proc->commands.list[proc->commands.index].command.cmd; 399 | } 400 | 401 | static inline CommandParams *get_next_command(ChildProcessInfo* proc) 402 | { 403 | if (proc->commands.count > 1 && proc->run_count > 0) { 404 | proc->commands.index++; 405 | if (proc->commands.index >= proc->commands.count) { 406 | proc->commands.index = 0; 407 | } 408 | } 409 | 410 | proc->run_count++; 411 | return &proc->commands.list[proc->commands.index].command; 412 | } 413 | 414 | static inline char *do_strdup(const char *str) 415 | { 416 | if (str == NULL) { 417 | return NULL; 418 | } else { 419 | return strdup(str); 420 | } 421 | } 422 | 423 | static int parse_args(int argc, char* argv[]) 424 | { 425 | int len; 426 | 427 | for (len = strlen(argv[0]) - 1; len >= 0; len --) { 428 | if (argv[0][len] == '/') { 429 | break; 430 | } 431 | } 432 | program = argv[0] + len + 1; 433 | 434 | if (argc >= 2) { 435 | configfile = argv[1]; 436 | } else { 437 | usage(argv[0]); 438 | return 1; 439 | } 440 | action = "start"; 441 | if (argc >= 3) { 442 | action = argv[2]; 443 | } 444 | 445 | if (strcmp(action, "start") == 0 || strcmp(action, "stop") == 0 446 | || strcmp(action, "restart") == 0) 447 | { 448 | return 0; 449 | } 450 | usage(argv[0]); 451 | return 1; 452 | } 453 | 454 | static int make_dir(const char* dirname) 455 | { 456 | char tpath[MAX_PATH_SIZE]; 457 | int i = 0, r = 0; 458 | 459 | snprintf(tpath, sizeof(tpath), "%s/", dirname); 460 | for (i = 1; tpath[i] != '\0'; i++) { 461 | if (tpath[i] == '/') { 462 | tpath[i] = '\0'; 463 | r = mkdir(tpath, 0777); 464 | if (r != 0 && errno != EEXIST) { 465 | logError("file: "__FILE__", line: %d, " 466 | "mkdir %s fail, errno: %d, error info: %s", 467 | __LINE__, tpath, errno, strerror(errno)); 468 | return errno; 469 | } 470 | tpath[i] = '/'; 471 | } 472 | } 473 | return 0; 474 | } 475 | 476 | static int check_alloc_command_array(struct command_array *commands, 477 | const int inc_count) 478 | { 479 | CommandEntry *list; 480 | int bytes; 481 | int alloc_size; 482 | if (commands->alloc > commands->count + inc_count) { 483 | return 0; 484 | } 485 | 486 | alloc_size = commands->alloc == 0 ? 4 : commands->alloc * 2; 487 | while (alloc_size < commands->count + inc_count) { 488 | alloc_size *= 2; 489 | } 490 | 491 | bytes = sizeof(CommandEntry) * alloc_size; 492 | list = (CommandEntry *)malloc(bytes); 493 | if (list == NULL) { 494 | logError("file: "__FILE__", line: %d, malloc %d bytes fail", 495 | __LINE__, bytes); 496 | return ENOMEM; 497 | } 498 | 499 | memset(list, 0, bytes); 500 | if (commands->count > 0) { 501 | memcpy(list, commands->list, sizeof(CommandEntry) * commands->count); 502 | } 503 | 504 | if (commands->list != NULL) { 505 | free(commands->list); 506 | } 507 | commands->alloc = alloc_size; 508 | commands->list = list; 509 | return 0; 510 | } 511 | 512 | static int process_info_cmp_pid(const void *p1, const void *p2) 513 | { 514 | return (*((ChildProcessInfo **)p1))->pid - (*((ChildProcessInfo **)p2))->pid; 515 | } 516 | 517 | static int schedule_task_func(void *args) 518 | { 519 | ChildProcessInfo *process; 520 | process = (ChildProcessInfo *)args; 521 | 522 | logInfo("file: "__FILE__", line: %d, run cron process%s: %s %s", 523 | __LINE__, get_current_command_entry(process)->command.run_by_sh ? 524 | "(run by sh -c)" : "", get_current_command(process), 525 | process->enable_access_log ? process->acclog : ""); 526 | if (start_process(process) == 0) { 527 | if (cron_proc_array.count > 1) { 528 | qsort(cron_proc_array.processes, cron_proc_array.count, 529 | sizeof(ChildProcessInfo *), process_info_cmp_pid); 530 | } 531 | } 532 | return 0; 533 | } 534 | 535 | static int force_restart_func(void *args) 536 | { 537 | ChildProcessInfo *process; 538 | process = (ChildProcessInfo *)args; 539 | 540 | logInfo("file: "__FILE__", line: %d, restart process%s: %s %s", 541 | __LINE__, get_current_command_entry(process)->command.run_by_sh ? 542 | "(run by sh -c)" : "", get_current_command(process), 543 | process->enable_access_log ? process->acclog : ""); 544 | if (!process->force_restart_flag) { 545 | common_blocked_queue_push(&force_restart_queue, process); 546 | } 547 | return 0; 548 | } 549 | 550 | static int start_health_check_threads() 551 | { 552 | ChildProcessInfo **child; 553 | ChildProcessInfo **end; 554 | CommandEntry *command_entry; 555 | pthread_attr_t thread_attr; 556 | pthread_t tid; 557 | int result; 558 | int count; 559 | 560 | result = init_pthread_attr(&thread_attr, CHECK_ALIVE_THREAD_STACK_SIZE); 561 | if (result != 0) { 562 | return result; 563 | } 564 | 565 | count = 0; 566 | end = child_proc_array.processes + child_proc_array.count; 567 | for (child=child_proc_array.processes; childcheck_alive_interval <= 0) { 569 | continue; 570 | } 571 | 572 | command_entry = get_current_command_entry(*child); 573 | if (!(command_entry->health_check.type == hc_type_exec || 574 | command_entry->health_check.type == hc_type_library)) 575 | { 576 | continue; 577 | } 578 | 579 | if ((result=pthread_create(&tid, &thread_attr, 580 | check_alive_entrance, *child)) != 0) 581 | { 582 | logError("file: "__FILE__", line: %d, " 583 | "create thread failed, errno: %d, error info: %s", 584 | __LINE__, result, strerror(result)); 585 | break; 586 | } 587 | count++; 588 | } 589 | 590 | if (result == 0 && count > 0) { 591 | logInfo("file: "__FILE__", line: %d, " 592 | "health check threads count: %d", 593 | __LINE__, count); 594 | } 595 | 596 | pthread_attr_destroy(&thread_attr); 597 | return result; 598 | } 599 | 600 | static ChildProcessInfo *malloc_process_entry(ChildProcessArray *processArray) 601 | { 602 | int bytes; 603 | ChildProcessInfo *process; 604 | 605 | if (processArray->count >= processArray->alloc_size) { 606 | if (processArray->alloc_size == 0) { 607 | processArray->alloc_size = 32; 608 | } else { 609 | processArray->alloc_size *= 2; 610 | } 611 | bytes = sizeof(ChildProcessInfo *) * processArray->alloc_size; 612 | processArray->processes = (ChildProcessInfo **)realloc( 613 | processArray->processes, bytes); 614 | if (processArray->processes == NULL) { 615 | logError("file: "__FILE__", line: %d, malloc %d bytes fail", 616 | __LINE__, bytes); 617 | return NULL; 618 | } 619 | memset(processArray->processes + processArray->count, 0, 620 | sizeof(ChildProcessInfo *) * (processArray->alloc_size - 621 | processArray->count)); 622 | } 623 | 624 | process = (ChildProcessInfo *)fast_mblock_alloc_object(&process_mblock); 625 | if (process != NULL) { 626 | memset(process, 0, sizeof(ChildProcessInfo)); 627 | processArray->processes[processArray->count++] = process; 628 | } 629 | return process; 630 | } 631 | 632 | static ChildProcessInfo *malloc_child_process_entry() 633 | { 634 | return malloc_process_entry(&child_proc_array); 635 | } 636 | 637 | static ChildProcessInfo *malloc_cron_process_entry() 638 | { 639 | return malloc_process_entry(&cron_proc_array); 640 | } 641 | 642 | static int check_alloc_schedule_entries(ScheduleArray *pSheduleArray, 643 | int *alloc_size, const int inc) 644 | { 645 | int bytes; 646 | if (pSheduleArray->count + inc > *alloc_size) { 647 | if (*alloc_size == 0) { 648 | *alloc_size = 64; 649 | } 650 | else { 651 | *alloc_size *= 2; 652 | } 653 | while (pSheduleArray->count + inc > *alloc_size) { 654 | *alloc_size *= 2; 655 | } 656 | 657 | bytes = sizeof(ScheduleEntry) * (*alloc_size); 658 | pSheduleArray->entries = (ScheduleEntry *)realloc(pSheduleArray->entries, bytes); 659 | if (pSheduleArray->entries == NULL) { 660 | logError("file: "__FILE__", line: %d, malloc %d bytes fail", 661 | __LINE__, bytes); 662 | return ENOMEM; 663 | } 664 | } 665 | 666 | return 0; 667 | } 668 | 669 | static int add_cron_shedule_entries() 670 | { 671 | ChildProcessInfo *cron_processes[MAX_CRON_PROCESS_PER_ENTRY]; 672 | ChildProcessInfo *process; 673 | CronEntry *pCronEntry; 674 | ScheduleEntry *pScheduleEntry; 675 | ScheduleArray shedule_array = {NULL, 0}; 676 | int alloc_size = 0; 677 | int i, k; 678 | int count; 679 | int result; 680 | 681 | if (cron_entry_count == 0) { 682 | return 0; 683 | } 684 | 685 | for (i=0; itime_base, pCronEntry->interval, 705 | schedule_task_func, process); 706 | shedule_array.count++; 707 | } 708 | } 709 | 710 | if (cron_proc_array.count > 0) { 711 | logInfo("cron processes count: %d", cron_proc_array.count); 712 | for (i = 0; i < cron_proc_array.count; i++) { 713 | if ((result=process_set_command_params(cron_proc_array.processes[i])) != 0) { 714 | return result; 715 | } 716 | } 717 | } 718 | 719 | if (shedule_array.count > 0 && (result=sched_add_entries(&shedule_array)) != 0) { 720 | return result; 721 | } 722 | 723 | free(shedule_array.entries); 724 | free(cron_entries); 725 | cron_entries = NULL; 726 | cron_entry_count = 0; 727 | cron_entry_alloc_size = 0; 728 | return 0; 729 | } 730 | 731 | static int add_force_restart_shedule_entries() 732 | { 733 | ChildProcessInfo **child; 734 | ChildProcessInfo **end; 735 | ScheduleEntry *pScheduleEntry; 736 | ScheduleArray shedule_array = {NULL, 0}; 737 | int result; 738 | int alloc_size = 0; 739 | 740 | end = child_proc_array.processes + child_proc_array.count; 741 | for (child=child_proc_array.processes; childforce_restart_interval <= 0) { 743 | continue; 744 | } 745 | 746 | if ((result=check_alloc_schedule_entries(&shedule_array, 747 | &alloc_size, 1)) != 0) 748 | { 749 | return result; 750 | } 751 | 752 | pScheduleEntry = shedule_array.entries + shedule_array.count; 753 | INIT_SCHEDULE_ENTRY((*pScheduleEntry), sched_generate_next_id(), 754 | 23, 59, 30, (*child)->force_restart_interval, 755 | force_restart_func, *child); 756 | shedule_array.count++; 757 | } 758 | 759 | if (shedule_array.count > 0 && (result=sched_add_entries(&shedule_array)) != 0) { 760 | return result; 761 | } 762 | 763 | free(shedule_array.entries); 764 | return 0; 765 | } 766 | 767 | static int add_cron_entry(ChildProcessInfo *process, 768 | const char *time_base, const int interval) 769 | { 770 | int bytes; 771 | int result; 772 | CronEntry *pEntry; 773 | 774 | if (cron_entry_count >= cron_entry_alloc_size) { 775 | if (cron_entry_alloc_size == 0) { 776 | cron_entry_alloc_size = 64; 777 | } 778 | else { 779 | cron_entry_alloc_size *= 2; 780 | } 781 | bytes = sizeof(CronEntry) * cron_entry_alloc_size; 782 | cron_entries = (CronEntry *)realloc(cron_entries, bytes); 783 | if (cron_entries == NULL) { 784 | logError("file: "__FILE__", line: %d, malloc %d bytes fail", 785 | __LINE__, bytes); 786 | return ENOMEM; 787 | } 788 | } 789 | 790 | pEntry = cron_entries + cron_entry_count; 791 | pEntry->interval = interval; 792 | result = get_time_item_from_str(time_base, "time_base", &pEntry->time_base, 0, 0); 793 | if (result != 0) { 794 | return result; 795 | } 796 | cron_entry_count++; 797 | return 0; 798 | } 799 | 800 | static inline bool is_run_by_sh(const char *cmd) 801 | { 802 | int cmd_len = strlen(cmd); 803 | if (strchr(cmd, '>') != NULL || strchr(cmd, '|') != NULL || 804 | cmd[cmd_len - 1] == '&') 805 | { 806 | return true; 807 | } 808 | 809 | return (cmd_len > 2 && *cmd == '(' && cmd[cmd_len - 1] == ')'); 810 | } 811 | 812 | static inline bool calc_run_by_sh(RunByShType run_by_sh, const char *cmd) 813 | { 814 | if (run_by_sh == rbst_auto) { 815 | return is_run_by_sh(cmd); 816 | } else { 817 | return run_by_sh == rbst_true; 818 | } 819 | } 820 | 821 | static int add_env(EnvArray *envs, const char *name, const char *value) 822 | { 823 | if (envs->alloc_size <= envs->count) { 824 | int bytes; 825 | 826 | if (envs->alloc_size == 0) { 827 | envs->alloc_size = 4; 828 | } else { 829 | envs->alloc_size *= 2; 830 | } 831 | bytes = sizeof(EnvEntry) * envs->alloc_size; 832 | envs->entries = (EnvEntry *)realloc(envs->entries, bytes); 833 | if (envs->entries == NULL) { 834 | logError("file: "__FILE__", line: %d, malloc %d bytes fail", 835 | __LINE__, bytes); 836 | return ENOMEM; 837 | } 838 | } 839 | 840 | envs->entries[envs->count].name = strdup(name); 841 | envs->entries[envs->count].value = strdup(value); 842 | envs->count++; 843 | return 0; 844 | } 845 | 846 | static int ini_section_load(const int index, const HashData *data, void *args) 847 | { 848 | IniSection *pSection; 849 | IniItem *pItem; 850 | IniItem *pItemEnd; 851 | char section_name[256]; 852 | int section_len; 853 | EnvArray envs = {NULL, 0, 0}; 854 | int result; 855 | int i; 856 | 857 | pSection = (IniSection *)data->value; 858 | if (pSection == NULL) { 859 | return 0; 860 | } 861 | 862 | section_len = data->key_len; 863 | if (section_len >= sizeof(section_name)) { 864 | section_len = sizeof(section_name) - 1; 865 | } 866 | 867 | memcpy(section_name, data->key, section_len); 868 | *(section_name + section_len) = '\0'; 869 | 870 | if (pSection->count > 0) { 871 | const char* cmd = NULL; 872 | char *type; 873 | char *mode; 874 | char *check_alive_command; 875 | char *time_base; 876 | int cnum = subprocess_number; 877 | int new_restart_interval_ms = restart_interval_ms; 878 | int new_check_alive_interval = check_alive_interval; 879 | int new_check_alive_retry_threshold = check_alive_retry_threshold; 880 | int new_takeover_stdout = takeover_stdout; 881 | int new_takeover_stderr = takeover_stderr; 882 | int force_restart_interval = 0; 883 | RunByShType run_by_sh = rbst_auto; 884 | int repeat_interval; 885 | bool enableAccessLog = enable_access_log; 886 | 887 | 888 | type = NULL; 889 | mode = NULL; 890 | check_alive_command = NULL; 891 | time_base = NULL; 892 | repeat_interval = 86400; 893 | pItemEnd = pSection->items + pSection->count; 894 | for (pItem=pSection->items; pItemname, "subprocess_command") == 0) { 896 | cmd = pItem->value; 897 | } else if (strcmp(pItem->name, "subprocess_number") == 0) { 898 | cnum = atoi(pItem->value); 899 | } else if (strcmp(pItem->name, "restart_interval_ms") == 0) { 900 | new_restart_interval_ms = atoi(pItem->value); 901 | } else if (strcmp(pItem->name, "check_alive_interval") == 0) { 902 | new_check_alive_interval = atoi(pItem->value); 903 | } else if (strcmp(pItem->name, "force_restart_interval") == 0) { 904 | force_restart_interval = atoi(pItem->value); 905 | } else if (strcmp(pItem->name, "check_alive_retry_threshold") == 0) { 906 | new_check_alive_retry_threshold = atoi(pItem->value); 907 | if (new_check_alive_retry_threshold <= 0) { 908 | new_check_alive_retry_threshold = DEFAULT_CHECK_ALIVE_RETRY_THRESHOLD; 909 | } 910 | } else if (strcmp(pItem->name, "takeover_stdout") == 0) { 911 | new_takeover_stdout = FAST_INI_STRING_IS_TRUE(pItem->value); 912 | } else if (strcmp(pItem->name, "takeover_stderr") == 0) { 913 | new_takeover_stderr = FAST_INI_STRING_IS_TRUE(pItem->value); 914 | } else if (strcmp(pItem->name, "check_alive_command") == 0) { 915 | check_alive_command = pItem->value; 916 | } else if (strcmp(pItem->name, "type") == 0) { 917 | type = pItem->value; 918 | } else if (strcmp(pItem->name, "run_by_sh") == 0) { 919 | if (strcasecmp(pItem->value, "auto") == 0) { 920 | run_by_sh = rbst_auto; 921 | } else { 922 | run_by_sh = FAST_INI_STRING_IS_TRUE(pItem->value) ? 923 | rbst_true : rbst_false; 924 | } 925 | } else if (strcmp(pItem->name, "mode") == 0) { 926 | mode = pItem->value; 927 | } else if (strcmp(pItem->name, "time_base") == 0) { 928 | time_base = pItem->value; 929 | } else if (strcmp(pItem->name, "repeat_interval") == 0) { 930 | repeat_interval = atoi(pItem->value); 931 | if (repeat_interval <= 0) { 932 | repeat_interval = 86400; 933 | logWarning("file: "__FILE__", line: %d, " 934 | "invalid repeat_interval for section %s, " 935 | "set to %d", __LINE__, 936 | section_name, repeat_interval); 937 | } 938 | } else if (strcmp(pItem->name, "enable_access_log") == 0) { 939 | enableAccessLog = FAST_INI_STRING_IS_TRUE(pItem->value); 940 | } else if (strcmp(pItem->name, "set_env") == 0) { 941 | char buff[FAST_INI_ITEM_VALUE_SIZE]; 942 | char *cols[2]; 943 | snprintf(buff, sizeof(buff), "%s", pItem->value); 944 | if (splitEx(buff, '=', cols, 2) != 2) { 945 | logError("file: "__FILE__", line: %d, " 946 | "invalid env pair: %s, correct format: name=value", 947 | __LINE__, pItem->value); 948 | return EINVAL; 949 | } 950 | if ((result=add_env(&envs, fc_trim(cols[0]), fc_trim(cols[1]))) != 0) { 951 | return result; 952 | } 953 | } 954 | } 955 | 956 | if (cmd == NULL || *cmd == '\0') { 957 | logError("file: "__FILE__", line: %d, section %s, " 958 | "expect subprocess_command", __LINE__, section_name); 959 | return EINVAL; 960 | } 961 | 962 | snprintf(logfiles_all[logfiles_count], MAX_PATH_SIZE, 963 | "%s/%s-%s.log", logpath, service_name, section_name); 964 | if (enableAccessLog) { 965 | snprintf(acclogs_all[logfiles_count], MAX_PATH_SIZE, 966 | "%s/%s-%s-access.log", logpath, service_name, section_name); 967 | } else { 968 | *acclogs_all[logfiles_count] = '\0'; 969 | } 970 | if (type != NULL && strlen(type) >= 4 && strncmp(type, "cron", 4) == 0) { 971 | ChildProcessInfo* cpro; 972 | cpro = malloc_cron_process_entry(); 973 | if (cpro == NULL) { 974 | return ENOMEM; 975 | } 976 | if (check_alloc_command_array(&cpro->commands, 1) != 0) { 977 | return ENOMEM; 978 | } 979 | cpro->commands.list[0].command.run_by_sh = calc_run_by_sh( 980 | run_by_sh, cmd); 981 | cpro->commands.list[0].command.cmd = strdup(cmd); 982 | cpro->commands.count = 1; 983 | cpro->logfile = strdup(logfiles_all[logfiles_count]); 984 | cpro->acclog = strdup(acclogs_all[logfiles_count]); 985 | cpro->mode = MODE_ALL; 986 | cpro->enable_access_log = enableAccessLog; 987 | cpro->takeover_stdout = new_takeover_stdout; 988 | cpro->takeover_stderr = new_takeover_stderr; 989 | cpro->envs = envs; 990 | return add_cron_entry(cpro, time_base, repeat_interval); 991 | } 992 | 993 | if (cnum > 0 && new_restart_interval_ms >= 0) { 994 | for (i = 0; i < cnum; i ++) { 995 | ChildProcessInfo* cpro; 996 | cpro = malloc_child_process_entry(); 997 | if (cpro == NULL) { 998 | return ENOMEM; 999 | } 1000 | cpro->pid = 0; 1001 | if (check_alloc_command_array(&cpro->commands, 1) != 0) { 1002 | return ENOMEM; 1003 | } 1004 | if (mode != NULL && strcmp(mode, "failover") == 0) { 1005 | cpro->mode = MODE_FAILOVER; 1006 | } else { 1007 | cpro->mode = MODE_ALL; 1008 | } 1009 | cpro->commands.list[0].command.run_by_sh = calc_run_by_sh( 1010 | run_by_sh, cmd); 1011 | cpro->commands.list[0].command.cmd = strdup(cmd); 1012 | 1013 | if (new_check_alive_interval > 0) { 1014 | if (check_alive_command != NULL) { 1015 | cpro->commands.list[0].health_check.command.cmd = 1016 | strdup(check_alive_command); 1017 | } 1018 | } 1019 | if (force_restart_interval > 0 && force_restart_interval < 1020 | new_restart_interval_ms / 1000) 1021 | { 1022 | logWarning("file: "__FILE__", line: %d, " 1023 | "force_restart_interval: %d < restart_interval: %d, " 1024 | " set to restart_interval", __LINE__, 1025 | force_restart_interval, new_restart_interval_ms / 1000); 1026 | force_restart_interval = new_restart_interval_ms / 1000; 1027 | } 1028 | 1029 | cpro->commands.count = 1; 1030 | cpro->logfile = logfiles_all[logfiles_count]; 1031 | cpro->acclog = acclogs_all[logfiles_count]; 1032 | cpro->restart_interval_ms = new_restart_interval_ms; 1033 | cpro->check_alive_interval = new_check_alive_interval; 1034 | cpro->check_alive_retry_threshold = new_check_alive_retry_threshold; 1035 | cpro->force_restart_interval = force_restart_interval; 1036 | cpro->enable_access_log = enableAccessLog; 1037 | cpro->takeover_stdout = new_takeover_stdout; 1038 | cpro->takeover_stderr = new_takeover_stderr; 1039 | cpro->envs = envs; 1040 | } 1041 | logfiles_count++; 1042 | } else { 1043 | logError("file: "__FILE__", line: %d, invalid config " 1044 | "for section %s subprocess_command: %s" 1045 | " subprocess_number: %d restart_interval_ms %d", 1046 | __LINE__, section_name, cmd, cnum, new_restart_interval_ms); 1047 | return EINVAL; 1048 | } 1049 | } 1050 | 1051 | return 0; 1052 | } 1053 | 1054 | static int expand_params(char *str, char *out_buff, const int buff_size, 1055 | char **params, const int max_count) 1056 | { 1057 | char *pStart; 1058 | char *pMid; 1059 | char *p; 1060 | int start; 1061 | int end; 1062 | int len; 1063 | int count; 1064 | int i; 1065 | 1066 | pStart = str + 1; 1067 | pMid = strchr(pStart, '-'); 1068 | if (pMid == NULL) { 1069 | return 0; 1070 | } 1071 | 1072 | while (*pStart == ' ' || *pStart == '\t') { 1073 | pStart++; 1074 | } 1075 | p = pStart; 1076 | while (*p >= '0' && *p <= '9') { 1077 | p++; 1078 | } 1079 | while (*p == ' ' || *p == '\t') { 1080 | p++; 1081 | } 1082 | if (p != pMid) { 1083 | return 0; 1084 | } 1085 | start = atoi(pStart); 1086 | 1087 | pStart = pMid + 1; 1088 | while (*pStart == ' ' || *pStart == '\t') { 1089 | pStart++; 1090 | } 1091 | p = pStart; 1092 | while (*p >= '0' && *p <= '9') { 1093 | p++; 1094 | } 1095 | while (*p == ' ' || *p == '\t') { 1096 | p++; 1097 | } 1098 | if (*(p + 1) != '\0') { 1099 | return 0; 1100 | } 1101 | end = atoi(pStart); 1102 | 1103 | if ((end - start) + 1 > max_count) { 1104 | logError("file: "__FILE__", line: %d, item count: %d " 1105 | "exceeds max: %d", __LINE__, 1106 | end - start + 1, max_count); 1107 | return 0; 1108 | } 1109 | 1110 | count = 0; 1111 | len = 0; 1112 | for (i=start; i<=end; i++) { 1113 | if (len + 16 > buff_size) { 1114 | logError("file: "__FILE__", line: %d, expect buffer " 1115 | "size: %d exceed: %d", __LINE__, len + 16, buff_size); 1116 | return 0; 1117 | } 1118 | 1119 | p = out_buff + len; 1120 | params[count++] = p; 1121 | len += sprintf(p, "%d", i) + 1; 1122 | } 1123 | return count; 1124 | } 1125 | 1126 | static int get_params(char *str, char *out_buff, const int buff_size, 1127 | char **params, const int max_count) 1128 | { 1129 | int count; 1130 | if (*str == '[' && *(str + strlen(str) - 1) == ']') { 1131 | count = expand_params(str, out_buff, buff_size, 1132 | params, max_count); 1133 | if (count > 0) { 1134 | return count; 1135 | } 1136 | } 1137 | 1138 | return splitEx(str, ',', params, max_count); 1139 | } 1140 | 1141 | static int expand_cmd(ChildProcessInfo *cpro, 1142 | malloc_process_func malloc_func, ChildProcessInfo **processes, 1143 | int *pnum, const int max_count) 1144 | { 1145 | #define MAX_PARAMS_COUNT 256 1146 | 1147 | char *cmd; 1148 | char *new_cmd; 1149 | char *pdollar; 1150 | char pword[64]; 1151 | char *pworde; 1152 | char *confArgs; 1153 | char *tail; 1154 | char args[MAX_PATH_SIZE]; 1155 | char *params[MAX_PARAMS_COUNT]; 1156 | char out_buff[1024]; 1157 | int i; 1158 | int count; 1159 | int cmd_len; 1160 | int word_len; 1161 | int front_len; 1162 | 1163 | cmd = cpro->commands.list[0].command.cmd; 1164 | cmd_len = strlen(cmd); 1165 | pdollar = cmd; 1166 | while (1) { 1167 | pdollar = (char*)strchr(pdollar, '$'); 1168 | if (pdollar == NULL) { //no need to expand 1169 | if (processes != NULL) { 1170 | processes[0] = cpro; 1171 | *pnum = 1; 1172 | } 1173 | return 0; 1174 | } 1175 | 1176 | pworde = pdollar + 1; 1177 | while (*pworde != '\0' && ((*pworde >= 'a' && *pworde <= 'z') || 1178 | (*pworde >= 'A' && *pworde <= 'Z') || 1179 | (*pworde >= '0' && *pworde <= '9') || 1180 | (*pworde >= '_') || (*pworde == '-') || 1181 | (*pworde >= '.'))) 1182 | { 1183 | pworde++; 1184 | } 1185 | 1186 | tail = pworde; 1187 | word_len = pworde - (pdollar + 1); 1188 | if (word_len >= sizeof(pword)) { 1189 | logError("file: "__FILE__", line: %d, key length " 1190 | "too long, exceeds %d, key: %.*s. in cmd: %s", 1191 | __LINE__, (int)sizeof(pword), word_len, 1192 | pdollar + 1, cmd); 1193 | pdollar = pworde; 1194 | continue; 1195 | } 1196 | 1197 | sprintf(pword, "%.*s", word_len, pdollar + 1); 1198 | confArgs = iniGetStrValue(NULL, pword, iniContext); 1199 | if (confArgs == NULL) { 1200 | logWarning("file: "__FILE__", line: %d, no conf word for " 1201 | "%s in global section. in cmd: %s", 1202 | __LINE__, pword, cmd); 1203 | pdollar = pworde; 1204 | continue; 1205 | } 1206 | 1207 | break; 1208 | } 1209 | 1210 | if ((int)strlen(confArgs) >= (int)sizeof(args)) { 1211 | logError("file: "__FILE__", line: %d, the value of " 1212 | "%s in global section is too long", 1213 | __LINE__, pword); 1214 | return ENOSPC; 1215 | } 1216 | 1217 | strcpy(args, confArgs); 1218 | count = get_params(args, out_buff, sizeof(out_buff), 1219 | params, MAX_PARAMS_COUNT); 1220 | 1221 | front_len = pdollar - cmd; 1222 | if (cpro->mode == MODE_ALL) { 1223 | for (i=1; icommands, 0, sizeof(lpro->commands)); 1232 | if (check_alloc_command_array(&lpro->commands, 1) != 0) { 1233 | return ENOMEM; 1234 | } 1235 | lpro->commands.list[0].command.cmd = malloc(cmd_len + strlen(params[i]) + 1); 1236 | if (lpro->commands.list[0].command.cmd == NULL) { 1237 | logError("file: "__FILE__", line: %d, malloc %d bytes fail", 1238 | __LINE__, (int)(cmd_len + strlen(params[i])) + 1); 1239 | return ENOMEM; 1240 | } 1241 | lpro->commands.count = 1; 1242 | memcpy(lpro->commands.list[0].command.cmd, cmd, front_len); 1243 | sprintf(lpro->commands.list[0].command.cmd + front_len, "%s%s", params[i], tail); 1244 | lpro->commands.list[0].health_check.command.cmd = do_strdup( 1245 | cpro->commands.list[0].health_check.command.cmd); 1246 | 1247 | if (processes != NULL) { 1248 | if (i < max_count) { 1249 | processes[i] = lpro; 1250 | } else { 1251 | logWarning("file: "__FILE__", line: %d, " 1252 | "exceeds max count: %d", 1253 | __LINE__, max_count); 1254 | } 1255 | } 1256 | } 1257 | 1258 | if (processes != NULL) { 1259 | processes[0] = cpro; 1260 | *pnum = count; 1261 | } 1262 | } else { //failover 1263 | if (check_alloc_command_array(&cpro->commands, count) != 0) { 1264 | return ENOMEM; 1265 | } 1266 | for (i=1; icommands.list[i].command.run_by_sh = cpro->commands.list[0].command.run_by_sh; 1268 | cpro->commands.list[i].command.cmd = malloc(cmd_len + strlen(params[i]) + 1); 1269 | if (cpro->commands.list[i].command.cmd == NULL) { 1270 | logError("file: "__FILE__", line: %d, malloc %d bytes fail", 1271 | __LINE__, (int)(cmd_len + strlen(params[i])) + 1); 1272 | return ENOMEM; 1273 | } 1274 | memcpy(cpro->commands.list[i].command.cmd, cmd, front_len); 1275 | sprintf(cpro->commands.list[i].command.cmd + front_len, "%s%s", params[i], tail); 1276 | cpro->commands.list[i].health_check.command.cmd = do_strdup( 1277 | cpro->commands.list[0].health_check.command.cmd); 1278 | } 1279 | cpro->commands.count = count; 1280 | if (processes != NULL) { 1281 | processes[0] = cpro; 1282 | *pnum = 1; 1283 | } 1284 | } 1285 | 1286 | new_cmd = malloc(cmd_len + strlen(params[0]) + 1); 1287 | if (new_cmd == NULL) { 1288 | logError("file: "__FILE__", line: %d, malloc %d bytes fail", 1289 | __LINE__, (int)(cmd_len + strlen(params[0])) + 1); 1290 | return ENOMEM; 1291 | } 1292 | memcpy(new_cmd, cmd, front_len); 1293 | sprintf(new_cmd + front_len, "%s%s", params[0], tail); 1294 | free(cpro->commands.list[0].command.cmd); 1295 | cpro->commands.list[0].command.cmd = new_cmd; 1296 | return 0; 1297 | } 1298 | 1299 | static int expand_child_cmd(ChildProcessInfo *cpro) 1300 | { 1301 | return expand_cmd(cpro, malloc_child_process_entry, NULL, NULL, 0); 1302 | } 1303 | 1304 | static char *get_command_param(char **str, char *end) 1305 | { 1306 | char *p; 1307 | char *start; 1308 | char quote; 1309 | 1310 | p = *str; 1311 | if (!(*p == '\'' || *p == '"')) { 1312 | return p; 1313 | } 1314 | 1315 | quote = *p; 1316 | start = ++p; 1317 | while (p < end && *p != quote) { 1318 | p++; 1319 | } 1320 | if (p == end) { 1321 | logError("file: "__FILE__", line: %d, " 1322 | "expect quote char: %c!", 1323 | __LINE__, quote); 1324 | return NULL; 1325 | } 1326 | if (p + 1 < end && *(p + 1) != ' ') { 1327 | logError("file: "__FILE__", line: %d, " 1328 | "expect space char, but char %c occurs!", 1329 | __LINE__, *(p + 1)); 1330 | return NULL; 1331 | } 1332 | 1333 | *p = '\0'; 1334 | *str = p + 1; 1335 | return start; 1336 | } 1337 | 1338 | static int split_command_params(char *cmd, char **argv, int *argc, 1339 | const int max_count) 1340 | { 1341 | int count; 1342 | char *p; 1343 | char *end; 1344 | 1345 | count = 0; 1346 | p = cmd; 1347 | end = cmd + strlen(cmd); 1348 | argv[count] = get_command_param(&p, end); 1349 | if (argv[count] == NULL) { 1350 | return EINVAL; 1351 | } 1352 | count++; 1353 | 1354 | while ((p = strchr(p, ' ')) != NULL) { 1355 | *p++ = '\0'; 1356 | while (*p == ' ') p++; 1357 | if (*p != '\0') { 1358 | if (count < max_count) { 1359 | argv[count] = get_command_param(&p, end); 1360 | if (argv[count] == NULL) { 1361 | return EINVAL; 1362 | } 1363 | count++; 1364 | } else { 1365 | logError("file: "__FILE__", line: %d, " 1366 | "too many parameters exceeds %d!", 1367 | __LINE__, max_count); 1368 | return ENAMETOOLONG; 1369 | } 1370 | } 1371 | } 1372 | 1373 | *argc = count; 1374 | return 0; 1375 | } 1376 | 1377 | 1378 | static int set_command_params(CommandParams *command, const bool enable_access_log, 1379 | char *acclog) 1380 | { 1381 | char *argv[MAX_PARAM_COUNT + 2]; 1382 | char *cmd; 1383 | int result; 1384 | int argc; 1385 | int bytes; 1386 | 1387 | argc = 0; 1388 | if (command->run_by_sh) { 1389 | argv[argc++] = strdup("/bin/sh"); 1390 | argv[argc++] = "-c"; 1391 | argv[argc++] = command->cmd; 1392 | } else { 1393 | cmd = strdup(command->cmd); 1394 | result = split_command_params(cmd, argv, &argc, MAX_PARAM_COUNT); 1395 | if (result != 0) { 1396 | free(cmd); 1397 | return result; 1398 | } 1399 | 1400 | if (enable_access_log) { 1401 | argv[argc++] = acclog; 1402 | } 1403 | } 1404 | argv[argc++] = NULL; 1405 | 1406 | bytes = sizeof(char *) * argc; 1407 | command->argv = (char **)malloc(bytes); 1408 | if (command->argv == NULL) { 1409 | logError("file: "__FILE__", line: %d, " 1410 | "malloc %d bytes fail", __LINE__, bytes); 1411 | return ENOMEM; 1412 | } 1413 | 1414 | command->argc = argc; 1415 | memcpy(command->argv, argv, bytes); 1416 | return 0; 1417 | } 1418 | 1419 | static int replace_check_alive_command(CommandEntry *entry) 1420 | { 1421 | char *src; 1422 | char *dest; 1423 | char *start; 1424 | char *p; 1425 | char *end; 1426 | char *new_cmd; 1427 | int result; 1428 | int bytes; 1429 | int num_len; 1430 | int param_len; 1431 | int n; 1432 | int argc; 1433 | char *buff; 1434 | char *params[MAX_PARAM_COUNT]; 1435 | char **argv; 1436 | char num[4]; 1437 | 1438 | bytes = strlen(entry->command.cmd) + strlen(entry->health_check.command.cmd); 1439 | new_cmd = (char *)malloc(bytes); 1440 | if (new_cmd == NULL) { 1441 | logError("file: "__FILE__", line: %d, " 1442 | "malloc %d bytes fail", __LINE__, bytes); 1443 | return ENOMEM; 1444 | } 1445 | 1446 | if (entry->command.run_by_sh) { 1447 | buff = strdup(entry->command.cmd); 1448 | if (buff == NULL) { 1449 | logError("file: "__FILE__", line: %d, " 1450 | "malloc %d bytes fail", __LINE__, 1451 | (int)strlen(entry->command.cmd)); 1452 | return ENOMEM; 1453 | } 1454 | result = split_command_params(buff, params, &argc, MAX_PARAM_COUNT); 1455 | if (result != 0) { 1456 | return result; 1457 | } 1458 | argv = params; 1459 | } else { 1460 | buff = NULL; 1461 | argc = entry->command.argc; 1462 | argv = entry->command.argv; 1463 | } 1464 | 1465 | dest = new_cmd; 1466 | src = entry->health_check.command.cmd; 1467 | end = entry->health_check.command.cmd + strlen(entry->health_check.command.cmd); 1468 | while (src < end) { 1469 | if (*src != '$') { 1470 | *dest++ = *src++; 1471 | continue; 1472 | } 1473 | 1474 | start = p = src + 1; 1475 | while (p < end && (*p >= '0' && *p <= '9')) { 1476 | p++; 1477 | } 1478 | 1479 | num_len = p - start; 1480 | if (num_len == 0) { 1481 | *dest++ = *src++; 1482 | continue; 1483 | } 1484 | 1485 | if (num_len >= (int)sizeof(num)) { 1486 | logError("file: "__FILE__", line: %d, " 1487 | "group number %.*s is too large", 1488 | __LINE__, num_len, start); 1489 | return ENAMETOOLONG; 1490 | } 1491 | memcpy(num, start, num_len); 1492 | *(num + num_len) = '\0'; 1493 | n = atoi(num); 1494 | if (n < 1 || n >= argc) { 1495 | logError("file: "__FILE__", line: %d, " 1496 | "group number %d is invalid", 1497 | __LINE__, n); 1498 | return ENAMETOOLONG; 1499 | } 1500 | 1501 | param_len = strlen(argv[n]); 1502 | memcpy(dest, argv[n], param_len); 1503 | dest += param_len; 1504 | src = p; 1505 | } 1506 | 1507 | if (buff != NULL) { 1508 | free(buff); 1509 | } 1510 | *dest = '\0'; 1511 | free(entry->health_check.command.cmd); 1512 | entry->health_check.command.cmd = new_cmd; 1513 | return 0; 1514 | } 1515 | 1516 | static int parse_check_alive_command(ChildProcessInfo* cpro) 1517 | { 1518 | int i; 1519 | int result; 1520 | struct health_check_entry *health_check; 1521 | 1522 | for (i=0; icommands.count; i++) { 1523 | health_check = &cpro->commands.list[i].health_check; 1524 | if (health_check->command.cmd == NULL) { 1525 | health_check->type = hc_type_kill; 1526 | continue; 1527 | } 1528 | 1529 | if (strchr(health_check->command.cmd, '$') != NULL) { 1530 | result = replace_check_alive_command(cpro->commands.list + i); 1531 | if (result != 0) { 1532 | return result; 1533 | } 1534 | } 1535 | 1536 | result = set_command_params(&health_check->command, false, NULL); 1537 | if (result != 0) { 1538 | return result; 1539 | } 1540 | 1541 | if (ends_with(health_check->command.argv[0], ".so")) { 1542 | void *handle; 1543 | health_check->type = hc_type_library; 1544 | handle = dlopen(health_check->command.argv[0], RTLD_LAZY); 1545 | if (handle == NULL) { 1546 | logError("file: "__FILE__", line: %d, load library %s fail, " 1547 | "error info: %s", __LINE__, health_check->command.argv[0], 1548 | dlerror()); 1549 | return EFAULT; 1550 | } 1551 | 1552 | health_check->func = (health_check_func)dlsym(handle, "check_alive"); 1553 | if (health_check->func == NULL) { 1554 | logError("file: "__FILE__", line: %d, load function check_alive " 1555 | "in library %s fail, error info: %s", __LINE__, 1556 | health_check->command.argv[0], dlerror()); 1557 | return EFAULT; 1558 | } 1559 | } else { 1560 | health_check->type = hc_type_exec; 1561 | } 1562 | 1563 | logDebug("cmd: %s, health check cmd: %s, check type: %d", 1564 | cpro->commands.list[i].command.cmd, health_check->command.cmd, 1565 | health_check->type); 1566 | } 1567 | 1568 | return 0; 1569 | } 1570 | 1571 | static int parse_check_alive_commands() 1572 | { 1573 | int result; 1574 | int i; 1575 | for (i = 0; i < child_proc_array.count; i++) { 1576 | ChildProcessInfo* child = child_proc_array.processes[i]; 1577 | if (child->check_alive_interval > 0) { 1578 | result = parse_check_alive_command(child); 1579 | if (result != 0) { 1580 | return result; 1581 | } 1582 | } 1583 | } 1584 | 1585 | return 0; 1586 | } 1587 | 1588 | static int process_set_command_params(ChildProcessInfo* cpro) 1589 | { 1590 | int i; 1591 | int result; 1592 | 1593 | for (i=0; icommands.count; i++) { 1594 | result = set_command_params(&cpro->commands.list[i].command, 1595 | cpro->enable_access_log, cpro->acclog); 1596 | if (result != 0) { 1597 | return result; 1598 | } 1599 | } 1600 | 1601 | return 0; 1602 | } 1603 | 1604 | static int load_from_conf_file(const char* filename) 1605 | { 1606 | int result; 1607 | int i; 1608 | const char* p; 1609 | 1610 | memset(iniContext, 0, sizeof(IniContext)); 1611 | result = iniLoadFromFileEx(filename, iniContext, 1612 | FAST_INI_ANNOTATION_WITH_BUILTIN, 1613 | NULL, 0, FAST_INI_FLAGS_SHELL_EXECUTE); 1614 | if (result != 0) { 1615 | logError("file: "__FILE__", line: %d, load conf file %s fail, " 1616 | "ret code: %d", __LINE__, filename, result); 1617 | return result; 1618 | } 1619 | p = iniGetStrValue(NULL, "run_by_group", iniContext); 1620 | if (p) { 1621 | strcpy(run_by_group, p); 1622 | } 1623 | p = iniGetStrValue(NULL, "run_by_user", iniContext); 1624 | if (p) { 1625 | strcpy(run_by_user, p); 1626 | } 1627 | load_log_level(iniContext); 1628 | 1629 | p = iniGetStrValue(NULL, "base_path", iniContext); 1630 | if (p == NULL || p[0] == '\0') { 1631 | logError("file: "__FILE__", line: %d, base_path should be set", 1632 | __LINE__); 1633 | return EINVAL; 1634 | } else { 1635 | strcpy(base_path, p); 1636 | } 1637 | p = iniGetStrValue(NULL, "service_name", iniContext); 1638 | if (p == NULL || p[0] == '\0') { 1639 | logError("file: "__FILE__", line: %d, " 1640 | "service_name should be set in config", __LINE__); 1641 | return EINVAL; 1642 | } else { 1643 | strcpy(service_name, p); 1644 | } 1645 | snprintf(pidfile, sizeof pidfile, "%s/watchd-%s.pid", base_path, service_name); 1646 | snprintf(logpath, sizeof logpath, "%s/logs", base_path); 1647 | snprintf(logfile, sizeof logfile, "%s/watchd-%s.log", logpath, service_name); 1648 | 1649 | log_file_keep_days = iniGetIntValue(NULL, "log_file_keep_days", iniContext, 0); 1650 | if (log_file_keep_days < 0) { 1651 | log_file_keep_days = 0; 1652 | } 1653 | 1654 | subprocess_number = iniGetIntValue(NULL, "subprocess_number", iniContext, 1); 1655 | if (subprocess_number <= 0) { 1656 | subprocess_number = 1; 1657 | } 1658 | 1659 | wait_subprocess_ms = iniGetIntValue(NULL, "wait_subprocess_ms", 1660 | iniContext, DEFAULT_WAIT_SUBPROCESS); 1661 | if (wait_subprocess_ms <= 0) { 1662 | wait_subprocess_ms = DEFAULT_WAIT_SUBPROCESS; 1663 | } 1664 | 1665 | restart_interval_ms = iniGetIntValue(NULL, "restart_interval_ms", 1666 | iniContext, DEFAULT_RESTART_INTERVAL); 1667 | if (restart_interval_ms < 0) { 1668 | restart_interval_ms = DEFAULT_RESTART_INTERVAL; 1669 | } 1670 | 1671 | check_alive_interval = iniGetIntValue(NULL, "check_alive_interval", 1672 | iniContext, DEFAULT_CHECK_ALIVE_INTERVAL); 1673 | if (check_alive_interval < 0) { 1674 | check_alive_interval = DEFAULT_CHECK_ALIVE_INTERVAL; 1675 | } 1676 | 1677 | check_alive_retry_threshold = iniGetIntValue(NULL, "check_alive_retry_threshold", 1678 | iniContext, DEFAULT_CHECK_ALIVE_RETRY_THRESHOLD); 1679 | if (check_alive_retry_threshold <= 0) { 1680 | check_alive_retry_threshold = DEFAULT_CHECK_ALIVE_RETRY_THRESHOLD; 1681 | } 1682 | enable_access_log = iniGetBoolValue(NULL, "enable_access_log", iniContext, false); 1683 | takeover_stdout = iniGetBoolValue(NULL, "takeover_stdout", iniContext, true); 1684 | takeover_stderr = iniGetBoolValue(NULL, "takeover_stderr", iniContext, true); 1685 | 1686 | logfiles_all = malloc(MAX_PATH_SIZE * iniContext->sections.item_count); 1687 | acclogs_all = malloc(MAX_PATH_SIZE * iniContext->sections.item_count); 1688 | if ((result=fc_hash_walk(&iniContext->sections, ini_section_load, NULL)) != 0) { 1689 | return result; 1690 | } 1691 | 1692 | for (i = child_proc_array.count-1; i >= 0; i--) { 1693 | if ((result=expand_child_cmd(child_proc_array.processes[i])) != 0) { 1694 | return result; 1695 | } 1696 | } 1697 | 1698 | for (i = 0; i < child_proc_array.count; i++) { 1699 | if ((result=process_set_command_params(child_proc_array.processes[i])) != 0) { 1700 | return result; 1701 | } 1702 | } 1703 | 1704 | return parse_check_alive_commands(); 1705 | } 1706 | 1707 | static int setup_schedule_tasks() 1708 | { 1709 | #define SCHEDULE_ENTRIES_COUNT 2 1710 | 1711 | ScheduleEntry scheduleEntries[SCHEDULE_ENTRIES_COUNT]; 1712 | ScheduleArray scheduleArray; 1713 | ScheduleEntry *pEntry; 1714 | 1715 | pEntry = scheduleEntries; 1716 | memset(scheduleEntries, 0, sizeof(scheduleEntries)); 1717 | 1718 | INIT_SCHEDULE_ENTRY((*pEntry), sched_generate_next_id(), 1719 | 0, 0, 0, 86400, rotate_logs, NULL); 1720 | pEntry++; 1721 | 1722 | scheduleArray.entries = scheduleEntries; 1723 | scheduleArray.count = pEntry - scheduleEntries; 1724 | return sched_start(&scheduleArray, &schedule_tid, 1725 | 64 * 1024, (bool * volatile)&continue_flag); 1726 | } 1727 | 1728 | static inline void do_inc_child_running(ChildProcessInfo *child, const int inc) 1729 | { 1730 | child_running.total += inc; 1731 | if (is_standalone(child)) { 1732 | child_running.standalone += inc; 1733 | } 1734 | } 1735 | 1736 | #define inc_child_running(child) do_inc_child_running(child, 1) 1737 | 1738 | static inline void dec_child_running(ChildProcessInfo *child) 1739 | { 1740 | do_inc_child_running(child, -1); 1741 | if (child->force_restart_flag) { 1742 | child->force_restart_flag = false; 1743 | child_running.force_restart--; 1744 | } 1745 | } 1746 | 1747 | static int update_process(int pid, const int status) 1748 | { 1749 | ChildProcessInfo target; 1750 | ChildProcessInfo *pTarget; 1751 | ChildProcessInfo** found = NULL; 1752 | 1753 | target.pid = pid; 1754 | pTarget = ⌖ 1755 | found = (ChildProcessInfo **)bsearch(&pTarget, child_proc_array.processes, 1756 | child_proc_array.count, sizeof(ChildProcessInfo *), 1757 | process_info_cmp_pid); 1758 | if (found == NULL) { 1759 | if (cron_proc_array.count > 0) { 1760 | found = (ChildProcessInfo **)bsearch(&pTarget, 1761 | cron_proc_array.processes, cron_proc_array.count, 1762 | sizeof(ChildProcessInfo *), process_info_cmp_pid); 1763 | } 1764 | if (found == NULL) { 1765 | logWarning("file: "__FILE__", line: %d, pid: %d not found", 1766 | __LINE__, pid); 1767 | return EINVAL; 1768 | } else { 1769 | logInfo("file: "__FILE__", line: %d, cron process exit " 1770 | "with status: %d. %s", __LINE__, 1771 | status, get_current_command(*found)); 1772 | return 0; 1773 | } 1774 | } 1775 | 1776 | if ((*found)->running) { 1777 | (*found)->running = false; 1778 | dec_child_running(*found); 1779 | } 1780 | logInfo("file: "__FILE__", line: %d, process %d exit " 1781 | "with status %d. running %d processes with %d standalone. %s", 1782 | __LINE__, (*found)->pid, status, child_running.total, 1783 | child_running.standalone, get_current_command(*found)); 1784 | return 0; 1785 | } 1786 | 1787 | static int run_process(ChildProcessInfo *process, 1788 | CommandParams *command, pid_t *pid) 1789 | { 1790 | *pid = fork(); 1791 | if (*pid == 0) { //child process 1792 | if (process->takeover_stdout || process->takeover_stderr) { 1793 | const char *lfile; 1794 | int fd; 1795 | 1796 | lfile = process->logfile; 1797 | fd = open(lfile, O_APPEND | O_CREAT | O_WRONLY, 0644); 1798 | umask(022); 1799 | if (fd < 0) { 1800 | logError("file: "__FILE__", line: %d, open file %s fail, " 1801 | "errno: %d, error info: %s", __LINE__, lfile, 1802 | errno, strerror(errno)); 1803 | _exit(1); 1804 | } 1805 | 1806 | if (process->takeover_stdout && dup2(fd, 1) < 0) { 1807 | logError("file: "__FILE__", line: %d, dup2 stdout fail, " 1808 | "errno: %d, error info: %s", 1809 | __LINE__, errno, strerror(errno)); 1810 | _exit(1); 1811 | } 1812 | 1813 | if (process->takeover_stderr && dup2(fd, 2) < 0) { 1814 | logError("file: "__FILE__", line: %d, dup2 stderr fail, " 1815 | "errno: %d, error info: %s", 1816 | __LINE__, errno, strerror(errno)); 1817 | _exit(1); 1818 | } 1819 | } 1820 | if (process->envs.count > 0) { 1821 | int i; 1822 | for (i=0; ienvs.count; i++) { 1823 | setenv(process->envs.entries[i].name, 1824 | process->envs.entries[i].value, 1); 1825 | } 1826 | } 1827 | if (execvp(command->argv[0], command->argv) < 0) { 1828 | logError("file: "__FILE__", line: %d, execvp fail, " 1829 | "errno: %d, error info: %s", 1830 | __LINE__, errno, strerror(errno)); 1831 | _exit(1); 1832 | } 1833 | } else if (*pid < 0) { 1834 | logError("file: "__FILE__", line: %d, fork fail, " 1835 | "errno: %d, error info: %s", 1836 | __LINE__, errno, strerror(errno)); 1837 | return errno != 0 ? errno : EACCES; 1838 | } 1839 | 1840 | return 0; 1841 | } 1842 | 1843 | static int start_process(ChildProcessInfo *process) 1844 | { 1845 | int result; 1846 | pid_t pid; 1847 | CommandParams *command; 1848 | 1849 | command = get_next_command(process); 1850 | if ((result=run_process(process, command, &pid)) == 0) { 1851 | process->pid = pid; 1852 | if (process->check_alive_interval > 0) { 1853 | process->last_check_alive_time = g_current_time; 1854 | get_current_command_entry(process)->health_check.fail_count = 0; 1855 | } 1856 | } 1857 | return result; 1858 | } 1859 | 1860 | static int start_all_processes() 1861 | { 1862 | int i; 1863 | int result; 1864 | int64_t now; 1865 | 1866 | now = get_current_time_ms(); 1867 | for (i = 0; i < child_proc_array.count; i++) { 1868 | if (!child_proc_array.processes[i]->running 1869 | && now - child_proc_array.processes[i]->last_start_time_ms >= 1870 | child_proc_array.processes[i]->restart_interval_ms) 1871 | { 1872 | result = start_process(child_proc_array.processes[i]); 1873 | if (result != 0) { 1874 | continue; 1875 | } 1876 | 1877 | child_proc_array.processes[i]->running = true; 1878 | child_proc_array.processes[i]->last_start_time_ms = get_current_time_ms(); 1879 | inc_child_running(child_proc_array.processes[i]); 1880 | 1881 | logInfo("file: "__FILE__", line: %d, process %d started%s." 1882 | " running %d processes with %d standalone running. %s %s", 1883 | __LINE__, child_proc_array.processes[i]->pid, 1884 | get_current_command_entry(child_proc_array.processes[i])-> 1885 | command.run_by_sh ? "(run by sh -c)" : "", 1886 | child_running.total, child_running.standalone, 1887 | get_current_command(child_proc_array.processes[i]), 1888 | child_proc_array.processes[i]->enable_access_log ? 1889 | child_proc_array.processes[i]->acclog : ""); 1890 | } 1891 | } 1892 | 1893 | if (child_proc_array.count > 1) { 1894 | qsort(child_proc_array.processes, child_proc_array.count, 1895 | sizeof(ChildProcessInfo *), process_info_cmp_pid); 1896 | } 1897 | return 0; 1898 | } 1899 | 1900 | static inline int remain_running_count(const StopProcessType stop_type) 1901 | { 1902 | if (stop_type == spt_stop_all) { 1903 | return child_running.total; 1904 | } else if (stop_type == spt_stop_none_standalone) { 1905 | return child_running.total - child_running.standalone; 1906 | }else { 1907 | return child_running.force_restart; 1908 | } 1909 | } 1910 | 1911 | static void stop_processes(const StopProcessType stop_type) 1912 | { 1913 | #define NEED_KILL_PROCESS(stop_type, pro) \ 1914 | ((stop_type == spt_stop_all) || \ 1915 | (stop_type == spt_stop_none_standalone && !is_standalone(pro)) || \ 1916 | (stop_type == spt_stop_force_restart && pro->force_restart_flag)) 1917 | 1918 | int i; 1919 | int64_t start_time; 1920 | char *prompt; 1921 | int remain_running; 1922 | 1923 | start_time = get_current_time_ms(); 1924 | for (i = 0; i < child_proc_array.count; i++) { 1925 | ChildProcessInfo* pro = child_proc_array.processes[i]; 1926 | if (pro->pid > 0 && pro->running) { 1927 | if (NEED_KILL_PROCESS(stop_type, pro)) { 1928 | kill(pro->pid, SIGTERM); 1929 | } 1930 | } 1931 | } 1932 | remain_running = remain_running_count(stop_type); 1933 | for (i = 0; i < wait_subprocess_ms/5 && remain_running > 0; i++) { 1934 | usleep(10*1000); 1935 | check_all_processes(); 1936 | remain_running = remain_running_count(stop_type); 1937 | } 1938 | 1939 | if (remain_running_count(stop_type) > 0) { 1940 | for (i = 0; i < child_proc_array.count; i++) { 1941 | ChildProcessInfo* pro = child_proc_array.processes[i]; 1942 | if (pro->pid > 0 && pro->running) { 1943 | if (stop_type == spt_stop_all || !is_standalone(pro)) { 1944 | kill(pro->pid, SIGKILL); 1945 | } 1946 | } 1947 | } 1948 | } 1949 | remain_running = remain_running_count(stop_type); 1950 | for (i = 0; i < 10 && remain_running > 0; i++) { 1951 | usleep(10*1000); 1952 | check_all_processes(); 1953 | remain_running = remain_running_count(stop_type); 1954 | } 1955 | if (remain_running_count(stop_type) > 0) { 1956 | logWarning("file: "__FILE__", line: %d, after sigkill %d " 1957 | "children still running. ignore", 1958 | __LINE__, remain_running_count(stop_type)); 1959 | for (i = 0; i < child_proc_array.count; i++) { 1960 | ChildProcessInfo* pro = child_proc_array.processes[i]; 1961 | if (pro->pid > 0 && pro->running) { 1962 | if (NEED_KILL_PROCESS(stop_type, pro)) { 1963 | pro->pid = 0; 1964 | pro->running = false; 1965 | dec_child_running(pro); 1966 | } 1967 | } 1968 | } 1969 | } 1970 | 1971 | if (stop_type == spt_stop_all) { 1972 | prompt = ""; 1973 | } else { 1974 | prompt = (stop_type == spt_stop_none_standalone) ? 1975 | " non-standalone" : " force restart"; 1976 | } 1977 | 1978 | logInfo("file: "__FILE__", line: %d, all%s subprocesses stopped. " 1979 | "used %"PRId64" ms, child running %d with %d standalone", 1980 | __LINE__, prompt, get_current_time_ms() - start_time, 1981 | child_running.total, child_running.standalone); 1982 | return; 1983 | } 1984 | 1985 | static int do_check_alive(ChildProcessInfo* child, CommandEntry *cmd_entry) 1986 | { 1987 | int result; 1988 | pid_t pid; 1989 | char output[256]; 1990 | bool ok; 1991 | 1992 | pid = child->pid; 1993 | if (pid <= 0) { 1994 | return ENOENT; 1995 | } 1996 | 1997 | if (cmd_entry->health_check.type == hc_type_exec) { 1998 | if ((result=getExecResult(cmd_entry->health_check.command.cmd, 1999 | output, sizeof(output))) != 0) 2000 | { 2001 | return result; 2002 | } 2003 | 2004 | fc_trim(output); 2005 | ok = (strcasecmp(output, "OK") == 0); 2006 | if (!ok) { 2007 | logError("file: "__FILE__", line: %d, " 2008 | "health check fail, cmd: %s, output: %s", __LINE__, 2009 | cmd_entry->health_check.command.cmd, output); 2010 | } 2011 | } else { 2012 | result = cmd_entry->health_check.func(cmd_entry->health_check.command.argc, 2013 | cmd_entry->health_check.command.argv); 2014 | ok = (result == 0); 2015 | if (!ok) { 2016 | logError("file: "__FILE__", line: %d, " 2017 | "health check fail, cmd: %s, result: %d", __LINE__, 2018 | cmd_entry->health_check.command.cmd, result); 2019 | } 2020 | } 2021 | 2022 | if (ok) { 2023 | logDebug("file: "__FILE__", line: %d, " 2024 | "health check for pid %d OK, check cmd: %s", __LINE__, 2025 | (int)child->pid, cmd_entry->health_check.command.cmd); 2026 | if (cmd_entry->health_check.fail_count > 0) { 2027 | cmd_entry->health_check.fail_count = 0; 2028 | } 2029 | return 0; 2030 | } 2031 | 2032 | cmd_entry->health_check.fail_count++; 2033 | if (cmd_entry->health_check.fail_count >= child->check_alive_retry_threshold) { 2034 | if (pid != child->pid) { 2035 | logInfo("file: "__FILE__", line: %d, " 2036 | "pid changed from %d to %d, maybe process: %s restart", 2037 | __LINE__, (int)pid, (int)child->pid, cmd_entry->command.cmd); 2038 | } else if (kill(pid, SIGTERM) == 0) { 2039 | int i; 2040 | 2041 | logWarning("file: "__FILE__", line: %d, " 2042 | "health check fail count reach %d, kill the process: %s", 2043 | __LINE__, child->check_alive_retry_threshold, 2044 | cmd_entry->command.cmd); 2045 | for (i=0; i<10; i++) { 2046 | if (kill(pid, 0) != 0) { 2047 | break; 2048 | } 2049 | sleep(1); 2050 | } 2051 | if (i == 10) { 2052 | kill(pid, SIGKILL); 2053 | logWarning("file: "__FILE__", line: %d, " 2054 | "force kill the process: %s", 2055 | __LINE__, cmd_entry->command.cmd); 2056 | } 2057 | } else { 2058 | result = errno != 0 ? errno : ESRCH; 2059 | if (result != ESRCH) { 2060 | logError("file: "__FILE__", line: %d, " 2061 | "kill fail, process: %s, error info: %s ", 2062 | __LINE__, cmd_entry->command.cmd, strerror(result)); 2063 | } 2064 | } 2065 | cmd_entry->health_check.fail_count = 0; 2066 | } 2067 | 2068 | return result; 2069 | } 2070 | 2071 | static void *check_alive_entrance(void *args) 2072 | { 2073 | ChildProcessInfo *child; 2074 | CommandEntry *cmd_entry; 2075 | 2076 | child = (ChildProcessInfo *)args; 2077 | child->last_check_alive_time = g_current_time; 2078 | 2079 | while (continue_flag) { 2080 | sleep(child->check_alive_interval); 2081 | if (child->last_check_alive_time + child->check_alive_interval > g_current_time) { 2082 | continue; 2083 | } 2084 | 2085 | if (!(child->pid > 0 && child->running)) { 2086 | continue; 2087 | } 2088 | 2089 | cmd_entry = get_current_command_entry(child); 2090 | do_check_alive(child, cmd_entry); 2091 | child->last_check_alive_time = g_current_time; 2092 | } 2093 | 2094 | return NULL; 2095 | } 2096 | 2097 | static void check_subproccess_alive() 2098 | { 2099 | int i; 2100 | 2101 | if (child_running.total <= 0 || last_check_alive_time >= g_current_time) { 2102 | return; 2103 | } 2104 | last_check_alive_time = g_current_time; 2105 | 2106 | for (i = 0; i < child_proc_array.count; i++) { 2107 | ChildProcessInfo* child = child_proc_array.processes[i]; 2108 | if (!(child->pid > 0 && child->running && child->check_alive_interval > 0)) { 2109 | continue; 2110 | } 2111 | if (get_current_command_entry(child)->health_check.type != hc_type_kill) { 2112 | continue; 2113 | } 2114 | 2115 | if (child->last_check_alive_time + child->check_alive_interval > g_current_time) { 2116 | continue; 2117 | } 2118 | 2119 | child->last_check_alive_time = g_current_time; 2120 | if (kill(child->pid, 0) != 0) { 2121 | child->running = false; 2122 | dec_child_running(child); 2123 | 2124 | logInfo("file: "__FILE__", line: %d, process %d " 2125 | "already exited. errno: %d, error info: %s, " 2126 | "running %d processes with %d standalone. %s", __LINE__, 2127 | child->pid, errno, strerror(errno), 2128 | child_running.total, child_running.standalone, 2129 | get_current_command(child)); 2130 | } 2131 | } 2132 | } 2133 | 2134 | static int check_all_processes() 2135 | { 2136 | int pid; 2137 | int status; 2138 | 2139 | while ((pid=waitpid(-1, &status, WNOHANG)) > 0) { 2140 | if (!WIFSTOPPED(status)) { 2141 | update_process(pid, status); 2142 | } else { 2143 | logDebug("pid: %d stopped", pid); 2144 | } 2145 | } 2146 | 2147 | return 0; 2148 | } 2149 | 2150 | void rotate_file(const char* fname) 2151 | { 2152 | time_t current_time; 2153 | time_t deleted_time; 2154 | struct tm tm; 2155 | char old_filename[MAX_PATH_SIZE]; 2156 | int len; 2157 | 2158 | if (access(fname, F_OK) != 0) { 2159 | return; 2160 | } 2161 | 2162 | current_time = get_current_time() - 60; 2163 | deleted_time = current_time - log_file_keep_days * 86400; 2164 | 2165 | localtime_r(¤t_time, &tm); 2166 | memset(old_filename, 0, sizeof(old_filename)); 2167 | len = sprintf(old_filename, "%s.", fname); 2168 | strftime(old_filename + len, sizeof(old_filename) - len, 2169 | "%Y%m%d", &tm); 2170 | if (access(old_filename, F_OK) == 0) { 2171 | logError("file: "__FILE__", line: %d, " 2172 | "file %s already exist, rotate failed. ignore", 2173 | __LINE__, old_filename); 2174 | } else if (rename(fname, old_filename) != 0) { 2175 | logError("file: "__FILE__", line: %d, rename %s -> %s fail, " 2176 | "errno: %d, error info: %s", __LINE__, 2177 | fname, old_filename, errno, strerror(errno)); 2178 | } 2179 | 2180 | if (log_file_keep_days <= 0) { 2181 | return; 2182 | } 2183 | while (1) { 2184 | struct tm tm2; 2185 | deleted_time -= 86400; 2186 | localtime_r(&deleted_time, &tm2); 2187 | memset(old_filename, 0, sizeof(old_filename)); 2188 | len = sprintf(old_filename, "%s.", fname); 2189 | strftime(old_filename + len, sizeof(old_filename) - len, 2190 | "%Y%m%d", &tm2); 2191 | if (unlink(old_filename) != 0) { 2192 | if (errno != ENOENT) { 2193 | logError("file: "__FILE__", line: %d, " 2194 | "delete file %s fail, " 2195 | "errno: %d, error info: %s", 2196 | __LINE__, old_filename, 2197 | errno, strerror(errno)); 2198 | } else { 2199 | break; 2200 | } 2201 | } 2202 | } 2203 | } 2204 | 2205 | static int rotate_logs(void* arg) 2206 | { 2207 | int i; 2208 | 2209 | log_rotate(&g_log_context); 2210 | if (log_file_keep_days > 0) { 2211 | log_delete_old_files(&g_log_context); 2212 | } 2213 | for (i = 0; i < logfiles_count; i ++) { 2214 | rotate_file(logfiles_all[i]); 2215 | if (enable_access_log && *acclogs_all[i] != '\0') { 2216 | rotate_file(acclogs_all[i]); 2217 | } 2218 | } 2219 | restart_subprocess = true; 2220 | return 0; 2221 | } 2222 | 2223 | 2224 | static void deal_force_restart_queue() 2225 | { 2226 | ChildProcessInfo *child; 2227 | 2228 | if (g_current_time - last_deal_force_restart_time < 1) { 2229 | return; 2230 | } 2231 | last_deal_force_restart_time = g_current_time; 2232 | 2233 | child_running.force_restart = 0; 2234 | while ((child=(ChildProcessInfo *)common_blocked_queue_try_pop( 2235 | &force_restart_queue)) != NULL) 2236 | { 2237 | if ((child->pid > 0 && child->running) && 2238 | (get_current_time_ms() - child->last_start_time_ms > 2239 | child->restart_interval_ms)) 2240 | { 2241 | child->force_restart_flag = true; 2242 | child_running.force_restart++; 2243 | } 2244 | } 2245 | 2246 | if (child_running.force_restart > 0) { 2247 | logDebug("file: "__FILE__", line: %d, " 2248 | "force restart count: %d", __LINE__, 2249 | child_running.force_restart); 2250 | stop_force_restart_processes(); 2251 | } 2252 | } 2253 | --------------------------------------------------------------------------------