├── .DS_Store ├── .idea ├── Tspider.iml ├── copyright │ └── profiles_settings.xml ├── dictionaries │ └── rudytan.xml ├── misc.xml ├── modules.xml ├── vcs.xml └── workspace.xml ├── example ├── .DS_Store └── lol_hero_god_rank │ ├── .DS_Store │ ├── .idea │ ├── copyright │ │ └── profiles_settings.xml │ ├── dictionaries │ │ └── rudytan.xml │ ├── lol_hero_rank.iml │ ├── misc.xml │ ├── modules.xml │ ├── vcs.xml │ └── workspace.xml │ ├── application │ ├── .DS_Store │ ├── config │ │ ├── .DS_Store │ │ └── main.php │ ├── extension │ │ ├── Common.php │ │ └── Http.php │ ├── task │ │ ├── ExternalStatisticsRequestTimer.php │ │ ├── HeroGodRankRequestTimer.php │ │ ├── MonitorDownloadTask.php │ │ └── MonitorTimer.php │ └── worker │ │ ├── ExternalStatisticsWorker.php │ │ └── HeroGodRankWorker.php │ ├── framework │ ├── .DS_Store │ ├── TSpider.php │ ├── base │ │ ├── BaseWorker.php │ │ ├── Logger.php │ │ ├── Request.php │ │ └── Response.php │ ├── component │ │ ├── Component.php │ │ ├── MysqlComponent.php │ │ ├── RedisComponent.php │ │ ├── RedisLockComponent.php │ │ └── SmsWoquComponent.php │ ├── config │ │ └── default.php │ ├── coroutine │ │ ├── ExampleTask.php │ │ ├── Task.php │ │ ├── TaskScheduler.php │ │ └── TimerTask.php │ ├── extension │ │ ├── .DS_Store │ │ ├── Common.php │ │ ├── Http.php │ │ ├── TLock.php │ │ ├── TMysql.php │ │ ├── TRedis.php │ │ └── simple_html_dom.php │ ├── queue │ │ ├── LocalRequestQueue.php │ │ └── RequestQueue.php │ └── task │ │ ├── AddRequestTask.php │ │ ├── DelayTimer.php │ │ └── DownloadTask.php │ ├── index.php │ ├── log │ └── .DS_Store │ ├── readme.md │ └── snatch_lol.sql ├── framework ├── .DS_Store ├── TSpider.php ├── base │ ├── BaseWorker.php │ ├── Request.php │ └── Response.php ├── component │ ├── Component.php │ ├── MysqlComponent.php │ ├── RedisComponent.php │ ├── RedisLockComponent.php │ └── SmsWoquComponent.php ├── config │ └── default.php ├── coroutine │ ├── ExampleTask.php │ ├── Task.php │ ├── TaskScheduler.php │ └── TimerTask.php ├── extension │ ├── .DS_Store │ ├── Common.php │ ├── THttp.php │ ├── TLock.php │ ├── TLogger.php │ ├── TMysql.php │ ├── TRedis.php │ └── simple_html_dom.php ├── queue │ ├── LocalRequestQueue.php │ └── RequestQueue.php └── task │ ├── AddRequestTask.php │ ├── DelayTimer.php │ └── DownloadTask.php └── readme.md /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hirudy/Tspider/93ba6b4f9ac1725519af3386c7ca28b33ceb8770/.DS_Store -------------------------------------------------------------------------------- /.idea/Tspider.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/copyright/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /.idea/dictionaries/rudytan.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 28 | 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /example/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hirudy/Tspider/93ba6b4f9ac1725519af3386c7ca28b33ceb8770/example/.DS_Store -------------------------------------------------------------------------------- /example/lol_hero_god_rank/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hirudy/Tspider/93ba6b4f9ac1725519af3386c7ca28b33ceb8770/example/lol_hero_god_rank/.DS_Store -------------------------------------------------------------------------------- /example/lol_hero_god_rank/.idea/copyright/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /example/lol_hero_god_rank/.idea/dictionaries/rudytan.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /example/lol_hero_god_rank/.idea/lol_hero_rank.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /example/lol_hero_god_rank/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 37 | 39 | 40 | 41 | 42 | 43 | 44 | 49 | 50 | 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /example/lol_hero_god_rank/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /example/lol_hero_god_rank/.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /example/lol_hero_god_rank/application/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hirudy/Tspider/93ba6b4f9ac1725519af3386c7ca28b33ceb8770/example/lol_hero_god_rank/application/.DS_Store -------------------------------------------------------------------------------- /example/lol_hero_god_rank/application/config/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hirudy/Tspider/93ba6b4f9ac1725519af3386c7ca28b33ceb8770/example/lol_hero_god_rank/application/config/.DS_Store -------------------------------------------------------------------------------- /example/lol_hero_god_rank/application/config/main.php: -------------------------------------------------------------------------------- 1 | 'lolHeroGodRankSpider', 12 | 'logs' => array( 13 | 'beforeDownload' => array( 14 | 'logName' => 'beforeDownload', 15 | 'frequency' => \framework\base\Logger::LOG_FREQUENCY_DAY, 16 | ), 17 | 'afterDownload' => array( 18 | 'logName' => 'afterDownload', 19 | 'frequency' => \framework\base\Logger::LOG_FREQUENCY_DAY, 20 | ), 21 | 'worker' => array( 22 | 'logName' => 'worker', 23 | ), 24 | 'errorDownload' => array( 25 | 'logName' => 'errorDownload', 26 | 'mode' => \framework\base\Logger::LOG_MODE_BOTH 27 | ), 28 | 'errorParse' => array( 29 | 'logName' => 'errorParse', 30 | 'mode' => \framework\base\Logger::LOG_MODE_BOTH 31 | ), 32 | 'common' => array( 33 | 'logName' => 'common', 34 | 'mode' => \framework\base\Logger::LOG_MODE_BOTH, 35 | ), 36 | 'monitor' => array( 37 | 'logName' => 'monitor', 38 | 'mode' => \framework\base\Logger::LOG_MODE_BOTH, 39 | 'frequency' => \framework\base\Logger::LOG_FREQUENCY_DAY, 40 | ), 41 | ), 42 | 'request' => array( 43 | 'maxRepeat' => 3, 44 | 'timeOut' => 300, 45 | 'requestQueue' => 'framework\queue\LocalRequestQueue' 46 | ), 47 | 'downloader' => array( 48 | 'className' => 'application\task\MonitorDownloadTask', 49 | 'windowSize' => 100, 50 | ), 51 | 'tasks' => array( 52 | 'common' => array( 53 | 'application\task\MonitorTimer' => array(), 54 | ), 55 | 'hero_god' => array( 56 | 'application\task\HeroGodRankRequestTimer' => array(), 57 | ), 58 | 'external' => array( 59 | 'application\task\ExternalStatisticsRequestTimer' => array(), 60 | ) 61 | ), 62 | 63 | 'component' => array( 64 | 'dbSnatchLol' => array( 65 | 'className' => 'framework\component\MysqlComponent', 66 | 'host'=> '*.*.*.*', 67 | 'userName' => '****', 68 | 'password' => '*****', 69 | 'dbName' => 'snatch_lol', 70 | 'port' => '3306', 71 | 'checkConnection' => true 72 | ), 73 | 'sms' => array( 74 | 'className' => 'framework\component\SmsWoquComponent', 75 | 'serverUrl' => 'http://*.*.*.*:8080/send_sms', 76 | 'Module' => 'spider', 77 | 'To' => '', 78 | 'MsgType' => '0', 79 | ) 80 | ), 81 | ); 82 | -------------------------------------------------------------------------------- /example/lol_hero_god_rank/application/extension/Common.php: -------------------------------------------------------------------------------- 1 | $value){ 23 | $options[$key] =$value; 24 | } 25 | if(!empty($postData) && is_array($postData)){ 26 | $options[CURLOPT_POST] = true; 27 | $options[CURLOPT_POSTFIELDS] = http_build_query($postData); 28 | } 29 | if(stripos($url,'https') === 0){ 30 | $options[CURLOPT_SSL_VERIFYPEER] = false; 31 | } 32 | $ch = curl_init(); 33 | curl_setopt_array($ch,$options); 34 | $rel = curl_exec($ch); 35 | if($rel == false){ 36 | print_r(curl_getinfo($ch)); 37 | } 38 | curl_close($ch); 39 | return $rel; 40 | } 41 | } -------------------------------------------------------------------------------- /example/lol_hero_god_rank/application/task/ExternalStatisticsRequestTimer.php: -------------------------------------------------------------------------------- 1 | requestGenerator = $this->getOneRequest(); 27 | parent::__construct($intervalTime, $taskName); 28 | } 29 | 30 | /** 31 | * 获取英雄信息 32 | * @return array|bool 33 | * @throws \framework\base\Exception 34 | */ 35 | public static function getChampionInfo(){ 36 | if(empty(self::$championInfoList)){ 37 | $url = 'http://lol.zhangyoubao.com/apis/rest/RolesService/championInfo?iamsuperman=2'; 38 | $championInfoList = false; 39 | for ($i=0;$i < 5;$i++){ 40 | $data = Http::request($url); 41 | $data = json_decode($data,true); 42 | if(is_array($data)){ 43 | $championInfoList = $data; 44 | break; 45 | } 46 | } 47 | if($championInfoList == false){ 48 | $str = "spider exit: championInfo get error;"; 49 | Logger::factory('common')->error($str); 50 | TSpider::$component->sms->sendMessage($str,true); 51 | exit($str); 52 | }else{ 53 | foreach ($championInfoList as $key=>$row){ 54 | if($row['id'] == 1){ 55 | // 奎恩 56 | $championInfoList[$key]['enname'] = 'Quinn'; 57 | } 58 | self::$championInfoList[$championInfoList[$key]['enname']] =$championInfoList[$key]; 59 | } 60 | } 61 | } 62 | return self::$championInfoList; 63 | } 64 | 65 | public function getOneRequest(){ 66 | $championInfoList = self::getChampionInfo(); 67 | foreach ($championInfoList as $enname => $row){ 68 | yield self::createRequest($enname,$row); 69 | } 70 | } 71 | 72 | public static function createRequest($championAlias,$championInfo){ 73 | $url = "http://champion.gg/champion/{$championAlias}"; 74 | return new Request($url,'application\worker\ExternalStatisticsWorker',array(),array('championAlias'=>$championAlias,'championInfo'=>$championInfo)); 75 | } 76 | 77 | /** 78 | * 执行入口,复写父类相同名称方法 79 | */ 80 | function execute(){ 81 | for($i=0;$i<20;$i++){ 82 | $request = $this->requestGenerator->current(); 83 | if($request instanceof Request){ 84 | if(!$request->download()){ 85 | break; 86 | } 87 | $this->requestGenerator->next(); 88 | }else{ 89 | break; 90 | } 91 | } 92 | } 93 | } -------------------------------------------------------------------------------- /example/lol_hero_god_rank/application/task/HeroGodRankRequestTimer.php: -------------------------------------------------------------------------------- 1 | requestGenerator = $this->getOneRequest(); 25 | parent::__construct($intervalTime, $taskName); 26 | } 27 | 28 | public static function createRequest($tgpId,$page=1){ 29 | $time = time().''.rand(100,999); 30 | $url = "http://img.lol.qq.com/js/cevRank/{$tgpId}/{$page}.js?t={$time}"; 31 | return new Request($url,'application\worker\HeroGodRankWorker',array(),array('tgpId'=>$tgpId,'page'=>$page)); 32 | } 33 | 34 | public function getOneRequest(){ 35 | $tgpIdList = Http::request('http://lol.anzogame.com/apis/rest/RolesService/tgpId2IdForPkg?iamsuperman=2'); 36 | $tgpIdList = json_decode($tgpIdList,true); 37 | $tgpIdList = array_keys($tgpIdList); 38 | if(empty($tgpIdList)){ 39 | $str = 'get tgpId error! exit spider'; 40 | Logger::factory('common')->error($str); 41 | TSpider::$component->sms->sendMessage($str,true); 42 | exit($str); 43 | } 44 | 45 | foreach($tgpIdList as $tgpId){ 46 | yield self::createRequest($tgpId); 47 | } 48 | } 49 | 50 | /** 51 | * 执行入口,复写父类相同名称方法 52 | */ 53 | function execute(){ 54 | for($i=0;$i<30;$i++){ 55 | $request = $this->requestGenerator->current(); 56 | if($request instanceof Request){ 57 | if(!$request->download()){ 58 | break; 59 | } 60 | $this->requestGenerator->next(); 61 | }else{ 62 | break; 63 | } 64 | } 65 | } 66 | } -------------------------------------------------------------------------------- /example/lol_hero_god_rank/application/task/MonitorDownloadTask.php: -------------------------------------------------------------------------------- 1 | beforeDownloadLog = Logger::factory('beforeDownload'); 35 | $this->afterDownloadLog = Logger::factory('afterDownload'); 36 | $this->errorDownloadLog = Logger::factory('errorDownload'); 37 | // $this->proxyLog = Logger::factory('proxy'); 38 | parent::__construct($taskName); 39 | } 40 | 41 | public function countSpeed(){ 42 | static $lastTime; 43 | static $downloadNum; 44 | 45 | $intLastTime = (int)$lastTime; 46 | $downloadNum = (int)$downloadNum; 47 | $downloadNum ++; 48 | if(time()-$intLastTime>= self::$speedSecond){ 49 | self::$statisticsDownloadSpeed = "{$downloadNum}/m"; 50 | $downloadNum = 0; 51 | $lastTime =time(); 52 | } 53 | } 54 | 55 | 56 | public function beforeDownload(Request $request){ 57 | self::$statisticsAllRequestNum ++; 58 | $this->beforeDownloadLog->log((string)$request); 59 | return $request; 60 | } 61 | 62 | public function afterDownload(Request $request,Response $response){ 63 | self::$statisticsAllDownloadNum ++; 64 | $this->countSpeed(); 65 | if($response->code == 200 || $response->code == 404){ 66 | $this->afterDownloadLog->log($request->url.'|'.$response->code); 67 | }else{ 68 | self::$statisticsErrorNum ++; 69 | $this->afterDownloadLog->error($request->url.'|'.$response->code.'|'.$request->getRepeatNum().'|'.$response->getError()); 70 | 71 | // 重新下载 72 | if(!$request->redownload()){ 73 | // 最终下载失败的 74 | $url = substr($request->url,-35); 75 | TSpider::$component->sms->sendMessage("download error:$url"); 76 | $this->errorDownloadLog->error($request->url.'|'.$response->code.'|'.$request->getRepeatNum().'|'.$response->getError()); 77 | } 78 | return false; 79 | } 80 | return null; 81 | } 82 | } -------------------------------------------------------------------------------- /example/lol_hero_god_rank/application/task/MonitorTimer.php: -------------------------------------------------------------------------------- 1 | log = Logger::factory('monitor'); 26 | parent::__construct($intervalTime, $taskName); 27 | } 28 | 29 | /** 30 | * 保存通用爬虫参数 31 | * @param $prefix 32 | * @param $dbName 33 | * @throws \framework\base\Exception 34 | */ 35 | public function saveCrawlData($prefix,$dbName){ 36 | $data = array( 37 | $prefix.'pre_version' => TSpider::$startCrawlTime, 38 | $prefix.'update_time' => date('Y-m-d H:i:s',TSpider::$startCrawlTime), 39 | $prefix.'use_time' => time()-TSpider::$startCrawlTime, 40 | $prefix.'version' => TSpider::$startCrawlTime 41 | ); 42 | 43 | foreach($data as $key=>$value){ 44 | $sql = ''; 45 | try{ 46 | $connection = TSpider::$component->$dbName; 47 | if($prefix.'pre_version' == $key){ 48 | $sql = "select `value` from control where `key`='{$prefix}version' limit 1;"; 49 | $rel = $connection->query($sql); 50 | if(!empty($rel)){ 51 | $value = $rel[0]['value']; 52 | } 53 | } 54 | $sql = "select `value` from control where `key`='{$key}';"; 55 | $rel = $connection->query($sql); 56 | if(empty($rel)){ 57 | $sql = "insert control(`key`,`value`) values('{$key}','{$value}')"; 58 | }else{ 59 | $sql = "update control set `value`='{$value}' where `key`='{$key}';"; 60 | } 61 | 62 | $connection->query($sql); 63 | }catch(\Exception $e){ 64 | Logger::factory('common')->error('sql error: '.$sql.' | '.$e->getMessage()); 65 | } 66 | } 67 | } 68 | 69 | function execute(){ 70 | $name = TSpider::$spiderName; 71 | $requestQueueCount = TSpider::$requestQueue->count(); 72 | $requestNum = MonitorDownloadTask::$statisticsAllRequestNum; 73 | $downloadNum = MonitorDownloadTask::$statisticsAllDownloadNum; 74 | $downloadErrorNum = MonitorDownloadTask::$statisticsErrorNum; 75 | $downloadSpeed = MonitorDownloadTask::$statisticsDownloadSpeed; 76 | $parserNum = BaseWorker::$statisticsParserNum; 77 | $parserErrorNum = BaseWorker::$statisticsParserErrorNum; 78 | $saveNum = BaseWorker::$statisticsSaveNum; 79 | $saveErrorNum = BaseWorker::$statisticsSaveErrorNum; 80 | $memory = Common::getMemoryUsedSizeShow(); 81 | $str = "Memory:{$memory}|Queue:{$requestQueueCount}|Downloader:{$requestNum}-{$downloadNum}-{$downloadErrorNum}|Parser:{$parserNum}-{$parserErrorNum}-{$saveNum}-{$saveErrorNum}|Speed:{$downloadSpeed}"; 82 | $this->log->log($str); 83 | 84 | // 当下载器空闲多长时间,关闭爬虫 85 | $downloaderSpareTime = TSpider::$downloader->getSpareTime(); 86 | if($downloaderSpareTime >= 10){ 87 | TaskScheduler::closeAllTask(); 88 | // 保存操作变量 89 | switch (TSpider::$taskGroup){ 90 | case 'hero_god':{ 91 | $this->saveCrawlData('hero_god_rank_','dbSnatchLol'); 92 | }break; 93 | case 'external':{ 94 | $this->saveCrawlData('hero_external_','dbSnatchLol'); 95 | }break; 96 | default:{ 97 | $this->saveCrawlData('hero_god_rank_','dbSnatchLol'); 98 | $this->saveCrawlData('hero_external_','dbSnatchLol'); 99 | } 100 | } 101 | // 发送结束短信 102 | $str = "{$name} End:an-{$requestNum},dn-{$downloadNum},de-{$downloadErrorNum},pn-{$parserNum},pe-{$parserErrorNum},sn-{$saveNum},se-{$saveErrorNum}"; 103 | TSpider::$component->sms->sendMessage($str,true); 104 | Logger::factory('common')->log($str); 105 | } 106 | } 107 | } -------------------------------------------------------------------------------- /example/lol_hero_god_rank/application/worker/ExternalStatisticsWorker.php: -------------------------------------------------------------------------------- 1 | errorLog = Logger::factory('errorParse'); 36 | $this->log = Logger::factory('worker'); 37 | } 38 | 39 | 40 | /** 41 | * 重写解析方法 42 | * @param Request $request 43 | * @param Response $response 44 | */ 45 | public function parse(Request $request,Response $response){ 46 | if($response->code != 200){ 47 | $this->errorLog->log($request->url."| ".$response->code." | ".$response->getError().' | '.json_encode($request->extData['championInfo'])); 48 | return null; 49 | } 50 | 51 | $extractResult = $this->extract($response->getData(),$request); 52 | if($extractResult == false){ 53 | // 重新下载 54 | if(!$request->redownload()){ 55 | // 最终下载失败的 56 | $url = substr($request->url,-35); 57 | TSpider::$component->sms->sendMessage("parse error:$url"); 58 | $this->errorLog->error('parse error:'.$request->url.'|'.$response->code.'|'.$response->getError().'|'.$this->lastError); 59 | } 60 | return; 61 | } 62 | 63 | $this->log->log($request->url.' | '.$request->getRepeatNum()); 64 | $this->save($extractResult,$request); 65 | } 66 | 67 | /** 68 | * 提取数据 69 | * @param $data 70 | * @return array|bool 71 | */ 72 | public function extract($data,$rawRequest){ 73 | $response = array(); 74 | $dom = str_get_html($data); 75 | if(!$this->extractItemUse($dom,$response,$rawRequest)){ 76 | return false; 77 | } 78 | 79 | if(!$this->extractStatisticsChart($dom,$response,$rawRequest)){ 80 | return false; 81 | } 82 | 83 | return $response; 84 | } 85 | 86 | /** 87 | * 获取物品的数据,结果写入response中 88 | * @param $dom 89 | * @param $response 90 | * @return bool 91 | */ 92 | protected function extractItemUse($dom,&$response,$rawRequest){ 93 | do{ 94 | // 解析装备使用推荐 95 | $nodeList = $dom->find('div.col-md-7 div.build-wrapper'); 96 | if(!is_array($nodeList) || count($nodeList) != 2){ 97 | $this->lastError ='wrapper parse error'; 98 | break; 99 | } 100 | $itemData = array(); 101 | $itemUseData = array(); 102 | foreach ($nodeList as $node){ 103 | // 获取itemID 104 | $img_node_list = $node->find('a img'); 105 | $tempImg = array(); 106 | foreach ($img_node_list as $img_node){ 107 | $src = $img_node->src; 108 | if (preg_match('#(\d+)\.png#',$src,$match)){ 109 | $itemId = (int)$match[1]; 110 | if($itemId > 0){ 111 | $tempImg[] = $itemId; 112 | } 113 | } 114 | } 115 | if(count($tempImg) == 6){ 116 | $itemData[] = $tempImg; 117 | } 118 | 119 | // 获取item使用情况 120 | $use_node_list = $node->find('div.build-text strong'); 121 | $tempUse = array(); 122 | foreach ($use_node_list as $use_node){ 123 | $text = (float)$use_node->innertext; 124 | if($text > 0){ 125 | $tempUse[] = $text; 126 | } 127 | } 128 | 129 | if(count($tempUse) == 2){ 130 | $itemUseData[] = $tempUse; 131 | } 132 | } 133 | if(count($itemData) != 2 && count($itemUseData) != 2){ 134 | $this->lastError = 'item win rate num error'; 135 | $this->errorLog->error($this->lastError.'|'.$rawRequest->url); 136 | // break; 137 | } 138 | 139 | $totalData = array(); 140 | foreach ($itemData as $key=>$value){ 141 | if($key == 0){ 142 | $name = self::ITEM_FREQUENT; 143 | }elseif($key == 1){ 144 | $name = self::ITEM_WIN; 145 | }else{ 146 | break; 147 | } 148 | $totalData[$name]['item'] = $value; 149 | $totalData[$name]['win_rate'] = $itemUseData[$key][0]; 150 | $totalData[$name]['game_count'] = $itemUseData[$key][1]; 151 | } 152 | 153 | // 结果写入返回 154 | $response['item_data'] = $totalData; 155 | return true; 156 | }while(false); 157 | return false; 158 | } 159 | 160 | /** 161 | * 提取4个统计图表数据,结果写入response中 162 | * @param $dom 163 | * @param $response 164 | * @return bool 165 | */ 166 | protected function extractStatisticsChart($dom,&$response,$rawRequest){ 167 | $script_dom_list = $dom->find('script'); 168 | $data_text = ''; 169 | foreach ($script_dom_list as $script_dom){ 170 | $text = $script_dom->innertext; 171 | if(stripos($text,'matchupData.championData') !== false) { 172 | $data_text = $text; 173 | break; 174 | } 175 | } 176 | 177 | do{ 178 | if(empty($data_text)){ 179 | $this->lastError = 'no js data'; 180 | break; 181 | } 182 | // 解析general_role 183 | if(!preg_match('#matchupData\.generalRole *= *(\{.+?\});#',$data_text,$match)){ 184 | $this->lastError = 'preg_match error:generalRole'; 185 | break; 186 | } 187 | $data_general_role = json_decode($match[1],true); 188 | if(!is_array($data_general_role)){ 189 | $this->lastError = 'json_decode error:generalRole'; 190 | break; 191 | } 192 | 193 | // 解析champion_data 194 | if(!preg_match('#matchupData\.championData *= *(\{.+?\});#',$data_text,$match)){ 195 | $this->lastError = 'preg_match error:championData'; 196 | break; 197 | } 198 | $data_champion_data = json_decode($match[1],true); 199 | if(!is_array($data_general_role)){ 200 | $this->lastError = 'json_decode error:championData'; 201 | break; 202 | } 203 | 204 | // 解析patch_history 205 | if(!preg_match('#matchupData\.patchHistory *= *(\[.+?\]);#',$data_text,$match)){ 206 | $this->lastError = 'preg_match error:patchHistory'; 207 | break; 208 | } 209 | $data_patch_history = json_decode($match[1],true); 210 | if(!is_array($data_general_role)){ 211 | $this->lastError = 'json_decode error:patchHistory'; 212 | break; 213 | } 214 | 215 | $result = array(); 216 | 217 | $championName = $rawRequest->extData['championInfo']['name']; 218 | // 添加patch_win 219 | $temp_arr = array('target'=>array('y_prefix'=>'','y_suffix'=>'%'),'series'=>array()); 220 | $x_show = $data_patch_history; 221 | $serie1 = array('name'=>'英雄平均胜率','data'=>array()); 222 | $serie2 = array('name'=>$championName,'data'=>array()); 223 | $y_2 = isset($data_champion_data['patchWin'])?$data_champion_data['patchWin']:array(); 224 | if(count($x_show) !== count($y_2)){ 225 | $this->lastError = 'patch_win:y2 num error'; 226 | break; 227 | } 228 | foreach ($x_show as $key=>$value){ 229 | $serie1['data'][] = array($value,50); 230 | $serie2['data'][] = array($value,(float)$y_2[$key]); 231 | } 232 | $temp_arr['series'][] = $serie1; 233 | $temp_arr['series'][] = $serie2; 234 | $result[self::CHART_PATCH_WIN] = $temp_arr; 235 | 236 | // 添加patch_play 237 | $temp_arr = array('target'=>array('y_prefix'=>'','y_suffix'=>'%'),'series'=>array()); 238 | $x_show = $data_patch_history; 239 | $serie1 = array('name'=>'英雄平均出场率','data'=>array()); 240 | $serie2 = array('name'=>$championName,'data'=>array()); 241 | $y_1 = isset($data_general_role['patchPlay'])?$data_general_role['patchPlay']:array(); 242 | $y_2 = isset($data_champion_data['patchPlay'])?$data_champion_data['patchPlay']:array(); 243 | if(count($x_show) !== count($y_2) || count($x_show) !== count($y_1)){ 244 | $this->lastError = 'patch_play:y1/y2 num error'; 245 | break; 246 | } 247 | foreach ($x_show as $key=>$value){ 248 | $serie1['data'][] = array($value,(float)$y_1[$key]); 249 | $serie2['data'][] = array($value,(float)$y_2[$key]); 250 | } 251 | $temp_arr['series'][] = $serie1; 252 | $temp_arr['series'][] = $serie2; 253 | $result[self::CHART_PATCH_PLAY] = $temp_arr; 254 | 255 | // 添加game_length_win 256 | $temp_arr = array('target'=>array('y_prefix'=>'','y_suffix'=>'%'),'series'=>array()); 257 | $x_show = array('0-25','25-30','30-35','35-40','40+'); 258 | $serie1 = array('name'=>'英雄平均胜率','data'=>array()); 259 | $serie2 = array('name'=>$championName,'data'=>array()); 260 | $y_2 = isset($data_champion_data['gameLength'])?$data_champion_data['gameLength']:array(); 261 | if(count($x_show) !== count($y_2)){ 262 | $this->lastError = 'game_length_win:y2 num error'; 263 | break; 264 | } 265 | foreach ($x_show as $key=>$value){ 266 | $serie1['data'][] = array($value,50); 267 | $serie2['data'][] = array($value,(float)$y_2[$key]); 268 | } 269 | $temp_arr['series'][] = $serie1; 270 | $temp_arr['series'][] = $serie2; 271 | $result[self::CHART_GAME_LENGTH_WIN] = $temp_arr; 272 | 273 | // 添加game_play_win 274 | $temp_arr = array('target'=>array('y_prefix'=>'','y_suffix'=>'%'),'series'=>array()); 275 | $x_show = array('1-5','5-15','15-50','50-125','125+'); 276 | $serie1 = array('name'=>'英雄平均胜率','data'=>array()); 277 | $serie2 = array('name'=>$championName,'data'=>array()); 278 | $y_1 = array_fill(0,count($x_show),end($data_champion_data['patchWin'])); 279 | $y_2 = isset($data_champion_data['experienceRate'])?$data_champion_data['experienceRate']:array(); 280 | if(count($x_show) !== count($y_2)){ 281 | $this->lastError = 'game_length_win:y2 num error'; 282 | break; 283 | } 284 | foreach ($x_show as $key=>$value){ 285 | $serie1['data'][] = array($value,(float)$y_1[$key]); 286 | $serie2['data'][] = array($value,(float)$y_2[$key]); 287 | } 288 | $temp_arr['series'][] = $serie1; 289 | $temp_arr['series'][] = $serie2; 290 | $result[self::CHART_GAME_PLAY_WIN] = $temp_arr; 291 | 292 | $response['statistics_chart'] = $result; 293 | return true; 294 | }while(false); 295 | 296 | return false; 297 | } 298 | 299 | /** 300 | * 存储经过提取后的数据 301 | * @param $extractData 302 | * @throws \Exception 303 | */ 304 | public function save($extractData,$request){ 305 | try{ 306 | $fields = array( 307 | 'version' => TSpider::$startCrawlTime, 308 | 'champion_id' => (int)$request->extData['championInfo']['tgp_id'], 309 | 'item_frequent' => json_encode(Common::getArrayValue($extractData['item_data'],self::ITEM_FREQUENT,array())), 310 | 'item_win' => json_encode(Common::getArrayValue($extractData['item_data'],self::ITEM_WIN,array())), 311 | 'chart_patch_win' => json_encode(Common::getArrayValue($extractData['statistics_chart'],self::CHART_PATCH_WIN,array())), 312 | 'chart_patch_play' => json_encode(Common::getArrayValue($extractData['statistics_chart'],self::CHART_PATCH_PLAY,array())), 313 | 'chart_game_length_win' => json_encode(Common::getArrayValue($extractData['statistics_chart'],self::CHART_GAME_LENGTH_WIN,array())), 314 | 'chart_game_play_win' => json_encode(Common::getArrayValue($extractData['statistics_chart'],self::CHART_GAME_PLAY_WIN,array())), 315 | ); 316 | $rel = TSpider::$component->dbSnatchLol->insert('hero_external_statistics',$fields); 317 | if($rel <= 0){ 318 | throw new \Exception('insert error:',-1); 319 | } 320 | self::$statisticsSaveNum ++; 321 | }catch(\Exception $e){ 322 | $temp = substr($e->getMessage(),-35); 323 | TSpider::$component->sms->sendMessage("save error:{$temp}"); 324 | $this->errorLog->error('save error:'.$e->getMessage().'|'.json_encode($extractData)); 325 | } 326 | } 327 | } -------------------------------------------------------------------------------- /example/lol_hero_god_rank/application/worker/HeroGodRankWorker.php: -------------------------------------------------------------------------------- 1 | errorLog = Logger::factory('errorParse'); 25 | $this->log = Logger::factory('worker'); 26 | } 27 | 28 | 29 | /** 30 | * 重写解析方法 31 | * @param Request $request 32 | * @param Response $response 33 | */ 34 | public function parse(Request $request,Response $response){ 35 | if($response->code == 200){ 36 | $newRequest = HeroGodRankRequestTimer::createRequest($request->extData['tgpId'],$request->extData['page']+1); 37 | $newRequest->download(); 38 | }else{ 39 | return null; 40 | } 41 | $extractResult = $this->extract($response->getData()); 42 | if($extractResult == false){ 43 | // 重新下载 44 | if(!$request->redownload()){ 45 | // 最终下载失败的 46 | $url = substr($request->url,-35); 47 | TSpider::$component->sms->sendMessage("parse error:$url"); 48 | $this->errorLog->error('parse error:'.$request->url.'|'.$response->code.'|'.$response->getData()); 49 | } 50 | return; 51 | } 52 | 53 | $this->log->log($request->url.' | '.$request->getRepeatNum()); 54 | $this->save($extractResult,$request->extData['tgpId']); 55 | } 56 | 57 | /** 58 | * 提取数据 59 | * @param $data 60 | * @return array|bool 61 | */ 62 | public function extract($data){ 63 | $data = str_replace(array("try{heroSkillCallback(",")}catch(e){}"),array('',''),$data); 64 | $data = json_decode($data,true); 65 | if(!isset($data['retCode']) || !isset($data['data']['skillRank']) || $data['retCode'] !== 0){ 66 | return false; 67 | } 68 | return $data['data']['skillRank']; 69 | } 70 | 71 | 72 | /** 73 | * 存储经过提取后的数据 74 | * @param $extractData 75 | * @throws \Exception 76 | */ 77 | public function save($extractData,$heroId=0){ 78 | try{ 79 | $data_list = array(); 80 | foreach($extractData as $row){ 81 | $temp = array(); 82 | $temp['version'] = TSpider::$startCrawlTime; 83 | $temp['hero_id'] = $heroId; 84 | $temp['rank'] = $row['index']; 85 | $temp['area_id'] = $row['area_id']; 86 | $temp['area_name'] = $row['areaName']; 87 | $temp['icon_id'] = $row['iconId']; 88 | $temp['uin'] = $row['uin']; 89 | $temp['username'] = $row['uName']; 90 | $temp['proficiency'] = $row['cevValue']; 91 | 92 | $data_list[] = $temp; 93 | } 94 | $rel = TSpider::$component->dbSnatchLol->insertMulti('hero_god_rank',$data_list); 95 | if($rel <= 0){ 96 | throw new \Exception('insert error:',-1); 97 | } 98 | self::$statisticsSaveNum ++; 99 | }catch(\Exception $e){ 100 | $temp = substr($e->getMessage(),-35); 101 | TSpider::$component->sms->sendMessage("save error:{$temp}"); 102 | $this->errorLog->error('save error:'.$e->getMessage().'|'.json_encode($extractData)); 103 | } 104 | } 105 | } -------------------------------------------------------------------------------- /example/lol_hero_god_rank/framework/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hirudy/Tspider/93ba6b4f9ac1725519af3386c7ca28b33ceb8770/example/lol_hero_god_rank/framework/.DS_Store -------------------------------------------------------------------------------- /example/lol_hero_god_rank/framework/TSpider.php: -------------------------------------------------------------------------------- 1 | init($protectedName); 48 | } 49 | 50 | public static function init($protectedName){ 51 | self::$startCrawlTime = time(); 52 | self::$protectedName = $protectedName; 53 | self::$frameworkBasePath = dirname(__FILE__); 54 | self::$basePath = dirname(self::$frameworkBasePath); 55 | self::$applicationPath = self::$basePath.DIRECTORY_SEPARATOR.self::$protectedName; 56 | 57 | ini_set('date.timezone','Asia/Shanghai'); 58 | spl_autoload_register('\framework\TSpider::autoLoadFile'); 59 | 60 | self::checkEnvironment(); 61 | 62 | // 解析命令 63 | self::parseCommand(); 64 | 65 | // 导入引用的三方扩展 66 | self::searchAndInclude(self::$frameworkBasePath.'/extension'); 67 | self::searchAndInclude(self::$applicationPath.'/extension'); 68 | 69 | // 加载配置文件 70 | $default_config = include self::$frameworkBasePath.DIRECTORY_SEPARATOR.'config'.DIRECTORY_SEPARATOR.'default.php'; 71 | $user_config_path = self::$applicationPath.DIRECTORY_SEPARATOR.'config'.DIRECTORY_SEPARATOR.'main.php'; 72 | if(!is_file($user_config_path)){ 73 | throw new \Exception("config file not found:{$user_config_path}",-1); 74 | } 75 | $user_config = include $user_config_path; 76 | if(!is_array($user_config)){ 77 | throw new \Exception("config file not load error:{$user_config_path}",-1); 78 | } 79 | 80 | self::$config = array_merge($default_config,$user_config); 81 | 82 | self::$spiderName = Common::getArrayValue(self::$config,'name','defaultSpider'); 83 | self::$isDebug = Common::getArrayValue(self::$config,'debug',false); 84 | 85 | if(self::$isDebug){ 86 | ini_set("display_errors", "on"); 87 | error_reporting(E_ALL^E_STRICT); 88 | } 89 | 90 | // 加载logger 91 | Logger::loadOneConfig(array('logName'=>'system')); 92 | Logger::$g_basePath = self::$config['logPath']; 93 | Logger::loadConfig(self::$config['logs']); 94 | self::$log = Logger::factory('system'); 95 | 96 | // 设置request相关 97 | Request::$maxRepeat = self::$config['request']['maxRepeat']; 98 | Request::$timeOut = self::$config['request']['timeOut']; 99 | self::$requestQueue = new self::$config['request']['requestQueue'](); 100 | if(!(self::$requestQueue instanceof RequestQueue)){ 101 | $temp = self::$config['request']['requestQueue']; 102 | throw new \Exception("requestQueue error:{$temp}",-1); 103 | } 104 | 105 | // 加载必须任务,初始化任务调度器,添加下载器任务 106 | self::$taskScheduler = new TaskScheduler(); 107 | if(!isset(self::$config['downloader'])){ 108 | throw new \Exception('there is no download class config'); 109 | } 110 | $downloaderClass = self::$config['downloader']['className']; 111 | self::$downloader = new $downloaderClass(); 112 | self::$downloader->windowSize = self::$config['downloader']['windowSize']; 113 | self::$taskScheduler->addTask(self::$downloader); 114 | 115 | // 添加请求队列读取任务,以及自定义任务,all分组表示运行所有 116 | $addRequestTask = new AddRequestTask(self::$downloader); 117 | self::$taskScheduler->addTask($addRequestTask); 118 | if(self::$taskGroup == 'all'){ 119 | foreach(self::$config['tasks'] as $key=>$value){ 120 | if(is_array($value)){ 121 | foreach ($value as $className=>$params){ 122 | $tempTask = new $className(); 123 | self::$taskScheduler->addTask($tempTask); 124 | } 125 | } 126 | } 127 | }else{ 128 | if(isset(self::$config['tasks']['common'])){ 129 | foreach(self::$config['tasks']['common'] as $key=>$value){ 130 | $tempTask = new $key(); 131 | self::$taskScheduler->addTask($tempTask); 132 | } 133 | } 134 | 135 | if(isset(self::$config['tasks'][self::$taskGroup]) && self::$taskGroup != 'common'){ 136 | foreach(self::$config['tasks'][self::$taskGroup] as $className=>$params){ 137 | $tempTask = new $className(); 138 | self::$taskScheduler->addTask($tempTask); 139 | } 140 | }else{ 141 | $taskGroup = self::$taskGroup; 142 | exit("taskGroup not found:{$taskGroup}\n"); 143 | } 144 | } 145 | 146 | // 初始化组件配置文件 147 | if(isset(self::$config['component']) && is_array(self::$config['component'])){ 148 | Component::$config = self::$config['component']; 149 | } 150 | self::$component = new Component(); 151 | 152 | // 设置进程名称 153 | $title = self::$spiderName; 154 | $taskGroup = self::$taskGroup; 155 | $startTime = date('Y-m-d H:i:s',self::$startCrawlTime); 156 | self::setProcessTitle("TSpider:{$title}-{$taskGroup} startTime:{$startTime}"); 157 | } 158 | 159 | public static function autoLoadFile($rawName){ 160 | $name = str_replace('\\', DIRECTORY_SEPARATOR ,$rawName); 161 | $classFile = self::$basePath . DIRECTORY_SEPARATOR . $name . '.php'; 162 | 163 | if(is_file($classFile)){ 164 | if(!class_exists($rawName,true)){ 165 | $rel = require $classFile; 166 | return $rel; 167 | } 168 | }else{ 169 | throw new Exception("auto load File error:{$classFile}",-1); 170 | } 171 | return false; 172 | } 173 | 174 | /** 175 | * 解析接收的命令 176 | */ 177 | public static function parseCommand(){ 178 | if(isset($_SERVER['argv']) && count($_SERVER['argv']) >=2){ 179 | $command = $_SERVER['argv'][1]; 180 | if(strlen($command)){ 181 | self::$taskGroup = $command; 182 | }else{ 183 | self::$taskGroup = ''; 184 | } 185 | }else{ 186 | self::$taskGroup = 'all'; 187 | } 188 | echo 'taskGroup:',self::$taskGroup,"\n"; 189 | } 190 | 191 | 192 | /** 193 | * 检测运行环境 194 | */ 195 | public static function checkEnvironment(){ 196 | echo 'PHP-version:',PHP_VERSION,' TSpider-version:',self::VERSION,' start-time:',date('Y-m-d H:i:s',self::$startCrawlTime),"\n"; 197 | 198 | if(version_compare(PHP_VERSION,'5.5.0','<=')){ 199 | exit('php version must greater than 5.5.0'); 200 | } 201 | 202 | if(substr(php_sapi_name(), 0, 3) != 'cli'){ 203 | exit('this program must be running in cli mode '); 204 | } 205 | 206 | if(extension_loaded('posix')){ 207 | $userInfo = posix_getpwuid(posix_getuid()); 208 | echo 'pid:',posix_getpid()," ",'running-user:',$userInfo['name'],"\n"; 209 | } 210 | } 211 | 212 | /** 213 | * 设置进程名称 214 | * @param $title 215 | */ 216 | public static function setProcessTitle($title) 217 | { 218 | if (function_exists('cli_set_process_title')) { 219 | @cli_set_process_title($title); 220 | } elseif (extension_loaded('proctitle') && function_exists('setproctitle')) { 221 | @setproctitle($title); 222 | } 223 | } 224 | 225 | /** 226 | * 导入某个文件夹中的所有文件 227 | * @param $rootPath 228 | */ 229 | public static function searchAndInclude($rootPath){ 230 | if(is_dir($rootPath)){ 231 | $rootPath = rtrim($rootPath,'/'); 232 | $files = glob($rootPath.'/*'); 233 | foreach ($files as $file){ 234 | if(is_dir($file)){ 235 | self::searchAndInclude($file); 236 | }else{ 237 | include_once $file; 238 | } 239 | } 240 | } 241 | } 242 | 243 | public static function run(){ 244 | self::$taskScheduler->run(); 245 | } 246 | } -------------------------------------------------------------------------------- /example/lol_hero_god_rank/framework/base/BaseWorker.php: -------------------------------------------------------------------------------- 1 | code} : {$request->url} \n"; 26 | } 27 | 28 | /** 29 | * 解析前hook 30 | * @return bool 31 | */ 32 | public function beforeParse(){ 33 | self::$statisticsParserNum ++; 34 | return true; 35 | } 36 | 37 | /** 38 | * 解析后hook 39 | */ 40 | public function afterParse(){ 41 | // TODO: Implement afterParse() method. 42 | } 43 | } -------------------------------------------------------------------------------- /example/lol_hero_god_rank/framework/base/Logger.php: -------------------------------------------------------------------------------- 1 | array( 42 | 'isLogging' => true, 43 | 'basePath' => '', 44 | 'suffix' => 'log', 45 | 'level' => array(self::LOG_LEVEL_ERROR,self::LOG_LEVEL_WARN,self::LOG_LEVEL_INFO), 46 | 'mode' => self::LOG_MODE_NORMAL, 47 | 'frequency' => self::LOG_FREQUENCY_NONE, 48 | ), 49 | ); 50 | 51 | protected static $logPool = array(); // 日志对象池子 52 | 53 | protected $isLoging; //当前日志,是否记录 54 | protected $logName; //当前日志,日志名称 55 | protected $basePath; //当前日志,存储路径 56 | protected $suffix; //当前日志,日志文件后缀 57 | protected $level; //当前日志,运行记录的日志等级 58 | protected $mode; //当前日志,记录方式 59 | protected $frequency; //当前日志,日志记录每隔一(分钟/小时/天/月)换一个文件记录 60 | 61 | private $logFilePath; //完整的日志路径 62 | private $timeLength; //存储时间长度 63 | 64 | /** 65 | * 以二维数组的形式加载多个日志配置文件 66 | * @param $arr array 二维数组 67 | */ 68 | public static function loadConfig($arr){ 69 | if(!empty($arr) && is_array($arr)){ 70 | foreach($arr as $name =>$config){ 71 | self::loadOneConfig($config,$name); 72 | } 73 | } 74 | } 75 | 76 | /** 77 | * 加载一个日志配置文件 78 | * @param $arr 79 | * @param string $name 80 | * @return bool 81 | */ 82 | public static function loadOneConfig($arr,$name=''){ 83 | if(!is_array($arr)){ 84 | $arr = array(); 85 | } 86 | $name = isset($arr['logName'])?$arr['logName']:$name; 87 | $name = str_replace(array('\\','/'),'_',$name); 88 | if(!is_string($name) || empty($name) || $name == 'default'){ 89 | return false; 90 | } 91 | unset($arr['logName']); 92 | self::$g_config_arr[$name] = $arr; 93 | 94 | return true; 95 | } 96 | 97 | 98 | /** 99 | * 根据日志名称,获取一个日志实例 100 | * @param string $logName 配置名称 101 | * @param bool $isNew 是否生成一个新的日志对象 102 | * @return Logger 103 | * @throws Exception 104 | */ 105 | public static function factory($logName='default',$isNew=false){ 106 | $logName = str_replace(array('\\','/'),'_',$logName); 107 | if(!isset(self::$logPool[$logName]) || $isNew == true){ 108 | if(isset(self::$logPool[$logName])){ 109 | unset(self::$logPool[$logName]); 110 | } 111 | 112 | if(empty($logName) || !is_string($logName) || !isset(self::$g_config_arr[$logName])){ 113 | throw new Exception("Make sure that the log configuration which name is '{$logName}' is loaded successfully"); 114 | } 115 | self::$logPool[$logName] = new self(self::$g_config_arr[$logName],$logName); 116 | } 117 | return self::$logPool[$logName]; 118 | } 119 | 120 | /** 121 | * Logger 构造函数,不能直接new Logger() 122 | * @param string $logName 123 | * @param array $config 124 | * @throws Exception 125 | */ 126 | protected function __construct($config = array(),$logName='default'){ 127 | $this->isLoging = (isset($config['isLogging']))?$config['isLogging']:self::$g_config_arr['default']['isLogging']; 128 | $this->logName = (empty($logName) || !is_string($logName))?'default':$logName; 129 | $this->basePath = (isset($config['basePath']) && !empty($config['basePath']))?$config['basePath']:self::$g_basePath; 130 | $this->suffix = isset($config['suffix'])?$config['suffix']:self::$g_config_arr['default']['suffix']; 131 | $this->level = (isset($config['level']) && is_array($config['level']))?$config['level']:self::$g_config_arr['default']['level']; 132 | $this->mode = isset($config['mode'])?$config['mode']:self::$g_config_arr['default']['mode']; 133 | $this->frequency = isset($config['frequency'])?$config['frequency']:self::$g_config_arr['default']['frequency']; 134 | 135 | $this->basePath = rtrim($this->basePath,"\\/"); 136 | if(!is_dir($this->basePath)){ 137 | if( !mkdir($this->basePath,0755,true)){ 138 | throw new Exception("create directory fail:".$this->basePath); 139 | } 140 | } 141 | $this->logFilePath = $this->basePath.DIRECTORY_SEPARATOR.$this->logName.'.'.$this->suffix; 142 | switch($this->frequency){ 143 | case self::LOG_FREQUENCY_MINUTE: $this->timeLength = 12;break; 144 | case self::LOG_FREQUENCY_HOUR: $this->timeLength = 10;break; 145 | case self::LOG_FREQUENCY_DAY: $this->timeLength = 8;break; 146 | case self::LOG_FREQUENCY_MONTH: $this->timeLength = 6;break; 147 | default: 148 | $this->timeLength = -1; 149 | } 150 | } 151 | 152 | 153 | /** 154 | * 受保护的写日志方法 155 | * @param $filePath 156 | * @param $content 157 | * @return bool|int 158 | */ 159 | protected function write($filePath,$content){ 160 | $return_value = false; 161 | $content = $content."\n"; 162 | switch($this->mode){ 163 | case self::LOG_MODE_NORMAL:{ 164 | $return_value = file_put_contents($filePath,$content,LOCK_EX | FILE_APPEND); 165 | $return_value = (int)$return_value > 0 ?true:false; 166 | }break; 167 | case self::LOG_MODE_PRINT:{ 168 | echo $this->logName,':',$content; 169 | $return_value = true; 170 | }break; 171 | case self::LOG_MODE_BOTH:{ 172 | echo $this->logName,':',$content; 173 | file_put_contents($filePath,$content,LOCK_EX | FILE_APPEND); 174 | $return_value = (int)$return_value > 0 ?true:false; 175 | }break; 176 | } 177 | return $return_value; 178 | } 179 | 180 | 181 | /** 182 | * 原始记录日志函数 183 | * @param $content 184 | * @param int $level 185 | * @return bool|int 186 | */ 187 | public function log($content,$level = self::LOG_LEVEL_INFO){ 188 | if(!self::$g_isLogging || !$this->isLoging || !in_array($level,$this->level)){ 189 | return false; 190 | } 191 | 192 | if(!is_string($content)){ 193 | if(is_array($content)){ 194 | $content = json_encode($content,JSON_UNESCAPED_UNICODE); 195 | }else{ 196 | $content = serialize($content); 197 | } 198 | } 199 | 200 | $logTime = time(); //记录日志时间 201 | 202 | // 检测是否需要对日志文件进行重命名 203 | if($this->timeLength > 0){ 204 | $fileCreateTime = @filectime($this->logFilePath); 205 | if($fileCreateTime){ 206 | $logTimeFormat = substr(@date('YmdHis',$logTime),0,$this->timeLength); 207 | $createTimeFormat = substr(@date('YmdHis',$fileCreateTime),0,$this->timeLength); 208 | if(strcmp($logTimeFormat,$createTimeFormat) !== 0){ 209 | $newLogFilePath = $this->basePath.DIRECTORY_SEPARATOR.$this->logName.'_'.$createTimeFormat.'.'.$this->suffix; 210 | rename($this->logFilePath,$newLogFilePath); 211 | } 212 | } 213 | } 214 | 215 | //构造日志记录格式 216 | switch($level){ 217 | case self::LOG_LEVEL_ERROR:{ 218 | $content = sprintf('[%s %s] %s',@date('Y-m-d H:i:s',$logTime),'error',$content); 219 | }break; 220 | case self::LOG_LEVEL_WARN:{ 221 | $content = sprintf('[%s %s] %s',@date('Y-m-d H:i:s',$logTime),'warn',$content); 222 | }break; 223 | default: 224 | $content = sprintf('[%s %s] %s',@date('Y-m-d H:i:s',$logTime),'info',$content); 225 | } 226 | 227 | //记录日志 228 | return $this->write($this->logFilePath,$content); 229 | } 230 | 231 | /** 232 | * 记录错误信息 233 | * @param $content 234 | * @return bool|int 235 | */ 236 | public function error($content){ 237 | return $this->log($content,self::LOG_LEVEL_ERROR); 238 | } 239 | 240 | /** 241 | * 记录警告信息 242 | * @param $content 243 | * @return bool|int 244 | */ 245 | public function warn($content){ 246 | return $this->log($content,self::LOG_LEVEL_WARN); 247 | } 248 | } 249 | 250 | //测试 251 | if(strtolower(PHP_SAPI) == 'cli' && isset($argv) && basename(__FILE__) == basename($argv[0])){ 252 | $config = array( // 日志配置文件数组,default是默认配置项 253 | 'test1/a' => array( 254 | 'logName' => 'bb' 255 | ), 256 | ); 257 | 258 | Logger::loadConfig($config); 259 | $log = Logger::factory('bb'); 260 | $data = $log->error('hello 哈哈1中文'); 261 | echo $data,"\n"; 262 | } -------------------------------------------------------------------------------- /example/lol_hero_god_rank/framework/base/Request.php: -------------------------------------------------------------------------------- 1 | url = $url; 37 | $this->workerName = $workerName; 38 | 39 | $this->options[CURLOPT_URL] = $url; 40 | $this->options[CURLOPT_TIMEOUT] = self::$timeOut; 41 | $this->options[CURLOPT_USERAGENT] = 'Mozilla/5.0 (Windows NT 6.2; WOW64; Trident/7.0; rv:11.0) like Gecko'; 42 | $this->options[CURLOPT_ENCODING] = 'gzip, deflate '; 43 | $this->extData = $extData; 44 | if(empty($postData) || !is_array($postData)){ 45 | $this->type = 'GET'; 46 | }else{ 47 | $this->type = 'POST'; 48 | $this->postData = $postData; 49 | $this->options[CURLOPT_POST] = true; 50 | $this->options[CURLOPT_POSTFIELDS] = $postData; 51 | } 52 | } 53 | 54 | /** 55 | * 对象字符串化 56 | * @return string 57 | */ 58 | public function __toString(){ 59 | $json_arr = array( 60 | 'url' => $this->url, 61 | 'workerName' => $this->workerName, 62 | 'options' => $this->options, 63 | 'postData' => $this->postData, 64 | 'repeat' => $this->repeat, 65 | 'header' => $this->header, 66 | ); 67 | $returnString = json_encode($json_arr); 68 | if(!is_string($returnString)){ 69 | $returnString = ''; 70 | } 71 | return $returnString; 72 | } 73 | 74 | /** 75 | * 设置头部信息数组 76 | * @param $options 77 | */ 78 | public function setOptions($options){ 79 | foreach($options as $key=>$value){ 80 | $this->options[$key] = $value; 81 | } 82 | } 83 | 84 | /** 85 | * 累加重复次数 86 | */ 87 | public function addRepeat(){ 88 | $this->repeat ++; 89 | } 90 | 91 | /** 92 | * 获取重复次数 93 | * @return int 94 | */ 95 | public function getRepeatNum(){ 96 | return $this->repeat; 97 | } 98 | 99 | /** 100 | * 是否能重复下载 101 | * @return bool 102 | */ 103 | public function canRepeat(){ 104 | if(self::$maxRepeat < $this->repeat){ 105 | return false; 106 | } 107 | return true; 108 | } 109 | 110 | /** 111 | * 重新下载 112 | * @return bool true-添加成功,false-添加失败 113 | */ 114 | public function redownload(){ 115 | $this->addRepeat(); 116 | if($this->canRepeat()) { 117 | TSpider::$requestQueue->add($this); 118 | return true; 119 | } 120 | return false; 121 | } 122 | 123 | 124 | /** 125 | * 将请求放入下载队列中,队列满了,返回失败 126 | * @return bool 127 | */ 128 | public function download(){ 129 | if(TSpider::$requestQueue->isFull()){ 130 | return false; 131 | } 132 | TSpider::$requestQueue->add($this); 133 | return true; 134 | } 135 | 136 | 137 | public function createCurlObject(){ 138 | $ch = curl_init(); 139 | if($this->canRepeat()){ 140 | if(!empty($this->options)){ 141 | curl_setopt_array($ch,$this->options); 142 | } 143 | curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); 144 | }else{ 145 | $ch = false; 146 | } 147 | return $ch; 148 | } 149 | } -------------------------------------------------------------------------------- /example/lol_hero_god_rank/framework/base/Response.php: -------------------------------------------------------------------------------- 1 | info = $info; 22 | $this->data = $data; 23 | $this->error = $error; 24 | if(isset($this->info['http_code'])){ 25 | $this->code = $this->info['http_code']; 26 | } 27 | } 28 | 29 | public function getData(){ 30 | return $this->data; 31 | } 32 | 33 | public function getError(){ 34 | return $this->error; 35 | } 36 | } -------------------------------------------------------------------------------- /example/lol_hero_god_rank/framework/component/Component.php: -------------------------------------------------------------------------------- 1 | getInstance(); 31 | } 32 | 33 | $className = isset(self::$config[$name]['className'])?self::$config[$name]['className']:''; 34 | if(!class_exists($className)){ 35 | throw new \Exception("have no component className:{$className}\n",-1); 36 | } 37 | 38 | $component = new $className($name,self::$config[$name]); 39 | if(!($component instanceof Component)){ 40 | throw new \Exception("className:{$className} is not Component\n",-1); 41 | } 42 | 43 | self::$instanceMap[$name] = $component; 44 | 45 | return self::$instanceMap[$name]->getInstance(); 46 | } 47 | 48 | public function __construct($componentName='',$config=array()){ 49 | $this->componentName = $componentName; 50 | $this->oneConfig = $config; 51 | } 52 | 53 | 54 | 55 | public function getInstance(){ 56 | return $this; 57 | } 58 | 59 | } -------------------------------------------------------------------------------- /example/lol_hero_god_rank/framework/component/MysqlComponent.php: -------------------------------------------------------------------------------- 1 | isLoadConfig){ 19 | $this->oneConfig['connectionName'] = $this->componentName; 20 | TMysql::loadOneConfig($this->oneConfig); 21 | $this->isLoadConfig = true; 22 | } 23 | return TMysql::getConnection($this->componentName); 24 | } 25 | } -------------------------------------------------------------------------------- /example/lol_hero_god_rank/framework/component/RedisComponent.php: -------------------------------------------------------------------------------- 1 | isLoadConfig){ 19 | $this->oneConfig['connectionName'] = $this->componentName; 20 | TRedis::loadOneConfig($this->oneConfig); 21 | $this->isLoadConfig = true; 22 | } 23 | return TRedis::getConnection($this->componentName); 24 | } 25 | } -------------------------------------------------------------------------------- /example/lol_hero_god_rank/framework/component/RedisLockComponent.php: -------------------------------------------------------------------------------- 1 | isLoadConfig){ 21 | $this->oneConfig['connectionName'] = $this->componentName; 22 | TRedis::loadOneConfig($this->oneConfig); 23 | $this->isLoadConfig = true; 24 | } 25 | 26 | return new TLock($this->componentName,$this->getArrayValue($this->oneConfig,'prefix','lock_'),$this->getArrayValue($this->oneConfig,'expire',3600)); 27 | } 28 | 29 | 30 | /** 31 | * 安全地获取数组的值 32 | * @param $arr array 数组 33 | * @param $key String 键名 34 | * @param string $default 默认值 35 | * @return string 36 | */ 37 | public static function getArrayValue($arr,$key,$default=''){ 38 | if(isset($arr[$key])){ 39 | return $arr[$key]; 40 | } 41 | return $default; 42 | } 43 | } -------------------------------------------------------------------------------- /example/lol_hero_god_rank/framework/component/SmsWoquComponent.php: -------------------------------------------------------------------------------- 1 | lastSendTime) > $interval_time){ 37 | $this->lastSendTime = $now; 38 | $this->sendMessageNum = 1; 39 | return true; 40 | } 41 | if($this->sendMessageNum <= $maxSendNum){ 42 | $this->sendMessageNum ++; 43 | return true; 44 | } 45 | return false; 46 | } 47 | 48 | 49 | /** 50 | * 发送消息函数 51 | * @param string $content 发送的消息内容 52 | * @param bool $needSend 当前短信是否必须发送出去 53 | * @return bool 54 | */ 55 | public function sendMessage($content,$needSend=false){ 56 | if(!($this->canSendMessage() || $needSend)){ 57 | return false; 58 | } 59 | $phoneNumbers = explode(',',$this->oneConfig['To']); 60 | if(empty($this->oneConfig['To'])){ 61 | Http::request($this->oneConfig['serverUrl'],array( 62 | 'Module' => $this->oneConfig['Module'], 63 | 'MsgType' => $this->oneConfig['MsgType'], 64 | 'To' => '', 65 | 'MsgText' => $content)); 66 | }else{ 67 | foreach($phoneNumbers as $key=>$phone){ 68 | Http::request($this->oneConfig['serverUrl'],array( 69 | 'Module' => $this->oneConfig['Module'], 70 | 'MsgType' => $this->oneConfig['MsgType'], 71 | 'To' => $phone, 72 | 'MsgText' => $content)); 73 | } 74 | } 75 | return true; 76 | } 77 | } -------------------------------------------------------------------------------- /example/lol_hero_god_rank/framework/config/default.php: -------------------------------------------------------------------------------- 1 | 'defaultSpider', 12 | 'debug' => false, 13 | 'logPath' => dirname(dirname(dirname(__FILE__))).DIRECTORY_SEPARATOR.'log'.DIRECTORY_SEPARATOR, 14 | 'logs' => array( 15 | 'system' => array( 16 | 'logName' => 'system', 17 | ) 18 | ), 19 | 'downloader' => array( 20 | 'className' => 'framework\task\DownloadTask', 21 | 'windowSize' => 50, 22 | ), 23 | 'request' => array( 24 | 'maxRepeat' => 5, 25 | 'timeOut' => 300, 26 | 'requestQueue' => 'framework\queue\LocalRequestQueue' 27 | ), 28 | 'tasks' => array( 29 | 'common' => array( 30 | 'framework\task\DelayTimer'=>array() 31 | ) 32 | ), 33 | 'component' => array( 34 | // 填写相应组件 35 | ), 36 | ); -------------------------------------------------------------------------------- /example/lol_hero_god_rank/framework/coroutine/ExampleTask.php: -------------------------------------------------------------------------------- 1 | taskId = ++self::$maxTaskId; 24 | 25 | $this->taskName = $taskName; 26 | if(empty($taskName)){ 27 | $this->taskName = get_class(); 28 | } 29 | $taskContent = $this->coroutine(); 30 | if($taskContent instanceof \Generator){ 31 | $this->taskContent = $taskContent; 32 | }else{ 33 | throw new Exception('Task is not a coroutine',-1); 34 | } 35 | } 36 | 37 | public function getTaskId() { 38 | return $this->taskId; 39 | } 40 | 41 | public function getTaskName() { 42 | return $this->taskName; 43 | } 44 | 45 | public function setSendValue($sendValue) { 46 | $this->sendValue = $sendValue; 47 | } 48 | 49 | abstract public function coroutine(); 50 | 51 | public function run() { 52 | if ($this->beforeFirstYield) { 53 | $this->beforeFirstYield = false; 54 | return $this->taskContent->current(); 55 | } else { 56 | $retval = $this->taskContent->send($this->sendValue); 57 | $this->sendValue = null; 58 | return $retval; 59 | } 60 | } 61 | 62 | public function isFinished() { 63 | return !$this->taskContent->valid(); 64 | } 65 | } -------------------------------------------------------------------------------- /example/lol_hero_god_rank/framework/coroutine/TaskScheduler.php: -------------------------------------------------------------------------------- 1 | task 16 | protected $taskQueue = null; 17 | 18 | protected static $is_closed = false; 19 | 20 | public function __construct() { 21 | $this->taskQueue = new \SplQueue(); 22 | } 23 | 24 | public static function closeAllTask(){ 25 | self::$is_closed = true; 26 | } 27 | 28 | public static function isAllTaskClosed(){ 29 | return self::$is_closed; 30 | } 31 | 32 | public function addTask(Task $task){ 33 | $taskId = $task->getTaskId(); 34 | $this->taskMap[$taskId] = $task; 35 | $this->schedule($task); 36 | return $taskId; 37 | } 38 | 39 | public function schedule(Task $task) { 40 | $this->taskQueue->enqueue($task); 41 | } 42 | 43 | public function run() { 44 | while (!$this->taskQueue->isEmpty()) { 45 | $task = $this->taskQueue->dequeue(); 46 | $task->run(); 47 | 48 | if ($task->isFinished()) { 49 | unset($this->taskMap[$task->getTaskId()]); 50 | } else { 51 | $this->schedule($task); 52 | } 53 | if(TaskScheduler::isAllTaskClosed()){ 54 | break; 55 | } 56 | } 57 | } 58 | } -------------------------------------------------------------------------------- /example/lol_hero_god_rank/framework/coroutine/TimerTask.php: -------------------------------------------------------------------------------- 1 | 0){ 21 | $this->intervalTime = $intervalTime; 22 | } 23 | if(empty($taskName)){ 24 | $taskName = get_class($this); 25 | } 26 | $this->expireTime = time()+$this->intervalTime; 27 | parent::__construct($taskName); 28 | } 29 | 30 | public function coroutine(){ 31 | while(true){ 32 | if($this->expireTime <= time()){ 33 | $rel = $this->execute(); 34 | $this->expireTime = time()+$this->intervalTime; 35 | if($rel === false){ 36 | break; 37 | } 38 | } 39 | yield true; 40 | } 41 | } 42 | 43 | public abstract function execute(); 44 | } -------------------------------------------------------------------------------- /example/lol_hero_god_rank/framework/extension/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hirudy/Tspider/93ba6b4f9ac1725519af3386c7ca28b33ceb8770/example/lol_hero_god_rank/framework/extension/.DS_Store -------------------------------------------------------------------------------- /example/lol_hero_god_rank/framework/extension/Common.php: -------------------------------------------------------------------------------- 1 | '','errorList'=>array()); 14 | 15 | public static function request($url,$postData=array(),$header=array()){ 16 | $options = array(); 17 | $url = trim($url); 18 | $options[CURLOPT_URL] = $url; 19 | $options[CURLOPT_TIMEOUT] = 10; 20 | $options[CURLOPT_USERAGENT] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.89 Safari/537.36'; 21 | $options[CURLOPT_RETURNTRANSFER] = true; 22 | foreach($header as $key=>$value){ 23 | $options[$key] =$value; 24 | } 25 | if(!empty($postData) && is_array($postData)){ 26 | $options[CURLOPT_POST] = true; 27 | $options[CURLOPT_POSTFIELDS] = http_build_query($postData); 28 | } 29 | if(stripos($url,'https') === 0){ 30 | $options[CURLOPT_SSL_VERIFYPEER] = false; 31 | } 32 | $ch = curl_init(); 33 | curl_setopt_array($ch,$options); 34 | $rel = curl_exec($ch); 35 | if($rel == false){ 36 | $errno = curl_errno( $ch ); 37 | $error = curl_error($ch); 38 | self::$lastError['error'] = "({$errno})$error"; 39 | self::$lastError['errorList'] = curl_getinfo($ch); 40 | } 41 | curl_close($ch); 42 | return $rel; 43 | } 44 | } -------------------------------------------------------------------------------- /example/lol_hero_god_rank/framework/extension/TLock.php: -------------------------------------------------------------------------------- 1 | connectionName = $connectionName; 21 | $this->uniqueId = uniqid(); 22 | $this->prefix = $prefix; 23 | $this->expire = $expire; 24 | } 25 | 26 | 27 | /** 28 | * 获取一个非阻塞锁 true-获取到,false-没有获取到 29 | * @param $resource_name 30 | * @return bool 31 | * @throws RedisOperationException 32 | */ 33 | public function lock($resource_name){ 34 | if(!empty($this->resource_name)){ 35 | return false; 36 | } 37 | $this->resource_name = $resource_name; 38 | 39 | $redis = TRedis::getConnection($this->connectionName); 40 | if($redis->setNx($this->resource_name,$this->uniqueId)){ 41 | $redis->expire($this->resource_name,$this->expire); 42 | return true; 43 | } 44 | return false; 45 | } 46 | 47 | /** 48 | * 获得一个阻塞锁 49 | * @param $resource_name 50 | * @return bool 51 | * @throws RedisOperationException 52 | */ 53 | public function lockWait($resource_name){ 54 | if(!empty($this->resource_name)){ 55 | return false; 56 | } 57 | $this->resource_name = $resource_name; 58 | 59 | do{ 60 | $redis = TRedis::getConnection($this->connectionName); 61 | if($redis->setNx($this->resource_name,$this->uniqueId)){ 62 | $redis->expire($this->resource_name,$this->expire); 63 | break; 64 | } 65 | 66 | sleep(1); 67 | }while(true); 68 | return true; 69 | } 70 | 71 | /** 72 | * 解锁,只能解锁自己对象的说 73 | * @throws RedisOperationException 74 | */ 75 | public function unlock(){ 76 | $redis = TRedis::getConnection($this->connectionName); 77 | $rel = $redis->get($this->resource_name); 78 | if($rel == $this->uniqueId){ 79 | $redis->delete($this->resource_name); 80 | } 81 | } 82 | } -------------------------------------------------------------------------------- /example/lol_hero_god_rank/framework/extension/TMysql.php: -------------------------------------------------------------------------------- 1 | connectionName = $connectionName; 17 | $this->dbName = $dbname; 18 | $dbConnection = new \mysqli($host, $username, $passwd, $dbname, $port, $socket); 19 | $dbConnection->options(MYSQLI_OPT_CONNECT_TIMEOUT,5); 20 | if ($dbConnection->connect_error){ 21 | throw new \Exception("mysql {$connectionName} connection failure:({$dbConnection->connect_errno}){$dbConnection->connect_error}",-1); 22 | } 23 | $dbConnection->set_charset($charset); 24 | $this->dbConnection = $dbConnection; 25 | } 26 | 27 | public function __destruct(){ 28 | if ($this->dbConnection){ 29 | // 操作对象销毁时候,关闭连接 30 | @$this->dbConnection->close(); 31 | } 32 | } 33 | 34 | /** 35 | * 查看当前连接是否关闭 36 | * @return bool 37 | */ 38 | public function ping(){ 39 | // $autoReconnect = empty(ini_get('mysqli.reconnect')); 40 | try{ 41 | if(@$this->dbConnection->ping()){ 42 | return true; 43 | }else{ 44 | @$this->dbConnection->close(); 45 | return false; 46 | } 47 | }catch (\Exception $e){ 48 | @$this->dbConnection->close(); 49 | return false; 50 | } 51 | } 52 | 53 | /** 54 | * 获取当前默认的字符集 55 | * @return string 字符集 56 | */ 57 | public function getCharset(){ 58 | return $this->dbConnection->character_set_name(); 59 | } 60 | 61 | 62 | /** 63 | * 获取select结果集中的所有结果 64 | * @param $option 65 | * @return array 66 | */ 67 | protected function fetch_all($option){ 68 | $result = array(); 69 | if(method_exists($option,'fetch_all')){ 70 | $result = $option->fetch_all(MYSQLI_ASSOC); 71 | }else{ 72 | while($row = $option->fetch_assoc()){ 73 | $result[] = $row; 74 | } 75 | } 76 | 77 | return $result; 78 | } 79 | 80 | 81 | 82 | /** 83 | * 切换默认数据库 84 | * @param $dbName String 要切换的数据库名称 85 | * @return $this 做链式访问 86 | * @throws Exception 87 | */ 88 | public function switchDb($dbName){ 89 | if (is_string($dbName) && !empty($dbName)){ 90 | if($this->dbConnection->select_db($dbName)){ 91 | return $this; 92 | }; 93 | throw new \Exception("mysql {$this->connectionName} switch db from {$this->dbName} to {$dbName} failure:({$this->dbConnection->errno}){$this->dbConnection->error}",-1); 94 | } 95 | throw new \Exception("mysql {$this->connectionName} switch db from {$this->dbName} to {$dbName} failure:param error",-1); 96 | } 97 | 98 | 99 | /** 100 | * 执行数据库操作 101 | * @param $sql String 要执行的sql语句 102 | * @param array $params 如果不为空,进行参数预处理 103 | * @return bool|int|mixed 执行select等返回结果集数组,执行insert返回自增值,执行其他返回受影响的行数。失败返回false 104 | * @throws Exception 支持出错抛出异常。 105 | */ 106 | public function query($sql,$params=array()){ 107 | $returnValue = true; 108 | $sql = trim($sql,' '); 109 | if (empty($params)){ 110 | $result = $this->dbConnection->query($sql); 111 | if ($result === false){ 112 | if ($this->dbConnection->error){ 113 | throw new \Exception("{$this->connectionName} query error:({$this->dbConnection->errno}){$this->dbConnection->error}"); 114 | }else{ 115 | return false; 116 | } 117 | } 118 | 119 | if ($result === true){ 120 | if (stripos($sql,'insert') === false){ 121 | return $this->dbConnection->affected_rows; 122 | }else{ 123 | return $this->dbConnection->insert_id; 124 | } 125 | } 126 | $returnValue = $this->fetch_all($result); 127 | $result->close(); 128 | }else{ 129 | $statement = $this->dbConnection->prepare($sql); 130 | if ($statement == false){ 131 | throw new \Exception("{$this->connectionName} prepare error:({$this->dbConnection->errno}){$this->dbConnection->error}"); 132 | } 133 | $types = ''; 134 | $data = array(''); 135 | foreach($params as $key=>$value){ 136 | $type = gettype($value); 137 | switch($type){ 138 | case 'string':{ 139 | $types .= 's'; 140 | $data[] = &$params[$key]; 141 | }break; 142 | case 'integer':{ 143 | $types .= 'i'; 144 | $data[] = &$params[$key]; 145 | }break; 146 | case 'double':{ 147 | $types .= 'd'; 148 | $data[] = &$params[$key]; 149 | }break; 150 | default: 151 | $types .= 'b'; 152 | $data[] = &$params[$key]; 153 | } 154 | } 155 | $data[0] = $types; 156 | $method = new \ReflectionMethod($statement,'bind_param'); 157 | if ($method->invokeArgs($statement,$data) == false){ 158 | throw new \Exception("{$this->connectionName} bind_param error:({$statement->errno}){$statement->error}"); 159 | } 160 | $rel = $statement->execute(); 161 | if ($rel == false){ 162 | throw new \Exception("{$this->connectionName} execute error:({$statement->errno}){$statement->error}"); 163 | } 164 | $arr = explode(' ',$sql); 165 | $type = isset($arr[0])?strtolower($arr[0]):''; 166 | switch($type){ 167 | case 'explain': 168 | case 'select':{ 169 | $rel = $statement->get_result(); 170 | if ($rel === false){ 171 | throw new \Exception("{$this->connectionName} select error:({$this->dbConnection->errno}){$this->dbConnection->error}"); 172 | } 173 | $returnValue = $this->fetch_all($rel); 174 | }break; 175 | case 'insert':{ 176 | $returnValue = $this->dbConnection->insert_id; 177 | }break; 178 | case 'update': 179 | case 'delete':{ 180 | $returnValue = $this->dbConnection->affected_rows; 181 | }; 182 | } 183 | $statement->close(); 184 | } 185 | 186 | return $returnValue; 187 | } 188 | 189 | /** 190 | * 根据数组插入一条记录 191 | * @param string $table 要插入的表名 192 | * @param array $arr 插入的数组(关联数组) 193 | * @return bool|int|mixed 194 | * @throws \Exception 195 | */ 196 | public function insert($table,$arr = array()){ 197 | if(empty($arr) || empty($table) || !is_string($table) || !is_array($arr)){ 198 | return false; 199 | } 200 | $fields = array_keys($arr); 201 | $tempArr = array_fill(0,count($fields),'?'); 202 | $params = array_values($arr); 203 | 204 | foreach ($fields as $index => $row){ 205 | $fields[$index] = "`{$row}`"; 206 | } 207 | $fields = '('.implode(',',$fields).')'; 208 | $tempArr = '('.implode(',',$tempArr).')'; 209 | $sql = "insert `{$table}`{$fields} values{$tempArr};"; 210 | return $this->query($sql,$params); 211 | } 212 | 213 | /** 214 | * 一次性插入多条记录 215 | * @param string $table 插入的表名称 216 | * @param array $arr 二维数组,具有相同的表结构 217 | * @return int 返回插入的最后一条记录的自增id 218 | */ 219 | public function insertMulti($table,$arr=array(array(),array())){ 220 | if(empty($arr) || empty($table) || !is_string($table) || !is_array($arr) || !is_array($arr[0])){ 221 | return false; 222 | } 223 | $fields = array_keys($arr[0]); 224 | $tempArr = array_fill(0,count($fields),'?'); 225 | foreach ($fields as $index => $row){ 226 | $fields[$index] = "`{$row}`"; 227 | } 228 | $fields = '('.implode(',',$fields).')'; 229 | 230 | $tempArrString = '('.implode(',',$tempArr).')'; 231 | $params = array(); 232 | $tempArr = array(); 233 | foreach ($arr as $row){ 234 | $tempArr[] = $tempArrString; 235 | $temp = array_values($row); 236 | $params = array_merge($params,$temp); 237 | } 238 | $tempArrString = implode(',',$tempArr); 239 | 240 | 241 | $sql = "insert `{$table}`{$fields} values{$tempArrString};"; 242 | return $this->query($sql,$params); 243 | } 244 | 245 | 246 | /** 247 | * 返回mysqli对象 248 | * @return mysqli|null 249 | */ 250 | public function getConnection(){ 251 | $connection = $this->dbConnection; 252 | if($connection instanceof \mysqli){ 253 | return $connection; 254 | }else{ 255 | return null; 256 | } 257 | } 258 | } 259 | 260 | 261 | class TMysql{ 262 | private static $dbConnectionPool = array(); 263 | private static $configs = array(); 264 | 265 | public static function loadOneConfig($arr){ 266 | mysqli_report(MYSQLI_REPORT_ALL^MYSQLI_REPORT_INDEX); 267 | if (!extension_loaded('mysqli')){ 268 | throw new \Exception('need extension mysqli!', -1); 269 | } 270 | if (!is_array($arr) || empty($arr)){ 271 | throw new \Exception('config not empty!', -1); 272 | } 273 | if (!isset($arr['connectionName']) || empty($arr['connectionName']) || !is_string($arr['connectionName'])){ 274 | throw new \Exception('connectionName not exit or not a string!',-1); 275 | } 276 | 277 | $config = array(); 278 | $config['connectionName'] = $arr['connectionName']; 279 | $config['host'] = isset($arr['host'])?$arr['host']:''; 280 | $config['userName'] = isset($arr['userName'])?$arr['userName']:''; 281 | $config['password'] = isset($arr['password'])?$arr['password']:''; 282 | $config['dbName'] = isset($arr['dbName'])?$arr['dbName']:''; 283 | $config['port'] = isset($arr['port'])?$arr['port']:3306; 284 | $config['checkConnection'] = isset($arr['checkConnection'])?isset($arr['checkConnection']):false; 285 | $config['charset'] = isset($arr['charset'])?$arr['charset']:'utf8'; 286 | 287 | self::$configs[$config['connectionName']] = $config; 288 | if (isset(self::$dbConnectionPool[$config['connectionName']])){ 289 | unset(self::$dbConnectionPool[$config['connectionName']]); 290 | } 291 | return true; 292 | } 293 | 294 | public static function getConnection($connectionName){ 295 | if (!is_string($connectionName) || !isset(self::$configs[$connectionName])){ 296 | throw new \Exception("mysql config : {$connectionName} is't loaded !",-1); 297 | } 298 | 299 | // 获取连接前,判断连接是否可用 300 | if (isset(self::$dbConnectionPool[$connectionName]) && self::$configs[$connectionName]['checkConnection']){ 301 | $rel = self::$dbConnectionPool[$connectionName]->ping(); 302 | if($rel == false){ 303 | unset(self::$dbConnectionPool[$connectionName]); 304 | } 305 | } 306 | 307 | // 如果不存在连接,新建一个连接 308 | if(!isset(self::$dbConnectionPool[$connectionName])){ 309 | $config = self::$configs[$connectionName]; 310 | $dbConnection = new MysqlOperation($connectionName,$config['host'],$config['userName'],$config['password'],$config['dbName'],$config['port'],$config['charset']); 311 | self::$dbConnectionPool[$connectionName] = $dbConnection; 312 | } 313 | 314 | return self::$dbConnectionPool[$connectionName]; 315 | } 316 | 317 | public static function clearAllConnection(){ 318 | self::$dbConnectionPool = array(); 319 | } 320 | } 321 | 322 | 323 | //测试 324 | if(strtolower(PHP_SAPI) == 'cli' && isset($argv) && basename(__FILE__) == basename($argv[0])){ 325 | $config = array( 326 | 'connectionName' => 'dbLol', 327 | 'host'=> '*.*.*', 328 | 'userName' => 'xxxx', 329 | 'password' => '*****', 330 | 'dbName' => 'test', 331 | 'port' => '3306' 332 | ); 333 | 334 | TMysql::loadOneConfig($config); 335 | $lolConnection = TMysql::getConnection('dbLol'); 336 | $data = $lolConnection->query('select * from test where id<=?;',array(10)); 337 | print_r($data); 338 | } 339 | -------------------------------------------------------------------------------- /example/lol_hero_god_rank/framework/extension/TRedis.php: -------------------------------------------------------------------------------- 1 | close(); 14 | } 15 | } 16 | 17 | class RedisOperationException extends \RedisException{ 18 | 19 | } 20 | 21 | class TRedis { 22 | public static $maxConnectionTime = 5; // 最大连接时间 23 | protected static $connectionPool = array(); // 连接池对象 24 | 25 | protected static $configs = array(); // 配置项 26 | 27 | /** 28 | * 获取数组值 29 | * @param array $arr 30 | * @param string $key 31 | * @param null $default 32 | * @return mixed|null 33 | */ 34 | public static function getArrayValue($arr=array(), $key='',$default=null){ 35 | $response = null; 36 | if(is_array($arr) && isset($arr[$key])){ 37 | $response = $arr[$key]; 38 | } 39 | if($response == null){ 40 | $response = $default; 41 | } 42 | return $response; 43 | } 44 | 45 | /** 46 | * 加载单个配置文件 47 | * @param array $configArr 48 | */ 49 | public static function loadOneConfig($configArr= array()){ 50 | $tempConfig = array(); 51 | $tempConfig['connectionName'] = self::getArrayValue($configArr,'connectionName'); // 连接名称 52 | $tempConfig['host'] = self::getArrayValue($configArr,'host'); // 连接host 53 | $tempConfig['port'] = self::getArrayValue($configArr,'port',6379); // 端口号 54 | $tempConfig['database'] = self::getArrayValue($configArr,'database',0); // 使用数据库索引 55 | $tempConfig['password'] = self::getArrayValue($configArr,'password'); // 使用密码 56 | $tempConfig['prefix'] = self::getArrayValue($configArr,'prefix',''); // 数据库查询中的key前缀 57 | $tempConfig['checkConnection'] = self::getArrayValue($configArr,'checkConnection',false); 58 | if(empty($tempConfig['connectionName']) || !is_string($tempConfig['connectionName'])){ 59 | $tempConfig['name'] = 'default'; 60 | } 61 | self::$configs[$tempConfig['connectionName']] = $tempConfig; 62 | } 63 | 64 | /** 65 | * 获取一个连接对象 66 | * @param string $connectionName 67 | * @return mixed 68 | * @throws \Exception 69 | * @throws \RedisException 70 | */ 71 | public static function getConnection($connectionName ='default'){ 72 | if (!is_string($connectionName) || !isset(self::$configs[$connectionName])){ 73 | throw new RedisOperationException("redis config : {$connectionName} is't loaded !",-1); 74 | } 75 | 76 | // 检测连接是否断开,断开重连 77 | if(isset(self::$connectionPool[$connectionName]) && self::$configs[$connectionName]['checkConnection']){ 78 | try{ 79 | $redisObject = self::$connectionPool[$connectionName]; 80 | if($redisObject->ping() != '+PONG'){ 81 | throw new RedisOperationException("redis ping error",-1); 82 | } 83 | }catch (RedisOperationException $e){ 84 | @self::$connectionPool[$connectionName]->close(); 85 | unset(self::$connectionPool[$connectionName]); 86 | } 87 | } 88 | 89 | // 创建连接 90 | if(!isset(self::$connectionPool[$connectionName])){ 91 | $redisObject = new RedisOperation(); 92 | $rel = $redisObject->connect(self::$configs[$connectionName]['host'],self::$configs[$connectionName]['port'],self::$maxConnectionTime); 93 | if(!$rel){ 94 | throw new RedisOperationException("redis connection error:{$connectionName}",-1); 95 | } 96 | if(!empty(self::$configs[$connectionName]['password'])){ 97 | $rel = $redisObject->auth(self::$configs[$connectionName]['password']); 98 | if(!$rel){ 99 | $redisObject->close(); 100 | throw new RedisOperationException("redis connection auth error:{$connectionName}",-1); 101 | } 102 | } 103 | $db = (int)self::$configs[$connectionName]['database']; 104 | $rel = $redisObject->select($db); 105 | if(!$rel){ 106 | $redisObject->close(); 107 | throw new RedisOperationException("redis select db error:name-{$connectionName},db-{$db}",-1); 108 | } 109 | if(is_string(self::$configs[$connectionName]['prefix']) && !empty(self::$configs[$connectionName]['prefix'])){ 110 | $redisObject->setOption(\Redis::OPT_PREFIX,self::$configs[$connectionName]['prefix']); 111 | } 112 | 113 | 114 | self::$connectionPool[$connectionName] = $redisObject; 115 | } 116 | return self::$connectionPool[$connectionName]; 117 | } 118 | 119 | /** 120 | * 清理连接 121 | */ 122 | public static function clearAllConnection(){ 123 | self::$connectionPool = array(); 124 | } 125 | } 126 | 127 | 128 | //测试 129 | if(strtolower(PHP_SAPI) == 'cli' && isset($argv) && basename(__FILE__) == basename($argv[0])){ 130 | $config = array( 131 | 'connectionName' => 'cacheRd', 132 | 'host'=> '*.*.*.*', 133 | // 'password' => '*****', 134 | 'database' => 1, 135 | 'port' => 6379, 136 | 'prefix' => 'redis_test_', 137 | 'checkConnection' => true 138 | ); 139 | 140 | TRedis::loadOneConfig($config); 141 | $redis = TRedis::getConnection('cacheRd'); 142 | // $redis->hSet('h', 'key1', 'hello'); 143 | // $redis->expire('h',10); 144 | echo $redis->dbSize(); 145 | print_r($redis->keys('*')); 146 | // print_r($testString); 147 | } -------------------------------------------------------------------------------- /example/lol_hero_god_rank/framework/queue/LocalRequestQueue.php: -------------------------------------------------------------------------------- 1 | queue = new \SplQueue(); 19 | } 20 | 21 | public function add(Request $request){ 22 | $this->queue->enqueue($request); 23 | } 24 | 25 | public function get(){ 26 | if($this->isEmpty()){ 27 | return null; 28 | } 29 | return $this->queue->dequeue(); 30 | } 31 | 32 | public function isEmpty(){ 33 | return $this->queue->isEmpty(); 34 | } 35 | 36 | public function count(){ 37 | return $this->queue->count(); 38 | } 39 | 40 | public function isFull(){ 41 | if($this->count() >= $this->maxCount){ 42 | return true; 43 | } 44 | return false; 45 | } 46 | 47 | public function __destruct(){ 48 | unset($this->queue); 49 | } 50 | } -------------------------------------------------------------------------------- /example/lol_hero_god_rank/framework/queue/RequestQueue.php: -------------------------------------------------------------------------------- 1 | downloader = $downloader; 22 | parent::__construct($taskName); 23 | } 24 | 25 | public function coroutine(){ 26 | do{ 27 | if (!TSpider::$requestQueue->isEmpty()){ 28 | do{ 29 | $request = TSpider::$requestQueue->get(); 30 | $rel = $this->downloader->addRequest($request); 31 | 32 | // 如果下载队列添加失败,将请求重新放回请求队列中 33 | if($request instanceof Request && !$rel){ 34 | TSpider::$requestQueue->add($request); 35 | } 36 | }while($rel); 37 | } 38 | yield true; 39 | }while(true); 40 | } 41 | } -------------------------------------------------------------------------------- /example/lol_hero_god_rank/framework/task/DelayTimer.php: -------------------------------------------------------------------------------- 1 | afterDownload($request,$response); 30 | if($rel !== false){ 31 | $worker = new $request->workerName(); 32 | 33 | if($worker->beforeParse() !== false){ 34 | $worker->parse($request,$response); 35 | } 36 | $worker->afterParse(); 37 | } 38 | } 39 | 40 | /** 41 | * 初始化下载器 42 | * DownloadTask constructor. 43 | * @param string $taskName 44 | */ 45 | public function __construct($taskName=''){ 46 | $this->multiDownloader = curl_multi_init(); 47 | parent::__construct($taskName); 48 | } 49 | 50 | /** 51 | * 52 | * @param $request 53 | * @return bool 54 | */ 55 | public function addRequest($request){ 56 | $response = false; 57 | do{ 58 | if($this->multiDownloader == null){ 59 | break; 60 | } 61 | 62 | if(!($request instanceof Request)){ 63 | break; 64 | } 65 | 66 | $currentRequestNum = count($this->currentRequestMap); 67 | if($currentRequestNum >= $this->windowSize){ 68 | break; 69 | } 70 | 71 | $rel = $this->beforeDownload($request); 72 | if($rel === false){ 73 | break; 74 | } 75 | 76 | if($rel instanceof Request){ 77 | $request = $rel; 78 | } 79 | $ch = $request->createCurlObject(); 80 | $key = (string)$ch; 81 | $this->currentRequestMap[$key] = $request; 82 | curl_multi_add_handle($this->multiDownloader, $ch); 83 | 84 | $this->spareTime = 0; 85 | $response = true; 86 | }while(false); 87 | 88 | return $response; 89 | } 90 | 91 | public function beforeDownload(Request $request){ 92 | return null; 93 | } 94 | 95 | public function afterDownload(Request $request,Response $response){ 96 | return null; 97 | } 98 | 99 | // 获取下载器闲置时间 100 | public function getSpareTime(){ 101 | if($this->spareTime == 0){ 102 | return 0; 103 | }else{ 104 | return time()- $this->spareTime; 105 | } 106 | } 107 | 108 | public function coroutine() 109 | { 110 | do { 111 | while (($execrun = curl_multi_exec($this->multiDownloader, $running)) == CURLM_CALL_MULTI_PERFORM) ; 112 | if ($execrun != CURLM_OK) { 113 | if($this->spareTime == 0){ 114 | $this->spareTime = time(); 115 | } 116 | } 117 | 118 | // 一旦有一个请求完成,找出来,因为curl底层是select,所以最大受限于1024 119 | while ($done = curl_multi_info_read($this->multiDownloader)) 120 | { 121 | // 从请求中获取信息、内容、错误 122 | $info = curl_getinfo($done['handle']); 123 | $output = curl_multi_getcontent($done['handle']); 124 | $error = curl_error($done['handle']); 125 | $response = new Response($info,$output,$error); 126 | 127 | $key = (string)$done['handle']; 128 | $request = $this->currentRequestMap[$key]; 129 | $this->dispatch($request,$response); 130 | 131 | // 把请求已经完成了得 curl handle 删除 132 | unset($this->currentRequestMap[$key]); 133 | curl_multi_remove_handle($this->multiDownloader, $done['handle']); 134 | } 135 | 136 | // 当没有数据的时候进行堵塞,把 CPU 使用权交出来,避免上面 do 死循环空跑数据导致 CPU 100% 137 | if ($running) { 138 | $rel = curl_multi_select($this->multiDownloader, 1); 139 | if($rel == -1){ 140 | usleep(1000); 141 | } 142 | } 143 | 144 | if( $running == false){ 145 | if($this->spareTime == 0){ 146 | $this->spareTime = time(); 147 | } 148 | } 149 | yield true; 150 | } while (true); 151 | 152 | // 关闭任务 153 | curl_multi_close($this->multiDownloader); 154 | unset($this->multiDownloader); 155 | } 156 | } -------------------------------------------------------------------------------- /example/lol_hero_god_rank/index.php: -------------------------------------------------------------------------------- 1 | init($protectedName); 48 | } 49 | 50 | public static function init($protectedName){ 51 | self::$startCrawlTime = time(); 52 | self::$protectedName = $protectedName; 53 | self::$frameworkBasePath = dirname(__FILE__); 54 | self::$basePath = dirname(self::$frameworkBasePath); 55 | self::$applicationPath = self::$basePath.DIRECTORY_SEPARATOR.self::$protectedName; 56 | 57 | ini_set('date.timezone','Asia/Shanghai'); 58 | spl_autoload_register('\framework\TSpider::autoLoadFile'); 59 | 60 | self::checkEnvironment(); 61 | 62 | // 解析命令 63 | self::parseCommand(); 64 | 65 | // 导入引用的三方扩展 66 | self::searchAndInclude(self::$frameworkBasePath.'/extension'); 67 | self::searchAndInclude(self::$applicationPath.'/extension'); 68 | 69 | // 加载配置文件 70 | $default_config = include self::$frameworkBasePath.DIRECTORY_SEPARATOR.'config'.DIRECTORY_SEPARATOR.'default.php'; 71 | $user_config_path = self::$applicationPath.DIRECTORY_SEPARATOR.'config'.DIRECTORY_SEPARATOR.'main.php'; 72 | if(!is_file($user_config_path)){ 73 | throw new \Exception("config file not found:{$user_config_path}",-1); 74 | } 75 | $user_config = include $user_config_path; 76 | if(!is_array($user_config)){ 77 | throw new \Exception("config file not load error:{$user_config_path}",-1); 78 | } 79 | 80 | self::$config = array_merge($default_config,$user_config); 81 | 82 | self::$spiderName = Common::getArrayValue(self::$config,'name','defaultSpider'); 83 | self::$isDebug = Common::getArrayValue(self::$config,'debug',false); 84 | 85 | if(self::$isDebug){ 86 | ini_set("display_errors", "on"); 87 | error_reporting(E_ALL^E_STRICT); 88 | } 89 | 90 | // 加载logger 91 | Logger::loadOneConfig(array('logName'=>'system')); 92 | Logger::$g_basePath = self::$config['logPath']; 93 | Logger::loadConfig(self::$config['logs']); 94 | self::$log = Logger::factory('system'); 95 | 96 | // 设置request相关 97 | Request::$maxRepeat = self::$config['request']['maxRepeat']; 98 | Request::$timeOut = self::$config['request']['timeOut']; 99 | self::$requestQueue = new self::$config['request']['requestQueue'](); 100 | if(!(self::$requestQueue instanceof RequestQueue)){ 101 | $temp = self::$config['request']['requestQueue']; 102 | throw new \Exception("requestQueue error:{$temp}",-1); 103 | } 104 | 105 | // 加载必须任务,初始化任务调度器,添加下载器任务 106 | self::$taskScheduler = new TaskScheduler(); 107 | if(!isset(self::$config['downloader'])){ 108 | throw new \Exception('there is no download class config'); 109 | } 110 | $downloaderClass = self::$config['downloader']['className']; 111 | self::$downloader = new $downloaderClass(); 112 | self::$downloader->windowSize = self::$config['downloader']['windowSize']; 113 | self::$taskScheduler->addTask(self::$downloader); 114 | 115 | // 添加请求队列读取任务,以及自定义任务,all分组表示运行所有 116 | $addRequestTask = new AddRequestTask(self::$downloader); 117 | self::$taskScheduler->addTask($addRequestTask); 118 | if(self::$taskGroup == 'all'){ 119 | foreach(self::$config['tasks'] as $key=>$value){ 120 | if(is_array($value)){ 121 | foreach ($value as $className=>$params){ 122 | $tempTask = new $className(); 123 | self::$taskScheduler->addTask($tempTask); 124 | } 125 | } 126 | } 127 | }else{ 128 | if(isset(self::$config['tasks']['common'])){ 129 | foreach(self::$config['tasks']['common'] as $key=>$value){ 130 | $tempTask = new $key(); 131 | self::$taskScheduler->addTask($tempTask); 132 | } 133 | } 134 | 135 | if(isset(self::$config['tasks'][self::$taskGroup]) && self::$taskGroup != 'common'){ 136 | foreach(self::$config['tasks'][self::$taskGroup] as $className=>$params){ 137 | $tempTask = new $className(); 138 | self::$taskScheduler->addTask($tempTask); 139 | } 140 | }else{ 141 | $taskGroup = self::$taskGroup; 142 | exit("taskGroup not found:{$taskGroup}\n"); 143 | } 144 | } 145 | 146 | // 初始化组件配置文件 147 | if(isset(self::$config['component']) && is_array(self::$config['component'])){ 148 | Component::$config = self::$config['component']; 149 | } 150 | self::$component = new Component(); 151 | 152 | // 设置进程名称 153 | $title = self::$spiderName; 154 | $taskGroup = self::$taskGroup; 155 | $startTime = date('Y-m-d H:i:s',self::$startCrawlTime); 156 | self::setProcessTitle("TSpider:{$title}-{$taskGroup} startTime:{$startTime}"); 157 | } 158 | 159 | public static function autoLoadFile($rawName){ 160 | $name = str_replace('\\', DIRECTORY_SEPARATOR ,$rawName); 161 | $classFile = self::$basePath . DIRECTORY_SEPARATOR . $name . '.php'; 162 | 163 | if(is_file($classFile)){ 164 | if(!class_exists($rawName,true)){ 165 | $rel = require $classFile; 166 | return $rel; 167 | } 168 | }else{ 169 | throw new Exception("auto load File error:{$classFile}",-1); 170 | } 171 | return false; 172 | } 173 | 174 | /** 175 | * 解析接收的命令 176 | */ 177 | public static function parseCommand(){ 178 | if(isset($_SERVER['argv']) && count($_SERVER['argv']) >=2){ 179 | $command = $_SERVER['argv'][1]; 180 | if(strlen($command)){ 181 | self::$taskGroup = $command; 182 | }else{ 183 | self::$taskGroup = ''; 184 | } 185 | }else{ 186 | self::$taskGroup = 'all'; 187 | } 188 | echo 'taskGroup:',self::$taskGroup,"\n"; 189 | } 190 | 191 | 192 | /** 193 | * 检测运行环境 194 | */ 195 | public static function checkEnvironment(){ 196 | echo 'PHP-version:',PHP_VERSION,' TSpider-version:',self::VERSION,' start-time:',date('Y-m-d H:i:s',self::$startCrawlTime),"\n"; 197 | 198 | if(version_compare(PHP_VERSION,'5.5.0','<=')){ 199 | exit('php version must greater than 5.5.0'); 200 | } 201 | 202 | if(substr(php_sapi_name(), 0, 3) != 'cli'){ 203 | exit('this program must be running in cli mode '); 204 | } 205 | 206 | if(extension_loaded('posix')){ 207 | $userInfo = posix_getpwuid(posix_getuid()); 208 | echo 'pid:',posix_getpid()," ",'running-user:',$userInfo['name'],"\n"; 209 | } 210 | } 211 | 212 | /** 213 | * 设置进程名称 214 | * @param $title 215 | */ 216 | public static function setProcessTitle($title) 217 | { 218 | if (function_exists('cli_set_process_title')) { 219 | @cli_set_process_title($title); 220 | } elseif (extension_loaded('proctitle') && function_exists('setproctitle')) { 221 | @setproctitle($title); 222 | } 223 | } 224 | 225 | /** 226 | * 导入某个文件夹中的所有文件 227 | * @param $rootPath 228 | */ 229 | public static function searchAndInclude($rootPath){ 230 | if(is_dir($rootPath)){ 231 | $rootPath = rtrim($rootPath,'/'); 232 | $files = glob($rootPath.'/*'); 233 | foreach ($files as $file){ 234 | if(is_dir($file)){ 235 | self::searchAndInclude($file); 236 | }else{ 237 | include_once $file; 238 | } 239 | } 240 | } 241 | } 242 | 243 | public static function run(){ 244 | self::$taskScheduler->run(); 245 | } 246 | } -------------------------------------------------------------------------------- /framework/base/BaseWorker.php: -------------------------------------------------------------------------------- 1 | code} : {$request->url} \n"; 26 | } 27 | 28 | /** 29 | * 解析前hook 30 | * @return bool 31 | */ 32 | public function beforeParse(){ 33 | self::$statisticsParserNum ++; 34 | return true; 35 | } 36 | 37 | /** 38 | * 解析后hook 39 | */ 40 | public function afterParse(){ 41 | // TODO: Implement afterParse() method. 42 | } 43 | } -------------------------------------------------------------------------------- /framework/base/Request.php: -------------------------------------------------------------------------------- 1 | url = $url; 37 | $this->workerName = $workerName; 38 | 39 | $this->options[CURLOPT_URL] = $url; 40 | $this->options[CURLOPT_TIMEOUT] = self::$timeOut; 41 | $this->options[CURLOPT_USERAGENT] = 'Mozilla/5.0 (Windows NT 6.2; WOW64; Trident/7.0; rv:11.0) like Gecko'; 42 | $this->options[CURLOPT_ENCODING] = 'gzip, deflate '; 43 | $this->extData = $extData; 44 | if(empty($postData) || !is_array($postData)){ 45 | $this->type = 'GET'; 46 | }else{ 47 | $this->type = 'POST'; 48 | $this->postData = $postData; 49 | $this->options[CURLOPT_POST] = true; 50 | $this->options[CURLOPT_POSTFIELDS] = $postData; 51 | } 52 | } 53 | 54 | /** 55 | * 对象字符串化 56 | * @return string 57 | */ 58 | public function __toString(){ 59 | $json_arr = array( 60 | 'url' => $this->url, 61 | 'workerName' => $this->workerName, 62 | 'options' => $this->options, 63 | 'postData' => $this->postData, 64 | 'repeat' => $this->repeat, 65 | 'header' => $this->header, 66 | ); 67 | $returnString = json_encode($json_arr); 68 | if(!is_string($returnString)){ 69 | $returnString = ''; 70 | } 71 | return $returnString; 72 | } 73 | 74 | /** 75 | * 设置头部信息数组 76 | * @param $options 77 | */ 78 | public function setOptions($options){ 79 | foreach($options as $key=>$value){ 80 | $this->options[$key] = $value; 81 | } 82 | } 83 | 84 | /** 85 | * 累加重复次数 86 | */ 87 | public function addRepeat(){ 88 | $this->repeat ++; 89 | } 90 | 91 | /** 92 | * 获取重复次数 93 | * @return int 94 | */ 95 | public function getRepeatNum(){ 96 | return $this->repeat; 97 | } 98 | 99 | /** 100 | * 是否能重复下载 101 | * @return bool 102 | */ 103 | public function canRepeat(){ 104 | if(self::$maxRepeat < $this->repeat){ 105 | return false; 106 | } 107 | return true; 108 | } 109 | 110 | /** 111 | * 重新下载 112 | * @return bool true-添加成功,false-添加失败 113 | */ 114 | public function redownload(){ 115 | $this->addRepeat(); 116 | if($this->canRepeat()) { 117 | TSpider::$requestQueue->add($this); 118 | return true; 119 | } 120 | return false; 121 | } 122 | 123 | 124 | /** 125 | * 将请求放入下载队列中,队列满了,返回失败 126 | * @return bool 127 | */ 128 | public function download(){ 129 | if(TSpider::$requestQueue->isFull()){ 130 | return false; 131 | } 132 | TSpider::$requestQueue->add($this); 133 | return true; 134 | } 135 | 136 | 137 | public function createCurlObject(){ 138 | $ch = curl_init(); 139 | if($this->canRepeat()){ 140 | if(!empty($this->options)){ 141 | curl_setopt_array($ch,$this->options); 142 | } 143 | curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); 144 | }else{ 145 | $ch = false; 146 | } 147 | return $ch; 148 | } 149 | } -------------------------------------------------------------------------------- /framework/base/Response.php: -------------------------------------------------------------------------------- 1 | info = $info; 22 | $this->data = $data; 23 | $this->error = $error; 24 | if(isset($this->info['http_code'])){ 25 | $this->code = $this->info['http_code']; 26 | } 27 | } 28 | 29 | public function getData(){ 30 | return $this->data; 31 | } 32 | 33 | public function getError(){ 34 | return $this->error; 35 | } 36 | } -------------------------------------------------------------------------------- /framework/component/Component.php: -------------------------------------------------------------------------------- 1 | getInstance(); 31 | } 32 | 33 | $className = isset(self::$config[$name]['className'])?self::$config[$name]['className']:''; 34 | if(!class_exists($className)){ 35 | throw new \Exception("have no component className:{$className}\n",-1); 36 | } 37 | 38 | $component = new $className($name,self::$config[$name]); 39 | if(!($component instanceof Component)){ 40 | throw new \Exception("className:{$className} is not Component\n",-1); 41 | } 42 | 43 | self::$instanceMap[$name] = $component; 44 | 45 | return self::$instanceMap[$name]->getInstance(); 46 | } 47 | 48 | public function __construct($componentName='',$config=array()){ 49 | $this->componentName = $componentName; 50 | $this->oneConfig = $config; 51 | } 52 | 53 | 54 | 55 | public function getInstance(){ 56 | return $this; 57 | } 58 | 59 | } -------------------------------------------------------------------------------- /framework/component/MysqlComponent.php: -------------------------------------------------------------------------------- 1 | isLoadConfig){ 19 | $this->oneConfig['connectionName'] = $this->componentName; 20 | TMysql::loadOneConfig($this->oneConfig); 21 | $this->isLoadConfig = true; 22 | } 23 | return TMysql::getConnection($this->componentName); 24 | } 25 | } -------------------------------------------------------------------------------- /framework/component/RedisComponent.php: -------------------------------------------------------------------------------- 1 | isLoadConfig){ 19 | $this->oneConfig['connectionName'] = $this->componentName; 20 | TRedis::loadOneConfig($this->oneConfig); 21 | $this->isLoadConfig = true; 22 | } 23 | return TRedis::getConnection($this->componentName); 24 | } 25 | } -------------------------------------------------------------------------------- /framework/component/RedisLockComponent.php: -------------------------------------------------------------------------------- 1 | isLoadConfig){ 21 | $this->oneConfig['connectionName'] = $this->componentName; 22 | TRedis::loadOneConfig($this->oneConfig); 23 | $this->isLoadConfig = true; 24 | } 25 | 26 | return new TLock($this->componentName,$this->getArrayValue($this->oneConfig,'prefix','lock_'),$this->getArrayValue($this->oneConfig,'expire',3600)); 27 | } 28 | 29 | 30 | /** 31 | * 安全地获取数组的值 32 | * @param $arr array 数组 33 | * @param $key String 键名 34 | * @param string $default 默认值 35 | * @return string 36 | */ 37 | public static function getArrayValue($arr,$key,$default=''){ 38 | if(isset($arr[$key])){ 39 | return $arr[$key]; 40 | } 41 | return $default; 42 | } 43 | } -------------------------------------------------------------------------------- /framework/component/SmsWoquComponent.php: -------------------------------------------------------------------------------- 1 | lastSendTime) > $interval_time){ 37 | $this->lastSendTime = $now; 38 | $this->sendMessageNum = 1; 39 | return true; 40 | } 41 | if($this->sendMessageNum <= $maxSendNum){ 42 | $this->sendMessageNum ++; 43 | return true; 44 | } 45 | return false; 46 | } 47 | 48 | 49 | /** 50 | * 发送消息函数 51 | * @param string $content 发送的消息内容 52 | * @param bool $needSend 当前短信是否必须发送出去 53 | * @return bool 54 | */ 55 | public function sendMessage($content,$needSend=false){ 56 | if(!($this->canSendMessage() || $needSend)){ 57 | return false; 58 | } 59 | $phoneNumbers = explode(',',$this->oneConfig['To']); 60 | if(empty($this->oneConfig['To'])){ 61 | Http::request($this->oneConfig['serverUrl'],array( 62 | 'Module' => $this->oneConfig['Module'], 63 | 'MsgType' => $this->oneConfig['MsgType'], 64 | 'To' => '', 65 | 'MsgText' => $content)); 66 | }else{ 67 | foreach($phoneNumbers as $key=>$phone){ 68 | Http::request($this->oneConfig['serverUrl'],array( 69 | 'Module' => $this->oneConfig['Module'], 70 | 'MsgType' => $this->oneConfig['MsgType'], 71 | 'To' => $phone, 72 | 'MsgText' => $content)); 73 | } 74 | } 75 | return true; 76 | } 77 | } -------------------------------------------------------------------------------- /framework/config/default.php: -------------------------------------------------------------------------------- 1 | 'defaultSpider', 12 | 'debug' => false, 13 | 'logPath' => dirname(dirname(dirname(__FILE__))).DIRECTORY_SEPARATOR.'log'.DIRECTORY_SEPARATOR, 14 | 'logs' => array( 15 | 'system' => array( 16 | 'logName' => 'system', 17 | ) 18 | ), 19 | 'downloader' => array( 20 | 'className' => 'framework\task\DownloadTask', 21 | 'windowSize' => 50, 22 | ), 23 | 'request' => array( 24 | 'maxRepeat' => 5, 25 | 'timeOut' => 300, 26 | 'requestQueue' => 'framework\queue\LocalRequestQueue' 27 | ), 28 | 'tasks' => array( 29 | 'common' => array( 30 | 'framework\task\DelayTimer'=>array() 31 | ) 32 | ), 33 | 'component' => array( 34 | // 填写相应组件 35 | ), 36 | ); -------------------------------------------------------------------------------- /framework/coroutine/ExampleTask.php: -------------------------------------------------------------------------------- 1 | taskId = ++self::$maxTaskId; 24 | 25 | $this->taskName = $taskName; 26 | if(empty($taskName)){ 27 | $this->taskName = get_class(); 28 | } 29 | $taskContent = $this->coroutine(); 30 | if($taskContent instanceof \Generator){ 31 | $this->taskContent = $taskContent; 32 | }else{ 33 | throw new Exception('Task is not a coroutine',-1); 34 | } 35 | } 36 | 37 | public function getTaskId() { 38 | return $this->taskId; 39 | } 40 | 41 | public function getTaskName() { 42 | return $this->taskName; 43 | } 44 | 45 | public function setSendValue($sendValue) { 46 | $this->sendValue = $sendValue; 47 | } 48 | 49 | abstract public function coroutine(); 50 | 51 | public function run() { 52 | if ($this->beforeFirstYield) { 53 | $this->beforeFirstYield = false; 54 | return $this->taskContent->current(); 55 | } else { 56 | $retval = $this->taskContent->send($this->sendValue); 57 | $this->sendValue = null; 58 | return $retval; 59 | } 60 | } 61 | 62 | public function isFinished() { 63 | return !$this->taskContent->valid(); 64 | } 65 | } -------------------------------------------------------------------------------- /framework/coroutine/TaskScheduler.php: -------------------------------------------------------------------------------- 1 | task 16 | protected $taskQueue = null; 17 | 18 | protected static $is_closed = false; 19 | 20 | public function __construct() { 21 | $this->taskQueue = new \SplQueue(); 22 | } 23 | 24 | public static function closeAllTask(){ 25 | self::$is_closed = true; 26 | } 27 | 28 | public static function isAllTaskClosed(){ 29 | return self::$is_closed; 30 | } 31 | 32 | public function addTask(Task $task){ 33 | $taskId = $task->getTaskId(); 34 | $this->taskMap[$taskId] = $task; 35 | $this->schedule($task); 36 | return $taskId; 37 | } 38 | 39 | public function schedule(Task $task) { 40 | $this->taskQueue->enqueue($task); 41 | } 42 | 43 | public function run() { 44 | while (!$this->taskQueue->isEmpty()) { 45 | $task = $this->taskQueue->dequeue(); 46 | $task->run(); 47 | 48 | if ($task->isFinished()) { 49 | unset($this->taskMap[$task->getTaskId()]); 50 | } else { 51 | $this->schedule($task); 52 | } 53 | if(TaskScheduler::isAllTaskClosed()){ 54 | break; 55 | } 56 | } 57 | } 58 | } -------------------------------------------------------------------------------- /framework/coroutine/TimerTask.php: -------------------------------------------------------------------------------- 1 | 0){ 21 | $this->intervalTime = $intervalTime; 22 | } 23 | if(empty($taskName)){ 24 | $taskName = get_class($this); 25 | } 26 | $this->expireTime = time()+$this->intervalTime; 27 | parent::__construct($taskName); 28 | } 29 | 30 | public function coroutine(){ 31 | while(true){ 32 | if($this->expireTime <= time()){ 33 | $rel = $this->execute(); 34 | $this->expireTime = time()+$this->intervalTime; 35 | if($rel === false){ 36 | break; 37 | } 38 | } 39 | yield true; 40 | } 41 | } 42 | 43 | public abstract function execute(); 44 | } -------------------------------------------------------------------------------- /framework/extension/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hirudy/Tspider/93ba6b4f9ac1725519af3386c7ca28b33ceb8770/framework/extension/.DS_Store -------------------------------------------------------------------------------- /framework/extension/Common.php: -------------------------------------------------------------------------------- 1 | $value){ 42 | $options[$key] =$value; 43 | } 44 | 45 | // 是否是post请求 46 | if(!empty($postData) && is_array($postData)){ 47 | $options[CURLOPT_POST] = true; 48 | $options[CURLOPT_POSTFIELDS] = http_build_query($postData); 49 | } 50 | 51 | // 是否是https 52 | if(stripos($url,'https') === 0){ 53 | $options[CURLOPT_SSL_VERIFYPEER] = false; 54 | } 55 | 56 | // 返回curl对象 57 | $ch = curl_init(); 58 | curl_setopt_array($ch,$options); 59 | return $ch; 60 | } 61 | 62 | /** 63 | * 将字符串解析为http响应头部数组 64 | * @param $strResponseHeader 65 | * @return array 66 | */ 67 | private static function parseResponseHeader($strResponseHeader){ 68 | $headerList = array(); 69 | $tempHeaderList = explode("\r\n",$strResponseHeader); 70 | foreach ($tempHeaderList as $row){ 71 | if (stripos($row,':') === false){ 72 | $tmp = explode(" ",$row); 73 | $headerList['Protocol'] = isset($tmp[0])?$tmp[0]:''; 74 | $headerList['Status'] = (int)(isset($tmp[1])?$tmp[1]:0); 75 | $headerList['Message'] = isset($tmp[2])?$tmp[2]:''; 76 | }else{ 77 | $tmp = explode(":",$row, 2); 78 | if (count($tmp) != 2){ 79 | continue; 80 | } 81 | $key = trim($tmp[0]); 82 | $value = trim($tmp[1]); 83 | if ($key == 'Set-Cookie'){ 84 | if (!isset($headerList[$key])){ 85 | $headerList[$key] = array(); 86 | } 87 | $tmpCookieList = explode(";",$value); 88 | foreach ($tmpCookieList as $oneCookie){ 89 | $tmpCookie = explode("=",$oneCookie,2); 90 | if (count($tmpCookie) != 2){ 91 | continue; 92 | } 93 | $key_cookie = trim($tmpCookie[0]); 94 | $value_cookie = trim($tmpCookie[1]); 95 | $headerList[$key][$key_cookie] = $value_cookie; 96 | } 97 | }else{ 98 | $headerList[$key] = $value; 99 | } 100 | } 101 | } 102 | 103 | return $headerList; 104 | } 105 | 106 | /** 107 | * 从curl对象中提取请求的结果 108 | * @param $ch 109 | * @param $rel 110 | * @return array 111 | */ 112 | private static function fetchResponse($ch, $rel){ 113 | $response = array( 114 | 'status' => false, 115 | 'code' => 0, 116 | 'header' => array(), 117 | 'body' => '', 118 | 'extraInfo' => array(), 119 | 'errorInfo' => array() 120 | ); 121 | $response['extraInfo'] = curl_getinfo($ch); 122 | if($rel == false){ 123 | $error = array(); 124 | $error['code'] = curl_errno( $ch ); 125 | $error['info'] = curl_error($ch); 126 | $response['errorInfo'] = $error; 127 | }else{ 128 | // 切割header 与 body 129 | $header_body = explode("\r\n\r\n",$rel); 130 | do{ 131 | if (count($header_body) !== 2){ 132 | $error =array(); 133 | $error['code'] = 0; 134 | $error['info'] = 'split header and body error'; 135 | $error['list'] = $rel; 136 | $response['errorInfo'] = $error; 137 | break; 138 | } 139 | 140 | // 格式化返回结果 141 | $response['body'] = $header_body[1]; 142 | $response['header'] = self::parseResponseHeader($header_body[0]); 143 | $response['code'] = $response['header']['Status']; 144 | $response['status'] = true; 145 | }while(false); 146 | } 147 | 148 | return $response; 149 | } 150 | 151 | /** 152 | * 通用请求方法 153 | * @param string $url 请求url地址 154 | * @param array $postData 请求post参数数组 155 | * @param array $header 请求附带请求头部数组 156 | * @param int $timeOut 超时时间 157 | * @param string $proxy 代理设置 158 | * @return array 159 | */ 160 | public static function request($url,$postData=array(),$header=array(),$timeOut=self::DEFAULT_TIMEOUT, $proxy=''){ 161 | // 执行请求 162 | $ch = self::buildCurlObject($url, $postData, $header, $timeOut, $proxy); 163 | $rel = curl_exec($ch); 164 | $response = self::fetchResponse($ch, $rel); 165 | curl_close($ch); 166 | return $response; 167 | } 168 | 169 | /** 170 | * 简单返回请求方法 171 | * @param string $url 请求url地址 172 | * @param array $postData 请求post参数数组 173 | * @param array $header 请求附带请求头部数组 174 | * @param int $timeOut 超时时间 175 | * @param string $proxy 代理设置 176 | * @return bool|mixed 成功返回body字符串,失败返回false 177 | */ 178 | public static function simpleResponseRequest($url,$postData=array(), $header=array(), $timeOut=self::DEFAULT_TIMEOUT, $proxy=''){ 179 | $result = self::request($url, $postData, $header, $timeOut, $proxy); 180 | if ($result['status']){ 181 | return $result['body']; 182 | }else{ 183 | print_r($result['extraInfo']); 184 | print_r($result['errorInfo']); 185 | return false; 186 | } 187 | } 188 | 189 | /** 190 | * 多个http请求并行执行 191 | * @param array $requestList 参数同self::request,只不过改成了数组 192 | * @return arrayMAX_REDIRECT_NUM = 5 193 | */ 194 | public static function multiRequest(Array $requestList){ 195 | // 创建curl对象,存放到数组,添加到下载器中 196 | $requestCurlObjectList = array(); 197 | $downloader = curl_multi_init(); 198 | foreach ($requestList as $row){ 199 | 200 | $url = isset($row['url'])?$row['url']:''; 201 | $postData = isset($row['postData'])?$row['postData']:array(); 202 | $header = isset($row['header'])?$row['header']:array(); 203 | $timeOut = isset($row['timeOut'])?$row['timeOut']:self::DEFAULT_TIMEOUT; 204 | $proxy = isset($row['proxy'])?$row['proxy']:''; 205 | $tmpCurlObject = self::buildCurlObject($url,$postData,$header,$timeOut,$proxy); 206 | $requestCurlObjectList[] = $tmpCurlObject; 207 | curl_multi_add_handle($downloader,$tmpCurlObject); 208 | } 209 | 210 | // 并行执行多个curl对象,等待所有请求完毕退出循环 211 | $active = true; 212 | $mrc = CURLM_OK; 213 | while ($active && $mrc == CURLM_OK) { 214 | do { 215 | $mrc = curl_multi_exec($downloader, $active); 216 | } while ($mrc == CURLM_CALL_MULTI_PERFORM); 217 | 218 | if (curl_multi_select($downloader) == -1) { 219 | usleep(100); 220 | } 221 | } 222 | 223 | // 解析每一个请求对象 224 | $responseList = array(); 225 | foreach ($requestCurlObjectList as $key=>$ch){ 226 | $rel = curl_multi_getcontent($ch); 227 | $response = self::fetchResponse($ch, $rel); 228 | $responseList[$key] = $response; 229 | curl_multi_remove_handle($downloader, $ch); 230 | curl_close($ch); 231 | } 232 | curl_multi_close($downloader); 233 | return $responseList; 234 | } 235 | } 236 | 237 | //测试 238 | if(strtolower(PHP_SAPI) == 'cli' && isset($argv) && basename(__FILE__) == basename($argv[0])){ 239 | // 串行请求 240 | $start_time = microtime(true); 241 | $response1 = THttp::request('https://www.baidu.com/'); 242 | $response2 = THttp::request('http://www.jd.com'); 243 | $response3 = THttp::request('http://www.jianshu.com/'); 244 | $response4 = THttp::request('http://www.zhihu.com/'); 245 | $response5 = THttp::request('http://www.php.net/'); 246 | $response6 = THttp::request('https://github.com/hirudy'); 247 | $response7 = THttp::request('http://www.toutiao.com/'); 248 | $response8 = THttp::request('http://www.mi.com/'); 249 | // $response9 = THttp::request('https://www.google.com'); 250 | echo "serial request take time : ", microtime(true)-$start_time,"\n"; 251 | 252 | // 并行请求 253 | $start_time = microtime(true); 254 | $responseList = THttp::multiRequest(array( 255 | array('url'=>'https://www.baidu.com/'), 256 | array('url'=>'http://www.jd.com'), 257 | array('url'=>'http://www.jianshu.com/'), 258 | array('url'=>'http://www.zhihu.com/'), 259 | array('url'=>'http://www.php.net/'), 260 | array('url'=>'https://github.com/hirudy'), 261 | array('url'=>'http://www.toutiao.com/'), 262 | array('url'=>'http://www.mi.com/'), 263 | // array('url'=>'https://www.google.com') 264 | )); 265 | echo "parallel requests take time : ", microtime(true)-$start_time,"\n"; 266 | 267 | } -------------------------------------------------------------------------------- /framework/extension/TLock.php: -------------------------------------------------------------------------------- 1 | connectionName = $connectionName; 21 | $this->uniqueId = uniqid(); 22 | $this->prefix = $prefix; 23 | $this->expire = $expire; 24 | } 25 | 26 | 27 | /** 28 | * 获取一个非阻塞锁 true-获取到,false-没有获取到 29 | * @param $resource_name 30 | * @return bool 31 | * @throws RedisOperationException 32 | */ 33 | public function lock($resource_name){ 34 | if(!empty($this->resource_name)){ 35 | return false; 36 | } 37 | $this->resource_name = $resource_name; 38 | 39 | $redis = TRedis::getConnection($this->connectionName); 40 | if($redis->setNx($this->resource_name,$this->uniqueId)){ 41 | $redis->expire($this->resource_name,$this->expire); 42 | return true; 43 | } 44 | return false; 45 | } 46 | 47 | /** 48 | * 获得一个阻塞锁 49 | * @param $resource_name 50 | * @return bool 51 | * @throws RedisOperationException 52 | */ 53 | public function lockWait($resource_name){ 54 | if(!empty($this->resource_name)){ 55 | return false; 56 | } 57 | $this->resource_name = $resource_name; 58 | 59 | do{ 60 | $redis = TRedis::getConnection($this->connectionName); 61 | if($redis->setNx($this->resource_name,$this->uniqueId)){ 62 | $redis->expire($this->resource_name,$this->expire); 63 | break; 64 | } 65 | 66 | sleep(1); 67 | }while(true); 68 | return true; 69 | } 70 | 71 | /** 72 | * 解锁,只能解锁自己对象的说 73 | * @throws RedisOperationException 74 | */ 75 | public function unlock(){ 76 | $redis = TRedis::getConnection($this->connectionName); 77 | $rel = $redis->get($this->resource_name); 78 | if($rel == $this->uniqueId){ 79 | $redis->delete($this->resource_name); 80 | } 81 | } 82 | } -------------------------------------------------------------------------------- /framework/extension/TLogger.php: -------------------------------------------------------------------------------- 1 | 'fatal', 44 | TLogger::LOG_LEVEL_ERROR => 'error', 45 | TLogger::LOG_LEVEL_WARN => 'warn', 46 | TLogger::LOG_LEVEL_INFO => 'info', 47 | TLogger::LOG_LEVEL_DEBUG => 'debug' 48 | ); 49 | 50 | // 具体的写类 51 | protected abstract function write($message); 52 | 53 | protected function log($message, $level){ 54 | // 是否需要记录 55 | if(!TLogger::$g_isLogging || !$this->isLogging || $level > $this->level){ 56 | return false; 57 | } 58 | 59 | // 序列化消息 60 | if(!is_string($message)){ 61 | if(is_array($message)){ 62 | $message = json_encode($message,JSON_UNESCAPED_UNICODE); 63 | }else if (is_object($message)){ 64 | $message = serialize($message); 65 | }else{ 66 | $message = (string)$message; 67 | } 68 | } 69 | 70 | // 格式化消息输出 71 | $levelInfo = TLogger::getArrayValue(self::$logLevelMap, $level, TLogger::LOG_LEVEL_INFO); 72 | $message = sprintf('[%s %s] %s',@date('Y-m-d H:i:s'), $levelInfo, $message); 73 | 74 | return $this->write($message); 75 | } 76 | 77 | public function fatal($message){ 78 | return $this->log($message, TLogger::LOG_LEVEL_FATAL); 79 | } 80 | 81 | public function error($message){ 82 | return $this->log($message, TLogger::LOG_LEVEL_ERROR); 83 | } 84 | 85 | public function warn($message){ 86 | return $this->log($message, TLogger::LOG_LEVEL_WARN); 87 | } 88 | 89 | public function info($message){ 90 | return $this->log($message, TLogger::LOG_LEVEL_INFO); 91 | } 92 | 93 | public function debug($message){ 94 | return $this->log($message, TLogger::LOG_LEVEL_DEBUG); 95 | } 96 | } 97 | 98 | /** 99 | * 控制台操作类 100 | * Class ConsoleHandle 101 | */ 102 | class ConsoleHandle extends LoggerHandle{ 103 | 104 | public static function parseConfig($rawConfig){ 105 | // 控制台模式日志配置格式 106 | $result = array( 107 | 'name' => TLogger::getArrayValue($rawConfig, 'name', 'default', 'string'), // 日志名称 108 | 'isLogging' => TLogger::getArrayValue($rawConfig, 'isLogging', TLogger::$g_isLogging, 'boolean'), // 当前日志是否记录 109 | 'mode' => TLogger::LOG_MODE_CONSOLE, // 记录模式 110 | 'level' => TLogger::getArrayValue($rawConfig, 'level', TLogger::LOG_LEVEL_DEBUG,'integer'), // 日志等级 111 | ); 112 | 113 | // 判断记录等级是否合法 114 | if ($result['level'] > TLogger::LOG_LEVEL_DEBUG || $result['level'] < TLogger::LOG_LEVEL_FATAL){ 115 | throw new TLoggerException("({$result['name']}) config level set error"); 116 | } 117 | return $result; 118 | } 119 | 120 | public function __construct($config){ 121 | $this->isLogging = $config['isLogging']; 122 | $this->name = $config['name']; 123 | $this->config = $config; 124 | $this->mode = $config['mode']; 125 | $this->level = $config['level']; 126 | } 127 | 128 | protected function write($message){ 129 | echo $message, PHP_EOL; 130 | return true; 131 | } 132 | } 133 | 134 | /** 135 | * 文件操作类 136 | * Class FileHandle 137 | */ 138 | class FileHandle extends LoggerHandle{ 139 | protected $basePath; // 存储路径 140 | protected $frequency; // 切割日志方式 141 | protected $suffix = '.log'; // 日志文件后缀 142 | 143 | private $logFilePath; // 完整的日志路径 144 | private $timeLength; // 存储时间长度 145 | 146 | public static function parseConfig($rawConfig){ 147 | // 文件模式日志配置格式化 148 | $result = array( 149 | 'name' => TLogger::getArrayValue($rawConfig, 'name', 'default', 'string'), // 日志名称 150 | 'isLogging' => TLogger::getArrayValue($rawConfig, 'isLogging', TLogger::$g_isLogging, 'boolean'), // 当前日志是否记录 151 | 'mode' => TLogger::LOG_MODE_FILE, // 记录模式 152 | 'level' => TLogger::getArrayValue($rawConfig, 'level', TLogger::LOG_LEVEL_DEBUG,'integer'), // 日志等级 153 | 154 | 'basePath' => TLogger::getArrayValue($rawConfig, 'basePath', TLogger::$g_basePath, 'string'), // 当前日志的记录文件根目录 155 | 'frequency' => TLogger::getArrayValue($rawConfig, 'frequency', TLogger::LOG_FREQUENCY_NONE, 'integer') // 切割日志方式 156 | ); 157 | 158 | // 判断记录等级是否合法 159 | if ($result['level'] > TLogger::LOG_LEVEL_DEBUG || $result['level'] < TLogger::LOG_LEVEL_FATAL){ 160 | throw new TLoggerException("({$result['name']}) config level set error"); 161 | } 162 | 163 | // 判断记录日志切割是否合法 164 | if ($result['frequency'] > TLogger::LOG_FREQUENCY_MONTH || $result['frequency'] < TLogger::LOG_FREQUENCY_NONE){ 165 | throw new TLoggerException("({$result['name']}) config frequency set error"); 166 | } 167 | 168 | // 初始化日志记录根目录 169 | $result['basePath'] = rtrim($result['basePath'], DIRECTORY_SEPARATOR); 170 | if(!is_dir($result['basePath'])){ 171 | if( !mkdir($result['basePath'], 0775, true)){ 172 | throw new TLoggerException("({$result['name']}) config create directory fail:".$result['basePath']); 173 | } 174 | } 175 | 176 | return $result; 177 | } 178 | 179 | public function __construct($config){ 180 | $this->isLogging = $config['isLogging']; 181 | $this->name = $config['name']; 182 | $this->config = $config; 183 | $this->mode = $config['mode']; 184 | $this->level = $config['level']; 185 | $this->frequency = $config['frequency']; 186 | $this->basePath = $config['basePath']; 187 | 188 | $this->logFilePath = $this->basePath.DIRECTORY_SEPARATOR.$this->name; 189 | switch($this->frequency){ 190 | case TLogger::LOG_FREQUENCY_MINUTE: $this->timeLength = 12;break; 191 | case TLogger::LOG_FREQUENCY_HOUR: $this->timeLength = 10;break; 192 | case TLogger::LOG_FREQUENCY_DAY: $this->timeLength = 8;break; 193 | case TLogger::LOG_FREQUENCY_MONTH: $this->timeLength = 6;break; 194 | default: 195 | $this->timeLength = -1; 196 | } 197 | } 198 | 199 | protected function write($message){ 200 | $logTime = time(); 201 | // 检测是否需要对日志文件进行重命名 202 | if($this->timeLength > 0){ 203 | $fileCreateTime = @filectime($this->logFilePath.$this->suffix); 204 | if($fileCreateTime){ 205 | $logTimeFormat = substr(@date('YmdHis',$logTime),0,$this->timeLength); 206 | $createTimeFormat = substr(@date('YmdHis',$fileCreateTime),0,$this->timeLength); 207 | if(strcmp($logTimeFormat,$createTimeFormat) !== 0){ 208 | $newLogFilePath = $this->logFilePath.'_'.$createTimeFormat.$this->suffix; 209 | rename($this->logFilePath.$this->suffix,$newLogFilePath); 210 | } 211 | } 212 | } 213 | $message .= PHP_EOL; 214 | // 写入日志文件中 215 | return file_put_contents($this->logFilePath.$this->suffix,$message,LOCK_EX | FILE_APPEND); 216 | } 217 | } 218 | 219 | 220 | /** 221 | * 日志管理类 222 | * Class TLogger 223 | */ 224 | class TLogger{ 225 | const VERSION = '1.0.1'; // 日志组件版本号 226 | 227 | const LOG_MODE_CONSOLE = 'Console'; // 记录模式,记录到控制台 228 | const LOG_MODE_FILE = 'File'; // 记录模式,记录到文件 229 | const LOG_MODE_TCP = 'Tcp'; // 记录模式,记录到TCP 230 | const LOG_MODE_UDP = 'Udp'; // 记录模式,记录到UDP 231 | 232 | const LOG_FREQUENCY_NONE = 0; // 切割日志方式,存放的日志文件始终只有一个文件,形如 default.log 233 | const LOG_FREQUENCY_MINUTE = 1; // 切割日志方式,存放日志每隔一分钟换一个,形如 default_201601192357.log 234 | const LOG_FREQUENCY_HOUR = 2; // 切割日志方式,存放日志每隔一小时换一个,形如 default_2016011923.log 235 | const LOG_FREQUENCY_DAY = 3; // 切割日志方式,存放日志每隔一天换一个,形如 default_20160119.log 236 | const LOG_FREQUENCY_MONTH = 4; // 切割日志方式,存放日志每隔一月换一个,形如 default_201601.log 237 | 238 | const LOG_LEVEL_FATAL = 0; // 日志等级,严重错误 239 | const LOG_LEVEL_ERROR = 1; // 日志等级,错误 240 | const LOG_LEVEL_WARN = 2; // 日志等级,警告 241 | const LOG_LEVEL_INFO = 3; // 日志等级,信息记录 242 | const LOG_LEVEL_DEBUG = 4; // 日志等级,调试 243 | 244 | public static $g_isLogging = true; // 总开关,是否记录日志 245 | public static $g_basePath = '/data/log'; // 默认存储路径 246 | 247 | protected static $g_config_arr = array(); // 日志配置文件数组 248 | protected static $logPool = array(); // 日志对象池子 249 | 250 | 251 | /** 252 | * 获取数组中某个值 253 | * @param array $arr 数组 254 | * @param string $key key值 255 | * @param string $default 默认值 256 | * @param string $valueType 不为空,检查变量数据类型 257 | * @return mixed 258 | */ 259 | public static function getArrayValue(Array $arr, $key, $default='', $valueType=''){ 260 | $result = $default; 261 | if (isset($arr[$key])){ 262 | $result = $arr[$key]; 263 | } 264 | 265 | if (!empty($valueType) && gettype($result)!= $valueType){ 266 | $result = $default; 267 | } 268 | 269 | return $result; 270 | } 271 | 272 | /** 273 | * 以二维数组的形式加载多个日志配置文件 274 | * @param $arr array 二维数组 275 | */ 276 | public static function loadConfig($arr){ 277 | if(!empty($arr) && is_array($arr)){ 278 | foreach($arr as $name =>$config){ 279 | self::loadOneConfig($config,$name); 280 | } 281 | } 282 | } 283 | 284 | /** 285 | * 加载一个日志配置文件 286 | * @param array $arr 287 | * @param string $name 288 | * @return bool 289 | * @throws TLoggerException 290 | */ 291 | public static function loadOneConfig(Array $arr,$name=''){ 292 | $name = self::getArrayValue($arr,'name',$name,"string"); 293 | if (empty($name)){ 294 | throw new TLoggerException("no config name"); 295 | } 296 | $arr['name'] = $name; 297 | 298 | $handle = self::getArrayValue($arr,'mode',self::LOG_MODE_FILE,'string'); 299 | $handle = $handle.'Handle'; 300 | if (!class_exists($handle,false) || get_parent_class($handle) != 'LoggerHandle'){ 301 | throw new TLoggerException("({$name}) logger handle '{$handle}' not defined or is not sub-class of LoggerHandle"); 302 | } 303 | 304 | $arr = call_user_func(array($handle,'parseConfig'),$arr); 305 | $arr['handleClass'] = $handle; 306 | if (empty($arr)){ 307 | throw new TLoggerException("({$name}) logger handle {$$handle} config parse error"); 308 | } 309 | 310 | self::$g_config_arr[$name] = $arr; 311 | if (isset(self::$logPool[$name])){ 312 | unset(self::$logPool[$name]); 313 | } 314 | return true; 315 | } 316 | 317 | 318 | /** 319 | * 根据日志名称,获取一个日志实例 320 | * @param string $name 配置名称 321 | * @param bool $isNew 是否生成一个新的日志对象 322 | * @return object logger 323 | * @throws Exception 324 | */ 325 | public static function getLogger($name='default',$isNew=false){ 326 | if(!isset(self::$logPool[$name]) || $isNew == true){ 327 | // 是否加载了默认配置文件 328 | if (!isset(self::$g_config_arr['default'])){ 329 | self::loadOneConfig(array( 330 | 'name' => 'default', // 日志名称 331 | 'isLogging' => true, // 当前日志是否记录 332 | 'basePath' => self::$g_basePath, // 当前日志的记录根目录,没有,默认全局目录:g_basePath 333 | 'mode' => self::LOG_MODE_FILE, // 记录模式 334 | 'level' => self::LOG_LEVEL_DEBUG, // 日志等级 335 | 'frequency' => self::LOG_FREQUENCY_NONE, // 切割日志方式 336 | )); 337 | } 338 | 339 | // 删除已有的logger对象 340 | if(isset(self::$logPool[$name])){ 341 | unset(self::$logPool[$name]); 342 | } 343 | 344 | // 检测配置文件是否加载 345 | if(!isset(self::$g_config_arr[$name])){ 346 | throw new TLoggerException("Make sure that the log configuration which name is '{$name}' is loaded successfully"); 347 | } 348 | 349 | // 根据配置文件实例化logger,并保存到logger单例数组中 350 | $config = self::$g_config_arr[$name]; 351 | $reflector = new ReflectionClass($config['handleClass']); 352 | $logger = $reflector->newInstance($config); 353 | self::$logPool[$name] = $logger; 354 | } 355 | 356 | return self::$logPool[$name]; 357 | } 358 | } 359 | 360 | //测试 361 | if(strtolower(PHP_SAPI) == 'cli' && isset($argv) && basename(__FILE__) == basename($argv[0])){ 362 | $config = array( // 日志配置文件数组,default是默认配置项 363 | 'name' => 'test', 364 | 'level' => TLogger::LOG_LEVEL_INFO, 365 | 'frequency' => TLogger::LOG_FREQUENCY_MINUTE 366 | ); 367 | TLogger::$g_basePath = __DIR__.DIRECTORY_SEPARATOR.'log'; 368 | TLogger::loadOneConfig($config); 369 | 370 | $logger = TLogger::getLogger("test"); 371 | $logger->debug("this is debug info "); 372 | $logger->info(array("is","info","recode")); 373 | $logger->warn(21); 374 | $logger->error("error info "); 375 | $logger->fatal($logger); 376 | } -------------------------------------------------------------------------------- /framework/extension/TMysql.php: -------------------------------------------------------------------------------- 1 | connectionName = $connectionName; 17 | $this->dbName = $dbname; 18 | $dbConnection = new \mysqli($host, $username, $passwd, $dbname, $port, $socket); 19 | $dbConnection->options(MYSQLI_OPT_CONNECT_TIMEOUT,5); 20 | if ($dbConnection->connect_error){ 21 | throw new \Exception("mysql {$connectionName} connection failure:({$dbConnection->connect_errno}){$dbConnection->connect_error}",-1); 22 | } 23 | $dbConnection->set_charset($charset); 24 | $this->dbConnection = $dbConnection; 25 | } 26 | 27 | public function __destruct(){ 28 | if ($this->dbConnection){ 29 | // 操作对象销毁时候,关闭连接 30 | @$this->dbConnection->close(); 31 | } 32 | } 33 | 34 | /** 35 | * 查看当前连接是否关闭 36 | * @return bool 37 | */ 38 | public function ping(){ 39 | // $autoReconnect = empty(ini_get('mysqli.reconnect')); 40 | try{ 41 | if(@$this->dbConnection->ping()){ 42 | return true; 43 | }else{ 44 | @$this->dbConnection->close(); 45 | return false; 46 | } 47 | }catch (\Exception $e){ 48 | @$this->dbConnection->close(); 49 | return false; 50 | } 51 | } 52 | 53 | /** 54 | * 获取当前默认的字符集 55 | * @return string 字符集 56 | */ 57 | public function getCharset(){ 58 | return $this->dbConnection->character_set_name(); 59 | } 60 | 61 | 62 | /** 63 | * 获取select结果集中的所有结果 64 | * @param $option 65 | * @return array 66 | */ 67 | protected function fetch_all($option){ 68 | $result = array(); 69 | if(method_exists($option,'fetch_all')){ 70 | $result = $option->fetch_all(MYSQLI_ASSOC); 71 | }else{ 72 | while($row = $option->fetch_assoc()){ 73 | $result[] = $row; 74 | } 75 | } 76 | 77 | return $result; 78 | } 79 | 80 | 81 | 82 | /** 83 | * 切换默认数据库 84 | * @param $dbName String 要切换的数据库名称 85 | * @return $this 做链式访问 86 | * @throws Exception 87 | */ 88 | public function switchDb($dbName){ 89 | if (is_string($dbName) && !empty($dbName)){ 90 | if($this->dbConnection->select_db($dbName)){ 91 | return $this; 92 | }; 93 | throw new \Exception("mysql {$this->connectionName} switch db from {$this->dbName} to {$dbName} failure:({$this->dbConnection->errno}){$this->dbConnection->error}",-1); 94 | } 95 | throw new \Exception("mysql {$this->connectionName} switch db from {$this->dbName} to {$dbName} failure:param error",-1); 96 | } 97 | 98 | 99 | /** 100 | * 执行数据库操作 101 | * @param $sql String 要执行的sql语句 102 | * @param array $params 如果不为空,进行参数预处理 103 | * @return bool|int|mixed 执行select等返回结果集数组,执行insert返回自增值,执行其他返回受影响的行数。失败返回false 104 | * @throws Exception 支持出错抛出异常。 105 | */ 106 | public function query($sql,$params=array()){ 107 | $returnValue = true; 108 | $sql = trim($sql,' '); 109 | if (empty($params)){ 110 | $result = $this->dbConnection->query($sql); 111 | if ($result === false){ 112 | if ($this->dbConnection->error){ 113 | throw new \Exception("{$this->connectionName} query error:({$this->dbConnection->errno}){$this->dbConnection->error}"); 114 | }else{ 115 | return false; 116 | } 117 | } 118 | 119 | if ($result === true){ 120 | if (stripos($sql,'insert') === false){ 121 | return $this->dbConnection->affected_rows; 122 | }else{ 123 | return $this->dbConnection->insert_id; 124 | } 125 | } 126 | $returnValue = $this->fetch_all($result); 127 | $result->close(); 128 | }else{ 129 | $statement = $this->dbConnection->prepare($sql); 130 | if ($statement == false){ 131 | throw new \Exception("{$this->connectionName} prepare error:({$this->dbConnection->errno}){$this->dbConnection->error}"); 132 | } 133 | $types = ''; 134 | $data = array(''); 135 | foreach($params as $key=>$value){ 136 | $type = gettype($value); 137 | switch($type){ 138 | case 'string':{ 139 | $types .= 's'; 140 | $data[] = &$params[$key]; 141 | }break; 142 | case 'integer':{ 143 | $types .= 'i'; 144 | $data[] = &$params[$key]; 145 | }break; 146 | case 'double':{ 147 | $types .= 'd'; 148 | $data[] = &$params[$key]; 149 | }break; 150 | default: 151 | $types .= 'b'; 152 | $data[] = &$params[$key]; 153 | } 154 | } 155 | $data[0] = $types; 156 | $method = new \ReflectionMethod($statement,'bind_param'); 157 | if ($method->invokeArgs($statement,$data) == false){ 158 | throw new \Exception("{$this->connectionName} bind_param error:({$statement->errno}){$statement->error}"); 159 | } 160 | $rel = $statement->execute(); 161 | if ($rel == false){ 162 | throw new \Exception("{$this->connectionName} execute error:({$statement->errno}){$statement->error}"); 163 | } 164 | $arr = explode(' ',$sql); 165 | $type = isset($arr[0])?strtolower($arr[0]):''; 166 | switch($type){ 167 | case 'explain': 168 | case 'select':{ 169 | $rel = $statement->get_result(); 170 | if ($rel === false){ 171 | throw new \Exception("{$this->connectionName} select error:({$this->dbConnection->errno}){$this->dbConnection->error}"); 172 | } 173 | $returnValue = $this->fetch_all($rel); 174 | }break; 175 | case 'insert':{ 176 | $returnValue = $this->dbConnection->insert_id; 177 | }break; 178 | case 'update': 179 | case 'delete':{ 180 | $returnValue = $this->dbConnection->affected_rows; 181 | }; 182 | } 183 | $statement->close(); 184 | } 185 | 186 | return $returnValue; 187 | } 188 | 189 | /** 190 | * 根据数组插入一条记录 191 | * @param string $table 要插入的表名 192 | * @param array $arr 插入的数组(关联数组) 193 | * @return bool|int|mixed 194 | * @throws \Exception 195 | */ 196 | public function insert($table,$arr = array()){ 197 | if(empty($arr) || empty($table) || !is_string($table) || !is_array($arr)){ 198 | return false; 199 | } 200 | $fields = array_keys($arr); 201 | $tempArr = array_fill(0,count($fields),'?'); 202 | $params = array_values($arr); 203 | 204 | foreach ($fields as $index => $row){ 205 | $fields[$index] = "`{$row}`"; 206 | } 207 | $fields = '('.implode(',',$fields).')'; 208 | $tempArr = '('.implode(',',$tempArr).')'; 209 | $sql = "insert `{$table}`{$fields} values{$tempArr};"; 210 | return $this->query($sql,$params); 211 | } 212 | 213 | /** 214 | * 一次性插入多条记录 215 | * @param string $table 插入的表名称 216 | * @param array $arr 二维数组,具有相同的表结构 217 | * @return int 返回插入的最后一条记录的自增id 218 | */ 219 | public function insertMulti($table,$arr=array(array(),array())){ 220 | if(empty($arr) || empty($table) || !is_string($table) || !is_array($arr) || !is_array($arr[0])){ 221 | return false; 222 | } 223 | $fields = array_keys($arr[0]); 224 | $tempArr = array_fill(0,count($fields),'?'); 225 | foreach ($fields as $index => $row){ 226 | $fields[$index] = "`{$row}`"; 227 | } 228 | $fields = '('.implode(',',$fields).')'; 229 | 230 | $tempArrString = '('.implode(',',$tempArr).')'; 231 | $params = array(); 232 | $tempArr = array(); 233 | foreach ($arr as $row){ 234 | $tempArr[] = $tempArrString; 235 | $temp = array_values($row); 236 | $params = array_merge($params,$temp); 237 | } 238 | $tempArrString = implode(',',$tempArr); 239 | 240 | 241 | $sql = "insert `{$table}`{$fields} values{$tempArrString};"; 242 | return $this->query($sql,$params); 243 | } 244 | 245 | 246 | /** 247 | * 返回mysqli对象 248 | * @return mysqli|null 249 | */ 250 | public function getConnection(){ 251 | $connection = $this->dbConnection; 252 | if($connection instanceof \mysqli){ 253 | return $connection; 254 | }else{ 255 | return null; 256 | } 257 | } 258 | } 259 | 260 | 261 | class TMysql{ 262 | const VERSION = '1.0.1'; // 组件版本号 263 | private static $dbConnectionPool = array(); 264 | private static $configs = array(); 265 | 266 | public static function loadOneConfig($arr){ 267 | mysqli_report(MYSQLI_REPORT_ALL^MYSQLI_REPORT_INDEX); 268 | if (!extension_loaded('mysqli')){ 269 | throw new \Exception('need extension mysqli!', -1); 270 | } 271 | if (!is_array($arr) || empty($arr)){ 272 | throw new \Exception('config not empty!', -1); 273 | } 274 | if (!isset($arr['connectionName']) || empty($arr['connectionName']) || !is_string($arr['connectionName'])){ 275 | throw new \Exception('connectionName not exit or not a string!',-1); 276 | } 277 | 278 | $config = array(); 279 | $config['connectionName'] = $arr['connectionName']; 280 | $config['host'] = isset($arr['host'])?$arr['host']:''; 281 | $config['userName'] = isset($arr['userName'])?$arr['userName']:''; 282 | $config['password'] = isset($arr['password'])?$arr['password']:''; 283 | $config['dbName'] = isset($arr['dbName'])?$arr['dbName']:''; 284 | $config['port'] = isset($arr['port'])?$arr['port']:3306; 285 | $config['checkConnection'] = isset($arr['checkConnection'])?isset($arr['checkConnection']):false; 286 | $config['charset'] = isset($arr['charset'])?$arr['charset']:'utf8'; 287 | 288 | self::$configs[$config['connectionName']] = $config; 289 | if (isset(self::$dbConnectionPool[$config['connectionName']])){ 290 | unset(self::$dbConnectionPool[$config['connectionName']]); 291 | } 292 | return true; 293 | } 294 | 295 | public static function getConnection($connectionName){ 296 | if (!is_string($connectionName) || !isset(self::$configs[$connectionName])){ 297 | throw new \Exception("mysql config : {$connectionName} is't loaded !",-1); 298 | } 299 | 300 | // 获取连接前,判断连接是否可用 301 | if (isset(self::$dbConnectionPool[$connectionName]) && self::$configs[$connectionName]['checkConnection']){ 302 | $rel = self::$dbConnectionPool[$connectionName]->ping(); 303 | if($rel == false){ 304 | unset(self::$dbConnectionPool[$connectionName]); 305 | } 306 | } 307 | 308 | // 如果不存在连接,新建一个连接 309 | if(!isset(self::$dbConnectionPool[$connectionName])){ 310 | $config = self::$configs[$connectionName]; 311 | $dbConnection = new MysqlOperation($connectionName,$config['host'],$config['userName'],$config['password'],$config['dbName'],$config['port'],$config['charset']); 312 | self::$dbConnectionPool[$connectionName] = $dbConnection; 313 | } 314 | 315 | return self::$dbConnectionPool[$connectionName]; 316 | } 317 | 318 | public static function clearAllConnection(){ 319 | self::$dbConnectionPool = array(); 320 | } 321 | } 322 | 323 | 324 | //测试 325 | if(strtolower(PHP_SAPI) == 'cli' && isset($argv) && basename(__FILE__) == basename($argv[0])){ 326 | $config = array( 327 | 'connectionName' => 'dbLol', 328 | 'host'=> '*.*.*', 329 | 'userName' => 'xxxx', 330 | 'password' => '*****', 331 | 'dbName' => 'test', 332 | 'port' => '3306' 333 | ); 334 | 335 | TMysql::loadOneConfig($config); 336 | $lolConnection = TMysql::getConnection('dbLol'); 337 | $data = $lolConnection->query('select * from test where id<=?;',array(10)); 338 | print_r($data); 339 | } 340 | -------------------------------------------------------------------------------- /framework/extension/TRedis.php: -------------------------------------------------------------------------------- 1 | close(); 14 | } 15 | } 16 | 17 | class RedisOperationException extends \RedisException{ 18 | 19 | } 20 | 21 | class TRedis { 22 | const VERSION = '1.0.1'; // 组件版本号 23 | public static $maxConnectionTime = 5; // 最大连接时间 24 | protected static $connectionPool = array(); // 连接池对象 25 | 26 | protected static $configs = array(); // 配置项 27 | 28 | /** 29 | * 获取数组值 30 | * @param array $arr 31 | * @param string $key 32 | * @param null $default 33 | * @return mixed|null 34 | */ 35 | public static function getArrayValue($arr=array(), $key='',$default=null){ 36 | $response = null; 37 | if(is_array($arr) && isset($arr[$key])){ 38 | $response = $arr[$key]; 39 | } 40 | if($response == null){ 41 | $response = $default; 42 | } 43 | return $response; 44 | } 45 | 46 | /** 47 | * 加载单个配置文件 48 | * @param array $configArr 49 | */ 50 | public static function loadOneConfig($configArr= array()){ 51 | $tempConfig = array(); 52 | $tempConfig['connectionName'] = self::getArrayValue($configArr,'connectionName'); // 连接名称 53 | $tempConfig['host'] = self::getArrayValue($configArr,'host'); // 连接host 54 | $tempConfig['port'] = self::getArrayValue($configArr,'port',6379); // 端口号 55 | $tempConfig['database'] = self::getArrayValue($configArr,'database',0); // 使用数据库索引 56 | $tempConfig['password'] = self::getArrayValue($configArr,'password'); // 使用密码 57 | $tempConfig['prefix'] = self::getArrayValue($configArr,'prefix',''); // 数据库查询中的key前缀 58 | $tempConfig['checkConnection'] = self::getArrayValue($configArr,'checkConnection',false); 59 | if(empty($tempConfig['connectionName']) || !is_string($tempConfig['connectionName'])){ 60 | $tempConfig['name'] = 'default'; 61 | } 62 | self::$configs[$tempConfig['connectionName']] = $tempConfig; 63 | } 64 | 65 | /** 66 | * 获取一个连接对象 67 | * @param string $connectionName 68 | * @return mixed 69 | * @throws \Exception 70 | * @throws \RedisException 71 | */ 72 | public static function getConnection($connectionName ='default'){ 73 | if (!is_string($connectionName) || !isset(self::$configs[$connectionName])){ 74 | throw new RedisOperationException("redis config : {$connectionName} is't loaded !",-1); 75 | } 76 | 77 | // 检测连接是否断开,断开重连 78 | if(isset(self::$connectionPool[$connectionName]) && self::$configs[$connectionName]['checkConnection']){ 79 | try{ 80 | $redisObject = self::$connectionPool[$connectionName]; 81 | if($redisObject->ping() != '+PONG'){ 82 | throw new RedisOperationException("redis ping error",-1); 83 | } 84 | }catch (RedisOperationException $e){ 85 | @self::$connectionPool[$connectionName]->close(); 86 | unset(self::$connectionPool[$connectionName]); 87 | } 88 | } 89 | 90 | // 创建连接 91 | if(!isset(self::$connectionPool[$connectionName])){ 92 | $redisObject = new RedisOperation(); 93 | $rel = $redisObject->connect(self::$configs[$connectionName]['host'],self::$configs[$connectionName]['port'],self::$maxConnectionTime); 94 | if(!$rel){ 95 | throw new RedisOperationException("redis connection error:{$connectionName}",-1); 96 | } 97 | if(!empty(self::$configs[$connectionName]['password'])){ 98 | $rel = $redisObject->auth(self::$configs[$connectionName]['password']); 99 | if(!$rel){ 100 | $redisObject->close(); 101 | throw new RedisOperationException("redis connection auth error:{$connectionName}",-1); 102 | } 103 | } 104 | $db = (int)self::$configs[$connectionName]['database']; 105 | $rel = $redisObject->select($db); 106 | if(!$rel){ 107 | $redisObject->close(); 108 | throw new RedisOperationException("redis select db error:name-{$connectionName},db-{$db}",-1); 109 | } 110 | if(is_string(self::$configs[$connectionName]['prefix']) && !empty(self::$configs[$connectionName]['prefix'])){ 111 | $redisObject->setOption(\Redis::OPT_PREFIX,self::$configs[$connectionName]['prefix']); 112 | } 113 | 114 | 115 | self::$connectionPool[$connectionName] = $redisObject; 116 | } 117 | return self::$connectionPool[$connectionName]; 118 | } 119 | 120 | /** 121 | * 清理连接 122 | */ 123 | public static function clearAllConnection(){ 124 | self::$connectionPool = array(); 125 | } 126 | } 127 | 128 | 129 | //测试 130 | if(strtolower(PHP_SAPI) == 'cli' && isset($argv) && basename(__FILE__) == basename($argv[0])){ 131 | $config = array( 132 | 'connectionName' => 'cacheRd', 133 | 'host'=> '115.28.149.242', 134 | 'password' => '', 135 | 'database' => 0, 136 | 'port' => 6379, 137 | 'prefix' => '', 138 | 'checkConnection' => false 139 | ); 140 | 141 | TRedis::loadOneConfig($config); 142 | $redis = TRedis::getConnection('cacheRd'); 143 | echo $redis->dbSize(); 144 | print_r($redis->keys('*')); 145 | } -------------------------------------------------------------------------------- /framework/queue/LocalRequestQueue.php: -------------------------------------------------------------------------------- 1 | queue = new \SplQueue(); 19 | } 20 | 21 | public function add(Request $request){ 22 | $this->queue->enqueue($request); 23 | } 24 | 25 | public function get(){ 26 | if($this->isEmpty()){ 27 | return null; 28 | } 29 | return $this->queue->dequeue(); 30 | } 31 | 32 | public function isEmpty(){ 33 | return $this->queue->isEmpty(); 34 | } 35 | 36 | public function count(){ 37 | return $this->queue->count(); 38 | } 39 | 40 | public function isFull(){ 41 | if($this->count() >= $this->maxCount){ 42 | return true; 43 | } 44 | return false; 45 | } 46 | 47 | public function __destruct(){ 48 | unset($this->queue); 49 | } 50 | } -------------------------------------------------------------------------------- /framework/queue/RequestQueue.php: -------------------------------------------------------------------------------- 1 | downloader = $downloader; 22 | parent::__construct($taskName); 23 | } 24 | 25 | public function coroutine(){ 26 | do{ 27 | if (!TSpider::$requestQueue->isEmpty()){ 28 | do{ 29 | $request = TSpider::$requestQueue->get(); 30 | $rel = $this->downloader->addRequest($request); 31 | 32 | // 如果下载队列添加失败,将请求重新放回请求队列中 33 | if($request instanceof Request && !$rel){ 34 | TSpider::$requestQueue->add($request); 35 | } 36 | }while($rel); 37 | } 38 | yield true; 39 | }while(true); 40 | } 41 | } -------------------------------------------------------------------------------- /framework/task/DelayTimer.php: -------------------------------------------------------------------------------- 1 | afterDownload($request,$response); 30 | if($rel !== false){ 31 | $worker = new $request->workerName(); 32 | 33 | if($worker->beforeParse() !== false){ 34 | $worker->parse($request,$response); 35 | } 36 | $worker->afterParse(); 37 | } 38 | } 39 | 40 | /** 41 | * 初始化下载器 42 | * DownloadTask constructor. 43 | * @param string $taskName 44 | */ 45 | public function __construct($taskName=''){ 46 | $this->multiDownloader = curl_multi_init(); 47 | parent::__construct($taskName); 48 | } 49 | 50 | /** 51 | * 52 | * @param $request 53 | * @return bool 54 | */ 55 | public function addRequest($request){ 56 | $response = false; 57 | do{ 58 | if($this->multiDownloader == null){ 59 | break; 60 | } 61 | 62 | if(!($request instanceof Request)){ 63 | break; 64 | } 65 | 66 | $currentRequestNum = count($this->currentRequestMap); 67 | if($currentRequestNum >= $this->windowSize){ 68 | break; 69 | } 70 | 71 | $rel = $this->beforeDownload($request); 72 | if($rel === false){ 73 | break; 74 | } 75 | 76 | if($rel instanceof Request){ 77 | $request = $rel; 78 | } 79 | $ch = $request->createCurlObject(); 80 | $key = (string)$ch; 81 | $this->currentRequestMap[$key] = $request; 82 | curl_multi_add_handle($this->multiDownloader, $ch); 83 | 84 | $this->spareTime = 0; 85 | $response = true; 86 | }while(false); 87 | 88 | return $response; 89 | } 90 | 91 | public function beforeDownload(Request $request){ 92 | return null; 93 | } 94 | 95 | public function afterDownload(Request $request,Response $response){ 96 | return null; 97 | } 98 | 99 | // 获取下载器闲置时间 100 | public function getSpareTime(){ 101 | if($this->spareTime == 0){ 102 | return 0; 103 | }else{ 104 | return time()- $this->spareTime; 105 | } 106 | } 107 | 108 | public function coroutine() 109 | { 110 | do { 111 | while (($execrun = curl_multi_exec($this->multiDownloader, $running)) == CURLM_CALL_MULTI_PERFORM) ; 112 | if ($execrun != CURLM_OK) { 113 | if($this->spareTime == 0){ 114 | $this->spareTime = time(); 115 | } 116 | } 117 | 118 | // 一旦有一个请求完成,找出来,因为curl底层是select,所以最大受限于1024 119 | while ($done = curl_multi_info_read($this->multiDownloader)) 120 | { 121 | // 从请求中获取信息、内容、错误 122 | $info = curl_getinfo($done['handle']); 123 | $output = curl_multi_getcontent($done['handle']); 124 | $error = curl_error($done['handle']); 125 | $response = new Response($info,$output,$error); 126 | 127 | $key = (string)$done['handle']; 128 | $request = $this->currentRequestMap[$key]; 129 | $this->dispatch($request,$response); 130 | 131 | // 把请求已经完成了得 curl handle 删除 132 | unset($this->currentRequestMap[$key]); 133 | curl_multi_remove_handle($this->multiDownloader, $done['handle']); 134 | } 135 | 136 | // 当没有数据的时候进行堵塞,把 CPU 使用权交出来,避免上面 do 死循环空跑数据导致 CPU 100% 137 | if ($running) { 138 | $rel = curl_multi_select($this->multiDownloader, 1); 139 | if($rel == -1){ 140 | usleep(1000); 141 | } 142 | } 143 | 144 | if( $running == false){ 145 | if($this->spareTime == 0){ 146 | $this->spareTime = time(); 147 | } 148 | } 149 | yield true; 150 | } while (true); 151 | 152 | // 关闭任务 153 | curl_multi_close($this->multiDownloader); 154 | unset($this->multiDownloader); 155 | } 156 | } -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | ## TSpider 2 | 单进程,事件循环+协程方式实现的php爬虫框架。可以参考如何使用php的协程与curl_multi_*的使用。 3 | 4 | 5 | [php中curl_multi函数集的用法](https://github.com/hirudy/article/blob/master/php/php%E4%B8%ADcurl_multi%E5%87%BD%E6%95%B0%E9%9B%86%E7%9A%84%E7%94%A8%E6%B3%95.md) 6 | 7 | [转】在PHP中使用协程实现多任务调度](http://www.jianshu.com/p/9904b03a140c) 8 | 9 | ## 例子 10 | 参见./example中的例子。 11 | --------------------------------------------------------------------------------