├── .gitignore ├── Func ├── Common.php └── file │ └── area.php ├── README.md ├── composer.json └── src ├── Library ├── Db.php ├── RabbitMq.php └── SwitchData.php ├── Models ├── Models.php ├── Table.php └── demo.php └── main.php /.gitignore: -------------------------------------------------------------------------------- 1 | /vendor/ 2 | -------------------------------------------------------------------------------- /Func/Common.php: -------------------------------------------------------------------------------- 1 | 'username' 49 | 说明:将源表的username字段原样转到新表的username字段 50 | 51 | * 一对一,格式转换 52 | 53 | 'pid' => 'pid/getNewpid' 54 | 说明:使用自定义函数将源表的pid字段转换后存入新表的pid字段 55 | 例 : 56 | public function getNewPid($p) 57 | { 58 | return ($p + 1000000001); 59 | } 60 | 61 | * 一对一,格式转换、数据过滤 62 | 63 | 'state' => 'status/getStatus', 64 | 说明:使用自定义函数getstatus处理status字段。 65 | 如果getstatus函数返回值为false,当前记录会被忽略。返回其它值,将返回值存入新表 66 | public function getstatus () { 67 | if(in_array($status, 1,2,3)) { 68 | //只迁移状态为1、2、3数据 69 | return $status; 70 | } else { 71 | //其它状态的数据不再迁移 72 | return false; 73 | } 74 | } 75 | * 多对一 76 | 77 | 'prokey' => 'prokey,prokeyword/getkey', 78 | 说明:将两个字段经过getkey函数处理后(返回值是字符串),存入新表的prokey字段 79 | * 多对多 80 | 81 | 'cate1-cate2-cate3' => 'cate1,cate2,cate3/getCate', 82 | 说明:getcate函数接收1个参数(数组),返回值为一个数组。 83 | * 将新表的字段设默认值 84 | 85 | 'newfieldname' => '4/returnme', 86 | 87 | 88 | ## 其它用途: 89 | 可以将此框架做为简单的并行读数据框架,callback中写自己需要做的事情。 90 | 例:user3.php 91 | 92 | 93 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "dormscript/data-transfer", 3 | "description": "数据迁移框架", 4 | "type": "library", 5 | "require": { 6 | "ext-swoole": "^4.1@alpha" 7 | }, 8 | "license": "MIT", 9 | "authors": [ 10 | { 11 | "name": "shuai wen", 12 | "email": "wenshuaiying@gongchang.com" 13 | } 14 | ], 15 | "autoload": { 16 | "psr-4": { 17 | "dormscript\\Data\\": "src/" 18 | } 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/Library/Db.php: -------------------------------------------------------------------------------- 1 | channel); 13 | $ex->setName($e_name); 14 | //$ex->setType(AMQP_EX_TYPE_FANOUT); //direct类型 15 | $ex->setType(AMQP_EX_TYPE_DIRECT); //direct类型 16 | $ex->setFlags(AMQP_DURABLE); //持久化 17 | //echo "Exchange Status:" . $ex->declare() . "\n"; 18 | //发送消息 19 | if ($isBroad) { 20 | $ex->publish($str, $routerKey, AMQP_NOPARAM, array('delivery_mode' => '2')); 21 | } else { 22 | $ex->publish($str, $routerKey); 23 | } 24 | } 25 | public function readQueue($q_name) 26 | { 27 | $q = new \AMQPQueue($this->channel); 28 | $q->setName($q_name); 29 | //$q->setFlags(AMQP_DURABLE); 30 | //$q->declare(); 31 | //$q->bind('exchange', $bindingkey); 32 | //消息获取 33 | $messages = $q->get(AMQP_AUTOACK); 34 | if ($messages) { 35 | return $messages->getBody(); 36 | } else { 37 | return false; 38 | } 39 | } 40 | 41 | public function reconnect($conn_args) 42 | { 43 | //创建连接和channel 44 | $this->conn = new \AMQPConnection($conn_args); 45 | try { 46 | if (!$this->conn->connect()) { 47 | die("Cannot connect to the broker!\n"); 48 | } 49 | } catch (EXCEPTION $e) { 50 | sleep(300); 51 | if (!$this->conn->connect()) { 52 | die("Cannot connect to the broker!\n"); 53 | } 54 | } 55 | $this->channel = new \AMQPChannel($this->conn); 56 | } 57 | /** 58 | * 入RabbitMQ队列 59 | * @param [type] $exName [交换机名] 60 | * @param [type] $routingKey [路由名] 61 | * @param [type] $value [队列的值] 62 | * @param [type] $dbType [数据库类型,默认为mysql] 63 | * 按照此规则生成的默认队列名称为 exName_routeKey_dbType;值为value 64 | */ 65 | public function set($exName, $routingKey, $value, $dbType = 'mysql') 66 | { 67 | //创建交换机,设置交换机名 68 | $ex = new \AMQPExchange($this->channel); 69 | $ex->setName($exName); 70 | $ex->setType(AMQP_EX_TYPE_DIRECT); //广播模式 71 | $ex->setFlags(AMQP_DURABLE); //交换器进行持久化,即 RabbitMQ 重启后会自动重建 72 | // $ex->declareExchange(); 73 | //设置队列名 74 | $queue = new \AMQPQueue($this->channel); 75 | $queue->setName($exName . '_' . $routingKey . '_' . $dbType); 76 | $queue->setFlags(AMQP_DURABLE); //队列进行持久化,即 RabbitMQ 重启后会自动重建 77 | $queue->declareQueue(); 78 | //交换机和路由绑定到队列 79 | $queue->bind($exName, $routingKey); 80 | //入队列 81 | if (is_array($value)) { 82 | $value = json_encode($value); 83 | } 84 | $ex->publish($value, $routingKey, AMQP_NOPARAM, array('delivery_mode' => '2')); 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /src/Library/SwitchData.php: -------------------------------------------------------------------------------- 1 | tablename = $tablename; 18 | $this->startid = $startid; 19 | $this->endid = $endid; 20 | $this->taskid = $taskid; 21 | $this->modelObj = \dormscript\Data\Models\Models::getObj($tablename); //获取Models对象 22 | 23 | if (!empty($ser)) { 24 | $this->modelObj->setSwooleSer($ser); 25 | } 26 | try { 27 | $srcData = $this->readRs(); //从源表中读取数据 28 | if (empty($srcData)) { 29 | return true; 30 | } 31 | $newData = array(); 32 | foreach ($srcData as $key => $row) { 33 | $newRs = $this->analysisRs($row); //做数据格式转换 34 | if ($newRs !== false) { 35 | $newData[$key] = $newRs; 36 | } 37 | } 38 | $this->writeRs($newData); //将数据写入目标数据表 39 | if ($this->modelObj->callbackRow == 'src') { 40 | $this->modelObj->callback($srcData); 41 | } elseif ($this->modelObj->callbackRow == 'desc') { 42 | $this->modelObj->callback($newData); 43 | } elseif ($this->modelObj->callbackRow == 'merge') { 44 | $mergeData = array(); 45 | foreach ($srcData as $key => $value) { 46 | if (isset($newData[$key])) { 47 | $mergeData[$key] = array_merge($value, $newData[$key]); 48 | } 49 | } 50 | $this->modelObj->callback($mergeData); 51 | } 52 | } catch (\Exception $e) { 53 | error_log("\nerror:" . $e->getMessage(), 3, "error.log"); 54 | return false; 55 | } 56 | return true; 57 | } 58 | 59 | /** 60 | * 根据配置,获取转移对应关系 61 | * @return [type] [description] 62 | */ 63 | public function readRs() 64 | { 65 | $sql = ''; 66 | if (!$this->modelObj->srcSql) { 67 | $sql = "select * from {$this->modelObj->getTablename()} "; 68 | } else { 69 | $sql = $this->modelObj->srcSql; 70 | } 71 | $sql .= " where {$this->modelObj->primaryKey} >= {$this->startid} and {$this->modelObj->primaryKey} < {$this->endid} "; 72 | $sql .= 'order by ' . $this->modelObj->primaryKey . ' ASC'; 73 | $dbType = !empty($this->modelObj->readDbName) ? $this->modelObj->readDbName : 'read'; 74 | $row = \dormscript\Data\Library\Db::exeSql($dbType, $sql, $this->taskid); 75 | return $row; 76 | } 77 | 78 | /** 79 | * 根据字段对应关系将源数据$row转换(处理一行数据) 80 | * @param [type] $row [description] 81 | * @return [type] [description] 82 | */ 83 | public function analysisRs($row) 84 | { 85 | $Arr = array(); 86 | foreach ($this->modelObj->fieldMap as $des => $src) { 87 | if (strpos($src, '/') !== false) { 88 | $a = explode('/', $src); 89 | if (strpos($a[0], ',')) { 90 | //$par = ''; 91 | $param = explode(',', $a[0]); 92 | foreach ($param as $p) { 93 | if (array_key_exists($p, $row)) { 94 | $par[] = $row[$p]; 95 | } else { 96 | $par[] = $p; 97 | } 98 | } 99 | $Arr[$des] = $row[$des] = call_user_func(array($this->modelObj, $a[1]), $par); 100 | if ($Arr[$des] === false) { 101 | return false; 102 | } 103 | //自定义函数,返回false,表示跳过当前记录 104 | } else { 105 | if ($a['0'] == '*') { 106 | $Arr[$des] = $row[$des] = call_user_func(array($this->modelObj, $a[1]), $row); 107 | } elseif (array_key_exists($a['0'], $row)) { 108 | $Arr[$des] = $row[$des] = call_user_func(array($this->modelObj, $a[1]), $row[$a[0]]); 109 | } else { 110 | $Arr[$des] = $row[$des] = call_user_func(array($this->modelObj, $a[1]), $a[0]); 111 | } 112 | } 113 | if ($Arr[$des] === false) { 114 | return false; 115 | } 116 | } else { 117 | $Arr[$des] = $row[$des] = $row[$src]; 118 | } 119 | if (stripos($des, '-')) { 120 | $temp = array_combine(explode("-", $des), $Arr[$des]); 121 | unset($Arr[$des]); 122 | foreach ($temp as $key => $value) { 123 | $Arr[$key] = $row[$key] = $value; 124 | } 125 | } 126 | } 127 | return $Arr; 128 | } 129 | 130 | /** 131 | * 将数据写到目标数据库 132 | * @param [type] $newData [description] 133 | * @return [type] [description] 134 | */ 135 | public function writeRs($newData) 136 | { 137 | if (empty($newData)) { 138 | return ''; 139 | } 140 | $fieldarray = array_keys(current($newData)); 141 | $fields = implode(",", $fieldarray); 142 | 143 | $sql = "replace into {$this->modelObj->descTable} " . "(" . $fields . ") VALUES"; 144 | foreach ($newData as $line) { 145 | $sql .= "\n("; 146 | foreach ($line as $field) { 147 | $sql .= "'" . addslashes($field) . "',"; 148 | } 149 | $sql = substr($sql, 0, -1) . "),"; 150 | } 151 | $sql = substr($sql, 0, -1); 152 | $rs = \dormscript\Data\Library\Db::exeSql($this->modelObj->writeDbName, $sql, $this->taskid); 153 | } 154 | } 155 | -------------------------------------------------------------------------------- /src/Models/Models.php: -------------------------------------------------------------------------------- 1 | srcTable)) { 23 | return $this->srcTable; 24 | } 25 | die("\n" . get_class($this) . " 未配置srcTable"); 26 | } 27 | public function returnme($p) 28 | { 29 | return $p; 30 | } 31 | public function setSwooleSer($ser) 32 | { 33 | $this->swooleSer = $ser; 34 | } 35 | public function getSwooleSer() 36 | { 37 | return $this->swooleSer; 38 | } 39 | public function callback($row) 40 | { 41 | return true; 42 | } 43 | 44 | public function int10($p) 45 | { 46 | $p = intval($p); 47 | if ($p > 4294967296) { 48 | $p = 0; 49 | } 50 | return $p; 51 | } 52 | public function __call($funname, $arguments) 53 | { 54 | return call_user_func_array($funname, $arguments); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/Models/demo.php: -------------------------------------------------------------------------------- 1 | startTime = time(); 18 | $this->setting = $setting; 19 | $this->max_task_num = 200; 20 | list($this->curId, $this->maxId) = $this->getId(); 21 | $this->minId = $this->curId; 22 | 23 | $this->serv = new \swoole_server("0.0.0.0", 9501); 24 | $this->serv->set(array( 25 | 'worker_num' => 1, 26 | 'daemonize' => false, 27 | 'max_request' => 10000, 28 | 'dispatch_mode' => 3, 29 | 'debug_mode' => 1, 30 | 'task_worker_num' => $this->max_task_num, 31 | )); 32 | $this->serv->on('WorkerStart', array($this, 'onWorkerStart')); 33 | $this->serv->on('Close', array($this, 'onClose')); 34 | $this->serv->on('Receive', array($this, 'onReceive')); 35 | $this->serv->on('Task', array($this, 'onTask')); 36 | $this->serv->on('Finish', array($this, 'onFinish')); 37 | $this->serv->start(); 38 | } 39 | /** 40 | * 获取每个表的最大ID,最小ID 41 | * @return [type] [description] 42 | */ 43 | public function getId() 44 | { 45 | $min = $max = array(); 46 | foreach ($this->setting as $tablename => $taskNum) { 47 | $modelObj = Models\Models::getObj($tablename); 48 | $dbType = !empty($modelObj->readDbName) ? $modelObj->readDbName : 'read'; 49 | 50 | //获取表主键名字 51 | $realTableName = $modelObj->getTablename(); 52 | $descSql = "desc $realTableName"; 53 | $row = Library\Db::exeSql($dbType, $descSql, 1); 54 | $primiaryKey = current($row['0']); 55 | 56 | //查出最小ID 57 | $sql = "select * from $realTableName order by $primiaryKey ASC limit 0,1"; 58 | $row = Library\Db::exeSql($dbType, $sql, 1); 59 | $min[$tablename] = empty($row['0']) ? 0 : intval(current($row['0'])); 60 | 61 | //查出最大ID 62 | $sql = "select * from $realTableName order by $primiaryKey DESC limit 0,1"; 63 | $row = Library\Db::exeSql($dbType, $sql, 1); 64 | $max[$tablename] = empty($row['0']) ? 0 : intval(current($row['0'])); 65 | 66 | if ($modelObj->descTable) { 67 | //从目标库中读出相关信息 68 | $descSql = "desc " . $modelObj->descTable; 69 | $row = Library\Db::exeSql('write', $descSql, 1); 70 | $primiaryKey = current($row['0']); 71 | 72 | $sql = "select * from " . $modelObj->descTable . " order by $primiaryKey DESC limit 0,1"; 73 | $row = Library\Db::exeSql('write', $sql, 1); 74 | if (empty($row)) { 75 | $descMaxId = 0; 76 | } else { 77 | $descMaxId = intval(current($row['0'])); 78 | } 79 | if ($descMaxId > $min[$tablename]) { 80 | $min[$tablename] = $descMaxId; 81 | } 82 | } 83 | } 84 | return array($min, $max); 85 | } 86 | 87 | public function onWorkerStart($serv, $worker_id) 88 | { 89 | if ($worker_id != 0) { 90 | return ''; 91 | } 92 | sleep(1); //等待task进程启动 93 | $taskid = 1; 94 | //根据配置的进程数量,启动所有表的数据迁移 95 | foreach ($this->setting as $tablename => $maxnum) { 96 | //启用多个task进程来处理表 97 | for ($i = 0; $i < $maxnum; $i++) { 98 | $this->addTask($tablename, $taskid++); 99 | } 100 | } 101 | } 102 | 103 | public function onClose($serv, $fd, $from_id) 104 | { 105 | echo "Client {$fd} close connection\n"; 106 | } 107 | 108 | public function onReceive($serv, $fd, $from_id, $str) 109 | { 110 | $params = explode("-", $str); 111 | echo "\n get data:" . $params['0'] . "\t"; 112 | switch ($params['0']) { 113 | case 'reassign': 114 | $ret = $this->reassign(); 115 | break; 116 | case 'setCurID': 117 | $ret = $this->setCurID($params['1'], $params['2']); 118 | break; 119 | case 'getinfo': 120 | $ret = $this->getInfo(); 121 | break; 122 | case 'singleData': 123 | $ret = $this->singleData($params['1'], $params['2']); 124 | break; 125 | } 126 | $serv->send($fd, $ret); 127 | } 128 | 129 | public function onTask($serv, $task_id, $from_id, $param) 130 | { 131 | //处理数据表tablename中ID>=$startid && $ID<$startid+100的记录 132 | list($tablename, $startid, $endid, $taskid) = $param; 133 | echo "\n OnTask: $taskid \t $tablename \t $startid - $endid "; 134 | $obj = new Library\SwitchData(); 135 | $rs = $obj->run($tablename, $startid, $endid, $taskid, $serv); 136 | while (!$rs) { 137 | //执行出错时,一直重试。关闭数据库连接,sleep 5秒 138 | Library\Db::delDbPool(); 139 | Models\Models::delObj($tablename); 140 | echo "\n 处理( $tablename, $startid, $endid )出错,释放所有资源并重新连接mysql \n"; 141 | sleep(5); 142 | $rs = $obj->run($tablename, $startid, $endid, $taskid); 143 | } 144 | if ($startid + 1 == $endid) { 145 | //增量更新,处理完成后不再重新处理 146 | return array($taskid); 147 | } 148 | return $taskid; 149 | } 150 | 151 | public function onFinish($serv, $task_id, $param) 152 | { 153 | if (is_array($param)) { 154 | //增量 155 | $taskid = current($param); 156 | $this->taskinfo[$taskid] = ''; 157 | } elseif ($param) { 158 | echo "\n onFinish : taskid: { $param } "; 159 | if ($this->taskinfo[$param]) { 160 | $this->addTask($this->taskinfo[$param], $param); 161 | } 162 | } else { 163 | echo "\n close mysqli \n"; 164 | } 165 | } 166 | 167 | /** 168 | * 根据tablename获取数据表迁移到的最大ID 169 | * @param [type] $tablename [description] 170 | * @return [type] [description] 171 | */ 172 | private function getcurId($tablename) 173 | { 174 | $startid = $endid = 0; 175 | $tmp = str_replace(".", "_", $tablename); //对表名转义 176 | 177 | if (!isset($this->maxId[$tmp]) || !isset($this->curId[$tmp])) { 178 | list($this->curId, $this->maxId) = $this->getId(); 179 | $this->minId = $this->curId; 180 | } 181 | if (!isset($this->curId[$tmp])) { 182 | $this->curId[$tmp] = 1; 183 | } 184 | $startid = $this->curId[$tmp]; 185 | 186 | if ($startid > $this->maxId[$tmp]) { 187 | //所有数据已经处理结束 188 | return false; 189 | } elseif ($startid + $this->step > $this->maxId[$tmp]) { 190 | //最后一次取数据不足100条 191 | $endid = $this->maxId[$tmp] + 1; 192 | $this->curId[$tmp] = $endid; 193 | } else { 194 | $endid = $startid + $this->step; 195 | $this->curId[$tmp] = $endid; 196 | } 197 | return array($startid, $endid); 198 | } 199 | 200 | private function addTask($tablename, $taskid) 201 | { 202 | $Ids = $this->getcurId($tablename); //获取开始ID 203 | if ($Ids === false) { 204 | $this->taskinfo[$taskid] = ''; 205 | return false; 206 | } 207 | list($startid, $endid) = $Ids; 208 | 209 | echo "\n addTask: { $taskid }"; 210 | $this->taskinfo[$taskid] = $tablename; //taskid这个进程用来处理tablename。在进行进程数量调整之前,taskid不会去处理其它表数据 211 | $this->serv->task(array($tablename, $startid, $endid, $taskid), $taskid); //投递task任务 212 | } 213 | 214 | /** 215 | * 处理单条数据 216 | * @param string $tablename 库名+表名(.分隔) 217 | * @param int $id 主键ID 218 | * @return [type] [description] 219 | */ 220 | private function singleData($tablename, $id) 221 | { 222 | //找到一个空闲进程来处理这个请求(只有task400 - task500处理增量) 223 | for ($i = $this->max_task_num - 100; $i < $this->max_task_num; $i++) { 224 | if (empty($this->taskinfo[$i])) { 225 | $taskid = $i; 226 | break; 227 | } 228 | } 229 | //如果未找到空闲进程 230 | if ($i == $this->max_task_num) { 231 | return "fail"; 232 | } 233 | $this->taskinfo[$taskid] = $tablename; //taskid这个进程用来处理tablename。在进行进程数量调整之前,taskid不会去处理其它表数据 234 | $this->serv->task(array($tablename, $id, $id + 1, $taskid), $taskid); //投递task任务 235 | return 'OK'; 236 | } 237 | /** 238 | * 根据配置文件重新分配task 239 | * @return [type] [description] 240 | */ 241 | public function reassign() 242 | { 243 | $task_work_id = 0; 244 | global $setting; 245 | $this->setting = $setting; 246 | foreach ($this->setting as $tablename => $maxnum) { 247 | for ($i = 0; $i < $maxnum; $i++) { 248 | $task_work_id += 1; 249 | //如果进程正处于休息状态,启动进程 250 | if (empty($this->taskinfo[$task_work_id])) { 251 | $this->addTask($tablename, $task_work_id); 252 | } else { 253 | $this->taskinfo[$task_work_id] = $tablename; 254 | } 255 | } 256 | } 257 | for ($i = $task_work_id + 1; $i < $this->max_task_num; $i++) { 258 | $this->taskinfo[$i] = ''; 259 | } 260 | return json_encode(array_count_values($this->taskinfo)); 261 | } 262 | public function setCurID($tablename, $Id) 263 | { 264 | $tmp = str_replace(".", "_", $tablename); //对表名转义 265 | $this->curId[$tmp] = $Id; 266 | return "setCurId success !"; 267 | } 268 | /** 269 | * 获取当前处理的状态 270 | * @return [type] [description] 271 | */ 272 | public function getInfo() 273 | { 274 | $ret = ''; 275 | $taskNum = array_count_values($this->taskinfo); 276 | $ret .= json_encode($taskNum); 277 | //状态信息:所有表的进度: tablename 现在ID 最大ID 几个进程在处理 278 | foreach ($this->setting as $tablename => $v) { 279 | $status[] = array( 280 | 'tablename' => $tablename, 281 | 'curId' => $this->curId[str_replace('.', '_', $tablename)], 282 | 'maxId' => $this->maxId[str_replace('.', '_', $tablename)], 283 | 'minId' => $this->minId[str_replace('.', '_', $tablename)], 284 | ); 285 | } 286 | $ret .= "\n" . json_encode($status); 287 | $ret .= "\n" . json_encode(array_filter($this->taskinfo)); 288 | $ret .= "\n" . $this->startTime; 289 | return $ret; 290 | } 291 | } 292 | --------------------------------------------------------------------------------