├── README.md ├── moonwalker-scm-1.rockspec └── moonwalker.lua /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | # This module is supported under [tarantool/moonwalker](https://github.com/tarantool/moonwalker) 6 | 7 | # Per-space updater for Tarantool 1.6+ 8 | 9 | A Lua module for [Tarantool 1.6+](http://github.com/tarantool) that allows 10 | iterating over one space with the following logic: 11 | 12 | 1. Phase #1 (collect): 13 | 1. Create an iterator and iterate over the space for not more than `pause` items. 14 | 2. Put items-to-update into a temporary Lua table. 15 | 3. Yield the fiber, then reposition the iterator to GT(`last selected tuple`). 16 | 4. If collected enough (`take`) tuples, switch to phase #2 (update). 17 | 18 | 2. Phase #2 (update): 19 | 20 | 1. Iterate over the temporary table. 21 | 2. For each element, call the `actor` function. 22 | 3. Reposition the iterator to GT(`last selected tuple`) and switch back to 23 | phase #1 (collect). 24 | 25 | ## Table of contents 26 | 27 | * [Parameters](#parameters) 28 | * [Examples](#examples) 29 | 30 | ## Parameters 31 | 32 | * `space` - the space to process. 33 | * `index` (optional) - the index to iterate by. If not defined, use the primary 34 | index. 35 | * `examine`: (optional, function:boolean) - called during phase #1 (collect). 36 | **Must not yield**. 37 | * `actor`: (function, altname: updater) - called during phase #2 (update) for 38 | every examined tuple. 39 | * `pause`: `1000` (number) - make `fiber.yield` after stepping over this number 40 | of items. 41 | * `take`: `500` (number) - how many items should be collected before switching to 42 | phase #2 (update). 43 | * `dryrun`: `false` (boolean) - don't call the actor, only print the statistics. 44 | * `limit`: `2^63` (optional, number) - process not more than this number of items. 45 | Useful for testing. 
46 | * `progress`: `2%` (optional, string or number) - print a progress message every 47 | N records or percent. 48 | 49 | 50 | ## Examples 51 | 52 | ```lua 53 | local moonwalker = require 'moonwalker' 54 | 55 | -- update the whole database (the simplest example) 56 | moonwalker { 57 | space = box.space.users; 58 | actor = function(t) 59 | box.space.users:update({t[1]},{ 60 | {'=', 2, os.time()} 61 | }) 62 | end; 63 | } 64 | 65 | -- update the database, add missed fields (example with 'examine') 66 | moonwalker { 67 | space = box.space.users; 68 | examine = function(t) 69 | return #t < 4; -- user tuple has only 3 fields 70 | end; 71 | actor = function(t) 72 | box.space.users:update({t[1]},{ 73 | {'=', 4, "newfield"} 74 | }) 75 | end; 76 | } 77 | 78 | -- iterate by a specific index 79 | moonwalker { 80 | space = box.space.users; 81 | index = box.space.users.index.name; -- iterate over index 'name' 82 | pause = 100; -- be very polite, but slow: pause after every 100 records 83 | take = 100; -- collect 100 items for update 84 | limit = 1000; -- stop after examining the first 1000 tuples 85 | examine = function(t) 86 | return #t < 4; 87 | end; 88 | actor = function(t) 89 | box.space.users:update({t[1]},{ 90 | {'=',4,"newfield"} 91 | }) 92 | end; 93 | } 94 | ``` 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | -------------------------------------------------------------------------------- /moonwalker-scm-1.rockspec: -------------------------------------------------------------------------------- 1 | package = 'moonwalker' 2 | version = 'scm-1' 3 | source = { 4 | url = 'git://github.com/tarantool/moonwalker.git', 5 | branch = 'master', 6 | } 7 | description = { 8 | summary = "Iterate over box.space and make updates without freezing db", 9 | homepage = 'https://github.com/tarantool/moonwalker.git', 10 | license = 'BSD', 11 | } 12 | dependencies = { 13 | 'lua >= 5.1' 14 | } 15 | build = { 16 | 
--[==[
NOTE: in the repository dump, the physical lines occupied by this module also
carry the tail of moonwalker-scm-1.rockspec and the dump's file banner. That
text is reproduced verbatim here so that no content is lost:

type = 'builtin', 17 | modules = { 18 | ['moonwalker'] = 'moonwalker.lua' 19 | } 20 | } 21 | 22 | -- vim: syntax=lua 23 | -------------------------------------------------------------------------------- /moonwalker.lua: --------------------------------------------------------------------------------
]==]

--[[

moonwalker

Iterate over one space with the following logic

Collect stage:
1. create an iterator and iterate over space for not more than `pause` items
2. put items for update into temporary lua table
3. yield fiber, then reposition iterator to GT(`last selected tuple`)
4. if collected enough (`take`) tuples, switch to update phase

Update stage:

1. iterate over temporary table
2. for each element call `actor`
3. reposition iterator to GT(`last selected tuple`), switch to collect phase

+ Parameters
    + space: (required) - the space to process
    + index: (optional) - the index to iterate by; defaults to `space.index[0]`. Must be a TREE index.
    + examine: (optional, function:boolean) - called during collect phase. **must not yield**.
    + actor: (function, altname: updater) - called during update phase for every examined tuple
    + pause: `1000` (number) - make fiber.yield after stepping over this count of items.
    + take: `500` (number) - how many items should be collected before calling updates
    + dryrun: `false` (boolean) - don't call actor, only print stats
    + limit: `2^63` (optional, number) - process not more than limit items (useful for testing)
    + progress: `2%` (optional, string or number) - print progress message every N records or percent

+ Returns
    + table `{ processed = <number>, updated = <number>, yields = <number> }`
      (`updated` counts tuples selected for update, also in dryrun mode;
       `yields` counts both fiber yields and batch flushes)

]]

local fiber = require 'fiber'
local log = require 'log'
local ffi = require 'ffi'
local clock = require 'clock'

--- Build a key extractor for `index`.
-- Reads `fieldno` from every entry of `index.parts`, in part order, and
-- returns a function that maps a tuple to a key table `{ t[f1], t[f2], ... }`.
-- A table (rather than multiple return values) is returned so that the whole
-- composite key survives being passed through a single `key` argument.
-- NOTE(review): assumes every part exposes a numeric `fieldno`; confirm for
-- indexes whose parts are described differently.
-- @param index index object with a `parts` array
-- @return function(tuple) -> table key
local function create_keyfields(index)
    local fieldnos = {}
    -- ipairs, not pairs: the order of key parts is significant.
    for _, part in ipairs(index.parts) do
        fieldnos[#fieldnos + 1] = part.fieldno
    end
    local nparts = #fieldnos
    return function(t)
        local key = {}
        for i = 1, nparts do
            key[i] = t[fieldnos[i]]
        end
        return key
    end
end

--- Wrap `index:pairs(key, { iterator = itype })` into a plain closure.
-- @param index index object
-- @param itype iterator type passed as the `iterator` option (e.g. box.index.GT)
-- @param key   (optional) key to position the iterator at
-- @return function() -> next tuple, or nil when the iteration is exhausted
local function iiterator(index, itype, key)
    local f, ctx, state = index:pairs(key, { iterator = itype })
    local tuple
    return function ()
        state, tuple = f(ctx, state)
        if not state then return nil end
        return tuple
    end
end

--- Walk over a space in batches, yielding periodically, and call the actor
-- for every tuple accepted by `examine`. See the header comment for options.
-- @param opts table of options
-- @return table { processed, updated, yields }
-- Raises an error if a required option is missing, the index is not a TREE
-- index, `progress` cannot be parsed, or the actor fails on a tuple.
local function moonwalker(opts)
    assert(opts.space, "Required option .space")
    local space      = opts.space
    local waitevery  = opts.pause or 1000
    local takeby     = opts.take or 500
    local examine    = opts.examine
    local updater    = opts.actor or opts.updater
    assert(type(updater) == 'function', "Need .actor funtion")
    local dryrun     = opts.dryrun or false
    local limit      = opts.limit or 2^63
    local printevery = opts.progress or '2%'

    local index = opts.index or space.index[0]
    if not index then
        error("Space "..tostring(space.name).." has no index to iterate by", 2)
    end
    -- Validate the index before deriving anything from it, so the caller gets
    -- this message rather than a failure inside the key extractor.
    if index.type ~= "TREE" then
        error("Index "..index.name.." in space "..space.name.." is non-iteratable", 2)
    end
    local keyfields = create_keyfields(index)

    local size  = space:len()
    local start = clock.time()
    local prev  = start

    -- Normalise `progress` to "every N records".
    if type(printevery) == 'string' then
        if printevery:match('%%$') then
            local percent = tonumber(printevery:match('^(%d+)'))
            if not percent then error("Bad value for progress", 2) end
            local num = math.floor(size * percent / 100)
            if num > size or num < 0 then error("Bad value for progress", 2) end
            printevery = num
        else
            printevery = tonumber(printevery)
        end
    end
    if type(printevery) ~= 'number' then
        error("Bad value for progress", 2)
    end
    if printevery > size then
        printevery = math.floor(size/4)
    end

    log.info("Processing %d items in %s mode; wait: 1/%d; take: %d", size, dryrun and "dryrun" or "real", waitevery, takeby)

    -- Call the actor for every collected tuple (skipped entirely in dryrun).
    -- A failing actor aborts the whole walk with an error attributed to the
    -- caller of moonwalker (level 3: batch_update -> moonwalker -> caller).
    local function batch_update(toupdate)
        if dryrun then return end
        for _, tuple in ipairs(toupdate) do
            local ok, err = pcall(updater, tuple)
            if not ok then
                local repr = tostring(tuple)
                if #repr > 1000 then repr = string.sub(repr, 1, 995)..'...' end
                error(string.format("failed to update %s: %s", repr, tostring(err)), 3)
            end
        end
    end

    -- Create a fresh iterator positioned right after `last`. When nothing has
    -- been read yet (`last` is nil) start from the beginning of the index.
    local function reposition(last)
        if last == nil then
            return iiterator(index, box.index.ALL)
        end
        return iiterator(index, box.index.GT, keyfields(last))
    end

    local it = iiterator(index, box.index.ALL)
    local v
    local toupdate = {}
    local c = 0          -- iterations started (tuples stepped over, plus one at the end)
    local u = 0          -- tuples accepted for update
    local csw = 0        -- number of yields + batch flushes
    local clock_sum = 0  -- processor time spent between yields/flushes
    local clock1 = clock.proc()

    while true do
        c = c + 1
        if c % waitevery == 0 then
            clock_sum = clock_sum + (clock.proc() - clock1)
            csw = csw + 1
            fiber.sleep(0)
            clock1 = clock.proc()
            -- The space may have changed while we were away: re-seek.
            it = reposition(v)
        end
        v = it()

        if not v or c > limit then
            -- End of index or limit reached: flush what is left and stop.
            batch_update(toupdate)
            break
        end

        if not examine or examine(v) then
            u = u + 1
            toupdate[#toupdate + 1] = v
        end

        if #toupdate >= takeby then
            clock_sum = clock_sum + (clock.proc() - clock1)
            csw = csw + 1
            batch_update(toupdate)
            clock1 = clock.proc()
            toupdate = {}
            -- The actor may have yielded or modified the space: re-seek.
            it = reposition(v)
        end

        -- printevery can be 0 for very small spaces; then no progress is printed.
        if printevery > 0 and c % printevery == 0 then
            local now = clock.time()
            -- Reporting is best-effort: a formatting problem must not abort the walk.
            local r, e = pcall(function()
                local run = now - start
                local run1 = now - prev
                local rps = c/run
                local rps1 = printevery/run1
                collectgarbage("collect")
                local mem = collectgarbage("count")
                log.info("Processed %d (%d) (%0.1f%%) in %0.3fs (rps: %.0f tot; %.0f/%.1fs; %.2fms/c) ETA:+%ds (or %ds) Mem: %dK",
                    c, u,
                    100*c/size,
                    run,
                    rps, rps1, run1,
                    1000*clock_sum/csw,

                    (size - c)/rps1,
                    (size - c)/rps,

                    mem
                )
            end)
            if not r then print(e) end
            prev = now
        end
    end
    -- The last iteration did not process a tuple, hence c-1.
    return { processed = c-1; updated = u; yields = csw }
end

return moonwalker
--------------------------------------------------------------------------------