├── README.md ├── moonwalker-scm-1.rockspec └── moonwalker.lua /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | # Per-space updater for Tarantool 1.6+ 6 | 7 | A Lua module for [Tarantool 1.6+](http://github.com/tarantool) that allows 8 | iterating over one space with the following logic: 9 | 10 | 1. Phase #1 (collect): 11 | 1. Create an iterator and iterate over the space for not more than `pause` items. 12 | 2. Put items-to-update into a temporary Lua table. 13 | 3. Yield the fiber, then reposition the iterator to GT(`last selected tuple`). 14 | 4. If collected enough (`take`) tuples, switch to phase #2 (update). 15 | 16 | 2. Phase #2 (update): 17 | 18 | 1. Iterate over the temporary table. 19 | 2. For each element, call the `actor` function. 20 | 3. Reposition the iterator to GT(`last selected tuple`) and switch back to 21 | phase #1 (collect). 22 | 23 | ## Table of contents 24 | 25 | * [Parameters](#parameters) 26 | * [Examples](#examples) 27 | 28 | ## Parameters 29 | 30 | * `space` - the space to process. 31 | * `index` (optional) - the index to iterate by. If not defined, use the primary 32 | index. 33 | * `examine`: (optional, function:boolean) - called during phase #1 (collect). 34 | **Must not yield**. 35 | * `actor`: (function, altname: updater) - called during phase #2 (update) for 36 | every examined tuple. 37 | * `pause`: `1000` (number) - make `fiber.yield` after stepping over this number 38 | of items. 39 | * `take`: `600` (number) - how many items should be collected before switching to 40 | phase #2 (update). 41 | * `fp`: `3` (number) - fiber pool: how many fibers update tuples in parallel. 42 | * `txn`: `false` (boolean) - wrap update in transaction. Could improve the performance. 43 | * `dryrun`: `false` (boolean) - don't call the actor, only print the statistics. 
44 | * `silent`: `false` (boolean) - don't do informational prints (useful for use in code) 45 | * `limit`: `2^63` (optional, number) - process not more than this number of items. 46 | Useful for testing. 47 | * `progress`: `2%` (optional, string or number) - print a progress message every 48 | N records or percent. 49 | 50 | 51 | ## Examples 52 | 53 | ```lua 54 | local moonwalker = require 'moonwalker' 55 | 56 | -- update the whole database (the simplest example) 57 | moonwalker { 58 | space = box.space.users; 59 | actor = function(t) 60 | box.space.users:update({t[1]},{ 61 | {'=', 2, os.time()} 62 | }) 63 | end; 64 | } 65 | 66 | -- update the database, add missed fields (example with 'examine') 67 | moonwalker { 68 | space = box.space.users; 69 | examine = function(t) 70 | return #t < 4; -- user tuple has only 3 fields 71 | end; 72 | actor = function(t) 73 | box.space.users:update({t[1]},{ 74 | {'=', 4, "newfield"} 75 | }) 76 | end; 77 | } 78 | 79 | -- iterate by a specific index 80 | moonwalker { 81 | space = box.space.users; 82 | index = box.space.users.index.name; -- iterate over index 'name' 83 | pause = 100; -- be very polite, but slow: pause after every 100 records 84 | take = 100; -- collect 100 items for update 85 | limit = 1000; -- stop after examining the first 1000 tuples 86 | examine = function(t) 87 | return #t < 4; 88 | end; 89 | actor = function(t) 90 | box.space.users:update({t[1]},{ 91 | {'=',4,"newfield"} 92 | }) 93 | end; 94 | } 95 | ``` 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | -------------------------------------------------------------------------------- /moonwalker-scm-1.rockspec: -------------------------------------------------------------------------------- 1 | package = 'moonwalker' 2 | version = 'scm-1' 3 | source = { 4 | url = 'git+https://github.com/tarantool/moonwalker.git', 5 | branch = 'master', 6 | } 7 | description = { 8 | summary = "Iterate 
over box.space and make updates without freezing db", 9 | homepage = 'https://github.com/tarantool/moonwalker.git', 10 | license = 'BSD', 11 | } 12 | dependencies = { 13 | 'lua >= 5.1' 14 | } 15 | build = { 16 | type = 'builtin', 17 | modules = { 18 | ['moonwalker'] = 'moonwalker.lua' 19 | } 20 | } 21 | 22 | -- vim: syntax=lua 23 | -------------------------------------------------------------------------------- /moonwalker.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | moonwalker 4 | 5 | Iterate over one space with the following logic 6 | 7 | Collect stage: 8 | 1. create an iterator and iterate over space for not more than `pause` items 9 | 2. put items for update into temporary lua table 10 | 3. yield fiber, then reposition iterator to GT(`last selected tuple`) 11 | 4. if collected enough (`take`) tuples, switch to update phase 12 | 13 | Update stage: 14 | 15 | 1. iterate over temporary table 16 | 2. for each element call `actor` 17 | 3. reposition iterator to GT(`last selected tuple`), switch to collect phase 18 | 19 | + Parameters 20 | + examine: (optional, function:boolean) - called during collect phase. **must not yield**. 21 | + actor: (function, altname: updater) - called during update phase for every examined tuple 22 | + pause: `1000` (number) - make fiber.yield after stepping over this count of items. 
--    + take: `600` (number) - how many items should be collected before calling updates
--    + dryrun: `false` (boolean) - don't call actor, only print stats
--    + limit: `2^63` (optional, number) - process not more than limit items (useful for testing)
--    + progress: `2%` (optional, string or number) - print progress message every N records or percent
--
--]]

local fiber = require 'fiber'
local log = require 'log'
local ffi = require 'ffi'
local clock = require 'clock'

local M = {}

--- Build a key extractor for an index.
-- The returned function maps a tuple to a Lua table that holds the values of
-- the index parts (`index.parts[i].fieldno`), in the order the parts are
-- declared. That table is what the iterator is repositioned with.
-- @param index index object; only `index.parts` is read
-- @return function(t) -> table of key field values
local function create_keyfields(index)
    local fieldnos = {}
    -- ipairs, not pairs: for a composite key the order of parts matters
    for i, part in ipairs(index.parts) do
        fieldnos[i] = part.fieldno
    end
    local count = #fieldnos
    return function(t)
        local key = {}
        for i = 1, count do
            key[i] = t[fieldnos[i]]
        end
        return key
    end
end

--- Wrap `index:pairs(key, { iterator = itype })` into a plain closure.
-- Every call of the returned function yields the next tuple, or nil once
-- the underlying iterator is exhausted.
-- @param index index object providing `:pairs`
-- @param itype iterator type (e.g. box.index.ALL, box.index.GT)
-- @param key   (optional) key to position the iterator at
-- @return function() -> tuple | nil
local function iiterator(index, itype, key)
    local f, ctx, state = index:pairs(key, { iterator = itype })
    local tuple
    return function()
        state, tuple = f(ctx, state)
        if not state then return nil end
        return tuple
    end
end

--- Walk over a space and call `actor` for every tuple accepted by `examine`.
--
-- Options read from `opts`:
--   space              (required) space to walk over
--   index              index to iterate by; defaults to space.index[0]; must be TREE
--   examine            optional predicate called for each scanned tuple
--   actor / updater    (required) function called for each collected tuple
--   take               batch size, default 600
--   pause              yield after this many scanned tuples, default take*10
--   commit             flush an incomplete batch once this many tuples were
--                      scanned since its first collected tuple, default take*100
--   continue / cont    key to resume after (iteration starts at GT of it)
--   limit              scan not more than this many tuples, default 2^63
--   progress           number of records or a "N%" string, default '2%'
--   dryrun             collect and count, but never call the actor
--   silent             suppress informational log messages
--   txn                wrap every batch into box.begin()/box.commit()
--   fp                 fiber pool size; values > 1 are currently ignored
--   bg / background    run in a separate fiber
--   name               fiber name for background mode
--
-- @param opts table of options (not modified)
-- @return in foreground: { processed = n, updated = n, yields = n };
--         in background: { fiber = <fiber>, cancel = <string with a cancel snippet> }
local function moonwalker(opts)
    assert(opts.space, "Required option .space")
    local space = opts.space
    local takeby = opts.take or 600
    local waitevery = opts.pause or takeby*10
    local examine = opts.examine
    local updater = opts.actor or opts.updater
    assert(type(updater) == 'function', "Need .actor funtion")
    local dryrun = opts.dryrun or false
    local limit = opts.limit or 2^63
    local printevery = opts.progress or '2%'
    local commit = opts.commit or takeby * 100
    local continue = opts.continue or opts.cont
    local silent = opts.silent
    -- keep the default in a local instead of writing it back into the caller's table
    local fp = opts.fp or 1

    local index = opts.index or space.index[0]
    if not index then
        error("Space "..tostring(space.name).." has no index to iterate by", 2)
    end
    if index.type ~= "TREE" then
        error("Index "..index.name.." in space "..space.name.." is non-iteratable",2)
    end
    local keyfields = create_keyfields(index)

    local size = space:len()
    local start = clock.time()
    local prev = start

    if type(printevery) == 'string' then
        if printevery:match('%%$') then
            local percent = tonumber(printevery:match('^(%d+)'))
            if not percent then error("Bad value for progress",2) end
            local num = math.floor(size * percent / 100)
            if num > size or num < 0 then error("Bad value for progress",2) end
            printevery = num
        else
            printevery = tonumber(printevery)
        end
    end
    if type(printevery) ~= 'number' then error("Bad value for progress",2) end
    if printevery > size then
        printevery = math.floor(size/4)
    end
    -- NOTE: printevery may legitimately end up as 0 for small spaces;
    -- progress reporting is then skipped (see the guard in work()).

    if not silent then
        log.info("Processing %d items in %s mode; wait: 1/%d; take: %d / %d %s; fid=%s; %s",
            size, dryrun and "dryrun" or "real", waitevery, takeby, fp,
            opts.txn and "txn" or "single",
            fiber.id(), continue and "from "..tostring(box.tuple.new(continue)) or '')
    end

    local working = true

    -- Printable form of a tuple, truncated so error messages stay readable.
    local function describe(tuple)
        local t = tostring(tuple)
        if #t > 1000 then t = string.sub(t,1,995)..'...' end
        return t
    end

    -- Apply the actor to every collected tuple in the current fiber.
    -- On the first failure the open transaction (if any) is rolled back and
    -- the error is re-raised with the offending tuple in the message.
    local function batch_update_s(toupdate)
        if dryrun then return end
        if opts.txn then box.begin() end
        for _, tuple in ipairs(toupdate) do
            local ok, err = pcall(updater, tuple)
            if not ok then
                working = false
                -- do not leave a half-applied transaction behind
                if opts.txn then box.rollback() end
                error(string.format("failed to update %s: %s", describe(tuple), err), 3)
            end
        end
        if opts.txn then box.commit() end
    end

    -- Fiber-pool variant: splits the batch between `fp` fibers.
    -- Currently not selected (see below); kept for when the pool is re-enabled.
    local function batch_update_f(toupdate)
        if dryrun then return end
        local workers = math.max(1, math.floor(fp))
        local part = math.ceil(#toupdate / workers)
        local raise = false

        local wait = fiber.channel(1)
        -- count all workers up front: a worker that finishes without yielding
        -- must not signal completion before the others were even started
        local pending = workers
        for i = 0, workers - 1 do
            local first = i*part + 1
            local last = math.min((i+1)*part, #toupdate)
            fiber.create(function()
                if opts.txn then box.begin() end
                for x = first, last do
                    local ok, err = pcall(updater, toupdate[x])
                    if not ok then
                        raise = string.format("failed to update %s: %s", describe(toupdate[x]), err)
                        working = false
                        break
                    end
                end
                if opts.txn then
                    if raise then box.rollback() else box.commit() end
                end
                pending = pending - 1
                if pending == 0 then wait:put(true) end
            end)
        end
        wait:get()
        if raise then error(raise,3) end
    end

    if fp > 1 then
        log.error("fiber pool temporary disabled")
    end
    -- the single-fiber implementation is used regardless of `fp`
    local batch_update = batch_update_s

    local v -- last tuple returned by the iterator

    -- (Re)create the iterator right after the last seen tuple; before the
    -- first tuple was fetched fall back to the initial position.
    local function reposition()
        if v then
            return iiterator(index, box.index.GT, keyfields(v))
        elseif continue then
            return iiterator(index, box.index.GT, continue)
        end
        return iiterator(index, box.index.ALL)
    end

    local it = reposition()

    local toupdate = {}
    local c = 0            -- scanned tuples (+1 for the terminating step)
    local u = 0            -- tuples accepted for update
    local commit_at        -- scan position at which an incomplete batch is flushed
    local csw = 0          -- number of yields / batch flushes
    local clock_sum = 0    -- processor time spent between yields
    local clock1 = clock.proc()

    local function work()
        while working do
            c = c + 1
            if c % waitevery == 0 then
                clock_sum = clock_sum + ( clock.proc() - clock1 )
                csw = csw + 1
                fiber.sleep( 0 )
                clock1 = clock.proc()
                it = reposition()
            end
            v = it()

            if not v or c > limit then
                batch_update(toupdate)
                break
            end

            if not examine or examine(v) then
                if not commit_at then commit_at = c + commit end
                u = u + 1
                table.insert(toupdate, v)
            end

            if #toupdate >= takeby or (commit_at and c >= commit_at) then
                clock_sum = clock_sum + ( clock.proc() - clock1 )
                csw = csw + 1
                batch_update(toupdate)
                clock1 = clock.proc()
                toupdate = {}
                commit_at = nil
                it = reposition()
            end

            -- printevery == 0 means "no progress reports" (avoids modulo by zero)
            if not silent and printevery > 0 and c % printevery == 0 then
                local now = clock.time()
                local r,e = pcall(function()
                    local run = now - start
                    local run1 = now - prev
                    local rps = c/run
                    local rps1 = printevery/run1
                    collectgarbage("collect")
                    local mem = collectgarbage("count")
                    log.info("Processed %d (%d) (%0.1f%%) in %0.3fs (rps: %.0f tot; %.0f/%.1fs; %.2fms/c) ETA:+%ds (or %ds) Mem: %dK %s",
                        c, u,
                        100*c/size,
                        run,
                        c/run, rps1, run1,
                        1000*clock_sum/csw,

                        (size - c)/rps1,
                        (size - c)/rps,

                        mem,
                        box.tuple.new(keyfields(v))
                    )
                end)
                if not r then print(e) end
                prev = now
            end
        end
        if not silent then
            log.info("Processed %d, updated %d items in %s mode; wait: 1/%d; take: %d / %d %s", c-1, u, dryrun and "dryrun" or "real", waitevery, takeby, fp, opts.txn and "txn" or "single")
        end
        return { processed = c-1; updated = u; yields = csw }
    end

    if opts.bg or opts.background then
        local name = opts.name or string.sub('moonwalker.'..space.name,1,32)
        local fib = fiber.create(function()
            fiber.name(name)
            if not silent then log.info("Started fiber %s",fiber.id()) end
            fiber.sleep(0.1)
            local r,e = pcall(work)
            if not r then
                log.error("Failed processing on step N %s with %s. Last tuple was %s", c, e, v and tostring(v) or '-')
            end
        end)
        if not silent then log.info("Started fiber %s/%s. Cancel: require'fiber'.find(%s):cancel()", fib:id(), name, fib:id()) end
        return {
            fiber = fib;
            cancel = string.format("require'fiber'.find(%s):cancel()",fib:id());
        }
    else
        return work()
    end
end

return moonwalker
--------------------------------------------------------------------------------