I’m attempting to profile how long it takes to execute two small functions. My results vary greatly — what could cause this?
I call each function 10m times, checking the time before and after, and then do that 10 times and calculate the average time delta. Both functions are very similar, and will produce a delta of ~1.7s most of the time. But every 8th run or so the delta will be ~0.9s or ~0.25s (for both functions).
- I build-and-run from the editor on Windows 10.
- My project only contains a single script with the test code.
- I have tested running all the code in
init
, as well as spreading out the 10 iterations over the first 10update
calls. - I call
collectgarbage("stop")
before starting the test (there are very few allocations either way).
Since the test takes a few seconds to run, I expect the CPU to boost up to max. I also would expect it to not get throttled from heat. But I might very well be wrong. Maybe the three distinct results (1.7, 0.9, 0.25) indicate CPU shenanigans. Edit: measuring with __rdtsc
and estimating core frequency gives me the expected 3.5Ghz of my CPU.
The messy code (not much to see):
local local_select = select
local local_xpcall = xpcall
local function error_handler(err_msg)
print(debug.traceback(err_msg, 3))
end
local call_count = 0
local behavior_i = function(context, a, b, c, d, e, f, g, h)
call_count = call_count + 1
context.dummy = context.dummy + a + b + c + d + e + f + g + h
end
local context_i = {dummy = 0}
local instance = {signals = {sid = {subscribers = {{behavior = behavior_i, context = context_i}}}}}
local behavior
local context
local args = {false, false, false, false, false, false, false, false}
local call_subscriber_fns = {
function() behavior(context) end,
function() behavior(context, args[1]) end,
function() behavior(context, args[1], args[2]) end,
function() behavior(context, args[1], args[2], args[3]) end,
function() behavior(context, args[1], args[2], args[3], args[4]) end,
function() behavior(context, args[1], args[2], args[3], args[4], args[5]) end,
function() behavior(context, args[1], args[2], args[3], args[4], args[5], args[6]) end,
function() behavior(context, args[1], args[2], args[3], args[4], args[5], args[6], args[7]) end,
function() behavior(context, args[1], args[2], args[3], args[4], args[5], args[6], args[7], args[8]) end,
}
local function send_with_specialized_fn(t, id, ...)
args[1], args[2], args[3], args[4], args[5], args[6], args[7], args[8] = local_select(1, ...)
local count = local_select('#', ...)
call_subscriber_fn = call_subscriber_fns[count + 1]
local subscribers = t.signals[id].subscribers
for i = 1, #subscribers do
local subscriber = subscribers[i]
behavior = subscriber.behavior
context = subscriber.context
local_xpcall(call_subscriber_fn, error_handler)
end
end
local function call_subscriber_unified_fn() behavior(context, args[1], args[2], args[3], args[4], args[5], args[6], args[7], args[8]) end
local function send_with_unified_fn(t, id, ...)
args[1], args[2], args[3], args[4], args[5], args[6], args[7], args[8] = local_select(1, ...)
local subscribers = t.signals[id].subscribers
for i = 1, #subscribers do
local subscriber = subscribers[i]
behavior = subscriber.behavior
context = subscriber.context
local_xpcall(call_subscriber_unified_fn, error_handler)
end
end
local function call_subscriber_unpack_fn() behavior(context, unpack(args)) end
local function send_with_unpack_fn(t, id, ...)
args[1], args[2], args[3], args[4], args[5], args[6], args[7], args[8] = local_select(1, ...)
local subscribers = t.signals[id].subscribers
for i = 1, #subscribers do
local subscriber = subscribers[i]
behavior = subscriber.behavior
context = subscriber.context
local_xpcall(call_subscriber_unpack_fn, error_handler)
end
end
local outer_iter = 6
local inner_iter = 30000000
local function test_send_with_specialized_fn(v)
local a, b, c, d, e, f, g, h = v + 1, v + 2, v + 3, v + 4, v + 5, v + 6, v + 7, v + 8
local time_begin = socket.gettime()
for _ = 1, inner_iter do send_with_specialized_fn(instance, 'sid', a, b, c, d, e, f, g, h) end
return socket.gettime() - time_begin
end
local function test_send_with_unified_fn(v)
local a, b, c, d, e, f, g, h = v + 1, v + 2, v + 3, v + 4, v + 5, v + 6, v + 7, v + 8
local time_begin = socket.gettime()
for _ = 1, inner_iter do send_with_unified_fn(instance, 'sid', a, b, c, d, e, f, g, h) end
return socket.gettime() - time_begin
end
local function test_send_with_unpack_fn(v)
local a, b, c, d, e, f, g, h = v + 1, v + 2, v + 3, v + 4, v + 5, v + 6, v + 7, v + 8
local time_begin = socket.gettime()
for _ = 1, inner_iter do send_with_unpack_fn(instance, 'sid', a, b, c, d, e, f, g, h) end
return socket.gettime() - time_begin
end
local case_index = 1
local cases = {
{fn = test_send_with_specialized_fn, acc = 0, delta = 0, l1 = '#a1', l2 = '#a2'},
{fn = test_send_with_unified_fn, acc = 0, delta = 0, l1 = '#b1', l2 = '#b2'},
--{fn = test_send_with_unpack_fn, acc = 0, delta = 0, l1 = '#b1', l2 = '#b2'},
}
local function process(case, dummy_value)
case.delta = case.fn(dummy_value)
case.acc = case.acc + case.delta
label.set_text(case.l1, tostring(call_count))
label.set_text(case.l2, string.format('%.9f', case.delta))
end
function init(self)
collectgarbage("stop")
for _ = 1, outer_iter do
process(cases[case_index], 1)
case_index = case_index == 1 and 2 or 1
process(cases[case_index], 1)
print('---')
print(string.format('%.9f', cases[1].delta))
print(string.format('%.9f', cases[2].delta))
end
local c1 = string.format('%.9f', cases[1].acc / outer_iter)
local c2 = string.format('%.9f', cases[2].acc / outer_iter)
label.set_text('#c1', c1)
label.set_text('#c2', c2)
print('===')
print(c1)
print(c2)
end