Great variation in execution time when trying to profile small functions

I’m attempting to profile how long it takes to execute two small functions. My results vary greatly — what could cause this?

I call each function 10m times, checking the time before and after, and then do that 10 times and calculate the average time delta. Both functions are very similar, and will produce a delta of ~1.7s most of the time. But every 8th run or so the delta will be ~0.9s or ~0.25s (for both functions).

  • I build-and-run from the editor on Windows 10.
  • My project only contains a single script with the test code.
  • I have tested running all the code in init, as well as spreading out the 10 iterations over the first 10 update calls.
  • I call collectgarbage("stop") before starting the test (there are very few allocations either way).

Since the test takes a few seconds to run, I expect the CPU to boost up to max. I also would expect it to not get throttled from heat. But I might very well be wrong. Maybe the three distinct results (1.7, 0.9, 0.25) indicate CPU shenanigans. Edit: measuring with __rdtsc and estimating core frequency gives me the expected 3.5Ghz of my CPU.

The messy code (not much to see):

local local_select = select
local local_xpcall = xpcall

local function error_handler(err_msg)
	print(debug.traceback(err_msg, 3))
end

local call_count = 0
local behavior_i = function(context, a, b, c, d, e, f, g, h)
	call_count = call_count + 1
	context.dummy = context.dummy + a + b + c + d + e + f + g + h
end
local context_i = {dummy = 0}
local instance = {signals = {sid = {subscribers = {{behavior = behavior_i, context = context_i}}}}}

local behavior
local context
local args = {false, false, false, false, false, false, false, false}

local call_subscriber_fns = {
	function() behavior(context) end,
	function() behavior(context, args[1]) end,
	function() behavior(context, args[1], args[2]) end,
	function() behavior(context, args[1], args[2], args[3]) end,
	function() behavior(context, args[1], args[2], args[3], args[4]) end,
	function() behavior(context, args[1], args[2], args[3], args[4], args[5]) end,
	function() behavior(context, args[1], args[2], args[3], args[4], args[5], args[6]) end,
	function() behavior(context, args[1], args[2], args[3], args[4], args[5], args[6], args[7]) end,
	function() behavior(context, args[1], args[2], args[3], args[4], args[5], args[6], args[7], args[8]) end,
}
local function send_with_specialized_fn(t, id, ...)
	args[1], args[2], args[3], args[4], args[5], args[6], args[7], args[8] = local_select(1, ...)
	local count = local_select('#', ...)
	call_subscriber_fn = call_subscriber_fns[count + 1]
	local subscribers = t.signals[id].subscribers
	for i = 1, #subscribers do
		local subscriber = subscribers[i]
		behavior = subscriber.behavior
		context = subscriber.context
		local_xpcall(call_subscriber_fn, error_handler)
	end
end

local function call_subscriber_unified_fn() behavior(context, args[1], args[2], args[3], args[4], args[5], args[6], args[7], args[8]) end
local function send_with_unified_fn(t, id, ...)
	args[1], args[2], args[3], args[4], args[5], args[6], args[7], args[8] = local_select(1, ...)
	local subscribers = t.signals[id].subscribers
	for i = 1, #subscribers do
		local subscriber = subscribers[i]
		behavior = subscriber.behavior
		context = subscriber.context
		local_xpcall(call_subscriber_unified_fn, error_handler)
	end
end

local function call_subscriber_unpack_fn() behavior(context, unpack(args)) end
local function send_with_unpack_fn(t, id, ...)
	args[1], args[2], args[3], args[4], args[5], args[6], args[7], args[8] = local_select(1, ...)
	local subscribers = t.signals[id].subscribers
	for i = 1, #subscribers do
		local subscriber = subscribers[i]
		behavior = subscriber.behavior
		context = subscriber.context
		local_xpcall(call_subscriber_unpack_fn, error_handler)
	end
end

local outer_iter = 6
local inner_iter = 30000000

local function test_send_with_specialized_fn(v)
	local a, b, c, d, e, f, g, h = v + 1, v + 2, v + 3, v + 4, v + 5, v + 6, v + 7, v + 8
	local time_begin = socket.gettime()
	for _ = 1, inner_iter do send_with_specialized_fn(instance, 'sid', a, b, c, d, e, f, g, h) end
	return socket.gettime() - time_begin
end

local function test_send_with_unified_fn(v)
	local a, b, c, d, e, f, g, h = v + 1, v + 2, v + 3, v + 4, v + 5, v + 6, v + 7, v + 8
	local time_begin = socket.gettime()
	for _ = 1, inner_iter do send_with_unified_fn(instance, 'sid', a, b, c, d, e, f, g, h) end
	return socket.gettime() - time_begin
end

local function test_send_with_unpack_fn(v)
	local a, b, c, d, e, f, g, h = v + 1, v + 2, v + 3, v + 4, v + 5, v + 6, v + 7, v + 8
	local time_begin = socket.gettime()
	for _ = 1, inner_iter do send_with_unpack_fn(instance, 'sid', a, b, c, d, e, f, g, h) end
	return socket.gettime() - time_begin
end

local case_index = 1
local cases = {
	{fn = test_send_with_specialized_fn, acc = 0, delta = 0, l1 = '#a1', l2 = '#a2'},
	{fn = test_send_with_unified_fn, acc = 0, delta = 0, l1 = '#b1', l2 = '#b2'},
	--{fn = test_send_with_unpack_fn, acc = 0, delta = 0, l1 = '#b1', l2 = '#b2'},
}

local function process(case, dummy_value)
	case.delta = case.fn(dummy_value)
	case.acc = case.acc + case.delta
	label.set_text(case.l1, tostring(call_count))
	label.set_text(case.l2, string.format('%.9f', case.delta))
end

function init(self)
	collectgarbage("stop")
	for _ = 1, outer_iter do
		process(cases[case_index], 1)
		case_index = case_index == 1 and 2 or 1
		process(cases[case_index], 1)
		print('---')
		print(string.format('%.9f', cases[1].delta))
		print(string.format('%.9f', cases[2].delta))
	end

	local c1 = string.format('%.9f', cases[1].acc / outer_iter)
	local c2 = string.format('%.9f', cases[2].acc / outer_iter)
	label.set_text('#c1', c1)
	label.set_text('#c2', c2)
	print('===')
	print(c1)
	print(c2)
end