Comments on: Lock-Free Queue Thanks for updating this test! :) Your test gives a 5x faster for Lock queue on my machine, which is somewhat the mean of what I have observed earlier. I put a counter inside the SafeQueue.Enqueue() to roughly check how many average loops it does, before exit, and it's 2.5! So that's a lots of instructions to add a single element: at least 2.5 x Interlocked.CompareExchange + Interlocked.Increment() + the loop/conditional ReferenceEquals...etc. Not really surprised that It cost more than a simple lock (at least on a x86 architecture). Thanks for updating this test! :)
Your test gives a 5x faster for Lock queue on my machine, which is somewhat the mean of what I have observed earlier.

I put a counter inside the SafeQueue.Enqueue() to roughly check how many average loops it does, before exit, and it’s 2.5! So that’s a lots of instructions to add a single element: at least 2.5 x Interlocked.CompareExchange + Interlocked.Increment() + the loop/conditional ReferenceEquals…etc. Not really surprised that It cost more than a simple lock (at least on a x86 architecture).

]]>
By: Stefan Boberg/2011/03/09/lock-free-queue/#comment-1374 Stefan Boberg Wed, 09 Mar 2011 21:28:18 +0000

* @lx: here are my improvements on your TPL test program. Please note how I start execution of all threads at the same time and accumulate the time of all threads. (Not that it helped lock-free redeem itself!) <pre lang="csharp"> class Program { static TimeSpan Run(Action action) { var stopwatch = new Stopwatch(); stopwatch.Reset(); var tasks = new Task[10]; barrier = new Barrier(tasks.Length + 1); threadWait = new CountdownEvent(tasks.Length); for (int j = 0; j < tasks.Length; j++) { var task = new Task(action); tasks[j] = task; tasks[j].Start(); } //wait for threads threadWait.Wait(); stopwatch.Start(); barrier.SignalAndWait(); Task.WaitAll(tasks); stopwatch.Stop(); threadWait.Dispose(); threadWait = null; barrier.Dispose(); barrier = null; return stopwatch.Elapsed; } static CountdownEvent threadWait; static Barrier barrier; static void Main() { const int MaxCount = 100000; const int totalRounds = 10; TimeSpan totalSafeQueueTime = TimeSpan.Zero; TimeSpan totalLockQueueTime = TimeSpan.Zero; int output; var safeQueue = new SafeQueue<int>(); var lockQueue = new LockQueue<int>(); for (int round = 0; round < totalRounds; round++) { totalSafeQueueTime += Run(() => { threadWait.Signal(); barrier.SignalAndWait(); for (int i = 0; i < MaxCount; i++) { safeQueue.Enqueue(ref i); safeQueue.Enqueue(ref i); safeQueue.Enqueue(ref i); safeQueue.Enqueue(ref i); safeQueue.Enqueue(ref i); safeQueue.Enqueue(ref i); safeQueue.Enqueue(ref i); safeQueue.Enqueue(ref i); safeQueue.Enqueue(ref i); safeQueue.Enqueue(ref i); safeQueue.Dequeue(out output); safeQueue.Dequeue(out output); safeQueue.Dequeue(out output); safeQueue.Dequeue(out output); safeQueue.Dequeue(out output); safeQueue.Dequeue(out output); safeQueue.Dequeue(out output); safeQueue.Dequeue(out output); safeQueue.Dequeue(out output); safeQueue.Dequeue(out output); } } ); if (!safeQueue.IsEmpty) { Console.WriteLine("SAFEQUEUE NOT THREAD SAFE!"); } totalLockQueueTime += Run(() => { threadWait.Signal(); barrier.SignalAndWait(); for (int i = 0; i < MaxCount; i++) { lock (lockQueue) lockQueue.Enqueue(i); lock (lockQueue) lockQueue.Enqueue(i); lock (lockQueue) lockQueue.Enqueue(i); lock (lockQueue) lockQueue.Enqueue(i); lock (lockQueue) lockQueue.Enqueue(i); lock (lockQueue) lockQueue.Enqueue(i); lock (lockQueue) lockQueue.Enqueue(i); lock (lockQueue) lockQueue.Enqueue(i); lock (lockQueue) lockQueue.Enqueue(i); lock (lockQueue) lockQueue.Enqueue(i); lock (lockQueue) output = lockQueue.Dequeue(); lock (lockQueue) output = lockQueue.Dequeue(); lock (lockQueue) output = lockQueue.Dequeue(); lock (lockQueue) output = lockQueue.Dequeue(); lock (lockQueue) output = lockQueue.Dequeue(); lock (lockQueue) output = lockQueue.Dequeue(); lock (lockQueue) output = lockQueue.Dequeue(); lock (lockQueue) output = lockQueue.Dequeue(); lock (lockQueue) output = lockQueue.Dequeue(); lock (lockQueue) output = lockQueue.Dequeue(); } } ); if (lockQueue.Count != 0) { Console.WriteLine("LOCK-QUEUE NOT THREAD SAFE!"); } Console.Write('.'); } Console.WriteLine(string.Format("{0} itterations. SafeQueue = {1:0.0}, LockQueue = {2:0.0}. Ratio={3:0%}", totalRounds, totalSafeQueueTime.TotalSeconds, totalLockQueueTime.TotalSeconds, totalSafeQueueTime.TotalSeconds / totalLockQueueTime.TotalSeconds)); Console.ReadLine(); } } </pre> @lx: here are my improvements on your TPL test program. Please note how I start execution of all threads at the same time and accumulate the time of all threads. (Not that it helped lock-free redeem itself!)

class Program
 
  	{
 
  		static TimeSpan Run(Action action)
 
  		{
 
  			var stopwatch = new Stopwatch();
 
  			stopwatch.Reset();
 
  			var tasks = new Task[10];
 
   
 
  			barrier = new Barrier(tasks.Length + 1);
 
  			threadWait = new CountdownEvent(tasks.Length);
 
   
 
  			for (int j = 0; j &lt; tasks.Length; j++)
 
  			{
 
  				var task = new Task(action);
 
  				tasks[j] = task;
 
  				tasks[j].Start();
 
  			}
 
  			//wait for threads
 
  			threadWait.Wait();
 
  			stopwatch.Start();
 
  			barrier.SignalAndWait();
 
   
 
  			Task.WaitAll(tasks);
 
  			stopwatch.Stop();
 
   
 
  			threadWait.Dispose();
 
  			threadWait = null;
 
  			barrier.Dispose();
 
  			barrier = null;
 
   
 
  			return stopwatch.Elapsed;
 
  		}
 
   
 
  		static CountdownEvent threadWait;
 
  		static Barrier barrier;
 
   
 
  		static void Main()
 
  		{
 
  			const int MaxCount = 100000;
 
  			const int totalRounds = 10;
 
  			TimeSpan totalSafeQueueTime = TimeSpan.Zero;
 
  			TimeSpan totalLockQueueTime = TimeSpan.Zero;
 
   
 
  			int output;
 
   
 
   
 
  			var safeQueue = new SafeQueue&lt;int&gt;();
 
  			var lockQueue = new LockQueue&lt;int&gt;();
 
   
 
  			for (int round = 0; round &lt; totalRounds; round++)
 
  			{
 
   
 
   
 
  				totalSafeQueueTime += Run(() =&gt;
 
  				{
 
  					threadWait.Signal();
 
  					barrier.SignalAndWait();
 
   
 
  					for (int i = 0; i &lt; MaxCount; i++)
 
  					{
 
  						safeQueue.Enqueue(ref i);
 
  						safeQueue.Enqueue(ref i);
 
  						safeQueue.Enqueue(ref i);
 
  						safeQueue.Enqueue(ref i);
 
  						safeQueue.Enqueue(ref i);
 
   
 
  						safeQueue.Enqueue(ref i);
 
  						safeQueue.Enqueue(ref i);
 
  						safeQueue.Enqueue(ref i);
 
  						safeQueue.Enqueue(ref i);
 
  						safeQueue.Enqueue(ref i);
 
   
 
   
 
  						safeQueue.Dequeue(out output);
 
  						safeQueue.Dequeue(out output);
 
  						safeQueue.Dequeue(out output);
 
  						safeQueue.Dequeue(out output);
 
  						safeQueue.Dequeue(out output);
 
   
 
  						safeQueue.Dequeue(out output);
 
  						safeQueue.Dequeue(out output);
 
  						safeQueue.Dequeue(out output);
 
  						safeQueue.Dequeue(out output);
 
  						safeQueue.Dequeue(out output);
 
  					}
 
  				}
 
  				);
 
  				if (!safeQueue.IsEmpty)
 
  				{
 
  					Console.WriteLine(&quot;SAFEQUEUE NOT THREAD SAFE!&quot;);
 
  				}
 
   
 
   
 
  				totalLockQueueTime += Run(() =&gt;
 
  				{
 
  					threadWait.Signal();
 
  					barrier.SignalAndWait();
 
   
 
  					for (int i = 0; i &lt; MaxCount; i++)
 
  					{
 
  						lock (lockQueue) lockQueue.Enqueue(i);
 
  						lock (lockQueue) lockQueue.Enqueue(i);
 
  						lock (lockQueue) lockQueue.Enqueue(i);
 
  						lock (lockQueue) lockQueue.Enqueue(i);
 
  						lock (lockQueue) lockQueue.Enqueue(i);
 
   
 
  						lock (lockQueue) lockQueue.Enqueue(i);
 
  						lock (lockQueue) lockQueue.Enqueue(i);
 
  						lock (lockQueue) lockQueue.Enqueue(i);
 
  						lock (lockQueue) lockQueue.Enqueue(i);
 
  						lock (lockQueue) lockQueue.Enqueue(i);
 
   
 
  						lock (lockQueue) output = lockQueue.Dequeue();
 
  						lock (lockQueue) output = lockQueue.Dequeue();
 
  						lock (lockQueue) output = lockQueue.Dequeue();
 
  						lock (lockQueue) output = lockQueue.Dequeue();
 
  						lock (lockQueue) output = lockQueue.Dequeue();
 
   
 
  						lock (lockQueue) output = lockQueue.Dequeue();
 
  						lock (lockQueue) output = lockQueue.Dequeue();
 
  						lock (lockQueue) output = lockQueue.Dequeue();
 
  						lock (lockQueue) output = lockQueue.Dequeue();
 
  						lock (lockQueue) output = lockQueue.Dequeue();
 
  					}
 
  				}
 
  				);
 
  				if (lockQueue.Count != 0)
 
  				{
 
  					Console.WriteLine(&quot;LOCK-QUEUE NOT THREAD SAFE!&quot;);
 
  				}
 
   
 
   
 
   
 
  				Console.Write('.');
 
  			}
 
  			Console.WriteLine(string.Format(&quot;{0} itterations.   SafeQueue = {1:0.0},  LockQueue = {2:0.0}.  Ratio={3:0%}&quot;, totalRounds, totalSafeQueueTime.TotalSeconds, totalLockQueueTime.TotalSeconds, totalSafeQueueTime.TotalSeconds / totalLockQueueTime.TotalSeconds));
 
  			Console.ReadLine();
 
  		}
 
  	}
]]>
By: Jason Swearingen/2011/03/09/lock-free-queue/#comment-1365 Jason Swearingen Wed, 09 Mar 2011 16:48:10 +0000 very interesting! first, I tried to address defects I see in your test case (only enqueues + each thread starts as soon as it's .Start() is invoked, so the first ones will not be run in-parallel) but those did not matter much. next i played around with the data access patterns (enqueue then dequeue VS enqueue/dequeue pairs) but that also didn't matter much. Indeed if there is a lesson here, is that (on PC) the lock-free doesnt run as fast as simple (rusty!) locks :) The best I got was SafeQueue running a little under 2x as long as lock queue. what about on PPC (Xbox?) it is something i will try out tomorrow! I guess that's what I get beliving a fancy research paper from some guys at IBM huh ;) very interesting!

first, I tried to address defects I see in your test case (only enqueues + each thread starts as soon as it’s .Start() is invoked, so the first ones will not be run in-parallel)

but those did not matter much.

next i played around with the data access patterns (enqueue then dequeue VS enqueue/dequeue pairs) but that also didn’t matter much.

Indeed if there is a lesson here, is that (on PC) the lock-free doesnt run as fast as simple (rusty!) locks :)

The best I got was SafeQueue running a little under 2x as long as lock queue.

what about on PPC (Xbox?) it is something i will try out tomorrow!

I guess that’s what I get beliving a fancy research paper from some guys at IBM huh ;)

]]>
By: Andreas Fredriksson/2011/03/09/lock-free-queue/#comment-1360 Andreas Fredriksson Wed, 09 Mar 2011 13:58:03 +0000 Here is a version using Task Parallel Library which gives similar results (I’m using
quick-escape for pasting this code)

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Threading;
using System.Threading.Tasks;

namespace TestSafeQueue
{
class Program
{
static void Run(Action action)
{
var tasks = new Task[10];
for (int j = 0; j < tasks.Length; j++)
{
var task = new Task(action);
tasks[j] = task;
tasks[j].Start();
}
Task.WaitAll(tasks);
}

static void Main()
{
const int MaxCount = 100000;

for (int round = 0; round < 10; round++)
{
var safeQueue = new SafeQueue<int>();
Run(() =>
{

var clock = new Stopwatch();
clock.Start();
for (int i = 0; i < MaxCount; i++)
{
safeQueue.Enqueue(i);
safeQueue.Enqueue(i);
safeQueue.Enqueue(i);
safeQueue.Enqueue(i);
safeQueue.Enqueue(i);

safeQueue.Enqueue(i);
safeQueue.Enqueue(i);
safeQueue.Enqueue(i);
safeQueue.Enqueue(i);
safeQueue.Enqueue(i);
}
clock.Stop();
Console.WriteLine("{0}-{1}) SafeQueue => Elapsed {2}ms", round, Thread.CurrentThread.ManagedThreadId, clock.ElapsedMilliseconds);
}
);
Console.WriteLine("{0}) SafeQueue => Size {1}", round, safeQueue.Count);

Queue<int> lockQueue = new Queue<int>();
Run(() =>
{
var clock = new Stopwatch();
clock.Start();
for (int i = 0; i < MaxCount; i++)
{
lock (lockQueue) lockQueue.Enqueue(i);
lock (lockQueue) lockQueue.Enqueue(i);
lock (lockQueue) lockQueue.Enqueue(i);
lock (lockQueue) lockQueue.Enqueue(i);
lock (lockQueue) lockQueue.Enqueue(i);

lock (lockQueue) lockQueue.Enqueue(i);
lock (lockQueue) lockQueue.Enqueue(i);
lock (lockQueue) lockQueue.Enqueue(i);
lock (lockQueue) lockQueue.Enqueue(i);
lock (lockQueue) lockQueue.Enqueue(i);
}
clock.Stop();
Console.WriteLine("{0}-{1}) Queue+lock => Elapsed {2}ms", round, Thread.CurrentThread.ManagedThreadId, clock.ElapsedMilliseconds);
}
);
Console.WriteLine("{0}) Queue+lock => Size {1}", round, lockQueue.Count);
}
}
}
}

]]> By: @lx/2011/03/09/lock-free-queue/#comment-1356 @lx Wed, 09 Mar 2011 11:09:50 +0000 I'm running the test on a i5 quad core at 2.66GHz. For the <T>, you have to use HTML escape sequence like this <T> I’m running the test on a i5 quad core at 2.66GHz. For the <T>, you have to use HTML escape sequence like this &lt;T&gt;

]]>
By: Jason Swearingen/2011/03/09/lock-free-queue/#comment-1354 Jason Swearingen Wed, 09 Mar 2011 10:59:21 +0000 also, a question would be how many cores your computer has. if you had a single-core or HT then lockfree will be slower. but for a multicore system it should be MUCH faster than locks. again, i'll run your code in a few hours. my system is a 4 core 2.3ghz athlon. also, a question would be how many cores your computer has. if you had a single-core or HT then lockfree will be slower.

but for a multicore system it should be MUCH faster than locks.

again, i’ll run your code in a few hours. my system is a 4 core 2.3ghz athlon.

]]>
By: Jason Swearingen/2011/03/09/lock-free-queue/#comment-1352 Jason Swearingen Wed, 09 Mar 2011 10:43:54 +0000 oops, wrong version uploaded with wrong figures, declaration for var clock = new StopWatch(); should be moved inside thread, before clock.Reset()/Start() Still, the perf difference is x10 oops, wrong version uploaded with wrong figures, declaration for var clock = new StopWatch(); should be moved inside thread, before clock.Reset()/Start()

Still, the perf difference is x10

]]>
By: @lx/2011/03/09/lock-free-queue/#comment-1350 @lx Wed, 09 Mar 2011 10:24:09 +0000

var doneEvents = new ManualResetEvent[10];

for(int j = 0; j < doneEvents.Length; j++)
{
int threadIndex = j;
doneEvents[threadIndex] = new ManualResetEvent(false);
ThreadPool.QueueUserWorkItem((threadContext) =>
{

clock.Start();
for (int i = 0; i < COUNT; i++)
{
queue.Enqueue(i);
queue.Enqueue(i);
queue.Enqueue(i);
queue.Enqueue(i);
queue.Enqueue(i);

queue.Enqueue(i);
queue.Enqueue(i);
queue.Enqueue(i);
queue.Enqueue(i);
queue.Enqueue(i);
}
clock.Stop();
Console.WriteLine("1) Elapsed#{0} {1}ms", threadIndex, clock.ElapsedMilliseconds);
doneEvents[threadIndex].Set();
}
);
}
WaitHandle.WaitAll(doneEvents);
Console.WriteLine("Size {0}", queue.Count);

Queue<int> defaultQueue = new Queue<int>();
for (int j = 0; j < doneEvents.Length; j++)
{
int threadIndex = j;
doneEvents[threadIndex] = new ManualResetEvent(false);
ThreadPool.QueueUserWorkItem((threadContext) =>
{
clock.Reset();
clock.Start();
for (int i = 0; i < COUNT; i++)
{
lock (defaultQueue) defaultQueue.Enqueue(i);
lock (defaultQueue) defaultQueue.Enqueue(i);
lock (defaultQueue) defaultQueue.Enqueue(i);
lock (defaultQueue) defaultQueue.Enqueue(i);
lock (defaultQueue) defaultQueue.Enqueue(i);

lock (defaultQueue) defaultQueue.Enqueue(i);
lock (defaultQueue) defaultQueue.Enqueue(i);
lock (defaultQueue) defaultQueue.Enqueue(i);
lock (defaultQueue) defaultQueue.Enqueue(i);
lock (defaultQueue) defaultQueue.Enqueue(i);
}
clock.Stop();
Console.WriteLine("2) Elapsed#{0} {1}ms", threadIndex, clock.ElapsedMilliseconds);
doneEvents[threadIndex].Set();
}
);
}
WaitHandle.WaitAll(doneEvents);
Console.WriteLine();
Console.WriteLine("Size {0}", defaultQueue.Count);
}

]]>