Last week I was asked to create a normalisation routine for the Msp spectra that my application uses. No worries if you don’t know what Msp is. The point is that it needed to find the maximum value in a collection of values and then use that number in a calculation over the values in that collection.
First, the value object.
```vbnet
Namespace Model
''' <summary>
''' Value object holding one point of an Msp spectrum: an A value and the
''' wavelength (in nm) it belongs to.
''' </summary>
Public Class MspCoordinate
    Private _A As Decimal
    Private _WavelengthInNm As Decimal

    ''' <summary>Creates a coordinate with both values set to zero.</summary>
    Public Sub New()
        Me.New(0, 0)
    End Sub

    ''' <summary>Creates a coordinate from an A value and a wavelength.</summary>
    ''' <param name="A">The A value of the point.</param>
    ''' <param name="WavelengthinNm">The wavelength of the point, in nanometres.</param>
    Public Sub New(ByVal A As Decimal, ByVal WavelengthinNm As Decimal)
        Me.A = A
        Me.WavelengthInNm = WavelengthinNm
    End Sub

    ''' <summary>Gets or sets the A value of the point.</summary>
    Public Property A() As Decimal
        Get
            Return _A
        End Get
        Set(ByVal value As Decimal)
            _A = value
        End Set
    End Property

    ''' <summary>Gets or sets the wavelength of the point, in nanometres.</summary>
    Public Property WavelengthInNm() As Decimal
        Get
            Return _WavelengthInNm
        End Get
        Set(ByVal value As Decimal)
            _WavelengthInNm = value
        End Set
    End Property

    ''' <summary>Returns both field values in a readable form.</summary>
    Public Overrides Function ToString() As String
        Return String.Format("_A: {0}, _WavelengthInNm: {1}", _A, _WavelengthInNm)
    End Function

    ''' <summary>
    ''' Value equality: two coordinates are equal when both their A value and
    ''' their wavelength match.
    ''' </summary>
    ''' <param name="obj">The coordinate to compare with; may be Nothing.</param>
    ''' <returns>True when <paramref name="obj"/> carries the same values.</returns>
    Public Overloads Function Equals(ByVal obj As MspCoordinate) As Boolean
        If ReferenceEquals(Nothing, obj) Then Return False
        If ReferenceEquals(Me, obj) Then Return True
        ' Comparing A alone would make points at different wavelengths look identical.
        Return obj._A = _A AndAlso obj._WavelengthInNm = _WavelengthInNm
    End Function

    ''' <summary>
    ''' Equality against an arbitrary object; only an exact MspCoordinate
    ''' with the same values is considered equal.
    ''' </summary>
    Public Overloads Overrides Function Equals(ByVal obj As Object) As Boolean
        If ReferenceEquals(Nothing, obj) Then Return False
        If ReferenceEquals(Me, obj) Then Return True
        If Not Object.Equals(obj.GetType(), GetType(MspCoordinate)) Then Return False
        Return Equals(DirectCast(obj, MspCoordinate))
    End Function

    ''' <summary>
    ''' Hash code built from the same two fields that Equals compares.
    ''' </summary>
    Public Overrides Function GetHashCode() As Integer
        ' Xor instead of multiply-and-add: VB checks integer overflow by default,
        ' so an arithmetic combination could throw OverflowException.
        Return _A.GetHashCode() Xor _WavelengthInNm.GetHashCode()
    End Function
End Class
End Namespace
```
And then some unit tests to check that what I get from my Msp object is correct.
```vbnet
Imports NUnit.Framework
Namespace Tests
''' <summary>
''' Checks the normalized coordinates produced by Model.MspLinq for a small,
''' fixed set of eleven coordinates whose highest A value is 0.25.
''' </summary>
<TestFixture()> _
Public Class TestMspLinq
    Dim _msp As Model.MspLinq
    Dim _mspCoordinates As IList(Of Model.MspCoordinate)

    ''' <summary>Builds the fixture data and the object under test before each test.</summary>
    <SetUp()> _
    Public Sub Setup()
        ' Decimal literals (D suffix) match the constructor's parameter type, so no
        ' implicit Double-to-Decimal narrowing is needed and this compiles under Option Strict On.
        _mspCoordinates = New List(Of Model.MspCoordinate)
        _mspCoordinates.Add(New Model.MspCoordinate(0.2D, 380D))
        _mspCoordinates.Add(New Model.MspCoordinate(0.21D, 381D))
        _mspCoordinates.Add(New Model.MspCoordinate(0.22D, 382D))
        _mspCoordinates.Add(New Model.MspCoordinate(0.23D, 383D))
        _mspCoordinates.Add(New Model.MspCoordinate(0.25D, 384D))
        _mspCoordinates.Add(New Model.MspCoordinate(0.22D, 385D))
        _mspCoordinates.Add(New Model.MspCoordinate(0.23D, 386D))
        _mspCoordinates.Add(New Model.MspCoordinate(0.24D, 387D))
        _mspCoordinates.Add(New Model.MspCoordinate(0.25D, 388D))
        _mspCoordinates.Add(New Model.MspCoordinate(0.2D, 389D))
        _mspCoordinates.Add(New Model.MspCoordinate(0.2D, 390D))
        _msp = New Model.MspLinq(_mspCoordinates)
    End Sub

    ''' <summary>Normalizing must not add or drop coordinates.</summary>
    <Test()> _
    Public Sub MspCoordinatesNormalised_returns_same_number_of_results_as_MspCoordinates()
        Assert.AreEqual(_msp.MspCoordinates.Count, _msp.MspCoordinatesNormalized.Count)
    End Sub

    ''' <summary>The coordinate at index 4 holds the maximum (0.25), so it normalizes to 1.</summary>
    <Test()> _
    Public Sub MspCoordinatesNormalized_highest_A_should_be_1()
        Assert.AreEqual(1D, _msp.MspCoordinatesNormalized(4).A)
    End Sub

    ''' <summary>The coordinate at index 0 holds 0.2, which normalizes to 0.2 / 0.25 = 0.8.</summary>
    <Test()> _
    Public Sub MspCoordinatesNormalized_lowest_A_should_be_0_8()
        Assert.AreEqual(0.8D, _msp.MspCoordinatesNormalized(0).A)
    End Sub
End Class
End Namespace
```
There are more unit tests I should have written, but the blog post is getting long enough as it is.
I started off with a base class and left the implementation details of the normalization routine to the subclasses.
BASECLASS
```vbnet
Imports maxcollection.Model
''' <summary>
''' Base class for an Msp: holds the raw coordinates and lazily builds a
''' normalized copy in which every A value is divided by a maximum supplied
''' by the subclass.
''' </summary>
Public MustInherit Class MspBase
    Protected _MspCoordinates As IList(Of MspCoordinate)
    Protected _MspCoordinatesNormalised As IList(Of MspCoordinate)

    ''' <summary>Stores the given coordinate list; no calculation happens here.</summary>
    ''' <param name="MspCoordinates">The raw coordinates of this Msp.</param>
    Public Sub New(ByVal MspCoordinates As IList(Of MspCoordinate))
        Me._MspCoordinates = MspCoordinates
    End Sub

    ''' <summary>Gets the raw coordinates passed to the constructor.</summary>
    Public ReadOnly Property MspCoordinates() As IList(Of MspCoordinate)
        Get
            Return _MspCoordinates
        End Get
    End Property

    ''' <summary>
    ''' Gets the normalized coordinates. They are calculated on first access
    ''' and the same list is returned on later calls.
    ''' </summary>
    Public ReadOnly Property MspCoordinatesNormalized() As IList(Of MspCoordinate)
        Get
            If _MspCoordinatesNormalised Is Nothing Then
                CreateNormalizedMspCoordinates()
            End If
            Return _MspCoordinatesNormalised
        End Get
    End Property

    ''' <summary>
    ''' Implemented by subclasses: determine the maximum A value and hand it
    ''' to <see cref="CalculateNormalizedCoordinates"/>.
    ''' </summary>
    Protected MustOverride Sub CreateNormalizedMspCoordinates()

    ''' <summary>
    ''' Fills the normalized list with one new coordinate per raw coordinate,
    ''' keeping the wavelength and dividing A by <paramref name="_Max"/>.
    ''' </summary>
    ''' <param name="_Max">The divisor; a value of zero makes the Decimal division throw.</param>
    Protected Sub CalculateNormalizedCoordinates(ByVal _Max As Decimal)
        ' Build into a local list and publish it only when complete. Assigning the
        ' field first would cache an empty or partial list if the division throws,
        ' and the lazy property would then keep returning that broken list.
        Dim normalized As New List(Of MspCoordinate)(_MspCoordinates.Count)
        For Each _coordinate As MspCoordinate In _MspCoordinates
            normalized.Add(New MspCoordinate(_coordinate.A / _Max, _coordinate.WavelengthInNm))
        Next
        _MspCoordinatesNormalised = normalized
    End Sub

    ''' <summary>
    ''' Two Msp objects are equal when they have the same runtime type and
    ''' their raw coordinate lists are equal according to Object.Equals.
    ''' </summary>
    ''' <param name="obj">The Msp to compare with; may be Nothing.</param>
    Public Overloads Function Equals(ByVal obj As MspBase) As Boolean
        If ReferenceEquals(Nothing, obj) Then Return False
        If ReferenceEquals(Me, obj) Then Return True
        ' Compare against the actual runtime type rather than one hard-coded
        ' subclass, so every subclass gets working equality.
        If Not Object.Equals(obj.GetType(), Me.GetType()) Then Return False
        Return Object.Equals(obj._MspCoordinates, _MspCoordinates)
    End Function

    ''' <summary>Equality against an arbitrary object; defers to the typed overload.</summary>
    Public Overloads Overrides Function Equals(ByVal obj As Object) As Boolean
        ' TryCast yields Nothing for foreign types, which the typed overload rejects.
        Return Equals(TryCast(obj, MspBase))
    End Function

    ''' <summary>Hash code of the raw coordinate list, or 0 when there is none.</summary>
    Public Overrides Function GetHashCode() As Integer
        If _MspCoordinates IsNot Nothing Then Return _MspCoordinates.GetHashCode()
        Return 0
    End Function

    ''' <summary>Returns a short description containing the raw coordinate list.</summary>
    Public Overrides Function ToString() As String
        Return String.Format("_MspCoordinates: {0}", _MspCoordinates)
    End Function
End Class
```
SUBCLASS LINQ
```vbnet
Namespace Model
''' <summary>
''' Msp whose maximum A value is determined with a LINQ query before the
''' coordinates are normalized.
''' </summary>
Public Class MspLinq
    Inherits MspBase

    ''' <summary>Passes the raw coordinates on to the base class.</summary>
    Public Sub New(ByVal MspCoordinates As IList(Of MspCoordinate))
        MyBase.New(MspCoordinates)
    End Sub

    ''' <summary>
    ''' Projects every coordinate to its A value, takes the maximum of that
    ''' sequence and normalizes with it.
    ''' </summary>
    Protected Overrides Sub CreateNormalizedMspCoordinates()
        Dim aValues = From coordinate In _MspCoordinates Select coordinate.A
        Dim highestA = aValues.Max
        CalculateNormalizedCoordinates(highestA)
    End Sub
End Class
End Namespace
```
SUBCLASS FOR
```vbnet
Namespace Model
''' <summary>
''' Msp whose maximum A value is determined with an index-based For loop
''' before the coordinates are normalized.
''' </summary>
Public Class MspFor
    Inherits MspBase

    ''' <summary>Passes the raw coordinates on to the base class.</summary>
    Public Sub New(ByVal MspCoordinates As IList(Of MspCoordinate))
        MyBase.New(MspCoordinates)
    End Sub

    ''' <summary>
    ''' Walks the raw coordinates by index, tracks the largest A value and
    ''' normalizes with it.
    ''' </summary>
    Protected Overrides Sub CreateNormalizedMspCoordinates()
        ' Start from the smallest possible Decimal so that any real value replaces
        ' it; a fixed sentinel such as -100 gives a wrong maximum when every A is lower.
        Dim _max As Decimal = Decimal.MinValue
        For i As Integer = 0 To MspCoordinates.Count - 1
            Dim current As Decimal = MspCoordinates(i).A
            If _max < current Then
                _max = current
            End If
        Next
        CalculateNormalizedCoordinates(_max)
    End Sub
End Class
End Namespace
```
SUBCLASS FOREACH
```vbnet
Namespace Model
''' <summary>
''' Msp whose maximum A value is determined with a For Each loop before the
''' coordinates are normalized.
''' </summary>
Public Class MspForEach
    Inherits MspBase

    ''' <summary>Passes the raw coordinates on to the base class.</summary>
    Public Sub New(ByVal MspCoordinates As IList(Of MspCoordinate))
        MyBase.New(MspCoordinates)
    End Sub

    ''' <summary>
    ''' Enumerates the raw coordinates, tracks the largest A value and
    ''' normalizes with it.
    ''' </summary>
    Protected Overrides Sub CreateNormalizedMspCoordinates()
        ' Start from the smallest possible Decimal so that any real value replaces
        ' it; a fixed sentinel such as -100 gives a wrong maximum when every A is lower.
        Dim _max As Decimal = Decimal.MinValue
        For Each _coordinate As MspCoordinate In MspCoordinates
            If _max < _coordinate.A Then
                _max = _coordinate.A
            End If
        Next
        CalculateNormalizedCoordinates(_max)
    End Sub
End Class
End Namespace
```
I want this to be as fast as possible since I could potentially be doing this for thousands of Msps at a time. And yes, all of them can be shown on the screen.
As you might have noticed, I lazy-load the normalized collection because I don’t want the calculations to be done every time the object is created.
And this is a test fixture I created for testing the performance of this.
```vbnet
Imports NUnit.Framework
Namespace Tests
''' <summary>
''' Rough timing comparison of the three normalization implementations
''' (LINQ, For, For Each) on a freshly generated list of random coordinates.
''' Each test writes the elapsed milliseconds to the debug output.
''' </summary>
<TestFixture()> _
Public Class PerformanceTestNormalizingMsp
    Dim _mspLinq As Model.MspLinq
    Dim _mspFor As Model.MspFor
    Dim _mspForEach As Model.MspForEach
    Dim _mspCoordinates As IList(Of Model.MspCoordinate)

    ''' <summary>Nothing is shared between tests; each test builds its own data.</summary>
    <SetUp()> _
    Public Sub Setup()
    End Sub

    ''' <summary>
    ''' Fills the coordinate list with <paramref name="number"/> coordinates
    ''' that have a random A value and a running wavelength of 1 to number.
    ''' </summary>
    ''' <param name="number">How many coordinates to generate.</param>
    Private Sub SetupCoordinates(ByVal number As Integer)
        _mspCoordinates = New List(Of Model.MspCoordinate)(number)
        Dim _Random As New Random
        For i As Integer = 1 To number
            ' The loop index is the wavelength, so every point is distinct.
            ' CDec makes the Double-to-Decimal narrowing explicit.
            _mspCoordinates.Add(New Model.MspCoordinate(CDec(_Random.NextDouble()), i))
        Next
    End Sub

    ''' <summary>Times construction plus first normalization of Model.MspLinq.</summary>
    <Test()> _
    Public Sub PerfomanceOfLinq()
        Dim watch As New Stopwatch
        Dim number As Integer = 100000
        SetupCoordinates(number)
        watch.Start()
        _mspLinq = New Model.MspLinq(_mspCoordinates)
        Dim normalizedCount As Integer = _mspLinq.MspCoordinatesNormalized.Count
        watch.Stop()
        ' Assert after stopping the clock so NUnit's own work is not measured.
        Assert.AreEqual(number, normalizedCount)
        Debug.WriteLine("Linq with " & number & " coordinates took " & watch.ElapsedMilliseconds)
    End Sub

    ''' <summary>Times construction plus first normalization of Model.MspFor.</summary>
    <Test()> _
    Public Sub PerfomanceOfFor()
        Dim watch As New Stopwatch
        Dim number As Integer = 100000
        SetupCoordinates(number)
        watch.Start()
        _mspFor = New Model.MspFor(_mspCoordinates)
        Dim normalizedCount As Integer = _mspFor.MspCoordinatesNormalized.Count
        watch.Stop()
        ' Assert after stopping the clock so NUnit's own work is not measured.
        Assert.AreEqual(number, normalizedCount)
        Debug.WriteLine("For with " & number & " coordinates took " & watch.ElapsedMilliseconds)
    End Sub

    ''' <summary>Times construction plus first normalization of Model.MspForEach.</summary>
    <Test()> _
    Public Sub PerfomanceOfForEach()
        Dim watch As New Stopwatch
        Dim number As Integer = 100000
        SetupCoordinates(number)
        watch.Start()
        _mspForEach = New Model.MspForEach(_mspCoordinates)
        Dim normalizedCount As Integer = _mspForEach.MspCoordinatesNormalized.Count
        watch.Stop()
        ' Assert after stopping the clock so NUnit's own work is not measured.
        Assert.AreEqual(number, normalizedCount)
        Debug.WriteLine("ForEach with " & number & " coordinates took " & watch.ElapsedMilliseconds)
    End Sub
End Class
End Namespace
```
And these are the results when I ran it with 10000 coordinates in the collection.
<span class="MT_blue">first run</p>
<p>
Linq with 10000 coordinates took 40<br /> For with 10000 coordinates took 12<br /> ForEach with 10000 coordinates took 11
</p>
<p>
second run
</p>
<p>
Linq with 10000 coordinates took 42<br /> For with 10000 coordinates took 12<br /> ForEach with 10000 coordinates took 11
</p>
<p>
third run
</p>
<p>
Linq with 10000 coordinates took 40<br /> For with 10000 coordinates took 11<br /> ForEach with 10000 coordinates took 11</span>
</p>
<p>
As you can see pretty consistent numbers.
</p>
<p>
Linq being the slowest by a factor of 4.
</p>
<p>
Now lets see what happens with 100000 coordinates
</p>
<p>
<span class="MT_blue">first run</p>
<p>
Linq with 100000 coordinates took 152<br /> For with 100000 coordinates took 169<br /> ForEach with 100000 coordinates took 123
</p>
<p>
second run
</p>
<p>
Linq with 100000 coordinates took 148<br /> For with 100000 coordinates took 163<br /> ForEach with 100000 coordinates took 128
</p>
<p>
third run
</p>
<p>
Linq with 100000 coordinates took 154<br /> For with 100000 coordinates took 159<br /> ForEach with 100000 coordinates took 126</span>
</p>
<p>
Hmm, LINQ has caught up with the other two and is even faster than For.
</p>
<p>
When I add another 0 I get these results
</p>
<p>
<span class="MT_blue">Linq with 1000000 coordinates took 1464<br /> For with 1000000 coordinates took 1392<br /> ForEach with 1000000 coordinates took 1419</span>
</p>
<p>
<span class="MT_red">The differences are small. So what can I conclude from all this?</p>
<p>
Not much, really. For many elements in the collection, LINQ is just as fast as the others, but it is more readable and shorter to write. When it doesn’t really matter, the difference is big, but when it does matter, the difference is small, so we can pick any one of them.</span>
</p>