## Monday, 9. February 2009

Feb 09

Sometimes we need a function which finds the m smallest elements in a list or array.

We can use the idea of bubble sort to maintain an sorted array of the m smallest elements so far. This idea gives us a O(m*n) algorithm which solves our problem:

let bubble array element =
let rec bubbleStep i =
if i < (array |> Array.length) then
match array.[i] with
| None ->
if i = array.Length - 1 || array.[i+1] <> None then
array.[i] <- Some element
bubbleStep (i+1)
| Some x ->
if element < x then
if i = 0 then
array.[i] <- Some element
else
array.[i-1] <- Some x
array.[i] <- Some element
bubbleStep (i+1)
bubbleStep 0
array
let mMin seq m =
Seq.fold bubble (Array.create m None) seq

We can easily generalize this function to compute maximum and minimum:

let bubble **f** array element =
let rec bubbleStep i =
if i < (array |> Array.length) then
match array.[i] with
| None ->
if i = array.Length - 1 || array.[i+1] <> None then
array.[i] <- Some element
bubbleStep (i+1)
| Some x ->
if **f element x** then
if i = 0 then
array.[i] <- Some element
else
array.[i-1] <- Some x
array.[i] <- Some element
bubbleStep (i+1)
bubbleStep 0
array
let mBubble f seq m =
Seq.fold (bubble f) (Array.create m None) seq
let mMin seq m = mBubble (<) seq m
let mMax seq m = mBubble (>) seq m

If we donâ€™t want to store Option-Values we can use this simplification:

let bubble f m array element =
let rec bubbleStep i =
if i < (array |> Array.length) then
let x = array.[i]
if f element x then
if i = 0 then
array.[i] <- element
else
array.[i-1] <- x
array.[i] <- element
bubbleStep (i+1)
if Array.length array < m then
let newArray = Array.append [|element|] array
newArray
|> Array.sort
(fun a b ->
if a = b then 0
elif f a b then 1 else -1)
newArray
else
bubbleStep 0
array
let mBubble f seq m =
Seq.fold (bubble f m) Array.empty seq
let mMin seq m = mBubble (<) seq m
let mMax seq m = mBubble (>) seq m

Tags:

minimum
## Saturday, 31. January 2009

Jan 31

In the last post I show how we can calculate clusters of n-dimensional Objects with the K-means-Algorithm in F#. This time I will simplify the code by using the built-in type vector.

First of all we redefine the interface for â€œclusterableâ€ objects:

type IClusterable =
abstract DimValues: vector with get

This time we do not need any dimension information â€“ the vector type will care about this. Now we can reduce our distance function to:

let calcDist (p1:vector) (p2:vector) = (p1 - p2).Norm

The function Norm calculates the 2-Norm of a vector.

So calcDist will still compute the n-dimensional Euclidean distance.

The next step is the definition of a n-dimensional Centroid type. This time we will only use a type alias of vector:

type Centroid = vector

The calculation of the centroid and the centroid error can be written as:

let calcCentroid (items:'a list when 'a :> IClusterable)
(oldCentroid:Centroid) =
let count = items.Length
if count = 0 then oldCentroid else
let sum =
items
|> List.fold_left
(fun vec item -> vec + item.DimValues) // vector addition
(nullVector oldCentroid)
sum * (1./(count |> float)) // multiply with scalar
let calcError centroid (items:'a list when 'a :> IClusterable) =
let sq x = x * x
items
|> List.sum_by (fun i -> calcDist centroid i.DimValues |> sq)

The calcError function is straight forward but not perfect in terms of performance. As we use the calcDist function and hence the 2-Norm the function will first calculate a square root and then squares the result.

The rest of the code is pretty much same as in the last post:

/// creates n-dimensional 0 - vector
let nullVector (v:vector) = v * 0.
type Cluster<'a> when 'a :> IClusterable =
{Items: 'a list;
Count: int;
Centroid: Centroid;
Error: float}
member x.Print =
printfn "(Items: %d, Centroid: %A, Error: %.2f)"
x.Count x.Centroid x.Error
static member CreateCluster (item:'a) =
let items = [item]
let empty = nullVector item.DimValues
let centroid = calcCentroid items empty
{new Cluster<'a> with
Items = items and
Count = 1 and
Centroid = centroid and
Error = calcError centroid items}
member x.EvolveCluster (items:'a list) =
let l = items.Length
let centroid = calcCentroid items x.Centroid
{x with
Items = items;
Count = l;
Centroid = centroid;
Error = calcError centroid items}

let rand = new Random()
let InitClusters (k:int)
(items:'a array when 'a :> IClusterable) =
let length = items.Length
let getInitItem() = items.[rand.Next length]
Array.init k (fun x ->
getInitItem()
|> Cluster<'a>.CreateCluster)
let AssignItemsToClusters k (clusters:Cluster<'a> array)
(items:'a seq when 'a :> IClusterable) =
if k <= 0 then failwith "KMeans needs k > 0"
let findNearestCluster (item:'a) =
let minDist,nearest,lastPos =
clusters
|> Array.fold_left
(fun (minDist,nearest,pos) cluster ->
let distance = calcDist item.DimValues (cluster.Centroid)
if distance < minDist then
(distance,pos,pos+1)
else
(minDist,nearest,pos+1))
(Double.PositiveInfinity,0,0)
nearest
let assigned =
items
|> Seq.map (fun item -> item,findNearestCluster item)
let newClusters = Array.create k []
for item,nearest in assigned do
newClusters.[nearest] <- item::(newClusters.[nearest])
clusters
|> Array.mapi (fun i c -> c.EvolveCluster newClusters.[i])
let calcClusterError (clusters:Cluster<'a> array) =
clusters
|> Array.fold_left (fun acc cluster -> acc + cluster.Error) 0.
let kMeansClustering K epsilon
(items:'a array when 'a :> IClusterable) =
let k = if K <= 0 then 1 else K
let rec clustering lastError (clusters:Cluster<'a> array) =
let newClusters =
AssignItemsToClusters k clusters items
let newError = calcClusterError newClusters
if abs(lastError - newError) > epsilon then
clustering newError newClusters
else
newClusters,newError
InitClusters k items
|> clustering Double.PositiveInfinity

This vector approach shortens the code for the kMeans algorithm, but has a little longer runtime. I am interested in any suggestions.

Tags:

Centroid,

Clustering,

k-means,

KMeans,

vector
## Thursday, 29. January 2009

Jan 29

The K-means-Algorithm is one of the simplest unsupervised learning algorithms that solve the well known clustering problem. In this article I will show how we can implement this in F#.

First of all we define an interface for â€œclusterableâ€ objects:

type IClusterable =

abstract DimValues: float array with get

abstract Dimensions: int

The next step is to define a distance function. We will use n-dimensional Euclidean distance here.

let calcDist (p1:IClusterable) (p2:IClusterable) =

let sq x = x * x

if p1.Dimensions <> p2.Dimensions then

failwith "Cluster dimensions aren’t equal."

p2.DimValues

|> Array.fold2

(fun acc x y -> x – y |> sq |> (+) acc)

0. p1.DimValues

|> sqrt

Now we define a n-dimensional Centroid type. Our kMeans-Algorithm will minimize the squared distances to k centroids (or â€œmeansâ€).

type Centroid =

{Values: float array;

dimensions: int}

member x.Print = printfn "%A" x.Values

interface IClusterable with

member x.DimValues = x.Values

member x.Dimensions = x.dimensions

The centroid of a finite set of n-dimensional points x_{1}, x_{2}, â€¦, x_{n} is calculated as

let calcCentroid (items:’a list when ‘a :> IClusterable)

(oldCentroid:Centroid) =

let count = items.Length

if count = 0 then oldCentroid else

let mValues =

[|for d in 0..oldCentroid.dimensions-1 do

let sum =

items

|> List.sumBy (fun item -> item.DimValues.[d])

yield sum / (count |> float)|]

{ Values = mValues;

dimensions = oldCentroid.dimensions}

We made a small modification – if we donâ€™t have any items assigned to a cluster the centroid wonâ€™t change. This is important for the robustness of the algorithm.

Now we can calculate the squared errors to a centroid:

let calcError centroid (items:’a list when ‘a :> IClusterable) =

let calcDiffToCentroid (item:’a) =

centroid.Values

|> Array.fold2

(fun acc c i -> acc + (c-i)*(c-i))

0. item.DimValues

items

|> List.sumBy calcDiffToCentroid

For storing cluster information we create a cluster type:

type Cluster<‘a> when ‘a :> IClusterable =

{Items: ‘a list;

Count: int;

Centroid: Centroid;

Error: float}

member x.Print =

printfn "(Items: %d, Centroid: %A, Error: %.2f)"

x.Count x.Centroid x.Error

static member CreateCluster dimensions (item:’a) =

let items = [item]

let empty =

{ Values = [||];

dimensions = dimensions}

let centroid = calcCentroid items empty

{ Items = items;

Count = 1;

Centroid = centroid;

Error = calcError centroid items}

member x.EvolveCluster (items:’a list) =

let l = items.Length

let centroid = calcCentroid items x.Centroid

{x with

Items = items;

Count = l;

Centroid = centroid;

Error = calcError centroid items}

Now we need a function to initialize the clusters and a function to assign a items to the nearest cluster:

open System

let rand = new Random()

let InitClusters (k:int) dimensions

(items:’a array when ‘a :> IClusterable) =

let length = items.Length

let getInitItem() = items.[rand.Next length]

Array.init k (fun _ ->

getInitItem()

|> Cluster<‘a>.CreateCluster dimensions)

let AssignItemsToClusters k dimensions (clusters:Cluster<‘a> array)

(items:’a seq when ‘a :> IClusterable) =

if k <= 0 then failwith "KMeans needs k > 0"

let findNearestCluster item =

let minDist,nearest,lastPos =

clusters

|> Array.fold

(fun (minDist,nearest,pos) cluster ->

let distance = calcDist item (cluster.Centroid)

if distance < minDist then

(distance,pos,pos+1)

else

(minDist,nearest,pos+1))

(Double.PositiveInfinity,0,0)

nearest

let assigned =

items

|> Seq.map (fun item -> item,findNearestCluster item)

let newClusters = Array.create k []

for item,nearest in assigned do

newClusters.[nearest] <- item::(newClusters.[nearest])

clusters

|> Array.mapi (fun i c -> c.EvolveCluster newClusters.[i])

The last step is a function which calculates the squared error over all clusters:

let calcClusterError (clusters:Cluster<‘a> array) =

clusters

|> Array.sumBy (fun cluster -> cluster.Error)

Now it is an easy task to write the kMeans algorithm:

let kMeansClustering K dimensions epsilon

(items:’a array when ‘a :> IClusterable) =

let k = if K <= 0 then 1 else K

let rec clustering lastError (clusters:Cluster<‘a> array) =

let newClusters =

AssignItemsToClusters k dimensions clusters items

let newError = calcClusterError newClusters

if abs(lastError – newError) > epsilon then

clustering newError newClusters

else

newClusters,newError

InitClusters k dimensions items

|> clustering Double.PositiveInfinity

We can test this algorithm with random 2-dimensional points:

type Point =

{X:float; Y: float}

member x.Print = printfn "(%.2f,%.2f)" x.X x.Y

interface IClusterable with

member x.DimValues = [| x.X; x.Y |]

member x.Dimensions = 2

let point x y = {X = x; Y = y}

let getRandomCoordinate() = rand.NextDouble() – 0.5 |> (*) 10.

let randPoint _ = point (getRandomCoordinate()) (getRandomCoordinate())

let points n = Array.init n randPoint

let items = points 1000

printfn "Items:"

items |> Seq.iter (fun i -> i.Print)

let clusters,error = kMeansClustering 3 2 0.0001 items

printfn "\n"

clusters |> Seq.iter (fun c -> c.Print)

printfn "Error: %A" error

Next time I will show how we can simplify the code by using F#’s built-in type vector instead of array.

Tags:

Centroid,

Clustering,

F#,

k-means,

KMeans
## Sunday, 11. January 2009

Jan 11

In the last 3 posts I show how to set up a Continuous Integration environment for F# or C# projects with Subversion (part I), TeamCity (part II) and NUnit (part III).

This time I want to show how we can set up an automated documentation build.

##### Installing and using GhostDoc

â€œGhostDoc is a free add-in for Visual Studio that automatically generates XML documentation comments for C#. Either by using existing documentation inherited from base classes or implemented interfaces, or by deducing comments from name and type of e.g. methods, properties or parameters.â€

[product website]

GhostDoc is one of my favorite Visual Studio plugins. It allows me to generate comments for nearly all my C# functions. Of course these generated comments arenâ€™t sufficient in every case â€“ but they are a very good start.

*Unfortunately GhostDoc doesnâ€™t work for F# 🙁 â€“ the actual version works for C# and the support for VB.Net is called “experimental”.*

Download and install http://www.roland-weigelt.de/ghostdoc/.

Now you should be able to generate XML-based comments directly in your C# code:

The next step is to activate the xml-documentation in your Visual Studio build settings:

Commiting these changes and adjusting the build artifacts will produce the input for the documentation build:

##### Using Sandcastle to generate a documentation

â€œSandcastle produces accurate, MSDN style, comprehensive documentation by reflecting over the source assemblies and optionally integrating XML Documentation Comments. Sandcastle has the following key features:

- Works with or without authored comments
- Supports Generics and .NET Framework 2.0
- Sandcastle has 2 main components (MrefBuilder and Build Assembler)
- MrefBuilder generates reflection xml file for Build Assembler
- Build Assembler includes syntax generation, transformation..etc
**Sandcastle is used internally to build .Net Framework documentation**â€

[Microsoft.com]

Download and install â€œSandcastle – Documentation Compiler for Managed Class Librariesâ€ from Mircosoftâ€™s downloadpage or http://www.codeplex.com/Sandcastle.

For .chm generation you also have to install the “HTML Help Workshop“. If you want fancy HTMLHelp 2.x style (like MSDN has) you need â€œInnovasys HelpStudio Liteâ€ which is part of the Visual Studio 2008 SDK.

â€œHelpStudio Lite is offered with the Visual Studio SDK as an installed component that integrates with Visual Studio. HelpStudio Lite provides a set of authoring tools you use to author and build Help content, create and manage Help projects, and compile Help files that can be integrated with the Visual Studio Help collection.â€

[MSDN]

Last but not least I recommend to install the Sandcastle Help File Builder (SHFB) â€“ this tool gives you a GUI and helps to automate the Sandcastle process.

â€œSandcastle, created by Microsoft, is a tool used for creating MSDN-style documentation from .NET assemblies and their associated XML comments files. The current version is the May 2008 release. It is command line based and has no GUI front-end, project management features, or an automated build process like those that you can find in NDoc. The **Sandcastle Help File Builder** was created to fill in the gaps, provide the missing NDoc-like features that are used most often, and provide graphical and command line based tools to build a help file in an automated fashion.â€

[product homepage]

After the installation process start SHFB to generate a documentation project:

Add the TestCITestLib.dll to your project and add nunit.framework.dll as a dependency. Now try to compile your help project â€“ if everything is fine the output should look something like this:

##### Setting up the documentation build

One of the main principles of Continuous Integration is â€œKeep the Build Fastâ€ â€“ so I am working with staged builds here. The documentation build should only be started if the first build was successful and all UnitTests are positive. *For most projects it is enough to generate the documentation daily or even weekly.*

First of all we have to create a simple MSBuild file which executes the SHFB project:

<Project ToolsVersion="3.5" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<!-- 3rd Party Program Settings -->
<PropertyGroup>
<SandCastleHFBPath>c:\Program Files (x86)\EWSoftware\Sandcastle Help File Builder\</SandCastleHFBPath>
<SandCastleHFBCmd>$(SandCastleHFBPath)SandcastleBuilderConsole.exe</SandCastleHFBCmd>
<SandCastleHFBProject>HelpProject.shfb</SandCastleHFBProject>
</PropertyGroup>
<Target Name="BuildDocumentation">
<!-- Build source code docs -->
<Exec Command="%22$(SandCastleHFBCmd)%22 %22$(SandCastleHFBProject)%22" />
</Target>
</Project>

Add this build file and the SHFB project to your Visual Studio solution folder and commit these changes.

Now we can create a new TeamCity build configuration:

Take the same Version Control Settings like in the first build but use MSBuild as the build runner:

We want the documentation to be generated after a successful main build so we add a â€œdependency build triggerâ€:

Now we need the artifacts from the main build as the input for our documentation build:

Be sure you copy the artifacts to the right directory as given in your .shfb-project. Now run the DocumentationBuild â€“ if everything is fine the DocumentationBuild should give you the Documentation.chm as a new artifact:

Tags:

Continuous Integration,

F#,

GhostDoc,

JetBrains,

NDoc,

Sandcastle,

Sandcastle Help File Build,

subversion,

TeamCity,

Visual Studio 2008,

Visual Studio SDK
## Thursday, 8. January 2009

Jan 08

In the last two posts I showed how to set up a Subversion (part I: Setting up Source Control) and a TeamCity server (part II: Setting up a Continuous Integration Server).

This time I will show how we can integrate NUnit to run automated test at each build. *TeamCity supports all major Testing Frameworks (including MS Test) but I will concentrate on NUnit here.*

"NUnit is a unit-testing framework for all .Net languages. Initially ported from JUnit, the current production release, version 2.4, is the fifth major release of this xUnit based unit testing tool for Microsoft .NET. It is written entirely in C# and has been completely redesigned to take advantage of many .NET language features, for example custom attributes and other reflection related capabilities. NUnit brings xUnit to all .NET languages."

[product homepage]

##### Creating a TestProject

First of all download and install NUnit 2.4.8 (or higher) from http://www.nunit.org/.

Now we add a small function to our F# source code:

let rec factorial = function
| 0 -> 1
| n when n > 0 -> n * factorial (n-1)
| _ -> invalid_arg "Argument not valid"

This is the function we want to test. We add a new C# class library to our solution (e.g. â€œTestCITestLibâ€ 😉 ) and add a reference to *nunit.framework*. Inside this new TestLibrary we add a TestClass with the following code:

namespace TestCITestLib
{
using NUnit.Framework;
[TestFixture]
public class FactorialTest
{
[Test]
public void TestFactorial()
{
Assert.AreEqual(1, Program.factorial(0));
Assert.AreEqual(1, Program.factorial(1));
Assert.AreEqual(120, Program.factorial(5));
}
[Test]
public void TestFactorialException()
{
Program.factorial(-1);
}
}
}

To ensure the build runner is able to compile our solution we put the nunit.framework.dll near to our TestProject and commit our changes.

##### Configure TeamCity for UnitTesting

The next step is to tell TeamCity that the build runner should run our UnitTests:

If we now run the build we should get the following error:

Our second test function failed, because we didnâ€™t expect the *System.ArgumentException*. We can fix this issue by adding the corresponding attribute to the Testfunction:

[Test,
ExpectedException(typeof(System.ArgumentException))]
public void TestFactorialException()
{
Program.factorial(-1);
}0

##### Configure the build output

At this point we have a minimalistic Continuous Integration infrastructure. Every time someone performs a Commit on our repository a automated build will be started and the sources will be tested against the given UnitTests. Now we should concentrate on getting our build output â€“ the artifacts. The term artifact is usually used to refer to files or directories produced during a build. Examples of such artifacts are:

- Binaries (*.exe, *.dll)
- Software packages and installers (*.zip, *.msi)
- Documentation files (e.g. help files)
- Reports (test reports, coverage reports, â€¦)

At this time we are only interested in the binaries (this means CITestLib.dll). We can add the following artifact definition to our TeamCity project:

If we now rebuild our solution the build runner collects the configured artifacts and stores them with all build information:

Next time I will show how we can add more artifacts â€“ e.g. an automated documentation.

Tags:

Continuous Integration,

F#,

JetBrains,

MSTest,

nunit,

subversion,

TeamCity,

UnitTest,

UnitTesting,

Visual Studio 2008
Jan 08

In the last post I showed how easy it is to install Subversion and how it can be integrated into Visual Studio 2008 via AnkhSVN. This time we will set up a Continuous Integration server and configure a build runner.

As a Continuous Integration Server I recommend JetBrains TeamCity. You can download the free professional edition at http://www.jetbrains.com/teamcity/.

##### Installing TeamCity

During the installation process TeamCity wants to get a port number. Be sure that there will be no conflict with other web applications on your server. I chose port 8085 â€“ and my first build agent got this default settings:

In the next step you have to sign the License Agreement and to create an administrator account:

##### Creating a Project

Now you can create your project and set up the build configuration:

##### Setting up a build runner

For setting up specific build runners see the TeamCity documentation. For now I will use the â€œsln2008â€-Build runner (the Runner for Microsoft Visual Studio 2008 solution files).

Now add a build trigger. Whenever someone performs a Commit on the Subversion repository the server has to start a build.

##### Testing the build runner

After this step we have two options to start a build. The first one is by clicking the â€œRunâ€-button on the project website and the second is doing a checkin:

After performing the Commit the pending build appears on the project website:

After 60 seconds (see my configuration above) the build is starting. After the build is complete one can see the results in different ways. The simplest is via the project website:

Of cause TeamCity gives you a lot of different notification and monitoring possibilities including mail, RSS feeds or System Tray Notifications.

Next time I will show how we can integrate UnitTesting in our automated build scenario.

Tags:

Continuous Integration,

F#,

JetBrains,

subversion,

TeamCity,

Visual Studio 2008
Jan 08

In this post series I will show how one can easily set up a Continuous Integration scenario for F# or C# projects with completely free products.

â€œContinuous Integration is a software development practice where members of a team integrate their work frequently, usually each person integrates at least daily – leading to multiple integrations per day. Each integration is verified by an automated build (including test) to detect integration errors as quickly as possible.â€

[Martin Fowler]

The first step for Continuous Integration is to set up a Source Control environment. For many good reasons I choose Subversion – some of them are:

- Atomic commits
- Rename/Move/Copy actions preserve the revision history
- Directories are versioned
- Multiple repository access protocols including HTTP and HTTPS
- There is a nice Visual Studio integration (see below)
- Last but not least: it is completely free 🙂

##### Source code version control with Subversion

All you need for setting up a complete Subversion environment is to download and install VisualSVN Server from http://www.visualsvn.com/.

â€œVisualSVN Server is a package that contains everything you need to install, configure and manage **Subversion server** for your team on **Windows platform.** It includes Subversion, Apache and a management console.â€

[product homepage]

Now you can create user accounts and a repository â€CITestâ€ in the VisualSVN Server management console.

##### Subversion integration in Visual Studio 2008

Download and install AnkhSVN 2.0.x from http://ankhsvn.open.collab.net/.

â€œAnkhSVN is a Subversion SourceControl Provider for Visual Studio. The software allows you to perform the most common version control operations directly from inside the Microsoft Visual Studio IDE. With AnkhSVN you no longer need to leave your IDE to perform tasks like viewing the status of your source code, updating your Subversion working copy and committing changes. You can even browse your repository and you can plug-in your favorite diff tool.â€

[product homepage]

Now you can add your C#/F#-solution to your CITest-repository:

- Open the solution in Visual Studio 2008
- Open â€œView/Repository Explorerâ€ and add your repository to AnkhSVN
- You can copy the URL from the VisualSVN Server management console (In my case this is https://omega:8443/svn/CITest/)
- You also have to give AnkhSVN your Subversion login

- Now click the right mouse button on your solution in the Solution Explorer and choose â€œAdd Solution to Subversionâ€

Now we can modify our solution and commit our changes via â€œCommit solution changesâ€ in the Solution Explorer:

We can easily control our changes via AnkhSVNâ€™s â€œRepository Explorerâ€ and â€œHistory Viewerâ€ in Visual Studio 2008:

If we do any changes in the Program.fs file, we can see a diff via the â€œShow changesâ€ functionality:

If you donâ€™t like the default Diff tool you might try WinMerge.

Next time I will show how to set up a Continuous Integration server.

Tags:

AnkhSVN,

Continuous Integration,

F#,

Source Control,

subversion,

Visual Studio 2008,

VisualSVN,

WinMerge
## Monday, 8. December 2008

Dec 08

Claudio Cherubino (from fsharp.it/) posted his solution to Euler Project – problem 53. As I dealed a lot with Dynamic Programming in the recent time, I tried to solve the problem with a dynamic program in F#.

Project Euler – Problem 53:

How many values of C(*n*,*k*), for 1 â‰¤ *n* â‰¤ 100, exceed one-million?

Remark: C(n,k) are the binomial coefficients.

As it turned out, this is not that complicated if one knows the recursive function for the binomial coefficients (see Wikipedia):

with

This is easily transformed into a F# program:

let binomials = Array2.create 101 101 1I
let answer = ref 0
for n in [1..100] do
for k in [1..n - 1] do
binomials.[n, k] <- binomials.[n - 1,k] + binomials.[n - 1,k - 1]
if binomials.[n, k] > 1000000I then
answer := !answer + 1
!answer |> printf "Answer: %A"

Claudio’s program took 1315ms on my computer. The dynamic program needs only 63ms. But we can still do better if we use the symmetry of Pascal’s triangle.

This leads to an algorithm, which calculates only half of the binomial coefficients.

let binomials = Array2.create 101 101 1I
let answer = ref 0
for n in [1..100] do
for k in [1..n/2] do
let b = binomials.[n - 1,k] + binomials.[n - 1,k - 1]
binomials.[n, k] <- b
binomials.[n, n - k] <- b
if b > 1000000I then
if k = n-k then
answer := !answer + 1
else
answer := !answer + 2

!answer |> printf "Answer: %A"

This version needs only 45ms – but we are not ready. I mentioned Pascal’s triangle and its symmetry. But we can use another property. We don’t have to calculate the complete row, if we exceed 100000. All values behind this threshold have to be greater.

let binomials = Array2.create 101 101 1I
let answer = ref 0
for n in [1..100] do
let threshold_reached = ref false
let c = ref 0
for k in [1..n/2] do
if not !threshold_reached then
let b = binomials.[n - 1,k] + binomials.[n - 1,k - 1]
binomials.[n, k] <- b
binomials.[n, n - k] <- b
if b > 1000000I then
threshold_reached := true
else
c := !c + 1
if !threshold_reached then
answer := !answer + (n - 1) - (2 * !c)

!answer |> printf "Answer: %A"

This final version took only 29 ms.

In the next posting I will show a version using memoization.

Tags:

binomial coefficient,

dynamic program,

F#,

problem 53,

project euler
## Sunday, 7. December 2008

Dec 07

The “Subset Sum”-problem is given as the following:

SUBSET SUM

Input: Numbers a_{1}, a_{2}, . . . , a_{n}, S âˆˆ N.

Question: Is there a subset I âŠ† {1,…,n} with âˆ‘ a_{i} = S?

Finding a solution for this decision problem is a very easy task in F#.

let hasSubsetSum_Naive (numbers: int list) S =
let rec hasSubsetSum (a: int list) lastSum =
match a with
| [] -> false
| x::rest ->
if lastSum + x = S then
true
elif lastSum + x > S then
false
else
hasSubsetSum rest lastSum || hasSubsetSum rest (lastSum+x)
hasSubsetSum numbers 0
let numbers = [ 5;4;3;6;7;12 ]
let searchedSum = 33
hasSubsetSum_Naive numbers searchedSum

Of course this small program can be easily adjusted to give the subset I back. Unfortunately this naÃ¯ve approach leads to a running time of O(2^n).

But if S is small we can build a dynamic program and decide the problem in O(n*S) time.

This can be easily transformed into F# code.

let hasSubsetSum (numbers: int array) S =
if numbers |> Array.exists (fun x -> x = S) then
true
else
let a = numbers |> Array.filter (fun x -> x < S)
let n = a.Length
if n = 0 then
false
else
let v = Array2.create n (S+1) 0
let u = Array2.create n (S+1) false
let t = Array2.create n (S+1) 0
for j in [1..S] do
for i in [0..n-1] do
if j - a.[i] >= 0 && not u.[i,j - a.[i]] then
v.[i,j] <- t.[i,j - a.[i]] + a.[i]
if ((i = 0) || (i > 0 && t.[i-1,j] <> j)) && v.[i,j] = j then
u.[i,j] <- true
if v.[i,j] = j then
t.[i,j] <- j
else
if i > 0 then
t.[i,j] <- max t.[i-1,j] t.[i,j-1]
else
t.[i,j] <- t.[0,j-1]
**t.[n-1,S] = S**

Tags:

decision problem,

dynamic programming,

F#,

SubsetSum
## Friday, 5. December 2008

Dec 05

If we want to calculate the distance between two objects, we need some sort of distance function. I recently showed how we can compute the “edit distance” between two strings (see Damerau-Levenshtein-Distance in F# – part I, part II and part III). This time I will consider the distance between two points in the plane and on the surface of the earth.

First of all we need some sort of location type, which stores information about the coordinates (I used this type before). I will use the “Units of Measure”-feature in F# to distinct between <degree> vs. <radian> and <m> vs. <km>. If you are not familiar with this concept see Andrew Kennedy’s excellent blog series about Units of Measures in F#.

open Microsoft.FSharp.Math.SI
[<Measure>]
type degree
[<Measure>]
type radian =
static member per_degree = Math.PI/180.0<degree/radian>
[<Measure>]
type km =
static member per_meter = 1000.<m/km>

type Location =
{Latitude:float<degree>;
Longitude:float<degree> }
member x.YPos = x.Longitude
member x.XPos = x.Latitude
member x.Longitude_radian = x.Longitude * radian.per_degree
member x.Latitude_radian = x.Latitude * radian.per_degree
member x.Coordinates = (x.Longitude,x.Latitude)
static member CreateLocation(latitude, longitude) =
{new Location with
Latitude = latitude and
Longitude = longitude}

Now we can easily compute the Euclidean distance for two points in the plane.

let sq x = x * x
let CalcEuclideanDistance (s:Location) (t:Location) =
let d1 = (t.Longitude - s.Longitude) / 1.<degree>
let d2 = (t.Latitude - s.Latitude) / 1.<degree>
((sq d1 + sq d2) |> Math.Sqrt) * 1.<m>

If we want to approximate the distance of two cities on the surface of earth, we can use the “Great circle distance“.

“The great-circle distance is the shortest distance between any two points on the surface of a sphere measured along a path on the surface of the sphere (as opposed to going through the sphere’s interior)” – Wikipedia

let kmFactor = 6372.795<km>
let sin_rad x = Math.Sin(x / 1.<radian>)
let cos_rad x = Math.Cos(x / 1.<radian>)
/// Great Circle Distance
let CalcGreatCircleDistance (s:Location) (t:Location)=
if s = t then
0.<m>
else
let factor = kmFactor * km.per_meter
let b =
sin_rad s.Latitude_radian * sin_rad t.Latitude_radian +
cos_rad s.Latitude_radian * cos_rad t.Latitude_radian *
cos_rad (t.Longitude_radian - s.Longitude_radian)
Math.Acos(b) * factor

Tags:

distance on the surface of the earth,

Euclidean distance,

F#,

Great circle distance