Rash thoughts about .NET, C#, F# and Dynamics NAV.


"Every solution will only lead to new problems."

Category English posts

Monday, 9. February 2009


Finding the m smallest elements in a collection in F#

Filed under: English posts,F#,Informatik — Steffen Forkmann at 11:56 Uhr

Sometimes we need a function which finds the m smallest elements in a list or array.

We can use the idea of bubble sort to maintain an sorted array of the m smallest elements so far. This idea gives us a O(m*n) algorithm which solves our problem:

let bubble array element =
  let rec bubbleStep i =
    if i < (array |> Array.length) then
      match array.[i] with
        | None -> 
           if i = array.Length - 1 || array.[i+1] <> None then
             array.[i] <- Some element
                          
           bubbleStep (i+1)
        | Some x -> 
           if element < x then
             if i = 0 then 
               array.[i] <- Some element 
             else
               array.[i-1] <- Some x
               array.[i] <- Some element
             bubbleStep (i+1)
  
  bubbleStep 0
  array    
  
let mMin seq m =
  Seq.fold bubble (Array.create m None) seq

We can easily generalize this function to compute maximum and minimum:

let bubble f array element =
  let rec bubbleStep i =
    if i < (array |> Array.length) then
      match array.[i] with
        | None -> 
           if i = array.Length - 1 || array.[i+1] <> None then
             array.[i] <- Some element
                          
           bubbleStep (i+1)
        | Some x -> 
           if f element x then
             if i = 0 then 
               array.[i] <- Some element 
             else
               array.[i-1] <- Some x
               array.[i] <- Some element
             bubbleStep (i+1)
  
  bubbleStep 0
  array    
    
let mBubble f seq m =
  Seq.fold (bubble f) (Array.create m None) seq
  
let mMin seq m = mBubble (<) seq m
let mMax seq m = mBubble (>) seq m

If we don’t want to store Option-Values we can use this simplification:

let bubble f m array element =
  let rec bubbleStep i =
    if i < (array |> Array.length) then
      let x = array.[i]
      if f element x then
         if i = 0 then 
           array.[i] <- element 
         else
           array.[i-1] <- x
           array.[i] <- element
         bubbleStep (i+1)
  
  if Array.length array < m then
    let newArray = Array.append [|element|] array 
    newArray 
      |> Array.sort 
          (fun a b -> 
            if a = b then 0 
            elif f a b then 1 else -1)
    newArray
  else
    bubbleStep 0
    array    
    
let mBubble f seq m =    
  Seq.fold (bubble f m) Array.empty seq
  
let mMin seq m = mBubble (<) seq m
let mMax seq m = mBubble (>) seq m
Tags:

Saturday, 31. January 2009


n-dimensional k-means clustering with F# – part II

Filed under: BioInformatik,English posts,F#,Informatik — Steffen Forkmann at 11:28 Uhr

In the last post I show how we can calculate clusters of n-dimensional Objects with the K-means-Algorithm in F#. This time I will simplify the code by using the built-in type vector.

First of all we redefine the interface for “clusterable” objects:

type IClusterable =
  abstract DimValues: vector with get

This time we do not need any dimension information – the vector type will care about this. Now we can reduce our distance function to:

let calcDist (p1:vector) (p2:vector) = (p1 - p2).Norm

The function Norm calculates the 2-Norm of a vector.

2-Norm ==> Eulidean distance 

So calcDist will still compute the n-dimensional Euclidean distance.

The next step is the definition of a n-dimensional Centroid type. This time we will only use a type alias of vector:

type Centroid = vector

The calculation of the centroid and the centroid error can be written as:

let calcCentroid (items:'a list when 'a :> IClusterable)
     (oldCentroid:Centroid) =
  let count = items.Length
  if count = 0 then oldCentroid else  
  let sum =
    items
      |> List.fold_left 
        (fun vec item -> vec + item.DimValues) // vector addition
        (nullVector oldCentroid)
   
  sum * (1./(count |> float))  // multiply with scalar
     

let calcError centroid (items:'a list when 'a :> IClusterable) =
  let sq x = x * x

  items
    |> List.sum_by (fun i -> calcDist centroid i.DimValues |> sq)

The calcError function is straight forward but not perfect in terms of performance. As we use the calcDist function and hence the 2-Norm the function will first calculate a square root and then squares the result.

The rest of the code is pretty much same as in the last post:

/// creates n-dimensional 0 - vector
let nullVector (v:vector) = v * 0.
    
type Cluster<'a> when 'a :> IClusterable =
  {Items: 'a list;
   Count: int;
   Centroid: Centroid;
   Error: float}

  member x.Print =
     printfn "(Items: %d, Centroid: %A, Error: %.2f)"
       x.Count x.Centroid x.Error

  static member CreateCluster (item:'a) =
    let items = [item]
    let empty = nullVector item.DimValues

    let centroid = calcCentroid items empty
    {new Cluster<'a> with
      Items = items and
      Count = 1 and
      Centroid = centroid and
      Error = calcError centroid items}

  member x.EvolveCluster (items:'a list) =
    let l = items.Length
    let centroid = calcCentroid items x.Centroid
    {x with
      Items = items;
      Count = l;
      Centroid = centroid;
      Error = calcError centroid items} 

let rand = new Random()  

let InitClusters (k:int)
      (items:'a array when 'a :> IClusterable)  =
  let length = items.Length
  let getInitItem() = items.[rand.Next length]
  Array.init k (fun x ->
                  getInitItem()
                    |> Cluster<'a>.CreateCluster)
                    
                    
let AssignItemsToClusters k (clusters:Cluster<'a> array)
      (items:'a seq when 'a :> IClusterable) =
  if k <= 0 then failwith "KMeans needs k > 0"
  let findNearestCluster (item:'a) =
    let minDist,nearest,lastPos =
      clusters
        |> Array.fold_left
            (fun (minDist,nearest,pos) cluster ->
               let distance = calcDist item.DimValues (cluster.Centroid)
               if distance < minDist then
                 (distance,pos,pos+1)
               else
                 (minDist,nearest,pos+1))
            (Double.PositiveInfinity,0,0)
    nearest

  let assigned =
    items
      |> Seq.map (fun item -> item,findNearestCluster item)

  let newClusters = Array.create k []
  for item,nearest in assigned do
    newClusters.[nearest] <- item::(newClusters.[nearest])

  clusters
    |> Array.mapi (fun i c -> c.EvolveCluster newClusters.[i])  

    
let calcClusterError (clusters:Cluster<'a> array) =
  clusters
    |> Array.fold_left (fun acc cluster -> acc + cluster.Error) 0.  
                      
let kMeansClustering K epsilon
       (items:'a array when 'a :> IClusterable) =
  let k = if K <= 0 then 1 else K
  let rec clustering lastError (clusters:Cluster<'a> array) =
    let newClusters =
      AssignItemsToClusters k clusters items
    let newError = calcClusterError newClusters

    if abs(lastError - newError) > epsilon then
      clustering newError newClusters
    else
      newClusters,newError    

  InitClusters k items
    |> clustering Double.PositiveInfinity

This vector approach shortens the code for the kMeans algorithm, but has a little longer runtime. I am interested in any suggestions.

Tags: , , , ,

Thursday, 29. January 2009


n-dimensional k-means clustering with F#

Filed under: BioInformatik,English posts,F#,Informatik — Steffen Forkmann at 11:38 Uhr

The K-means-Algorithm is one of the simplest unsupervised learning algorithms that solve the well known clustering problem. In this article I will show how we can implement this in F#.

First of all we define an interface for “clusterable” objects:

type IClusterable =

  abstract DimValues: float array with get

  abstract Dimensions: int

The next step is to define a distance function. We will use n-dimensional Euclidean distance here.

Euclidean distance

let calcDist (p1:IClusterable) (p2:IClusterable) =

  let sq x = x * x

  if p1.Dimensions <> p2.Dimensions then

    failwith "Cluster dimensions aren’t equal."

 

  p2.DimValues

    |> Array.fold2

        (fun acc x y -> x – y |> sq |> (+) acc)

        0. p1.DimValues

   |> sqrt 

Now we define a n-dimensional Centroid type. Our kMeans-Algorithm will minimize the squared distances to k centroids (or “means”).

type Centroid =

  {Values: float array;

   dimensions: int}

  member x.Print = printfn "%A" x.Values

  interface IClusterable with

    member x.DimValues = x.Values   

    member x.Dimensions = x.dimensions   

The centroid of a finite set of n-dimensional points x1, x2, …, xn is calculated as

image

let calcCentroid (items:’a list when ‘a :> IClusterable)

     (oldCentroid:Centroid) =

  let count = items.Length

  if count = 0 then oldCentroid else

  let mValues =    

    [|for d in 0..oldCentroid.dimensions-1 do

        let sum =

          items

            |> List.sumBy (fun item -> item.DimValues.[d])

 

        yield sum / (count |> float)|]

 

  { Values = mValues;

    dimensions = oldCentroid.dimensions}

We made a small modification – if we don’t have any items assigned to a cluster the centroid won’t change. This is important for the robustness of the algorithm.

Now we can calculate the squared errors to a centroid:

let calcError centroid (items:’a list when ‘a :> IClusterable) =

  let calcDiffToCentroid (item:’a) =

    centroid.Values

      |> Array.fold2

          (fun acc c i -> acc + (c-i)*(c-i))

          0. item.DimValues

 

  items

    |> List.sumBy calcDiffToCentroid 

For storing cluster information we create a cluster type:

type Cluster<‘a> when ‘a :> IClusterable =

  {Items: ‘a list;

   Count: int;

   Centroid: Centroid;

   Error: float}

 

  member x.Print =

     printfn "(Items: %d, Centroid: %A, Error: %.2f)"

       x.Count x.Centroid x.Error

 

  static member CreateCluster dimensions (item:’a) =

    let items = [item]

    let empty =

      { Values = [||];

        dimensions = dimensions}

 

    let centroid = calcCentroid items empty

    { Items = items;

      Count = 1;

      Centroid = centroid;

      Error = calcError centroid items}

 

  member x.EvolveCluster (items:’a list) =

    let l = items.Length

    let centroid = calcCentroid items x.Centroid

    {x with

      Items = items;

      Count = l;

      Centroid = centroid;

      Error = calcError centroid items} 

Now we need a function to initialize the clusters and a function to assign a items to the nearest cluster:

open System

 

let rand = new Random() 

 

let InitClusters (k:int) dimensions

      (items:’a array when ‘a :> IClusterable)  =

  let length = items.Length

  let getInitItem() = items.[rand.Next length]

  Array.init k (fun _ ->

                  getInitItem()

                    |> Cluster<‘a>.CreateCluster dimensions) 

 

let AssignItemsToClusters k dimensions (clusters:Cluster<‘a> array)

      (items:’a seq when ‘a :> IClusterable) =

  if k <= 0 then failwith "KMeans needs k > 0"  

  let findNearestCluster item =

    let minDist,nearest,lastPos =

      clusters

        |> Array.fold

            (fun (minDist,nearest,pos) cluster ->

               let distance = calcDist item (cluster.Centroid)

               if distance < minDist then

                 (distance,pos,pos+1)

               else

                 (minDist,nearest,pos+1))

            (Double.PositiveInfinity,0,0) 

    nearest

 

  let assigned =

    items

      |> Seq.map (fun item -> item,findNearestCluster item)

 

  let newClusters = Array.create k []  

  for item,nearest in assigned do        

    newClusters.[nearest] <- item::(newClusters.[nearest])

 

  clusters

    |> Array.mapi (fun i c -> c.EvolveCluster newClusters.[i])   

The last step is a function which calculates the squared error over all clusters:

let calcClusterError (clusters:Cluster<‘a> array) =

  clusters

    |> Array.sumBy (fun cluster -> cluster.Error)

Now it is an easy task to write the kMeans algorithm:

let kMeansClustering K dimensions epsilon

       (items:’a array when ‘a :> IClusterable) =

  let k = if K <= 0 then 1 else K

  let rec clustering lastError (clusters:Cluster<‘a> array) =

    let newClusters =

      AssignItemsToClusters k dimensions clusters items

    let newError = calcClusterError newClusters

 

    if abs(lastError – newError) > epsilon then       

      clustering newError newClusters

    else

      newClusters,newError   

 

  InitClusters k dimensions items

    |> clustering Double.PositiveInfinity 

We can test this algorithm with random 2-dimensional points:

type Point =

  {X:float; Y: float} 

  member x.Print = printfn "(%.2f,%.2f)" x.X x.Y

  interface IClusterable with

    member x.DimValues = [| x.X; x.Y |]

    member x.Dimensions = 2  

 

let point x y = {X = x; Y = y}

let getRandomCoordinate() = rand.NextDouble() – 0.5 |> (*) 10. 

let randPoint _ = point (getRandomCoordinate()) (getRandomCoordinate())    

let points n = Array.init n randPoint

let items = points 1000

 

printfn "Items:"

items |> Seq.iter (fun i -> i.Print)

 

let clusters,error = kMeansClustering 3 2 0.0001 items

printfn "\n"

clusters |> Seq.iter (fun c -> c.Print)   

printfn "Error: %A" error   

Next time I will show how we can simplify the code by using F#’s built-in type vector instead of array.

Tags: , , , ,

Sunday, 11. January 2009


How I do Continuous Integration with my C# / F# projects – part IV: Adding a documentation

Filed under: C#,English posts,F#,Tools,Visual Studio — Steffen Forkmann at 12:46 Uhr

In the last 3 posts I show how to set up a Continuous Integration environment for F# or C# projects with Subversion (part I), TeamCity (part II) and NUnit (part III).

This time I want to show how we can set up an automated documentation build.

Installing and using GhostDoc

“GhostDoc is a free add-in for Visual Studio that automatically generates XML documentation comments for C#. Either by using existing documentation inherited from base classes or implemented interfaces, or by deducing comments from name and type of e.g. methods, properties or parameters.”

[product website]

GhostDoc is one of my favorite Visual Studio plugins. It allows me to generate comments for nearly all my C# functions. Of course these generated comments aren’t sufficient in every case – but they are a very good start.

Unfortunately GhostDoc doesn’t work for F# :-( – the actual version works for C# and the support for VB.Net is called “experimental”.

Download and install http://www.roland-weigelt.de/ghostdoc/.

Now you should be able to generate XML-based comments directly in your C# code:

Using GhostDoc

Generated XML-comment

The next step is to activate the xml-documentation in your Visual Studio build settings:

image

Commiting these changes and adjusting the build artifacts will produce the input for the documentation build:

Adjust the build artifacts

Build artifacts

Using Sandcastle to generate a documentation

“Sandcastle produces accurate, MSDN style, comprehensive documentation by reflecting over the source assemblies and optionally integrating XML Documentation Comments. Sandcastle has the following key features:

  • Works with or without authored comments
  • Supports Generics and .NET Framework 2.0
  • Sandcastle has 2 main components (MrefBuilder and Build Assembler)
  • MrefBuilder generates reflection xml file for Build Assembler
  • Build Assembler includes syntax generation, transformation..etc
  • Sandcastle is used internally to build .Net Framework documentation

[Microsoft.com]

Download and install “Sandcastle – Documentation Compiler for Managed Class Libraries” from Mircosoft’s downloadpage or http://www.codeplex.com/Sandcastle.

For .chm generation you also have to install the “HTML Help Workshop“. If you want fancy HTMLHelp 2.x style (like MSDN has) you need “Innovasys HelpStudio Lite” which is part of the Visual Studio 2008 SDK.

“HelpStudio Lite is offered with the Visual Studio SDK as an installed component that integrates with Visual Studio. HelpStudio Lite provides a set of authoring tools you use to author and build Help content, create and manage Help projects, and compile Help files that can be integrated with the Visual Studio Help collection.”

[MSDN]

Last but not least I recommend to install the Sandcastle Help File Builder (SHFB) – this tool gives you a GUI and helps to automate the Sandcastle process.

“Sandcastle, created by Microsoft, is a tool used for creating MSDN-style documentation from .NET assemblies and their associated XML comments files. The current version is the May 2008 release. It is command line based and has no GUI front-end, project management features, or an automated build process like those that you can find in NDoc. The Sandcastle Help File Builder was created to fill in the gaps, provide the missing NDoc-like features that are used most often, and provide graphical and command line based tools to build a help file in an automated fashion.”

[product homepage]

After the installation process start SHFB to generate a documentation project:

Sandcastle Help File Builder

Add the TestCITestLib.dll to your project and add nunit.framework.dll as a dependency. Now try to compile your help project – if everything is fine the output should look something like this:

Generated Help

Setting up the documentation build

One of the main principles of Continuous Integration is “Keep the Build Fast” – so I am working with staged builds here. The documentation build should only be started if the first build was successful and all UnitTests are positive. For most projects it is enough to generate the documentation daily or even weekly.

First of all we have to create a simple MSBuild file which executes the SHFB project:

<Project ToolsVersion="3.5" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <!-- 3rd Party Program Settings -->
  <PropertyGroup>
    <SandCastleHFBPath>c:\Program Files (x86)\EWSoftware\Sandcastle Help File Builder\</SandCastleHFBPath>
    <SandCastleHFBCmd>$(SandCastleHFBPath)SandcastleBuilderConsole.exe</SandCastleHFBCmd>
    <SandCastleHFBProject>HelpProject.shfb</SandCastleHFBProject>
  </PropertyGroup>

  <Target Name="BuildDocumentation">
    <!-- Build source code docs -->
    <Exec Command="%22$(SandCastleHFBCmd)%22 %22$(SandCastleHFBProject)%22" />
  </Target>
</Project>

Add this build file and the SHFB project to your Visual Studio solution folder and commit these changes.

Put the build settings into the solution folder

Now we can create a new TeamCity build configuration:

Create a new build configuration

Take the same Version Control Settings like in the first build but use MSBuild as the build runner:

Take the MSBuild runner

We want the documentation to be generated after a successful main build so we add a “dependency build trigger”:

image

Now we need the artifacts from the main build as the input for our documentation build:

Set up artifacts dependencies

Be sure you copy the artifacts to the right directory as given in your .shfb-project. Now run the DocumentationBuild – if everything is fine the DocumentationBuild should give you the Documentation.chm as a new artifact:

image

Tags: , , , , , , , , , ,

Thursday, 8. January 2009


How I do Continuous Integration with my C# / F# projects – part III: Running automated UnitTests

Filed under: C#,English posts,F#,Tools,Visual Studio — Steffen Forkmann at 19:13 Uhr

In the last two posts I showed how to set up a Subversion (part I: Setting up Source Control) and a TeamCity server (part II: Setting up a Continuous Integration Server).

This time I will show how we can integrate NUnit to run automated test at each build. TeamCity supports all major Testing Frameworks (including MS Test) but I will concentrate on NUnit here.

"NUnit is a unit-testing framework for all .Net languages. Initially ported from JUnit, the current production release, version 2.4, is the fifth major release of this xUnit based unit testing tool for Microsoft .NET. It is written entirely in C# and has been completely redesigned to take advantage of many .NET language features, for example custom attributes and other reflection related capabilities. NUnit brings xUnit to all .NET languages."

[product homepage]

Creating a TestProject

First of all download and install NUnit 2.4.8 (or higher) from http://www.nunit.org/.

Now we add a small function to our F# source code:

let rec factorial = function  
  | 0 -> 1
  | n when n > 0 -> n * factorial (n-1)
  | _ -> invalid_arg "Argument not valid"

This is the function we want to test. We add a new C# class library to our solution (e.g. “TestCITestLib” ;-) ) and add a reference to nunit.framework. Inside this new TestLibrary we add a TestClass with the following code:

namespace TestCITestLib
{
    using NUnit.Framework;

    [TestFixture]
    public class FactorialTest
    {
        [Test]
        public void TestFactorial()
        {
            Assert.AreEqual(1, Program.factorial(0));
            Assert.AreEqual(1, Program.factorial(1));
            Assert.AreEqual(120, Program.factorial(5));
        }

        [Test]
        public void TestFactorialException()
        {
            Program.factorial(-1);
        }
    }
}

To ensure the build runner is able to compile our solution we put the nunit.framework.dll near to our TestProject and commit our changes.

Adding Nunit.framework.dll

Configure TeamCity for UnitTesting

The next step is to tell TeamCity that the build runner should run our UnitTests:

Configure build runner for NUnit

If we now run the build we should get the following error:

UnitTest error during automated build

Our second test function failed, because we didn’t expect the System.ArgumentException. We can fix this issue by adding the corresponding attribute to the Testfunction:

[Test,
 ExpectedException(typeof(System.ArgumentException))]
public void TestFactorialException()
{
    Program.factorial(-1);
}0

Tests passed

Configure the build output

At this point we have a minimalistic Continuous Integration infrastructure. Every time someone performs a Commit on our repository a automated build will be started and the sources will be tested against the given UnitTests. Now we should concentrate on getting our build output – the artifacts. The term artifact is usually used to refer to files or directories produced during a build. Examples of such artifacts are:

  • Binaries (*.exe, *.dll)
  • Software packages and installers (*.zip, *.msi)
  • Documentation files (e.g. help files)
  • Reports (test reports, coverage reports, …)

At this time we are only interested in the binaries (this means CITestLib.dll). We can add the following artifact definition to our TeamCity project:

Configure artifacts in TeamCity

If we now rebuild our solution the build runner collects the configured artifacts and stores them with all build information:

Collected artifacts

Next time I will show how we can add more artifacts – e.g. an automated documentation.

Tags: , , , , , , , , ,

How I do Continuous Integration with my C# / F# projects – part II: Setting up a Continuous Integration Server

Filed under: C#,English posts,F#,Tools,Visual Studio — Steffen Forkmann at 14:50 Uhr

In the last post I showed how easy it is to install Subversion and how it can be integrated into Visual Studio 2008 via AnkhSVN. This time we will set up a Continuous Integration server and configure a build runner.

As a Continuous Integration Server I recommend JetBrains TeamCity. You can download the free professional edition at http://www.jetbrains.com/teamcity/.

Installing TeamCity

During the installation process TeamCity wants to get a port number. Be sure that there will be no conflict with other web applications on your server. I chose port 8085 – and my first build agent got this default settings:

Configure a Build Agent

In the next step you have to sign the License Agreement and to create an administrator account:

Set up administrator account

Creating a Project

Now you can create your project and set up the build configuration:

Create project on your TeamCity Server

Create CITest project

Create Build Configuration

Set up version control root

Setting up a build runner

For setting up specific build runners see the TeamCity documentation. For now I will use the “sln2008”-Build runner (the Runner for Microsoft Visual Studio 2008 solution files).

Runner for Microsoft Visual Studio 2008 solution files

Now add a build trigger. Whenever someone performs a Commit on the Subversion repository the server has to start a build.

Build trigger

Testing the build runner

After this step we have two options to start a build. The first one is by clicking the “Run”-button on the project website and the second is doing a checkin:

Triggering the build running with a checkin

After performing the Commit the pending build appears on the project website:

Pending build 

After 60 seconds (see my configuration above) the build is starting. After the build is complete one can see the results in different ways. The simplest is via the project website:

View build results

Of cause TeamCity gives you a lot of different notification and monitoring possibilities including mail, RSS feeds or System Tray Notifications.

Next time I will show how we can integrate UnitTesting in our automated build scenario.

Tags: , , , , ,

How I do Continuous Integration with my C# / F# projects – part I: Setting up Source Control

Filed under: C#,English posts,F#,Tools,Visual Studio — Steffen Forkmann at 14:31 Uhr

In this post series I will show how one can easily set up a Continuous Integration scenario for F# or C# projects with completely free products.

“Continuous Integration is a software development practice where members of a team integrate their work frequently, usually each person integrates at least daily – leading to multiple integrations per day. Each integration is verified by an automated build (including test) to detect integration errors as quickly as possible.”

[Martin Fowler]

The first step for Continuous Integration is to set up a Source Control environment. For many good reasons I choose Subversion – some of them are:

  • Atomic commits
  • Rename/Move/Copy actions preserve the revision history
  • Directories are versioned
  • Multiple repository access protocols including HTTP and HTTPS
  • There is a nice Visual Studio integration (see below)
  • Last but not least: it is completely free :-)
Source code version control with Subversion

All you need for setting up a complete Subversion environment is to download and install VisualSVN Server from http://www.visualsvn.com/.

“VisualSVN Server is a package that contains everything you need to install, configure and manage Subversion server for your team on Windows platform. It includes Subversion, Apache and a management console.”

[product homepage]

Now you can create user accounts and a repository ”CITest” in the VisualSVN Server management console.

Create a repository

VisualSVN Server management console

Subversion integration in Visual Studio 2008

Download and install AnkhSVN 2.0.x from http://ankhsvn.open.collab.net/.

“AnkhSVN is a Subversion SourceControl Provider for Visual Studio. The software allows you to perform the most common version control operations directly from inside the Microsoft Visual Studio IDE. With AnkhSVN you no longer need to leave your IDE to perform tasks like viewing the status of your source code, updating your Subversion working copy and committing changes. You can even browse your repository and you can plug-in your favorite diff tool.”

[product homepage]

Now you can add your C#/F#-solution to your CITest-repository:

  • Open the solution in Visual Studio 2008
  • Open “View/Repository Explorer” and add your repository to AnkhSVN
    • You can copy the URL from the VisualSVN Server management console (In my case this is https://omega:8443/svn/CITest/)
    • You also have to give AnkhSVN your Subversion login
  • Now click the right mouse button on your solution in the Solution Explorer and choose “Add Solution to Subversion”

Add Solution to Subversion

Don't forget to specify a Log Message

Now we can modify our solution and commit our changes via “Commit solution changes” in the Solution Explorer:

Commit Solution Changes

We can easily control our changes via AnkhSVN’s “Repository Explorer” and “History Viewer” in Visual Studio 2008:

Repository Explorer and History Viewer

If we do any changes in the Program.fs file, we can see a diff via the “Show changes” functionality:

Diff with AnkhSVN

If you don’t like the default Diff tool you might try WinMerge.

Next time I will show how to set up a Continuous Integration server.

Tags: , , , , , , ,

Monday, 8. December 2008


Project Euler in F# – Problem 53 – Dynamic Programming

Filed under: English posts,F#,Theoretische — Steffen Forkmann at 12:23 Uhr

Claudio Cherubino (from fsharp.it/) posted his solution to Euler Project – problem 53. As I dealed a lot with Dynamic Programming in the recent time, I tried to solve the problem with a dynamic program in F#.

Project Euler – Problem 53:

How many values of C(n,k), for 1 ≤ n ≤ 100, exceed one-million?

Remark: C(n,k) are the binomial coefficients.

As it turned out, this is not that complicated if one knows the recursive function for the binomial coefficients (see Wikipedia):

Recursive definition for the Binomial coefficient with Recursive definition for the Binomial coefficient

This is easily transformed into a F# program:

let binomials = Array2.create 101 101 1I
let answer = ref 0
for n in [1..100] do
  for k in [1..n - 1] do
    binomials.[n, k] <- binomials.[n - 1,k] + binomials.[n - 1,k - 1]
    if binomials.[n, k] > 1000000I then
      answer := !answer + 1

!answer |> printf "Answer: %A"

Claudio’s program took 1315ms on my computer. The dynamic program needs only 63ms. But we can still do better if we use the symmetry of Pascal’s triangle.

Symmetry of Pascal's triangle

This leads to an algorithm, which calculates only half of the binomial coefficients.

let binomials = Array2.create 101 101 1I
let answer = ref 0
for n in [1..100] do
  for k in [1..n/2] do
    let b = binomials.[n - 1,k] + binomials.[n - 1,k - 1]
    binomials.[n, k] <- b
    binomials.[n, n - k] <- b
    if b > 1000000I then
      if k = n-k then
        answer := !answer + 1
      else
        answer := !answer + 2
!answer |> printf "Answer: %A"

This version needs only 45ms – but we are not ready. I mentioned Pascal’s triangle and its symmetry. But we can use another property. We don’t have to calculate the complete row, if we exceed 100000. All values behind this threshold have to be greater.

let binomials = Array2.create 101 101 1I
let answer = ref 0
for n in [1..100] do
  let threshold_reached = ref false
  let c = ref 0
  for k in [1..n/2] do
    if not !threshold_reached then
      let b = binomials.[n - 1,k] + binomials.[n - 1,k - 1]
      binomials.[n, k] <- b
      binomials.[n, n - k] <- b
      if b > 1000000I then
        threshold_reached := true
      else
        c := !c + 1

  if !threshold_reached then
    answer := !answer + (n - 1) - (2 * !c)
!answer |> printf "Answer: %A"

This final version took only 29 ms.

In the next posting I will show a version using memoization.

Tags: , , , ,

Sunday, 7. December 2008


SubsetSum in O(nS)

Filed under: English posts,F#,Informatik — Steffen Forkmann at 13:17 Uhr

The “Subset Sum”-problem is given as the following:

SUBSET SUM
Input: Numbers a1, a2, . . . , an, S ∈ N.
Question: Is there a subset I ⊆ {1,…,n} with ∑ ai = S?

Finding a solution for this decision problem is a very easy task in F#.

let hasSubsetSum_Naive (numbers: int list) S =
  let rec hasSubsetSum (a: int list) lastSum =
    match a with 
      | [] -> false
      | x::rest -> 
        if lastSum + x = S then
          true
        elif lastSum + x > S then
          false
        else
          hasSubsetSum rest lastSum || hasSubsetSum rest (lastSum+x)
  
  hasSubsetSum numbers 0

let numbers = [ 5;4;3;6;7;12 ]
let searchedSum = 33  
hasSubsetSum_Naive numbers searchedSum

Of course this small program can be easily adjusted to give the subset I back. Unfortunately this naïve approach leads to a running time of O(2^n).

But if S is small we can build a dynamic program and decide the problem in O(n*S) time.

 

This can be easily transformed into F# code.

let hasSubsetSum (numbers: int array) S =
   if numbers |> Array.exists (fun x -> x = S) then
     true
   else
     let a = numbers |> Array.filter (fun x -> x < S)      
     let n = a.Length
     if n = 0 then
       false
     else
       let v = Array2.create n (S+1) 0
       let u = Array2.create n (S+1) false
       let t = Array2.create n (S+1) 0
       
       for j in [1..S] do
         for i in [0..n-1] do                           
           if j - a.[i] >= 0 && not u.[i,j - a.[i]] then
             v.[i,j] <- t.[i,j - a.[i]] + a.[i]
           if ((i = 0) || (i > 0 && t.[i-1,j] <> j)) && v.[i,j] = j then
             u.[i,j] <- true
           if v.[i,j] = j then
             t.[i,j] <- j
           else
             if i > 0 then
               t.[i,j] <- max t.[i-1,j] t.[i,j-1]
             else
               t.[i,j] <- t.[0,j-1]
               
       t.[n-1,S] = S
Tags: , , ,

Friday, 5. December 2008


The distance of two cities on the surface of the earth in F#

Filed under: English posts,F#,Informatik — Steffen Forkmann at 12:19 Uhr

If we want to calculate the distance between two objects, we need some sort of distance function. I recently showed how we can compute the “edit distance” between two strings (see Damerau-Levenshtein-Distance in F# – part I, part II and part III). This time I will consider the distance between two points in the plane and on the surface of the earth.

First of all we need some sort of location type, which stores information about the coordinates (I used this type before). I will use the “Units of Measure”-feature in F# to distinct between <degree> vs. <radian> and <m> vs. <km>. If you are not familiar with this concept see Andrew Kennedy’s excellent blog series about Units of Measures in F#.

open Microsoft.FSharp.Math.SI
  
[<Measure>]
type degree

[<Measure>]
type radian =
  static member per_degree = Math.PI/180.0<degree/radian>
  
[<Measure>]
type km =  
  static member per_meter = 1000.<m/km>
type Location =
  {Latitude:float<degree>; 
   Longitude:float<degree> }
    
  member x.YPos = x.Longitude
  member x.XPos = x.Latitude        
  member x.Longitude_radian = x.Longitude * radian.per_degree  
  member x.Latitude_radian = x.Latitude * radian.per_degree   
  member x.Coordinates = (x.Longitude,x.Latitude)  
  
  static member CreateLocation(latitude, longitude) = 
    {new Location with
      Latitude = latitude and
      Longitude = longitude}

Now we can easily compute the Euclidean distance for two points in the plane.

let sq x = x * x
let CalcEuclideanDistance (s:Location) (t:Location) =

let d1 = (t.Longitude - s.Longitude) / 1.<degree> let d2 = (t.Latitude - s.Latitude) / 1.<degree> ((sq d1 + sq d2) |> Math.Sqrt) * 1.<m>

If we want to approximate the distance of two cities on the surface of earth, we can use the “Great circle distance“.

“The great-circle distance is the shortest distance between any two points on the surface of a sphere measured along a path on the surface of the sphere (as opposed to going through the sphere’s interior)” – Wikipedia

let kmFactor = 6372.795<km>
let sin_rad x = Math.Sin(x / 1.<radian>)
let cos_rad x = Math.Cos(x / 1.<radian>)

/// Great Circle Distance 
let CalcGreatCircleDistance (s:Location) (t:Location)=
  if s = t then
    0.<m>
  else
    let factor = kmFactor * km.per_meter
    let b =
         sin_rad s.Latitude_radian * sin_rad t.Latitude_radian +
         cos_rad s.Latitude_radian * cos_rad t.Latitude_radian *
           cos_rad (t.Longitude_radian - s.Longitude_radian) 
    Math.Acos(b) * factor
Tags: , , ,