Rash thoughts about .NET, C#, F# and Dynamics NAV.


"Every solution will only lead to new problems."

Saturday, 1. November 2008


Damerau-Levenshtein-Distance in F# – part III – O(m+n) space and functional style

Filed under: BioInformatik,F#,Informatik,Mathematik — Steffen Forkmann at 16:31 Uhr

In the first part of this series I showed a naïve algorithm for the Damerau-Levenshtein distance which needs O(m*n) space. In the last post I improved the algorithm to use only O(m+n) space. This time I will show a more functional implementation which uses only immutable F#-Lists and works still in O(m+n) space. This version doesn’t need any mutable data.

/// Calcs the damerau levenshtein distance.    
let calcDL (a:'a array) (b: 'a array) =       
  let n = a.Length + 1
  let m = b.Length + 1
  
  let processCell i j act l1 l2 ll1 =
    let cost = 
      if a.[i-1] = b.[j-1] then 0 else 1
    let deletion = l2 + 1
    let insertion = act + 1
    let substitution = l1 + cost
    let min1 =  
      deletion 
      |> min insertion 
      |> min substitution

    if i > 1 && j > 1 &&
      a.[i-1] = b.[j-2] && a.[i-2] = b.[j-1] then
        min min1 <| ll1 + cost
    else
      min1
  
  let processLine i lastL lastLastL =
    let processNext (actL,lastL,lastLastL) j =
      match actL with 
        | act::actRest -> 
          match lastL with
            | l1::l2::lastRest -> 
              if i > 1 && j > 1 then
                match lastLastL with
                  | ll1::lastLastRest -> 
                    (processCell i j act l1 l2 ll1 :: actL,
                     l2::lastRest,
                     lastLastRest)
                  | _ -> failwith "can't be"
              else
                (processCell i j act l1 l2 0 :: actL,
                 l2::lastRest,
                 lastLastL)                 
            | _ -> failwith "can't be"
        | [] -> failwith "can't be"
      
    let (act,last,lastLast) =
      [1..b.Length]
        |> List.fold_left processNext ([i],lastL,lastLastL)
    act |> List.rev
    
  let (lastLine,lastLastLine) =               
    [1..a.Length]
      |> List.fold_left
          (fun (lastL,lastLastL) i -> 
             (processLine i lastL lastLastL,lastL))
          ([0..m-1],[])
              
  lastLine.[b.Length]    
 
let damerauLevenshtein(a:'a array) (b:'a array) =
  if a.Length > b.Length then
    calcDL a b
  else
    calcDL b a

I admit the code is still a little messy but it works fine. Maybe I will find the time to cleanup a bit and post a final version.

Tags: , , , , , , , ,

1 Comment »

  1. Thank you for posting, this is excellent. I would like to use it in my senior project, but there are no licensing terms posted. What are your terms of use?

    3-clause permissive? share-alike? public domain?

    Thanks again

    Comment by Hans — Thursday, 9. June 2011 um 19:40 Uhr

RSS feed for comments on this post. | TrackBack URI

Leave a comment

XHTML ( You can use these tags): <a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code> <del datetime=""> <em> <i> <q cite=""> <s> <strike> <strong> .