FreePascal Information Logo Friend of FreePascal Compiler Title
Articles with Feedback, FPC News Library, PDF Collection, Mail Lists, Books, Newsgroups, IRC Open online discussion areas Research and Tutorials Tools, Compilers and Utilities Blurbs about us, advertising, etc.
Welcome to the FoFPC Research Notes: Carverphone

Data Searching using Carverphone

by: G.E. Ozz Nixon Jr.
Published: October 2007
©opyright 2009 by Friends of FPC



     Data Searching is a very complex art in itself. Ignoring the long history of Internet "search engines" growing from Archie in 1992 to today's Google engine. Original techniques to provide users with "full text" search required a solution designed for common mistakes, one of which is called Carverphone.

     Carverphone is not a well published phonetic algorithm, it was on a private web site for students to see and understand phonetic comparisons. If you get familiar with the logic, you will places that you can extend this code and make a perfect hit for our samples - and even our harder to match patterns. All results use a 10 byte key, useful for binary searching.

Code snippet from DXSock 6.0 WLM (Courtesy of Brain Patchwork DX, LLC)
function Carverphone(Const S: string): string; // another theory (ported from C#)
var
   Loop:Integer;
   MaxLoop:Integer;
   TmpStr:String;

Begin
   Result:='';
   If (S='') then Exit;
   TmpStr:=Lowercase(SpacesOnly(UpperCase(S)));
{$IFDEF ASSEMBLY}
   asm // 1090108: By Ozz (FASTER THAN Length()!)
     MOV EAX, TmpStr;       // Store Str Address
     MOV EAX, [EAX-$04]; // Move to "Size" Int32
     MOV Loop, EAX;    // Put into Result
   End;
{$ELSE}
   Loop:=Length(TmpStr);
{$ENDIF}
   If Copy(TmpStr,Loop,1)='e' then Delete(TmpStr,Loop,1);
   if Loop=0 then exit;
   TmpStr:=StringReplace(TmpStr,'ugh','u2f',[rfReplaceAll]);
   TmpStr:=StringReplace(TmpStr,'gn','2n',[rfReplaceAll]);
{$IFDEF ASSEMBLY}
   asm // 1090108: By Ozz (FASTER THAN Length()!)
     MOV EAX, TmpStr;       // Store Str Address
     MOV EAX, [EAX-$04]; // Move to "Size" Int32
     MOV Loop, EAX;    // Put into Result
   End;
{$ELSE}
   Loop:=Length(TmpStr);
{$ENDIF}
   If Copy(TmpStr,Loop-1,2)='mb' then begin
{$IFDEF ASSEMBLY}
      asm // 1090108: By Ozz (FASTER THAN Length()!)
        MOV EAX, TmpStr;       // Store Str Address
        MOV EAX, [EAX-$04]; // Move to "Size" Int32
        MOV Loop, EAX;    // Put into Result
      End;
{$ELSE}
      Loop:=Length(TmpStr);
{$ENDIF}
   If Copy(TmpStr,Loop-1,2)='mb' then begin
{$IFDEF ASSEMBLY}
      asm // 1090108: By Ozz (FASTER THAN Length()!)
        MOV EAX, TmpStr;       // Store Str Address
        MOV EAX, [EAX-$04]; // Move to "Size" Int32
        MOV Loop, EAX;    // Put into Result
      End;
{$ELSE}
      Loop:=Length(TmpStr);
{$ENDIF}
      Delete(TmpStr,Loop-1,2);
      TmpStr:=TmpStr+'2';
   end;
   TmpStr:=StringReplace(TmpStr,'cq','2q',[rfReplaceAll]);
   TmpStr:=StringReplace(TmpStr,'ci','si',[rfReplaceAll]);
   TmpStr:=StringReplace(TmpStr,'ce','se',[rfReplaceAll]);
   TmpStr:=StringReplace(TmpStr,'cy','sy',[rfReplaceAll]);
   TmpStr:=StringReplace(TmpStr,'tch','2ch',[rfReplaceAll]);
   TmpStr:=StringReplace(TmpStr,'c','k',[rfReplaceAll]);
   TmpStr:=StringReplace(TmpStr,'q','k',[rfReplaceAll]);
   TmpStr:=StringReplace(TmpStr,'x','k',[rfReplaceAll]);
   TmpStr:=StringReplace(TmpStr,'v','f',[rfReplaceAll]);
   TmpStr:=StringReplace(TmpStr,'dg','2g',[rfReplaceAll]);
   TmpStr:=StringReplace(TmpStr,'tio','sio',[rfReplaceAll]);
   TmpStr:=StringReplace(TmpStr,'tia','sia',[rfReplaceAll]);
   TmpStr:=StringReplace(TmpStr,'d','t',[rfReplaceAll]);
   TmpStr:=StringReplace(TmpStr,'ph','fh',[rfReplaceAll]);
   TmpStr:=StringReplace(TmpStr,'b','p',[rfReplaceAll]);
   TmpStr:=StringReplace(TmpStr,'sh','s2',[rfReplaceAll]);
   TmpStr:=StringReplace(TmpStr,'z','s',[rfReplaceAll]);
   if TmpStr[1] in ['a','e','i','o','u'] then TmpStr[1]:='A';
{$IFDEF ASSEMBLY}
   asm // 1090108: By Ozz (FASTER THAN Length()!)
     MOV EAX, TmpStr;       // Store Str Address
     MOV EAX, [EAX-$04]; // Move to "Size" Int32
     MOV MaxLoop, EAX;    // Put into Result
   End;
{$ELSE}
   MaxLoop:=Length(TmpStr);
{$ENDIF}
   if TmpStr[loop] in ['a','e','i','o','u'] then TmpStr[Loop]:='3';
   TmpStr:=StringReplace(TmpStr,'j','y',[rfReplaceAll]);
   If Copy(TmpStr,1,2)='y3' then TmpStr[1]:='Y';
   If Copy(TmpStr,1,1)='y' then TmpStr[1]:='A';
   TmpStr:=StringReplace(TmpStr,'y','3',[rfReplaceAll]);
   TmpStr:=StringReplace(TmpStr,'3gh3','3kh3',[rfReplaceAll]);
   TmpStr:=StringReplace(TmpStr,'gh','22',[rfReplaceAll]);
   TmpStr:=StringReplace(TmpStr,'g','k',[rfReplaceAll]);
   TmpStr:=StringReplace(TmpStr,'s','S',[rfReplaceAll]);
   TmpStr:=StringReplace(TmpStr,'t','T',[rfReplaceAll]);
   TmpStr:=StringReplace(TmpStr,'p','P',[rfReplaceAll]);
   TmpStr:=StringReplace(TmpStr,'k','K',[rfReplaceAll]);
   TmpStr:=StringReplace(TmpStr,'f','F',[rfReplaceAll]);
   TmpStr:=StringReplace(TmpStr,'m','M',[rfReplaceAll]);
   TmpStr:=StringReplace(TmpStr,'n','N',[rfReplaceAll]);
   TmpStr:=StringReplace(TmpStr,'w3','W3',[rfReplaceAll]);
   TmpStr:=StringReplace(TmpStr,'wh3','Wh3',[rfReplaceAll]);
{$IFDEF ASSEMBLY}
   asm // 1090108: By Ozz (FASTER THAN Length()!)
     MOV EAX, TmpStr;       // Store Str Address
     MOV EAX, [EAX-$04]; // Move to "Size" Int32
     MOV Loop, EAX;    // Put into Result
   End;
{$ELSE}
   Loop:=Length(TmpStr);
{$ENDIF}
   If Copy(TmpStr,Loop,1)='w' then TmpStr[Loop]:='3';
   TmpStr:=StringReplace(TmpStr,'w','2',[rfReplaceAll]);
   If Copy(TmpStr,1,1)='h' then TmpStr[1]:='A';
   TmpStr:=StringReplace(TmpStr,'h','2',[rfReplaceAll]);
   TmpStr:=StringReplace(TmpStr,'r3','R3',[rfReplaceAll]);
{$IFDEF ASSEMBLY}
   asm // 1090108: By Ozz (FASTER THAN Length()!)
     MOV EAX, TmpStr;       // Store Str Address
     MOV EAX, [EAX-$04]; // Move to "Size" Int32
     MOV Loop, EAX;    // Put into Result
   End;
{$ELSE}
   Loop:=Length(TmpStr);
{$ENDIF}
   If Copy(TmpStr,Loop,1)='r' then TmpStr[Loop]:='3';
   TmpStr:=StringReplace(TmpStr,'r','2',[rfReplaceAll]);
   TmpStr:=StringReplace(TmpStr,'l3','L3',[rfReplaceAll]);
{$IFDEF ASSEMBLY}
   asm // 1090108: By Ozz (FASTER THAN Length()!)
     MOV EAX, TmpStr;       // Store Str Address
     MOV EAX, [EAX-$04]; // Move to "Size" Int32
     MOV Loop, EAX;    // Put into Result
   End;
{$ELSE}
   Loop:=Length(TmpStr);
{$ENDIF}
   If Copy(TmpStr,Loop,1)='l' then TmpStr[Loop]:='3';
   TmpStr:=StringReplace(TmpStr,'l','2',[rfReplaceAll]);
{$IFDEF ASSEMBLY}
   asm // 1090108: By Ozz (FASTER THAN Length()!)
     MOV EAX, TmpStr;       // Store Str Address
     MOV EAX, [EAX-$04]; // Move to "Size" Int32
     MOV Loop, EAX;    // Put into Result
   End;
{$ELSE}
   Loop:=Length(TmpStr);
{$ENDIF}
   If Copy(TmpStr,Loop,1)='3' then TmpStr[Loop]:='A';
   TmpStr:=StringReplace(TmpStr,'3','',[rfReplaceAll]);
   Result:=Copy(TmpStr+'1111111111',1,10);
End;

     Using this algorithm, both "Robert" and "Rupert" return the same string "RP2T111111" while "Rubin" and "Robin" yields "RPN1111111". "Ashcroft" and "Ashcraft" yields "AS2KRFT111". However, "George" and "Jorge" yields "K2K1111111" and "Y2K1111111" and "Dave" and "David" yields "TF11111111" and "TFT1111111".

     This simple example shows an improvement for Ashcroft and Ashcraft over Soundex, however, George and Jorge and Dave and David need an improved theory. Read more on other comparison techniques I used in my search engines.

G.E. Ozz Nixon Jr.
 Links and Products we find useful



ButtonGenerator.com
Valid XHTML 1.0 Transitional Internet Map
Programmer's Heaven
grat-i-fi-ca-tion - noun
the state of being gratified; great satisfaction.


"If you have a testimonial you would like to share, feel free to email me directly at ozznixon (at) (gmail.com)"

Your Web Master
Friends-Of-FPC.org
Locations of visitors to this page world map hits counter
Copyright 2009 by 3F, LLC. All rights reserved. Worldwide.
Your request was processed by server #3 in 0.198992 secs.

sponsor
Click to visit our paid sponsor