Hi all
I need a Soundex algorithm that supports the Arabic language. All I have found is a PHP class, but I have no experience with PHP, so I cannot translate the class into VFP. Any help will be appreciated. The PHP class follows:
<?php
// ----------------------------------------------------------------------
// Copyright (C) 2006 by Khaled Al-Shamaa.
// // ----------------------------------------------------------------------
// LICENSE
// This program is open source product; you can redistribute it and/or
// modify it under the terms of the GNU General Public License (GPL)
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// To read the license please visit // ----------------------------------------------------------------------
// Class Name: Arabic Soundex
// Filename: ASoundex.class.php
// Original Author(s): Khaled Al-Sham'aa <khaled.alshamaa@gmail.com>
// Purpose: Arabic soundex algorithm takes Arabic word as an input
// and produces a character string which identifies a set words
// that are (roughly) phonetically alike.
// ----------------------------------------------------------------------
/**
 * Arabic Soundex: takes an Arabic word and produces a short code that
 * identifies a set of words that are (roughly) phonetically alike.
 *
 * The code consists of one leading character (the first letter of the word,
 * optionally transliterated to a Latin letter) followed by digits taken from
 * the selected mapping table, padded with '0' or truncated to $len characters.
 *
 * Input is handled as UTF-8. Strings that are not valid UTF-8 fall back to
 * byte-wise processing, which is what the legacy implementation always did.
 */
class ASoundex
{
    /**
     * Soundex mapping: the array index is the digit substituted for every
     * letter matched by the pattern stored at that index.
     */
    public $asoundexCode = array(
        '/ا|و|ي|ع|ح|ه/',
        '/ب|ف/',
        '/خ|ج|ز|س|ص|ظ|ق|ك|غ|ش/',
        '/ت|ث|د|ذ|ض|ط|ة/',
        '/ل/',
        '/م|ن/',
        '/ر/'
    );

    /**
     * Phonix mapping: same layout as $asoundexCode, with two extra groups
     * (indexes 7 and 8).
     */
    public $aphonixCode = array(
        '/ا|و|ي|ع|ح|ه/',
        '/ب/',
        '/خ|ج|ص|ظ|ق|ك|غ|ش/',
        '/ت|ث|د|ذ|ض|ط|ة/',
        '/ل/',
        '/م|ن/',
        '/ر/',
        '/ف/',
        '/ز|س/'
    );

    /**
     * Arabic letter => Latin letter, used for the leading character of the
     * code when $lang is 'en'.
     */
    public $transliteration = array(
        'ا' => 'A',
        'ب' => 'B',
        'ت' => 'T',
        'ث' => 'T',
        'ج' => 'J',
        'ح' => 'H',
        'خ' => 'K',
        'د' => 'D',
        'ذ' => 'Z',
        'ر' => 'R',
        'ز' => 'Z',
        'س' => 'S',
        'ش' => 'S',
        'ص' => 'S',
        'ض' => 'D',
        'ط' => 'T',
        'ظ' => 'Z',
        'ع' => 'A',
        'غ' => 'G',
        'ف' => 'F',
        'ق' => 'Q',
        'ك' => 'K',
        'ل' => 'L',
        'م' => 'M',
        'ن' => 'N',
        'ه' => 'H',
        'و' => 'W',
        'ي' => 'Y'
    );

    /** Length of the code returned by soundex(), in characters. */
    public $len;

    /** 'en' transliterates the leading letter; any other value keeps it as is. */
    public $lang;

    /** 'phonix' selects $aphonixCode; any other value selects $asoundexCode. */
    public $code;

    /**
     * A PHP 4-style constructor (a method named after the class) is no longer
     * treated as a constructor by PHP 8, which left all three settings null;
     * __construct() works on every PHP version from 5 on.
     *
     * @param int    $len  length of the generated code
     * @param string $lang 'en' to transliterate the first letter
     * @param string $code 'soundex' or 'phonix'
     */
    public function __construct($len = 4, $lang = 'en', $code = 'soundex')
    {
        $this->len = $len;
        $this->lang = $lang;
        $this->code = $code;
    }

    /**
     * Replace every character of $word by a digit: letters listed in the
     * active mapping table become the index of their group, everything else
     * becomes '0'.
     *
     * @param string $word the word (or part of a word) to encode
     * @return string the numeric soundex/phonix code
     */
    public function mapCode($word)
    {
        $map = ($this->code === 'phonix') ? $this->aphonixCode : $this->asoundexCode;

        $encodedWord = (string) $word;
        foreach ($map as $digit => $pattern) {
            $encodedWord = preg_replace($pattern, (string) $digit, $encodedWord);
        }

        // With the "u" modifier an unmapped multi-byte character yields a single
        // '0' rather than one '0' per byte.
        $result = preg_replace('/\D/u', '0', $encodedWord);
        if ($result === null) {
            // Not valid UTF-8: fall back to the legacy byte-wise replacement.
            $result = preg_replace('/\D/', '0', $encodedWord);
        }

        return $result;
    }

    /**
     * Collapse every run of identical consecutive characters into one.
     *
     * @param string $word
     * @return string $word without immediate repetitions ('' for empty input)
     */
    public function trimRep($word)
    {
        $cleanWord = '';
        $lastChar = null;

        foreach ($this->splitChars($word) as $char) {
            if ($char !== $lastChar) {
                $cleanWord .= $char;
            }
            $lastChar = $char;
        }

        return $cleanWord;
    }

    /**
     * Compute the phonetic code of $word.
     *
     * @param string $word the word to encode
     * @return string the code, exactly $len characters long; an empty word
     *                yields $len zeros
     */
    public function soundex($word)
    {
        $len = max(0, (int) $this->len);

        $chars = $this->splitChars($word);
        if (count($chars) === 0) {
            return str_repeat('0', $len);
        }

        $first = array_shift($chars);
        $rest = implode('', $chars);

        if ($this->lang === 'en') {
            // Letters missing from the table contribute no leading character.
            $first = isset($this->transliteration[$first]) ? $this->transliteration[$first] : '';
        }

        // Repetitions are collapsed before the zeros are dropped, so two equal
        // digits separated by a '0' are both kept.
        $soundex = $first . $this->trimRep($this->mapCode($rest));
        $soundex = str_replace('0', '', $soundex);

        // Measure in characters, not bytes: the leading letter may be multi-byte.
        $codeChars = $this->splitChars($soundex);
        $totalLen = count($codeChars);

        if ($totalLen > $len) {
            return implode('', array_slice($codeChars, 0, $len));
        }

        return $soundex . str_repeat('0', $len - $totalLen);
    }

    /**
     * Split a string into a list of characters.
     *
     * @param string $text
     * @return array UTF-8 characters, or single bytes when $text is not valid UTF-8
     */
    private function splitChars($text)
    {
        $text = (string) $text;

        $chars = preg_split('//u', $text, -1, PREG_SPLIT_NO_EMPTY);
        if ($chars === false) {
            $chars = preg_split('//', $text, -1, PREG_SPLIT_NO_EMPTY);
        }

        return $chars;
    }
}
thank you
yahya
I need a Soundex algorithm that supports the Arabic language. All I have found is a PHP class, but I have no experience with PHP, so I cannot translate the class into VFP. Any help will be appreciated. The PHP class follows:
<?php
// ----------------------------------------------------------------------
// Copyright (C) 2006 by Khaled Al-Shamaa.
// // ----------------------------------------------------------------------
// LICENSE
// This program is open source product; you can redistribute it and/or
// modify it under the terms of the GNU General Public License (GPL)
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// To read the license please visit // ----------------------------------------------------------------------
// Class Name: Arabic Soundex
// Filename: ASoundex.class.php
// Original Author(s): Khaled Al-Sham'aa <khaled.alshamaa@gmail.com>
// Purpose: Arabic soundex algorithm takes Arabic word as an input
// and produces a character string which identifies a set words
// that are (roughly) phonetically alike.
// ----------------------------------------------------------------------
/**
 * Arabic Soundex: takes an Arabic word and produces a short code that
 * identifies a set of words that are (roughly) phonetically alike.
 *
 * The code consists of one leading character (the first letter of the word,
 * optionally transliterated to a Latin letter) followed by digits taken from
 * the selected mapping table, padded with '0' or truncated to $len characters.
 *
 * Input is handled as UTF-8. Strings that are not valid UTF-8 fall back to
 * byte-wise processing, which is what the legacy implementation always did.
 */
class ASoundex
{
    /**
     * Soundex mapping: the array index is the digit substituted for every
     * letter matched by the pattern stored at that index.
     */
    public $asoundexCode = array(
        '/ا|و|ي|ع|ح|ه/',
        '/ب|ف/',
        '/خ|ج|ز|س|ص|ظ|ق|ك|غ|ش/',
        '/ت|ث|د|ذ|ض|ط|ة/',
        '/ل/',
        '/م|ن/',
        '/ر/'
    );

    /**
     * Phonix mapping: same layout as $asoundexCode, with two extra groups
     * (indexes 7 and 8).
     */
    public $aphonixCode = array(
        '/ا|و|ي|ع|ح|ه/',
        '/ب/',
        '/خ|ج|ص|ظ|ق|ك|غ|ش/',
        '/ت|ث|د|ذ|ض|ط|ة/',
        '/ل/',
        '/م|ن/',
        '/ر/',
        '/ف/',
        '/ز|س/'
    );

    /**
     * Arabic letter => Latin letter, used for the leading character of the
     * code when $lang is 'en'.
     */
    public $transliteration = array(
        'ا' => 'A',
        'ب' => 'B',
        'ت' => 'T',
        'ث' => 'T',
        'ج' => 'J',
        'ح' => 'H',
        'خ' => 'K',
        'د' => 'D',
        'ذ' => 'Z',
        'ر' => 'R',
        'ز' => 'Z',
        'س' => 'S',
        'ش' => 'S',
        'ص' => 'S',
        'ض' => 'D',
        'ط' => 'T',
        'ظ' => 'Z',
        'ع' => 'A',
        'غ' => 'G',
        'ف' => 'F',
        'ق' => 'Q',
        'ك' => 'K',
        'ل' => 'L',
        'م' => 'M',
        'ن' => 'N',
        'ه' => 'H',
        'و' => 'W',
        'ي' => 'Y'
    );

    /** Length of the code returned by soundex(), in characters. */
    public $len;

    /** 'en' transliterates the leading letter; any other value keeps it as is. */
    public $lang;

    /** 'phonix' selects $aphonixCode; any other value selects $asoundexCode. */
    public $code;

    /**
     * A PHP 4-style constructor (a method named after the class) is no longer
     * treated as a constructor by PHP 8, which left all three settings null;
     * __construct() works on every PHP version from 5 on.
     *
     * @param int    $len  length of the generated code
     * @param string $lang 'en' to transliterate the first letter
     * @param string $code 'soundex' or 'phonix'
     */
    public function __construct($len = 4, $lang = 'en', $code = 'soundex')
    {
        $this->len = $len;
        $this->lang = $lang;
        $this->code = $code;
    }

    /**
     * Replace every character of $word by a digit: letters listed in the
     * active mapping table become the index of their group, everything else
     * becomes '0'.
     *
     * @param string $word the word (or part of a word) to encode
     * @return string the numeric soundex/phonix code
     */
    public function mapCode($word)
    {
        $map = ($this->code === 'phonix') ? $this->aphonixCode : $this->asoundexCode;

        $encodedWord = (string) $word;
        foreach ($map as $digit => $pattern) {
            $encodedWord = preg_replace($pattern, (string) $digit, $encodedWord);
        }

        // With the "u" modifier an unmapped multi-byte character yields a single
        // '0' rather than one '0' per byte.
        $result = preg_replace('/\D/u', '0', $encodedWord);
        if ($result === null) {
            // Not valid UTF-8: fall back to the legacy byte-wise replacement.
            $result = preg_replace('/\D/', '0', $encodedWord);
        }

        return $result;
    }

    /**
     * Collapse every run of identical consecutive characters into one.
     *
     * @param string $word
     * @return string $word without immediate repetitions ('' for empty input)
     */
    public function trimRep($word)
    {
        $cleanWord = '';
        $lastChar = null;

        foreach ($this->splitChars($word) as $char) {
            if ($char !== $lastChar) {
                $cleanWord .= $char;
            }
            $lastChar = $char;
        }

        return $cleanWord;
    }

    /**
     * Compute the phonetic code of $word.
     *
     * @param string $word the word to encode
     * @return string the code, exactly $len characters long; an empty word
     *                yields $len zeros
     */
    public function soundex($word)
    {
        $len = max(0, (int) $this->len);

        $chars = $this->splitChars($word);
        if (count($chars) === 0) {
            return str_repeat('0', $len);
        }

        $first = array_shift($chars);
        $rest = implode('', $chars);

        if ($this->lang === 'en') {
            // Letters missing from the table contribute no leading character.
            $first = isset($this->transliteration[$first]) ? $this->transliteration[$first] : '';
        }

        // Repetitions are collapsed before the zeros are dropped, so two equal
        // digits separated by a '0' are both kept.
        $soundex = $first . $this->trimRep($this->mapCode($rest));
        $soundex = str_replace('0', '', $soundex);

        // Measure in characters, not bytes: the leading letter may be multi-byte.
        $codeChars = $this->splitChars($soundex);
        $totalLen = count($codeChars);

        if ($totalLen > $len) {
            return implode('', array_slice($codeChars, 0, $len));
        }

        return $soundex . str_repeat('0', $len - $totalLen);
    }

    /**
     * Split a string into a list of characters.
     *
     * @param string $text
     * @return array UTF-8 characters, or single bytes when $text is not valid UTF-8
     */
    private function splitChars($text)
    {
        $text = (string) $text;

        $chars = preg_split('//u', $text, -1, PREG_SPLIT_NO_EMPTY);
        if ($chars === false) {
            $chars = preg_split('//', $text, -1, PREG_SPLIT_NO_EMPTY);
        }

        return $chars;
    }
}
thank you
yahya