My Spam Filter is working great using Bayesian Filters and for those of you who want to know how Bayesian Values are calculated the following procedure should explain it. Anyway on with my problem. I want parse ALL the Tokens that I have collected when I process the Email to a Stored Procedure and let SQL so all the hard work leaving my application to continue with what it does best. The Stored Procedure below does a perfect job of doing this one at a time but I want to parse all the Tokens with a predefined Delimiter, Split them into Single Tokens and call this SP to do the updating. I am not an experienced DBA so any help woud be appreciated. First let me explain:
Example Token List:
"VIAGRA^3|SPAMWORD1^2|SPAMWORD2^4"
Which means that:
VIAGRA occurrs 3 times
SPAMWORD1 occurrs 2 times
SPAMWORD2 occurrs 4 times
I seem to recall that there is alimit to how long this string can be an I can break it into more manageable lengths.
Here is the SP for Parsing SINGLE Tokens:
CREATE PROCEDURE spUpdateToken
(
@Token nVarChar(50),
@IsSpam Bit,
@Occurs Int,
@Correction Bit
)
AS
SET NoCount ON
SET Xact_Abort ON
BEGIN
IF EXISTS(SELECT 0 FROM dbo.Tokens WHERE dbo.Tokens.Token = @TOKEN)
BEGIN
IF @IsSpam = 1
BEGIN
IF @CORRECTION = 1 -- Add to Spam and Deduct from Ham
BEGIN
UPDATE dbo.Tokens
SET dbo.Tokens.Spams = dbo.Tokens.Spams + @OCCURS,
dbo.Tokens.Hams = dbo.Tokens.Hams - @OCCURS
WHERE dbo.Tokens.Token = @TOKEN
UPDATE dbo.Params
SET dbo.Params.BayeSpam = dbo.Params.BayeSpam + 1,
dbo.Params.BayeHam = dbo.Params.BayeHam - 1,
dbo.Params.FalsePos = dbo.Params.FalsePos + 1
WHERE dbo.Params.Config = 0
END
ELSE
BEGIN
UPDATE dbo.Tokens
SET dbo.Tokens.Spams = dbo.Tokens.Spams + @OCCURS
WHERE dbo.Tokens.Token = @TOKEN
UPDATE dbo.Params
SET dbo.Params.BayeSpam = dbo.Params.BayeSpam + 1
WHERE dbo.Params.Config = 0
END
END
ELSE
BEGIN
IF @CORRECTION = 1 -- Add to Ham AND Deduct from Spam
BEGIN
UPDATE dbo.Tokens
SET dbo.Tokens.Hams = dbo.Tokens.Hams + @OCCURS,
dbo.Tokens.Spams = dbo.Tokens.Spams - @OCCURS
WHERE dbo.Tokens.Token = @TOKEN
UPDATE dbo.Params
SET dbo.Params.BayeSpam = dbo.Params.BayeSpam - 1,
dbo.Params.BayeHam = dbo.Params.BayeHam + 1,
dbo.Params.FalseNeg = dbo.Params.FalseNeg + 1
WHERE dbo.Params.Config = 0
END
ELSE -- Just add to Ham
BEGIN
UPDATE dbo.Tokens
SET dbo.Tokens.Hams = dbo.Tokens.Hams + @OCCURS
WHERE dbo.Tokens.Token = @TOKEN
UPDATE dbo.Params
SET dbo.Params.BayeHam = dbo.Params.BayeHam + 1
WHERE dbo.Params.Config = 0
END
END
END
ELSE
BEGIN
IF @IsSpam = 1 -- No Tokens - Just add to Corpus
BEGIN
INSERT Tokens (Token, Spams, Hams)
VALUES(@TOKEN, @OCCURS, 0)
UPDATE dbo.Params
SET dbo.Params.BayeSpam = dbo.Params.BayeSpam + 1
WHERE dbo.Params.Config = 0
END
ELSE -- No Tokens - Just add to Corpus
BEGIN
INSERT Tokens (Token, Spams, Hams)
VALUES(@TOKEN, 0, @OCCURS)
UPDATE dbo.Params
SET dbo.Params.BayeHam = dbo.Params.BayeHam + 1
WHERE dbo.Params.Config = 0
END
END
-- UPDATE THE SCORE
DECLARE @HAMS Decimal(10,3),
@SPAMS Decimal(10,3),
@GOOD Decimal(10,3),
@BAD Decimal(10,3),
@SCORE Decimal(10,3)
SET @HAMS = (SELECT dbo.Params.BayeHam FROM dbo.Params WHERE Config = 0)
SET @SPAMS = (SELECT dbo.Params.BayeSpam FROM dbo.Params WHERE Config = 0)
SET @GOOD = (SELECT dbo.Tokens.Hams FROM dbo.Tokens WHERE Token =@TOKEN)
SET @BAD = (SELECT dbo.Tokens.Spams FROM dbo.Tokens WHERE Token =@TOKEN)
IF @HAMS < 0 SET @HAMS = 0
IF @SPAMS < 0 SET @SPAMS = 0
IF @SPAMS = 0 OR @HAMS = 0
BEGIN
IF @HAMS > 0
SET @SCORE = 0
ELSE
SET @SCORE = 1
END
ELSE
BEGIN
SET @SCORE = (@BAD / @SPAMS) / ((@GOOD / @HAMS) + (@BAD / @SPAMS))
IF @SCORE > 1 SET @SCORE = 1
IF @SCORE < 0 SET @SCORE = 0
END
UPDATE dbo.Tokens
SET dbo.Tokens.Score = (@SCORE)
WHERE dbo.Tokens.Token = @TOKEN
RETURN @SCORE
END
GO
Brian Gillham
FailSafe Systems
Example Token List:
"VIAGRA^3|SPAMWORD1^2|SPAMWORD2^4"
Which means that:
VIAGRA occurrs 3 times
SPAMWORD1 occurrs 2 times
SPAMWORD2 occurrs 4 times
I seem to recall that there is alimit to how long this string can be an I can break it into more manageable lengths.
Here is the SP for Parsing SINGLE Tokens:
CREATE PROCEDURE spUpdateToken
(
@Token nVarChar(50),
@IsSpam Bit,
@Occurs Int,
@Correction Bit
)
AS
SET NoCount ON
SET Xact_Abort ON
BEGIN
IF EXISTS(SELECT 0 FROM dbo.Tokens WHERE dbo.Tokens.Token = @TOKEN)
BEGIN
IF @IsSpam = 1
BEGIN
IF @CORRECTION = 1 -- Add to Spam and Deduct from Ham
BEGIN
UPDATE dbo.Tokens
SET dbo.Tokens.Spams = dbo.Tokens.Spams + @OCCURS,
dbo.Tokens.Hams = dbo.Tokens.Hams - @OCCURS
WHERE dbo.Tokens.Token = @TOKEN
UPDATE dbo.Params
SET dbo.Params.BayeSpam = dbo.Params.BayeSpam + 1,
dbo.Params.BayeHam = dbo.Params.BayeHam - 1,
dbo.Params.FalsePos = dbo.Params.FalsePos + 1
WHERE dbo.Params.Config = 0
END
ELSE
BEGIN
UPDATE dbo.Tokens
SET dbo.Tokens.Spams = dbo.Tokens.Spams + @OCCURS
WHERE dbo.Tokens.Token = @TOKEN
UPDATE dbo.Params
SET dbo.Params.BayeSpam = dbo.Params.BayeSpam + 1
WHERE dbo.Params.Config = 0
END
END
ELSE
BEGIN
IF @CORRECTION = 1 -- Add to Ham AND Deduct from Spam
BEGIN
UPDATE dbo.Tokens
SET dbo.Tokens.Hams = dbo.Tokens.Hams + @OCCURS,
dbo.Tokens.Spams = dbo.Tokens.Spams - @OCCURS
WHERE dbo.Tokens.Token = @TOKEN
UPDATE dbo.Params
SET dbo.Params.BayeSpam = dbo.Params.BayeSpam - 1,
dbo.Params.BayeHam = dbo.Params.BayeHam + 1,
dbo.Params.FalseNeg = dbo.Params.FalseNeg + 1
WHERE dbo.Params.Config = 0
END
ELSE -- Just add to Ham
BEGIN
UPDATE dbo.Tokens
SET dbo.Tokens.Hams = dbo.Tokens.Hams + @OCCURS
WHERE dbo.Tokens.Token = @TOKEN
UPDATE dbo.Params
SET dbo.Params.BayeHam = dbo.Params.BayeHam + 1
WHERE dbo.Params.Config = 0
END
END
END
ELSE
BEGIN
IF @IsSpam = 1 -- No Tokens - Just add to Corpus
BEGIN
INSERT Tokens (Token, Spams, Hams)
VALUES(@TOKEN, @OCCURS, 0)
UPDATE dbo.Params
SET dbo.Params.BayeSpam = dbo.Params.BayeSpam + 1
WHERE dbo.Params.Config = 0
END
ELSE -- No Tokens - Just add to Corpus
BEGIN
INSERT Tokens (Token, Spams, Hams)
VALUES(@TOKEN, 0, @OCCURS)
UPDATE dbo.Params
SET dbo.Params.BayeHam = dbo.Params.BayeHam + 1
WHERE dbo.Params.Config = 0
END
END
-- UPDATE THE SCORE
DECLARE @HAMS Decimal(10,3),
@SPAMS Decimal(10,3),
@GOOD Decimal(10,3),
@BAD Decimal(10,3),
@SCORE Decimal(10,3)
SET @HAMS = (SELECT dbo.Params.BayeHam FROM dbo.Params WHERE Config = 0)
SET @SPAMS = (SELECT dbo.Params.BayeSpam FROM dbo.Params WHERE Config = 0)
SET @GOOD = (SELECT dbo.Tokens.Hams FROM dbo.Tokens WHERE Token =@TOKEN)
SET @BAD = (SELECT dbo.Tokens.Spams FROM dbo.Tokens WHERE Token =@TOKEN)
IF @HAMS < 0 SET @HAMS = 0
IF @SPAMS < 0 SET @SPAMS = 0
IF @SPAMS = 0 OR @HAMS = 0
BEGIN
IF @HAMS > 0
SET @SCORE = 0
ELSE
SET @SCORE = 1
END
ELSE
BEGIN
SET @SCORE = (@BAD / @SPAMS) / ((@GOOD / @HAMS) + (@BAD / @SPAMS))
IF @SCORE > 1 SET @SCORE = 1
IF @SCORE < 0 SET @SCORE = 0
END
UPDATE dbo.Tokens
SET dbo.Tokens.Score = (@SCORE)
WHERE dbo.Tokens.Token = @TOKEN
RETURN @SCORE
END
GO
Brian Gillham
FailSafe Systems