-- create database Tree
-- use Tree
go
set nocount on
go
/*
drop table Tree
drop table Numbers4
drop view TextTree
drop view NodeAncestor
drop view NodeAncestorBinary
drop view NodeDescendant
drop view NodeDescendantBinary
*/
go
create table Tree (
NodeID int not null identity (1,1) constraint PK_Tree primary key clustered,
NodeBinary as Convert(varbinary(4), NodeID), -- not sure if this is helpful. Probably not.
ParentID int null -- Nodes with null as Parent are root Nodes. Other schemes are possible.
constraint FK_Tree_ParentID_NodeID foreign key (ParentID) references Tree(NodeID),
NodeName varchar(256) not null,
Path varbinary(900) not null constraint DF_Tree default (0x),
Depth int not null constraint DF_Tree_Depth default (1),
constraint CK_Tree_ParentID_Ne_NodeID check (IsNull(ParentID, 0) <> NodeID)
)
-- Path column will include self first for ease of including Parent Node in whole branch selection
-- and for ease of ordering nodes by hierarchy without extra calculations
-- tradeoff: reduces maximum Depth by 1, lengthens all Paths by 4 bytes,
-- makes what sargable queries there are against this column pretty useless (and the index useless).
go
create unique nonclustered index IX_Tree_NodeName on Tree (NodeName)
create nonclustered index IX_Tree_ParentID on Tree (ParentID)
-- not sure the following is helpful at ALL. It may even be harmful.
create unique nonclustered index IX_Tree_Path on Tree (Path)
go
create trigger TR_Tree_I on Tree for insert
as
set nocount on
update T
set
T.Path = IsNull(P.Path, 0x) + Convert(varbinary(4), T.NodeID),
T.Depth = IsNull(P.Depth, 0) + 1 -- saves math later to be 1 instead of 0
from
Inserted I
inner join Tree T on I.NodeID = T.NodeID
left join Tree P on T.ParentID = P.NodeID
go
create trigger TR_Tree_U on Tree for update
-- for simplicity one could require that updates to ParentIDs only occur one at a time
-- but an interesting problem is to handle updates of the ParentIds of almost every node in the tree simultaneously:
as
set nocount on
if not update (parentid) return
-- collect all children of affected rows as its own sub-tree
create table #Affected (
NodeID int,
ParentID int,
NewPath varbinary(900),
Depth int,
AncestorID int
)
declare @rowcount int
insert #Affected
select
I.NodeID,
I.ParentID,
NewPath = I.NodeID,
Depth = 1,
AncestorID = I.ParentID -- the greatest ancestor known for all affected nodes
from
Inserted I
-- Don't bother with rows that haven't had ParentID updated
-- Depends on particular patterns of table updates, but I'm gambling here that
-- these not-ParentID updated rows won't be children of ParentID-updated rows
-- Since otherwise they'll just be added in the next step with perhaps more overhead
inner join Deleted D ON I.NodeID = D.NodeID and IsNull(I.ParentID, 0) <> IsNull(D.ParentID, 0)
set @rowcount = @@rowcount
if @rowcount = 0 return
while @rowcount > 0 begin
insert #Affected
select
C.NodeID,
C.ParentID,
NewPath = P.NewPath + Convert(varbinary(4), C.NodeID),
Depth = P.Depth + 1,
AncestorID = P.AncestorID -- get as far as we can now
from
Tree C
inner join #Affected P ON C.ParentID = P.NodeID
left join #Affected X ON C.NodeID = X.NodeID
where
X.NodeID IS NULL -- don't insert nodes that already exist
set @rowcount = @@rowcount
end
-- calculate paths and depths for just the #Affected sub-tree
set @rowcount = 1
while @rowcount > 0 begin
update C
set
C.NewPath = P.NewPath + Convert(varbinary(4), C.NodeID),
C.Depth = C.Depth + 1,
C.AncestorID = P.AncestorID
from
#Affected C
inner join #Affected P on C.ParentID = P.NodeID
where
C.NodeID IS NOT NULL -- don't update new root nodes
AND C.NewPath <> P.NewPath + Convert(varbinary(4), C.NodeID)
-- ( ... OR IsNull(C.AncestorID, 0) <> IsNull(P.AncestorID, 0)) -- can't use simpler clause because of circular reference issues
set @rowcount = @@rowcount
if not exists(select * from #Affected where Depth = 1) begin
-- no depth 1 nodes in affected branch = circular reference
raiserror('Update failed. A node cannot be its own ancestor.', 16, 1)
rollback tran
return
end
end
-- now finally correct all updated nodes' paths and depths
update T
set
T.Path = IsNull(R.Path, 0x) + A.NewPath,
T.Depth = IsNull(R.Depth, 0) + A.Depth
from
Tree T
inner join #Affected A on T.NodeID = A.NodeID
left join Tree R on A.AncestorID = R.NodeID
go
create trigger TR_Tree_D on Tree instead of delete
as
set nocount on
-- collect all children of affected rows as its own sub-tree
create table #Deleted (NodeId int)
insert #Deleted select NodeID from Deleted
while @@rowcount > 0 begin
insert #Deleted
select
T.NodeID
from
Tree T
inner join #Deleted D ON T.ParentID = D.NodeID
left join #Deleted X ON T.NodeID = X.NodeID
where
X.NodeID IS NULL -- don't insert nodes that already exist in #Deleted
end
delete T
from
Tree T
inner join #Deleted D on T.NodeID = D.NodeID
go
-- helper table to make Path investigation easier, although I found a less costly way for most queries
create table Numbers4 (Number int not null identity(1, 4))
insert Numbers4 default values
while scope_identity() < 897 insert Numbers4 default values
go
-- some views for convenience. It might be better to issue the queries directly in many cases, though
go
create view TextTree
as
select top 100 percent
NodeDescr = replicate(' ', Depth * 3 - 3) + '+- ' + Convert(varchar(11), NodeID) + ' ' + NodeName,
*
from Tree
order by Path
go
-- this query was my first stab
/*
create view NodeAncestor
as
select
DescendantID = T.NodeId,
AncestorID = convert(int, (substring(T.Path, N.Number, 4))),
AncestorDepth = N.Number / 4, -- absolute depth of ancestor
DescendantDepth = T.Depth, -- absolute depth of descendant
RelativeDepth = T.Depth - N.Number / 4
from dbo.Tree T
inner join dbo.Numbers4 N
on N.Number < len(T.Path) - 4 --exclude self
-- minus 4 to exclude self, or leave out because some queries are easier that way
-- but then one has to use RelativeDepth = 0 for some other queries
*/
go
--but I found a more efficient way (about 1/2 to 1/3rd the cost because no table scan and no join to Numbers)
go
create view NodeAncestor
as
select
DescendantID = C.NodeID,
AncestorID = N.NodeID,
AncestorDepth = N.Depth, -- absolute depth of ancestor
DescendantDepth = C.Depth, -- absolute depth of descendant
RelativeDepth = C.Depth - N.Depth
from
Tree N
inner join Tree C on left(C.Path, N.Depth * 4) = N.Path AND C.NodeID <> N.NodeID -- excluding self is optional
go
create view NodeAncestorBinary
as
select
DescendantBinary = C.NodeBinary,
AncestorBinary = N.NodeBinary,
AncestorDepth = N.Depth,
DescendantDepth = C.Depth,
RelativeDepth = C.Depth - N.Depth
from
Tree N
inner join Tree C on left(C.Path, N.Depth * 4) = N.Path AND C.NodeID <> N.NodeID
go
create view NodeDescendant
as
select
AncestorID = N.NodeID,
DescendantID = C.NodeID,
DescendantDepth = C.Depth,
AncestorDepth = N.Depth,
RelativeDepth = C.Depth - N.Depth
from
Tree N
inner join Tree C on left(C.Path, N.Depth * 4) = N.Path AND C.NodeID <> N.NodeID
go
create view NodeDescendantBinary
as
select
AncestorBinary = N.NodeBinary,
DescendantBinary = C.NodeBinary,
DescendantDepth = C.Depth, -- absolute depth of descendant
AncestorDepth = N.Depth, -- absolute depth of ancestor,
RelativeDepth = C.Depth - N.Depth
from
Tree N
inner join Tree C on left(C.Path, N.Depth * 4) = N.Path AND C.NodeID <> N.NodeID
go
-------- test 1
insert Tree (ParentID, NodeName) select NULL, 'Root 1'
insert Tree (ParentID, NodeName) select 1, 'Trunk 1'
insert Tree (ParentID, NodeName) select 2, 'Branch 1'
insert Tree (ParentID, NodeName) select 2, 'Branch 2'
insert Tree (ParentID, NodeName) select 2, 'Branch 3'
insert Tree (ParentID, NodeName) select 2, 'Branch 4'
insert Tree (ParentID, NodeName) select 3, 'Twig 1'
insert Tree (ParentID, NodeName) select 3, 'Twig 2'
insert Tree (ParentID, NodeName) select 4, 'Twig 3'
insert Tree (ParentID, NodeName) select 5, 'Twig 4'
insert Tree (ParentID, NodeName) select 5, 'Twig 5'
insert Tree (ParentID, NodeName) select 5, 'Twig 6'
insert Tree (ParentID, NodeName) select 6, 'Twig 7'
insert Tree (ParentID, NodeName) select 6, 'Twig 8'
insert Tree (ParentID, NodeName) select 6, 'Twig 9'
insert Tree (ParentID, NodeName) select 6, 'Twig 10'
insert Tree (ParentID, NodeName) select 1, 'Corporeal'
insert Tree (ParentID, NodeName) select 1, 'Incorporeal'
insert Tree (ParentID, NodeName) select 17, 'Animal'
insert Tree (ParentID, NodeName) select 17, 'Mineral'
insert Tree (ParentID, NodeName) select 17, 'Vegetable'
insert Tree (ParentID, NodeName) select 19, 'Cat'
insert Tree (ParentID, NodeName) select 19, 'Hippopotamus'
insert Tree (ParentID, NodeName) select 19, 'Dinosaur'
insert Tree (ParentID, NodeName) select 13, 'Tiny Twig 1'
insert Tree (ParentID, NodeName) select 13, 'Tiny Twig 2'
insert Tree (ParentID, NodeName) select 13, 'Tiny Twig 3'
insert Tree (ParentID, NodeName) select 13, 'Tiny Twig 4'
insert Tree (ParentID, NodeName) select 27, 'Tinier Twig 1'
insert Tree (ParentID, NodeName) select 27, 'Tinier Twig 2'
insert Tree (ParentID, NodeName) select 29, 'Leaf 1'
insert Tree (ParentID, NodeName) select 29, 'Leaf 2'
insert Tree (ParentID, NodeName) select 29, 'Leaf 3'
insert Tree (ParentID, NodeName) select 30, 'Leaf 4'
insert Tree (ParentID, NodeName) select 30, 'Leaf 5'
insert Tree (ParentID, NodeName) select 18, 'Feelings'
insert Tree (ParentID, NodeName) select 36, 'Love'
insert Tree (ParentID, NodeName) select 36, 'Hate'
insert Tree (ParentID, NodeName) select 36, 'Apathy'
go
-- create statistics that execution plans in QA says are missing
-- but not sure if Path is very useful since all start with root Node values such as 0x00000001
CREATE STATISTICS [Statistic_Path] ON [dbo].[Tree] ([Path]) WITH FULLSCAN
CREATE STATISTICS [Statistic_Number] ON [dbo].[Numbers4] ([Number]) WITH FULLSCAN
go
--see tree
select NodeDescr from TextTree
go
-- All nodes under a specified Parent
-- Not sure if the less-than clause is better... I think it is for very large trees, but so far it has been slightly more costly
-- This first stab not as good as later method below
select C.*
from Tree C
inner join Numbers4 N
on N.Number < len(C.Path)
and substring(C.Path, N.Number, 4) = (select NodeBinary from Tree where NodeName = 'Branch 4')
order by C.Path
go
-- All nodes under a specified Parent, a better way (no numbers table)
-- execution plan looks more than twice as good, and actual cpu cost/reads cut by a factor of 7 - 10!
select C.*
from
Tree C
inner join Tree P on left(C.Path, P.Depth * 4) = P.Path
where
P.NodeName = 'Branch 4'
order by C.Path
-- test 2 -------- fill tree as deep as possible
truncate table tree
insert tree (parentid, nodename) select null, 'Root'
while scope_identity() < 225
insert tree (parentid, nodename) select scope_identity(), 'Child ' + convert(varchar(11), scope_identity())
-- break tree on node depth 226
insert tree (parentid, nodename) select scope_identity(), 'Child ' + convert(varchar(11), scope_identity())
---------- test 3 - fill tree very wide
truncate table tree
insert tree (parentid, nodename) select null, 'Root'
while scope_identity() < 65535
insert tree (parentid, nodename) select 1, 'Child ' + convert(varchar(11), scope_identity())
go
--try a massive update
update tree set parentid = floor(sqrt(nodeid)) where nodeid > 3 and parentid = 1
-- evidently 65532 rows at once takes too long, try batches:
set rowcount 100
update tree set parentid = floor(sqrt(nodeid)) where nodeid > 3 and parentid = 1
while @@rowcount > 0
update tree set parentid = floor(sqrt(nodeid)) where nodeid > 3 and parentid = 1
set rowcount 0
-- not too bad.
select * from texttree
--------- test 4 - insert a lot of nodes at once (wide)
-- CURRENTLY FAILING (failure = insert takes too long)
-- I haven't been able to figure this one out yet, it should be working!
declare @tree table (nodeid int identity(2,1))
insert @tree default values
while scope_identity() < 65535 insert @tree default values
-- alter table tree disable trigger TR_Tree_I
insert tree (parentid, nodename, path)
select
1,
'Child ' + convert(varchar(11), nodeid),
0x00000000 + convert(varbinary(4), nodeid) -- on multi-row inserts this is required to avoid problems with Path column unique index
from @tree
---- test 5 - insert an entire tree at once (wide + deep)
select * into #temptree from tree
truncate table tree
insert tree (parentid, nodename, path) select parentid, nodename, path from #temptree
drop table #temptree
go
use tempdb
drop database tree
go