fredericofonseca
IS-IT--Management
Ok.
Have done the SQL below, it works, but I think theres a better way of doing it, so would like to see what others say about it.
At the bottom of this post there is code to create the required tables and populate them with sample data.
Real tables have 2million records, with around 500k records missing at the moment.
New records are added 5 days a week, and each time we add new ones, we need to see if there is a date gap between the record that is to be inserted and the previous one. compare is done based on fund and date. e.g.
if highest date for fund A1 is 2010/01/10, and we are to insert a new record fund A1, date 2010/01/15 then 4 new records will need to be inserted, for dates 2010/01/11, 12,13,14 with the values from the record of 2010/01/10.
The missing gaps can and will be different for each fund being inserted.
Daily code will be a modified version of the code below which I've created to get all the currently missing records.
Really looking for a better way of doing this. Saw something on another site recently that would do what I want, but I cant find it at the moment.
Thanks
This will run in a SQL 2008 R2
Sample code used.
the commented out code below will insert the data into the table.
Run it first commented to see what are the missing rows, and then uncommented to update the table with the missing records
Code to insert sample data into dates
Code to insert sample data into funds
Regards
Frederico Fonseca
SysSoft Integrated Ltd
FAQ219-2884
FAQ181-2886
Have done the SQL below, it works, but I think theres a better way of doing it, so would like to see what others say about it.
At the bottom of this post there is code to create the required tables and populate them with sample data.
Real tables have 2million records, with around 500k records missing at the moment.
New records are added 5 days a week, and each time we add new ones, we need to see if there is a date gap between the record that is to be inserted and the previous one. compare is done based on fund and date. e.g.
if highest date for fund A1 is 2010/01/10, and we are to insert a new record fund A1, date 2010/01/15 then 4 new records will need to be inserted, for dates 2010/01/11, 12,13,14 with the values from the record of 2010/01/10.
The missing gaps can and will be different for each fund being inserted.
Daily code will be a modified version of the code below which I've created to get all the currently missing records.
Really looking for a better way of doing this. Saw something on another site recently that would do what I want, but I cant find it at the moment.
Thanks
This will run in a SQL 2008 R2
Sample code used.
the commented out code below will insert the data into the table.
Run it first commented to see what are the missing rows, and then uncommented to update the table with the missing records
Code:
with g_fund (fund, max_dt, min_dt) as
(
select fund, max(dt1) as max_dt, min(dt1) as min_dt
from funds
group by fund
)
,
missing_dates (date, fund) as
(
select distinct f1.date, f1.fund
from (
select date, fund
from dates
cross join
(select fund
from funds group by fund) as z
) f1
left outer join funds f2
on f1.date = f2.dt1
and f1.fund = f2.fund
where f2.fund is null
)
---
--insert into funds (dt1,fund,val,type)
--select f1.date, f1.fund,f2.val,'U'
--from (
select f2.fund
,f2.date
,(select top 1 f4.dt1
from funds f4
where f4.fund = f2.fund
and f4.dt1 < f2.date
order by dt1 desc) as base_date
from g_fund f1
inner join missing_dates f2
on f1.fund = f2.fund
and f2.date between f1.min_dt and f1.max_dt
--) f1
--inner join funds f2
--on f1.fund = f2.fund
--and f1.base_date = f2.dt1
Code:
CREATE TABLE [dbo].[funds](
[id] [int] IDENTITY(1,1) NOT NULL,
[dt1] [date] NOT NULL,
[fund] [varchar](50) NOT NULL,
[val] [numeric](18, 6) NULL,
[type] [varchar](10) NULL,
CONSTRAINT [PK_funds] PRIMARY KEY CLUSTERED
(
[id] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY]
GO
CREATE NONCLUSTERED INDEX [IX_funds] ON [dbo].[funds]
(
[fund] ASC,
[dt1] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
CREATE TABLE [dbo].[dates](
[id] [int] IDENTITY(1,1) NOT NULL,
[date] [date] NOT NULL,
CONSTRAINT [PK_dates] PRIMARY KEY CLUSTERED
(
[id] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY]
GO
CREATE NONCLUSTERED INDEX [IX_dates] ON [dbo].[dates]
(
[date] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF,
SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF,
ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
Code to insert sample data into dates
Code:
SET DATEFORMAT dmy;
GO
DECLARE @StartDate DATETIME
DECLARE @EndDate DATETIME
SET @StartDate = '01/01/2010'
SET @EndDate = '30/10/2010'
WHILE @StartDate <= @EndDate
BEGIN
INSERT INTO dates
(date)
SELECT
@StartDate
SET @StartDate = DATEADD(dd, 1, @StartDate)
END
Code:
SET DATEFORMAT ymd;
GO
insert into funds (DT1,fund,val,type)values('2010/01/01','A1',8818.06391,'O');
insert into funds (DT1,fund,val,type)values('2010/01/02','A1',6126.85689,'O');
insert into funds (DT1,fund,val,type)values('2010/01/03','A1',465.40677,'O');
insert into funds (DT1,fund,val,type)values('2010/01/04','A1',1932.43667,'O');
insert into funds (DT1,fund,val,type)values('2010/01/05','A1',2826.36455,'O');
insert into funds (DT1,fund,val,type)values('2010/01/06','A1',9993.18179,'O');
insert into funds (DT1,fund,val,type)values('2010/01/07','A1',9120.06484,'O');
insert into funds (DT1,fund,val,type)values('2010/01/10','A1',2034.3318,'O');
insert into funds (DT1,fund,val,type)values('2010/01/11','A1',1055.60192,'O');
insert into funds (DT1,fund,val,type)values('2010/01/12','A1',8021.2361,'O');
insert into funds (DT1,fund,val,type)values('2010/01/13','A1',6439.91889,'O');
insert into funds (DT1,fund,val,type)values('2010/01/14','A1',4080.38079,'O');
insert into funds (DT1,fund,val,type)values('2010/01/15','A1',8556.93475,'O');
insert into funds (DT1,fund,val,type)values('2010/01/16','A1',7266.49633,'O');
insert into funds (DT1,fund,val,type)values('2010/01/17','A1',2791.97906,'O');
insert into funds (DT1,fund,val,type)values('2010/01/18','A1',7297.38277,'O');
insert into funds (DT1,fund,val,type)values('2010/01/19','A1',352.25359,'O');
insert into funds (DT1,fund,val,type)values('2010/01/20','A1',5147.2565,'O');
insert into funds (DT1,fund,val,type)values('2010/01/23','A1',416.52155,'O');
insert into funds (DT1,fund,val,type)values('2010/01/24','A1',7436.22197,'O');
insert into funds (DT1,fund,val,type)values('2010/01/25','A1',4342.87888,'O');
insert into funds (DT1,fund,val,type)values('2010/01/26','A1',2679.84738,'O');
insert into funds (DT1,fund,val,type)values('2010/01/27','A1',5281.91381,'O');
insert into funds (DT1,fund,val,type)values('2010/01/28','A1',8313.26362,'O');
insert into funds (DT1,fund,val,type)values('2010/01/29','A1',9900.35108,'O');
insert into funds (DT1,fund,val,type)values('2010/01/30','A1',3177.73013,'O');
insert into funds (DT1,fund,val,type)values('2010/01/31','A1',7083.39143,'O');
insert into funds (DT1,fund,val,type)values('2010/02/01','A1',6194.60007,'O');
insert into funds (DT1,fund,val,type)values('2010/02/02','A1',6134.6484,'O');
insert into funds (DT1,fund,val,type)values('2010/02/03','A1',7176.51201,'O');
insert into funds (DT1,fund,val,type)values('2010/02/04','A1',2609.04205,'O');
insert into funds (DT1,fund,val,type)values('2010/02/05','A1',6194.52768,'O');
insert into funds (DT1,fund,val,type)values('2010/02/06','A1',615.92811,'O');
insert into funds (DT1,fund,val,type)values('2010/02/09','A1',5020.59007,'O');
insert into funds (DT1,fund,val,type)values('2010/02/10','A1',1744.30497,'O');
insert into funds (DT1,fund,val,type)values('2010/01/04','A2',5225.99193,'O');
insert into funds (DT1,fund,val,type)values('2010/01/05','A2',5901.8695,'O');
insert into funds (DT1,fund,val,type)values('2010/01/06','A2',1132.72741,'O');
insert into funds (DT1,fund,val,type)values('2010/01/07','A2',6720.12163,'O');
insert into funds (DT1,fund,val,type)values('2010/01/10','A2',9027.57755,'O');
insert into funds (DT1,fund,val,type)values('2010/01/11','A2',4555.55221,'O');
insert into funds (DT1,fund,val,type)values('2010/01/12','A2',4286.6759,'O');
insert into funds (DT1,fund,val,type)values('2010/01/13','A2',2085.4367,'O');
insert into funds (DT1,fund,val,type)values('2010/01/14','A2',72.33286,'O');
insert into funds (DT1,fund,val,type)values('2010/01/15','A2',3411.15727,'O');
insert into funds (DT1,fund,val,type)values('2010/01/16','A2',2262.89075,'O');
insert into funds (DT1,fund,val,type)values('2010/01/17','A2',3527.54524,'O');
insert into funds (DT1,fund,val,type)values('2010/01/18','A2',8581.69139,'O');
insert into funds (DT1,fund,val,type)values('2010/01/19','A2',2480.91826,'O');
insert into funds (DT1,fund,val,type)values('2010/01/20','A2',720.98755,'O');
insert into funds (DT1,fund,val,type)values('2010/01/23','A2',3520.25077,'O');
insert into funds (DT1,fund,val,type)values('2010/01/24','A2',141.31637,'O');
insert into funds (DT1,fund,val,type)values('2010/01/25','A2',7350.81759,'O');
insert into funds (DT1,fund,val,type)values('2010/01/26','A2',1073.34543,'O');
insert into funds (DT1,fund,val,type)values('2010/01/27','A2',3782.0554,'O');
insert into funds (DT1,fund,val,type)values('2010/01/28','A2',4306.88184,'O');
insert into funds (DT1,fund,val,type)values('2010/01/29','A2',3332.84038,'O');
insert into funds (DT1,fund,val,type)values('2010/01/30','A2',9492.25227,'O');
insert into funds (DT1,fund,val,type)values('2010/01/31','A2',9202.49073,'O');
insert into funds (DT1,fund,val,type)values('2010/02/01','A2',2441.57043,'O');
insert into funds (DT1,fund,val,type)values('2010/02/02','A2',8475.75,'O');
insert into funds (DT1,fund,val,type)values('2010/02/03','A2',1136.24877,'O');
insert into funds (DT1,fund,val,type)values('2010/02/04','A2',1261.12739,'O');
Regards
Frederico Fonseca
SysSoft Integrated Ltd
FAQ219-2884
FAQ181-2886