Unlock a world of possibilities! Login now and discover the exclusive benefits awaiting you.
I have a data source (1M Rows) with many (approx 50) dates (DD/MM/YYYY) in each row and i only need to retain rows where at least one date per row is > 30/10/2019,
Some date fields are null
So any rows that have no dates above 30/10/2019 are not needed
Here's some example data to work with...
ID | Date1 | Date2 | Date3 |
1 | 22/11/2019 | 13/02/2018 | |
2 | 04/10/2019 | 13/04/2018 | |
3 | |||
4 | 17/11/2019 | ||
5 | 07/02/2017 | 29/06/2018 |
So in the example above only ID 1 & 4 would be needed
Maybe try this:
Modified the way to grab maxDate.
Instead of concatenating all dates into one table, then perform max(Date), this script will take today's date as maxDate.
Raw:
load * inline [
ID Date1 Date2 Date3
1 22/11/2019 13/02/2018
2 04/10/2019 13/04/2018
3
4 17/11/2019
5 07/02/2017 29/06/2018
](delimiter is ' ');
NoConcatenate
Data:
load *, Date1&'|'&Date2&'|'&Date3 as Dates;
load ID, if(isnull(Date1) or len(Date1)<1,null(),Date#(Date1,'DD/MM/YYYY')) as Date1
,if(isnull(Date2) or len(Date2)<1,null(),Date#(Date2,'DD/MM/YYYY')) as Date2 ,
if(isnull(Date3) or len(Date3)<1,null(),Date#(Date3,'DD/MM/YYYY')) as Date3
resident Raw;
drop table Raw;
//Modified the way to grab maxDate
//Instead of concatenating all dates into one, take today as maxDate
let maxDate = num(today());
let minDate = NUM(Date#('30/10/2019','DD/MM/YYYY'));
Calendar:
load Date($(minDate)-1+IterNo(),'DD/MM/YYYY') as Date
AUTOGENERATE 1
WHILE $(minDate)-1+IterNo() < $(maxDate)+1;
let vRow=NoOfRows('Calendar');
FOR i=0 to $(vRow)-1
let vDate=peek('Date',$(i),'Calendar');
Data2:
load * ,1 AS Flag resident Data
where WildMatch(Dates,'*$(vDate)*');
next i
drop table Data;
exit script;
Try this:
Script:
Raw:
load * inline [
ID Date1 Date2 Date3
1 22/11/2019 13/02/2018
2 04/10/2019 13/04/2018
3
4 17/11/2019
5 07/02/2017 29/06/2018
](delimiter is ' ');
NoConcatenate
Data:
load *, Date1&'|'&Date2&'|'&Date3 as Dates;
load ID, if(isnull(Date1) or len(Date1)<1,null(),Date#(Date1,'DD/MM/YYYY')) as Date1
,if(isnull(Date2) or len(Date2)<1,null(),Date#(Date2,'DD/MM/YYYY')) as Date2 ,
if(isnull(Date3) or len(Date3)<1,null(),Date#(Date3,'DD/MM/YYYY')) as Date3
resident Raw;
drop table Raw;
Data3:
load distinct Date1 as Date, ID
resident Data;
load distinct Date2 as Date,ID
resident Data;
load distinct Date3 as Date,ID
resident Data;
NoConcatenate
Date:
load max(Date) as maxDate
resident Data3;
let maxDate = peek('maxDate',0,'Date');
let minDate = NUM(Date#('30/10/2019','DD/MM/YYYY'));
drop table Date;
Calendar:
load Date($(minDate)-1+IterNo(),'DD/MM/YYYY') as Date
AUTOGENERATE 1
WHILE $(minDate)-1+IterNo() < $(maxDate)+1;
//exit Script;
let vRow=NoOfRows('Calendar');
FOR i=0 to $(vRow)-1
let vDate=peek('Date',$(i),'Calendar');
//NoConcatenate
Data2:
load * ,1 AS Flag resident Data
where WildMatch(Dates,'*$(vDate)*');
next i
drop table Data;
exit script;
Thanks and regards,
Arthur Fong
Thanks Arthur
Looks interesting I'll give this a go this week
Thanks for reply Burkhard
I'd need to filter our rows that don't have a date >30/10/2019 too
Sorry for this misunderstanding, Haymarketpaul.
If you need to filter out all dates > 30/11/2019 then you can do it like this:
So only Id's with dates > 30/11/2019 are left.
Hope this is ok for you.
Burkhard
Maybe try this:
Modified the way to grab maxDate.
Instead of concatenating all dates into one table, then perform max(Date), this script will take today's date as maxDate.
Raw:
load * inline [
ID Date1 Date2 Date3
1 22/11/2019 13/02/2018
2 04/10/2019 13/04/2018
3
4 17/11/2019
5 07/02/2017 29/06/2018
](delimiter is ' ');
NoConcatenate
Data:
load *, Date1&'|'&Date2&'|'&Date3 as Dates;
load ID, if(isnull(Date1) or len(Date1)<1,null(),Date#(Date1,'DD/MM/YYYY')) as Date1
,if(isnull(Date2) or len(Date2)<1,null(),Date#(Date2,'DD/MM/YYYY')) as Date2 ,
if(isnull(Date3) or len(Date3)<1,null(),Date#(Date3,'DD/MM/YYYY')) as Date3
resident Raw;
drop table Raw;
//Modified the way to grab maxDate
//Instead of concatenating all dates into one, take today as maxDate
let maxDate = num(today());
let minDate = NUM(Date#('30/10/2019','DD/MM/YYYY'));
Calendar:
load Date($(minDate)-1+IterNo(),'DD/MM/YYYY') as Date
AUTOGENERATE 1
WHILE $(minDate)-1+IterNo() < $(maxDate)+1;
let vRow=NoOfRows('Calendar');
FOR i=0 to $(vRow)-1
let vDate=peek('Date',$(i),'Calendar');
Data2:
load * ,1 AS Flag resident Data
where WildMatch(Dates,'*$(vDate)*');
next i
drop table Data;
exit script;