Unlock a world of possibilities! Login now and discover the exclusive benefits awaiting you.
Hi All,
I need to create a cluster of products each year based on below conditions
1) Need to pick minimum 3 products for any cluster for that year
2) If sum of sales for those products in that cluster is less than 80% of the target then need to add next product in same cluster. Keep on adding product in that cluster until 80% target is reached
3) If 80% target is reached in that cluster, we need to pick next 3 product and follow the steps 1) & 2) to create new cluster
4) Remaining products for that year which do not meet target should be flagged as 0.
5) Restart Cluster creation for New year
I have below dataset
Year | Product | Sales |
2023 | P1 | 920 |
2023 | P2 | 886 |
2023 | P3 | 804 |
2023 | P4 | 735 |
2023 | P5 | 699 |
2023 | P6 | 625 |
2023 | P7 | 602 |
2023 | P8 | 564 |
2023 | P9 | 548 |
2023 | P10 | 525 |
2023 | P11 | 392 |
2023 | P12 | 352 |
2023 | P13 | 316 |
2023 | P14 | 208 |
2024 | P1 | 1000 |
2024 | P2 | 966 |
2024 | P3 | 746 |
2024 | P4 | 686 |
2024 | P5 | 583 |
2024 | P6 | 543 |
2024 | P7 | 486 |
2024 | P8 | 335 |
2024 | P9 | 314 |
2024 | P10 | 301 |
2024 | P11 | 294 |
2024 | P12 | 248 |
2024 | P13 | 234 |
2024 | P14 | 177 |
Target for year is below
Year | Target |
2023 | 3200 |
2024 | 3300 |
I need below output
Year | Product | Sales | Cluster | %Cluster_share | %Cluster_share |
2023 | P1 | 920 | 1 | 35.25% | =920/Cluster 1 Total |
2023 | P2 | 886 | 1 | 33.95% | |
2023 | P3 | 804 | 1 | 30.80% | |
2023 | P4 | 735 | 2 | 27.62% | =735/ Cluster 2 Total |
2023 | P5 | 699 | 2 | 26.27% | |
2023 | P6 | 625 | 2 | 23.49% | |
2023 | P7 | 602 | 2 | 22.62% | |
2023 | P8 | 564 | 3 | 20.91% | =564/Cluster 3 Total |
2023 | P9 | 548 | 3 | 20.32% | |
2023 | P10 | 525 | 3 | 19.47% | |
2023 | P11 | 392 | 3 | 14.53% | |
2023 | P12 | 352 | 3 | 13.05% | |
2023 | P13 | 316 | 3 | 11.72% | |
2023 | P14 | 208 | 0 | 100% | |
2024 | P1 | 1000 | 1 | 36.87% | |
2024 | P2 | 966 | 1 | 35.62% | |
2024 | P3 | 746 | 1 | 27.51% | |
2024 | P4 | 686 | 2 | 23.28% | |
2024 | P5 | 583 | 2 | 19.78% | |
2024 | P6 | 543 | 2 | 18.43% | |
2024 | P7 | 486 | 2 | 16.49% | |
2024 | P8 | 335 | 2 | 11.37% | |
2024 | P9 | 314 | 2 | 10.65% | |
2024 | P10 | 301 | 0 | 24.00% | |
2024 | P11 | 294 | 0 | 23.44% | |
2024 | P12 | 248 | 0 | 19.78% | |
2024 | P13 | 234 | 0 | 18.66% | |
2024 | P14 | 177 | 0 | 14.11% |
For example, for Year 2023, we picked first 3 products in Cluster P1,P2 & P3. Sum of the sales for these products becomes 2610(920+886+804). If I divide this by target 2023, 2610/3200 = 81.56% which is more than 80%. Hence these products (P1,P2,P3) would create Cluster 1. To create a next cluster, next 3 product will be picked up P4,P5 & P6. Sum of these products becomes 2059 (735+699+625). If I divide it by Target, 2059/3200=64.34% which is less than 80% so we need to add next product in this cluster which is P7. Now sum of the sales for this cluster becomes 2661 and percentage becomes 83.16. Hence, Cluster 2 now contains (P4,P5,P6,P7). If remaining products would not meet 80% criteria or remaining products are less than 3, we need to flag them as Cluster 0
I hope the requirement is clear. I need to create Columns Cluster & %Cluster_share in the script
Data:
Load * Inline [
Year Product Sales
2023 P1 920
2023 P2 886
2023 P3 804
2023 P4 735
2023 P5 699
2023 P6 625
2023 P7 602
2023 P8 564
2023 P9 548
2023 P10 525
2023 P11 392
2023 P12 352
2023 P13 316
2023 P14 208
2024 P1 1000
2024 P2 966
2024 P3 746
2024 P4 686
2024 P5 583
2024 P6 543
2024 P7 486
2024 P8 335
2024 P9 314
2024 P10 301
2024 P11 294
2024 P12 248
2024 P13 234
2024 P14 177
](delimiter is '\t');
New:
Load *,
RowNo() as Row
Resident Data
Order by Year,Sales desc;
Left Join(New)
Load * Inline [
Year, Target
2023, 3200
2024, 3300 ];
Drop Table Data;
let vRowStart =1 ;
let vRowEnd =3;
let vMaxRow = Peek('Row',-1,'New');
let vCluster =1;
Cluster:
Load 0 as Temp
AutoGenerate 0 ;
do while vRowEnd <= vMaxRow
temp:
NoConcatenate
Load Row,
Year,
Product,
Target,
Sales
Resident New
where Row >= $(vRowStart) and Row <= $(vRowEnd);
cluster_sales:
Load sum(Sales) as cluster_sales
Resident temp;
let vTarget = Peek('Target',-1,'temp');
let vSales = Peek('cluster_sales');
let vYearRowStart = Peek('Year',0,'temp');
let vYearRowEnd = Peek('Year',-1,'temp');
Drop Table cluster_sales;
if $(vYearRowStart)=$(vYearRowEnd) and $(vSales)/$(vTarget) >=0.8 then
Concatenate(Cluster)
Load *,
num(Sales/$(vSales),'#0.00%') as cluster_share,
$(vCluster) as Cluster
Resident temp;
Drop Table temp;
let vCluster= '$(vCluster)'+1;
let vRowStart = '$(vRowEnd)'+1;
let vRowEnd = '$(vRowEnd)'+3;
elseif $(vYearRowStart)=$(vYearRowEnd) and $(vSales)/$(vTarget) < 0.8 then
let vRowEnd = '$(vRowEnd)'+1;
if vRowEnd < vMaxRow then
Drop Table temp;
elseif vRowEnd = vMaxRow then
Drop Table temp;
temp:
NoConcatenate
Load Row,
Year,
Product,
Target,
Sales
Resident New
where Row >= $(vRowStart) and Row <= $(vRowEnd);
cluster_sales:
Load sum(Sales) as cluster_sales
Resident temp;
let vSales = Peek('cluster_sales');
Drop Table cluster_sales;
Concatenate(Cluster)
Load *,
num(Sales/$(vSales),'#0.00%') as cluster_share,
0 as Cluster
Resident temp;
Drop Table temp;
Exit do
EndIf
elseif $(vYearRowStart)<> $(vYearRowEnd) and vRowEnd < vMaxRow then
Drop Table temp;
temp:
NoConcatenate
Load Row,
Year,
Product,
Target,
Sales
Resident New
where Year = $(vYearRowStart) and Row >= $(vRowStart);
let vRowStart = Peek('Row',0,'temp');
let vRowEnd = Peek('Row',-1,'temp');
cluster_sales:
Load sum(Sales) as cluster_sales
Resident temp;
let vSales = Peek('cluster_sales');
Drop Table cluster_sales;
Concatenate(Cluster)
Load *,
num(Sales/$(vSales),'#0.00%') as cluster_share,
0 as Cluster
Resident temp;
Drop Table temp;
let vRowStart = '$(vRowEnd)'+1;
let vRowEnd = '$(vRowEnd)'+3;
let vCluster =1;
EndIf
let vYearRowStart = Null();
let vYearRowEnd= Null();
let vTarget = Null();
let vSales = Null();
loop
Drop Table New;
Drop Fields Row,Temp;
Data:
Load * Inline [
Year Product Sales
2023 P1 920
2023 P2 886
2023 P3 804
2023 P4 735
2023 P5 699
2023 P6 625
2023 P7 602
2023 P8 564
2023 P9 548
2023 P10 525
2023 P11 392
2023 P12 352
2023 P13 316
2023 P14 208
2024 P1 1000
2024 P2 966
2024 P3 746
2024 P4 686
2024 P5 583
2024 P6 543
2024 P7 486
2024 P8 335
2024 P9 314
2024 P10 301
2024 P11 294
2024 P12 248
2024 P13 234
2024 P14 177
](delimiter is '\t');
New:
Load *,
RowNo() as Row
Resident Data
Order by Year,Sales desc;
Left Join(New)
Load * Inline [
Year, Target
2023, 3200
2024, 3300 ];
Drop Table Data;
let vRowStart =1 ;
let vRowEnd =3;
let vMaxRow = Peek('Row',-1,'New');
let vCluster =1;
Cluster:
Load 0 as Temp
AutoGenerate 0 ;
do while vRowEnd <= vMaxRow
temp:
NoConcatenate
Load Row,
Year,
Product,
Target,
Sales
Resident New
where Row >= $(vRowStart) and Row <= $(vRowEnd);
cluster_sales:
Load sum(Sales) as cluster_sales
Resident temp;
let vTarget = Peek('Target',-1,'temp');
let vSales = Peek('cluster_sales');
let vYearRowStart = Peek('Year',0,'temp');
let vYearRowEnd = Peek('Year',-1,'temp');
Drop Table cluster_sales;
if $(vYearRowStart)=$(vYearRowEnd) and $(vSales)/$(vTarget) >=0.8 then
Concatenate(Cluster)
Load *,
num(Sales/$(vSales),'#0.00%') as cluster_share,
$(vCluster) as Cluster
Resident temp;
Drop Table temp;
let vCluster= '$(vCluster)'+1;
let vRowStart = '$(vRowEnd)'+1;
let vRowEnd = '$(vRowEnd)'+3;
elseif $(vYearRowStart)=$(vYearRowEnd) and $(vSales)/$(vTarget) < 0.8 then
let vRowEnd = '$(vRowEnd)'+1;
if vRowEnd < vMaxRow then
Drop Table temp;
elseif vRowEnd = vMaxRow then
Drop Table temp;
temp:
NoConcatenate
Load Row,
Year,
Product,
Target,
Sales
Resident New
where Row >= $(vRowStart) and Row <= $(vRowEnd);
cluster_sales:
Load sum(Sales) as cluster_sales
Resident temp;
let vSales = Peek('cluster_sales');
Drop Table cluster_sales;
Concatenate(Cluster)
Load *,
num(Sales/$(vSales),'#0.00%') as cluster_share,
0 as Cluster
Resident temp;
Drop Table temp;
Exit do
EndIf
elseif $(vYearRowStart)<> $(vYearRowEnd) and vRowEnd < vMaxRow then
Drop Table temp;
temp:
NoConcatenate
Load Row,
Year,
Product,
Target,
Sales
Resident New
where Year = $(vYearRowStart) and Row >= $(vRowStart);
let vRowStart = Peek('Row',0,'temp');
let vRowEnd = Peek('Row',-1,'temp');
cluster_sales:
Load sum(Sales) as cluster_sales
Resident temp;
let vSales = Peek('cluster_sales');
Drop Table cluster_sales;
Concatenate(Cluster)
Load *,
num(Sales/$(vSales),'#0.00%') as cluster_share,
0 as Cluster
Resident temp;
Drop Table temp;
let vRowStart = '$(vRowEnd)'+1;
let vRowEnd = '$(vRowEnd)'+3;
let vCluster =1;
EndIf
let vYearRowStart = Null();
let vYearRowEnd= Null();
let vTarget = Null();
let vSales = Null();
loop
Drop Table New;
Drop Fields Row,Temp;
@Kushal_Chawda Thanks.