Skip to main content
Announcements
SYSTEM MAINTENANCE: Thurs., Sept. 19, 1 AM ET, Platform will be unavailable for approx. 60 minutes.
cancel
Showing results for 
Search instead for 
Did you mean: 
Kain_F
Contributor III
Contributor III

Correlation Matrix

Hi all,

Not really a question but more of a contribution. 

I was looking for a way to create a correlation matrix in qliksense. I found many articles related to creating a correlation matrix in r and then loading it up to qliksense, not really using the correl script function built-in in qliksense. 

Anyways, the code underneath contains a sub which creates the correlation matrix of any given dimensions you like and underneath that there is an example with sample data. 

I hope this can help some people

 

 

 

SUB correlationMatrix(SourceTable,VarList)
// ** The idea of this sub is to make a correlation matrix of any number of variables we choose ourselves 
// ** VarList is a comma separated list which contains the variables you want to create the correlation matrix

let vEnd = SubStringCount('$(VarList)',',') + 1;

// STEP 1: We create our final table that will be filled with the correlation coefficients
// this will consist only of the group column that contains all of the X values 
for i = 1 to $(vEnd)
  let current_X_Value = SubField('$(VarList)',',',$(i));

  if $(i) = 1 then 
      FINAL:
      load * Inline [
      GRP
      $(current_X_Value)
      ];
  else 
      Concatenate(FINAL)
      load * Inline [
      GRP
      $(current_X_Value)
      ];
  endif;
next;


// STEP 2: We create a table for each y value in which we will make all of the correlations of each x value to that y value
for i =  1 to $(vEnd)
let current_Y_Value = SubField('$(VarList)',',',$(i));
	for j = 1 to $(vEnd)
    let current_X_Value = SubField('$(VarList)',',',$(j));
    
    // STEP 2.1: To calculate each correlation coefficient we need to first create a TEMP table with both fields and their respective values
    // https://help.qlik.com/en-US/sense/May2023/Subsystems/Hub/Content/Sense_Hub/Scripting/StatisticalAggregationFunctions/correl.htm
    if not '$(current_X_Value)' = '$(current_Y_Value)' then    
      TEMP:
      NoConcatenate
      Load
      '$(current_X_Value)' as GRP,
       *;

       Load
       $(current_X_Value),
       $(current_Y_Value)
       resident '$(SourceTable)' where not ($(current_Y_Value) = '-' or isnull($(current_Y_Value)));
    
    // if our x and y value we will get an error saying that fieldnames need to be unique in tables thus we create a temp table with only our current_y_value as column (this is the same as the current x value)
    else     
      TEMP:
      NoConcatenate
      Load
      '$(current_X_Value)' as GRP,
       *;

       Load
       $(current_Y_Value)
       resident '$(SourceTable)' where not ($(current_Y_Value) = '-' or isnull($(current_Y_Value)));
     
     endif;

	 // STEP 2.2: once we have created our temp tables we will start to make a record for each correlationcoefficient being (x1,y),(x2,y),..(xn,y)
     // to initialize this table we will first need to make a table and then fill it with concatenate 
     if $(j) = 1  then 
    
     TEMP.$(current_Y_Value):
       NoConcatenate
       LOAD 
       GRP,
       Correl($(current_X_Value),$(current_Y_Value)) as Correl_$(current_Y_Value)
       Resident TEMP Group By GRP;
       drop table TEMP;
  
     else
     Concatenate (TEMP.$(current_Y_Value))
       LOAD 
       GRP,
       Correl($(current_X_Value),$(current_Y_Value)) as Correl_$(current_Y_Value)
       Resident TEMP Group By GRP;
       drop table TEMP;
     endif;
  next;
next;

// STEP 3: After creating these tables with the correlation coefficients we will link them back to our initial final table to get the correlation matrix
for i = 1 to $(vEnd)
let current_Y_Value = SubField('$(VarList)',',',$(i));

left join (FINAL)
	load 
    *
    Resident TEMP.$(current_Y_Value);
    drop Table TEMP.$(current_Y_Value); 
next;

END SUB

 

 

 

After inserting this sub into your data load script you can use it as in the following example:

 

 

TEST:
load * Inline [
correlation_dimension_1;correlation_dimension_2;correlation_dimension_3;correlation_dimension_4
0;4;4;2
2;3;1;2
0;3;5;2
3;1;5;2
3;0;4;5
3;1;4;3
0;4;2;1
3;0;0;3
2;2;4;5
5;5;1;3
4;2;4;5
2;5;4;5
5;1;1;0
3;0;0;5
3;3;0;4
4;5;1;0
5;3;4;5
1;5;2;2
2;1;1;2
4;5;0;4
4;1;3;3
3;5;5;3
1;4;3;3
4;1;0;0
4;5;5;4
5;2;0;3
5;2;5;5
5;0;0;2
5;3;2;5
1;0;2;1
2;0;4;2
5;0;5;4
0;1;3;3
4;0;0;0
3;0;2;3
5;3;5;5
0;5;3;3
1;5;3;4
0;1;4;3
0;4;4;4
3;3;0;4
3;5;1;1
4;3;1;0
1;5;5;4
4;3;0;4
4;3;1;5
0;4;2;4
2;4;1;2
3;0;3;2
] (delimiter is ';');

call correlationMatrix('TEST','correlation_dimension_1,correlation_dimension_2,correlation_dimension_3,correlation_dimension_4');

 

 

 

The resulting output will be that you have created a new table called correlation matrix. 

 

extract.JPG

Hope this can help some people and if someone sees ways to improve the sub feel free to let me know.

 

Labels (4)
2 Replies
Kushal_Chawda

@Kain_F  one more way could be

 

 

Data:
load *, 1 as GRP Inline [
correlation_dimension_1;correlation_dimension_2;correlation_dimension_3;correlation_dimension_4
0;4;4;2
2;3;1;2
0;3;5;2
3;1;5;2
3;0;4;5
3;1;4;3
0;4;2;1
3;0;0;3
2;2;4;5
5;5;1;3
4;2;4;5
2;5;4;5
5;1;1;0
3;0;0;5
3;3;0;4
4;5;1;0
5;3;4;5
1;5;2;2
2;1;1;2
4;5;0;4
4;1;3;3
3;5;5;3
1;4;3;3
4;1;0;0
4;5;5;4
5;2;0;3
5;2;5;5
5;0;0;2
5;3;2;5
1;0;2;1
2;0;4;2
5;0;5;4
0;1;3;3
4;0;0;0
3;0;2;3
5;3;5;5
0;5;3;3
1;5;3;4
0;1;4;3
0;4;4;4
3;3;0;4
3;5;1;1
4;3;1;0
1;5;5;4
4;3;0;4
4;3;1;5
0;4;2;4
2;4;1;2
3;0;3;2
] (delimiter is ';');

Dimension:
Load * Inline [
Correlation_Dimension
correlation_dimension_1
correlation_dimension_2
correlation_dimension_3
correlation_dimension_4 ];

// cross join to generate all possible combination of each dimesnion
Join(Dimension)
Load Correlation_Dimension as Correlation_Dimension2
Resident Dimension;

// create dynamic correlation function which genreates separate field for each combination of dimension
Correlation_Function:
Load Concat(Correlation_Function,',') as Correlation_Function;
Load 
    'Correl('&Correlation_Dimension& ',' & Correlation_Dimension2 & ')' &
     ' as ' & '[' & Correlation_Dimension & ':' & Correlation_Dimension2 & ']'   as Correlation_Function
Resident Dimension;

Drop Table Dimension;

let vCorrelation_Function = Peek('Correlation_Function');

Drop Table Correlation_Function;

Correlation:
CrossTable(Fields,Correlation,1)
Load GRP,
     $(vCorrelation_Function)
Resident Data
Group by GRP;

Drop Field GRP;

CorrelationMatrix:
Generic Load SubField(Fields,':',1) as GRP,
             'Corr_'& SubField(Fields,':',2) as CorrelationDimension,
             Correlation
Resident Correlation;

Drop Table Correlation;

 

 

Screenshot 2023-06-13 at 12.49.52.png

 

RanOuerg
Creator
Creator

Hello all,

Do you have any idea  how can calculate the correlation between categorical data exemple [Job, skill, Level]

i need to test the relationships between each job based on the skills needed and the level