Do not input private or sensitive data. View Qlik Privacy & Cookie Policy.
Skip to main content

Announcements
Join us in Bucharest on Sept 18th for Qlik's AI Reality Tour! Register Now
cancel
Showing results for 
Search instead for 
Did you mean: 
Kain_F
Contributor III
Contributor III

Correlation Matrix

Hi all,

Not really a question but more of a contribution. 

I was looking for a way to create a correlation matrix in qliksense. I found many articles related to creating a correlation matrix in r and then loading it up to qliksense, not really using the correl script function built-in in qliksense. 

Anyways, the code underneath contains a sub which creates the correlation matrix of any given dimensions you like and underneath that there is an example with sample data. 

I hope this can help some people

 

 

 

SUB correlationMatrix(SourceTable,VarList)
// ** The idea of this sub is to make a correlation matrix of any number of variables we choose ourselves 
// ** VarList is a comma separated list which contains the variables you want to create the correlation matrix

let vEnd = SubStringCount('$(VarList)',',') + 1;

// STEP 1: We create our final table that will be filled with the correlation coefficients
// this will consist only of the group column that contains all of the X values 
for i = 1 to $(vEnd)
  let current_X_Value = SubField('$(VarList)',',',$(i));

  if $(i) = 1 then 
      FINAL:
      load * Inline [
      GRP
      $(current_X_Value)
      ];
  else 
      Concatenate(FINAL)
      load * Inline [
      GRP
      $(current_X_Value)
      ];
  endif;
next;


// STEP 2: We create a table for each y value in which we will make all of the correlations of each x value to that y value
for i =  1 to $(vEnd)
let current_Y_Value = SubField('$(VarList)',',',$(i));
	for j = 1 to $(vEnd)
    let current_X_Value = SubField('$(VarList)',',',$(j));
    
    // STEP 2.1: To calculate each correlation coefficient we need to first create a TEMP table with both fields and their respective values
    // https://help.qlik.com/en-US/sense/May2023/Subsystems/Hub/Content/Sense_Hub/Scripting/StatisticalAggregationFunctions/correl.htm
    if not '$(current_X_Value)' = '$(current_Y_Value)' then    
      TEMP:
      NoConcatenate
      Load
      '$(current_X_Value)' as GRP,
       *;

       Load
       $(current_X_Value),
       $(current_Y_Value)
       resident '$(SourceTable)' where not ($(current_Y_Value) = '-' or isnull($(current_Y_Value)));
    
    // if our x and y value we will get an error saying that fieldnames need to be unique in tables thus we create a temp table with only our current_y_value as column (this is the same as the current x value)
    else     
      TEMP:
      NoConcatenate
      Load
      '$(current_X_Value)' as GRP,
       *;

       Load
       $(current_Y_Value)
       resident '$(SourceTable)' where not ($(current_Y_Value) = '-' or isnull($(current_Y_Value)));
     
     endif;

	 // STEP 2.2: once we have created our temp tables we will start to make a record for each correlationcoefficient being (x1,y),(x2,y),..(xn,y)
     // to initialize this table we will first need to make a table and then fill it with concatenate 
     if $(j) = 1  then 
    
     TEMP.$(current_Y_Value):
       NoConcatenate
       LOAD 
       GRP,
       Correl($(current_X_Value),$(current_Y_Value)) as Correl_$(current_Y_Value)
       Resident TEMP Group By GRP;
       drop table TEMP;
  
     else
     Concatenate (TEMP.$(current_Y_Value))
       LOAD 
       GRP,
       Correl($(current_X_Value),$(current_Y_Value)) as Correl_$(current_Y_Value)
       Resident TEMP Group By GRP;
       drop table TEMP;
     endif;
  next;
next;

// STEP 3: After creating these tables with the correlation coefficients we will link them back to our initial final table to get the correlation matrix
for i = 1 to $(vEnd)
let current_Y_Value = SubField('$(VarList)',',',$(i));

left join (FINAL)
	load 
    *
    Resident TEMP.$(current_Y_Value);
    drop Table TEMP.$(current_Y_Value); 
next;

END SUB

 

 

 

After inserting this sub into your data load script you can use it as in the following example:

 

 

TEST:
load * Inline [
correlation_dimension_1;correlation_dimension_2;correlation_dimension_3;correlation_dimension_4
0;4;4;2
2;3;1;2
0;3;5;2
3;1;5;2
3;0;4;5
3;1;4;3
0;4;2;1
3;0;0;3
2;2;4;5
5;5;1;3
4;2;4;5
2;5;4;5
5;1;1;0
3;0;0;5
3;3;0;4
4;5;1;0
5;3;4;5
1;5;2;2
2;1;1;2
4;5;0;4
4;1;3;3
3;5;5;3
1;4;3;3
4;1;0;0
4;5;5;4
5;2;0;3
5;2;5;5
5;0;0;2
5;3;2;5
1;0;2;1
2;0;4;2
5;0;5;4
0;1;3;3
4;0;0;0
3;0;2;3
5;3;5;5
0;5;3;3
1;5;3;4
0;1;4;3
0;4;4;4
3;3;0;4
3;5;1;1
4;3;1;0
1;5;5;4
4;3;0;4
4;3;1;5
0;4;2;4
2;4;1;2
3;0;3;2
] (delimiter is ';');

call correlationMatrix('TEST','correlation_dimension_1,correlation_dimension_2,correlation_dimension_3,correlation_dimension_4');

 

 

 

The resulting output will be that you have created a new table called correlation matrix. 

 

extract.JPG

Hope this can help some people and if someone sees ways to improve the sub feel free to let me know.

 

Labels (4)
2 Replies
Kushal_Chawda

@Kain_F  one more way could be

 

 

Data:
load *, 1 as GRP Inline [
correlation_dimension_1;correlation_dimension_2;correlation_dimension_3;correlation_dimension_4
0;4;4;2
2;3;1;2
0;3;5;2
3;1;5;2
3;0;4;5
3;1;4;3
0;4;2;1
3;0;0;3
2;2;4;5
5;5;1;3
4;2;4;5
2;5;4;5
5;1;1;0
3;0;0;5
3;3;0;4
4;5;1;0
5;3;4;5
1;5;2;2
2;1;1;2
4;5;0;4
4;1;3;3
3;5;5;3
1;4;3;3
4;1;0;0
4;5;5;4
5;2;0;3
5;2;5;5
5;0;0;2
5;3;2;5
1;0;2;1
2;0;4;2
5;0;5;4
0;1;3;3
4;0;0;0
3;0;2;3
5;3;5;5
0;5;3;3
1;5;3;4
0;1;4;3
0;4;4;4
3;3;0;4
3;5;1;1
4;3;1;0
1;5;5;4
4;3;0;4
4;3;1;5
0;4;2;4
2;4;1;2
3;0;3;2
] (delimiter is ';');

Dimension:
Load * Inline [
Correlation_Dimension
correlation_dimension_1
correlation_dimension_2
correlation_dimension_3
correlation_dimension_4 ];

// cross join to generate all possible combination of each dimesnion
Join(Dimension)
Load Correlation_Dimension as Correlation_Dimension2
Resident Dimension;

// create dynamic correlation function which genreates separate field for each combination of dimension
Correlation_Function:
Load Concat(Correlation_Function,',') as Correlation_Function;
Load 
    'Correl('&Correlation_Dimension& ',' & Correlation_Dimension2 & ')' &
     ' as ' & '[' & Correlation_Dimension & ':' & Correlation_Dimension2 & ']'   as Correlation_Function
Resident Dimension;

Drop Table Dimension;

let vCorrelation_Function = Peek('Correlation_Function');

Drop Table Correlation_Function;

Correlation:
CrossTable(Fields,Correlation,1)
Load GRP,
     $(vCorrelation_Function)
Resident Data
Group by GRP;

Drop Field GRP;

CorrelationMatrix:
Generic Load SubField(Fields,':',1) as GRP,
             'Corr_'& SubField(Fields,':',2) as CorrelationDimension,
             Correlation
Resident Correlation;

Drop Table Correlation;

 

 

Screenshot 2023-06-13 at 12.49.52.png

 

RanOuerg
Creator
Creator

Hello all,

Do you have any idea  how can calculate the correlation between categorical data exemple [Job, skill, Level]

i need to test the relationships between each job based on the skills needed and the level