Do not input private or sensitive data. View Qlik Privacy & Cookie Policy.
Skip to main content

Announcements
Talend Cloud AWS EU Scheduled Outage: Starting Tues 26 May 21:00 CEST with expected completion Wed 27 May 01:00 CEST
cancel
Showing results for 
Search instead for 
Did you mean: 
Joberth_Gambati
Contributor
Contributor

About Loops and GeoAnalytics Issues over 50000 registers (INTERSECT)

Hi everyone,

I'm facing a performance issue with a Qlik Sense script that performs GeoAnalytics Intersects operations within a loop.

The script works perfectly for the first batch of data, completing the intersect operation quickly. However, when it moves to the second batch, the process either slows down dramatically or seems to stop completely.

Interestingly, when I run the same intersect operations on individual QVD files with separate LOAD statements, each one finishes in less than 5 minutes. This suggests the issue is not with the core Intersects function itself, but with how the script handles the data flow and memory across multiple iterations.

I suspect it may be related to memory management, as the script generates inline tables for each intersect operation, which might not be fully cleared between runs, or there might be an issue with how the GeoAnalytics connector handles repeated calls.

Could someone take a look at my code and help me identify what could be causing this? Any suggestions on how to improve the script's performance for subsequent batches would be greatly appreciated.

I tried to create 2 approaches and the same problem arises.

Thanks in advance!

THE FIRST CODE

// =========================================================================
// LOAD DATA FOR 'CONSERVATION_UNITS'
// =========================================================================
LET vTable_CU = 'CONSERVATION_UNITS';
LET vQVD_Path_CU = 'lib://Downloads/CGMA_QVDs/UNIDADES_CONSERVACAO.qvd';

$(vTable_CU):
LOAD
Geometry AS $(vTable_CU).GEOMETRY,
_autoIndex_ AS $(vTable_CU).AUTOID
FROM [$(vQVD_Path_CU)] (qvd);

// Check load
LET vCU_Count = NoOfRows('$(vTable_CU)');
TRACE $(vTable_CU) loaded with $(vCU_Count) records.;

// =========================================================================
// INITIAL SETUP
// =========================================================================
LET vTable_ATP = 'NEW_ATP_REQUEST';
LET vQVD_Path_ATP = 'lib://Downloads/CGMA_QVDs/REQUERIMENTO_ATP_NOVO.qvd';
LET vBatchSize = 20000;

// =========================================================================
// Load the entire table just to count records
// =========================================================================
$(vTable_ATP)_Full:
LOAD *
FROM [$(vQVD_Path_ATP)] (qvd);

LET vTotalRecords = NoOfRows('$(vTable_ATP)_Full');
TRACE Total records in $(vTable_ATP): $(vTotalRecords);

LET vNumBatches = Ceil($(vTotalRecords)/$(vBatchSize));
TRACE Number of batches of $(vBatchSize) records: $(vNumBatches);

// Remove full table loaded just for counting
DROP TABLE $(vTable_ATP)_Full;

// =========================================================================
// BATCH LOOP
// =========================================================================
FOR i = 0 TO $(vNumBatches)-1
LIB CONNECT TO 'GeoAnalyticsSEMA (sema-qliksense_administrator)';

LET vStart = $(i) * $(vBatchSize);
LET vEnd = $(vStart) + $(vBatchSize);

TRACE Loading batch $(i+1): records $(vStart+1) to Min($(vEnd),$(vTotalRecords));

// Load specific batch
$(vTable_ATP)_Batch_$(i+1):
LOAD
Geometry AS $(vTable_ATP).GEOMETRY,
_autoIndex_ AS $(vTable_ATP).AUTOID
FROM [$(vQVD_Path_ATP)] (qvd)
WHERE RecNo() > $(vStart) AND RecNo() <= $(vEnd);

// =========================================================================
// Special character mapping
// =========================================================================
[_inlineMap_]:
mapping LOAD * inline [
_char_, _utf_
"'", '\u0027'
'"', '\u0022'
"[", '\u005b'
"/", '\u002f'
"*", '\u002a'
";", '\u003b'
"}", '\u007d'
"{", '\u007b'
"`", '\u0060'
"´", '\u00b4'
" ", '\u0009'
];

// =========================================================================
// Check for mandatory fields in CONSERVATION_UNITS table
// =========================================================================
IF FieldNumber('UNIDADES_CONSERVACAO.AUTOID', 'UNIDADES_CONSERVACAO') = 0 THEN
call InvalidInlineData('The field UNIDADES_CONSERVACAO.AUTOID in UNIDADES_CONSERVACAO is not available');
END IF
IF FieldNumber('UNIDADES_CONSERVACAO.GEOMETRIA', 'UNIDADES_CONSERVACAO') = 0 THEN
call InvalidInlineData('The field UNIDADES_CONSERVACAO.GEOMETRIA in UNIDADES_CONSERVACAO is not available');
END IF

// =========================================================================
// Prepare CONSERVATION_UNITS inline table
// =========================================================================
Let [Dataset1InlineTable] = 'UNIDADES_CONSERVACAO.AUTOID' & Chr(9) & 'UNIDADES_CONSERVACAO.GEOMETRIA';
Let numRowsUC = NoOfRows('UNIDADES_CONSERVACAO');
Let chunkSize = 1000;
Let chunksUC = numRowsUC/chunkSize;

For nUC = 0 to chunksUC
Let chunkTextUC = '';
Let chunkUC = nUC*chunkSize;
For iUC = 0 To chunkSize-1
Let rowNrUC = chunkUC + iUC;
Exit for when rowNrUC >= numRowsUC;
Let rowUC = '';
For Each f In 'UNIDADES_CONSERVACAO.AUTOID','UNIDADES_CONSERVACAO.GEOMETRIA'
rowUC = rowUC & Chr(9) & MapSubString('_inlineMap_', Peek('$(f)', $(rowNrUC), 'UNIDADES_CONSERVACAO'));
Next
chunkTextUC = chunkTextUC & Chr(10) & Mid('$(rowUC)', 2);
Next
[Dataset1InlineTable] = [Dataset1InlineTable] & chunkTextUC;
Next

// =========================================================================
// Check for mandatory fields in the ATP batch table
// =========================================================================
IF FieldNumber('REQUERIMENTO_ATP_NOVO.AUTOID', '$(vTable_ATP)_Batch_$(i+1)') = 0 THEN
call InvalidInlineData('The field REQUERIMENTO_ATP_NOVO.AUTOID is not available');
END IF
IF FieldNumber('REQUERIMENTO_ATP_NOVO.GEOMETRIA', '$(vTable_ATP)_Batch_$(i+1)') = 0 THEN
call InvalidInlineData('The field REQUERIMENTO_ATP_NOVO.GEOMETRIA is not available');
END IF

// =========================================================================
// Prepare the batch inline table
// =========================================================================
Let [REQUERIMENTO_ATP_NOVOInlineTable] = 'REQUERIMENTO_ATP_NOVO.AUTOID' & Chr(9) & 'REQUERIMENTO_ATP_NOVO.GEOMETRIA';
Let numRowsATP = NoOfRows('$(vTable_ATP)_Batch_$(i+1)');
Let chunksATP = numRowsATP/chunkSize;

For nATP = 0 to chunksATP
Let chunkTextATP = '';
Let chunkATP = nATP*chunkSize;
For iATP = 0 To chunkSize-1
Let rowNrATP = chunkATP + iATP;
Exit for when rowNrATP >= numRowsATP;
Let rowATP = '';
For Each f In 'REQUERIMENTO_ATP_NOVO.AUTOID','REQUERIMENTO_ATP_NOVO.GEOMETRIA'
rowATP = rowATP & Chr(9) & MapSubString('_inlineMap_', Peek('$(f)', $(rowNrATP), '$(vTable_ATP)_Batch_$(i+1)'));
Next
chunkTextATP = chunkTextATP & Chr(10) & Mid('$(rowATP)', 2);
Next
[REQUERIMENTO_ATP_NOVOInlineTable] = [REQUERIMENTO_ATP_NOVOInlineTable] & chunkTextATP;
Next

// =========================================================================
// Execute GeoAnalytics Intersects operation
// =========================================================================
[IntersectsTable_Batch_$(i+1)]:
SQL SELECT [Dataset1_REQUERIMENTO_ATP_NOVO_RelationKey],
[UNIDADES_CONSERVACAO.AUTOID],
[REQUERIMENTO_ATP_NOVO.AUTOID],
[Dataset1_RelativeOverlap],
[REQUERIMENTO_ATP_NOVO_RelativeOverlap]
FROM Intersects(intersectsCount='0', dataset1='Dataset1', dataset2='REQUERIMENTO_ATP_NOVO')
DATASOURCE Dataset1 INLINE tableName='UNIDADES_CONSERVACAO', tableFields='UNIDADES_CONSERVACAO.AUTOID,UNIDADES_CONSERVACAO.GEOMETRIA', geometryType='POLYGON', loadDistinct='NO', suffix='', crs='Auto' {$(Dataset1InlineTable)}
DATASOURCE REQUERIMENTO_ATP_NOVO INLINE tableName='REQUERIMENTO_ATP_NOVO', tableFields='REQUERIMENTO_ATP_NOVO.AUTOID,REQUERIMENTO_ATP_NOVO.GEOMETRIA', geometryType='POLYGON', loadDistinct='NO', suffix='', crs='Auto' {$(REQUERIMENTO_ATP_NOVOInlineTable)}
;

//tag field [Dataset1_REQUERIMENTO_ATP_NOVO_RelationKey] with '$primarykey';

// Load the destination QVD if it already exists, to perform concatenation
LET vPathQVD = 'lib://Downloads/CGMA/Processamento/intersect_2.qvd';
IF NOT IsNull(FileTime('$(vPathQVD)')) THEN
CONCATENATE([IntersectsTable_TEMP])
LOAD * FROM [$(vPathQVD)] (qvd);
END IF;

[Dataset1InlineTable] = '';
[REQUERIMENTO_ATP_NOVOInlineTable] = '';

// Drop batch table
DROP TABLE $(vTable_ATP)_Batch_$(i+1);

// Drop temporary loop variables
LET Dataset1InlineTable = '';
LET REQUERIMENTO_ATP_NOVOInlineTable = '';
LET chunkTextUC = '';
LET chunkTextATP = '';
LET rowUC = '';
LET rowATP = '';
LET rowNrUC = '';
LET rowNrATP = '';
LET chunkUC = '';
LET chunkATP = '';
LET nUC = '';
LET iUC = '';
LET nATP = '';
LET iATP = '';

// Drop batch variables
LET vStart = '';
LET vEnd = '';

TRACE Memory cleared after batch $(i+1);

DISCONNECT;
SLEEP(10000);
NEXT i

// ==========================================
// FINAL CLEANUP OF ALL POSSIBLE VARIABLES
// ==========================================
LET vTable_ATP = '';
LET vQVD_Path_ATP = '';
LET vBatchSize = '';
LET vTotalRecords = '';
LET vNumBatches = '';

TRACE Final memory cleanup completed.;

TRACE Full load of $(vTable_ATP) in $(vNumBatches) batches;

 

-----------------------------

THE SECCOND CODE

 

// Character mapping
[_inlineMap_]:
mapping LOAD * inline [
_char_, _utf
"'", '\u0027'
'"', '\u0022'
"[", '\u005b'
"/", '\u002f'
"*", '\u002a'
";", '\u003b'
"}", '\u007d'
"{", '\u007b'
"`", '\u0060'
"´", '\u00b4'
"	", '\u0009'
];

// --- SUB-ROUTINE DEFINITION: IntersectArquivo (IntersectFile) ---
SUB IntersectFile(vPath, vFileName, vIntersectingTable, vIntersectingTable_ID, vIntersectingTable_Geom)
LIB CONNECT TO 'GeoAnalyticsSEMA (sema-qliksense_administrator)';
// TRACE for debugging
TRACE PATH=$(vPath);
TRACE INTERSECTEDFILE=$(vFileName);
TRACE INTERSECTOR=$(vIntersectingTable);
TRACE INTERSECTOR_ID=$(vIntersectingTable_ID);
TRACE INTERSECTOR_GEOMETRY=$(vIntersectingTable_Geom);

// Connect to the GeoAnalytics database
LIB CONNECT TO 'GeoAnalyticsSEMA (sema-qliksense_administrator)';

// Extract the base file name to use as the table name
LET vBaseName = SubField(SubField('$(vFileName)', '/', -1), '.', 1);

// The intersecting table (smaller and complete table)
// Loaded only once
INTERSECTING_TABLE:
NoConcatenate
LOAD
$(vIntersectingTable_Geom) AS INTERSECTING_TABLE.GEOMETRY,
$(vIntersectingTable_ID) AS INTERSECTING_TABLE.AUTOID
RESIDENT $(vIntersectingTable);

// Generates the inline table for the INTERSECTING_TABLE
// The INTERSECTING_TABLE is generated only once, outside the loop
LET [INTERSECTING_TABLEInlineTable] = 'INTERSECTING_TABLE.AUTOID' & Chr(9) & 'INTERSECTING_TABLE.GEOMETRY';
Let numRows_Intersecting = NoOfRows('INTERSECTING_TABLE');
For i = 0 To numRows_Intersecting - 1
Let row = '';
For Each f In 'INTERSECTING_TABLE.AUTOID', 'INTERSECTING_TABLE.GEOMETRY'
row = row & Chr(9) & MapSubString('_inlineMap_', Peek('$(f)', $(i), 'INTERSECTING_TABLE'));
Next
[INTERSECTING_TABLEInlineTable] = [INTERSECTING_TABLEInlineTable] & Chr(10) & Mid('$(row)', 2);
Next


// Load the QVD file for the current chunk
// This code is the body of the loop that will iterate over the files.
// It will be executed for each file passed to the sub-routine.

// The table to be intersected (the current chunk)
INTERSECTED_TABLE:
LOAD
$(vBaseName).GEOMETRY as INTERSECTED_TABLE.GEOMETRY,
$(vBaseName).AUTOID as INTERSECTED_TABLE.AUTOID
FROM [$(vFileName)] (qvd);

// Generates the inline table for the current chunk of the INTERSECTED_TABLE
LET [INTERSECTED_TABLEInlineTable] = 'INTERSECTED_TABLE.AUTOID' & Chr(9) & 'INTERSECTED_TABLE.GEOMETRY';
Let numRows_Intersected = NoOfRows('INTERSECTED_TABLE');
For i = 0 To numRows_Intersected - 1
Let row = '';
For Each f In 'INTERSECTED_TABLE.AUTOID', 'INTERSECTED_TABLE.GEOMETRY'
row = row & Chr(9) & MapSubString('_inlineMap_', Peek('$(f)', $(i), 'INTERSECTED_TABLE'));
Next
[INTERSECTED_TABLEInlineTable] = [INTERSECTED_TABLEInlineTable] & Chr(10) & Mid('$(row)', 2);
Next


// Connection and Intersect operation with the inline tables
[IntersectsTable_TEMP]:
SQL SELECT [INTERSECTING_TABLE_INTERSECTED_RelationKey], [INTERSECTING_TABLE.AUTOID], [INTERSECTED_TABLE.AUTOID], [INTERSECTING_TABLE_RelativeOverlap], [INTERSECTED_TABLE_RelativeOverlap] FROM Intersects(intersectsCount='0', dataset1='INTERSECTING_TABLE', dataset2='INTERSECTED_TABLE')
DATASOURCE INTERSECTING_TABLE INLINE tableName='INTERSECTING_TABLE', tableFields='INTERSECTING_TABLE.AUTOID,INTERSECTING_TABLE.GEOMETRY', geometryType='POLYGON', loadDistinct='NO', suffix='', crs='Auto' {$(INTERSECTING_TABLEInlineTable)}
DATASOURCE INTERSECTED_TABLE INLINE tableName='INTERSECTED_TABLE', tableFields='INTERSECTED_TABLE.AUTOID,INTERSECTED_TABLE.GEOMETRY', geometryType='POLYGON', loadDistinct='NO', suffix='', crs='Auto' {$(INTERSECTED_TABLEInlineTable)}
;

// Load the destination QVD if it already exists, to perform concatenation
LET vPathQVD = 'lib://Downloads/CGMA/Processamento/intersect.qvd';
IF NOT IsNull(FileTime('$(vPathQVD)')) THEN
CONCATENATE([IntersectsTable_TEMP])
LOAD * FROM [$(vPathQVD)] (qvd);
END IF;

// Save the final table
STORE [IntersectsTable_TEMP] INTO 'lib://Downloads/CGMA/Processamento/intersect.qvd' (qvd);

// Clean up temporary tables and variables
DROP TABLES [IntersectsTable_TEMP], INTERSECTED_TABLE, INTERSECTING_TABLE;

// Clear the inline table for the next use
LET [INTERSECTED_TABLEInlineTable] = '';
LET [INTERSECTING_TABLEInlineTable] = '';
// ==========================
// FINAL MEMORY CLEANUP
// ==========================

// Clear the variables used (assign empty string)
LET INTERSECTING_TABLEInlineTable = '';
LET INTERSECTED_TABLEInlineTable = '';
LET vBaseName = '';
LET numRows_Intersecting = '';
LET numRows_Intersected = '';
LET row = '';
LET f = '';
LET i = '';
LET vPath = '';
LET vFileName = '';
LET vIntersectingTable = '';
LET vIntersectingTable_ID = '';
LET vIntersectingTable_Geom = '';
LET vPathQVD = '';

DISCONNECT;
SLEEP(2000);

END SUB;
Labels (3)
0 Replies