Unlock a world of possibilities! Login now and discover the exclusive benefits awaiting you.
Hi all,
I have a data source that I want to scramble certain fields via the load script.
Really I want to use the same functionality that is within the Document Properties > Scramble tab. The data is over 3 million customers with multiple fields to scramble so a mapping table would be too onerous.
Any ideas how to leverage scramble via the load script?
thanks
Stuart
I have quickly wrote a piece of script which can scrambled chars and number of given fields of a given table.
It has to be improved to load QVD, to store them, or to treat all tables in memory, but it works (row by row and char by char so performance can be very poor).
LET tableName='toto' ;
LET scrambledChars='abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' ;
LET scrambledChars='$(scrambledChars)$(scrambledChars)' ;
LET scrambledNumbers='0123456789' ;
LET scrambledNumbers='$(scrambledNumbers)$(scrambledNumbers)' ;
$(tableName):
load * Inline
[field1,field2,field3
michel,michel.michel@gmail.com,345
gerard,gerard878@yahoo.com,123
sophie,sof@gmail.com,789] ;
FieldToScramble:
load * Inline
[tablename,fieldname
toto,field1
toto,field2] ;
// where tablename='$(tableName)'
IF NoOfRows('FieldToScramble')=0 or NoOfRows('$(tableName)')=0 then
EXIT Script ;
ENDIF
Rename Table $(tableName) to TableToScramble ;
LET nbFields=NoOfRows('FieldToScramble') ;
FOR i=1 to $(nbFields)
LET fieldName=Peek('fieldname',i-1,'FieldToScramble') ;
tmp1:
NoConcatenate LOAD Distinct $(fieldName) Resident TableToScramble ;
LET nbRows=NoOfRows('tmp1') ;
FOR j=1 to $(nbRows)
LET valueBefore=Peek('$(fieldName)',j-1,'tmp1') ;
LET valueAfter='' ;
LET nbChar=Len('$(valueBefore)') ;
IF nbChar>0 then
FOR k=1 to $(nbChar)
LET charBefore=Mid('$(valueBefore)',k,1) ;
LET charAfter='$(charBefore)' ;
IF FindOneOf('$(charBefore)','$(scrambledChars)')>0 then
LET charAfter=Mid('$(scrambledChars)',Index('$(scrambledChars)','$(charBefore)')+floor(Rand()*26),1) ;
ELSEIF FindOneOf('$(charBefore)','$(scrambledNumbers)')>0 then
LET charAfter=Mid('$(scrambledNumbers)',Index('$(scrambledNumbers)','$(charBefore)')+floor(Rand()*10),1) ;
ENDIF ;
LET valueAfter='$(valueAfter)$(charAfter)';
NEXT k ;
ENDIF ;
tmp2:
LOAD
'$(valueBefore)' as $(fieldName)
,'$(valueAfter)' as [%$#@!$(fieldName)%$#@!$]
AutoGenerate 1 ;
NEXT j ;
Inner Join (TableToScramble)
LOAD $(fieldName),[%$#@!$(fieldName)%$#@!$] Resident tmp2 ;
DROP Table tmp2,tmp1 ;
DROP Field $(fieldName) From TableToScramble ;
RENAME Field [%$#@!$(fieldName)%$#@!$] to $(fieldName) ;
NEXT i ;
If numeric field, you could possibly use Rand(), and for string may be using some string fucntions like:
Load
Amount*Rand()*100 as NewAmount,
Mid(PurgeChar( Customer, 'aeiou'), 3) as NewCustomer
Thanks I can use that for numeric.
A lot of the fields are text based (like email address for example)
Hi Suart,
Don't know the level of security you need, but if you replace your values by the hash of your values it will be hard for a user to crack it but not for hackers (for example hash128(client_name) as client_name). Else you can also use the autonumber function to have numbers.
Regards,
Vincent
Thanks Vincent
I need the data in the scrambled format as I would still like to check the scrambled data for data quality. Items such as field containing a @ sign etc
Well, except treating line by line, field by field and char by char your data, I don't see a solution. And it will be complicated and will take ages. But it's feasible.
And are you retrieving the data from a database? For example if you are using Oracle, the Translate function with a randomized string can do the job easily.
We are using QV to pull the data straight from CRM.
Is there no way to leverage the scramble functionality using macros? I am just thinking it is very easy via the GUI to scramble the data why not using the script
I have quickly wrote a piece of script which can scrambled chars and number of given fields of a given table.
It has to be improved to load QVD, to store them, or to treat all tables in memory, but it works (row by row and char by char so performance can be very poor).
LET tableName='toto' ;
LET scrambledChars='abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' ;
LET scrambledChars='$(scrambledChars)$(scrambledChars)' ;
LET scrambledNumbers='0123456789' ;
LET scrambledNumbers='$(scrambledNumbers)$(scrambledNumbers)' ;
$(tableName):
load * Inline
[field1,field2,field3
michel,michel.michel@gmail.com,345
gerard,gerard878@yahoo.com,123
sophie,sof@gmail.com,789] ;
FieldToScramble:
load * Inline
[tablename,fieldname
toto,field1
toto,field2] ;
// where tablename='$(tableName)'
IF NoOfRows('FieldToScramble')=0 or NoOfRows('$(tableName)')=0 then
EXIT Script ;
ENDIF
Rename Table $(tableName) to TableToScramble ;
LET nbFields=NoOfRows('FieldToScramble') ;
FOR i=1 to $(nbFields)
LET fieldName=Peek('fieldname',i-1,'FieldToScramble') ;
tmp1:
NoConcatenate LOAD Distinct $(fieldName) Resident TableToScramble ;
LET nbRows=NoOfRows('tmp1') ;
FOR j=1 to $(nbRows)
LET valueBefore=Peek('$(fieldName)',j-1,'tmp1') ;
LET valueAfter='' ;
LET nbChar=Len('$(valueBefore)') ;
IF nbChar>0 then
FOR k=1 to $(nbChar)
LET charBefore=Mid('$(valueBefore)',k,1) ;
LET charAfter='$(charBefore)' ;
IF FindOneOf('$(charBefore)','$(scrambledChars)')>0 then
LET charAfter=Mid('$(scrambledChars)',Index('$(scrambledChars)','$(charBefore)')+floor(Rand()*26),1) ;
ELSEIF FindOneOf('$(charBefore)','$(scrambledNumbers)')>0 then
LET charAfter=Mid('$(scrambledNumbers)',Index('$(scrambledNumbers)','$(charBefore)')+floor(Rand()*10),1) ;
ENDIF ;
LET valueAfter='$(valueAfter)$(charAfter)';
NEXT k ;
ENDIF ;
tmp2:
LOAD
'$(valueBefore)' as $(fieldName)
,'$(valueAfter)' as [%$#@!$(fieldName)%$#@!$]
AutoGenerate 1 ;
NEXT j ;
Inner Join (TableToScramble)
LOAD $(fieldName),[%$#@!$(fieldName)%$#@!$] Resident tmp2 ;
DROP Table tmp2,tmp1 ;
DROP Field $(fieldName) From TableToScramble ;
RENAME Field [%$#@!$(fieldName)%$#@!$] to $(fieldName) ;
NEXT i ;
Try like this
Hash126(Email) as Email
thanks