Unlock a world of possibilities! Login now and discover the exclusive benefits awaiting you.
Hi,
I'm working on an application for analysing some Apache logs.
I have to load CSV logs files from different servers, the problem is format:
- in the first ones, the separator is space, and have 10 columns
- in the others the separator is comma, and have 11 columns
Is there a way to "detect" csv files format before loading it?
You need a load from the first row of your files, check them and then create a variable load-statement. My example looks quite complicated and includes some more functionalities but I think you could to adapt your part (in bold):
MapCharset:
Mapping Load Zeichenfolge, Kodierung From [..\Control\Other\ZeichensatzMapping.xls] (biff, embedded labels, table is Zeichensatz$);
Delimiter:
Load DelimiterName, Delimiter as DelimiterChar From [..\Control\Other\ZeichensatzMapping.xls] (biff, embedded labels, table is Delimiter$);
CheckBOM:
First 1
LOAD
filename() as FileName, fileextension() as FileExtension, filepath() as FilePath,
applymap('MapCharset', @1:3, 'ansi') as Zeichensatz,
if(match(fileextension(), 'xls', 'xlsx'), 'None',
if(substringcount(@1:n, chr(9)) >=1, 'Tab',
if(substringcount(@1:n, chr(59)) >= 1, 'Semikolon',
if(substringcount(@1:n, chr(44)) >= 1, 'Komma',
if(substringcount(@1:n, chr(32)) >= 1, 'Leerzeichen', 'None'))))) as Delimiter
FROM [..\..\Data\RawData\14 Data\Data_201????.????] (fix, codepage is 1252);
for i = 0 to noofrows('CheckBOM') - 1
let vFilePath = peek('FilePath', $(i), 'CheckBOM');
let vDelimiter = peek('Delimiter', $(i), 'CheckBOM');
let vDelimiter = fieldvalue('DelimiterChar', fieldindex('DelimiterName', '$(vDelimiter)'));
let vZeichensatz = peek('Zeichensatz', $(i), 'CheckBOM');
let vFileTyp = if(match($(vZeichensatz), 'ansi', 'UTF8'), 'txt , ', '');
let vTable = ''; // ', table is XYZ$'
let vFileFormat = '(' & '$(vFileType)' & '$(vZeichensatz)' & ', embedded labels, delimiter is ' & chr(39) & '$(vDelimiter)' & chr(39) & '$(vTable)' & ')';
let vConcatenate = if($(i) = 0, '', 'concatenate(t1)');
t1:
$(vConcatenate)
first 5
Load * From $(vFilePath) $(vFileFormat);
next
drop tables CheckBOM, Delimiter;
- Marcus
Dear,
Can you attach the sample file.
so that we can help you in better way.
Thanks,
Mukram.
Hi,
You can use alt function to the same.
If possible can you post sample data.
Regards
ASHFAQ
Here is the loop for loading files with comma, but not working for the other format:
FOR index = DateLogStart TO DateLogEnd
Let currentDate = Date(index, 'YYYY.MM.DD');
TRACE $(currentDate);
For each vFile in FileList('$(vData)\apache_file_log_name*_$(currentDate).log')
Concatenate(Logs)
LOAD
ApplyMap('Serveurs', [@1], null()) as serveur,
[@1] as ipserveur,
date(floor(num([@2])), 'DD/MM/YYYY') as datelog,
time(num([@2]), 'hh:mm:ss') as timelog,
Hour(Time([@2]))&'-'&Floor(Minute(Time(@2)),5) as tranche5,
Hour(Time([@2]))&'-'&Floor(Minute(Time(@2)),30) as tranche30,
/* OTHERS COLUMNS */
[@9] as codeclient,
[@10] as ipuser
FROM
[$(vFile)]
(txt, codepage is 1252, no labels, delimiter is ';', msq)
where date(floor(num([@2]))) >= Date($(DateLogStart));
Next vFile
NEXT index
Hi,
try below
alt(date#(DateString, 'YYYY MM DD'), date#(DateString, 'DD,MM,YYYY'), 'No Date Found') AS Date
replace proper date format
Or if you can post sample date it would be great.
Regards
ASHFAQ.
Sorry but I don't understand the answer...
I am not asking about the load of the file (that is working fine), I am asking about how to detect differents CSV file formats for do a conditional loading.
I'm trying to do this:
IF file_format = 1 THEN
// Loading CSV file with comma separator / 11 columns
ELSE IF file_forlat = 2 THEN
// Loading CSV file with space separator / 10 columns
END IF
You need a load from the first row of your files, check them and then create a variable load-statement. My example looks quite complicated and includes some more functionalities but I think you could to adapt your part (in bold):
MapCharset:
Mapping Load Zeichenfolge, Kodierung From [..\Control\Other\ZeichensatzMapping.xls] (biff, embedded labels, table is Zeichensatz$);
Delimiter:
Load DelimiterName, Delimiter as DelimiterChar From [..\Control\Other\ZeichensatzMapping.xls] (biff, embedded labels, table is Delimiter$);
CheckBOM:
First 1
LOAD
filename() as FileName, fileextension() as FileExtension, filepath() as FilePath,
applymap('MapCharset', @1:3, 'ansi') as Zeichensatz,
if(match(fileextension(), 'xls', 'xlsx'), 'None',
if(substringcount(@1:n, chr(9)) >=1, 'Tab',
if(substringcount(@1:n, chr(59)) >= 1, 'Semikolon',
if(substringcount(@1:n, chr(44)) >= 1, 'Komma',
if(substringcount(@1:n, chr(32)) >= 1, 'Leerzeichen', 'None'))))) as Delimiter
FROM [..\..\Data\RawData\14 Data\Data_201????.????] (fix, codepage is 1252);
for i = 0 to noofrows('CheckBOM') - 1
let vFilePath = peek('FilePath', $(i), 'CheckBOM');
let vDelimiter = peek('Delimiter', $(i), 'CheckBOM');
let vDelimiter = fieldvalue('DelimiterChar', fieldindex('DelimiterName', '$(vDelimiter)'));
let vZeichensatz = peek('Zeichensatz', $(i), 'CheckBOM');
let vFileTyp = if(match($(vZeichensatz), 'ansi', 'UTF8'), 'txt , ', '');
let vTable = ''; // ', table is XYZ$'
let vFileFormat = '(' & '$(vFileType)' & '$(vZeichensatz)' & ', embedded labels, delimiter is ' & chr(39) & '$(vDelimiter)' & chr(39) & '$(vTable)' & ')';
let vConcatenate = if($(i) = 0, '', 'concatenate(t1)');
t1:
$(vConcatenate)
first 5
Load * From $(vFilePath) $(vFileFormat);
next
drop tables CheckBOM, Delimiter;
- Marcus
Hi, If you have fixed no files with some fixed filename, then make small table and write the for loop to write conditional script, or follow the Marcus approach.
Exactly what I wanted!
Thanks a lot!