<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic merge multiple parquet files to single or multiple files in Talend Studio</title>
    <link>https://community.qlik.com/t5/Talend-Studio/merge-multiple-parquet-files-to-single-or-multiple-files/m-p/2345128#M112703</link>
    <description>&lt;P&gt;Hello,&lt;/P&gt;&lt;P&gt;I have multiple 1000 parquet files, say of 1 MB each. I want to merge them into a single file or multiple files.&lt;/P&gt;&lt;OL&gt;&lt;LI&gt;Say 200 files in file1.parquet,&lt;/LI&gt;&lt;LI&gt;next 200 files in file2.parquet&lt;/LI&gt;&lt;/OL&gt;&lt;P&gt;and so on. I was looking for a component; however, I haven't found one.&lt;/P&gt;&lt;P&gt;Is there a way we can do this? Custom Java libraries and Python scripts are available; however, I was looking for a Talend component.&lt;/P&gt;&lt;P&gt;Thanks&lt;/P&gt;&lt;P&gt;cdhemant&lt;/P&gt;</description>
    <pubDate>Fri, 15 Nov 2024 21:47:29 GMT</pubDate>
    <dc:creator>cdhemant</dc:creator>
    <dc:date>2024-11-15T21:47:29Z</dc:date>
    <item>
      <title>merge multiple parquet files to single or multiple files</title>
      <link>https://community.qlik.com/t5/Talend-Studio/merge-multiple-parquet-files-to-single-or-multiple-files/m-p/2345128#M112703</link>
      <description>&lt;P&gt;Hello,&lt;/P&gt;&lt;P&gt;I have multiple 1000 parquet files, say of 1 MB each. I want to merge them into a single file or multiple files.&lt;/P&gt;&lt;OL&gt;&lt;LI&gt;Say 200 files in file1.parquet,&lt;/LI&gt;&lt;LI&gt;next 200 files in file2.parquet&lt;/LI&gt;&lt;/OL&gt;&lt;P&gt;and so on. I was looking for a component; however, I haven't found one.&lt;/P&gt;&lt;P&gt;Is there a way we can do this? Custom Java libraries and Python scripts are available; however, I was looking for a Talend component.&lt;/P&gt;&lt;P&gt;Thanks&lt;/P&gt;&lt;P&gt;cdhemant&lt;/P&gt;</description>
      <pubDate>Fri, 15 Nov 2024 21:47:29 GMT</pubDate>
      <guid>https://community.qlik.com/t5/Talend-Studio/merge-multiple-parquet-files-to-single-or-multiple-files/m-p/2345128#M112703</guid>
      <dc:creator>cdhemant</dc:creator>
      <dc:date>2024-11-15T21:47:29Z</dc:date>
    </item>
    <item>
      <title>Re: merge multiple parquet files to single or multiple files</title>
      <link>https://community.qlik.com/t5/Talend-Studio/merge-multiple-parquet-files-to-single-or-multiple-files/m-p/2345129#M112704</link>
      <description>&lt;P&gt;Hi &lt;/P&gt;&lt;P&gt;Use a tFileList to iterate over each file; here is a demo job, see below.&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="0695b00000hsUjpAAE.png"&gt;&lt;img src="https://community.qlik.com/t5/image/serverpage/image-id/140678iF0993F44CF8DC8B4/image-size/large?v=v2&amp;amp;px=999" role="button" title="0695b00000hsUjpAAE.png" alt="0695b00000hsUjpAAE.png" /&gt;&lt;/span&gt;tJava: define a dynamic output file name.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;I&gt;int i=((Integer)globalMap.get("tFileList_1_NB_FILE"));&lt;/I&gt;&lt;/P&gt;&lt;P&gt;&lt;I&gt;context.filename="out"+i/10+".parquet";&lt;/I&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;tFileInputParquet_1: read the current parquet file, set the file path as:&lt;/P&gt;&lt;P&gt;((String)globalMap.get("tFileList_1_CURRENT_FILEPATH"))&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;tDBOutput_1: store the data into DB table.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Set the condition of runIF connector as:&lt;/P&gt;&lt;P&gt;((Integer)globalMap.get("tFileList_1_NB_FILE"))%200==0&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;//whenever 200 files are read, read all data from DB table and write them to a new parquet file.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;On tFileOutputParquet_1: set the file path as:&lt;/P&gt;&lt;P&gt;"D:/files/temp/output/"+context.filename&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;tDBRow: truncate the table.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Can you try and let me know if you have any questions?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Regards&lt;/P&gt;&lt;P&gt;Shong&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 19 May 2023 11:09:59 GMT</pubDate>
      <guid>https://community.qlik.com/t5/Talend-Studio/merge-multiple-parquet-files-to-single-or-multiple-files/m-p/2345129#M112704</guid>
      <dc:creator>Anonymous</dc:creator>
      <dc:date>2023-05-19T11:09:59Z</dc:date>
    </item>
    <item>
      <title>Re: merge multiple parquet files to single or multiple files</title>
      <link>https://community.qlik.com/t5/Talend-Studio/merge-multiple-parquet-files-to-single-or-multiple-files/m-p/2345130#M112705</link>
      <description>&lt;P&gt;Thanks Shong.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;This is definitely a solution for the problem; however, it adds a new infrastructure component (a database), which has cost and maintenance involved.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I am trying to use a Python script that will create a single file.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thanks&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 22 May 2023 08:02:22 GMT</pubDate>
      <guid>https://community.qlik.com/t5/Talend-Studio/merge-multiple-parquet-files-to-single-or-multiple-files/m-p/2345130#M112705</guid>
      <dc:creator>cdhemant</dc:creator>
      <dc:date>2023-05-22T08:02:22Z</dc:date>
    </item>
    <item>
      <title>Re: merge multiple parquet files to single or multiple files</title>
      <link>https://community.qlik.com/t5/Talend-Studio/merge-multiple-parquet-files-to-single-or-multiple-files/m-p/2345131#M112706</link>
      <description>&lt;P&gt;Yes, you can also store the data in a local file instead of a DB, but since you have a lot of files to process and the files are big, I'm afraid the performance will be poor.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 24 May 2023 04:30:29 GMT</pubDate>
      <guid>https://community.qlik.com/t5/Talend-Studio/merge-multiple-parquet-files-to-single-or-multiple-files/m-p/2345131#M112706</guid>
      <dc:creator>Anonymous</dc:creator>
      <dc:date>2023-05-24T04:30:29Z</dc:date>
    </item>
  </channel>
</rss>

