<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Sorting before tuniq... in Talend Studio</title>
    <link>https://community.qlik.com/t5/Talend-Studio/Sorting-before-tuniq/m-p/2217699#M13182</link>
    <description>&lt;P&gt;&lt;A href="https://community.qlik.com/s/profile/0053p000007LOmyAAG"&gt;@DaveG2008&lt;/A&gt;,since your source is DB right,i will suggest you to do in the DB level to remove duplicates.&lt;/P&gt; 
&lt;P&gt;&amp;nbsp;&lt;/P&gt; 
&lt;P&gt;if you feel your DB server will not able to handle then go with&amp;nbsp;&lt;SPAN&gt;tUniqRow.&lt;/SPAN&gt;&lt;/P&gt;</description>
    <pubDate>Wed, 26 Sep 2018 08:59:41 GMT</pubDate>
    <dc:creator>manodwhb</dc:creator>
    <dc:date>2018-09-26T08:59:41Z</dc:date>
    <item>
      <title>Sorting before tuniq...</title>
      <link>https://community.qlik.com/t5/Talend-Studio/Sorting-before-tuniq/m-p/2217697#M13180</link>
      <description>&lt;P&gt;Hi&lt;/P&gt;&lt;P&gt;I have a MySQL table of 60m rows and need to dedupe the table keying on all 6 columns.&amp;nbsp; Do I need to sort using tSortRow first then follow with a tUniqRow or can I go straight into a tUniqRow and let the component deal with it.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Any advice on whether this is the right approach or if there's a better way would be great!&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thanks&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sat, 16 Nov 2024 07:35:12 GMT</pubDate>
      <guid>https://community.qlik.com/t5/Talend-Studio/Sorting-before-tuniq/m-p/2217697#M13180</guid>
      <dc:creator>Anonymous</dc:creator>
      <dc:date>2024-11-16T07:35:12Z</dc:date>
    </item>
    <item>
      <title>Re: Sorting before tuniq...</title>
      <link>https://community.qlik.com/t5/Talend-Studio/Sorting-before-tuniq/m-p/2217698#M13181</link>
      <description>&lt;P&gt;Why not dedupe and sort in your database? That is what a database is good at. If you have 60m rows where only a third are duped, that is 20m rows that you unnecessarily send to Talend for them to be thrown away. While Talend is a great ETL tool, it uses Java. Java is good at many things, but it isn't as quick as a database at sorting and filtering.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I'd recommend sorting and filtering your data in your database by writing a query to do that in your DB component. This way only the necessary data will enter your job and in the correct order. After that your job will have a lot less work to do.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 25 Sep 2018 21:35:43 GMT</pubDate>
      <guid>https://community.qlik.com/t5/Talend-Studio/Sorting-before-tuniq/m-p/2217698#M13181</guid>
      <dc:creator>Anonymous</dc:creator>
      <dc:date>2018-09-25T21:35:43Z</dc:date>
    </item>
    <item>
      <title>Re: Sorting before tuniq...</title>
      <link>https://community.qlik.com/t5/Talend-Studio/Sorting-before-tuniq/m-p/2217699#M13182</link>
      <description>&lt;P&gt;&lt;A href="https://community.qlik.com/s/profile/0053p000007LOmyAAG"&gt;@DaveG2008&lt;/A&gt;,since your source is DB right,i will suggest you to do in the DB level to remove duplicates.&lt;/P&gt; 
&lt;P&gt;&amp;nbsp;&lt;/P&gt; 
&lt;P&gt;if you feel your DB server will not able to handle then go with&amp;nbsp;&lt;SPAN&gt;tUniqRow.&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 26 Sep 2018 08:59:41 GMT</pubDate>
      <guid>https://community.qlik.com/t5/Talend-Studio/Sorting-before-tuniq/m-p/2217699#M13182</guid>
      <dc:creator>manodwhb</dc:creator>
      <dc:date>2018-09-26T08:59:41Z</dc:date>
    </item>
  </channel>
</rss>

