<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Web Scraping in Talend Studio</title>
    <link>https://community.qlik.com/t5/Talend-Studio/Web-Scraping/m-p/2267183#M46163</link>
    <description>Hello&amp;nbsp; 
&lt;BR /&gt;Take a look at 
&lt;A href="https://help.talend.com/pages/viewpage.action?pageId=37851099" target="_blank" rel="nofollow noopener noreferrer"&gt;tHttpRequest&lt;/A&gt; component. This component can be used to send an HTTP request to the server and get the page content from the URL. Then use a regular expression or the tExtractXMLField component to extract all links from the response and, finally, iterate over the links one by one. For example: 
&lt;BR /&gt;tHttpRequest--main--tExtractXMLField--main--tFlowToIterate--iterate--tHttpRequest--main--tLogRow 
&lt;BR /&gt;Best regards 
&lt;BR /&gt;Shong</description>
    <pubDate>Fri, 08 May 2015 09:48:07 GMT</pubDate>
    <dc:creator>Anonymous</dc:creator>
    <dc:date>2015-05-08T09:48:07Z</dc:date>
    <item>
      <title>Web Scraping</title>
      <link>https://community.qlik.com/t5/Talend-Studio/Web-Scraping/m-p/2267182#M46162</link>
      <description>Hi everyone,
&lt;BR /&gt;I have the URL of a web page. On this page, there are some links. For each link, I have to scrape all its content.
&lt;BR /&gt;I want to do this with TOS. It's the first time that I have done something like this.
&lt;BR /&gt;Do I need to use a script, for example in Python, combined with a Talend job? Or can I do everything through specific Talend components (so without scripts)? Which components do I have to use?
&lt;BR /&gt;Thanks all</description>
      <pubDate>Sat, 16 Nov 2024 11:13:59 GMT</pubDate>
      <guid>https://community.qlik.com/t5/Talend-Studio/Web-Scraping/m-p/2267182#M46162</guid>
      <dc:creator>Anonymous</dc:creator>
      <dc:date>2024-11-16T11:13:59Z</dc:date>
    </item>
    <item>
      <title>Re: Web Scraping</title>
      <link>https://community.qlik.com/t5/Talend-Studio/Web-Scraping/m-p/2267183#M46163</link>
      <description>Hello&amp;nbsp; 
&lt;BR /&gt;Take a look at 
&lt;A href="https://help.talend.com/pages/viewpage.action?pageId=37851099" target="_blank" rel="nofollow noopener noreferrer"&gt;tHttpRequest&lt;/A&gt; component. This component can be used to send an HTTP request to the server and get the page content from the URL. Then use a regular expression or the tExtractXMLField component to extract all links from the response and, finally, iterate over the links one by one. For example: 
&lt;BR /&gt;tHttpRequest--main--tExtractXMLField--main--tFlowToIterate--iterate--tHttpRequest--main--tLogRow 
&lt;BR /&gt;Best regards 
&lt;BR /&gt;Shong</description>
      <pubDate>Fri, 08 May 2015 09:48:07 GMT</pubDate>
      <guid>https://community.qlik.com/t5/Talend-Studio/Web-Scraping/m-p/2267183#M46163</guid>
      <dc:creator>Anonymous</dc:creator>
      <dc:date>2015-05-08T09:48:07Z</dc:date>
    </item>
  </channel>
</rss>

