<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Data from PDF in QlikView</title>
    <link>https://community.qlik.com/t5/QlikView/Data-from-PDF/m-p/167924#M39310</link>
    <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Thanks a lot Peter, it worked good, &lt;span class="lia-unicode-emoji" title=":slightly_smiling_face:"&gt;🙂&lt;/span&gt; i got a very good idea about how to load data from .txt as well.&lt;/P&gt;&lt;P&gt;i tried for the bigger file, its taking some time, so i am splitting the .txt file in to many and tried your method. Great.&lt;/P&gt;&lt;P&gt;God bless you..&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
    <pubDate>Sun, 07 Nov 2010 18:27:08 GMT</pubDate>
    <dc:creator>renjithpl</dc:creator>
    <dc:date>2010-11-07T18:27:08Z</dc:date>
    <item>
      <title>Data from PDF</title>
      <link>https://community.qlik.com/t5/QlikView/Data-from-PDF/m-p/167916#M39302</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;I have a very, very urgent requirement.&lt;/P&gt;&lt;P&gt;I have a lot of PDF files (OCR-converted) which are in table format.&lt;/P&gt;&lt;P&gt;I just want to read the raw data from those PDFs and load it into QlikView.&lt;/P&gt;&lt;P&gt;Is there any way of doing it?&lt;/P&gt;&lt;P&gt;I tried copy-pasting into Excel, but some of the characters get changed (e.g. S may change into 8 and L may change into 1).&lt;/P&gt;&lt;P&gt;So what would you guys suggest? I am very eager to know about this...&lt;/P&gt;&lt;P&gt;Please help me.&lt;/P&gt;&lt;P&gt;Ren&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Tue, 02 Nov 2010 06:34:19 GMT</pubDate>
      <guid>https://community.qlik.com/t5/QlikView/Data-from-PDF/m-p/167916#M39302</guid>
      <dc:creator>renjithpl</dc:creator>
      <dc:date>2010-11-02T06:34:19Z</dc:date>
    </item>
    <item>
      <title>Data from PDF</title>
      <link>https://community.qlik.com/t5/QlikView/Data-from-PDF/m-p/167917#M39303</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;With PDF you still have a bunch of scanned files. Configure the OCR software to write to text files instead&lt;/P&gt;&lt;P&gt;-Alex&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Tue, 02 Nov 2010 08:13:27 GMT</pubDate>
      <guid>https://community.qlik.com/t5/QlikView/Data-from-PDF/m-p/167917#M39303</guid>
      <dc:creator />
      <dc:date>2010-11-02T08:13:27Z</dc:date>
    </item>
    <item>
      <title>Data from PDF</title>
      <link>https://community.qlik.com/t5/QlikView/Data-from-PDF/m-p/167918#M39304</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;You may also use - at least with Adobe Reader - the option to export to text, provided the initial document is not scanned but was created using a PDF printer.&lt;/P&gt;&lt;P&gt;HTH&lt;BR /&gt;Peter&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Tue, 02 Nov 2010 14:58:32 GMT</pubDate>
      <guid>https://community.qlik.com/t5/QlikView/Data-from-PDF/m-p/167918#M39304</guid>
      <dc:creator>prieper</dc:creator>
      <dc:date>2010-11-02T14:58:32Z</dc:date>
    </item>
    <item>
      <title>Data from PDF</title>
      <link>https://community.qlik.com/t5/QlikView/Data-from-PDF/m-p/167919#M39305</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Hi Peter,&lt;/P&gt;&lt;P&gt;This is not an answer, but another question.&lt;/P&gt;&lt;P&gt;I have saved my PDF file in .txt format;&lt;/P&gt;&lt;P&gt;see the attachment.&lt;/P&gt;&lt;P&gt;There is a "Serial Number" with 49 entries, then comes the "Dollar value", then "Date ISS" and then the Reference Number.&lt;/P&gt;&lt;P&gt;Please guide me on how to load data from .txt, since this is the first time I will be loading data from a .txt file.&lt;/P&gt;&lt;P&gt;The .txt file has a lot of pages; wherever the serialno, dollar value, date iss and referenceno appear in the file, they should be read from the one .txt file.&lt;/P&gt;&lt;P&gt;Is this possible, or can you give any kind of suggestion?&lt;/P&gt;&lt;P&gt;My output should look like this:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;TABLE border="0" cellpadding="0" cellspacing="0" style="width:287pt;border-collapse:collapse;" width="383"&gt;&lt;COLGROUP&gt;&lt;COL style="width:80pt;mso-width-source:userset;mso-width-alt:3913;" width="107" /&gt;&lt;COL style="width:79pt;mso-width-source:userset;mso-width-alt:3840;" width="105" /&gt;&lt;COL style="width:51pt;mso-width-source:userset;mso-width-alt:2486;" width="68" /&gt;&lt;COL style="width:77pt;mso-width-source:userset;mso-width-alt:3766;" width="103" /&gt;&lt;/COLGROUP&gt;&lt;TBODY&gt;&lt;TR style="height:15pt;"&gt;&lt;TD class="xl64" height="20" style="width:80pt;height:15pt;background-color:transparent;border:#ece9d8;" width="107"&gt;&lt;STRONG&gt;SerialNO&lt;/STRONG&gt;&lt;/TD&gt;&lt;TD class="xl64" style="width:79pt;background-color:transparent;border:#ece9d8;" width="105"&gt;&lt;STRONG&gt;DollarValue&lt;/STRONG&gt;&lt;/TD&gt;&lt;TD class="xl64" style="width:51pt;background-color:transparent;border:#ece9d8;" width="68"&gt;&lt;STRONG&gt;DateISS&lt;/STRONG&gt;&lt;/TD&gt;&lt;TD class="xl64" style="width:77pt;background-color:transparent;border:#ece9d8;" 
width="103"&gt;&lt;STRONG&gt;ReferenceNO&lt;/STRONG&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR style="height:15pt;"&gt;&lt;TD align="right" height="20" style="height:15pt;background-color:transparent;border:#ece9d8;"&gt;6381122&lt;/TD&gt;&lt;TD style="background-color:transparent;border:#ece9d8;"&gt;85.00S&lt;/TD&gt;&lt;TD align="right" class="xl63" style="background-color:transparent;border:#ece9d8;"&gt;1/17/2002&lt;/TD&gt;&lt;TD style="background-color:transparent;border:#ece9d8;"&gt;S2000849&lt;/TD&gt;&lt;/TR&gt;&lt;TR style="height:15pt;"&gt;&lt;TD align="right" height="20" style="height:15pt;background-color:transparent;border:#ece9d8;"&gt;6381207&lt;/TD&gt;&lt;TD style="background-color:transparent;border:#ece9d8;"&gt;15,465.00S&lt;/TD&gt;&lt;TD align="right" class="xl63" style="background-color:transparent;border:#ece9d8;"&gt;1/17/2002&lt;/TD&gt;&lt;TD style="background-color:transparent;border:#ece9d8;"&gt;S2000934&lt;/TD&gt;&lt;/TR&gt;&lt;TR style="height:15pt;"&gt;&lt;TD align="right" height="20" style="height:15pt;background-color:transparent;border:#ece9d8;"&gt;6381212&lt;/TD&gt;&lt;TD style="background-color:transparent;border:#ece9d8;"&gt;720.00S&lt;/TD&gt;&lt;TD align="right" class="xl63" style="background-color:transparent;border:#ece9d8;"&gt;1/17/2002&lt;/TD&gt;&lt;TD style="background-color:transparent;border:#ece9d8;"&gt;S2000939&lt;/TD&gt;&lt;/TR&gt;&lt;TR style="height:15pt;"&gt;&lt;TD align="right" height="20" style="height:15pt;background-color:transparent;border:#ece9d8;"&gt;6381300&lt;/TD&gt;&lt;TD style="background-color:transparent;border:#ece9d8;"&gt;599.99S&lt;/TD&gt;&lt;TD align="right" class="xl63" style="background-color:transparent;border:#ece9d8;"&gt;1/18/2002&lt;/TD&gt;&lt;TD style="background-color:transparent;border:#ece9d8;"&gt;A2003262&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;BR /&gt;&lt;BR /&gt; &lt;P&gt;THanks in advance,&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Tue, 02 Nov 2010 15:25:36 GMT</pubDate>
      <guid>https://community.qlik.com/t5/QlikView/Data-from-PDF/m-p/167919#M39305</guid>
      <dc:creator>renjithpl</dc:creator>
      <dc:date>2010-11-02T15:25:36Z</dc:date>
    </item>
    <item>
      <title>Data from PDF</title>
      <link>https://community.qlik.com/t5/QlikView/Data-from-PDF/m-p/167920#M39306</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;hi peter,&lt;/P&gt;&lt;P&gt;in my post i have attached the .txt file... Please have a look at it, and reply if you can figure out some kind of solution.&lt;/P&gt;&lt;P&gt;thanks&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Tue, 02 Nov 2010 15:28:01 GMT</pubDate>
      <guid>https://community.qlik.com/t5/QlikView/Data-from-PDF/m-p/167920#M39306</guid>
      <dc:creator>renjithpl</dc:creator>
      <dc:date>2010-11-02T15:28:01Z</dc:date>
    </item>
    <item>
      <title>Data from PDF</title>
      <link>https://community.qlik.com/t5/QlikView/Data-from-PDF/m-p/167921#M39307</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Does the PDF file really have the columns shown one below the other?&lt;BR /&gt;It looks to me very much like scanned text run through an OCR program.&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;If it is always the serial with x entries, followed by Dollar with exactly x entries etc., it should be workable, but looking into the text file, in the second section starting at line 240 there are only 17 entries for Dollar, then some references and then probably Dollars again.&lt;BR /&gt;This seems to be a lot of garbage and might be cleaned up within the program creating the PDF.&lt;/P&gt;&lt;P&gt;++&lt;BR /&gt;Peter&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Tue, 02 Nov 2010 15:58:25 GMT</pubDate>
      <guid>https://community.qlik.com/t5/QlikView/Data-from-PDF/m-p/167921#M39307</guid>
      <dc:creator>prieper</dc:creator>
      <dc:date>2010-11-02T15:58:25Z</dc:date>
    </item>
    <item>
      <title>Data from PDF</title>
      <link>https://community.qlik.com/t5/QlikView/Data-from-PDF/m-p/167922#M39308</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Hi Peter&lt;/P&gt;&lt;P&gt;Thanks for your reply. I know the data is really a mess. OK, let's take it this way: take the first two pages, where the data is almost clean with 49 entries; how can I load data from those two pages? You can delete all other pages and keep the first two so that it is pretty clear.&lt;/P&gt;&lt;P&gt;You can even take just one page and work it out.&lt;/P&gt;&lt;P&gt;The problem is that I don't know how to load a .txt file when this type of .txt file is given.&lt;/P&gt;&lt;P&gt;Thanks a lot Peter. &lt;span class="lia-unicode-emoji" title=":slightly_smiling_face:"&gt;🙂&lt;/span&gt;&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Tue, 02 Nov 2010 16:05:11 GMT</pubDate>
      <guid>https://community.qlik.com/t5/QlikView/Data-from-PDF/m-p/167922#M39308</guid>
      <dc:creator>renjithpl</dc:creator>
      <dc:date>2010-11-02T16:05:11Z</dc:date>
    </item>
    <item>
      <title>Data from PDF</title>
      <link>https://community.qlik.com/t5/QlikView/Data-from-PDF/m-p/167923#M39309</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Good day,&lt;/P&gt;&lt;P&gt;As a kind of starting idea you may use the script below, which reads only the first pages.&lt;BR /&gt;The major problem is the garbled base data, for example whenever your OCR thinks that a line needs to be added (like row 406). You may bring in further validation to trap this error, but you will run into serious problems whenever the OCR breaks one record into two lines...&lt;BR /&gt;It might also make sense to transform the Dollar Amount into correct numbers (eliminating the "S" at the end, which might originally have been "$", and also eliminating the "," as thousands separator, etc.).&lt;BR /&gt;You may also need to grab some header information from the text, like the bank account or the like, or to separate the date from the Reference, etc.&lt;/P&gt;&lt;P&gt;The script might be:&lt;/P&gt;&lt;BLOCKQUOTE style="overflow-x: scroll;"&gt;&lt;PRE style="margin: 0px;"&gt;// ==== Load Line by Line and identify the headers ========================&lt;BR /&gt; RawData:&lt;BR /&gt; LOAD&lt;BR /&gt; *,&lt;BR /&gt; RowNo() AS LineNo,&lt;BR /&gt; IF(WILDMATCH(Line, '*SERIAL*', '*DOLLAR*', '*REFERENCE*'),&lt;BR /&gt; 'HEADER', 'DATA') AS DataIdentifyer;&lt;BR /&gt; LOAD&lt;BR /&gt; @1:n AS Line&lt;BR /&gt; FROM&lt;BR /&gt; [.\test PDF2TXT.txt]&lt;BR /&gt; (fix, codepage is 1252);&lt;BR /&gt;// ==== Identify headers and starting-lines of each section ===============&lt;BR /&gt; StartingPoints:&lt;BR /&gt; LOAD&lt;BR /&gt; LineNo +2 AS StartRow,&lt;BR /&gt; Line AS Header&lt;BR /&gt; RESIDENT&lt;BR /&gt; RawData&lt;BR /&gt; WHERE&lt;BR /&gt; DataIdentifyer = 'HEADER'&lt;BR /&gt; ORDER BY&lt;BR /&gt; LineNo DESC;&lt;BR /&gt;&lt;BR /&gt;// ==== From each Header downwards load 49 records ========================&lt;BR /&gt; FOR i = 0 TO NOOFROWS('StartingPoints') -1&lt;BR /&gt; LET sHeader = '[' &amp;amp; PEEK('Header', i, 'StartingPoints') &amp;amp; ']'; // Title of field&lt;BR /&gt; LET 
iDataStart = PEEK('StartRow', i, 'StartingPoints'); // Starting Row to read&lt;BR /&gt; LET iDataEnd = iDataStart + 49;&lt;BR /&gt; IF i = 0 THEN // Load Parameter&lt;BR /&gt; LET sLoadString = 'LOAD ';&lt;BR /&gt; ELSE&lt;BR /&gt; LET sLoadString = 'JOIN (Data) LOAD ';&lt;BR /&gt; END IF&lt;BR /&gt;&lt;BR /&gt; Data:&lt;BR /&gt; $(sLoadString)&lt;BR /&gt; RowNo() AS RowNo_PartTable, // Join-Parameter&lt;BR /&gt; Line AS $(sHeader)&lt;BR /&gt; RESIDENT&lt;BR /&gt; RawData&lt;BR /&gt; WHERE&lt;BR /&gt; LineNo &amp;gt; $(iDataStart)&lt;BR /&gt; AND&lt;BR /&gt; LineNo &amp;lt; $(iDataEnd);&lt;BR /&gt; NEXT i&lt;BR /&gt;&lt;BR /&gt;DROP TABLE RawData;&lt;BR /&gt;DROP TABLE StartingPoints;&lt;/PRE&gt;&lt;/BLOCKQUOTE&gt;&lt;BR /&gt;&lt;BR /&gt; &lt;P&gt;HTH&lt;BR /&gt;Peter&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Fri, 05 Nov 2010 08:36:39 GMT</pubDate>
      <guid>https://community.qlik.com/t5/QlikView/Data-from-PDF/m-p/167923#M39309</guid>
      <dc:creator>prieper</dc:creator>
      <dc:date>2010-11-05T08:36:39Z</dc:date>
    </item>
    <item>
      <title>Data from PDF</title>
      <link>https://community.qlik.com/t5/QlikView/Data-from-PDF/m-p/167924#M39310</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Thanks a lot Peter, it worked good, &lt;span class="lia-unicode-emoji" title=":slightly_smiling_face:"&gt;🙂&lt;/span&gt; i got a very good idea about how to load data from .txt as well.&lt;/P&gt;&lt;P&gt;i tried for the bigger file, its taking some time, so i am splitting the .txt file in to many and tried your method. Great.&lt;/P&gt;&lt;P&gt;God bless you..&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Sun, 07 Nov 2010 18:27:08 GMT</pubDate>
      <guid>https://community.qlik.com/t5/QlikView/Data-from-PDF/m-p/167924#M39310</guid>
      <dc:creator>renjithpl</dc:creator>
      <dc:date>2010-11-07T18:27:08Z</dc:date>
    </item>
  </channel>
</rss>

