1 Reply Latest reply: Oct 23, 2017 3:13 PM by Cassandra Baqir RSS

    Max length exceeds 4001 characters

    Cassandra Baqir

      I am running a VB module to clean up some HTML formatting on a field called DESCRIPTION and then using TEXTBETWEEN to separate DESCRIPTION into 5 separate fields but I am finding that the length of my field exceeds 4001 characters and is truncating so the last couple of fields doesn't populate correctly. It looks like the max length that has been loaded is 4001 characters. Any suggestions on how to fix this? Maybe something like loading the first 4000 chars as one field and the next 4000 chars after that as another?

       

      Maybe something along these lines:

        left(DESCRIPTION, 4000) as DESCRIPTIONA,
      right(DESCRIPTION, max(len(DESCRIPTION)-4000)) as DESCRIPTIONB,
      RemoveHTMLTags2(DESCRIPTIONA) as DESCRIPTION_CLEANEDA,
      RemoveHTMLTags2(DESCRIPTIONB) as DESCRIPTION_CLEANEDB,
      TextBetween( RemoveHTMLTags2(DESCRIPTIONA) , 'Current State Summary:' , 'Problem Statement:' )  as CUR_STATE_SUMMARYA,
      TextBetween( RemoveHTMLTags2(DESCRIPTIONA) , 'Problem Statement:' , 'Proposed Solution:' )  as PROBLEM_STATEMENTA,
      TextBetween( RemoveHTMLTags2(DESCRIPTIONA) , 'Proposed Solution:' , 'Voting Statement:' )  as PROP_SOLUTIONA,
      TextBetween( RemoveHTMLTags2(DESCRIPTIONB) , 'Voting Statement:' , 'Analytics:' )  as VOTING_STATEMENTB,
      TextBetween( RemoveHTMLTags2(DESCRIPTIONB) , 'Analytics:' , '' )  as ANALYTICSB,

       

      Source data:

       

      QV Screenshot:

       

      Load Script excerpt:

      RemoveHTMLTags2(DESCRIPTION) as DESCRIPTION_CLEANED,
      TextBetween( RemoveHTMLTags2(DESCRIPTION) , 'Current State Summary:' , 'Problem Statement:' )  as CUR_STATE_SUMMARY,
      TextBetween( RemoveHTMLTags2(DESCRIPTION) , 'Problem Statement:' , 'Proposed Solution:' )  as PROBLEM_STATEMENT,
      TextBetween( RemoveHTMLTags2(DESCRIPTION) , 'Proposed Solution:' , 'Voting Statement:' )  as PROP_SOLUTION,
      TextBetween( RemoveHTMLTags2(DESCRIPTION) , 'Voting Statement:' , 'Analytics:' )  as VOTING_STATEMENT,
      TextBetween( RemoveHTMLTags2(DESCRIPTION) , 'Analytics:' , '' )  as ANALYTICS,

       

      Module:

       

      Function RemoveHTMLTags2(strText)
      '*********************************************************************************
      '*********************** Function RemoveHTMLTags2 ********************************
      '*********************************************************************************
      ' Will remove HTML tag from a string and return a non-html tag string
      ' Also removes   html syntax

      Dim TAGLIST
      TAGLIST = TAGLIST & ";!--;!DOCTYPE;A;ACRONYM;ADDRESS;APPLET;AREA;B;BASE;BASEFONT;"
      TAGLIST = TAGLIST & "BGSOUND;BIG;BLOCKQUOTE;BODY;BUTTON;CAPTION;CENTER;CITE;CODE;"
      TAGLIST = TAGLIST & "COL;COLGROUP;COMMENT;DD;DEL;DFN;DIR;DIV;DL;DT;EM;EMBED;FIELDSET;"
      TAGLIST = TAGLIST & "FONT;FORM;FRAME;FRAMESET;HEAD;H1;H2;H3;H4;H5;H6;HR;HTML;I;IFRAME;IMG;"
      TAGLIST = TAGLIST & "INPUT;INS;ISINDEX;KBD;LABEL;LAYER;LAGEND;LI;LINK;LISTING;MAP;MARQUEE;"
      TAGLIST = TAGLIST & "MENU;META;NOBR;NOFRAMES;NOSCRIPT;OBJECT;OL;OPTION;P;PARAM;PLAINTEXT;"
      TAGLIST = TAGLIST & "PRE;Q;S;SAMP;SCRIPT;SELECT;SMALL;SPAN;STRIKE;STRONG;STYLE;SUB;SUP;"
      TAGLIST = TAGLIST & "TABLE;TBODY;TD;TEXTAREA;TFOOT;TH;THEAD;TITLE;TR;TT;U;UL;VAR;WBR;XMP;"

      ' TAGLIST = ";!--;!DOCTYPE;A;ACRONYM;ADDRESS;APPLET;AREA;B;BASE;BASEFONT;" &_
      ' "BGSOUND;BIG;BLOCKQUOTE;BODY;BUTTON;CAPTION;CENTER;CITE;CODE;" &_
      ' "COL;COLGROUP;COMMENT;DD;DEL;DFN;DIR;DIV;DL;DT;EM;EMBED;FIELDSET;" &_
      ' "FONT;FORM;FRAME;FRAMESET;HEAD;H1;H2;H3;H4;H5;H6;HR;HTML;I;IFRAME;IMG;" &_
      ' "INPUT;INS;ISINDEX;KBD;LABEL;LAYER;LAGEND;LI;LINK;LISTING;MAP;MARQUEE;" &_
      ' "MENU;META;NOBR;NOFRAMES;NOSCRIPT;OBJECT;OL;OPTION;P;PARAM;PLAINTEXT;" &_
      ' "PRE;Q;S;SAMP;SCRIPT;SELECT;SMALL;SPAN;STRIKE;STRONG;STYLE;SUB;SUP;" &_
      ' "TABLE;TBODY;TD;TEXTAREA;TFOOT;TH;THEAD;TITLE;TR;TT;U;UL;VAR;WBR;XMP;"

      Const BLOCKTAGLIST = ";APPLET;EMBED;FRAMESET;HEAD;NOFRAMES;NOSCRIPT;OBJECT;SCRIPT;STYLE;"

      Dim nPos1
      Dim nPos2
      Dim nPos3
      Dim strResult
      Dim strTagName
      Dim bRemove
      Dim bSearchForBlock

      strText = Replace(strText, "</li>", "</li><br>") 'dgreer added 2012-04-25 to add breaks for lists

      nPos1 = InStr(strText, "<")
      Do While nPos1 > 0
      nPos2 = InStr(nPos1 + 1, strText, ">")
      If nPos2 > 0 Then
      strTagName = Mid(strText, nPos1 + 1, nPos2 - nPos1 - 1)
      strTagName = Replace(Replace(strTagName, vbCr, " "), vbLf, " ")

      nPos3 = InStr(strTagName, " ")
      If nPos3 > 0 Then
      strTagName = Left(strTagName, nPos3 - 1)
      End If

      If Left(strTagName, 1) = "/" Then
      strTagName = Mid(strTagName, 2)
      bSearchForBlock = False
      Else
      bSearchForBlock = True
      End If

      If InStr(1, TAGLIST, ";" & strTagName & ";", vbTextCompare) > 0 Then
      bRemove = True
      If bSearchForBlock Then
      If InStr(1, BLOCKTAGLIST, ";" & strTagName & ";", vbTextCompare) > 0 Then
      nPos2 = Len(strText)
      nPos3 = InStr(nPos1 + 1, strText, "</" & strTagName, vbTextCompare)
      If nPos3 > 0 Then
      nPos3 = InStr(nPos3 + 1, strText, ">")
      End If

      If nPos3 > 0 Then
      nPos2 = nPos3
      End If
      End If
      End If
      Else
      bRemove = False
      End If

      If bRemove Then
      strResult = strResult & Left(strText, nPos1 - 1)
      strText = Mid(strText, nPos2 + 1)
      Else
      strResult = strResult & Left(strText, nPos1)
      strText = Mid(strText, nPos1 + 1)
      End If
      Else
      strResult = strResult & strText
      strText = ""
      End If

      nPos1 = InStr(strText, "<")
      Loop
      strResult = strResult & strText

      ' Check for   tags to address a specific issue with the memo field comparison
      ' replace   with a space
      strResult = Replace(strResult, chr(10), "") 'dgreer 2012-04-25 remove hex 0A causing issues in excel
      strResult = Replace(strResult, chr(13), "") 'dgreer 2012-04-25 remove hex 0D causing issues in excel

      strResult = Replace(strResult, "<br>", vbLf)
      strResult = Replace(strResult, "<br />", vbLf) ' mherbert 2014-09-18 Added this additional line as per HP QC CR#231
      strResult = Replace(strResult, " ", " ")
      strResult = Replace(strResult, "&gt;", ">")
      strResult = Replace(strResult, "&lt;", "<")
      strResult = Replace(strResult, "&quot;", Chr(34))
      RemoveHTMLTags2 = strResult
      End Function