Excel: Remove HTML Tags and Escape Codes from an Excel selection

The code below 1 can be used to remove HTML tags and escape codes from an Excel selection.  Simply page the code into a new module, select the relevant cells to be processed, and press run.

I would suggest that you copy the original cells to another sheet and then try it on the copy first, just to make sure it works the way you want.

I have found a list of the Escape Codes, at http://www.bryanlprice.com/specials.html, but there are others.  I haven’t included all of them in the code below, as they were not relevant for my purposes.

I have also found a list of HTML character codes, at http://www.btinternet.com/~andrew.murphy/html_character_set.html.

Option Explicit
Private Sub RemoveHTMLTagsAndEscapeCodes()

    On Error Resume Next
    Dim iLT As Long
    Dim iGT As Long
    Dim sTag As String
    Dim sHTML As String
   
    Dim Cell As Range
    'For each cell within the active selection
    For Each Cell In Selection
        sHTML = Cell.Value
     
        'Find and remove all HTML tags
        iLT = InStr(sHTML, "<")
        iGT = InStr(sHTML, ">")
       
        Do While iLT > 0 And iGT > 0 And iGT > iLT
            'Extract the tag
            sTag = Mid$(sHTML, iLT, iGT - iLT + 1)
           
            'Remove the tag from the HTML string
            sHTML = Replace(sHTML, sTag, "")

            'Find the next HTML tag
            iLT = InStr(sHTML, "<")
            iGT = InStr(sHTML, ">")
        Loop

        'Remove carriage returns and line feeds from the beginning
        Do While Left(sHTML, 1) = vbCr Or Left(sHTML, 1) = vbLf
            sHTML = Mid$(sHTML, 2)
        Loop
        'And, from the end
        Do While Right(sHTML, 1) = vbCr Or Right(sHTML, 1) = vbLf
            sHTML = Left$(sHTML, Len(sHTML) - 1)
        Loop
     
        'Replace HTML escape codes with hyphens
        sHTML = Replace(sHTML, "–", "-")
        sHTML = Replace(sHTML, "—", "-")
        sHTML = Replace(sHTML, "­", "-")
       
        'Replace HTML escape codes with single quotes
        sHTML = Replace(sHTML, "‘", "'")
        sHTML = Replace(sHTML, "’", "'")
        sHTML = Replace(sHTML, "'", "'")
       
        'Replace HTML escape codes with double quotes
        sHTML = Replace(sHTML, "“", Chr(34))
        sHTML = Replace(sHTML, "”", Chr(34))
        sHTML = Replace(sHTML, """, Chr(34))
       
        'Replace other HTML escape codes
        sHTML = Replace(sHTML, " ", " ")
        sHTML = Replace(sHTML, "&", "&")
        sHTML = Replace(sHTML, "&#", "#")
        sHTML = Replace(sHTML, "<", "<")
        sHTML = Replace(sHTML, ">", ">")
        sHTML = Replace(sHTML, "£", "£")
        sHTML = Replace(sHTML, "€", "€")
        sHTML = Replace(sHTML, "®", "(R)")
        sHTML = Replace(sHTML, "©", "(C)")

        'Replace HTML character codes
        sHTML = Replace(sHTML, "%20;", " ")
        sHTML = Replace(sHTML, "#38;", "&")
        sHTML = Replace(sHTML, "#39;", "'")
        sHTML = Replace(sHTML, "#160;", "á")
        sHTML = Replace(sHTML, "#163;", "£")
        sHTML = Replace(sHTML, "#187;", "+")
        sHTML = Replace(sHTML, "#233;", "é")
        sHTML = Replace(sHTML, "#729;", " ")
        sHTML = Replace(sHTML, "#937;", " ")
        sHTML = Replace(sHTML, "#8260;", "/")
        sHTML = Replace(sHTML, "#8800;", "-")
        sHTML = Replace(sHTML, "#8232;", " ")
        sHTML = Replace(sHTML, "#8710;", " ")
        sHTML = Replace(sHTML, "#8722;", "-")
        sHTML = Replace(sHTML, "#8734;", "°")
        sHTML = Replace(sHTML, "#8747;", " ")
       
        'Remove any double spaces
        Do While Instr(sHTML, "  ") > 0
                sHTML = Replace(sHTML, "  ", " ")
        Loop
        'Remove leading or trailing spaces
        sHTML = Trim$(sHTML)

        'Return the result
        Cell.Value = sHTML
    Next
End Sub
  1. Please ensure you test any script taken from my website on a test/development machine, before running on a production server.

Leave a Reply

Your email address will not be published. Required fields are marked *

This site uses Akismet to reduce spam. Learn how your comment data is processed.