.net过滤HTML标签的几个函数

摘要： 过滤HTML标签的几个函数

以下是在网上收集的几种对一一段带有html标签的字符串进行过滤：

片段1

public static string NoHTML(string Htmlstring)
{
  //删除脚本
  Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "",RegexOptions.IgnoreCase);
  //删除HTML
  Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "",RegexOptions.IgnoreCase);
  Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "",RegexOptions.IgnoreCase);
  Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
  Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);
  Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"",RegexOptions.IgnoreCase);
  Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&",RegexOptions.IgnoreCase);
  Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<",RegexOptions.IgnoreCase);
  Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">",RegexOptions.IgnoreCase);
  Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", "",RegexOptions.IgnoreCase);
  Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\xa1",RegexOptions.IgnoreCase);
  Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\xa2",RegexOptions.IgnoreCase);
  Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\xa3",RegexOptions.IgnoreCase);
  Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\xa9",RegexOptions.IgnoreCase);
  Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "",RegexOptions.IgnoreCase);
  Htmlstring.Replace("<", "");
  Htmlstring.Replace(">", "");
  Htmlstring.Replace("\r\n", "");
  Htmlstring = HttpContext.Current.Server.HtmlEncode(Htmlstring).Trim();
  return Htmlstring;
}

片段2

public static string StripHTML(string strHtml)
  {
    string[]aryReg =
    {
      @"<script[^>]*?>.*?</script>",
      @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>", @"([\r\n])[\s]+", @
        "&(quot|#34);", @"&(amp|#38);", @"&(lt|#60);", @"&(gt|#62);", @
        "&(nbsp|#160);", @"&(iexcl|#161);", @"&(cent|#162);", @"&(pound|#163);",
        @"&(copy|#169);", @"&#(\d+);", @"-->", @"<!--.*\n"
    };
    string[]aryRep =
    {
      "", "", "", "\"", "&", "<", ">", "", "\xa1",  //chr(161),"\xa2",  //chr(162),"\xa3",  //chr(163),"\xa9",  //chr(169),"", "\r\n", ""
    };
    string newReg = aryReg[0];
    string strOutput = strHtml;
    for (int i = 0; i < aryReg.Length; i++)
    {
      Regex regex = new Regex(aryReg[i], RegexOptions.IgnoreCase);
      strOutput = regex.Replace(strOutput, aryRep[i]);
    }
    strOutput.Replace("<", "");
    strOutput.Replace(">", "");
    strOutput.Replace("\r\n", "");
    return strOutput;
  }

片段3

public static string ParseTags(string HTMLStr)
{
  return System.Text.RegularExpressions.Regex.Replace(HTMLStr, "<[^>]*>", "");
}