摘要:
过滤HTML标签的几个函数
以下是在网上收集的几种对一一段带有html标签的字符串进行过滤:
片段1
public static string NoHTML(string Htmlstring) { //删除脚本 Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "",RegexOptions.IgnoreCase); //删除HTML Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "",RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "",RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"",RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&",RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<",RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">",RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", "",RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\xa1",RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\xa2",RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\xa3",RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\xa9",RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "",RegexOptions.IgnoreCase); Htmlstring.Replace("<", ""); Htmlstring.Replace(">", ""); Htmlstring.Replace("\r\n", ""); Htmlstring = HttpContext.Current.Server.HtmlEncode(Htmlstring).Trim(); return Htmlstring; }
片段2
public static string StripHTML(string strHtml) { string[]aryReg = { @"<script[^>]*?>.*?</script>", @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>", @"([\r\n])[\s]+", @ "&(quot|#34);", @"&(amp|#38);", @"&(lt|#60);", @"&(gt|#62);", @ "&(nbsp|#160);", @"&(iexcl|#161);", @"&(cent|#162);", @"&(pound|#163);", @"&(copy|#169);", @"&#(\d+);", @"-->", @"<!--.*\n" }; string[]aryRep = { "", "", "", "\"", "&", "<", ">", "", "\xa1", //chr(161),"\xa2", //chr(162),"\xa3", //chr(163),"\xa9", //chr(169),"", "\r\n", "" }; string newReg = aryReg[0]; string strOutput = strHtml; for (int i = 0; i < aryReg.Length; i++) { Regex regex = new Regex(aryReg[i], RegexOptions.IgnoreCase); strOutput = regex.Replace(strOutput, aryRep[i]); } strOutput.Replace("<", ""); strOutput.Replace(">", ""); strOutput.Replace("\r\n", ""); return strOutput; }片段3
public static string ParseTags(string HTMLStr) { return System.Text.RegularExpressions.Regex.Replace(HTMLStr, "<[^>]*>", ""); }