Simple removal of HTML tags with the Regex.Replace method
Sometimes you want to remove the tags from HTML and keep only the plain text. In general this is a simple task, but there are a few drawbacks in some scenarios. The simplest solution is to just remove all tags from the given HTML without preserving any formatting. First, add the required namespace:
using System.Text.RegularExpressions;
Then...
// Below code is used for .NET sites
/// <summary>
/// Strips HTML markup from <paramref name="source"/> and returns the remaining plain text.
/// </summary>
/// <param name="source">HTML fragment; tags may be literal or entity-escaped.</param>
/// <returns>
/// The text with tags, non-breaking-space entities and blank lines removed,
/// or an empty string when <paramref name="source"/> is null or empty.
/// </returns>
private string StripHtml(string source)
{
    if (string.IsNullOrEmpty(source))
    {
        return string.Empty;
    }

    // Decode escaped angle brackets first, so entity-encoded tags are also
    // caught by the tag pattern below.
    string output = source.Replace("&lt;", "<").Replace("&gt;", ">");

    // Get rid of HTML tags.
    output = Regex.Replace(output, "<[^>]*>", string.Empty);

    // Drop non-breaking-space entities only; ordinary spaces are kept so
    // that words stay separated.
    output = output.Replace("&nbsp;", string.Empty);

    // Get rid of blank (whitespace-only) lines.
    output = Regex.Replace(output, @"^\s*$\n", string.Empty, RegexOptions.Multiline);

    return output;
}
// Below code is used for SharePoint sites
/// <summary>
/// SharePoint variant: converts the HTML in <paramref name="str"/> to plain text
/// using <c>SPHttpUtility.ConvertSimpleHtmlToText</c>.
/// </summary>
/// <param name="str">HTML fragment; tags may be literal or entity-escaped.</param>
/// <returns>
/// The converted text with non-breaking-space entities and blank lines removed,
/// or an empty string when <paramref name="str"/> is null or empty.
/// </returns>
private string StripHtml(string str)
{
    if (string.IsNullOrEmpty(str))
    {
        return string.Empty;
    }

    // Decode escaped angle brackets first, so entity-encoded tags are seen
    // as real markup by the SharePoint converter.
    str = str.Replace("&lt;", "<").Replace("&gt;", ">");

    // NOTE(review): -1 is presumably "no maximum length" — confirm against the SPHttpUtility docs.
    str = SPHttpUtility.ConvertSimpleHtmlToText(str, -1);

    // Drop non-breaking-space entities only; ordinary spaces are kept so
    // that words stay separated.
    str = str.Replace("&nbsp;", string.Empty);

    // Get rid of blank (whitespace-only) lines.
    str = Regex.Replace(str, @"^\s*$\n", string.Empty, RegexOptions.Multiline);

    return str;
}
0 comments:
Post a Comment