/// <summary>
    /// Converts an HTML fragment to plain text by stripping markup with regular expressions.
    /// </summary>
    /// <remarks>
    /// The rewrite rules are applied in a fixed order: server-side expression blocks,
    /// script elements, style elements, HTML comments, all remaining tags, line breaks
    /// followed by whitespace, and finally a small set of character entities.
    /// The non-breaking-space entity becomes a single space; the other listed entities and
    /// every decimal numeric entity are removed (they are not decoded).
    /// After the rules have run, remaining CR/LF pairs and tab characters are removed.
    ///
    /// Adapted from jadepark's blog (thanks, jadepark!):
    /// http://www.cnblogs.com/jadepark/archive/2007/08/04/838907.html
    /// Written:      [CHINA] Zhang Liu
    /// Date:         1,Jun,2006
    /// Version:      1.0
    /// Support:      MYBASK  http://www.mybask.net
    /// For the latest version or similar implementations of this function, visit http://www.mybask.net
    /// </remarks>
    /// <param name="html">The HTML text to convert. May be null or empty.</param>
    /// <returns>
    /// The text with markup removed, or an empty string when <paramref name="html"/> is null or empty.
    /// </returns>
    public static string ConvertHTML2TextRegex(string html)
    {
        // Nothing to strip; also avoids the ArgumentNullException Regex.Replace throws for null input.
        if (string.IsNullOrEmpty(html))
        {
            return string.Empty;
        }

        string text = html;

        // Order matters: each rule operates on the output of the previous one.
        for (int i = 0; i < Html2TextPatterns.Length; i++)
        {
            text = Html2TextPatterns[i].Replace(text, Html2TextReplacements[i]);
        }

        // String.Replace returns a new string (strings are immutable), so the result
        // must be assigned back; otherwise these two clean-up steps have no effect.
        text = text.Replace("\r\n", "");
        text = text.Replace("\t", "");

        return text;
    }

    // Rewrite rules used by ConvertHTML2TextRegex, built once and reused across calls.
    // Html2TextPatterns[i] is replaced by Html2TextReplacements[i].
    private static readonly Regex[] Html2TextPatterns =
    {
        new Regex(@"<%=[\w\W]*?%>", RegexOptions.IgnoreCase),            // <%= ... %> expression blocks
        new Regex(@"<script[\w\W]*?</script>", RegexOptions.IgnoreCase), // script elements including their content
        new Regex(@"<style[\w\W]*?</style>", RegexOptions.IgnoreCase),   // style elements including their content
        new Regex(@"<!--[\w\W]*?-->", RegexOptions.IgnoreCase),          // whole comments, so a '>' inside one cannot cut the match short
        new Regex(@"<[/]?[\w\W]*?>", RegexOptions.IgnoreCase),           // any remaining opening or closing tag
        new Regex(@"([\r\n])[\s]+", RegexOptions.IgnoreCase),            // a line break followed by whitespace
        new Regex(@"&(nbsp|#160);", RegexOptions.IgnoreCase),            // non-breaking space
        new Regex(@"&(iexcl|#161);", RegexOptions.IgnoreCase),
        new Regex(@"&(cent|#162);", RegexOptions.IgnoreCase),
        new Regex(@"&(pound|#163);", RegexOptions.IgnoreCase),
        new Regex(@"&(copy|#169);", RegexOptions.IgnoreCase),
        new Regex(@"&#(\d+);", RegexOptions.IgnoreCase),                 // any other decimal numeric entity
        new Regex(@"-->", RegexOptions.IgnoreCase),                      // stray comment terminator
        new Regex(@"<!--.*\n", RegexOptions.IgnoreCase)                  // unterminated comment opener up to the end of its line
    };

    // Every match is deleted except the non-breaking space, which becomes a plain space.
    private static readonly string[] Html2TextReplacements =
    {
        "", "", "", "", "", "", " ", "", "", "", "", "", "", ""
    };


// Leave a Reply