我想通过将HTML内容传递给函数来生成PDF。我已经为此使用了iTextSharp,但它在遇到表和布局时表现不佳。

有没有更好的办法?


当前回答

下面是一个使用iTextSharp将html + css转换为PDF的示例(iTextSharp + iTextSharp .xmlworker)

using iTextSharp.text;
using iTextSharp.text.pdf;
using iTextSharp.tool.xml;


byte[] pdf; // result will be here

var cssText = File.ReadAllText(MapPath("~/css/test.css"));
var html = File.ReadAllText(MapPath("~/css/test.html"));

using (var memoryStream = new MemoryStream())
{
        var document = new Document(PageSize.A4, 50, 50, 60, 60);
        var writer = PdfWriter.GetInstance(document, memoryStream);
        document.Open();

        using (var cssMemoryStream = new MemoryStream(System.Text.Encoding.UTF8.GetBytes(cssText)))
        {
            using (var htmlMemoryStream = new MemoryStream(System.Text.Encoding.UTF8.GetBytes(html)))
            {
                XMLWorkerHelper.GetInstance().ParseXHtml(writer, document, htmlMemoryStream, cssMemoryStream);
            }
        }

        document.Close();

        pdf = memoryStream.ToArray();
}

其他回答

如果你已经使用itextsharp dll,不需要添加第三方dll的(插件),我认为你正在使用htmlworker而不是它使用xmlworker,你可以很容易地将你的html转换为pdf。 一些css不能工作,他们是受支持的css 完整的解释与示例参考点击这里

        MemoryStream memStream = new MemoryStream();
        TextReader xmlString = new StringReader(outXml);
        using (Document document = new Document())
        {
            PdfWriter writer = PdfWriter.GetInstance(document, memStream);
            //document.SetPageSize(iTextSharp.text.PageSize.A4);
            document.Open();
            byte[] byteArray = System.Text.Encoding.UTF8.GetBytes(outXml);
            MemoryStream ms = new MemoryStream(byteArray);
            XMLWorkerHelper.GetInstance().ParseXHtml(writer, document, ms, System.Text.Encoding.UTF8);
            document.Close();
        }

        Response.ContentType = "application/pdf";
        Response.AddHeader("content-disposition", "attachment;filename=" + filename + ".pdf");
        Response.Cache.SetCacheability(HttpCacheability.NoCache);
        Response.BinaryWrite(memStream.ToArray());
        Response.End();
        Response.Flush();

如果你想让用户在浏览器中下载渲染页面的pdf,那么最简单的解决方案是

window.print(); 

在客户端,它将提示用户保存当前页面的PDF。您还可以通过链接样式自定义pdf的外观

<link rel="stylesheet" type="text/css" href="print.css" media="print">

css在打印时应用于HTML。

限制

不能将文件存储在服务器端。 用户提示打印页面时,必须手动保存页面。 页必须在选项卡中呈现。

你可以使用WebBrowser控件的另一个技巧,下面是我的完整工作代码

在我的例子中,为文本框控件分配Url

  protected void Page_Load(object sender, EventArgs e)
{

   txtweburl.Text = "https://www.google.com/";

 }

下面是使用线程生成屏幕的代码

  protected void btnscreenshot_click(object sender, EventArgs e)
  {
    //  btnscreenshot.Visible = false;
    allpanels.Visible = true;
    Thread thread = new Thread(GenerateThumbnail);
    thread.SetApartmentState(ApartmentState.STA);
    thread.Start();
    thread.Join();

}

private void GenerateThumbnail()
{
    //  btnscreenshot.Visible = false;
    WebBrowser webrowse = new WebBrowser();
    webrowse.ScrollBarsEnabled = false;
    webrowse.AllowNavigation = true;
    string url = txtweburl.Text.Trim();
    webrowse.Navigate(url);
    webrowse.Width = 1400;
    webrowse.Height = 50000;

    webrowse.DocumentCompleted += webbrowse_DocumentCompleted;
    while (webrowse.ReadyState != WebBrowserReadyState.Complete)
    {
        System.Windows.Forms.Application.DoEvents();
    }
}

在下面的代码中,我下载后保存pdf文件

        private void webbrowse_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
{
    // btnscreenshot.Visible = false;
    string folderPath = Server.MapPath("~/ImageFiles/");

    WebBrowser webrowse = sender as WebBrowser;
    //Bitmap bitmap = new Bitmap(webrowse.Width, webrowse.Height);

    Bitmap bitmap = new Bitmap(webrowse.Width, webrowse.Height, PixelFormat.Format16bppRgb565);

    webrowse.DrawToBitmap(bitmap, webrowse.Bounds);


    string Systemimagedownloadpath = System.Configuration.ConfigurationManager.AppSettings["Systemimagedownloadpath"].ToString();
    string fullOutputPath = Systemimagedownloadpath + Request.QueryString["VisitedId"].ToString() + ".png";
    MemoryStream stream = new MemoryStream();
    bitmap.Save(fullOutputPath, System.Drawing.Imaging.ImageFormat.Jpeg);



    //generating pdf code 
     Document pdfDoc = new Document(new iTextSharp.text.Rectangle(1100f, 20000.25f));
     PdfWriter writer = PdfWriter.GetInstance(pdfDoc, Response.OutputStream);
     pdfDoc.Open();
     iTextSharp.text.Image img = iTextSharp.text.Image.GetInstance(fullOutputPath);   
     img.ScaleAbsoluteHeight(20000);
     img.ScaleAbsoluteWidth(1024);     
     pdfDoc.Add(img);
     pdfDoc.Close();
     //Download the PDF file.
     Response.ContentType = "application/pdf";
     Response.AddHeader("content-disposition", "attachment;filename=ImageExport.pdf");
     Response.Cache.SetCacheability(HttpCacheability.NoCache);
     Response.Write(pdfDoc);
     Response.End();


}

你也可以参考我最老的帖子了解更多信息:在asp.net web表单中导航到网页被取消了

不是直接将HTML解析为PDF,而是可以创建HTML页面的位图,然后将位图插入到PDF中,例如使用iTextSharp。

这是一个代码如何获得一个URL的位图。我在这里找到了它,如果我找到了源,我会链接它。

public System.Drawing.Bitmap HTMLToImage(String strHTML)
{
    System.Drawing.Bitmap myBitmap = null;

    System.Threading.Thread myThread = new System.Threading.Thread(delegate()
    {
        // create a hidden web browser, which will navigate to the page
        System.Windows.Forms.WebBrowser myWebBrowser = new System.Windows.Forms.WebBrowser();
        // we don't want scrollbars on our image
        myWebBrowser.ScrollBarsEnabled = false;
        // don't let any errors shine through
        myWebBrowser.ScriptErrorsSuppressed = true;
        // let's load up that page!    
        myWebBrowser.Navigate("about:blank");

        // wait until the page is fully loaded
        while (myWebBrowser.ReadyState != System.Windows.Forms.WebBrowserReadyState.Complete)
            System.Windows.Forms.Application.DoEvents();

        myWebBrowser.Document.Body.InnerHtml = strHTML;

        // set the size of our web browser to be the same size as the page
        int intScrollPadding = 20;
        int intDocumentWidth = myWebBrowser.Document.Body.ScrollRectangle.Width + intScrollPadding;
        int intDocumentHeight = myWebBrowser.Document.Body.ScrollRectangle.Height + intScrollPadding;
        myWebBrowser.Width = intDocumentWidth;
        myWebBrowser.Height = intDocumentHeight;
        // a bitmap that we will draw to
        myBitmap = new System.Drawing.Bitmap(intDocumentWidth - intScrollPadding, intDocumentHeight - intScrollPadding);
        // draw the web browser to the bitmap
        myWebBrowser.DrawToBitmap(myBitmap, new System.Drawing.Rectangle(0, 0, intDocumentWidth - intScrollPadding, intDocumentHeight - intScrollPadding));
    });
    myThread.SetApartmentState(System.Threading.ApartmentState.STA);
    myThread.Start();
    myThread.Join();

    return myBitmap;
}

我强烈推荐NReco。它有免费版和付费版,真的很值得。它在后台使用wkhtmtopdf,但只需要一个程序集。太棒了。

使用示例:

通过NuGet安装。

var htmlContent = String.Format("<body>Hello world: {0}</body>", DateTime.Now);
var pdfBytes = (new NReco.PdfGenerator.HtmlToPdfConverter()).GeneratePdf(htmlContent);

免责声明:我不是开发者,只是这个项目的粉丝:)