Convert HTML string containing accented letters to word document using OpenXML in ASP.Net

Last Reply one month ago By dharmendr

Posted one month ago


I am working on a project in C#.

I read HTML-formatted data from a MySQL database and need to convert that HTML into a Word document (docx or doc).

Convert HTML string from database to Word document using OpenXML in ASP.Net

My problem is with accented letters (é, è, à, ò, ù) and special characters: in the exported Word file I get è, ’, Ã, â, ù instead.

How can I resolve this?

Thanks in advance for any help

You are viewing reply posted by: dharmendr one month ago.
Posted one month ago

Hi comunidadmexi...,

Refer to the code below.



using System.Data;
using System.IO;
using System.Text;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;


Imports System.Data
Imports System.IO
Imports System.Text
Imports DocumentFormat.OpenXml
Imports DocumentFormat.OpenXml.Packaging
Imports DocumentFormat.OpenXml.Wordprocessing



/// <summary>
/// On the first (non-postback) request, builds a Word document in memory from
/// HTML fragments and sends it to the browser as the attachment "HTML.docx".
/// </summary>
/// <remarks>
/// Each row of the sample table becomes one HTML page that is embedded in the
/// document as an alternative-format chunk (altChunk). The HTML is encoded as
/// Windows-1252 before it is stored in the chunk; this is the step that
/// addresses the garbled accented letters described in the question.
/// </remarks>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
protected void Page_Load(object sender, EventArgs e)
{
    if (!this.IsPostBack)
    {
        // Sample data standing in for the rows read from the database.
        DataTable dt = new DataTable();
        dt.Columns.AddRange(new DataColumn[]
        {
            new DataColumn("contents"),
            new DataColumn("image"),
            new DataColumn("ppt"),
            new DataColumn("xls"),
            new DataColumn("pdf")
        });
        dt.Rows.Add("<h4><strong>- Nearé, è, à, ò, ù</strong></h4>", "b8fa9493.jpg", "rtfghb8fa9493.ppt", "kioplb8fa9493.xls", "xqab8fa9493.pdf");
        dt.Rows.Add("<p><h3><span style='color:#0000ff;'><strong>- Miss</strong></span></h3></p>", "b8fa9493.jpg", "rtfghb8fa9493.ppt", "kioplb8fa9493.xls", "xqab8fa9493.pdf");

        // Looked up once instead of on every row.
        // NOTE(review): characters without a Windows-1252 mapping cannot be
        // represented by this encoding - confirm the data stays within it.
        Encoding win1252 = Encoding.GetEncoding("Windows-1252");

        using (MemoryStream ms = new MemoryStream())
        {
            using (WordprocessingDocument doc = WordprocessingDocument.Create(ms, WordprocessingDocumentType.Document))
            {
                MainDocumentPart mainPart = doc.AddMainDocumentPart();
                mainPart.Document = new Document();
                Body body = mainPart.Document.AppendChild(new Body());
                for (int i = 0; i < dt.Rows.Count; i++)
                {
                    // The relationship id must be unique per imported part.
                    string altChunkId = "AltChunkId" + (i + 1);
                    AlternativeFormatImportPart afip = mainPart.AddAlternativeFormatImportPart(AlternativeFormatImportPartType.Html, altChunkId);

                    string html = "<html><head></head><body><table><tr>";
                    html += "<td>" + dt.Rows[i]["contents"].ToString() + "</td>";
                    html += "<td><img src='" + dt.Rows[i]["image"].ToString() + "' alt='" + dt.Rows[i]["image"].ToString() + "' /></td>";
                    html += "<td><a href='" + dt.Rows[i]["ppt"].ToString() + "' ><img alt='PPT' src='' /></a></td>";
                    html += "<td><a href='" + dt.Rows[i]["xls"].ToString() + "' ><img alt='Excel' src='' /></a></td>";
                    html += "<td><a href='" + dt.Rows[i]["pdf"].ToString() + "' ><img alt='PDF' src='' /></a></td>";
                    html += "</tr></table></body></html>";

                    // Store the HTML as Windows-1252 bytes rather than UTF-8.
                    using (MemoryStream chunkStream = new MemoryStream(win1252.GetBytes(html)))
                    {
                        afip.FeedData(chunkStream);
                    }

                    // Reference the imported part from the body; without this
                    // element the HTML is stored in the package but never shown.
                    AltChunk altChunk = new AltChunk();
                    altChunk.Id = altChunkId;
                    body.AppendChild(altChunk);
                }

                mainPart.Document.Save();
            }

            // The package is complete only after the document has been
            // disposed, so the bytes are read from the stream at this point.
            Response.AppendHeader("Content-Disposition", "attachment;filename=HTML.docx");
            Response.ContentType = "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
            Response.BinaryWrite(ms.ToArray());
            // Stop here so no page output is sent after the file bytes.
            Response.End();
        }
    }
}


''' <summary>
''' On the first (non-postback) request, builds a Word document in memory from
''' HTML fragments and sends it to the browser as the attachment "HTML.docx".
''' </summary>
''' <remarks>
''' Each row of the sample table becomes one HTML page that is embedded in the
''' document as an alternative-format chunk (altChunk). The HTML is encoded as
''' Windows-1252 before it is stored in the chunk; this is the step that
''' addresses the garbled accented letters described in the question.
''' </remarks>
''' <param name="sender">Event source; not used.</param>
''' <param name="e">Event data; not used.</param>
Protected Sub Page_Load(ByVal sender As Object, ByVal e As EventArgs) Handles Me.Load
    If Not Me.IsPostBack Then
        ' Sample data standing in for the rows read from the database.
        Dim dt As DataTable = New DataTable()
        dt.Columns.AddRange(New DataColumn() {
                            New DataColumn("contents"),
                            New DataColumn("image"),
                            New DataColumn("ppt"),
                            New DataColumn("xls"),
                            New DataColumn("pdf")})
        dt.Rows.Add("<h4><strong>- Nearé, è, à, ò, ù</strong></h4>", "b8fa9493.jpg", "rtfghb8fa9493.ppt", "kioplb8fa9493.xls", "xqab8fa9493.pdf")
        dt.Rows.Add("<p><h3><span style='color:#0000ff;'><strong>- Miss</strong></span></h3></p>", "b8fa9493.jpg", "rtfghb8fa9493.ppt", "kioplb8fa9493.xls", "xqab8fa9493.pdf")

        ' Looked up once instead of on every row.
        ' NOTE(review): characters without a Windows-1252 mapping cannot be
        ' represented by this encoding - confirm the data stays within it.
        Dim win1252 As Encoding = Encoding.GetEncoding("Windows-1252")

        Using ms As MemoryStream = New MemoryStream()
            Using doc As WordprocessingDocument = WordprocessingDocument.Create(ms, WordprocessingDocumentType.Document)
                Dim mainPart As MainDocumentPart = doc.AddMainDocumentPart()
                mainPart.Document = New Document()
                Dim body As Body = mainPart.Document.AppendChild(New Body())
                For i As Integer = 0 To dt.Rows.Count - 1
                    ' The relationship id must be unique per imported part.
                    Dim altChunkId As String = "AltChunkId" & (i + 1)
                    Dim afip As AlternativeFormatImportPart = mainPart.AddAlternativeFormatImportPart(AlternativeFormatImportPartType.Html, altChunkId)

                    Dim html As String = "<html><head></head><body><table><tr>"
                    html &= "<td>" & dt.Rows(i)("contents").ToString() & "</td>"
                    html &= "<td><img src='" & dt.Rows(i)("image").ToString() & "' alt='" & dt.Rows(i)("image").ToString() & "' /></td>"
                    html &= "<td><a href='" & dt.Rows(i)("ppt").ToString() & "' ><img alt='PPT' src='' /></a></td>"
                    html &= "<td><a href='" & dt.Rows(i)("xls").ToString() & "' ><img alt='Excel' src='' /></a></td>"
                    html &= "<td><a href='" & dt.Rows(i)("pdf").ToString() & "' ><img alt='PDF' src='' /></a></td>"
                    html &= "</tr></table></body></html>"

                    ' Store the HTML as Windows-1252 bytes rather than UTF-8.
                    Using chunkStream As MemoryStream = New MemoryStream(win1252.GetBytes(html))
                        afip.FeedData(chunkStream)
                    End Using

                    ' Reference the imported part from the body; without this
                    ' element the HTML is stored in the package but never shown.
                    Dim altChunk As AltChunk = New AltChunk()
                    altChunk.Id = altChunkId
                    body.AppendChild(altChunk)
                Next

                mainPart.Document.Save()
            End Using

            ' The package is complete only after the document has been
            ' disposed, so the bytes are read from the stream at this point.
            Response.AppendHeader("Content-Disposition", "attachment;filename=HTML.docx")
            Response.ContentType = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
            Response.BinaryWrite(ms.ToArray())
            ' Stop here so no page output is sent after the file bytes.
            Response.End()
        End Using
    End If
End Sub