使用NPOI读取Word、Excel文档内容

使用NPOI读取Excel的例子很多,读取Word的例子不多。

Excel的解析方式有多种,可以使用ODBC查询,把Excel作为一个数据集对待。也可以使用文档结构模型的方式进行解析,即解析Workbook(工作簿)、Sheet、Row、Column。

Word的解析比较复杂,因为Word的文档结构模型定义较为复杂。解析Word或者Excel,关键是理解Word、Excel的文档对象模型。

Word、Excel文档对象模型的解析,可以通过COM接口调用,此类方式使用较广。(可以先录制宏代码,再将其改写为对应语言的代码)

也可以使用XML模型解析,尤其是对于2007、2010版本的文档的解析。

using NPOI.POIFS.FileSystem;
using NPOI.SS.UserModel;
using NPOI.XSSF.UserModel;
using NPOI.XWPF.UserModel;
using System;
using System.Collections.Generic;
using System.Configuration;
using System.IO;
using System.Text;

namespace eyuan
{
/// <summary>
/// Helpers that extract the content of Excel workbooks and Word documents
/// through the NPOI object model (XSSFWorkbook / XWPFDocument).
/// </summary>
public static class NOPIHandler
{
  /// <summary>
  /// Reads every sheet of an Excel workbook into nested lists of cell text.
  /// </summary>
  /// <param name="fileName">Path of the workbook file; it is opened read-only.</param>
  /// <returns>
  /// One list per sheet, holding one list per existing row, holding the
  /// <c>ToString()</c> text of each existing cell in that row. Rows may have
  /// different lengths. When the file cannot be opened the failure is logged
  /// and an empty list is returned.
  /// </returns>
  public static List<List<List<string>>> ReadExcel(string fileName)
  {
    List<List<List<string>>> workBookContent = new List<List<List<string>>>();

    // Open the workbook. The stream is only needed while the workbook is constructed.
    XSSFWorkbook workbook = null;
    try
    {
      using (FileStream file = new FileStream(fileName, FileMode.Open, FileAccess.Read))
      {
        workbook = new XSSFWorkbook(file);
      }
    }
    catch (Exception e)
    {
      LogHandler.LogWrite(string.Format("文件{0}打开失败,错误:{1}", fileName, e.ToString()));
      // Without a workbook there is nothing to read; returning here avoids
      // dereferencing the null workbook below.
      return workBookContent;
    }

    // Loop over the sheets (sheet indexes start at 0).
    int sheetsCount = workbook.NumberOfSheets;
    for (int i = 0; i < sheetsCount; i++)
    {
      ISheet sheet = workbook.GetSheetAt(i);
      List<List<string>> sheetContent = new List<List<string>>();

      // Walk the logical row index range rather than counting physical rows:
      // GetRow takes a logical index and yields null for rows that were never
      // created, so sheets with gaps (or not starting at row 0) are handled.
      int lastRowNum = sheet.LastRowNum;
      for (int j = sheet.FirstRowNum; j <= lastRowNum; j++)
      {
        IRow row = sheet.GetRow(j);
        if (row == null)
        {
          continue;
        }

        // Only the cells that exist in this row are read, so the number of
        // entries can differ from row to row.
        List<string> rowContent = new List<string>();
        foreach (ICell cell in row.Cells)
        {
          rowContent.Add(cell == null ? "NIL" : cell.ToString());
        }

        sheetContent.Add(rowContent);
      }

      workBookContent.Add(sheetContent);
    }

    return workBookContent;
  }

  /// <summary>
  /// Reads an Excel workbook and flattens it into a single string.
  /// </summary>
  /// <param name="fileName">Path of the workbook file, passed on to <see cref="ReadExcel"/>.</param>
  /// <returns>
  /// The cells of each row joined with the "ExcelCellSeparator" app setting,
  /// each row followed by "ExcelRowSeparator" and each sheet followed by
  /// "ExcelSheetSeparator".
  /// </returns>
  public static string ReadExcelText(string fileName)
  {
    string cellSeparator = ConfigurationManager.AppSettings["ExcelCellSeparator"];
    string rowSeparator = ConfigurationManager.AppSettings["ExcelRowSeparator"];
    string sheetSeparator = ConfigurationManager.AppSettings["ExcelSheetSeparator"];

    StringBuilder sbFileText = new StringBuilder();
    foreach (List<List<string>> sheetContent in ReadExcel(fileName))
    {
      foreach (List<string> rowContent in sheetContent)
      {
        sbFileText.Append(string.Join(cellSeparator, rowContent.ToArray()));
        sbFileText.Append(rowSeparator);
      }
      sbFileText.Append(sheetSeparator);
    }

    return sbFileText.ToString();
  }

  /// <summary>
  /// Reads the content of a Word document into a single string.
  /// </summary>
  /// <remarks>
  /// Headers, footers, tables and images are included only when the matching
  /// app setting ("CaptureWordHeader", "CaptureWordFooter", "CaptureWordTable",
  /// "CaptureWordImage") equals "true"; body paragraphs are always included.
  /// Each section is wrapped in "Capture ... Begin" / "Capture ... End" lines.
  /// When images are captured, every picture is written to disk and the path
  /// of the written file is appended to the output.
  /// </remarks>
  /// <param name="fileName">Path of the document file; it is opened read-only.</param>
  /// <returns>
  /// The collected text, or an empty string when the file cannot be opened
  /// (the failure is logged).
  /// </returns>
  public static string ReadWordText(string fileName)
  {
    string tableCellSeparator = ConfigurationManager.AppSettings["WordTableCellSeparator"];
    string tableRowSeparator = ConfigurationManager.AppSettings["WordTableRowSeparator"];
    string tableSeparator = ConfigurationManager.AppSettings["WordTableSeparator"];

    string captureHeader = ConfigurationManager.AppSettings["CaptureWordHeader"];
    string captureFooter = ConfigurationManager.AppSettings["CaptureWordFooter"];
    string captureTable = ConfigurationManager.AppSettings["CaptureWordTable"];
    string captureImage = ConfigurationManager.AppSettings["CaptureWordImage"];

    // Used below as a composite format string; {0} receives the generated image file name.
    string imageFileNameFormat = ConfigurationManager.AppSettings["CaptureWordImageFileName"];

    #region Open the document
    XWPFDocument document = null;
    try
    {
      using (FileStream file = new FileStream(fileName, FileMode.Open, FileAccess.Read))
      {
        document = new XWPFDocument(file);
      }
    }
    catch (Exception e)
    {
      LogHandler.LogWrite(string.Format("文件{0}打开失败,错误:{1}", fileName, e.ToString()));
      // Without a document there is nothing to read; returning here avoids
      // dereferencing the null document below.
      return string.Empty;
    }
    #endregion

    StringBuilder sbFileText = new StringBuilder();

    #region Headers and footers
    if (captureHeader == "true")
    {
      sbFileText.AppendLine("Capture Header Begin");
      foreach (XWPFHeader xwpfHeader in document.HeaderList)
      {
        sbFileText.AppendLine(xwpfHeader.Text);
      }
      sbFileText.AppendLine("Capture Header End");
    }

    if (captureFooter == "true")
    {
      sbFileText.AppendLine("Capture Footer Begin");
      foreach (XWPFFooter xwpfFooter in document.FooterList)
      {
        sbFileText.AppendLine(xwpfFooter.Text);
      }
      sbFileText.AppendLine("Capture Footer End");
    }
    #endregion

    #region Tables
    if (captureTable == "true")
    {
      sbFileText.AppendLine("Capture Table Begin");
      foreach (XWPFTable table in document.Tables)
      {
        foreach (XWPFTableRow row in table.Rows)
        {
          // Every cell, including the last one of a row, is followed by the cell separator.
          foreach (XWPFTableCell cell in row.GetTableCells())
          {
            sbFileText.Append(cell.GetText());
            sbFileText.Append(tableCellSeparator);
          }
          sbFileText.Append(tableRowSeparator);
        }
        sbFileText.Append(tableSeparator);
      }
      sbFileText.AppendLine("Capture Table End");
    }
    #endregion

    #region Images
    if (captureImage == "true")
    {
      sbFileText.AppendLine("Capture Image Begin");
      foreach (XWPFPictureData pictureData in document.AllPictures)
      {
        string picExtName = pictureData.suggestFileExtension();
        string picFileName = pictureData.GetFileName();
        byte[] picFileContent = pictureData.GetData();

        // A GUID prefix keeps the written file names unique across pictures and calls.
        string picTempName = string.Format(imageFileNameFormat, Guid.NewGuid().ToString() + "_" + picFileName + "." + picExtName);

        // Creates (or overwrites) the file and closes it once the bytes are written.
        File.WriteAllBytes(picTempName, picFileContent);

        sbFileText.AppendLine(picTempName);
      }
      sbFileText.AppendLine("Capture Image End");
    }
    #endregion

    // Body paragraphs are always captured.
    sbFileText.AppendLine("Capture Paragraph Begin");
    foreach (XWPFParagraph paragraph in document.Paragraphs)
    {
      sbFileText.AppendLine(paragraph.ParagraphText);
    }
    sbFileText.AppendLine("Capture Paragraph End");

    return sbFileText.ToString();
  }
}
}

以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持编程宝库

一维数组的插入:实现效果:在1 2 3 后面插入4using System;using System.Collections;using System.Collections.Generic;using Syste ...