3 years ago
using DSWeb.Areas.MvcShipping.DB;
using DSWeb.MvcShipping.Comm;
using Newtonsoft.Json;
using org.apache.pdfbox.cos;
using org.apache.pdfbox.pdmodel;
using org.apache.pdfbox.pdmodel.common;
using org.apache.pdfbox.util;
using System;
using System.Collections.Generic;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Runtime.Caching;
using System.Text;
using System.Web;
using System.Web.Mvc;
namespace DSWeb.MvcShipping.Controllers
/// <summary>
/// MVC controller for picking text out of an uploaded document: it accepts a PDF/Excel/Word/RTF
/// upload, works from a PDF copy of it, caches the text runs of the first page and returns the
/// text found inside a caller-supplied rectangle.
/// </summary>
public class OcrController : Controller
// Folder under the application root that holds the working files (uploads, PDF conversions, page images).
private static readonly string ocrFilePath = "ocr_temp_files";
// Upload whitelist; compared against the lower-cased extension of the uploaded file name.
private static readonly string[] allowExts = new string[] { ".pdf", ".xls", ".xlsx", ".doc", ".docx",".rtf" };
//private static readonly int ImageScale = 1;
private static readonly int CacheExpire = 5;// Cache expiry, in minutes
/// <summary>
/// Shows the upload page, handing an optional error message to the view.
/// </summary>
/// <param name="errMsg">Message to show; exposed to the view as <c>ViewData["errMsg"]</c>. May be null.</param>
/// <returns>The default view of this action.</returns>
public ActionResult UpOcrAuto(string errMsg)
{
    ViewData["errMsg"] = errMsg;
    return View();
}
/// <summary>
/// Handles the upload form post. Validates the extension of the first uploaded file, converts
/// Excel and Word/RTF input to PDF with Aspose, runs <c>ReadPdf</c> on the result, then prepares a
/// <c>Receipt_Doc</c> record for the business number posted in the form.
/// </summary>
/// <param name="scale">Zoom factor forwarded to <c>ReadPdf</c>; defaults to 1.5.</param>
/// <returns>
/// A redirect to <c>SelectRegion</c> carrying the generated temp file name, or a redirect to
/// <c>UpOcrAuto</c> (with an error message when the extension is not whitelisted).
/// </returns>
public ActionResult UpOcrFile(float scale = 1.5f)
// Only the first file of the request is considered.
if (Request.Files.Count > 0)
var ext = Path.GetExtension(Request.Files[0].FileName).ToLower();
// Anything outside the whitelist goes back to the upload page.
// The message reads: "Only pdf, excel and word files can be uploaded".
if (!allowExts.Contains(ext))
return RedirectToAction("UpOcrAuto", new { errMsg = "只能上传pdf、excel和word文件" });
var ocrDir = Server.MapPath($"~/{ocrFilePath}");
// NOTE(review): no statement is visible under this check — presumably the folder is created here; confirm.
if (!Directory.Exists(ocrDir))
// A fresh GUID is the base name of every working file and the key later passed to ReadPdf / SelectRegion.
var tmpFileName = Guid.NewGuid().ToString();
var saveFilePathName = Path.Combine(ocrDir, tmpFileName + ext);
// NOTE(review): the step that writes the upload to saveFilePathName is not visible here, yet the
// Aspose loaders below and ReadPdf read from that location — confirm the file is saved first.
// Spreadsheets are converted to "<guid>.pdf" in the same folder.
if (ext == ".xls" || ext == ".xlsx")
Aspose.Cells.Workbook wb = new Aspose.Cells.Workbook(saveFilePathName);
string targetPath = Path.Combine(ocrDir, $"{tmpFileName}.pdf");
wb.Save(targetPath, Aspose.Cells.SaveFormat.Pdf);
// Word and RTF documents are converted the same way; a ".pdf" upload needs no conversion.
else if (ext == ".doc" || ext == ".docx" || ext == ".rtf")
Aspose.Words.Document doc = new Aspose.Words.Document(saveFilePathName);
string targetPath = Path.Combine(ocrDir, $"{tmpFileName}.pdf");
doc.Save(targetPath, Aspose.Words.SaveFormat.Pdf);
// Extracts and caches the text layout of "<guid>.pdf".
ReadPdf(tmpFileName, scale);
// Region title below: "attach to document management".
#region 挂载到单据管理
var BSNO = Request.Form["BSNO"];
// Each upload gets its own GUID-named sub-folder under UploadFiles/MsOpFiles.
var CUSTID = Guid.NewGuid().ToString();
// NOTE(review): a MapPath argument without a leading "~/" or "/" is resolved relative to the
// current request path — confirm "../../" lands on the intended folder for this route.
var path = Server.MapPath("../../UploadFiles/MsOpFiles/" + CUSTID);
// NOTE(review): no statement is visible under this check — presumably the folder is created here; confirm.
if (!Directory.Exists(path))
// NOTE(review): size is not used anywhere in the visible code.
var size = Request.Files[0].ContentLength;
var name = Path.GetFileName(Request.Files[0].FileName);
var usercode = Convert.ToString(Session["USERCODE"]);
// Stored file name = user code + millisecond timestamp + original file name.
var partname = usercode + DateTime.Now.ToString("yyyyMMddHHmmssfff") + name;
string filename = path + "\\" + partname;
string filenameURL = "../../UploadFiles/MsOpFiles/" + CUSTID + "/" + partname;
// NOTE(review): nothing visible writes a file to filename before this existence check — confirm
// where the copy is saved.
if (System.IO.File.Exists(filename))
ReceiptContext receiptContext = new ReceiptContext();
Receipt_Doc recDoc = new Receipt_Doc();
recDoc.GID = Guid.NewGuid().ToString();
recDoc.BSNO = BSNO;
// URL holds the original file name; Driect_URL holds the relative path of the stored copy.
recDoc.URL = name;
recDoc.Driect_URL = filenameURL;
recDoc.MODIFIEDUSER = Convert.ToString(Session["USERID"]);
recDoc.MODIFIEDTIME = DateTime.Now;
recDoc.RECEIPTTYPE = string.Empty;
recDoc.documentAttachedCode = string.Empty;
recDoc.documentAttachedNo = string.Empty;
// NOTE(review): no call that adds recDoc to receiptContext or saves it is visible here — confirm.
return RedirectToAction("SelectRegion", new { fileName = tmpFileName });
// Reached when the request carried no file.
return RedirectToAction("UpOcrAuto");
/// <summary>
/// Shows the region-selection page for a previously uploaded file.
/// </summary>
/// <param name="fileName">Temp file base name; exposed to the view as <c>ViewData["FileName"]</c>.</param>
/// <returns>The default view of this action.</returns>
public ActionResult SelectRegion(string fileName)
{
    ViewData["FileName"] = fileName;
    return View();
}
/// <summary>
/// Returns text built from the cached text runs of <paramref name="fileName"/> whose scaled
/// rectangle intersects the rectangle given by <paramref name="x"/>, <paramref name="y"/>,
/// <paramref name="w"/> and <paramref name="h"/>.
/// </summary>
/// <param name="fileName">Cache key / temp file base name of the processed PDF.</param>
/// <param name="x">Left edge of the selection rectangle.</param>
/// <param name="y">Top edge of the selection rectangle.</param>
/// <param name="w">Width of the selection rectangle.</param>
/// <param name="h">Height of the selection rectangle.</param>
/// <param name="scale">Factor applied to every cached coordinate before the hit test; defaults to 1.5.</param>
/// <returns>The content of the string builder, or an empty string when nothing is cached for the file.</returns>
public ContentResult GetRegionText(string fileName, int x, int y, int w, int h, float scale = 1.5f)
// The cache entry may have expired; ReadPdf re-creates it from the PDF on disk.
if (!MemoryCache.Default.Contains(fileName))
ReadPdf(fileName, scale);
// The cached value is the anonymous object stored by ReadPdf (Size + Result), hence dynamic.
dynamic obj = MemoryCache.Default[fileName];
if (obj != null)
Rectangle rectMouse = new Rectangle(x, y, w, h);
List<TextSizeLocation> listTextLocations = obj.Result;
Size size = obj.Size;
StringBuilder sb = new StringBuilder();
// Y of the last matched run; 0 means "nothing matched yet".
int lastY = 0;
foreach (var item in listTextLocations)
// Scale the run's box; its Y is subtracted from the page height, i.e. the vertical axis is flipped.
int xItem = (int)(item.X * scale);
int yItem = (int)(size.Height * scale - item.Y * scale);
int wItem = (int)(item.Width * scale);
int hItem = (int)(item.Height * scale);
Rectangle rect = new Rectangle(xItem, yItem, wItem, hItem);
if (rect.IntersectsWith(rectMouse))
// NOTE(review): the statements that append to sb are not visible here. Presumably a Y change of
// more than 1 starts a new line and the run's text is appended — confirm.
if (lastY != 0 && Math.Abs(lastY - item.Y) > 1)
lastY = (int)item.Y;
return Content(sb.ToString());
return Content("");
/// <summary>
/// Serves the page image of a previously processed file at the requested zoom. When the image is
/// not on disk but the file's text layout is still cached, <c>DrawImg</c> is asked to render it first.
/// </summary>
/// <param name="fileName">Temp file base name (no folder, no extension) issued by the upload step.</param>
/// <param name="scale">Zoom factor; it is part of the image file name. Defaults to 1.5.</param>
/// <returns>The JPEG file, or a 404 result when the name is unusable or no image is available.</returns>
public ActionResult GetCanvasImage(string fileName, float scale = 1.5f)
{
    // fileName arrives straight from the request and becomes part of a disk path below, so only
    // a bare file name is accepted: separators, drive colons and other invalid characters are refused.
    if (string.IsNullOrEmpty(fileName) || fileName.IndexOfAny(Path.GetInvalidFileNameChars()) >= 0)
    {
        return HttpNotFound();
    }

    var ocrDir = Server.MapPath($"~/{ocrFilePath}");
    var imgPath = Path.Combine(ocrDir, $"{fileName}_{scale}.jpg");

    if (!System.IO.File.Exists(imgPath))
    {
        // The cache entry is an anonymous object (Size + Result), hence dynamic.
        dynamic cached = MemoryCache.Default[fileName];
        if (cached != null)
        {
            Size pageSize = cached.Size;
            DrawImg((int)(pageSize.Width * scale), (int)(pageSize.Height * scale), fileName, scale);
        }
    }

    if (!System.IO.File.Exists(imgPath))
    {
        // Previously this fell through to a null result (an empty 200 response).
        return HttpNotFound();
    }

    // "image/jpeg" is the registered media type for JPEG; "image/jpg" is not.
    return File(imgPath, "image/jpeg");
}
/// <summary>
/// Loads "&lt;fileName&gt;.pdf" from the working folder, collects the text positions of its first
/// page, merges adjacent ones into runs, caches the result under <paramref name="fileName"/> and
/// calls <c>DrawImg</c> for the page.
/// </summary>
/// <param name="fileName">Base name of the PDF (no extension); also used as the cache key.</param>
/// <param name="scale">Zoom factor applied to the crop-box size passed to <c>DrawImg</c>.</param>
private void ReadPdf(string fileName, float scale)
var ocrDir = Server.MapPath($"~/{ocrFilePath}");
var pdfFileName = System.IO.Path.Combine(ocrDir, fileName + ".pdf");
// NOTE(review): no statement is visible under this check — presumably the method returns early
// when the PDF does not exist; confirm.
if (!System.IO.File.Exists(pdfFileName))
// NOTE(review): no close/dispose of document is visible in this block — confirm it is released.
PDDocument document = PDDocument.load(pdfFileName);
PrintTextLocations printer = new PrintTextLocations();
java.util.List allPages = document.getDocumentCatalog().getAllPages();
// Only the first page (index 0) is processed.
PDPage page = (PDPage)allPages.get(0);
PDStream contents = page.getContents();
if (contents != null)
// Feeds the page's content stream to the stripper, which fills printer.TextWithLocations.
printer.processStream(page, page.findResources(), page.getContents().getStream());
var listT = new List<TextSizeLocation>();
// t is the run currently being assembled.
TextSizeLocation t = null;
foreach (var item in printer.TextWithLocations)
if (t == null)
t = item.Clone();
// An item extends the current run when its X is within half its own width of the run's right
// edge and the two heights differ by at most 1.
// NOTE(review): the else branches and the statements that add finished runs to listT are not
// visible here — confirm the exact structure before relying on this description.
if (Math.Abs(t.Width + t.X - item.X) <= item.Width / 2 && Math.Abs(t.Height - item.Height) <= 1) // end of the run meets the start of this item
t.Width += item.Width;
t.Text += item.Text;
t = item.Clone();
// Order runs by Y descending, then by X ascending.
var query = from it in listT orderby it.Y descending, it.X select it;
listT = query.ToList();
// Cached shape: Size = crop-box width/height truncated to int, Result = the ordered runs.
var objStore = new { Size = new Size((int)page.findCropBox().getWidth(), (int)page.findCropBox().getHeight()), Result = listT };
// Entry expires CacheExpire minutes from now.
// NOTE(review): MemoryCache.Add leaves an existing entry for the same key untouched — confirm that is intended.
MemoryCache.Default.Add(new CacheItem(fileName, objStore), new CacheItemPolicy() { AbsoluteExpiration = new DateTimeOffset(DateTime.Now.AddMinutes(CacheExpire)) });
// DrawImg reads the entry cached just above.
DrawImg((int)(page.findCropBox().getWidth() * scale), (int)(page.findCropBox().getHeight() * scale), fileName, scale);
/// <summary>
/// Draws every cached text run of <paramref name="fileName"/> onto a new bitmap of the given size,
/// each at its scaled position and in a font size chosen by <c>GetMatchTextSize</c>.
/// </summary>
/// <param name="width">Bitmap width in pixels.</param>
/// <param name="height">Bitmap height in pixels.</param>
/// <param name="fileName">Cache key of the text layout; also the base of the image file name.</param>
/// <param name="scale">Factor applied to run positions and sizes; also part of the image file name.</param>
private void DrawImg(int width, int height, string fileName, float scale)
var ocrDir = Server.MapPath($"~/{ocrFilePath}");
// NOTE(review): bitmap, graphics and the per-run Font below are disposable and no dispose is
// visible in this block — confirm they are released.
Bitmap bitmap = new Bitmap(width, height);
Graphics graphics = Graphics.FromImage(bitmap);
// The cached value is the anonymous object stored by ReadPdf (Size + Result), hence dynamic.
dynamic obj = MemoryCache.Default[fileName];
if (obj != null)
List<TextSizeLocation> listTextLocations = obj.Result;
// NOTE(review): size and rnd are not used anywhere in the visible code.
Size size = obj.Size;
Random rnd = new Random();
foreach (var item in listTextLocations)
// Font size that makes the text as wide as the run's scaled box allows.
float textSize = GetMatchTextSize(item.Width * scale, item.Height * scale, item.Text);
// The run's Y is subtracted from the bitmap height, i.e. the vertical axis is flipped.
graphics.DrawString(item.Text, new Font("黑体", textSize), Brushes.Black, item.X * scale, bitmap.Height - item.Y * scale);
// Same naming pattern GetCanvasImage looks up.
// NOTE(review): no statement that saves the bitmap to imgPath is visible here — confirm.
var imgPath = Path.Combine(ocrDir, $"{fileName}_{scale}.jpg");
/// <summary>
/// Finds the largest font size, searched in 0.1 steps from 1 up to (but not including) 100, at
/// which <paramref name="str"/> drawn in the "黑体" font is still no wider than <paramref name="width"/>.
/// </summary>
/// <param name="width">Available width, in the units returned by <c>Graphics.MeasureString</c>.</param>
/// <param name="height">Available height. Ignored: only the width constrains the result.</param>
/// <param name="str">Text to measure.</param>
/// <returns>
/// The last size tried before the measured width exceeded <paramref name="width"/>. The result
/// is 1 when size 1 is already too wide, and also when no size below 100 overflows.
/// </returns>
private float GetMatchTextSize(float width, float height, string str)
{
    const float step = 0.1f;
    float best = 1;

    // The 1x1 bitmap exists only to obtain a Graphics to measure with.
    using (var scratch = new Bitmap(1, 1))
    using (var g = Graphics.FromImage(scratch))
    {
        for (float candidate = 1; candidate < 100; candidate += step)
        {
            // Each probe font wraps a GDI handle; release it right away instead of leaving
            // hundreds per call to the finalizer.
            using (var font = new Font("黑体", candidate))
            {
                if (g.MeasureString(str, font).Width > width)
                {
                    if (candidate > 1)
                    {
                        best = candidate - step;
                    }

                    // The first overflow ends the search: the previous step is the best fit.
                    break;
                }
            }
        }
    }

    return best;
}
/// <summary>
/// A piece of text together with the position and size of its bounding box.
/// </summary>
public class TextSizeLocation
{
    /// <summary>Creates an empty instance; all coordinates are 0 and <see cref="Text"/> is null.</summary>
    public TextSizeLocation() { }

    /// <summary>Creates an instance with every value set.</summary>
    /// <param name="x">Horizontal position.</param>
    /// <param name="y">Vertical position.</param>
    /// <param name="width">Box width.</param>
    /// <param name="height">Box height.</param>
    /// <param name="text">The text itself.</param>
    public TextSizeLocation(float x, float y, float width, float height, string text)
    {
        X = x;
        Y = y;
        Width = width;
        Height = height;
        Text = text;
    }

    /// <summary>Horizontal position.</summary>
    public float X { get; set; }

    /// <summary>Vertical position.</summary>
    public float Y { get; set; }

    /// <summary>Box width.</summary>
    public float Width { get; set; }

    /// <summary>Box height.</summary>
    public float Height { get; set; }

    /// <summary>The text itself.</summary>
    public string Text { get; set; }

    /// <summary>
    /// Returns a new, independent instance carrying the same five values.
    /// </summary>
    public TextSizeLocation Clone()
    {
        return new TextSizeLocation(X, Y, Width, Height, Text);
    }
}
/// <summary>
/// A <c>PDFTextStripper</c> that records every text position it is handed as a
/// <c>TextSizeLocation</c> (position, size and character).
/// </summary>
public class PrintTextLocations : PDFTextStripper
// Filled by processTextPosition, one entry per text position received.
public List<TextSizeLocation> TextWithLocations { get; set; }
// NOTE(review): initialised in the constructor but never added to in the visible code — confirm whether it is still needed.
public List<TextPosition> TextPositions { get; set; }
// Starts with both lists empty.
public PrintTextLocations()
this.TextWithLocations = new List<TextSizeLocation>();
this.TextPositions = new List<TextPosition>();
// Forwards to the base implementation unchanged.
public override void processStream(PDPage aPage, PDResources resources, COSStream cosStream)
base.processStream(aPage, resources, cosStream);
// Records the X/Y of the text position plus its width, height and character.
protected override void processTextPosition(TextPosition text)
this.TextWithLocations.Add(new TextSizeLocation(text.getTextPos().getXPosition(), text.getTextPos().getYPosition(), text.getWidth(), text.getHeight(), text.getCharacter()));