using DSWeb.Areas.MvcShipping.DB;
using DSWeb.MvcShipping.Comm;
using Newtonsoft.Json;
using org.apache.pdfbox.cos;
using org.apache.pdfbox.pdmodel;
using org.apache.pdfbox.pdmodel.common;
using org.apache.pdfbox.util;
using System;
using System.Collections.Generic;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Runtime.Caching;
using System.Text;
using System.Web;
using System.Web.Mvc;

namespace DSWeb.MvcShipping.Controllers
{
    /// <summary>
    /// Accepts an uploaded PDF / Excel / Word document, converts it to PDF when needed,
    /// extracts the positioned text of the first page, renders that text onto a preview
    /// image, and returns the text that falls inside a rectangle selected on the preview.
    /// </summary>
    public class OcrController : Controller
    {
        // Folder (relative to the application root) holding the temporary PDFs and preview images.
        private static readonly string ocrFilePath = "ocr_temp_files";

        // Upload extensions accepted by UpOcrFile (compared in lower case).
        private static readonly string[] allowExts = new string[] { ".pdf", ".xls", ".xlsx", ".doc", ".docx", ".rtf" };

        // Cache expiry, in minutes.
        private static readonly int CacheExpire = 5;

        /// <summary>
        /// Extracted first-page data stored in <see cref="MemoryCache.Default"/> under the temp file name.
        /// Internal (not private) so it stays as reachable as the anonymous type it replaces.
        /// </summary>
        internal sealed class PageTextCache
        {
            public PageTextCache(Size size, List<TextSizeLocation> result)
            {
                Size = size;
                Result = result;
            }

            /// <summary>Crop-box size of the page, truncated to whole units.</summary>
            public Size Size { get; }

            /// <summary>Merged text runs, ordered by Y descending and then by X.</summary>
            public List<TextSizeLocation> Result { get; }
        }

        /// <summary>Shows the upload page, optionally with an error message.</summary>
        /// <param name="errMsg">Message exposed to the view through <c>ViewData["errMsg"]</c>.</param>
        [HttpGet]
        public ActionResult UpOcrAuto(string errMsg)
        {
            ViewData["errMsg"] = errMsg;
            return View();
        }

        /// <summary>
        /// Handles the upload: stores the first posted file under a fresh GUID name, converts
        /// Excel/Word documents to PDF, extracts the first page's text, records the file as a
        /// receipt document, then redirects to the region-selection page.
        /// </summary>
        /// <param name="scale">Scale factor used for the preview image.</param>
        /// <returns>
        /// A redirect to <c>SelectRegion</c> on success; a redirect back to <c>UpOcrAuto</c>
        /// when no file was posted or the extension is not allowed.
        /// </returns>
        [HttpPost]
        public ActionResult UpOcrFile(float scale = 1.5f)
        {
            if (Request.Files.Count == 0)
            {
                return RedirectToAction("UpOcrAuto");
            }

            HttpPostedFileBase upload = Request.Files[0];

            // Invariant lower-casing: the comparison is against fixed ASCII extensions, not user-visible text.
            var ext = Path.GetExtension(upload.FileName).ToLowerInvariant();
            if (!allowExts.Contains(ext))
            {
                return RedirectToAction("UpOcrAuto", new { errMsg = "只能上传pdf、excel和word文件!" });
            }

            var ocrDir = Server.MapPath($"~/{ocrFilePath}");
            if (!Directory.Exists(ocrDir))
            {
                Directory.CreateDirectory(ocrDir);
            }

            var tmpFileName = Guid.NewGuid().ToString();
            var saveFilePathName = Path.Combine(ocrDir, tmpFileName + ext);
            upload.SaveAs(saveFilePathName);

            ConvertToPdfIfNeeded(saveFilePathName, ext, Path.Combine(ocrDir, $"{tmpFileName}.pdf"));
            ReadPdf(tmpFileName, scale);
            AttachToReceiptDocs(upload);

            return RedirectToAction("SelectRegion", new { fileName = tmpFileName });
        }

        /// <summary>Shows the region-selection page for a previously uploaded file.</summary>
        /// <param name="fileName">Temp file name (without extension) produced by <see cref="UpOcrFile"/>.</param>
        [HttpGet]
        public ActionResult SelectRegion(string fileName)
        {
            ViewData["FileName"] = fileName;
            return View();
        }

        /// <summary>
        /// Returns the text of every cached text run whose scaled bounding box intersects
        /// the given rectangle. A line break is inserted when the Y position changes by more than 1.
        /// </summary>
        /// <param name="fileName">Temp file name (without extension).</param>
        /// <param name="x">Left edge of the selected rectangle, in preview-image pixels.</param>
        /// <param name="y">Top edge of the selected rectangle, in preview-image pixels.</param>
        /// <param name="w">Width of the selected rectangle.</param>
        /// <param name="h">Height of the selected rectangle.</param>
        /// <param name="scale">Scale factor the preview image was rendered with.</param>
        /// <returns>The collected text, or an empty string when nothing is available.</returns>
        [HttpPost]
        public ContentResult GetRegionText(string fileName, int x, int y, int w, int h, float scale = 1.5f)
        {
            // fileName comes from the client and ends up in a file path: refuse anything that is not a bare name.
            if (!IsSafeFileName(fileName))
            {
                return Content("");
            }

            var cached = MemoryCache.Default[fileName] as PageTextCache;
            if (cached == null)
            {
                // Entry expired or never created: rebuild it from the PDF on disk.
                ReadPdf(fileName, scale);
                cached = MemoryCache.Default[fileName] as PageTextCache;
            }

            if (cached == null)
            {
                return Content("");
            }

            Rectangle rectMouse = new Rectangle(x, y, w, h);
            Size size = cached.Size;
            StringBuilder sb = new StringBuilder();
            int lastY = 0;

            foreach (var item in cached.Result)
            {
                int xItem = (int)(item.X * scale);
                // Y is flipped against the page height (stored Y is presumably measured
                // from the bottom of the page — confirm against PDFBox).
                int yItem = (int)(size.Height * scale - item.Y * scale);
                int wItem = (int)(item.Width * scale);
                int hItem = (int)(item.Height * scale);
                Rectangle rect = new Rectangle(xItem, yItem, wItem, hItem);

                if (!rect.IntersectsWith(rectMouse))
                {
                    continue;
                }

                if (lastY != 0 && Math.Abs(lastY - item.Y) > 1)
                {
                    sb.AppendLine();
                }

                sb.Append(item.Text);
                lastY = (int)item.Y;
            }

            return Content(sb.ToString());
        }

        /// <summary>
        /// Returns the preview image for a file at the given scale, rendering it from the
        /// cached text data when the image file does not exist yet.
        /// </summary>
        /// <param name="fileName">Temp file name (without extension).</param>
        /// <param name="scale">Scale factor of the requested preview.</param>
        /// <returns>The image file, or a 404 result when no image can be produced.</returns>
        [HttpGet]
        public ActionResult GetCanvasImage(string fileName, float scale = 1.5f)
        {
            if (!IsSafeFileName(fileName))
            {
                return HttpNotFound();
            }

            var ocrDir = Server.MapPath($"~/{ocrFilePath}");
            var imgPath = Path.Combine(ocrDir, $"{fileName}_{scale}.jpg");

            if (!System.IO.File.Exists(imgPath))
            {
                var cached = MemoryCache.Default[fileName] as PageTextCache;
                if (cached != null)
                {
                    Size size = cached.Size;
                    DrawImg((int)(size.Width * scale), (int)(size.Height * scale), fileName, scale);
                }
            }

            if (System.IO.File.Exists(imgPath))
            {
                // NOTE(review): "image/jpg" is not a registered MIME type ("image/jpeg" is), and DrawImg
                // saves without an explicit ImageFormat. Left unchanged to keep the served bytes identical.
                return File(imgPath, "image/jpg");
            }

            // Previously returned null, which MVC turns into an empty 200 response.
            return HttpNotFound();
        }

        /// <summary>
        /// Returns true when <paramref name="fileName"/> is a non-empty bare file name
        /// (no directory separators or other characters that are invalid in a file name).
        /// </summary>
        private static bool IsSafeFileName(string fileName)
        {
            return !string.IsNullOrWhiteSpace(fileName)
                && fileName.IndexOfAny(Path.GetInvalidFileNameChars()) < 0;
        }

        /// <summary>
        /// Converts an Excel or Word/RTF file to PDF with Aspose; does nothing for other extensions.
        /// </summary>
        private static void ConvertToPdfIfNeeded(string sourcePath, string ext, string targetPdfPath)
        {
            if (ext == ".xls" || ext == ".xlsx")
            {
                Aspose.Cells.Workbook wb = new Aspose.Cells.Workbook(sourcePath);
                wb.Save(targetPdfPath, Aspose.Cells.SaveFormat.Pdf);
            }
            else if (ext == ".doc" || ext == ".docx" || ext == ".rtf")
            {
                Aspose.Words.Document doc = new Aspose.Words.Document(sourcePath);
                doc.Save(targetPdfPath, Aspose.Words.SaveFormat.Pdf);
            }
        }

        /// <summary>
        /// Attaches the upload to document management: stores a copy under
        /// UploadFiles/MsOpFiles/{new GUID}/ and inserts a <c>Receipt_Doc</c> row for the
        /// posted <c>BSNO</c>.
        /// </summary>
        private void AttachToReceiptDocs(HttpPostedFileBase upload)
        {
            var BSNO = Request.Form["BSNO"];
            var CUSTID = Guid.NewGuid().ToString();

            var path = Server.MapPath("../../UploadFiles/MsOpFiles/" + CUSTID);
            if (!Directory.Exists(path))
            {
                Directory.CreateDirectory(path);
            }

            // GetFileName strips any client-side directory part from the posted name.
            var name = Path.GetFileName(upload.FileName);
            var usercode = Convert.ToString(Session["USERCODE"]);
            var partname = usercode + DateTime.Now.ToString("yyyyMMddHHmmssfff") + name;
            string filename = path + "\\" + partname;
            string filenameURL = "../../UploadFiles/MsOpFiles/" + CUSTID + "/" + partname;

            if (System.IO.File.Exists(filename))
            {
                System.IO.File.Delete(filename);
            }
            upload.SaveAs(filename);

            // NOTE(review): ReceiptContext looks like an EF DbContext; if it is IDisposable it
            // should be wrapped in a using block — confirm against its declaration.
            ReceiptContext receiptContext = new ReceiptContext();
            Receipt_Doc recDoc = new Receipt_Doc();
            recDoc.GID = Guid.NewGuid().ToString();
            recDoc.BSNO = BSNO;
            recDoc.URL = name;
            recDoc.Driect_URL = filenameURL;
            recDoc.MODIFIEDUSER = Convert.ToString(Session["USERID"]);
            recDoc.MODIFIEDTIME = DateTime.Now;
            recDoc.RECEIPTTYPE = string.Empty;
            recDoc.documentAttachedCode = string.Empty;
            recDoc.documentAttachedNo = string.Empty;
            receiptContext.ReceiptDocs.Add(recDoc);
            receiptContext.SaveChanges();
        }

        /// <summary>
        /// Loads {fileName}.pdf from the temp folder, extracts the positioned text of its first
        /// page, caches the merged result for <see cref="CacheExpire"/> minutes and renders the
        /// preview image. Does nothing when the name is unsafe, the file is missing, the
        /// document has no pages, or the first page has no content stream.
        /// </summary>
        private void ReadPdf(string fileName, float scale)
        {
            if (!IsSafeFileName(fileName))
            {
                return;
            }

            var ocrDir = Server.MapPath($"~/{ocrFilePath}");
            var pdfFileName = Path.Combine(ocrDir, fileName + ".pdf");
            if (!System.IO.File.Exists(pdfFileName))
            {
                return;
            }

            PDDocument document = PDDocument.load(pdfFileName);
            try
            {
                java.util.List allPages = document.getDocumentCatalog().getAllPages();
                if (allPages.size() == 0)
                {
                    return;
                }

                PDPage page = (PDPage)allPages.get(0);
                PDStream contents = page.getContents();
                if (contents == null)
                {
                    return;
                }

                PrintTextLocations printer = new PrintTextLocations();
                printer.processStream(page, page.findResources(), contents.getStream());

                List<TextSizeLocation> runs = MergeAdjacentFragments(printer.TextWithLocations)
                    .OrderByDescending(it => it.Y)
                    .ThenBy(it => it.X)
                    .ToList();

                var cropBox = page.findCropBox();
                var pageSize = new Size((int)cropBox.getWidth(), (int)cropBox.getHeight());

                // Set replaces any existing entry in one step (was Remove followed by Add).
                MemoryCache.Default.Set(
                    fileName,
                    new PageTextCache(pageSize, runs),
                    new DateTimeOffset(DateTime.Now.AddMinutes(CacheExpire)));

                DrawImg((int)(cropBox.getWidth() * scale), (int)(cropBox.getHeight() * scale), fileName, scale);
            }
            finally
            {
                // Close the document even when extraction or rendering throws.
                document.close();
            }
        }

        /// <summary>
        /// Joins consecutive fragments into runs: a fragment is appended to the current run when
        /// it starts within half of its own width of the run's right edge and their heights
        /// differ by at most 1. Input items are cloned, never mutated.
        /// </summary>
        private static List<TextSizeLocation> MergeAdjacentFragments(IEnumerable<TextSizeLocation> fragments)
        {
            var merged = new List<TextSizeLocation>();
            TextSizeLocation current = null;

            foreach (var item in fragments)
            {
                bool adjoins = current != null
                    && Math.Abs(current.Width + current.X - item.X) <= item.Width / 2
                    && Math.Abs(current.Height - item.Height) <= 1;

                if (adjoins)
                {
                    current.Width += item.Width;
                    current.Text += item.Text;
                }
                else
                {
                    current = item.Clone();
                    merged.Add(current);
                }
            }

            return merged;
        }

        /// <summary>
        /// Renders the cached text runs of <paramref name="fileName"/> as black text on a white
        /// bitmap and saves it as {fileName}_{scale}.jpg in the temp folder. Does nothing when
        /// the dimensions are not positive, the name is unsafe, or nothing is cached.
        /// </summary>
        private void DrawImg(int width, int height, string fileName, float scale)
        {
            // new Bitmap(w, h) throws for non-positive dimensions (e.g. scale <= 0).
            if (width <= 0 || height <= 0 || !IsSafeFileName(fileName))
            {
                return;
            }

            var cached = MemoryCache.Default[fileName] as PageTextCache;
            if (cached == null)
            {
                return;
            }

            var ocrDir = Server.MapPath($"~/{ocrFilePath}");

            using (var bitmap = new Bitmap(width, height))
            using (var graphics = Graphics.FromImage(bitmap))
            {
                graphics.Clear(Color.White);

                foreach (var item in cached.Result)
                {
                    float textSize = GetMatchTextSize(item.Width * scale, item.Height * scale, item.Text);
                    using (var font = new Font("黑体", textSize))
                    {
                        graphics.DrawString(item.Text, font, Brushes.Black, item.X * scale, bitmap.Height - item.Y * scale);
                    }
                }

                // NOTE(review): no ImageFormat is passed, so the encoder is not chosen from the
                // ".jpg" extension — see Image.Save(string) documentation. Left as in the original.
                bitmap.Save(Path.Combine(ocrDir, $"{fileName}_{scale}.jpg"));
            }
        }

        /// <summary>
        /// Finds the largest font size (searched from 1 up to 100 in 0.1 steps) at which
        /// <paramref name="str"/> is still not wider than <paramref name="width"/>.
        /// </summary>
        /// <param name="width">Target width.</param>
        /// <param name="height">Target height; currently not used by the search.</param>
        /// <param name="str">Text to measure.</param>
        /// <returns>
        /// The matched size; 1 when the text is already too wide at size 1 or when it never
        /// exceeds the width within the searched range.
        /// </returns>
        private float GetMatchTextSize(float width, float height, string str)
        {
            const float step = 0.1f;
            float result = 1;

            using (var bm = new Bitmap(1, 1))
            using (var g = Graphics.FromImage(bm))
            {
                for (float i = 1; i < 100; i += step)
                {
                    SizeF measured;
                    // Dispose every probe font; each one holds a native GDI+ handle.
                    using (var font = new Font("黑体", i))
                    {
                        measured = g.MeasureString(str, font);
                    }

                    // Only the width is matched.
                    if (measured.Width > width)
                    {
                        if (i > 1)
                        {
                            result = i - step;
                        }
                        break;
                    }
                }
            }

            return result;
        }
    }

    /// <summary>A piece of text together with its position and size.</summary>
    public class TextSizeLocation
    {
        public TextSizeLocation()
        {
        }

        public TextSizeLocation(float x, float y, float width, float height, string text)
        {
            this.X = x;
            this.Y = y;
            this.Width = width;
            this.Height = height;
            this.Text = text;
        }

        public float X { get; set; }
        public float Y { get; set; }
        public float Width { get; set; }
        public float Height { get; set; }
        public string Text { get; set; }

        /// <summary>Returns a new instance carrying the same five property values.</summary>
        public TextSizeLocation Clone()
        {
            return new TextSizeLocation()
            {
                X = this.X,
                Y = this.Y,
                Width = this.Width,
                Height = this.Height,
                Text = this.Text
            };
        }
    }

    /// <summary>
    /// <see cref="PDFTextStripper"/> that records every <see cref="TextPosition"/> it is given,
    /// both raw and as a <see cref="TextSizeLocation"/>.
    /// </summary>
    public class PrintTextLocations : PDFTextStripper
    {
        /// <summary>Text, position and size of each processed text position, in processing order.</summary>
        public List<TextSizeLocation> TextWithLocations { get; set; }

        /// <summary>The raw text positions, in processing order.</summary>
        public List<TextPosition> TextPositions { get; set; }

        public PrintTextLocations()
        {
            this.TextWithLocations = new List<TextSizeLocation>();
            this.TextPositions = new List<TextPosition>();
        }

        /// <summary>Clears both result lists, then delegates to the base implementation.</summary>
        public override void processStream(PDPage aPage, PDResources resources, COSStream cosStream)
        {
            this.TextWithLocations.Clear();
            this.TextPositions.Clear();
            base.processStream(aPage, resources, cosStream);
        }

        /// <summary>Records the text position in both result lists.</summary>
        protected override void processTextPosition(TextPosition text)
        {
            this.TextPositions.Add(text);
            this.TextWithLocations.Add(new TextSizeLocation(
                text.getTextPos().getXPosition(),
                text.getTextPos().getYPosition(),
                text.getWidth(),
                text.getHeight(),
                text.getCharacter()));
        }
    }
}