You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
DS7/DSWeb/Areas/MvcShipping/Controllers/OcrController.cs

362 lines
13 KiB
C#

2 years ago
using DSWeb.Areas.MvcShipping.DB;
using DSWeb.MvcShipping.Comm;
using Newtonsoft.Json;
using org.apache.pdfbox.cos;
using org.apache.pdfbox.pdmodel;
using org.apache.pdfbox.pdmodel.common;
using org.apache.pdfbox.util;
using System;
using System.Collections.Generic;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Runtime.Caching;
using System.Text;
using System.Web;
using System.Web.Mvc;
namespace DSWeb.MvcShipping.Controllers
{
public class OcrController : Controller
{
// Folder name, resolved with Server.MapPath("~/...") by the methods below, that holds
// uploaded files, converted PDFs and rendered page images.
private static readonly string ocrFilePath = "ocr_temp_files";
// File extensions (lower case, with leading dot) that UpOcrFile accepts.
private static readonly string[] allowExts = new string[] { ".pdf", ".xls", ".xlsx", ".doc", ".docx",".rtf" };
//private static readonly int ImageScale = 1;
// Lifetime of a cached text-location entry; ReadPdf adds this many minutes to the current time.
private static readonly int CacheExpire = 5;// cache expiry, in minutes
/// <summary>
/// Shows the upload page, handing an optional error message to the view.
/// </summary>
/// <param name="errMsg">Text stored under <c>ViewData["errMsg"]</c>; may be null.</param>
/// <returns>The default view of this action.</returns>
[HttpGet]
public ActionResult UpOcrAuto(string errMsg)
{
    this.ViewData["errMsg"] = errMsg;
    return this.View();
}
/// <summary>
/// Receives an uploaded document, converts Excel/Word/RTF uploads to PDF, extracts the
/// text positions of the PDF via <c>ReadPdf</c>, stores a second copy of the upload under
/// <c>UploadFiles/MsOpFiles</c>, records it as a <c>Receipt_Doc</c>, and redirects to
/// <c>SelectRegion</c>.
/// </summary>
/// <param name="scale">Scale factor forwarded to <c>ReadPdf</c> for rendering the page image.</param>
/// <returns>
/// A redirect to <c>SelectRegion</c> on success, or back to <c>UpOcrAuto</c> when no file
/// was posted or the extension is not in <c>allowExts</c>.
/// </returns>
[HttpPost]
public ActionResult UpOcrFile(float scale = 1.5f)
{
    if (Request.Files.Count == 0)
    {
        return RedirectToAction("UpOcrAuto");
    }

    var upload = Request.Files[0];

    // Invariant lower-casing keeps the whitelist comparison independent of the server culture.
    var ext = (Path.GetExtension(upload.FileName) ?? string.Empty).ToLowerInvariant();
    if (!allowExts.Contains(ext))
    {
        return RedirectToAction("UpOcrAuto", new { errMsg = "只能上传pdf、excel和word文件" });
    }

    var ocrDir = Server.MapPath($"~/{ocrFilePath}");
    if (!Directory.Exists(ocrDir))
    {
        Directory.CreateDirectory(ocrDir);
    }

    // The temp name is a fresh GUID, so the user-supplied file name never reaches this path.
    var tmpFileName = Guid.NewGuid().ToString();
    var saveFilePathName = Path.Combine(ocrDir, tmpFileName + ext);
    upload.SaveAs(saveFilePathName);

    // Non-PDF uploads are converted so that ReadPdf always finds "<tmpFileName>.pdf".
    if (ext == ".xls" || ext == ".xlsx")
    {
        Aspose.Cells.Workbook wb = new Aspose.Cells.Workbook(saveFilePathName);
        string targetPath = Path.Combine(ocrDir, $"{tmpFileName}.pdf");
        wb.Save(targetPath, Aspose.Cells.SaveFormat.Pdf);
    }
    else if (ext == ".doc" || ext == ".docx" || ext == ".rtf")
    {
        Aspose.Words.Document doc = new Aspose.Words.Document(saveFilePathName);
        string targetPath = Path.Combine(ocrDir, $"{tmpFileName}.pdf");
        doc.Save(targetPath, Aspose.Words.SaveFormat.Pdf);
    }

    ReadPdf(tmpFileName, scale);

    #region Attach the upload to document management
    var BSNO = Request.Form["BSNO"];
    var CUSTID = Guid.NewGuid().ToString();
    // NOTE(review): this is a relative virtual path, unlike the "~/"-rooted path used for
    // the OCR temp folder - confirm it resolves to the intended directory.
    var path = Server.MapPath("../../UploadFiles/MsOpFiles/" + CUSTID);
    if (!Directory.Exists(path))
    {
        Directory.CreateDirectory(path);
    }

    // Path.GetFileName strips any directory part a client may have sent along.
    var name = Path.GetFileName(upload.FileName);
    var usercode = Convert.ToString(Session["USERCODE"]);
    var partname = usercode + DateTime.Now.ToString("yyyyMMddHHmmssfff") + name;
    string filename = path + "\\" + partname;
    string filenameURL = "../../UploadFiles/MsOpFiles/" + CUSTID + "/" + partname;
    if (System.IO.File.Exists(filename))
    {
        System.IO.File.Delete(filename);
    }
    upload.SaveAs(filename);

    Receipt_Doc recDoc = new Receipt_Doc();
    recDoc.GID = Guid.NewGuid().ToString();
    recDoc.BSNO = BSNO;
    recDoc.URL = name;
    recDoc.Driect_URL = filenameURL;
    recDoc.MODIFIEDUSER = Convert.ToString(Session["USERID"]);
    recDoc.MODIFIEDTIME = DateTime.Now;
    recDoc.RECEIPTTYPE = string.Empty;
    recDoc.documentAttachedCode = string.Empty;
    recDoc.documentAttachedNo = string.Empty;

    // Dispose the context so its database connection is released deterministically.
    using (ReceiptContext receiptContext = new ReceiptContext())
    {
        receiptContext.ReceiptDocs.Add(recDoc);
        receiptContext.SaveChanges();
    }
    #endregion

    return RedirectToAction("SelectRegion", new { fileName = tmpFileName });
}
/// <summary>
/// Shows the region-selection page for a previously uploaded file.
/// </summary>
/// <param name="fileName">Value stored under <c>ViewData["FileName"]</c> for the view.</param>
/// <returns>The default view of this action.</returns>
[HttpGet]
public ActionResult SelectRegion(string fileName)
{
    this.ViewData["FileName"] = fileName;
    return this.View();
}
/// <summary>
/// Returns the text of every cached text run whose scaled rectangle intersects the
/// selection rectangle. Runs are emitted in cache order, and a line break is inserted
/// whenever the Y position differs from the previously emitted run by more than 1.
/// </summary>
/// <param name="fileName">Cache key of the parsed PDF (the name passed to <c>ReadPdf</c>).</param>
/// <param name="x">Left edge of the selection, in scaled image coordinates.</param>
/// <param name="y">Top edge of the selection, in scaled image coordinates.</param>
/// <param name="w">Width of the selection.</param>
/// <param name="h">Height of the selection.</param>
/// <param name="scale">Scale factor applied to the cached positions before the hit test.</param>
/// <returns>The collected text, or an empty string when nothing is cached or matched.</returns>
[HttpPost]
public ContentResult GetRegionText(string fileName, int x, int y, int w, int h, float scale = 1.5f)
{
    // MemoryCache rejects null keys with an exception; treat a missing name as "no text".
    if (string.IsNullOrEmpty(fileName))
    {
        return Content("");
    }

    // Single lookup instead of Contains + indexer, so an entry expiring in between
    // cannot slip through; re-parse the PDF once when the entry is gone.
    dynamic obj = MemoryCache.Default.Get(fileName);
    if (obj == null)
    {
        ReadPdf(fileName, scale);
        obj = MemoryCache.Default.Get(fileName);
    }
    if (obj == null)
    {
        return Content("");
    }

    Rectangle rectMouse = new Rectangle(x, y, w, h);
    List<TextSizeLocation> listTextLocations = obj.Result;
    Size size = obj.Size;
    StringBuilder sb = new StringBuilder();

    // Y of the last appended run. Nullable so that a legitimate Y of 0 is not mistaken
    // for "nothing appended yet", and kept as float so the "> 1" threshold is not skewed
    // by integer truncation.
    float? lastY = null;
    foreach (var item in listTextLocations)
    {
        int xItem = (int)(item.X * scale);
        // Cached Y is subtracted from the page height to get a top-down image coordinate.
        int yItem = (int)(size.Height * scale - item.Y * scale);
        int wItem = (int)(item.Width * scale);
        int hItem = (int)(item.Height * scale);
        Rectangle rect = new Rectangle(xItem, yItem, wItem, hItem);
        if (!rect.IntersectsWith(rectMouse))
        {
            continue;
        }

        if (lastY.HasValue && Math.Abs(lastY.Value - item.Y) > 1)
        {
            sb.AppendLine();
        }
        sb.Append(item.Text);
        lastY = item.Y;
    }
    return Content(sb.ToString());
}
/// <summary>
/// Serves the rendered page image <c>{fileName}_{scale}.jpg</c> from the OCR temp folder,
/// generating it first when the file does not exist yet.
/// </summary>
/// <param name="fileName">Name of the uploaded file without extension; also the cache key.</param>
/// <param name="scale">Scale factor of the requested image; part of the image file name.</param>
/// <returns>The image file, or a 404 result when the name is invalid or no image could be produced.</returns>
[HttpGet]
public ActionResult GetCanvasImage(string fileName, float scale = 1.5f)
{
    // fileName is request input that becomes part of a served file path: reject anything
    // that is not a plain file name (directory separators, drive colons, ...).
    if (string.IsNullOrEmpty(fileName) || fileName.IndexOfAny(Path.GetInvalidFileNameChars()) >= 0)
    {
        return HttpNotFound();
    }

    var ocrDir = Server.MapPath($"~/{ocrFilePath}");
    var imgPath = Path.Combine(ocrDir, $"{fileName}_{scale}.jpg");
    if (!System.IO.File.Exists(imgPath))
    {
        dynamic obj = MemoryCache.Default.Get(fileName);
        if (obj != null)
        {
            Size size = obj.Size;
            DrawImg((int)(size.Width * scale), (int)(size.Height * scale), fileName, scale);
        }
        else
        {
            // Cache entry expired or never created: ReadPdf re-parses the PDF and draws the
            // image itself (it returns silently when the PDF does not exist).
            ReadPdf(fileName, scale);
        }
    }

    if (System.IO.File.Exists(imgPath))
    {
        // NOTE(review): "image/jpg" is not a registered media type ("image/jpeg" is);
        // left unchanged here - confirm against the format DrawImg actually writes.
        return File(imgPath, "image/jpg");
    }
    return HttpNotFound();
}
/// <summary>
/// Parses the first page of <c>{fileName}.pdf</c> in the OCR temp folder, merges adjacent
/// glyphs into text runs, caches the runs together with the page size under
/// <paramref name="fileName"/>, and renders the page image via <c>DrawImg</c>.
/// Does nothing when the name is invalid, the PDF does not exist, or the first page has
/// no content stream.
/// </summary>
/// <param name="fileName">PDF file name without extension; also used as the cache key.</param>
/// <param name="scale">Scale factor passed on to <c>DrawImg</c>.</param>
private void ReadPdf(string fileName, float scale)
{
    // The name can originate from request parameters; only accept a plain file name so it
    // cannot point outside the OCR temp folder.
    if (string.IsNullOrEmpty(fileName) || fileName.IndexOfAny(Path.GetInvalidFileNameChars()) >= 0)
    {
        return;
    }

    var ocrDir = Server.MapPath($"~/{ocrFilePath}");
    var pdfFileName = System.IO.Path.Combine(ocrDir, fileName + ".pdf");
    if (!System.IO.File.Exists(pdfFileName))
    {
        return;
    }

    PDDocument document = PDDocument.load(pdfFileName);
    try
    {
        java.util.List allPages = document.getDocumentCatalog().getAllPages();
        if (allPages.size() == 0)
        {
            return;
        }

        // Only the first page is processed.
        PDPage page = (PDPage)allPages.get(0);
        PDStream contents = page.getContents();
        if (contents == null)
        {
            return;
        }

        PrintTextLocations printer = new PrintTextLocations();
        printer.processStream(page, page.findResources(), contents.getStream());

        // Order the merged runs by descending Y, then ascending X.
        List<TextSizeLocation> listT = MergeAdjacentText(printer.TextWithLocations)
            .OrderByDescending(it => it.Y)
            .ThenBy(it => it.X)
            .ToList();

        var cropBox = page.findCropBox();
        float pageWidth = cropBox.getWidth();
        float pageHeight = cropBox.getHeight();

        // Cache. Consumers read this entry through 'dynamic' as { Size, Result }, so the
        // shape of the anonymous object must stay as it is. Set replaces any existing entry.
        var objStore = new { Size = new Size((int)pageWidth, (int)pageHeight), Result = listT };
        MemoryCache.Default.Set(
            fileName,
            objStore,
            new CacheItemPolicy() { AbsoluteExpiration = new DateTimeOffset(DateTime.Now.AddMinutes(CacheExpire)) });

        DrawImg((int)(pageWidth * scale), (int)(pageHeight * scale), fileName, scale);
    }
    finally
    {
        // Always release the document, including when parsing or drawing throws.
        document.close();
    }
}

// Joins consecutive glyphs into one run when the next glyph starts where the current run ends.
private static List<TextSizeLocation> MergeAdjacentText(IEnumerable<TextSizeLocation> glyphs)
{
    var runs = new List<TextSizeLocation>();
    TextSizeLocation current = null;
    foreach (var glyph in glyphs)
    {
        // "Head to tail": the gap between the run's right edge and the glyph's X is at
        // most half a glyph width, and the heights differ by at most 1.
        // NOTE(review): Y is not compared, so glyphs on different lines could be merged
        // if they happen to line up horizontally - confirm this is intended.
        bool abuts = current != null
            && Math.Abs(current.Width + current.X - glyph.X) <= glyph.Width / 2
            && Math.Abs(current.Height - glyph.Height) <= 1;

        if (abuts)
        {
            current.Width += glyph.Width;
            current.Text += glyph.Text;
        }
        else
        {
            // Clone so that the printer's own list is left untouched.
            current = glyph.Clone();
            runs.Add(current);
        }
    }
    return runs;
}
/// <summary>
/// Renders the cached text runs of <paramref name="fileName"/> onto a white bitmap and
/// saves it as <c>{fileName}_{scale}.jpg</c> in the OCR temp folder. Does nothing when the
/// dimensions are not positive or no cache entry exists for the file.
/// </summary>
/// <param name="width">Bitmap width in pixels.</param>
/// <param name="height">Bitmap height in pixels.</param>
/// <param name="fileName">Cache key of the parsed PDF; also the image file name prefix.</param>
/// <param name="scale">Factor applied to the cached positions and sizes.</param>
private void DrawImg(int width, int height, string fileName, float scale)
{
    // Bitmap cannot be created with non-positive dimensions (e.g. scale <= 0), and the
    // cache rejects null keys.
    if (width <= 0 || height <= 0 || string.IsNullOrEmpty(fileName))
    {
        return;
    }

    // Look the data up first so no bitmap is allocated when there is nothing to draw.
    dynamic obj = MemoryCache.Default.Get(fileName);
    if (obj == null)
    {
        return;
    }
    List<TextSizeLocation> listTextLocations = obj.Result;

    var ocrDir = Server.MapPath($"~/{ocrFilePath}");
    var imgPath = Path.Combine(ocrDir, $"{fileName}_{scale}.jpg");

    // Bitmap, Graphics and Font wrap GDI+ handles; dispose them deterministically.
    using (Bitmap bitmap = new Bitmap(width, height))
    using (Graphics graphics = Graphics.FromImage(bitmap))
    {
        graphics.Clear(Color.White);
        foreach (var item in listTextLocations)
        {
            // Pick a font size whose measured width fits the scaled width of the run.
            float textSize = GetMatchTextSize(item.Width * scale, item.Height * scale, item.Text);
            using (Font font = new Font("黑体", textSize))
            {
                // Cached Y is subtracted from the bitmap height to get a top-down coordinate.
                graphics.DrawString(item.Text, font, Brushes.Black, item.X * scale, bitmap.Height - item.Y * scale);
            }
        }

        // NOTE(review): Save is called without an explicit ImageFormat - confirm the
        // encoder actually used matches the ".jpg" extension.
        bitmap.Save(imgPath);
    }
}
/// <summary>
/// Finds a font size for <paramref name="str"/> by trying sizes from 1 upwards in steps of
/// 0.1 and stopping at the first size whose measured width exceeds <paramref name="width"/>;
/// the previous size is returned.
/// </summary>
/// <param name="width">Maximum width the rendered text may occupy.</param>
/// <param name="height">Not used; only the width constrains the result.</param>
/// <param name="str">Text to measure.</param>
/// <returns>
/// The last size that did not exceed the width, or 1 when even size 1 is too wide or when
/// no size below 100 exceeds the width.
/// </returns>
private static float GetMatchTextSize(float width, float height, string str)
{
    const float step = 0.1f;
    float result = 1;

    // A 1x1 scratch bitmap is only needed to obtain a Graphics object for measuring.
    using (Bitmap scratch = new Bitmap(1, 1))
    using (Graphics g = Graphics.FromImage(scratch))
    {
        for (float i = 1; i < 100; i += step)
        {
            SizeF measured;
            // One Font per candidate size; dispose each so the loop does not leak handles.
            using (Font font = new Font("黑体", i))
            {
                measured = g.MeasureString(str, font);
            }

            if (measured.Width > width)
            {
                if (i > 1)
                {
                    result = i - step;
                }
                break;
            }
        }
    }
    return result;
}
}
/// <summary>
/// A piece of text together with a position (<see cref="X"/>, <see cref="Y"/>) and a size
/// (<see cref="Width"/>, <see cref="Height"/>). All members are freely mutable.
/// </summary>
public class TextSizeLocation
{
    /// <summary>Creates an instance with default values (zeros and a null text).</summary>
    public TextSizeLocation()
    {
    }

    /// <summary>Creates an instance from the given position, size and text.</summary>
    public TextSizeLocation(float x, float y, float width, float height, string text)
    {
        X = x;
        Y = y;
        Width = width;
        Height = height;
        Text = text;
    }

    /// <summary>Horizontal position.</summary>
    public float X { get; set; }

    /// <summary>Vertical position.</summary>
    public float Y { get; set; }

    /// <summary>Width of the text.</summary>
    public float Width { get; set; }

    /// <summary>Height of the text.</summary>
    public float Height { get; set; }

    /// <summary>The text itself.</summary>
    public string Text { get; set; }

    /// <summary>Returns a new, independent instance carrying the same five values.</summary>
    public TextSizeLocation Clone() => new TextSizeLocation(X, Y, Width, Height, Text);
}
/// <summary>
/// A <c>PDFTextStripper</c> that records every <c>TextPosition</c> it is handed, both as
/// the raw object and as a <see cref="TextSizeLocation"/>.
/// </summary>
public class PrintTextLocations : PDFTextStripper
{
    /// <summary>Position, size and character of each text position seen in the last run.</summary>
    public List<TextSizeLocation> TextWithLocations { get; set; }

    /// <summary>The raw text positions seen in the last run.</summary>
    public List<TextPosition> TextPositions { get; set; }

    /// <summary>Creates the stripper with two empty result lists.</summary>
    public PrintTextLocations()
    {
        TextWithLocations = new List<TextSizeLocation>();
        TextPositions = new List<TextPosition>();
    }

    /// <summary>Empties both result lists, then lets the base class process the stream.</summary>
    public override void processStream(PDPage aPage, PDResources resources, COSStream cosStream)
    {
        TextWithLocations.Clear();
        TextPositions.Clear();
        base.processStream(aPage, resources, cosStream);
    }

    /// <summary>Records one text position handed over by the base class.</summary>
    protected override void processTextPosition(TextPosition text)
    {
        TextPositions.Add(text);

        var origin = text.getTextPos();
        var entry = new TextSizeLocation(
            origin.getXPosition(),
            origin.getYPosition(),
            text.getWidth(),
            text.getHeight(),
            text.getCharacter());
        TextWithLocations.Add(entry);
    }
}
}