You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
DS7/DSWeb/Areas/MvcShipping/Controllers/OcrController.cs

362 lines
13 KiB
C#

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

using DSWeb.Areas.MvcShipping.DB;
using DSWeb.MvcShipping.Comm;
using Newtonsoft.Json;
using org.apache.pdfbox.cos;
using org.apache.pdfbox.pdmodel;
using org.apache.pdfbox.pdmodel.common;
using org.apache.pdfbox.util;
using System;
using System.Collections.Generic;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Runtime.Caching;
using System.Text;
using System.Web;
using System.Web.Mvc;
namespace DSWeb.MvcShipping.Controllers
{
public class OcrController : Controller
{
// Folder name, resolved against the application root via Server.MapPath("~/..."), that holds
// uploaded files and the PDF / preview image files derived from them.
private static readonly string ocrFilePath = "ocr_temp_files";
// File extensions accepted by UpOcrFile; the upload's extension is lower-cased before the lookup.
private static readonly string[] allowExts = new string[] { ".pdf", ".xls", ".xlsx", ".doc", ".docx",".rtf" };
//private static readonly int ImageScale = 1;
// Number of minutes a parsed-PDF entry stays in MemoryCache before it expires.
private static readonly int CacheExpire = 5;// cache expiry in minutes
/// <summary>
/// Shows the upload page and hands an optional error message to the view
/// through <c>ViewData["errMsg"]</c>.
/// </summary>
/// <param name="errMsg">Message stored for the view; stored as-is, including null.</param>
/// <returns>The default view of this action.</returns>
[HttpGet]
public ActionResult UpOcrAuto(string errMsg)
{
    this.ViewData["errMsg"] = errMsg;

    ViewResult page = this.View();
    return page;
}
/// <summary>
/// Handles a document upload: stores the first posted file under a GUID name in the OCR
/// working folder, converts Excel/Word uploads to PDF with Aspose, runs <c>ReadPdf</c> on the
/// result, then stores a second copy under <c>UploadFiles/MsOpFiles/{guid}</c> and records it
/// as a <c>Receipt_Doc</c> row linked to the posted <c>BSNO</c> form value.
/// </summary>
/// <param name="scale">Zoom factor passed on to <c>ReadPdf</c>.</param>
/// <returns>
/// A redirect to <c>SelectRegion</c> carrying the GUID file name; a redirect to
/// <c>UpOcrAuto</c> when no file was posted or (with an error message) when the extension
/// is not in <c>allowExts</c>.
/// </returns>
[HttpPost]
public ActionResult UpOcrFile(float scale = 1.5f)
{
    if (Request.Files.Count == 0)
    {
        return RedirectToAction("UpOcrAuto");
    }

    var upload = Request.Files[0];

    // Invariant lower-casing: the whitelist comparison must not depend on the server's culture.
    var ext = Path.GetExtension(upload.FileName).ToLowerInvariant();
    if (!allowExts.Contains(ext))
    {
        return RedirectToAction("UpOcrAuto", new { errMsg = "只能上传pdf、excel和word文件" });
    }

    // CreateDirectory is a no-op when the directory already exists.
    var ocrDir = Server.MapPath($"~/{ocrFilePath}");
    Directory.CreateDirectory(ocrDir);

    // The stored name is a fresh GUID, so the client-supplied name never reaches this path.
    var tmpFileName = Guid.NewGuid().ToString();
    var saveFilePathName = Path.Combine(ocrDir, tmpFileName + ext);
    upload.SaveAs(saveFilePathName);

    // Office formats are converted to a PDF next to the stored upload; PDFs are used as-is.
    var pdfPath = Path.Combine(ocrDir, $"{tmpFileName}.pdf");
    if (ext == ".xls" || ext == ".xlsx")
    {
        Aspose.Cells.Workbook wb = new Aspose.Cells.Workbook(saveFilePathName);
        wb.Save(pdfPath, Aspose.Cells.SaveFormat.Pdf);
    }
    else if (ext == ".doc" || ext == ".docx" || ext == ".rtf")
    {
        Aspose.Words.Document doc = new Aspose.Words.Document(saveFilePathName);
        doc.Save(pdfPath, Aspose.Words.SaveFormat.Pdf);
    }

    ReadPdf(tmpFileName, scale);

    #region Attach the upload to document management
    var BSNO = Request.Form["BSNO"];
    var CUSTID = Guid.NewGuid().ToString();
    var path = Server.MapPath("../../UploadFiles/MsOpFiles/" + CUSTID);
    Directory.CreateDirectory(path);

    // GetFileName strips any directory part some browsers send along with the file name.
    var name = Path.GetFileName(upload.FileName);
    var usercode = Convert.ToString(Session["USERCODE"]);
    var partname = usercode + DateTime.Now.ToString("yyyyMMddHHmmssfff") + name;
    string filename = Path.Combine(path, partname);
    string filenameURL = "../../UploadFiles/MsOpFiles/" + CUSTID + "/" + partname;

    // The target directory was created for a fresh GUID, so nothing can be in the way;
    // SaveAs would overwrite an existing file in any case.
    upload.SaveAs(filename);

    // Dispose the context so its database connection is released when the request ends.
    using (var receiptContext = new ReceiptContext())
    {
        var recDoc = new Receipt_Doc
        {
            GID = Guid.NewGuid().ToString(),
            BSNO = BSNO,
            URL = name,
            Driect_URL = filenameURL,
            MODIFIEDUSER = Convert.ToString(Session["USERID"]),
            MODIFIEDTIME = DateTime.Now,
            RECEIPTTYPE = string.Empty,
            documentAttachedCode = string.Empty,
            documentAttachedNo = string.Empty
        };
        receiptContext.ReceiptDocs.Add(recDoc);
        receiptContext.SaveChanges();
    }
    #endregion

    return RedirectToAction("SelectRegion", new { fileName = tmpFileName });
}
/// <summary>
/// Shows the region-selection page for a previously uploaded file; the file name is
/// exposed to the view through <c>ViewData["FileName"]</c>.
/// </summary>
/// <param name="fileName">Name stored for the view; stored as-is.</param>
/// <returns>The default view of this action.</returns>
[HttpGet]
public ActionResult SelectRegion(string fileName)
{
    this.ViewData["FileName"] = fileName;

    ViewResult page = this.View();
    return page;
}
/// <summary>
/// Returns the text of every cached text run whose scaled rectangle intersects the given
/// selection rectangle. Runs are emitted in cached order, with a line break inserted
/// whenever the Y position differs from the previously emitted run by more than one unit.
/// </summary>
/// <param name="fileName">Cache key of the parsed document; parsed on demand via <c>ReadPdf</c> when not cached.</param>
/// <param name="x">Left edge of the selection, in scaled image pixels.</param>
/// <param name="y">Top edge of the selection, in scaled image pixels.</param>
/// <param name="w">Selection width.</param>
/// <param name="h">Selection height.</param>
/// <param name="scale">Zoom factor applied to the cached page coordinates before the hit test.</param>
/// <returns>The collected text, or an empty string when nothing is cached for <paramref name="fileName"/>.</returns>
[HttpPost]
public ContentResult GetRegionText(string fileName, int x, int y, int w, int h, float scale = 1.5f)
{
    // MemoryCache throws on a null key; an absent name simply has no text.
    if (string.IsNullOrEmpty(fileName))
    {
        return Content("");
    }

    // Single lookup; parse the PDF only when the entry is missing or has expired.
    dynamic cached = MemoryCache.Default[fileName];
    if (cached == null)
    {
        ReadPdf(fileName, scale);
        cached = MemoryCache.Default[fileName];
    }
    if (cached == null)
    {
        return Content("");
    }

    Rectangle selection = new Rectangle(x, y, w, h);
    List<TextSizeLocation> listTextLocations = cached.Result;
    Size pageSize = cached.Size;

    StringBuilder sb = new StringBuilder();
    // Explicit flag instead of treating lastY == 0 as "nothing emitted yet", which
    // dropped the line break after a run whose Y truncates to 0.
    bool hasPrevious = false;
    int lastY = 0;
    foreach (var item in listTextLocations)
    {
        // The Y coordinate is subtracted from the page height, i.e. flipped relative to the image rows.
        Rectangle itemRect = new Rectangle(
            (int)(item.X * scale),
            (int)(pageSize.Height * scale - item.Y * scale),
            (int)(item.Width * scale),
            (int)(item.Height * scale));

        if (!itemRect.IntersectsWith(selection))
        {
            continue;
        }

        if (hasPrevious && Math.Abs(lastY - item.Y) > 1)
        {
            sb.AppendLine();
        }
        sb.Append(item.Text);
        lastY = (int)item.Y;
        hasPrevious = true;
    }

    return Content(sb.ToString());
}
/// <summary>
/// Serves the preview image <c>{fileName}_{scale}.jpg</c> from the OCR working folder,
/// creating it first when it does not exist yet.
/// </summary>
/// <param name="fileName">Base name of the document; must be a plain file name without path components.</param>
/// <param name="scale">Zoom factor; it is part of the image file name.</param>
/// <returns>
/// The image file, or null (which MVC treats as an empty result) when the name is
/// rejected or no image could be produced.
/// </returns>
[HttpGet]
public ActionResult GetCanvasImage(string fileName, float scale = 1.5f)
{
    // fileName comes from the query string and ends up in a filesystem path:
    // reject anything that is not a bare file name (path separators, drive colons, ...).
    if (string.IsNullOrEmpty(fileName) || fileName.IndexOfAny(Path.GetInvalidFileNameChars()) >= 0)
    {
        return null;
    }

    var ocrDir = Server.MapPath($"~/{ocrFilePath}");
    var imgPath = Path.Combine(ocrDir, $"{fileName}_{scale}.jpg");

    if (!System.IO.File.Exists(imgPath))
    {
        dynamic cached = MemoryCache.Default[fileName];
        if (cached != null)
        {
            Size pageSize = cached.Size;
            DrawImg((int)(pageSize.Width * scale), (int)(pageSize.Height * scale), fileName, scale);
        }
        else
        {
            // Cache entry missing or expired: re-parse the PDF, which also redraws the
            // image for this scale (same on-demand behaviour as GetRegionText).
            ReadPdf(fileName, scale);
        }
    }

    if (System.IO.File.Exists(imgPath))
    {
        // "image/jpeg" is the registered media type; "image/jpg" is not.
        return File(imgPath, "image/jpeg");
    }

    return null;
}
/// <summary>
/// Parses the first page of <c>{fileName}.pdf</c> in the OCR working folder, merges adjacent
/// glyphs into text runs, stores the runs together with the page's crop-box size in
/// <c>MemoryCache</c> under <paramref name="fileName"/> and renders the preview image via
/// <c>DrawImg</c>. Does nothing when the file is missing, has no pages, or the first page
/// has no content stream.
/// </summary>
/// <param name="fileName">Base name of the PDF (without extension); also used as the cache key.</param>
/// <param name="scale">Zoom factor used for the preview image.</param>
private void ReadPdf(string fileName, float scale)
{
    // The name can originate from request parameters and is used in a filesystem path:
    // accept bare file names only (no separators, drive colons, ...).
    if (string.IsNullOrEmpty(fileName) || fileName.IndexOfAny(Path.GetInvalidFileNameChars()) >= 0)
    {
        return;
    }

    var ocrDir = Server.MapPath($"~/{ocrFilePath}");
    var pdfFileName = System.IO.Path.Combine(ocrDir, fileName + ".pdf");
    if (!System.IO.File.Exists(pdfFileName))
    {
        return;
    }

    PDDocument document = PDDocument.load(pdfFileName);
    try
    {
        java.util.List allPages = document.getDocumentCatalog().getAllPages();
        if (allPages.isEmpty())
        {
            return;
        }

        PDPage page = (PDPage)allPages.get(0);
        PDStream contents = page.getContents();
        if (contents == null)
        {
            return;
        }

        PrintTextLocations printer = new PrintTextLocations();
        printer.processStream(page, page.findResources(), contents.getStream());

        // Merge glyphs into runs: a glyph extends the current run when it starts where the
        // run ends (within half a glyph width), has the same height (within 1) and sits on
        // the same Y position (within 1). Without the Y test, a glyph on another line that
        // happens to start at the run's right edge was fused into the wrong line.
        var runs = new List<TextSizeLocation>();
        TextSizeLocation run = null;
        foreach (var glyph in printer.TextWithLocations)
        {
            bool extendsRun = run != null
                && Math.Abs(run.Width + run.X - glyph.X) <= glyph.Width / 2
                && Math.Abs(run.Height - glyph.Height) <= 1
                && Math.Abs(run.Y - glyph.Y) <= 1;

            if (extendsRun)
            {
                run.Width += glyph.Width;
                run.Text += glyph.Text;
            }
            else
            {
                run = glyph.Clone();
                runs.Add(run);
            }
        }

        // Largest Y first, then left to right.
        runs = runs.OrderByDescending(r => r.Y).ThenBy(r => r.X).ToList();

        var cropBox = page.findCropBox();
        float pageWidth = cropBox.getWidth();
        float pageHeight = cropBox.getHeight();

        // Cache entry shape (Size + Result) is read back dynamically by the other members
        // of this controller; Set replaces any previous entry for the key.
        var objStore = new { Size = new Size((int)pageWidth, (int)pageHeight), Result = runs };
        MemoryCache.Default.Set(fileName, objStore, DateTimeOffset.Now.AddMinutes(CacheExpire));

        DrawImg((int)(pageWidth * scale), (int)(pageHeight * scale), fileName, scale);
    }
    finally
    {
        // Always release the document, including on early returns and exceptions.
        document.close();
    }
}
/// <summary>
/// Renders the cached text runs of <paramref name="fileName"/> as black text on a white
/// bitmap and saves it as <c>{fileName}_{scale}.jpg</c> in the OCR working folder.
/// Nothing is written when there is no cache entry for the file.
/// </summary>
/// <param name="width">Bitmap width in pixels.</param>
/// <param name="height">Bitmap height in pixels.</param>
/// <param name="fileName">Cache key and base name of the output image.</param>
/// <param name="scale">Factor applied to the cached coordinates and sizes; also part of the file name.</param>
private void DrawImg(int width, int height, string fileName, float scale)
{
    // Bitmap rejects non-positive dimensions and MemoryCache rejects a null key.
    if (width <= 0 || height <= 0 || string.IsNullOrEmpty(fileName))
    {
        return;
    }

    // fileName becomes part of the output path: accept bare file names only.
    if (fileName.IndexOfAny(Path.GetInvalidFileNameChars()) >= 0)
    {
        return;
    }

    dynamic cached = MemoryCache.Default[fileName];
    if (cached == null)
    {
        return;
    }

    List<TextSizeLocation> listTextLocations = cached.Result;
    var ocrDir = Server.MapPath($"~/{ocrFilePath}");
    var imgPath = Path.Combine(ocrDir, $"{fileName}_{scale}.jpg");

    // Bitmap, Graphics and Font wrap GDI+ handles; dispose them deterministically.
    using (var bitmap = new Bitmap(width, height))
    {
        using (var graphics = Graphics.FromImage(bitmap))
        {
            graphics.Clear(Color.White);
            foreach (var item in listTextLocations)
            {
                float textSize = GetMatchTextSize(item.Width * scale, item.Height * scale, item.Text);
                using (var font = new Font("黑体", textSize))
                {
                    // The Y coordinate is measured from the bottom of the bitmap.
                    graphics.DrawString(item.Text, font, Brushes.Black, item.X * scale, bitmap.Height - item.Y * scale);
                }
            }
        }

        // Save(path) without a format ignores the extension and writes PNG data;
        // name the JPEG encoder explicitly so the content matches the .jpg file name.
        bitmap.Save(imgPath, System.Drawing.Imaging.ImageFormat.Jpeg);
    }
}
/// <summary>
/// Finds a font size (in points, font "黑体") at which <paramref name="str"/> still fits into
/// <paramref name="width"/>: sizes are tried from 1 upwards in steps of 0.1 and the last size
/// before the measured width exceeds <paramref name="width"/> is returned.
/// </summary>
/// <param name="width">Available width in pixels.</param>
/// <param name="height">Available height; currently not used for the fit.</param>
/// <param name="str">Text to measure.</param>
/// <returns>
/// The fitted size; 1 when even size 1 is too wide or when no size below 100 exceeds the width.
/// </returns>
private float GetMatchTextSize(float width, float height, string str)
{
    float plus = 0.1f;
    float rtn = 1;

    // The 1x1 bitmap only provides a Graphics for MeasureString. All GDI+ objects,
    // including the font created for every probed size, are disposed right after use.
    using (var bm = new Bitmap(1, 1))
    using (var g = Graphics.FromImage(bm))
    {
        for (float i = 1; i < 100; i += plus)
        {
            SizeF size;
            using (var font = new Font("黑体", i))
            {
                size = g.MeasureString(str, font);
            }

            if (size.Width > width)
            {
                if (i > 1)
                {
                    rtn = i - plus;
                }
                break;
            }
        }
    }

    return rtn;
}
}
/// <summary>
/// A piece of text together with its position (<see cref="X"/>, <see cref="Y"/>) and its
/// extent (<see cref="Width"/>, <see cref="Height"/>).
/// </summary>
public class TextSizeLocation
{
    /// <summary>Creates an instance with all members at their default values.</summary>
    public TextSizeLocation()
    {
    }

    /// <summary>Creates an instance from the given position, extent and text.</summary>
    public TextSizeLocation(float x, float y, float width, float height, string text)
    {
        X = x;
        Y = y;
        Width = width;
        Height = height;
        Text = text;
    }

    /// <summary>Horizontal position.</summary>
    public float X { get; set; }

    /// <summary>Vertical position.</summary>
    public float Y { get; set; }

    /// <summary>Horizontal extent.</summary>
    public float Width { get; set; }

    /// <summary>Vertical extent.</summary>
    public float Height { get; set; }

    /// <summary>The text itself.</summary>
    public string Text { get; set; }

    /// <summary>Returns a new, independent instance carrying the same five values.</summary>
    public TextSizeLocation Clone() => new TextSizeLocation(X, Y, Width, Height, Text);
}
/// <summary>
/// A <c>PDFTextStripper</c> that records every <c>TextPosition</c> passed to
/// <see cref="processTextPosition"/>, both raw and converted to a
/// <see cref="TextSizeLocation"/>.
/// </summary>
public class PrintTextLocations : PDFTextStripper
{
    /// <summary>Starts with two empty result lists.</summary>
    public PrintTextLocations()
    {
        TextPositions = new List<TextPosition>();
        TextWithLocations = new List<TextSizeLocation>();
    }

    /// <summary>Converted entries (position, size, characters) in the order they were received.</summary>
    public List<TextSizeLocation> TextWithLocations { get; set; }

    /// <summary>The raw text positions in the order they were received.</summary>
    public List<TextPosition> TextPositions { get; set; }

    /// <summary>Empties both result lists, then lets the base class process the stream.</summary>
    public override void processStream(PDPage aPage, PDResources resources, COSStream cosStream)
    {
        TextWithLocations.Clear();
        TextPositions.Clear();

        base.processStream(aPage, resources, cosStream);
    }

    /// <summary>
    /// Stores the text position and a <see cref="TextSizeLocation"/> built from its
    /// text-matrix X/Y position, its width and height, and its characters.
    /// </summary>
    protected override void processTextPosition(TextPosition text)
    {
        TextPositions.Add(text);

        var origin = text.getTextPos();
        var entry = new TextSizeLocation(
            origin.getXPosition(),
            origin.getYPosition(),
            text.getWidth(),
            text.getHeight(),
            text.getCharacter());
        TextWithLocations.Add(entry);
    }
}
}