|
|
using DSWeb.Areas.MvcShipping.DB;
|
|
|
using DSWeb.MvcShipping.Comm;
|
|
|
using Newtonsoft.Json;
|
|
|
using org.apache.pdfbox.cos;
|
|
|
using org.apache.pdfbox.pdmodel;
|
|
|
using org.apache.pdfbox.pdmodel.common;
|
|
|
using org.apache.pdfbox.util;
|
|
|
using System;
|
|
|
using System.Collections.Generic;
|
|
|
using System.Drawing;
|
|
|
using System.IO;
|
|
|
using System.Linq;
|
|
|
using System.Runtime.Caching;
|
|
|
using System.Text;
|
|
|
using System.Web;
|
|
|
using System.Web.Mvc;
|
|
|
|
|
|
namespace DSWeb.MvcShipping.Controllers
|
|
|
{
|
|
|
public class OcrController : Controller
|
|
|
{
|
|
|
// Folder (relative to the application root) that holds uploaded files, converted PDFs and rendered images.
private const string ocrFilePath = "ocr_temp_files";

// Upload extensions accepted by UpOcrFile (lower-case, including the leading dot).
private static readonly string[] allowExts = { ".pdf", ".xls", ".xlsx", ".doc", ".docx", ".rtf" };

// Lifetime of a cached text-layout entry, in minutes.
private const int CacheExpire = 5;
|
|
|
|
|
|
/// <summary>
/// Shows the upload page for this controller.
/// </summary>
/// <param name="errMsg">Text stored under ViewData["errMsg"] for the view; may be null.</param>
/// <returns>The default view of this action.</returns>
[HttpGet]
public ActionResult UpOcrAuto(string errMsg)
{
    // Hand the (optional) message to the view before rendering it.
    ViewData["errMsg"] = errMsg;

    ViewResult page = View();
    return page;
}
|
|
|
|
|
|
/// <summary>
/// Handles a posted document: stores it in the OCR working folder, converts Excel/Word/RTF
/// uploads to PDF, extracts the first page's text layout via ReadPdf, stores a second copy of
/// the upload under UploadFiles/MsOpFiles and records it as a Receipt_Doc row.
/// </summary>
/// <param name="scale">Zoom factor forwarded to ReadPdf for the preview image.</param>
/// <returns>
/// A redirect to SelectRegion (carrying the generated file name) on success; a redirect to
/// UpOcrAuto when no file was posted or the extension is not in the allowed list.
/// </returns>
[HttpPost]
public ActionResult UpOcrFile(float scale = 1.5f)
{
    if (Request.Files.Count == 0)
    {
        return RedirectToAction("UpOcrAuto");
    }

    var upload = Request.Files[0];

    // Invariant lower-casing keeps the whitelist check independent of the server culture;
    // the null-coalescing guards against a missing extension.
    var ext = (Path.GetExtension(upload.FileName) ?? string.Empty).ToLowerInvariant();
    if (!allowExts.Contains(ext))
    {
        return RedirectToAction("UpOcrAuto", new { errMsg = "只能上传pdf、excel和word文件!" });
    }

    // Directory.CreateDirectory is a no-op when the directory already exists.
    var ocrDir = Server.MapPath($"~/{ocrFilePath}");
    Directory.CreateDirectory(ocrDir);

    // The upload is stored under a fresh GUID; the PDF that ReadPdf looks for shares that name.
    var tmpFileName = Guid.NewGuid().ToString();
    var saveFilePathName = Path.Combine(ocrDir, tmpFileName + ext);
    upload.SaveAs(saveFilePathName);

    var pdfPath = Path.Combine(ocrDir, $"{tmpFileName}.pdf");
    if (ext == ".xls" || ext == ".xlsx")
    {
        Aspose.Cells.Workbook wb = new Aspose.Cells.Workbook(saveFilePathName);
        wb.Save(pdfPath, Aspose.Cells.SaveFormat.Pdf);
    }
    else if (ext == ".doc" || ext == ".docx" || ext == ".rtf")
    {
        Aspose.Words.Document doc = new Aspose.Words.Document(saveFilePathName);
        doc.Save(pdfPath, Aspose.Words.SaveFormat.Pdf);
    }

    ReadPdf(tmpFileName, scale);

    #region Attach to document management

    var BSNO = Request.Form["BSNO"];
    var CUSTID = Guid.NewGuid().ToString();
    var path = Server.MapPath("../../UploadFiles/MsOpFiles/" + CUSTID);
    Directory.CreateDirectory(path);

    var name = Path.GetFileName(upload.FileName);
    var usercode = Convert.ToString(Session["USERCODE"]);
    var partname = usercode + DateTime.Now.ToString("yyyyMMddHHmmssfff") + name;

    // The target directory was just created under a new GUID, so no earlier file can be in the way.
    string filename = Path.Combine(path, partname);
    string filenameURL = "../../UploadFiles/MsOpFiles/" + CUSTID + "/" + partname;
    upload.SaveAs(filename);

    // NOTE(review): ReceiptContext exposes SaveChanges(), so it is treated as a disposable
    // data context here; the using block releases it even if SaveChanges throws.
    using (ReceiptContext receiptContext = new ReceiptContext())
    {
        Receipt_Doc recDoc = new Receipt_Doc
        {
            GID = Guid.NewGuid().ToString(),
            BSNO = BSNO,
            URL = name,
            Driect_URL = filenameURL,
            MODIFIEDUSER = Convert.ToString(Session["USERID"]),
            MODIFIEDTIME = DateTime.Now,
            RECEIPTTYPE = string.Empty,
            documentAttachedCode = string.Empty,
            documentAttachedNo = string.Empty
        };

        receiptContext.ReceiptDocs.Add(recDoc);
        receiptContext.SaveChanges();
    }

    #endregion

    return RedirectToAction("SelectRegion", new { fileName = tmpFileName });
}
|
|
|
|
|
|
/// <summary>
/// Shows the region-selection page for a previously uploaded file.
/// </summary>
/// <param name="fileName">File identifier stored under ViewData["FileName"] for the view.</param>
/// <returns>The default view of this action.</returns>
[HttpGet]
public ActionResult SelectRegion(string fileName)
{
    // The view reads the identifier back from ViewData.
    ViewData["FileName"] = fileName;

    ViewResult page = View();
    return page;
}
|
|
|
|
|
|
/// <summary>
/// Returns the text of every cached text fragment whose scaled rectangle intersects the
/// rectangle selected by the client.
/// </summary>
/// <param name="fileName">Identifier of the uploaded file; also the cache key.</param>
/// <param name="x">Left edge of the selection.</param>
/// <param name="y">Top edge of the selection.</param>
/// <param name="w">Width of the selection.</param>
/// <param name="h">Height of the selection.</param>
/// <param name="scale">Factor applied to the fragment coordinates before the intersection test.</param>
/// <returns>
/// The matching fragments' text, with a line break inserted whenever consecutive matches differ
/// in Y by more than 1; an empty string when the name is unusable or nothing is cached for it.
/// </returns>
[HttpPost]
public ContentResult GetRegionText(string fileName, int x, int y, int w, int h, float scale = 1.5f)
{
    // fileName comes from the client and ReadPdf turns it into a path inside the OCR folder.
    // Reject null/empty values (MemoryCache throws on a null key) and anything containing
    // characters that are not valid in a plain file name, such as path separators.
    if (string.IsNullOrEmpty(fileName) || fileName.IndexOfAny(Path.GetInvalidFileNameChars()) >= 0)
    {
        return Content("");
    }

    // Rebuild the cache entry from the PDF on disk when it is missing.
    if (!MemoryCache.Default.Contains(fileName))
    {
        ReadPdf(fileName, scale);
    }

    dynamic cached = MemoryCache.Default[fileName];
    if (cached == null)
    {
        return Content("");
    }

    Rectangle selection = new Rectangle(x, y, w, h);
    List<TextSizeLocation> fragments = cached.Result;
    Size pageSize = cached.Size;

    StringBuilder sb = new StringBuilder();

    // Y of the previously appended fragment. Kept as a nullable float: truncating it to int
    // (and using 0 as "none yet") skewed the 1-unit line-break threshold below.
    float? previousY = null;

    foreach (var item in fragments)
    {
        // Fragment Y is flipped against the page height so it can be compared with the
        // top-left based selection rectangle.
        Rectangle itemRect = new Rectangle(
            (int)(item.X * scale),
            (int)(pageSize.Height * scale - item.Y * scale),
            (int)(item.Width * scale),
            (int)(item.Height * scale));

        if (!itemRect.IntersectsWith(selection))
        {
            continue;
        }

        if (previousY.HasValue && Math.Abs(previousY.Value - item.Y) > 1)
        {
            sb.AppendLine();
        }

        sb.Append(item.Text);
        previousY = item.Y;
    }

    return Content(sb.ToString());
}
|
|
|
|
|
|
|
|
|
/// <summary>
/// Serves the rendered preview image for an uploaded file, drawing it first when the image
/// file is missing but the text layout is still cached.
/// </summary>
/// <param name="fileName">Identifier of the uploaded file; also the cache key.</param>
/// <param name="scale">Zoom factor; it is part of the image file name.</param>
/// <returns>
/// A file result for the image, or null when the name is unusable or no image exists and none
/// could be drawn.
/// </returns>
[HttpGet]
public ActionResult GetCanvasImage(string fileName, float scale = 1.5f)
{
    // fileName comes from the client and becomes part of a path inside the OCR folder.
    // Reject null/empty values (the cache indexer throws on a null key) and anything containing
    // characters that are not valid in a plain file name, such as path separators.
    if (string.IsNullOrEmpty(fileName) || fileName.IndexOfAny(Path.GetInvalidFileNameChars()) >= 0)
    {
        return null;
    }

    var ocrDir = Server.MapPath($"~/{ocrFilePath}");
    var imgPath = Path.Combine(ocrDir, $"{fileName}_{scale}.jpg");

    if (!System.IO.File.Exists(imgPath))
    {
        // No image for this scale yet: draw one from the cached layout, if there is one.
        dynamic cached = MemoryCache.Default[fileName];
        if (cached != null)
        {
            Size size = cached.Size;
            DrawImg((int)(size.Width * scale), (int)(size.Height * scale), fileName, scale);
        }
    }

    if (System.IO.File.Exists(imgPath))
    {
        // "image/jpeg" is the registered media type; "image/jpg" is not.
        return File(imgPath, "image/jpeg");
    }

    return null;
}
|
|
|
|
|
|
/// <summary>
/// Loads "&lt;fileName&gt;.pdf" from the OCR folder, extracts the text fragments of its first
/// page, merges horizontally adjoining fragments, caches the result under
/// <paramref name="fileName"/> and renders the preview image.
/// </summary>
/// <param name="fileName">File name without extension; also used as the cache key.</param>
/// <param name="scale">Zoom factor forwarded to DrawImg.</param>
/// <remarks>
/// Returns without doing anything when the name is unusable, the PDF does not exist, the
/// document has no pages, or the first page has no content stream.
/// </remarks>
private void ReadPdf(string fileName, float scale)
{
    // The name is combined into a path below; refuse anything that is not a plain file name.
    if (string.IsNullOrEmpty(fileName) || fileName.IndexOfAny(Path.GetInvalidFileNameChars()) >= 0)
    {
        return;
    }

    var ocrDir = Server.MapPath($"~/{ocrFilePath}");
    var pdfFileName = Path.Combine(ocrDir, fileName + ".pdf");
    if (!System.IO.File.Exists(pdfFileName))
    {
        return;
    }

    PDDocument document = PDDocument.load(pdfFileName);
    try
    {
        java.util.List allPages = document.getDocumentCatalog().getAllPages();
        if (allPages.size() == 0)
        {
            return;
        }

        // Only the first page is processed.
        PDPage page = (PDPage)allPages.get(0);
        PDStream contents = page.getContents();
        if (contents == null)
        {
            return;
        }

        PrintTextLocations printer = new PrintTextLocations();
        printer.processStream(page, page.findResources(), contents.getStream());

        // Merge each fragment into the previous one when it starts where the previous one ends
        // (within half of its own width) and both have nearly the same height.
        var merged = new List<TextSizeLocation>();
        TextSizeLocation current = null;
        foreach (var item in printer.TextWithLocations)
        {
            bool adjoins = current != null
                && Math.Abs(current.Width + current.X - item.X) <= item.Width / 2
                && Math.Abs(current.Height - item.Height) <= 1;

            if (adjoins)
            {
                current.Width += item.Width;
                current.Text += item.Text;
            }
            else
            {
                current = item.Clone();
                merged.Add(current);
            }
        }

        // Order by Y descending, then by X ascending.
        merged = merged.OrderByDescending(it => it.Y).ThenBy(it => it.X).ToList();

        var cropBox = page.findCropBox();

        // Cache the page size and fragments; Set replaces any existing entry for this key.
        var objStore = new { Size = new Size((int)cropBox.getWidth(), (int)cropBox.getHeight()), Result = merged };
        MemoryCache.Default.Set(fileName, objStore, DateTimeOffset.Now.AddMinutes(CacheExpire));

        // DrawImg reads the entry that was just cached.
        DrawImg((int)(cropBox.getWidth() * scale), (int)(cropBox.getHeight() * scale), fileName, scale);
    }
    finally
    {
        // Close the document even when extraction or rendering throws.
        document.close();
    }
}
|
|
|
|
|
|
/// <summary>
/// Renders the cached text fragments of <paramref name="fileName"/> onto a white bitmap and
/// saves it as "&lt;fileName&gt;_&lt;scale&gt;.jpg" in the OCR folder. Does nothing when no cache entry
/// exists for the file.
/// </summary>
/// <param name="width">Bitmap width in pixels.</param>
/// <param name="height">Bitmap height in pixels.</param>
/// <param name="fileName">Cache key and base name of the image file.</param>
/// <param name="scale">Factor applied to the fragment coordinates and sizes.</param>
private void DrawImg(int width, int height, string fileName, float scale)
{
    var ocrDir = Server.MapPath($"~/{ocrFilePath}");

    // Bitmap, Graphics and Font wrap native GDI+ handles, so each is disposed deterministically.
    using (Bitmap bitmap = new Bitmap(width, height))
    using (Graphics graphics = Graphics.FromImage(bitmap))
    {
        graphics.Clear(Color.White);

        dynamic cached = MemoryCache.Default[fileName];
        if (cached == null)
        {
            return;
        }

        List<TextSizeLocation> fragments = cached.Result;

        foreach (var item in fragments)
        {
            // Pick a font size whose measured width fits the fragment's scaled width.
            float textSize = GetMatchTextSize(item.Width * scale, item.Height * scale, item.Text);
            using (Font font = new Font("黑体", textSize))
            {
                // Fragment Y is flipped against the bitmap height before drawing.
                graphics.DrawString(item.Text, font, Brushes.Black, item.X * scale, bitmap.Height - item.Y * scale);
            }
        }

        var imgPath = Path.Combine(ocrDir, $"{fileName}_{scale}.jpg");

        // Save(path) alone falls back to the PNG encoder for an in-memory bitmap; name the
        // format explicitly so the bytes match the .jpg extension.
        bitmap.Save(imgPath, System.Drawing.Imaging.ImageFormat.Jpeg);
    }
}
|
|
|
|
|
|
/// <summary>
/// Finds a font size for <paramref name="str"/> (font "黑体") by trying sizes from 1 upwards in
/// steps of 0.1 until the measured width exceeds <paramref name="width"/>.
/// </summary>
/// <param name="width">Available width.</param>
/// <param name="height">Not used: only the width is fitted.</param>
/// <param name="str">Text to measure.</param>
/// <returns>
/// The last tried size before the width was exceeded; 1 when size 1 already exceeds the width
/// or when no size below 100 exceeds it.
/// </returns>
private float GetMatchTextSize(float width, float height, string str)
{
    const float step = 0.1f;
    float result = 1;

    // The scratch bitmap only provides a Graphics to measure with. It, the Graphics and every
    // probe Font hold native GDI+ handles, so each is disposed as soon as it is done with.
    using (Bitmap bm = new Bitmap(1, 1))
    using (Graphics g = Graphics.FromImage(bm))
    {
        for (float i = 1; i < 100; i += step)
        {
            float measuredWidth;
            using (Font font = new Font("黑体", i))
            {
                measuredWidth = g.MeasureString(str, font).Width;
            }

            if (measuredWidth > width)
            {
                if (i > 1)
                {
                    result = i - step;
                }

                break;
            }
        }
    }

    return result;
}
|
|
|
}
|
|
|
|
|
|
|
|
|
/// <summary>
/// A piece of text together with its X/Y position and its width and height.
/// </summary>
public class TextSizeLocation
{
    /// <summary>Creates an instance with all members at their default values.</summary>
    public TextSizeLocation()
    {
    }

    /// <summary>Creates an instance from explicit position, size and text values.</summary>
    /// <param name="x">Value for <see cref="X"/>.</param>
    /// <param name="y">Value for <see cref="Y"/>.</param>
    /// <param name="width">Value for <see cref="Width"/>.</param>
    /// <param name="height">Value for <see cref="Height"/>.</param>
    /// <param name="text">Value for <see cref="Text"/>.</param>
    public TextSizeLocation(float x, float y, float width, float height, string text)
    {
        X = x;
        Y = y;
        Width = width;
        Height = height;
        Text = text;
    }

    /// <summary>Horizontal position.</summary>
    public float X { get; set; }

    /// <summary>Vertical position.</summary>
    public float Y { get; set; }

    /// <summary>Width of the text.</summary>
    public float Width { get; set; }

    /// <summary>Height of the text.</summary>
    public float Height { get; set; }

    /// <summary>The text itself.</summary>
    public string Text { get; set; }

    /// <summary>Returns a new, independent instance carrying the same five values.</summary>
    public TextSizeLocation Clone() => new TextSizeLocation(X, Y, Width, Height, Text);
}
|
|
|
|
|
|
/// <summary>
/// A PDFTextStripper that records every text position it is handed, both as the raw
/// TextPosition and as a <see cref="TextSizeLocation"/>.
/// </summary>
public class PrintTextLocations : PDFTextStripper
{
    /// <summary>Creates the stripper with two empty result lists.</summary>
    public PrintTextLocations()
    {
        TextWithLocations = new List<TextSizeLocation>();
        TextPositions = new List<TextPosition>();
    }

    /// <summary>One entry per processed text position: X/Y position, width, height and character.</summary>
    public List<TextSizeLocation> TextWithLocations { get; set; }

    /// <summary>The raw text positions, in the order they were processed.</summary>
    public List<TextPosition> TextPositions { get; set; }

    /// <summary>
    /// Empties both result lists, then lets the base class process the stream.
    /// </summary>
    public override void processStream(PDPage aPage, PDResources resources, COSStream cosStream)
    {
        TextPositions.Clear();
        TextWithLocations.Clear();

        base.processStream(aPage, resources, cosStream);
    }

    /// <summary>
    /// Records one text position in both result lists.
    /// </summary>
    protected override void processTextPosition(TextPosition text)
    {
        TextPositions.Add(text);

        var location = new TextSizeLocation(
            text.getTextPos().getXPosition(),
            text.getTextPos().getYPosition(),
            text.getWidth(),
            text.getHeight(),
            text.getCharacter());
        TextWithLocations.Add(location);
    }
}
|
|
|
} |