测试html解析

master^2
jianghaiqing 1 week ago
parent 212363e516
commit c42a8402b8

@ -27,6 +27,8 @@ using static System.Net.Mime.MediaTypeNames;
using RabbitMQ.Client.Events;
using Org.BouncyCastle.Utilities.Bzip2;
using ICSharpCode.SharpZipLib.BZip2;
using StackExchange.Profiling.Internal;
using System.Drawing;
Serve.RunGeneric(additional: services =>
{
@ -796,6 +798,694 @@ public class SyncHisRecord: ISyncHisRecord,ITransient
/*
*/
string mailFilePath = "C:\\Users\\Administrator\\Desktop\\日志\\Your Transport Plan has Changed - Maersk241111.eml";
//string mailFilePath = "C:\\Users\\Administrator\\Desktop\\日志\\Your Transport Plan has Changed - Maersk-带reason.eml";
//读取邮件,解析邮件正文
MimeMessage mimeMsg = MimeMessage.Load(mailFilePath);
HtmlDocument html = new HtmlDocument();
html.LoadHtml(mimeMsg.HtmlBody);
var divWrapperList = html.DocumentNode.SelectNodes("//div[@class='wrapper']");
int perNum = divWrapperList.Count / 4;
List<TransPlanHasChangeDto> list = new List<TransPlanHasChangeDto>();
int start = 1;
int currListIdx = 0;
for (int i = 0; i < divWrapperList.Count; i++)
{
/*
4
1 BILLNO,
2 ETDETA
3
*/
if (start == 1)
{
#region 第一行 BILLNO, 原因
TransPlanHasChangeDto dto = new TransPlanHasChangeDto
{
From = new TransPlanHasChangeDetailDto
{
portList = new List<TransPlanHasChangePortDto>(),
dateList = new List<TransPlanHasChangeDateDto>(),
vesselList = new List<TransPlanHasChangeVesselVoynoDto>()
},
To = new TransPlanHasChangeDetailDto
{
portList = new List<TransPlanHasChangePortDto>(),
dateList = new List<TransPlanHasChangeDateDto>(),
vesselList = new List<TransPlanHasChangeVesselVoynoDto>()
},
Carrier = "MSK",
ContaNoList = new List<string>()
};
list.Add(dto);
currListIdx = list.Count - 1;
var billnoNode = divWrapperList[i].SelectSingleNode("./table/tbody/tr[1]/td[1]/div/table/tbody/tr[1]/td[1]/div[1]/table/tbody/tr[1]/td[1]/table/tbody/tr[1]/td[1]/div");
if (billnoNode != null && !string.IsNullOrWhiteSpace(billnoNode.InnerText))
{
var s = billnoNode.InnerText;
if (Regex.IsMatch(s, "Bill\\s+of\\s+Lading\\s{0,}:"))
{
var name = Regex.Match(s, "Bill\\s+of\\s+Lading\\s{0,}:").Value;
list[currListIdx].BillNo = Regex.Match(s, "(?<=" + name + ")\\s{0,}\\w+").Value?.Trim();
}
}
var bookNoNode = divWrapperList[i].SelectSingleNode("./table/tbody/tr[1]/td[1]/div/table/tbody/tr[1]/td[1]/div[1]/table/tbody/tr[1]/td[1]/table/tbody/tr[2]/td[1]/div");
if (bookNoNode != null && !string.IsNullOrWhiteSpace(bookNoNode.InnerText))
{
var s = bookNoNode.InnerText;
if (Regex.IsMatch(s, "Booking\\s+Number\\s{0,}:"))
{
var name = Regex.Match(s, "Booking\\s+Number\\s{0,}:").Value;
list[currListIdx].BookingNo = Regex.Match(s, "(?<=" + name + ")\\s{0,}\\w+").Value?.Trim();
}
}
var reasonNode = divWrapperList[i].SelectSingleNode("./table/tbody/tr[1]/td[1]/div/table/tbody/tr[1]/td[1]/div[2]/table/tbody/tr[1]/td[1]/table/tbody/tr[1]/td[1]/div");
if (reasonNode != null && !string.IsNullOrWhiteSpace(reasonNode.InnerText) &&
Regex.IsMatch(reasonNode.InnerText, "Reason\\s+for\\s+the\\s+change:"))
{
var reasonTxtNode = divWrapperList[i].SelectSingleNode("./table/tbody/tr[1]/td[1]/div/table/tbody/tr[1]/td[1]/div[2]/table/tbody/tr[1]/td[1]/table/tbody/tr[2]/td[1]/div");
if (reasonTxtNode != null && !string.IsNullOrWhiteSpace(reasonTxtNode.InnerText))
{
list[currListIdx].ChangeReasonNotes = reasonTxtNode.InnerText;
}
}
#endregion
}
else if (start == 3)
{
#region 处理装货港
TransPlanHasChangePortDto from = null;
TransPlanHasChangePortDto from2 = null;
var loadPortNode = divWrapperList[i].SelectSingleNode("./table/tbody/tr[1]/td[1]/div/table/tbody/tr[1]/td[1]/div[1]/table/tbody/tr[1]/td[1]/table/tbody/tr[2]/td/div/div/b");
if (loadPortNode != null && !string.IsNullOrWhiteSpace(loadPortNode.InnerText))
{
var s = Regex.Replace(Regex.Replace(loadPortNode.InnerText, "\r\n", " "), "\\s{2,}", " ").Trim();
from = new TransPlanHasChangePortDto
{
Indx = 1,
PortName = s.Trim(),
IsRemoved = false,
};
if (s.IndexOf(",") >= 0)
{
var sArg = s.Split(new char[] { ',' });
if (sArg.Length == 2)
{
from.PortName = sArg[0].Trim();
from.TerminalName = sArg[1].Trim();
}
}
}
else
{
loadPortNode = divWrapperList[i].SelectSingleNode("./table/tbody/tr[1]/td[1]/div/table/tbody/tr[1]/td[1]/div[1]/table/tbody/tr[1]/td[1]/table/tbody/tr[2]/td/div/div");
if (loadPortNode != null && !string.IsNullOrWhiteSpace(loadPortNode.InnerText))
{
var s = Regex.Replace(Regex.Replace(loadPortNode.InnerText, "\r\n", " "), "\\s{2,}", " ").Trim();
from = new TransPlanHasChangePortDto
{
Indx = 1,
PortName = s.Trim(),
IsRemoved = false,
};
if (s.IndexOf(",") >= 0)
{
var sArg = s.Split(new char[] { ',' });
if (sArg.Length == 2)
{
from.PortName = sArg[0].Trim();
from.TerminalName = sArg[1].Trim();
}
}
}
}
if (from != null)
{
list[currListIdx].From.portList.Add(from);
}
var wasLoadPortNode = divWrapperList[i].SelectSingleNode("./table/tbody/tr[1]/td[1]/div/table/tbody/tr[1]/td[1]/div[1]/table/tbody/tr[1]/td[1]/table/tbody/tr[2]/td/div/div/div");
if (wasLoadPortNode != null && !string.IsNullOrWhiteSpace(wasLoadPortNode.InnerText))
{
var s = wasLoadPortNode.InnerText;
if (Regex.IsMatch(s, "\\bwas\\s+"))
{
s = Regex.Replace(Regex.Replace(s, "\r\n", " "), "\\s{2,}", " ").Trim();
s = Regex.Match(s, "(?<=\\bwas\\s).*").Value;
from2 = new TransPlanHasChangePortDto
{
Indx = 2,
PortName = s.Trim(),
IsRemoved = true,
};
if (s.IndexOf(",") >= 0)
{
var sArg = s.Split(new char[] { ',' });
if (sArg.Length == 2)
{
from2.PortName = sArg[0].Trim();
from2.TerminalName = sArg[1].Trim();
}
}
}
}
if (from2 != null)
{
list[currListIdx].From.portList.Add(from2);
}
#endregion
#region 处理ETD
TransPlanHasChangeDateDto fromETD = null;
TransPlanHasChangeDateDto fromETD2 = null;
var etdNode = divWrapperList[i].SelectSingleNode("./table/tbody/tr[1]/td[1]/div/table/tbody/tr[1]/td[1]/div[1]/table/tbody/tr[1]/td[1]/table/tbody/tr[3]/td/div/div/b");
if (etdNode != null && !string.IsNullOrWhiteSpace(etdNode.InnerText))
{
var s = etdNode.InnerText;
s = Regex.Replace(s, "\r\n", " ").Trim();
s = Regex.Replace(s, "\\(\\s{0,}ETD\\s{0,}\\)", " ").Trim();
s = Regex.Replace(Regex.Replace(s,","," "), "\\s{2,}", " ").Trim();
DateTime etd = DateTime.Parse(s);
fromETD = new TransPlanHasChangeDateDto
{
Indx = 1,
DateVal = etd,
OrigDateTxt = Regex.Replace(etdNode.InnerText, "\r\n", " ").Trim(),
IsRemoved = false,
};
}
else
{
etdNode = divWrapperList[i].SelectSingleNode("./table/tbody/tr[1]/td[1]/div/table/tbody/tr[1]/td[1]/div[1]/table/tbody/tr[1]/td[1]/table/tbody/tr[3]/td/div/div");
if (etdNode != null && !string.IsNullOrWhiteSpace(etdNode.InnerText))
{
var s = etdNode.InnerText;
s = Regex.Replace(s, "\r\n", " ").Trim();
s = Regex.Replace(s, "\\(\\s{0,}ETD\\s{0,}\\)", " ").Trim();
s = Regex.Replace(Regex.Replace(s, ",", " "), "\\s{2,}", " ").Trim();
DateTime etd = DateTime.Parse(s);
fromETD = new TransPlanHasChangeDateDto
{
Indx = 1,
DateVal = etd,
OrigDateTxt = Regex.Replace(etdNode.InnerText, "\r\n", " ").Trim(),
IsRemoved = false,
};
}
}
if (fromETD != null)
{
list[currListIdx].From.dateList.Add(fromETD);
}
var wasETDNode = divWrapperList[i].SelectSingleNode("./table/tbody/tr[1]/td[1]/div/table/tbody/tr[1]/td[1]/div[1]/table/tbody/tr[1]/td[1]/table/tbody/tr[3]/td/div/div/div");
if (wasETDNode != null && !string.IsNullOrWhiteSpace(wasETDNode.InnerText))
{
var s = wasETDNode.InnerText;
if (Regex.IsMatch(s, "\\bwas\\s+"))
{
s = Regex.Replace(Regex.Replace(s, "\r\n", " "), "\\s{2,}", " ").Trim();
s = Regex.Match(s, "(?<=\\bwas\\s).*").Value;
s = Regex.Replace(s, "\\(\\s{0,}ETD\\s{0,}\\)", " ").Trim();
s = Regex.Replace(Regex.Replace(s, ",", " "), "\\s{2,}", " ").Trim();
DateTime etd = DateTime.Parse(s);
fromETD2 = new TransPlanHasChangeDateDto
{
Indx = 2,
DateVal = etd,
OrigDateTxt = Regex.Replace(wasETDNode.InnerText, "\r\n", " ").Trim(),
IsRemoved = true,
};
}
}
if (fromETD2 != null)
{
list[currListIdx].From.dateList.Add(fromETD2);
}
#endregion
#region 处理船名
TransPlanHasChangeVesselVoynoDto vessel = null;
TransPlanHasChangeVesselVoynoDto vessel2 = null;
var vesselNode = divWrapperList[i].SelectSingleNode("./table/tbody/tr[1]/td[1]/div/table/tbody/tr[1]/td[1]/div[1]/table/tbody/tr[1]/td[1]/table/tbody/tr[4]/td/div/div/b");
if (vesselNode != null && !string.IsNullOrWhiteSpace(vesselNode.InnerText))
{
var s = Regex.Replace(Regex.Replace(vesselNode.InnerText, "\r\n", " "), "\\s{2,}", " ").Trim();
vessel = new TransPlanHasChangeVesselVoynoDto
{
Indx = 1,
Vessel = s.Trim(),
IsRemoved = false,
};
}
else
{
vesselNode = divWrapperList[i].SelectSingleNode("./table/tbody/tr[1]/td[1]/div/table/tbody/tr[1]/td[1]/div[1]/table/tbody/tr[1]/td[1]/table/tbody/tr[4]/td/div/div");
if (vesselNode != null && !string.IsNullOrWhiteSpace(vesselNode.InnerText))
{
var s = Regex.Replace(Regex.Replace(vesselNode.InnerText, "\r\n", " "), "\\s{2,}", " ").Trim();
vessel = new TransPlanHasChangeVesselVoynoDto
{
Indx = 1,
Vessel = s.Trim(),
IsRemoved = false,
};
}
}
if (vessel != null)
{
list[currListIdx].From.vesselList.Add(vessel);
}
var wasVesselNode = divWrapperList[i].SelectSingleNode("./table/tbody/tr[1]/td[1]/div/table/tbody/tr[1]/td[1]/div[1]/table/tbody/tr[1]/td[1]/table/tbody/tr[4]/td/div/div/div");
if (wasVesselNode != null && !string.IsNullOrWhiteSpace(wasVesselNode.InnerText))
{
var s = wasVesselNode.InnerText;
if (Regex.IsMatch(s, "\\bwas\\s+"))
{
s = Regex.Replace(Regex.Replace(s, "\r\n", " "), "\\s{2,}", " ").Trim();
s = Regex.Match(s, "(?<=\\bwas\\s).*").Value;
vessel2 = new TransPlanHasChangeVesselVoynoDto
{
Indx = 2,
Vessel = s.Trim(),
IsRemoved = true,
};
}
}
if (vessel2 != null)
{
list[currListIdx].From.vesselList.Add(vessel2);
}
#endregion
#region 处理卸货港
TransPlanHasChangePortDto to = null;
TransPlanHasChangePortDto to2 = null;
var dischargeNode = divWrapperList[i].SelectSingleNode("./table/tbody/tr[1]/td[1]/div/table/tbody/tr[1]/td[1]/div[2]/table/tbody/tr[1]/td[1]/table/tbody/tr[2]/td/div/div/b");
if (dischargeNode != null && !string.IsNullOrWhiteSpace(dischargeNode.InnerText))
{
var s = Regex.Replace(Regex.Replace(dischargeNode.InnerText, "\r\n", " "), "\\s{2,}", " ").Trim();
to = new TransPlanHasChangePortDto
{
Indx = 1,
PortName = s.Trim(),
IsRemoved = false,
};
if (s.IndexOf(",") >= 0)
{
var sArg = s.Split(new char[] { ',' });
if (sArg.Length == 2)
{
to.PortName = sArg[0].Trim();
to.TerminalName = sArg[1].Trim();
}
}
}
else
{
dischargeNode = divWrapperList[i].SelectSingleNode("./table/tbody/tr[1]/td[1]/div/table/tbody/tr[1]/td[1]/div[2]/table/tbody/tr[1]/td[1]/table/tbody/tr[2]/td/div/div");
if (dischargeNode != null && !string.IsNullOrWhiteSpace(dischargeNode.InnerText))
{
var s = Regex.Replace(Regex.Replace(dischargeNode.InnerText, "\r\n", " "), "\\s{2,}", " ").Trim();
to = new TransPlanHasChangePortDto
{
Indx = 1,
PortName = s.Trim(),
IsRemoved = false,
};
if (s.IndexOf(",") >= 0)
{
var sArg = s.Split(new char[] { ',' });
if (sArg.Length == 2)
{
to.PortName = sArg[0].Trim();
to.TerminalName = sArg[1].Trim();
}
}
}
}
if (to != null)
{
list[currListIdx].To.portList.Add(to);
}
var wasDischargeNode = divWrapperList[i].SelectSingleNode("./table/tbody/tr[1]/td[1]/div/table/tbody/tr[1]/td[1]/div[2]/table/tbody/tr[1]/td[1]/table/tbody/tr[2]/td/div/div/div");
if (wasDischargeNode != null && !string.IsNullOrWhiteSpace(wasDischargeNode.InnerText))
{
var s = wasDischargeNode.InnerText;
if (Regex.IsMatch(s, "\\bwas\\s+"))
{
s = Regex.Replace(Regex.Replace(s, "\r\n", " "), "\\s{2,}", " ").Trim();
s = Regex.Match(s, "(?<=\\bwas\\s).*").Value;
to2 = new TransPlanHasChangePortDto
{
Indx = 2,
PortName = s.Trim(),
IsRemoved = true,
};
if (s.IndexOf(",") >= 0)
{
var sArg = s.Split(new char[] { ',' });
if (sArg.Length == 2)
{
to2.PortName = sArg[0].Trim();
to2.TerminalName = sArg[1].Trim();
}
}
}
}
if (to2 != null)
{
list[currListIdx].To.portList.Add(to2);
}
#endregion
#region Process ETA (discharge side)
TransPlanHasChangeDateDto toETA = null;
TransPlanHasChangeDateDto toETA2 = null;
var etaNode = divWrapperList[i].SelectSingleNode("./table/tbody/tr[1]/td[1]/div/table/tbody/tr[1]/td[1]/div[2]/table/tbody/tr[1]/td[1]/table/tbody/tr[3]/td/div/div/b");
if (etaNode != null && !string.IsNullOrWhiteSpace(etaNode.InnerText))
{
var s = etaNode.InnerText;
s = Regex.Replace(s, "\r\n", " ").Trim();
s = Regex.Replace(s, "\\(\\s{0,}ETA\\s{0,}\\)", " ").Trim();
s = Regex.Replace(Regex.Replace(s, ",", " "), "\\s{2,}", " ").Trim();
DateTime eta = DateTime.Parse(s);
toETA = new TransPlanHasChangeDateDto
{
Indx = 1,
DateVal = eta,
OrigDateTxt = Regex.Replace(etaNode.InnerText, "\r\n", " ").Trim(),
IsRemoved = false,
};
}
else
{
etaNode = divWrapperList[i].SelectSingleNode("./table/tbody/tr[1]/td[1]/div/table/tbody/tr[1]/td[1]/div[2]/table/tbody/tr[1]/td[1]/table/tbody/tr[3]/td/div/div");
if (etaNode != null && !string.IsNullOrWhiteSpace(etaNode.InnerText))
{
var s = etaNode.InnerText;
s = Regex.Replace(s, "\r\n", " ").Trim();
s = Regex.Replace(s, "\\(\\s{0,}ETA\\s{0,}\\)", " ").Trim();
s = Regex.Replace(Regex.Replace(s, ",", " "), "\\s{2,}", " ").Trim();
DateTime eta = DateTime.Parse(s);
toETA = new TransPlanHasChangeDateDto
{
Indx = 1,
DateVal = eta,
OrigDateTxt = Regex.Replace(etaNode.InnerText, "\r\n", " ").Trim(),
IsRemoved = false,
};
}
}
if (toETA != null)
{
list[currListIdx].To.dateList.Add(toETA);
}
var wasETANode = divWrapperList[i].SelectSingleNode("./table/tbody/tr[1]/td[1]/div/table/tbody/tr[1]/td[1]/div[2]/table/tbody/tr[1]/td[1]/table/tbody/tr[3]/td/div/div/div");
if (wasETANode != null && !string.IsNullOrWhiteSpace(wasETANode.InnerText))
{
var s = wasETANode.InnerText;
if (Regex.IsMatch(s, "\\bwas\\s+"))
{
s = Regex.Replace(Regex.Replace(s, "\r\n", " "), "\\s{2,}", " ").Trim();
s = Regex.Match(s, "(?<=\\bwas\\s).*").Value;
s = Regex.Replace(s, "\\(\\s{0,}ETA\\s{0,}\\)", " ").Trim();
s = Regex.Replace(Regex.Replace(s, ",", " "), "\\s{2,}", " ").Trim();
DateTime eta = DateTime.Parse(s);
toETA2 = new TransPlanHasChangeDateDto
{
Indx = 2,
DateVal = eta,
OrigDateTxt = Regex.Replace(wasETANode.InnerText, "\r\n", " ").Trim(),
IsRemoved = true,
};
}
}
if (toETA2 != null)
{
list[currListIdx].To.dateList.Add(toETA2);
}
#endregion
#region 处理船名
TransPlanHasChangeVesselVoynoDto vesselPOD = null;
TransPlanHasChangeVesselVoynoDto vesselPOD2 = null;
var vesselPODNode = divWrapperList[i].SelectSingleNode("./table/tbody/tr[1]/td[1]/div/table/tbody/tr[1]/td[1]/div[2]/table/tbody/tr[1]/td[1]/table/tbody/tr[4]/td/div/div/b");
if (vesselPODNode != null && !string.IsNullOrWhiteSpace(vesselPODNode.InnerText))
{
var s = Regex.Replace(Regex.Replace(vesselPODNode.InnerText, "\r\n", " "), "\\s{2,}", " ").Trim();
vesselPOD = new TransPlanHasChangeVesselVoynoDto
{
Indx = 1,
Vessel = s.Trim(),
IsRemoved = false,
};
}
else
{
vesselNode = divWrapperList[i].SelectSingleNode("./table/tbody/tr[1]/td[1]/div/table/tbody/tr[1]/td[1]/div[2]/table/tbody/tr[1]/td[1]/table/tbody/tr[4]/td/div/div");
if (vesselNode != null && !string.IsNullOrWhiteSpace(vesselNode.InnerText))
{
var s = Regex.Replace(Regex.Replace(vesselNode.InnerText, "\r\n", " "), "\\s{2,}", " ").Trim();
vesselPOD = new TransPlanHasChangeVesselVoynoDto
{
Indx = 1,
Vessel = s.Trim(),
IsRemoved = false,
};
}
}
if (vesselPOD != null)
{
list[currListIdx].To.vesselList.Add(vesselPOD);
}
var wasVesselPODNode = divWrapperList[i].SelectSingleNode("./table/tbody/tr[1]/td[1]/div/table/tbody/tr[1]/td[1]/div[2]/table/tbody/tr[1]/td[1]/table/tbody/tr[4]/td/div/div/div");
if (wasVesselPODNode != null && !string.IsNullOrWhiteSpace(wasVesselPODNode.InnerText))
{
var s = wasVesselPODNode.InnerText;
if (Regex.IsMatch(s, "\\bwas\\s+"))
{
s = Regex.Replace(Regex.Replace(s, "\r\n", " "), "\\s{2,}", " ").Trim();
s = Regex.Match(s, "(?<=\\bwas\\s).*").Value;
vesselPOD2 = new TransPlanHasChangeVesselVoynoDto
{
Indx = 2,
Vessel = s.Trim(),
IsRemoved = true,
};
}
}
if (vesselPOD2 != null)
{
list[currListIdx].To.vesselList.Add(vesselPOD2);
}
#endregion
}
//每4行处理一次
if ((start + 1) == 5)
{
start = 1;
}
else
{
start++;
}
}
var jsonRlt = JSON.Serialize(list);
}
/// <summary>
/// Extracts the text that follows <paramref name="strStart"/> in a mail subject,
/// optionally cut off at the first occurrence of <paramref name="strEnd"/>.
/// </summary>
/// <param name="subjectText">Subject text to search. Null or empty yields an empty result.</param>
/// <param name="strStart">Marker that precedes the wanted value.</param>
/// <param name="strEnd">Optional marker that terminates the value; when null, empty, or not found the rest of the text is used.</param>
/// <returns>The trimmed value, or <see cref="string.Empty"/> when the start marker is not present.</returns>
/// <exception cref="ArgumentNullException"><paramref name="strStart"/> is null while <paramref name="subjectText"/> is not empty.</exception>
public string SplitSubjectValue(string subjectText, string strStart, string strEnd = null)
{
    if (string.IsNullOrEmpty(subjectText))
    {
        return string.Empty;
    }

    // Ordinal search: the markers are literal keywords, and culture-sensitive
    // IndexOf can miss or mis-position matches (e.g. "\n" inside "\r\n" under ICU).
    var startIdx = subjectText.IndexOf(strStart, StringComparison.Ordinal);
    if (startIdx < 0)
    {
        return string.Empty;
    }

    var value = subjectText.Substring(startIdx + strStart.Length);
    if (!string.IsNullOrEmpty(strEnd))
    {
        var endIdx = value.IndexOf(strEnd, StringComparison.Ordinal);
        if (endIdx > -1)
        {
            value = value.Substring(0, endIdx);
        }
    }

    return value.Trim();
}
/// <summary>
/// Extracts a field value from a plain-text mail body according to line markers.
/// </summary>
/// <remarks>
/// The body is split into non-empty lines on CR/LF. When <paramref name="afterLine"/> is given and a
/// line starting with it is found, only the lines after that anchor are searched; when the anchor is
/// not found the whole body is searched. All marker comparisons are ordinal and are made against the
/// trimmed line.
/// </remarks>
/// <param name="bodyText">Mail body (plain text). Null or empty yields an empty result.</param>
/// <param name="strStart">Keyword the wanted line starts with. When null or empty, the first line after <paramref name="afterLine"/> is returned.</param>
/// <param name="strEnd">Optional keyword that terminates the value within the line.</param>
/// <param name="afterLine">Optional anchor; extraction starts from the line following the first line that starts with this text.</param>
/// <returns>The trimmed field value, or <see cref="string.Empty"/> when nothing matches.</returns>
public string SplitFieldValue(string bodyText, string strStart, string strEnd = null, string afterLine = null)
{
    if (string.IsNullOrEmpty(bodyText))
    {
        return string.Empty;
    }

    var lines = new List<string>(bodyText.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries));

    if (!string.IsNullOrEmpty(afterLine))
    {
        var anchorIdx = lines.FindIndex(l => l.Trim().StartsWith(afterLine, StringComparison.Ordinal));
        if (anchorIdx > -1)
        {
            // Drop the anchor line and everything before it.
            lines.RemoveRange(0, anchorIdx + 1);
        }
    }

    if (string.IsNullOrEmpty(strStart))
    {
        // No start marker: the value is the line right after the anchor.
        // Without an anchor, or with nothing left after it, there is nothing to return.
        if (string.IsNullOrEmpty(afterLine) || lines.Count == 0)
        {
            return string.Empty;
        }

        return lines[0].Trim();
    }

    // Match and slice on the trimmed line so leading whitespace cannot shift the offset.
    var lineFind = lines
        .Select(l => l.Trim())
        .FirstOrDefault(l => l.StartsWith(strStart, StringComparison.Ordinal));
    if (lineFind == null)
    {
        return string.Empty;
    }

    // Strip only the leading marker; later occurrences of the same text belong to the value.
    var value = lineFind.Substring(strStart.Length);
    if (!string.IsNullOrEmpty(strEnd))
    {
        var endIdx = value.IndexOf(strEnd, StringComparison.Ordinal);
        if (endIdx > -1)
        {
            value = value.Substring(0, endIdx);
        }
    }

    return value.Trim();
}
public void SyncServiceProjectRecord5()

Loading…
Cancel
Save