使用Jsoup进行链接提取
// Parse the stored page content and select every <img> element that has a src attribute.
Document doc = Jsoup.parse(info.getWebc().getWebContent());
Elements srcLinks = doc.select("img[src]");
String imagesPath = "";
for (Element link : srcLinks) {
    // Trim before inspecting the value so surrounding whitespace cannot defeat the prefix check.
    String imagesPaths = link.attr("src").trim();
    // Only root-relative sources (leading "/") are handled. A value that starts with "/"
    // can never start with "http", so this single test is equivalent to the former
    // substring(0, 4)/substring(0, 1) pair, and unlike substring it does not throw on
    // values shorter than four characters or on an empty src.
    // NOTE(review): protocol-relative URLs ("//host/x.png") also start with "/" and are
    // therefore treated as local, as before — confirm whether they should be skipped.
    if (imagesPaths.startsWith("/")) {
        // Literal replacement: replaceAll would interpret ContextPath as a regular expression.
        imagesPath = imagesPaths.replace(ContextPath, "");
        // Keep only the file name, i.e. whatever follows the last "/".
        imagesPath = imagesPath.substring(imagesPath.lastIndexOf("/") + 1);
    } else {
        imagesPath = "";
    }
    if (!imagesPath.isEmpty()) {
        // Directory is built from the site, column and key identifiers; the same value is
        // passed both as the prefix of the first argument and as the last argument.
        String imageTargetDir = "/html/" + strSiteID + "/" + strColumnID + "/" + keyID + "/";
        importCopy(imageTargetDir + imagesPath, path, strInfoID, imageTargetDir);
    }
}
// Walk every <a> element that has an href attribute and pass the file name it points to
// on to importCopy.
String filePaths = "";
Elements linehrefs = doc.select("a[href]");
for (Element link : linehrefs) {
    // Literal replacement: replaceAll would interpret ContextPath as a regular expression,
    // so a context path containing characters such as "." would be matched incorrectly.
    filePaths = link.attr("href").trim().replace(ContextPath, "");
    // Keep only the file name, i.e. whatever follows the last "/".
    filePaths = filePaths.substring(filePaths.lastIndexOf("/") + 1);
    // NOTE(review): unlike the image loop, no check is made that the href is a
    // root-relative path, so absolute URLs, "#anchor" and "mailto:" style values also
    // reach importCopy — confirm whether that is intended.
    if (!filePaths.isEmpty()) {
        // Directory is built from the site, column and key identifiers; the same value is
        // passed both as the prefix of the first argument and as the last argument.
        String linkTargetDir = "/html/" + strSiteID + "/" + strColumnID + "/" + keyID + "/";
        importCopy(linkTargetDir + filePaths, path, strInfoID, linkTargetDir);
    }
}