Java自动登录并抓取网页相关内容
JAVA自动登录并抓取内容!!!
package com.taobao.test;

import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;

import javax.swing.JOptionPane;

import org.jsoup.Connection;
import org.jsoup.Connection.Method;
import org.jsoup.Connection.Response;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

/**
 * Logs in to the CNZZ statistics site with a form POST, then fetches the
 * per-site overview and online-visitor pages using the session cookies and
 * shows the extracted figures in a Swing message dialog.
 */
public class Test {

    private static final Logger LOG = Logger.getLogger(Test.class.getName());

    /** Connect/read timeout applied to every request, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 10000;

    /** Placeholder inside the URL templates that is replaced by the site id. */
    private static final String WEB_ID_PLACEHOLDER = "{WEBID}";

    private static final String LOGIN_URL = "http://new.cnzz.com/user/login.php";
    private static final String SITE_LIST_URL = "http://new.cnzz.com/v1/main.php?s=site_list";
    private static final String MAIN_URL_TEMPLATE =
            "http://new.cnzz.com/v1/go_site.php?siteid={WEBID}&s_id=402";
    private static final String ONLINE_URL_TEMPLATE =
            "http://new.cnzz.com/v1/main.php?siteid={WEBID}&s=online_list";

    /**
     * Number of {@code <td>} cells a statistics row must have; cells 1..5 are
     * read, so at least six must be present.
     */
    private static final int REQUIRED_CELLS = 6;

    /**
     * Entry point: logs in with the configured credentials and shows the
     * statistics dialog for each configured site id.
     *
     * @param args unused
     * @throws Exception declared for compatibility; failures of the HTTP
     *                   requests themselves are logged rather than thrown
     */
    public static void main(String[] args) throws Exception {
        Map<String, String> loginParams = new HashMap<String, String>();
        loginParams.put("username", "xxxx@123.com");
        loginParams.put("password", "xxxxx");
        loginParams.put("list", "1");
        loginParams.put("remuser", "on");

        Object[] result = loginWebSize(LOGIN_URL, loginParams, null);
        if (result == null) {
            // loginWebSize already logged the cause; without cookies there is nothing to fetch.
            LOG.severe("Login failed; no statistics can be shown.");
            return;
        }

        // Safe: loginWebSize always stores Response.cookies() (a Map<String, String>) at index 1.
        @SuppressWarnings("unchecked")
        Map<String, String> cookies = (Map<String, String>) result[1];

        showWebInfo("1421407", cookies, "xxxx");
        showWebInfo("2842384", cookies, "xxxx");
    }

    /**
     * Fetches the overview and online-visitor pages of one site and shows the
     * extracted figures in a message dialog.
     *
     * <p>If the overview page cannot be loaded, a warning is logged and no
     * dialog is shown. If a statistics row is missing or too short, or the
     * online page cannot be loaded, that part is skipped (with a warning) and
     * the dialog shows whatever was collected.
     *
     * @param webId   site id substituted into the page URLs
     * @param cookies session cookies to send; may be {@code null}
     * @param webName label printed as the first line of the dialog text
     */
    public static void showWebInfo(String webId, Map<String, String> cookies, String webName) {
        String mainUrl = MAIN_URL_TEMPLATE.replace(WEB_ID_PLACEHOLDER, webId);
        String onlineUrl = ONLINE_URL_TEMPLATE.replace(WEB_ID_PLACEHOLDER, webId);

        Document overview = getUrlDocument(mainUrl, cookies, SITE_LIST_URL, false);
        if (overview == null) {
            LOG.warning("Could not load overview page for site " + webId + "; dialog not shown.");
            return;
        }

        // append(String) renders a null webName as "null", like the original string concatenation.
        StringBuilder msg = new StringBuilder().append(webName).append("\n");

        Elements rows = overview.select(".gktable tr:gt(1)");
        for (int i = 0; i < 2; i++) {
            Elements cells = rows.eq(i).select("td");
            if (cells.size() < REQUIRED_CELLS) {
                // Happens when the table is absent or shaped differently (e.g. login was rejected).
                LOG.warning("Statistics row " + i + " for site " + webId + " has "
                        + cells.size() + " cells, expected at least " + REQUIRED_CELLS + "; skipped.");
                continue;
            }
            msg.append(i == 0 ? "今日统计\n" : "昨日统计\n");
            msg.append("PV:").append(cells.get(1).text()).append("\n");
            msg.append("独立访客:").append(cells.get(2).text()).append("\n");
            msg.append("IP:").append(cells.get(3).text()).append("\n");
            msg.append("新独立访客:").append(cells.get(4).text()).append("\n");
            msg.append("人均浏览次数:").append(cells.get(5).text()).append("\n\n");
        }

        // The referrer is the substituted overview URL, not the raw template with "{WEBID}" in it.
        Document online = getUrlDocument(onlineUrl, cookies, mainUrl, false);
        if (online == null) {
            LOG.warning("Could not load online-visitor page for site " + webId + "; section skipped.");
        } else {
            appendLabelledNumber(msg, online.select(".online_d1"));
            appendLabelledNumber(msg, online.select(".online_d2"));
        }

        // INFORMATION_MESSAGE is the message type; the former YES_OPTION (0) equals ERROR_MESSAGE.
        JOptionPane.showMessageDialog(null, msg.toString(), "CNZZ统计", JOptionPane.INFORMATION_MESSAGE);
    }

    /** Appends "<.num3 html>:<.num4 html>\n" taken from the given container elements. */
    private static void appendLabelledNumber(StringBuilder msg, Elements container) {
        msg.append(container.select(".num3").html())
           .append(":")
           .append(container.select(".num4").html())
           .append("\n");
    }

    /**
     * Sends the login form as an HTTP POST, following redirects.
     *
     * @param loginUrl    URL the form is posted to
     * @param loginParams form fields to send; may be {@code null} for none
     * @param referrer    value for the Referer header; ignored when {@code null} or empty
     * @return a two-element array {@code {Response, Map<String, String> cookies}},
     *         or {@code null} if the request failed with an {@link IOException}
     *         (the failure is logged)
     */
    public static Object[] loginWebSize(String loginUrl, Map<String, String> loginParams, String referrer) {
        Connection conn = Jsoup.connect(loginUrl)
                .method(Method.POST)
                .timeout(TIMEOUT_MILLIS)
                .followRedirects(true);
        if (referrer != null && !referrer.isEmpty()) {
            conn.referrer(referrer);
        }
        if (loginParams != null) {
            conn.data(loginParams);
        }
        try {
            Response response = conn.execute();
            return new Object[] {response, response.cookies()};
        } catch (IOException ex) {
            // Only the URL is logged; the form fields contain the password.
            LOG.log(Level.WARNING, "Login request to " + loginUrl + " failed", ex);
            return null;
        }
    }

    /**
     * Fetches a URL and parses the response as an HTML document.
     *
     * @param url          URL to request
     * @param cookies      cookies to send; may be {@code null} for none
     * @param referrer     value for the Referer header; ignored when {@code null} or empty
     * @param isPostMethod {@code true} to use POST, {@code false} to use GET
     * @return the parsed document, or {@code null} if the request failed with
     *         an {@link IOException} (the failure is logged)
     */
    public static Document getUrlDocument(String url, Map<String, String> cookies, String referrer,
            boolean isPostMethod) {
        Connection conn = Jsoup.connect(url).timeout(TIMEOUT_MILLIS).followRedirects(true);
        if (referrer != null && !referrer.isEmpty()) {
            conn.referrer(referrer);
        }
        setCookies(conn, cookies);
        try {
            // post()/get() select the HTTP method themselves, so no separate method(...) call is needed.
            return isPostMethod ? conn.post() : conn.get();
        } catch (IOException ex) {
            LOG.log(Level.WARNING, "Request to " + url + " failed", ex);
            return null;
        }
    }

    /**
     * Adds every entry of {@code cookies} to the connection's request.
     *
     * @param conn    connection to add the cookies to
     * @param cookies cookie name/value pairs; nothing is done when {@code null}
     */
    public static void setCookies(Connection conn, Map<String, String> cookies) {
        if (cookies == null) {
            return;
        }
        conn.cookies(cookies);
    }
}