0
点赞
收藏
分享

微信扫一扫

模拟web访问有登录且有验证码的登录后抓取数据

模拟web访问有登录且有验证码的登录后抓取数据

1 取验证码

1 在窗体上放一个picturebox (imgValidate)存放获取的验证码图片,

2 用浏览器(如 Firefox)的开发者工具(按 F12 打开)分析出验证码图片的网址

/// <summary>
/// Downloads the captcha image for the current validate-code key, shows it in
/// <c>imgValidate</c>, and stores the cookies issued with it in <c>cookies</c> /
/// <c>strCookies</c>.
/// </summary>
/// <remarks>
/// <c>strValidCode</c> must already hold the key taken from the login page
/// (it is filled in by <c>GetViewState</c>) before this method is called.
/// </remarks>
private void GetValidateImage()
{
    // Captcha page; strValidCode is the random key that has to be read from the login page first.
    string strUrl = "http://www.xxx.com/ValidateCodePicture.aspx?Key=" + strValidCode;

    // Start from an empty cookie jar; the same instance is attached to the request,
    // so it ends up holding whatever cookies the server sets with the captcha.
    cookies = new CookieContainer();

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(strUrl);

    // Request settings.
    request.Method = "GET";
    request.CookieContainer = cookies;
    request.KeepAlive = true;
    request.ContentType = "text/html";

    // Pretend to be Google Chrome.
    request.UserAgent =
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36";
    request.Headers.Add("x-requested-with:XMLHttpRequest");
    request.Headers.Add(HttpRequestHeader.AcceptLanguage, "zh-CN,zh;q=0.8,en;q=0.6,nl;q=0.4,zh-TW;q=0.2");
    request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8";
    request.AutomaticDecompression = DecompressionMethods.Deflate | DecompressionMethods.GZip;

    // Follow redirects; the body read below is the one of the final page.
    request.AllowAutoRedirect = true;
    request.Headers.Add("Accept-Encoding", "gzip, deflate");

    // Copy the body into a growable buffer instead of sizing an array from
    // response.ContentLength: that property is -1 when the server does not announce
    // a length (chunked or decompressed responses), which made the allocation throw.
    // The MemoryStream is intentionally not disposed, because a Bitmap built from a
    // stream needs that stream to stay open for as long as the Bitmap is in use.
    MemoryStream ms = new MemoryStream();
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream stream = response.GetResponseStream())
    {
        stream.CopyTo(ms);
    }
    ms.Position = 0;

    // Keep the cookies as a header string as well.
    strCookies = cookies.GetCookieHeader(request.RequestUri);

    // Build the bitmap and show it in the picture box.
    Bitmap sourcebm = new Bitmap(ms);
    imgValidate.Image = sourcebm;
}


2 取js赋值的内容
有的网页用查看网页源代码的方式看不到控件的值,需要用到下面的方法

即用C#自带的WebBrowser控件来加载网页,再用webBrowser1.Document来取对应控件的值,如

string strMsg2 = webBrowser1.Document.GetElementById("hdValidateCodeID").OuterHtml;

3 取得要提交的参数
如果是 ASP.NET 的网页,还要提交 "__EVENTTARGET"、"__EVENTARGUMENT"、"__VIEWSTATE" 这三个参数,这些也可以在开发者工具-网络-参数里看到

可以用HttpWebRequest先取得网页源代码,再从中分析出这些参数

这里用的是WebBrowser控件里已经加载好的页面



/// <summary>
/// Reads the <c>__VIEWSTATE</c> and <c>hdValidateCodeID</c> hidden fields from the page
/// loaded in <c>webBrowser1</c>, mirrors them into the two text boxes, and copies the
/// browser's cookies into <c>myCookieContainer</c>.
/// </summary>
/// <exception cref="System.InvalidOperationException">
/// No document is loaded in <c>webBrowser1</c> yet.
/// </exception>
private void GetViewState()
{
    if (webBrowser1.Document == null)
    {
        throw new InvalidOperationException("webBrowser1 has no document loaded yet.");
    }

    // e.g. <INPUT id=__VIEWSTATE type=hidden value=/wEPDwUKMTg0NTk3Mjg2N2Rk name=__VIEWSTATE>
    strViewState = ReadInputValue("__VIEWSTATE", strViewState);

    // e.g. <INPUT id=hdValidateCodeID type=hidden value=c1b52d3a-1f8b-1dc4-0d44-32a4b46ef8af name=hdValidateCodeID>
    strValidCode = ReadInputValue("hdValidateCodeID", strValidCode);

    txtValidCode.Text = strValidCode;
    txtViewState.Text = strViewState;

    // The browser exposes its cookies as one "name=value; name=value" string; each pair
    // has to become a Cookie object before it can go into the CookieContainer.
    // NOTE(review): document.cookie does not include HttpOnly cookies, so a session
    // cookie flagged HttpOnly will not be copied here - confirm against the target site.
    string cookieStr = webBrowser1.Document.Cookie;
    if (string.IsNullOrEmpty(cookieStr))
    {
        return;
    }

    foreach (string pair in cookieStr.Split(';'))
    {
        // Split on the first '=' only, so values that themselves contain '='
        // (base64 padding, nested key=value data) are kept whole.
        int separator = pair.IndexOf('=');
        if (separator <= 0)
        {
            continue; // no name, or not a name=value pair at all
        }

        string cookieName = pair.Substring(0, separator).Trim();
        string cookieValue = pair.Substring(separator + 1).Trim();

        try
        {
            Cookie ck = new Cookie(cookieName, cookieValue);
            ck.Domain = "XXX.com"; // must match the site's real domain
            myCookieContainer.Add(ck);
        }
        catch (CookieException ex)
        {
            // Best effort: a cookie the Cookie class rejects is skipped, not fatal.
            System.Diagnostics.Debug.WriteLine("Skipped cookie '" + cookieName + "': " + ex.Message);
        }
    }
}

/// <summary>
/// Returns the trimmed <c>value</c> attribute of the element with the given id in the
/// document loaded in <c>webBrowser1</c>, or <paramref name="fallback"/> when the page
/// has no such element.
/// </summary>
private string ReadInputValue(string elementId, string fallback)
{
    var element = webBrowser1.Document.GetElementById(elementId);
    if (element == null)
    {
        return fallback;
    }

    // Asking the DOM for the attribute avoids parsing OuterHtml, whose attribute
    // order and quoting are not under our control.
    string value = element.GetAttribute("value");
    return value == null ? string.Empty : value.Trim();
}



4 登录并且存取cookie
提交参数,并存下cookie,供后续用

/// <summary>
/// Posts the login form (user name, password, captcha text, captcha id and view state)
/// to the login page using the cookies in <c>myCookieContainer</c>, then keeps the
/// resulting cookie container in <c>cookies</c> for later requests.
/// </summary>
/// <remarks>
/// <c>strValidCode</c> and <c>strViewState</c> are expected to have been filled in
/// beforehand (they are set by <c>GetViewState</c>).
/// </remarks>
private void Login()
{
    // Drop the previously saved session; it is replaced after the response below.
    cookies = new CookieContainer();

    string strUrl = "http://www.xxx.com/Login.aspx"; // login page

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(strUrl);

    // Request settings.
    request.Method = "POST";
    request.CookieContainer = myCookieContainer;
    request.KeepAlive = true;
    request.ContentType = "application/x-www-form-urlencoded";

    // Pretend to be Google Chrome.
    request.UserAgent =
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36";
    request.Headers.Add("x-requested-with:XMLHttpRequest");
    request.Headers.Add(HttpRequestHeader.AcceptLanguage, "zh-CN,zh;q=0.8,en;q=0.6,nl;q=0.4,zh-TW;q=0.2");
    request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8";
    request.AutomaticDecompression = DecompressionMethods.Deflate | DecompressionMethods.GZip;

    // Follow redirects; the body read below is the one of the final page.
    request.AllowAutoRedirect = true;
    request.Headers.Add("Accept-Encoding", "gzip, deflate");

    // Build the form body. Field names follow the POST captured in the browser's
    // developer tools. Every value is URL-encoded: the view state is base64 and may
    // contain '+', '/' and '=', and user input may contain '&' or '=', all of which
    // would otherwise be misread by the server when it parses the form.
    string postData = string.Format(
        "txtUserName={0}&txtPassword={1}&txtValidateCode={2}&hdValidateCodeID={3}&ddlLanguage=CN&btnLogin={4}&__EVENTTARGET=&__EVENTARGUMENT=&__VIEWSTATE={5}",
        WebUtility.UrlEncode(txtUserName.Text),
        WebUtility.UrlEncode(txtPassword.Text),
        WebUtility.UrlEncode(txtValidate.Text),
        WebUtility.UrlEncode(strValidCode),
        WebUtility.UrlEncode("登录"),
        WebUtility.UrlEncode(strViewState));

    byte[] postdatabyte = Encoding.UTF8.GetBytes(postData);
    request.ContentLength = postdatabyte.Length;

    using (Stream stream = request.GetRequestStream())
    {
        stream.Write(postdatabyte, 0, postdatabyte.Length);
    }

    // Read the whole response so the exchange completes; disposing the response and
    // reader here also releases the connection if reading fails.
    // NOTE(review): the returned page is not inspected, so the UI below reports
    // "logged in" even when the server rejected the credentials or captcha -
    // check strMsg for the site's success marker once it is known.
    string strMsg;
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
    {
        strMsg = reader.ReadToEnd();
    }

    // Save the cookies; later requests can reuse them without logging in again.
    cookies = request.CookieContainer;

    lbLogin.Text = "已登录";
    btnSearchResume.Enabled = true;
}

举报

相关推荐

0 条评论