易本地工作室-Ebend Software&Network Studio - 群发,采集,管理类软件定制开发服务【易本地工作室】
本工作室提供信息采集,管理,群发类软件定制服务,6年软件定制经验!

通过SOCKET获取HTML源码并自动保存更新COOKIES信息的C#类源码

为了方便获取网页源码,我自己写了这个类。它最主要的特点是能自动保存并更新COOKIES,用起来感觉比WebClient和HttpWebRequest都顺手,嘿嘿……HTTP协议部分是自己实现的,如果遇到协议不兼容之类的问题,可以自己再修改。

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Collections;
using System.Net.Sockets;
using System.Text.RegularExpressions;
using System.IO;
using System.Net;
namespace ClassHttpSocket
{
    /// <summary>
    /// HTTP类的SOCKET实现
    /// </summary>
    public class HttpSocket
    {
         private string sCookies="";
         /// <summary>
         /// COOKIES设置
        /// </summary>
          public string Cookies
        {
            get { return sCookies; }
            set { sCookies = value; }
        }
       
        private string sReferer="";
        /// <summary>
        /// 来源页
        /// </summary>
        public string Referer
        {
            get { return sReferer; }
            set { sReferer = value; }
        }
       
        private string sUserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.1.4322)";
        /// <summary>
        /// 浏览器标识
        /// </summary>
        public string UserAgent
        {
            get { return sUserAgent; }
            set { sUserAgent = value; }
        }
        private string sAccept = "text/html, application/xhtml+xml, */*";
        /// <summary>
        /// 文件类型
        /// </summary>
        public string Accept
        {
            get { return sAccept; }
            set { sAccept = value; }
        }
       
      
        private string sEncoding;
        /// <summary>
        /// 编码
        /// </summary>
        public string Encoding
        {
            get { return sEncoding; }
            set { sEncoding = value; }
        }
        private string sContentType = "application/x-www-form-urlencoded";
        /// <summary>
        /// 类型
        /// </summary>
        public string ContentType
        {
            get { return sContentType; }
            set { sContentType = value; }
        }
        /// <summary>
        /// 构造函数
        /// </summary>
        public HttpSocket()
        {
           
        }
        /// <summary>
        /// 析构函数
        /// </summary>
        ~HttpSocket()
        {
        }
      
        /// <summary> 
        /// Url结构 
        /// </summary> 
        struct UrlInfo
        {
            /// <summary>
            /// 主机地址
            /// </summary>
            public string Host;
            /// <summary>
            /// 端口
            /// </summary>
            public int Port;
            /// <summary>
            /// 访问文件
            /// </summary>
            public string File;
            /// <summary>
            /// 访问参数
            /// </summary>
            public string Body;
        }
        private void SetCookies(string sHtml)
        {
            //Set-Cookie: b_110128=0; domain=.qidian.com; expires=Fri, 15-Sep-2023 15:48:41 GMT; path=/
            string sName = "";
            string sValue = "";
            MatchCollection mc;
            Match m;
            Regex r;
            if (!sCookies.EndsWith(";")&&sCookies!="")
            {
                sCookies += ";";
            }
            r = new Regex("Set-Cookie:\\s*(?<sName>.*?)=(?<sValue>.*?);", RegexOptions.Compiled | RegexOptions.Singleline | RegexOptions.IgnoreCase);
            mc = r.Matches(sHtml);
            for (int i = 0; i < mc.Count; i++)
            {
                
                    sName = mc[i].Groups["sName"].Value.Trim();
                    sValue = mc[i].Groups["sValue"].Value.Trim();
                    r = new Regex(sName + "\\s*=\\s*.*?;", RegexOptions.Compiled | RegexOptions.Singleline | RegexOptions.IgnoreCase);
                    m = r.Match(sCookies);
                    if (m.Success)
                    {
                        sCookies = sCookies.Replace(m.Value, sName + "=" + sValue + ";");
                    }
                    else
                    {
                        sCookies += sName + "=" + sValue+";";
                    }
               
            }
            try
            {
                if (sCookies.StartsWith(";"))
                {
                    sCookies = sCookies.Substring(1, sCookies.Length - 1);
                }
            }
            catch
            {
            }
        }
        /// <summary> 
        /// 解析URL 
        /// </summary> 
        /// <param name="url"></param> 
        /// <returns></returns> 
        private UrlInfo ParseURL(string url)
        {
            if (!url.ToLower().StartsWith("http://"))
            {
                url = "http://" + url;
            }
            UrlInfo urlInfo = new UrlInfo();
            string[] strTemp = null;
            urlInfo.Host = "";
            urlInfo.Port = 80;
            urlInfo.File = "/";
            urlInfo.Body = "";
            int intIndex = url.ToLower().IndexOf("http://");
            if (intIndex != -1)
            {
                url = url.Substring(7);
                intIndex = url.IndexOf("/");
                if (intIndex == -1)
                {
                    urlInfo.Host = url;
                }
                else
                {
                    urlInfo.Host = url.Substring(0, intIndex);
                    url = url.Substring(intIndex);
                    intIndex = urlInfo.Host.IndexOf(":");
                    if (intIndex != -1)
                    {
                        strTemp = urlInfo.Host.Split(':');
                        urlInfo.Host = strTemp[0];
                        int.TryParse(strTemp[1], out urlInfo.Port);
                    }
                    intIndex = url.IndexOf("?");
                    if (intIndex == -1)
                    {
                        urlInfo.File = url;
                    }
                    else
                    {
                        strTemp = url.Split('?');
                        urlInfo.File = strTemp[0];
                        urlInfo.Body = strTemp[1];
                    }
                }
            }
            return urlInfo;
        }
        /// <summary> 
        /// 发出请求并获取响应 
        /// </summary> 
        /// <param name="host"></param> 
        /// <param name="port"></param> 
        /// <param name="body"></param> 
        /// <param name="sCode"></param> 
        /// <returns></returns> 
        private string GetResponse(string host, int port, string body, string sCode, out string sHeaders)
        {
            Encoding encode = System.Text.Encoding.GetEncoding(sCode);
            sHeaders = string.Empty;
            string strResult = string.Empty;
            byte[] bteSend = System.Text.Encoding.ASCII.GetBytes(body);
            byte[] bteReceive = new byte[40960];
            int intLen = 0;
            using (Socket socket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp))
            {
                try
                {
                    socket.Connect(host, port);
                    if (socket.Connected)
                    {
                        socket.Send(bteSend, bteSend.Length, 0);
                        while ((intLen = socket.Receive(bteReceive, bteReceive.Length, 0)) > 0)
                        {
                            strResult += encode.GetString(bteReceive, 0, intLen);
                        }
                    }
                    socket.Close();
                }
                catch { }
            }
            SetCookies(strResult);
            Match m;
            Regex r;
            r = new Regex("^(.*?)\r\n\r\n", RegexOptions.Compiled | RegexOptions.Singleline | RegexOptions.IgnoreCase);
            m = r.Match(strResult);
            if (m.Success)//获取主机头
            {
                sHeaders = m.Value;
                strResult = strResult.Substring(sHeaders.Length, strResult.Length - sHeaders.Length);
                sHeaders=sHeaders.Trim();
            }
            return strResult;
        }
        /// <summary>
        /// 下载文件
        /// </summary>
        /// <param name="sFileUrl">文件URL</param>
        /// <param name="FilePath">文件保存路径</param>
        /// <returns>下载是否成功</returns>
        public bool DownloadFile(string sFileUrl, string sFilePath,out string sMsg)
        {
            sMsg = "";
            if (sFileUrl == "" || sFilePath == "")
            {
                sMsg = "文件URL和文件保存路径都不能为空";
                return false;
            }
            string filePath = sFilePath.Remove(sFilePath.LastIndexOf('\\'));
            if (Directory.Exists(filePath))
            {
                if (File.Exists(sFilePath))
                {
                    File.Delete(sFilePath);
                }
            }
            else
            {
                Directory.CreateDirectory(filePath);
            }
            UrlInfo urlInfo = ParseURL(sFileUrl);
            string strRequest = "";
            if (urlInfo.Body!="")
            {
                 strRequest=string.Format("GET {0}?{1} HTTP/1.1\r\n", urlInfo.File, urlInfo.Body);
            }
            else
            {
                strRequest=string.Format("GET {0} HTTP/1.1\r\n", urlInfo.File);
        
            }
            strRequest += string.Format("Host:{0}:{1}\r\n", urlInfo.Host, urlInfo.Port.ToString());
             strRequest += string.Format("Referer:{0}\r\n", sReferer);
            strRequest += string.Format("User-Agent:{0}\r\n", sUserAgent);
            strRequest += string.Format("Connection:Close\r\n");
            strRequest += string.Format("Cookie:{0}", sCookies); 
            strRequest += "\r\n\r\n";
            string strResult = string.Empty;
            byte[] bteSend = System.Text.Encoding.ASCII.GetBytes(strRequest);
            byte[] bteReceive = new byte[1];
            int intLen = 0;
            
            using (Socket socket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp))
            {
                try
                {
                    socket.Connect(urlInfo.Host, urlInfo.Port);
                    if (socket.Connected)
                    {
                        socket.Send(bteSend, bteSend.Length, 0);
                        while ((intLen = socket.Receive(bteReceive, bteReceive.Length, 0)) > 0)
                        {
                            strResult += System.Text.Encoding.ASCII.GetString(bteReceive, 0, intLen);
                            if (strResult.IndexOf("\r\n\r\n") > -1)
                            {
                                break;
                            }
                        }
                       bteReceive = new byte[1024];
                      Stream so = new System.IO.FileStream(sFilePath, System.IO.FileMode.Create);
                   
                        while ((intLen = socket.Receive(bteReceive, bteReceive.Length, 0)) > 0)
                        {
                            so.Write(bteReceive, 0, intLen);
                        }
                        so.Close();
                        so.Dispose();
                    }
                    socket.Close();
                    
                }
                catch { }
            }
            SetCookies(strResult);
            return true;
        }
        /// <summary>
        /// 下载文件利用WebClient
        /// </summary>
        /// <param name="sFileUrl">文件URL</param>
        /// <param name="FilePath">文件保存路径</param>
        /// <returns>下载是否成功</returns>
        public bool DownloadFile2(string sFileUrl, string sFilePath, out string sMsg)
        {
            sMsg = "";
            if (sFileUrl == "" || sFilePath == "")
            {
                sMsg = "文件URL和文件保存路径都不能为空";
                return false;
            }
            string filePath = sFilePath.Remove(sFilePath.LastIndexOf('\\'));
            if (Directory.Exists(filePath))
            {
                if (File.Exists(sFilePath))
                {
                    File.Delete(sFilePath);
                }
            }
            else
            {
                Directory.CreateDirectory(filePath);
            }
            WebClient WC = new WebClient();
            WC.Headers.Add("Referer",sReferer);
            WC.Headers.Add("Cookie",sCookies);
            WC.Headers.Add("User-Agent", sUserAgent);
            WC.DownloadFile(sFileUrl, sFilePath);
            WC.Dispose();
            return true;
        }
        public string UrlEncode(string str, string CharSet)
        {
            if (CharSet.Trim() == "")
                return str;
            StringBuilder sb = new StringBuilder();
            byte[] byStr = System.Text.Encoding.GetEncoding(CharSet).GetBytes(str); //默认是System.Text.Encoding.Default.GetBytes(str)
            for (int i = 0; i < byStr.Length; i++)
            {
                if (Convert.ToInt32(byStr[i]) < 128)
                {
                    sb.Append((char)byStr[i]);
                }
                else
                {
                    sb.Append(@"%" + Convert.ToString(byStr[i], 16).ToUpper());
                }
            }
            return (sb.ToString());
        }
        /// <summary> 
        /// GET请求 
        /// </summary> 
        /// <param name="url">请求地址</param>
        /// <param name="encode">编码类型</param>
        /// <returns></returns> 
         public string Get(string url, string encode)
        {
            string sHeaders;
            return Get(url, encode,out sHeaders);
        }
         /// <summary> 
         /// GET请求 
         /// </summary> 
         /// <param name="url">请求地址</param>
         /// <param name="encode">编码类型</param>
         /// <param name="sHeaders">HTTP头</param>
         /// <returns></returns> 
     
        public string Get(string url, string encode, out string sHeaders)
        {
            
            UrlInfo urlInfo = ParseURL(url);
            string strRequest = string.Format("GET {0}?{1} HTTP/1.1\r\n", urlInfo.File, urlInfo.Body);
            strRequest += string.Format("Host:{0}:{1}\r\n", urlInfo.Host, urlInfo.Port.ToString());
             strRequest += string.Format("Content-Type:{0}\r\n", ContentType);
            strRequest += string.Format("Referer:{0}\r\n", sReferer);
            strRequest += string.Format("User-Agent:{0}\r\n", sUserAgent);
            strRequest += string.Format("Connection:Close\r\n");
            strRequest += string.Format("Cookie:{0}", sCookies); 
            strRequest += "\r\n\r\n";
            return GetResponse(urlInfo.Host, urlInfo.Port, strRequest, encode, out  sHeaders);
        }
        /// <summary>
        /// POST请求
        /// </summary>
        /// <param name="url">请求地址</param>
        /// <param name="sPostString">请求参数</param>
        /// <param name="encode">编码类型</param>
        /// <returns></returns>
        public string Post(string url, string sPostString, string encode)
        {
            string sHeaders;
          
            return Post(url, sPostString, encode,out sHeaders);
        }
        /// <summary>
        /// POST请求
        /// </summary>
        /// <param name="url">请求地址</param>
        /// <param name="sPostString">请求参数</param>
        /// <param name="encode">编码类型</param>
        /// <param name="sHeaders">HTTP头</param>
        /// <returns></returns>
        public string Post(string url, string sPostString, string encode,out string sHeaders)
        {
            UrlInfo urlInfo = ParseURL(url);
            //urlInfo.Body = sPostString;
            string strRequest = string.Format("POST {0}?{1} HTTP/1.1\r\n", urlInfo.File,urlInfo.Body);
            strRequest += string.Format("Host:{0}:{1}\r\n",urlInfo.Host, urlInfo.Port.ToString());
            strRequest += string.Format("Content-Length:{0}\r\n",  sPostString.Length.ToString());
            strRequest += string.Format("Content-Type:{0}\r\n",ContentType );
            strRequest += string.Format("Referer:{0}\r\n",sReferer);
            strRequest += string.Format("User-Agent:{0}\r\n",sUserAgent);
            strRequest += string.Format("Connection:Close\r\n");
            strRequest += string.Format("Cookie:{0}", sCookies);
            strRequest += string.Format("\r\n\r\n");
            strRequest += string.Format("{0}",sPostString);
            return GetResponse(urlInfo.Host, urlInfo.Port, strRequest, encode, out sHeaders);
        }
    }
}


标签:SOCKET、HTML、COOKIES
分类:C#.NET 原创软件| 发布:adobo| 查看: | 发表时间:2013/11/24
原创文章如转载,请注明:转载自易本地工作室-Ebend Software&Network Studio http://www.ebend.net/
本文链接:http://www.ebend.net/post/ClassHttpSocketWithAutoSaveCookies.html

已经有 ( 0 ) 位网友发表了评论,你也评一评吧!