Imports System.IO Imports System.Net Imports System.Text.RegularExpressions Imports ICSharpCode.SharpZipLib ''' <summary> ''' This is a class similar to <c>System.Net.WebClient</c>, but with: ''' - Autodetection of content encoding ''' - Proxy support ''' - Authentication support ''' - Cookie retention ''' - HTTP compression (via SharpZipLib) ''' - IfModifiedSince ''' </summary> ''' <remarks> ''' Jeff Atwood ''' http://www.codinghorror.com/ ''' </remarks> Friend Class WebClientEx Private Const _AcceptedEncodings As String = "gzip,deflate" Private _DefaultEncoding As System.Text.Encoding Private _DetectedEncoding As System.Text.Encoding Private _ForcedEncoding As System.Text.Encoding Private _DetectedContentType As String Private _ContentLocation As String Private _ResponseBytes As Byte() Private _AuthenticationRequired As Boolean Private _ProxyAuthenticationRequired As Boolean Private _ProxyUrl As String Private _ProxyUser As String Private _ProxyPassword As String Private _AuthenticationUser As String Private _AuthenticationPassword As String Private _KeepCookies As Boolean Private _RequestTimeoutMilliseconds As Integer Private _PersistedCookies As CookieContainer '-- http://www.zytrax.com/tech/web/browser_ids.htm Private _HttpUserAgent As String Public Sub New() '-- sets default values Clear() End Sub #Region " Properties" ''' <summary> ''' this is the string encoding that was autodetected from the HTML content ''' </summary> Public ReadOnly Property DetectedEncoding() As System.Text.Encoding Get Return _DetectedEncoding End Get End Property ''' <summary> ''' Bypass detection of content encoding and force a specific encoding ''' </summary> Public Property ForcedEncoding() As System.Text.Encoding Get Return _ForcedEncoding End Get Set(ByVal Value As System.Text.Encoding) _ForcedEncoding = Value End Set End Property ''' <summary> ''' if the correct string encoding type cannot be detected, or detection is disabled ''' this is the default string encoding that will be used. ''' </summary> Public Property DefaultEncoding() As System.Text.Encoding Get Return _DefaultEncoding End Get Set(ByVal Value As System.Text.Encoding) _DefaultEncoding = Value End Set End Property ''' <summary> ''' this is the string encoding that was autodetected from the HTML content ''' </summary> Public ReadOnly Property ResponseContentType() As String Get Return _DetectedContentType End Get End Property ''' <summary> ''' Returns true if the last HTTP response was in a non-text format ''' </summary> Public ReadOnly Property ResponseIsBinary() As Boolean Get '-- if we truly have no content-type, we're kinda hosed, but '-- let's assume the response is text data to be on the safe side If _DetectedContentType = "" Then Return False Else Return _DetectedContentType.IndexOf("text") = -1 End If End Get End Property ''' <summary> ''' Returns a string representation, with encoding, of the last HTTP response data ''' </summary> Public ReadOnly Property ResponseString() As String Get If Me.ResponseIsBinary Then Return "(" & _ResponseBytes.Length & " bytes of binary data)" Else If _ForcedEncoding Is Nothing Then Return _DetectedEncoding.GetString(_ResponseBytes) Else Return _ForcedEncoding.GetString(_ResponseBytes) End If End If End Get End Property ''' <summary> ''' Returns the raw bytestream representing the last HTTP response data ''' </summary> Public ReadOnly Property ResponseBytes() As Byte() Get Return _ResponseBytes End Get End Property ''' <summary> ''' Returns the actual location of the downloaded content, which can ''' be different than the requested URL, eg, http://web.com/IsThisAfolderOrNot ''' </summary> Public ReadOnly Property ContentLocation() As String Get Return _ContentLocation End Get End Property ''' <summary> ''' Browser ID string to send with web requests ''' note that many popular websites will serve alternate content based on this value ''' </summary> ''' <remarks> ''' defaults to browser ID string of vanilla IE6 as seen in XP SP2 ''' </remarks> Public Property BrowserIdString() As String Get Return _HttpUserAgent End Get Set(ByVal Value As String) _HttpUserAgent = Value End Set End Property ''' <summary> ''' how long, in milliseconds, to wait for HTTP content to arrive before timing out ''' </summary> Public Property TimeoutMilliseconds() As Integer Get Return _RequestTimeoutMilliseconds End Get Set(ByVal Value As Integer) _RequestTimeoutMilliseconds = Value End Set End Property ''' <summary> ''' URL of the web proxy to use ''' if left blank, no Proxy will be used; if provided, will ALWAYS be used! ''' </summary> Public Property ProxyUrl() As String Get Return _ProxyUrl End Get Set(ByVal Value As String) _ProxyUrl = Value End Set End Property ''' <summary> ''' username to use for Proxy authentication ''' if left blank, the current user's credentials will be sent ''' </summary> Public Property ProxyUser() As String Get Return _ProxyUser End Get Set(ByVal Value As String) _ProxyUser = Value End Set End Property ''' <summary> ''' password to use for Proxy authentication ''' if left blank, the current user's credentials will be sent ''' </summary> Public Property ProxyPassword() As String Get Return _ProxyPassword End Get Set(ByVal Value As String) _ProxyPassword = Value End Set End Property ''' <summary> ''' the Proxy requires authentication ''' if credentials are not explicitly provided, the current user's credentials will automatically be sent ''' </summary> Public Property ProxyAuthenticationRequired() As Boolean Get Return _ProxyAuthenticationRequired End Get Set(ByVal Value As Boolean) _ProxyAuthenticationRequired = Value End Set End Property ''' <summary> ''' username for authentication to the target URL ''' if left blank, the current user's credentials will be sent ''' </summary> Public Property AuthenticationUser() As String Get Return _AuthenticationUser End Get Set(ByVal Value As String) _AuthenticationUser = Value End Set End Property ''' <summary> ''' password for authentication to the target URL ''' if left blank, the current user's credentials will be sent ''' </summary> Public Property AuthenticationPassword() As String Get Return _AuthenticationPassword End Get Set(ByVal Value As String) _AuthenticationPassword = Value End Set End Property ''' <summary> ''' the target URL requires authentication ''' if credentials are not explicitly provided, the current user's credentials will automatically be sent ''' </summary> Public Property AuthenticationRequired() As Boolean Get Return _AuthenticationRequired End Get Set(ByVal Value As Boolean) _AuthenticationRequired = Value End Set End Property ''' <summary> ''' Retains cookies for all subsequent HTTP requests from this object ''' </summary> Public Property KeepCookies() As Boolean Get Return _KeepCookies End Get Set(ByVal Value As Boolean) _KeepCookies = Value End Set End Property #End Region ''' <summary> ''' The Content-Encoding entity-header field is used as a modifier to the media-type. ''' When present, its value indicates what additional content codings have been applied ''' to the entity-body, and thus what decoding mechanisms must be applied in order to ''' obtain the media-type referenced by the Content-Type header field. Content-Encoding ''' is primarily used to allow a document to be compressed without losing the identity ''' of its underlying media type. ''' </summary> ''' <remarks> ''' http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.5 ''' </remarks> Private Enum HttpContentEncoding None Gzip Compress Deflate Unknown End Enum ''' <summary> ''' attempt to convert this charset string into a named .NET text encoding ''' </summary> Private Function CharsetToEncoding(ByVal Charset As String) As System.Text.Encoding If Charset = "" Then Return Nothing Try Return System.Text.Encoding.GetEncoding(Charset) Catch ex As System.ArgumentException Return Nothing End Try End Function ''' <summary> ''' try to determine string encoding using Content-Type HTTP header and ''' raw HTTP content bytes ''' "Content-Type: text/html; charset=us-ascii" ''' <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/> ''' </summary> Private Function DetectEncoding(ByVal ContentTypeHeader As String, ByVal ResponseBytes() As Byte) As System.Text.Encoding Dim s As String Dim encoding As System.Text.Encoding '-- first try the header s = Regex.Match(ContentTypeHeader, "charset=([^;""'/>]+)", _ RegexOptions.IgnoreCase).Groups(1).ToString.ToLower encoding = CharsetToEncoding(s) '-- if we can't get it from header, try the body bytes forced to ASCII If encoding Is Nothing Then s = Regex.Match(System.Text.Encoding.ASCII.GetString(ResponseBytes), _ "<meta[^>]+content-type[^>]+charset=([^;""'/>]+)", _ RegexOptions.IgnoreCase).Groups(1).ToString.ToLower encoding = CharsetToEncoding(s) If encoding Is Nothing Then Return _DefaultEncoding End If End If Return encoding End Function ''' <summary> ''' returns a collection of bytes from a Url ''' </summary> ''' <param name="Url">URL to retrieve</param> Public Sub GetUrlData(ByVal Url As String, ByVal ifModifiedSince As DateTime) Dim wreq As HttpWebRequest = DirectCast(WebRequest.Create(Url), HttpWebRequest) '-- do we need to use a proxy to get to the web? If _ProxyUrl <> "" Then Dim wp As New WebProxy(_ProxyUrl) If _ProxyAuthenticationRequired Then If _ProxyUser <> "" And _ProxyPassword <> "" Then wp.Credentials = New NetworkCredential(_ProxyUser, _ProxyPassword) Else wp.Credentials = CredentialCache.DefaultCredentials End If wreq.Proxy = wp End If End If '-- does the target website require credentials? If _AuthenticationRequired Then If _AuthenticationUser <> "" And _AuthenticationPassword <> "" Then wreq.Credentials = New NetworkCredential(_AuthenticationUser, _AuthenticationPassword) Else wreq.Credentials = CredentialCache.DefaultCredentials End If End If wreq.Method = "GET" wreq.Timeout = _RequestTimeoutMilliseconds wreq.UserAgent = _HttpUserAgent wreq.Headers.Add("Accept-Encoding", _AcceptedEncodings) '-- note that, if present, this will trigger a 304 exception '-- if the URL being retrieved is not newer than the specified '-- date/time If ifModifiedSince <> DateTime.MinValue Then wreq.IfModifiedSince = ifModifiedSince End If '-- sometimes we need to transfer cookies to another URL; '-- this keeps them around in the object If KeepCookies Then If _PersistedCookies Is Nothing Then _PersistedCookies = New CookieContainer End If wreq.CookieContainer = _PersistedCookies End If '-- download the target URL into a byte array Dim wresp As HttpWebResponse = DirectCast(wreq.GetResponse, HttpWebResponse) '-- convert response stream to byte array Dim ebr As New ExtendedBinaryReader(wresp.GetResponseStream) _ResponseBytes = ebr.ReadToEnd() '-- determine if body bytes are compressed, and if so, '-- decompress the bytes Dim ContentEncoding As HttpContentEncoding If wresp.Headers.Item("Content-Encoding") Is Nothing Then ContentEncoding = HttpContentEncoding.None Else Select Case wresp.Headers.Item("Content-Encoding").ToLower Case "gzip" ContentEncoding = HttpContentEncoding.Gzip Case "deflate" ContentEncoding = HttpContentEncoding.Deflate Case Else ContentEncoding = HttpContentEncoding.Unknown End Select _ResponseBytes = Decompress(_ResponseBytes, ContentEncoding) End If '-- sometimes URL is indeterminate, eg, "http://website.com/myfolder" '-- in that case the folder and file resolution MUST be done on '-- the server, and returned to the client as ContentLocation _ContentLocation = wresp.Headers("Content-Location") If _ContentLocation Is Nothing Then _ContentLocation = "" End If '-- if we have string content, determine encoding type '-- (must cast to prevent Nothing) _DetectedContentType = wresp.Headers("Content-Type") If _DetectedContentType Is Nothing Then _DetectedContentType = "" End If If Me.ResponseIsBinary Then _DetectedEncoding = Nothing Else If _ForcedEncoding Is Nothing Then _DetectedEncoding = DetectEncoding(_DetectedContentType, _ResponseBytes) End If End If End Sub ''' <summary> ''' decompresses a compressed array of bytes via the specified HTTP compression type ''' </summary> ''' <returns>decompressed array of bytes</returns> Private Function Decompress(ByVal b() As Byte, ByVal CompressionType As HttpContentEncoding) As Byte() Dim s As Stream Select Case CompressionType Case HttpContentEncoding.Deflate s = New Zip.Compression.Streams.InflaterInputStream(New MemoryStream(b), _ New Zip.Compression.Inflater(True)) Case HttpContentEncoding.Gzip s = New GZip.GZipInputStream(New MemoryStream(b)) Case Else Return b End Select Dim ms As New MemoryStream Const chunkSize As Integer = 2048 Dim sizeRead As Integer Dim unzipBytes(chunkSize) As Byte While True sizeRead = s.Read(unzipBytes, 0, chunkSize) If sizeRead > 0 Then ms.Write(unzipBytes, 0, sizeRead) Else Exit While End If End While s.Close() Return ms.ToArray End Function ''' <summary> ''' download URL contents to a file, using HTTP compression if possible ''' </summary> Public Sub DownloadFile(ByVal Url As String, ByVal FilePath As String) DownloadFile(Url, FilePath, DateTime.MinValue) End Sub ''' <summary> ''' download URL contents to a file, using HTTP compression if possible ''' URL contents will only be downloaded if newer than the specified date ''' </summary> Public Sub DownloadFile(ByVal Url As String, ByVal FilePath As String, ByVal ifModifiedSince As DateTime) GetUrlData(Url, ifModifiedSince) SaveResponseToFile(FilePath) End Sub Private Sub SaveResponseToFile(ByVal FilePath As String) Dim fs As FileStream Dim bw As BinaryWriter Try fs = New FileStream(FilePath, FileMode.OpenOrCreate) bw = New BinaryWriter(fs) bw.Write(_ResponseBytes) bw.Close() Finally If Not fs Is Nothing Then fs.Close() End Try Return End Sub ''' <summary> ''' download URL contents to an array of bytes, using HTTP compression if possible ''' </summary> Public Function DownloadBytes(ByVal Url As String) As Byte() Return DownloadBytes(Url, DateTime.MinValue) End Function ''' <summary> ''' download URL contents to an array of bytes, using HTTP compression if possible ''' URL contents will only be downloaded if newer than the specified date ''' </summary> Public Function DownloadBytes(ByVal Url As String, ByVal ifModifiedSince As DateTime) As Byte() GetUrlData(Url, ifModifiedSince) Return _ResponseBytes End Function ''' <summary> ''' download URL contents to a string (with appropriate encoding), using HTTP compression if possible ''' </summary> Public Function DownloadString(ByVal Url As String) As String GetUrlData(Url, DateTime.MinValue) Return Me.ResponseString End Function ''' <summary> ''' download URL contents to a string (with appropriate encoding), using HTTP compression if possible ''' URL contents will only be downloaded if newer than the specified date ''' </summary> Public Function DownloadString(ByVal Url As String, ByVal ifModifiedSince As DateTime) As String GetUrlData(Url, ifModifiedSince) Return Me.ResponseString End Function ''' <summary> ''' clears any downloaded content and resets all properties to default values ''' </summary> Public Sub Clear() ClearDownload() _DefaultEncoding = System.Text.Encoding.GetEncoding("Windows-1252") _ForcedEncoding = Nothing _AuthenticationRequired = False _ProxyAuthenticationRequired = False _ProxyUrl = "" _ProxyUser = "" _ProxyPassword = "" _AuthenticationUser = "" _AuthenticationPassword = "" _KeepCookies = False _RequestTimeoutMilliseconds = 60000 _HttpUserAgent = _ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)" End Sub ''' <summary> ''' clears any downloaded content ''' </summary> Public Sub ClearDownload() _ResponseBytes = Nothing _DetectedEncoding = Nothing _DetectedContentType = "" _ContentLocation = "" _PersistedCookies = Nothing End Sub #Region " ExtendedBinaryReader" ''' <summary> ''' Extends the <c>System.IO.BinaryReader</c> class by a <c>ReadToEnd</c> ''' method that can be used to read a whole file. ''' </summary> ''' <remarks> ''' http://dotnet.mvps.org/dotnet/faqs/?id=readfile&lang=en ''' </remarks> Public Class ExtendedBinaryReader Inherits BinaryReader ''' <summary> ''' Creates a new instance of the <c>ExtendedBinaryReader</c> class. ''' </summary> ''' <param name="Input">A stream.</param> Public Sub New(ByVal Input As Stream) MyBase.New(Input) End Sub ''' <summary> ''' Creates a new instance of the <c>ExtendedBinaryReader</c> class. ''' </summary> ''' <param name="Input">The provided stream.</param> ''' <param name="Encoding">The character encoding.</param> Public Sub New(ByVal Input As Stream, ByVal Encoding As System.Text.Encoding) MyBase.New(Input, Encoding) End Sub ''' <summary> ''' Reads the whole data in the base stream and returns it in an ''' array of bytes. ''' </summary> ''' <returns>The streams whole binary data.</returns> Public Function ReadToEnd() As Byte() Return ReadToEnd(Short.MaxValue) End Function ''' <summary> ''' Reads the whole data in the base stream and returns it in an ''' array of bytes. ''' </summary> ''' <param name="InitialLength">The initial buffer length.</param> ''' <returns>The stream's whole binary data.</returns> ' Based on an implementation by Jon Skeet [MVP] ' (<URL:http://www.yoda.arachsys.com/csharp/readbinary.html>). Public Function ReadToEnd(ByVal InitialLength As Integer) As Byte() ' If an unhelpful initial length was passed, just use 32K. If InitialLength < 1 Then InitialLength = Short.MaxValue End If Dim Buffer(InitialLength - 1) As Byte Dim Read As Integer Dim Chunk As Integer = _ Me.BaseStream.Read(Buffer, Read, Buffer.Length - Read) Do While Chunk > 0 Read = Read + Chunk ' If the end of the buffer is reached, check to see if there is ' any more data. If Read = Buffer.Length Then Dim NextByte As Integer = Me.BaseStream.ReadByte() ' If the end of the stream is reached, we are done. If NextByte = -1 Then Return Buffer End If ' Nope. Resize the buffer, put in the byte we have just ' read, and continue. Dim NewBuffer(Buffer.Length * 2 - 1) As Byte System.Buffer.BlockCopy( _ Buffer, _ 0, _ NewBuffer, _ 0, _ Buffer.Length _ ) 'Array.Copy(Buffer, NewBuffer, Buffer.Length) NewBuffer(Read) = CByte(NextByte) Buffer = NewBuffer Read = Read + 1 End If Chunk = Me.BaseStream.Read(Buffer, Read, Buffer.Length - Read) Loop ' The buffer is now too big. Shrink it. Dim ReturnBuffer(Read - 1) As Byte System.Buffer.BlockCopy(Buffer, 0, ReturnBuffer, 0, Read) 'Array.Copy(Buffer, ReturnBuffer, Read) Return ReturnBuffer End Function End Class #End Region End Class
bytes=23855