Using the System.Net.WebClient class, we can easily send data to or receive data from a URI.
Below is a simple example of a web crawler using System.Net.WebClient class.
It checks the page at the specified URI for anchor tags and adds every link it finds to a ListBox. Once we have the links, we can use them for whatever purpose we like.
ASPX.CS
________
/// <summary>
/// Downloads the page at the URL entered in <c>txtLink</c>, loads it into an MSHTML
/// document, and adds the href of every anchor element in the document's links
/// collection to <c>lstLinks</c>.
/// </summary>
/// <param name="sender">The control that raised the click event (not used).</param>
/// <param name="e">Event data (not used).</param>
/// <exception cref="System.Net.WebException">
/// Propagated from <c>WebClient.DownloadData</c> when the download fails.
/// </exception>
protected void btnSubmit_Click(object sender, EventArgs e)
    {
        string pageUrl = txtLink.Text;

        // WebClient is IDisposable; release it as soon as the download completes.
        byte[] pageBytes;
        using (System.Net.WebClient client = new System.Net.WebClient())
        {
            pageBytes = client.DownloadData(pageUrl);
        }

        // Decode as UTF-8 (a superset of ASCII). ASCII decoding replaces every byte
        // above 0x7F with '?', which corrupts non-English text and links.
        // NOTE(review): the charset declared by the response is not consulted here.
        string html = System.Text.Encoding.UTF8.GetString(pageBytes);

        mshtml.IHTMLDocument2 document = (mshtml.IHTMLDocument2)new mshtml.HTMLDocumentClass();
        document.write(html);
        // Close the output stream so the parser knows the markup is complete.
        document.close();

        mshtml.IHTMLElementCollection links = (mshtml.IHTMLElementCollection)document.links;
        int linkCount = links.length;
        for (int i = 0; i < linkCount; i++)
        {
            try
            {
                // The links collection can also hold <area> elements; only anchors are
                // wanted, so anything that is not an anchor is skipped rather than
                // relying on a failed cast and a catch-all handler.
                mshtml.IHTMLAnchorElement anchor = links.item(i, 0) as mshtml.IHTMLAnchorElement;
                if (anchor == null)
                {
                    continue;
                }

                string href = anchor.href;
                if (!string.IsNullOrEmpty(href))
                {
                    lstLinks.Items.Add(href);
                }
            }
            catch (System.Runtime.InteropServices.COMException)
            {
                // Best effort: one element that fails in COM interop must not abort
                // collecting the remaining links.
                continue;
            }
        }
    }
where txtLink is the TextBox in which we enter the URI
ex: http://www.google.com    //  the URI must start with http://
lstLinks is a ListBox and btnSubmit is a Button
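To use the mshtml types, add a COM reference to the Microsoft HTML Object Library in your solution. System.Net.WebClient itself is part of the .NET Framework (System.dll) and needs no extra reference.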
Subscribe to:
Post Comments (Atom)
 
 
 
No comments:
Post a Comment