using System;
using System.Drawing;
using System.Collections;
using System.ComponentModel;
using System.Windows.Forms;
using System.Data;
using System.Net;
using System.IO;
using System.Threading;
namespace WebCrawler
{
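/// <summary>
/// Main form of the Web Crawler application: hosts the start-URL controls, the lists of
/// discovered page and image URLs, and the embedded browser used to load pages.
/// </summary>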
public class WinForm : System.Windows.Forms.Form
{
/// <summary>
/// Required designer variable.
/// </summary>
private System.ComponentModel.IContainer components;
// Everything from here down to WebBrowser is created and configured in InitializeComponent.
// Timer whose Tick event is wired to timer_Tick.
private System.Windows.Forms.Timer timer;
// Top panel holding the start-URL box, buttons, options and counters.
private System.Windows.Forms.Panel panel2;
private System.Windows.Forms.Button PauseButton;
private System.Windows.Forms.Button CrawlButton;
private System.Windows.Forms.Label label1;
// Panel holding the two list boxes (page URLs and image URLs).
private System.Windows.Forms.Panel panel1;
// List of discovered page URLs; ParseDocument navigates to its entries via Index.
private System.Windows.Forms.ListBox URLListBox;
private System.Windows.Forms.Splitter splitter1;
// "Prefer different domains" option, read by AddLink.
private System.Windows.Forms.CheckBox diffDomainCheckBox;
private System.Windows.Forms.TrackBar updateSpeedTrackBar;
private System.Windows.Forms.Label label2;
// Context menu of URLListBox with a single "Copy" item.
private System.Windows.Forms.ContextMenu contextMenu1;
private System.Windows.Forms.MenuItem menuItem1;
// Start URL entry; persisted to and restored from Properties.Settings.
private System.Windows.Forms.TextBox urlEdit;
// Shows "Pages found: N"; refreshed from Application_Idle.
private Label countLabel;
// List of discovered images, each entry formatted as "url: width x height".
private ListBox imageListBox;
// Shows "N images"; updated by ParseDocument.
private Label imageCountLabel;
// Embedded browser whose Document is scanned by ParseDocument.
private WebBrowser WebBrowser;
// Aborted in WinForm_FormClosing.
// NOTE(review): no assignment to this field is visible in this copy of the file - confirm where it is created.
private Thread crawlerThread;
/// <summary>
/// Builds the form, hooks the idle-time page counter and restores the
/// user settings saved by the previous session.
/// </summary>
public WinForm()
{
    //
    // Required for Windows Form Designer support
    //
    InitializeComponent();
    // NOTE(review): EnableVisualStyles is documented to be called before any
    // control is created; here it runs after InitializeComponent. Kept so the
    // existing start-up sequence is unchanged.
    Application.EnableVisualStyles();
    Application.DoEvents();
    Application.Idle += new EventHandler(Application_Idle);

    urlEdit.Text = Properties.Settings.Default.StartURL;

    // TrackBar.Value throws when the assigned value lies outside
    // [Minimum, Maximum], so a missing or hand-edited setting (for example 0)
    // would crash the application at start-up. Clamp it into range instead.
    int savedSpeed = Properties.Settings.Default.UpdateSpeed;
    int clampedSpeed = Math.Max(updateSpeedTrackBar.Minimum,
                                Math.Min(updateSpeedTrackBar.Maximum, savedSpeed));
    updateSpeedTrackBar.Value = clampedSpeed;

    diffDomainCheckBox.Checked = Properties.Settings.Default.PreferDifferentDomains;
}
/// <summary>
/// Clean up any resources being used.
/// </summary>
/// <param name="disposing">
/// true when called from Dispose() (managed resources are released as well);
/// false when called from a finalizer.
/// </param>
protected override void Dispose (bool disposing)
{
    if (disposing)
    {
        // Application.Idle is a static event: while the handler added in the
        // constructor stays attached, the event keeps this form reachable and
        // keeps invoking Application_Idle on disposed controls.
        Application.Idle -= new EventHandler(Application_Idle);

        if (components != null)
        {
            components.Dispose();
        }
    }
    base.Dispose(disposing);
}
#region Windows Form Designer generated code
/// <summary>
/// Required method for Designer support - do not modify
/// the contents of this method with the code editor.
/// Creates every control of the form, sets its layout properties and wires
/// the event handlers (timer_Tick, trackBar1_ValueChanged, PauseButton_Click,
/// CrawlButton_Click, menuItem1_Click, WebBrowser_NewWindow,
/// WebBrowser_DocumentCompleted, WinForm_FormClosing).
/// </summary>
private void InitializeComponent()
{
this.components = new System.ComponentModel.Container();
this.timer = new System.Windows.Forms.Timer(this.components);
this.panel2 = new System.Windows.Forms.Panel();
this.imageCountLabel = new System.Windows.Forms.Label();
this.countLabel = new System.Windows.Forms.Label();
this.label2 = new System.Windows.Forms.Label();
this.updateSpeedTrackBar = new System.Windows.Forms.TrackBar();
this.diffDomainCheckBox = new System.Windows.Forms.CheckBox();
this.PauseButton = new System.Windows.Forms.Button();
this.CrawlButton = new System.Windows.Forms.Button();
this.label1 = new System.Windows.Forms.Label();
this.urlEdit = new System.Windows.Forms.TextBox();
this.panel1 = new System.Windows.Forms.Panel();
this.imageListBox = new System.Windows.Forms.ListBox();
this.URLListBox = new System.Windows.Forms.ListBox();
this.contextMenu1 = new System.Windows.Forms.ContextMenu();
this.menuItem1 = new System.Windows.Forms.MenuItem();
this.splitter1 = new System.Windows.Forms.Splitter();
this.WebBrowser = new System.Windows.Forms.WebBrowser();
// Layout is suspended while the controls are configured and resumed at the end of the method.
this.panel2.SuspendLayout();
((System.ComponentModel.ISupportInitialize)(this.updateSpeedTrackBar)).BeginInit();
this.panel1.SuspendLayout();
this.SuspendLayout();
//
// timer
//
// Interval is in milliseconds (10000 = 10 s). The timer is not enabled here.
// NOTE(review): presumably started by the crawl/pause handlers and re-timed by
// trackBar1_ValueChanged - those handlers are not visible in this copy; confirm.
this.timer.Interval = 10000;
this.timer.Tick += new System.EventHandler(this.timer_Tick);
//
// panel2
//
this.panel2.Controls.Add(this.imageCountLabel);
this.panel2.Controls.Add(this.countLabel);
this.panel2.Controls.Add(this.label2);
this.panel2.Controls.Add(this.updateSpeedTrackBar);
this.panel2.Controls.Add(this.diffDomainCheckBox);
this.panel2.Controls.Add(this.PauseButton);
this.panel2.Controls.Add(this.CrawlButton);
this.panel2.Controls.Add(this.label1);
this.panel2.Controls.Add(this.urlEdit);
this.panel2.Dock = System.Windows.Forms.DockStyle.Top;
this.panel2.Location = new System.Drawing.Point(0, 0);
this.panel2.Name = "panel2";
this.panel2.Size = new System.Drawing.Size(878, 64);
this.panel2.TabIndex = 9;
//
// imageCountLabel
//
// Initial text only; ParseDocument rewrites it as "<n> images".
this.imageCountLabel.AutoSize = true;
this.imageCountLabel.Location = new System.Drawing.Point(689, 9);
this.imageCountLabel.Name = "imageCountLabel";
this.imageCountLabel.Size = new System.Drawing.Size(49, 13);
this.imageCountLabel.TabIndex = 14;
this.imageCountLabel.Text = "0 images";
//
// countLabel
//
// Initial text only; Application_Idle rewrites it as "Pages found: <n>".
this.countLabel.AutoSize = true;
this.countLabel.Location = new System.Drawing.Point(423, 37);
this.countLabel.Name = "countLabel";
this.countLabel.Size = new System.Drawing.Size(67, 13);
this.countLabel.TabIndex = 13;
this.countLabel.Text = "Pages found";
//
// label2
//
this.label2.Location = new System.Drawing.Point(183, 37);
this.label2.Name = "label2";
this.label2.Size = new System.Drawing.Size(136, 16);
this.label2.TabIndex = 12;
this.label2.Text = "Update speed (1-10 secs)";
//
// updateSpeedTrackBar
//
// Minimum is 1 and Maximum is left at the control's default. The Value set
// here is a designer default; the constructor overwrites it with the saved setting.
this.updateSpeedTrackBar.Location = new System.Drawing.Point(313, 32);
this.updateSpeedTrackBar.Minimum = 1;
this.updateSpeedTrackBar.Name = "updateSpeedTrackBar";
this.updateSpeedTrackBar.Size = new System.Drawing.Size(104, 45);
this.updateSpeedTrackBar.TabIndex = 11;
this.updateSpeedTrackBar.Value = 10;
this.updateSpeedTrackBar.ValueChanged += new System.EventHandler(this.trackBar1_ValueChanged);
//
// diffDomainCheckBox
//
// Designer default is checked; the constructor overwrites it with the saved setting.
this.diffDomainCheckBox.Checked = true;
this.diffDomainCheckBox.CheckState = System.Windows.Forms.CheckState.Checked;
this.diffDomainCheckBox.Location = new System.Drawing.Point(8, 32);
this.diffDomainCheckBox.Name = "diffDomainCheckBox";
this.diffDomainCheckBox.Size = new System.Drawing.Size(144, 24);
this.diffDomainCheckBox.TabIndex = 10;
this.diffDomainCheckBox.Text = "Prefer different domains";
//
// PauseButton
//
this.PauseButton.Location = new System.Drawing.Point(495, 5);
this.PauseButton.Name = "PauseButton";
this.PauseButton.Size = new System.Drawing.Size(75, 24);
this.PauseButton.TabIndex = 9;
this.PauseButton.Text = "Pause";
this.PauseButton.Click += new System.EventHandler(this.PauseButton_Click);
//
// CrawlButton
//
this.CrawlButton.Location = new System.Drawing.Point(414, 5);
this.CrawlButton.Name = "CrawlButton";
this.CrawlButton.Size = new System.Drawing.Size(75, 24);
this.CrawlButton.TabIndex = 8;
this.CrawlButton.Text = "Crawl!";
this.CrawlButton.Click += new System.EventHandler(this.CrawlButton_Click);
//
// label1
//
this.label1.Location = new System.Drawing.Point(10, 11);
this.label1.Name = "label1";
this.label1.Size = new System.Drawing.Size(56, 17);
this.label1.TabIndex = 6;
this.label1.Text = "Start URL";
//
// urlEdit
//
// The Text set here is a designer default; the constructor overwrites it with the saved start URL.
this.urlEdit.AutoCompleteMode = System.Windows.Forms.AutoCompleteMode.Suggest;
this.urlEdit.AutoCompleteSource = System.Windows.Forms.AutoCompleteSource.AllUrl;
this.urlEdit.Location = new System.Drawing.Point(72, 8);
this.urlEdit.Name = "urlEdit";
this.urlEdit.Size = new System.Drawing.Size(336, 20);
this.urlEdit.TabIndex = 7;
this.urlEdit.Text = "http://www.doogal.co.uk/links.php";
//
// panel1
//
this.panel1.Controls.Add(this.imageListBox);
this.panel1.Controls.Add(this.URLListBox);
this.panel1.Dock = System.Windows.Forms.DockStyle.Top;
this.panel1.Location = new System.Drawing.Point(0, 64);
this.panel1.Name = "panel1";
this.panel1.Size = new System.Drawing.Size(878, 69);
this.panel1.TabIndex = 10;
//
// imageListBox
//
// Anchored left, top and right so it stretches horizontally with the panel.
this.imageListBox.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left)
| System.Windows.Forms.AnchorStyles.Right)));
this.imageListBox.FormattingEnabled = true;
this.imageListBox.Location = new System.Drawing.Point(416, 0);
this.imageListBox.Name = "imageListBox";
this.imageListBox.Size = new System.Drawing.Size(462, 69);
this.imageListBox.TabIndex = 1;
//
// URLListBox
//
this.URLListBox.ContextMenu = this.contextMenu1;
this.URLListBox.Location = new System.Drawing.Point(0, 0);
this.URLListBox.Name = "URLListBox";
this.URLListBox.Size = new System.Drawing.Size(400, 69);
this.URLListBox.TabIndex = 0;
//
// contextMenu1
//
this.contextMenu1.MenuItems.AddRange(new System.Windows.Forms.MenuItem[] {
this.menuItem1});
//
// menuItem1
//
this.menuItem1.Index = 0;
this.menuItem1.Text = "Copy";
this.menuItem1.Click += new System.EventHandler(this.menuItem1_Click);
//
// splitter1
//
this.splitter1.Dock = System.Windows.Forms.DockStyle.Top;
this.splitter1.Location = new System.Drawing.Point(0, 133);
this.splitter1.Name = "splitter1";
this.splitter1.Size = new System.Drawing.Size(878, 10);
this.splitter1.TabIndex = 11;
this.splitter1.TabStop = false;
//
// WebBrowser
//
// Script error dialogs are suppressed, and WebBrowser_NewWindow cancels every new-window request.
this.WebBrowser.Dock = System.Windows.Forms.DockStyle.Fill;
this.WebBrowser.Location = new System.Drawing.Point(0, 143);
this.WebBrowser.Name = "WebBrowser";
this.WebBrowser.ScriptErrorsSuppressed = true;
this.WebBrowser.Size = new System.Drawing.Size(878, 364);
this.WebBrowser.TabIndex = 12;
this.WebBrowser.NewWindow += new System.ComponentModel.CancelEventHandler(this.WebBrowser_NewWindow);
this.WebBrowser.DocumentCompleted += new System.Windows.Forms.WebBrowserDocumentCompletedEventHandler(this.WebBrowser_DocumentCompleted);
//
// WinForm
//
this.AutoScaleBaseSize = new System.Drawing.Size(5, 13);
this.ClientSize = new System.Drawing.Size(878, 507);
this.Controls.Add(this.WebBrowser);
this.Controls.Add(this.splitter1);
this.Controls.Add(this.panel1);
this.Controls.Add(this.panel2);
this.Name = "WinForm";
this.Text = "Web Crawler";
this.FormClosing += new System.Windows.Forms.FormClosingEventHandler(this.WinForm_FormClosing);
this.panel2.ResumeLayout(false);
this.panel2.PerformLayout();
((System.ComponentModel.ISupportInitialize)(this.updateSpeedTrackBar)).EndInit();
this.panel1.ResumeLayout(false);
this.ResumeLayout(false);
}
#endregion
/// <summary>
/// The main entry point for the application.
/// </summary>
[STAThread]
static void Main()
{
    // Visual styles are documented to take effect only when enabled before
    // the first control is created, so turn them on before constructing the form.
    Application.EnableVisualStyles();
    Application.Run(new WinForm());
}
// Index: position in URLListBox of the page most recently navigated to.
// imageCount: number of images listed so far (shown in imageCountLabel).
private int Index, imageCount;

// URLs of the images already measured and listed. imageListBox stores
// "url: width x height" strings, so it cannot be searched by bare URL.
private readonly System.Collections.Generic.Dictionary<string, bool> listedImageUrls =
    new System.Collections.Generic.Dictionary<string, bool>(StringComparer.Ordinal);

/// <summary>
/// Scans the document currently loaded in the browser: every anchor's href is
/// passed to AddLink, every image (an IMG src, or an anchor pointing at a
/// .jpg/.gif/.png) is downloaded once, measured and listed. Afterwards the
/// browser is navigated to the next entry of URLListBox, if there is one.
/// </summary>
/// <remarks>
/// Image downloads are synchronous and run on the calling thread. A failure on
/// one element is logged to the debug output and does not stop the scan.
/// </remarks>
private void ParseDocument()
{
    HtmlDocument htmlDoc = WebBrowser.Document;
    if (htmlDoc != null)
    {
        HtmlElementCollection elements = htmlDoc.All;
        for (int i = 0; i < elements.Count; i++)
        {
            try
            {
                ScanElement(elements[i]);
            }
            catch (Exception ex)
            {
                // Deliberately best-effort: malformed URLs, network errors and
                // non-image payloads must not abort the crawl. Unlike the old
                // empty catch blocks, leave a trace for debugging.
                System.Diagnostics.Debug.WriteLine("ParseDocument: element skipped: " + ex.Message);
            }
        }
    }

    // Move on to the next queued page, if any.
    if (Index < URLListBox.Items.Count - 1)
    {
        Index++;
        WebBrowser.Navigate(URLListBox.Items[Index].ToString());
    }
}

// Handles one element: queues an anchor's link and lists any image it refers to.
private void ScanElement(HtmlElement element)
{
    string tagName = element.TagName;

    // Ordinal comparison: culture-sensitive ToUpper() breaks under e.g.
    // Turkish casing rules, where "img" does not upper-case to "IMG".
    if (string.Equals(tagName, "A", StringComparison.OrdinalIgnoreCase))
    {
        string href = element.GetAttribute("href");
        if (href != null)
        {
            AddLink(href);
            if (IsImageUrl(href))
            {
                ListImage(href);
            }
        }
    }
    else if (string.Equals(tagName, "IMG", StringComparison.OrdinalIgnoreCase))
    {
        ListImage(element.GetAttribute("src"));
    }
}

// True when the URL ends in one of the recognised image extensions (case-insensitive).
private static bool IsImageUrl(string url)
{
    return url.EndsWith(".jpg", StringComparison.OrdinalIgnoreCase)
        || url.EndsWith(".gif", StringComparison.OrdinalIgnoreCase)
        || url.EndsWith(".png", StringComparison.OrdinalIgnoreCase);
}

// Downloads the image at url, then adds "url: width x height" to imageListBox and
// bumps the counter. Does nothing for an empty URL or one that is already listed.
private void ListImage(string url)
{
    if (string.IsNullOrEmpty(url) || listedImageUrls.ContainsKey(url))
    {
        return;
    }

    int width;
    int height;
    // Dispose the client, the response stream and the decoded image promptly;
    // a crawl touches many images and each holds a connection and GDI+ memory.
    using (WebClient client = new WebClient())
    using (Stream imageStream = client.OpenRead(url))
    using (Image img = Image.FromStream(imageStream))
    {
        width = img.Width;
        height = img.Height;
    }

    // Recorded only after a successful download, so a failed image is retried
    // the next time it is encountered.
    listedImageUrls[url] = true;
    imageListBox.Items.Add(url + ": " + width + " x " + height);
    imageCount++;
    imageCountLabel.Text = imageCount + " images";
}
/// <summary>
/// Returns the host component of an absolute URL.
/// </summary>
/// <param name="URL">Absolute URL to parse with <see cref="Uri"/>.</param>
/// <returns>The value of <see cref="Uri.Host"/> for the parsed URL.</returns>
private string GetDomainName(string URL)
{
    return new Uri(URL).Host;
}
// Judging by the visible checks, this is meant to add URL to URLListBox unless it is
// already listed or does not start with "http", optionally preferring unseen domains.
// NOTE(review): the text of this method is truncated in this copy of the file. The line
// `while (Loop -1)` below is not valid C#, the braces opened here are never closed, and the
// handlers wired up in InitializeComponent (timer_Tick, trackBar1_ValueChanged,
// PauseButton_Click, CrawlButton_Click, menuItem1_Click) are not defined anywhere in view.
// Everything between the start of the while condition and the clipboard call appears to be
// missing - restore it from the original source before changing this code.
private void AddLink(string URL)
{
// don't add it if we already have it
if (URLListBox.Items.IndexOf(URL) == -1)
{
// only add valid http links
if (URL.StartsWith("http"))
{
// we prefer different domain names since these mean we do more crawling
bool AlreadyGotDomain = false;
// only look at domains when the "Prefer different domains" box is ticked
if (diffDomainCheckBox.Checked)
{
string URLDomainName = GetDomainName(URL);
int Loop = 0;
// NOTE(review): truncation point - the loop condition/body and several whole methods are missing here.
while (Loop -1)
// copies the selected URLListBox entry to the clipboard
// (presumably the tail of menuItem1_Click, the "Copy" handler - confirm)
Clipboard.SetDataObject(URLListBox.Items[URLListBox.SelectedIndex]);
}
/// <summary>
/// Cancels every request from the embedded browser to open a new window.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data; its Cancel flag is set to true.</param>
private void WebBrowser_NewWindow(object sender, CancelEventArgs e)
{
    // Always veto the new window.
    e.Cancel = true;
}
/// <summary>
/// Persists the current start URL, update speed and domain preference, then
/// aborts the crawler thread if one exists.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Closing event data (unused; the close is never cancelled here).</param>
private void WinForm_FormClosing(object sender, FormClosingEventArgs e)
{
    Properties.Settings.Default.StartURL = urlEdit.Text;
    Properties.Settings.Default.UpdateSpeed = updateSpeedTrackBar.Value;
    Properties.Settings.Default.PreferDifferentDomains = diffDomainCheckBox.Checked;
    Properties.Settings.Default.Save();

    // crawlerThread is null until a crawler thread has been created; closing the
    // window before that used to throw NullReferenceException here.
    if (crawlerThread != null)
    {
        crawlerThread.Abort();
    }
}
/// <summary>
/// Idle-time handler: refreshes the "Pages found" label with the number of
/// entries currently in URLListBox.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void Application_Idle(object sender, EventArgs e)
{
    int pagesFound = URLListBox.Items.Count;
    countLabel.Text = "Pages found: " + pagesFound.ToString();
}
/// <summary>
/// Highlights the URLListBox entry at Index once the browser has finished
/// loading a document.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Completion event data (unused).</param>
private void WebBrowser_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
{
    // ListBox.SelectedIndex throws ArgumentOutOfRangeException when the value
    // is not below Items.Count, e.g. when a document completes while the list
    // is still empty. Only select when the entry actually exists.
    if (Index < URLListBox.Items.Count)
    {
        URLListBox.SelectedIndex = Index;
    }
}
}
}