#include
#include
#include
#include
#include
#include
#include
#pragma comment(lib,"ws2_32.lib")
using namespace std;
// File-scope crawler state.
// NOTE(review): the element types of the three containers below are missing
// (the declarations read `queue URL;` / `hash_set ...;` with no template
// arguments) - they look like they were lost during extraction. Elsewhere in
// this file URL.front() is assigned to a `string` and visitedurl.find() is
// called with a `string`, so string elements are presumably intended - confirm
// against the original source.

// Queue of URLs; the loop near the end of the file reads URL.front().
queue URL;
// Link URLs already seen; emptied once it holds more than 100000 entries.
hash_set visitedurl;
// Companion set for image URLs (only a clear() call is visible in this file).
hash_set visitedimg;
// Starts at 1; presumably a running image counter - no use is visible here.
int g_ImgCnt = 1;
// 1048576 = 2^20; presumably the page buffer size in bytes.
// NOTE(review): the ';' before the trailing comment is part of the macro's
// replacement text, so every expansion carries a stray semicolon. That is only
// harmless where the macro ends a statement; it breaks uses inside an
// expression or argument list.
#define DEFAULT_PAGE_BUF_SIZE 1048576;//
// Splits a URL containing "http://" into its host and resource path.
//
// Parameters:
//   url      - text to parse; it must contain "http://" followed by a host
//              and at least one '/' that starts the resource path.
//   host     - receives the text between "http://" and the first '/' after it.
//   resource - receives everything from that '/' to the end of url.
//
// Returns true and fills host/resource on success. Returns false, leaving
// host and resource untouched, when "http://" is absent or when no '/'
// follows the host (e.g. "http://example.com").
//
// host and resource are taken by reference: with by-value parameters the
// parsed parts were written to local copies and never reached the caller.
bool ParseURL(const std::string& url, std::string& host, std::string& resource)
{
    static const char kScheme[] = "http://";
    const std::string::size_type kSchemeLen = sizeof(kScheme) - 1; // exclude the '\0'

    const std::string::size_type schemePos = url.find(kScheme);
    if (schemePos == std::string::npos)
        return false;

    const std::string::size_type hostBegin = schemePos + kSchemeLen;
    const std::string::size_type pathBegin = url.find('/', hostBegin);
    if (pathBegin == std::string::npos)
        return false;

    // Build both parts before touching the outputs so the call stays correct
    // even if the caller passes the same string object as url and host/resource.
    std::string parsedHost = url.substr(hostBegin, pathBegin - hostBegin);
    std::string parsedResource = url.substr(pathBegin);
    host.swap(parsedHost);
    resource.swap(parsedResource);
    return true;
}
// Visible behavior: resolves `host` with gethostbyname(), connects `sock`,
// and reads incoming data with recv() into `buf`, counting it in `bytesread`.
//
// NOTE(review): this definition is incomplete as it stands. Several statements
// are fused or missing (socket creation, the declarations of sock/sa/buf/ret/
// bytesread/m_page_bufsize, any request being sent, the loop header), most
// likely lost during extraction. Restore it from the original before building.
// NOTE(review): `response` and `bytes` are declared by value, so anything the
// function stores in them is not visible to the caller - confirm whether they
// were meant to be reference parameters.
bool gethttpresponse(const string host,const string resource, string response,int bytes)
{
// Look up the host; hp is NULL when the lookup fails.
struct hostent *hp = gethostbyname(host.c_str());
if(hp == NULL){
// NOTE(review): fused fragment - the failure branch above and the code that
// copies the 4-byte address out of hp are run together on the next line.
couth_addr,4);
//
// A non-zero result from connect() is treated as a connection failure.
// NOTE(review): the declaration of `sa` is not visible; if it is a pointer,
// sizeof(sa) is the pointer size rather than the address structure size.
if(connect(sock,(SOCKADDR*)sa,sizeof(sa)) != 0){
// NOTE(review): fused fragment - the error output and the header of the
// receive loop are run together on the next line.
cout 0){
// Read into the unused tail of buf, never asking for more than the space left.
ret = recv(sock,buf + bytesread, m_page_bufsize - bytesread, 0);
// Only a positive return value adds to the running byte count.
if(ret > 0){
bytesread += ret;
}
// Scans `response` for links introduced by href="http://, records unseen ones,
// and collects image URLs into `imgurls`.
//
// NOTE(review): this region is damaged. The first line below fuses a fragment
// of the preceding receive loop with the end of this function's parameter
// list (`imgurls`); the function name is missing here (it is called as
// ParseHtml(response, imgurls) later in the file). Further down, the line
// starting with `ofile` fuses the write to url.txt with a size check against
// 100000, and the declarations of `img` and `imgurl` are not visible.
//
// What the visible link loop does: each URL is cut out between the href prefix
// and the next double quote; a URL not yet in visitedurl is inserted there,
// and visitedurl is emptied once it holds more than 100000 entries.
// Afterwards visitedimg.clear() and imgurls.push_back(imgurl) appear, and the
// search for `img` resumes at found1 + imgurl.size().
//
// NOTE(review): found1 is not checked against string::npos before substr();
// if no closing quote exists, substr() returns the rest of `response`.
// NOTE(review): found1 is declared as SIZE_T (a Windows typedef) while the
// other positions use size_t.
if(m_page_bufsize - bytesread imgurls)
{
string url;
// Text that precedes an absolute link inside an href attribute.
string http = "href=\"http://";
size_t found = response.find(http);
// url.txt is opened in append mode.
ofstream ofile("url.txt",ios::app);
while(found != string::npos){
// Skip the attribute prefix so `found` sits on the first character of the URL.
found += strlen("href=\"");
SIZE_T found1 = response.find('"',found+1);
string tmpurl = response.substr(found,found1 - found);
if(visitedurl.find(tmpurl) == visitedurl.end()){
visitedurl.insert(visitedurl.end(),tmpurl);
if(visitedurl.size() > 100000)
visitedurl.clear();
ofile100000)
visitedimg.clear();
imgurls.push_back(imgurl);
}
found = response.find(img,found1 + imgurl.size());
}
// NOTE(review): damaged region. What follows is the tail of a routine that
// builds `filename` from `url` (presumably the Tofilename(url, filename)
// called later in this file); its signature and loop header are missing, and
// the first line is fused with output code from the previous function.
// Visible behavior: a character url[i] is appended to filename when it passes
// a chain of != comparisons (against pipe, double quote, dot, hyphen and
// space are visible); the operators joining those comparisons are missing.
// After the loop ".txt" is appended and true is returned.
cout' url[i] != '|' url[i] != '"'
url[i] != '.' url[i] != '-' url[i] != ' ' ){
filename += url[i];
}
}
filename += ".txt";
return true;
}
// Downloads: takes a list of image URLs and the URL of the page they came from.
//
// NOTE(review): this region is damaged. Only the opening of Downloads is
// intact; the rest of its body, at least one intermediate routine (the one
// calling ParseHtml) and the header of the final loop are fused together or
// missing. The closing part (WSACleanup / return 0) looks like the end of the
// program's entry point - confirm against the original source.
// NOTE(review): the element type of `vector` is missing from the signature.
void Downloads(const vector imgurls, const string url)
{
int size = imgurls.size();
string filename;
// Build `filename` from the page URL; a zero (false) result is the failure case.
if(Tofilename(url,filename) == 0){
// NOTE(review): fused fragment - error output from Downloads and the
// declaration of a separate `imgurls` variable are run together below.
cout imgurls;
// A zero (false) result from ParseHtml is the failure case.
if(ParseHtml(response,imgurls) == 0){
// NOTE(review): fused fragment - presumably a loop that runs while a size
// (likely URL.size()) is greater than 0.
cout 0){
// Read the URL at the head of the URL queue.
string str = URL.front();
// NOTE(review): fused fragment - the loop stops once some count exceeds
// 200000; the expression being compared is missing.
cout 200000)
break;
}
// Release Winsock before returning.
WSACleanup();
return 0;
}