| 讓我的C++程序直接閱讀網頁(4) HTTP訪問 |
作者: xyun 時間: 2005-9-19 12:24:48 瀏覽: 263 |
![]() |
4. HTTP訪(fǎng)問(wèn)Web簡(jiǎn)單看就是HTML + HTTP。前面提供的HTML讀取函數所需要的原始HTML文本,得通過(guò)HTTP協(xié)議從各個(gè)web site上讀取到。用socket可以實(shí)現HTPP訪(fǎng)問(wèn),但是想比較全面地支持HTTP協(xié)議,使用現成的HTTP服務(wù)顯得更有效率。 微軟提供了Windows平臺上訪(fǎng)問(wèn)HTTP的二組API組合,WinINet, WinHTTP 這里給出二個(gè)讀取Web網(wǎng)頁(yè)的函數, * 使用WinINet的readHTTPFile * 使用WinHTTP的getHTTPFile void getHttpFile( const HINTERNET h_site, string & rd, const string & site, const string & path, UInt32 flags = 0 ) { if ( path.empty() ) return; HINTERNET h_file = NULL; wchar_t w_str[WSTR_LENGTH]; ::memset( w_str, 0, WSTR_LENGTH * sizeof(wchar_t) ); try { UInt32 l_0 = 0; UInt32 l_1 = 0; char *p_buf = NULL; ::mbstowcs( w_str, path.c_str(), path.size() ); h_file = ::WinHttpOpenRequest( h_site, L"GET", w_str, NULL, WINHTTP_NO_REFERER, WINHTTP_DEFAULT_ACCEPT_TYPES, flags ); if ( h_file == NULL ) throw ::GetLastError(); BOOL b_res = ::WinHttpSendRequest( h_file, WINHTTP_NO_ADDITIONAL_HEADERS, 0, WINHTTP_NO_REQUEST_DATA, 0, 0, 0 ); if ( ! b_res ) throw ::GetLastError(); b_res = ::WinHttpReceiveResponse( h_file, NULL ); if ( ! b_res ) throw ::GetLastError(); // 如果需要可以在這里或稍后讀入http cookies do { l_0 = 0; b_res = ::WinHttpQueryDataAvailable( h_file, &l_0 ); p_buf = new char[l_0 + 1]; ::ZeroMemory( p_buf, l_0 + 1 ); if ( b_res ) { b_res = ::WinHttpReadData( h_file, p_buf, l_0, &l_1 ); if ( b_res ) { if ( l_1 > 0 ) rd.append( p_buf, l_1 ); } } delete [] p_buf; } while ( l_0 > 0 );
if ( ! b_res ) throw ::GetLastError(); canonHTML( rd ); ::WinHttpCloseHandle( h_file ); } catch ( ... ) { if ( h_file != NULL ) ::WinHttpCloseHandle( h_file ); throw; } } void readHttpFile( string &rd, const CHttpConnection &server, const string & src_page ) { if ( src_page.empty() ) return; DWORD dw_ret; CHttpFile *p_file = NULL; CHttpConnection *p_svr = const_cast<CHttpConnection *>(&server); char *rd_buf = NULL; try { p_file = p_svr->OpenRequest( CHttpConnection::HTTP_VERB_GET, src_page.c_str(), NULL, 1, NULL, NULL, INTERNET_FLAG_EXISTING_CONNECT | INTERNET_FLAG_RELOAD ); p_file->SendRequest( "\r\n", 2 ); p_file->QueryInfoStatusCode( dw_ret ); if ( dw_ret != HTTP_STATUS_OK ) throw std::exception( "failed" ); rd_buf = new char[BUF_SIZE]; if ( rd_buf == NULL ) throw std::exception( "insufficientMemory" ); rd.erase(); memset( rd_buf, 0, BUF_SIZE ); int l = p_file->Read( rd_buf, BUF_SIZE ); while ( l > 0 ) { rd.append( rd_buf, l ); l = p_file->Read( rd_buf, BUF_SIZE ); }
if ( rd.empty() ) throw std::exception( "noContent" );
canonHTML( rd ); p_file->Close(); delete [] rd_buf; } catch ( CInternetException *p_ex ) { p_file->Close(); if ( rd_buf ) delete [] rd_buf; TCHAR sz_err[255]; p_ex->GetErrorMessage( sz_err, 255 ); throw std::exception( sz_err ); } catch ( ... ) { p_file->Close(); if ( rd_buf ) delete [] rd_buf; throw; } } |
聯系客服