#include #include #include #include #include #include #include #include #include #include #include #include #include #include "get.h" #ifndef INADDR_NONE #define INADDR_NONE 0xffffffff #endif // A pair of strings. typedef std::pair string_pair; // Deja vu, c++ style. static bool same_string(std::string, std::string); static std::string retrieve(const url_parts &, resource &); static bool split_string( std::string, const char * const, string_pair &); static std::string open_connection(const url_parts &, int &); static std::string send_request(int, const url_parts &); std::string get(const std::string & url, resource & document) { // Retrieve the document associated with the given url and store it in the // given results. Return an empty string if everything went ok, otherwise // return a non-empty error message. document.size = 0; document.data = 0; const url_parts parts = parse_url(url); if (parts.protocol.empty()) return "no protocol specified in url"; if (parts.protocol != "http") return std::string("non-http protocol \"") + parts.protocol + "\" in url"; if (parts.host.empty()) return "empty host in url"; return retrieve(parts, document); } static std::string open_connection(const url_parts & parts, int & skt) { // Open a connection to the web server given in the parts; return the // connection in skt. if (!same_string(parts.protocol, "http")) return "non-http protocol"; if (parts.host.empty()) return "no host given"; const int port = atoi(parts.port.empty() ? "80" : parts.port.c_str()); const int max_short = 0x10000; if ((port < 0) or (port >= max_short)) return "port number out of range"; skt = socket(PF_INET, SOCK_STREAM, 0); if (skt < 0) return std::string("Can't create a socket, ") + strerror(errno); struct sockaddr_in sin; memset(&sin, 0, sizeof(sin)); sin.sin_family = AF_INET; sin.sin_port = htons(static_cast(port)); struct hostent * phe = gethostbyname(parts.host.c_str()); if (phe) memcpy(&sin.sin_addr, phe->h_addr, phe->h_length); else if ((sin.sin_addr.s_addr = inet_addr(parts.host.c_str())) == INADDR_NONE) { close(skt); return parts.host + " is an unrecognized host"; } if (connect(skt, (struct sockaddr *) &sin, sizeof(sin)) < 0) { close(skt); return std::string("connection attempt failed, ") + strerror(errno); } return ""; } url_parts parse_url(const std::string & url) { // Parse the given url into parts, storing the parts in the given location. // Return true iff everything the parse was without error. string_pair spair; url_parts parts = { "", "", "", "" }; if (not split_string(url, "://", spair)) { parts.directory = url; return parts; } parts.protocol = spair.first; spair.first = spair.second; if (not split_string(spair.first, "/", spair)) { parts.host = spair.first; return parts; } parts.directory = std::string("/") + spair.second; if (split_string(spair.first, ":", spair)) parts.port = spair.second; parts.host = spair.first; return parts; } static std::string rd(int skt, char * buffer, unsigned & cnt) { // Read at most cnt bytes of data from skt and store it in buffer; upon // return, cnt contains the number of bytes read. Return an empty string if // everything went ok; otherwise return an informative error message. size_t read_amt = cnt; while (read_amt > 0) { const ssize_t e = read(skt, buffer, read_amt); if (e < 0) return std::string("server read failed, ") + strerror(errno); if (e == 0) break; read_amt -= e; buffer += e; } cnt -= read_amt; return ""; } static std::string receive_response(int skt, resource & document, unsigned cnt) { // Read the given socket and store the data read in the given array, which // should be freed by the caller. const size_t bsize = 10000; char * buffer = new char [bsize]; size_t read_amt = bsize; std::string emsg = rd(skt, buffer, read_amt); if (emsg.empty()) if (read_amt < bsize) { document.size = cnt*bsize + read_amt; document.data = new char [document.size]; memcpy(document.data + cnt*bsize, buffer, read_amt); } else { emsg = receive_response(skt, document, cnt + 1); if (emsg.empty()) memcpy(document.data + cnt*bsize, buffer, bsize); } delete [] buffer; return emsg; } static std::string retrieve(const url_parts & parts, resource & document) { // Retrieve the given url and store the document associated in the given // results. int skt; std::string emsg = open_connection(parts, skt); if (emsg.empty()) { emsg = send_request(skt, parts); if (emsg.empty()) emsg = receive_response(skt, document, 0); close(skt); } return emsg; } static bool same_string(std::string s1, std::string s2) { // Return true iff the given strings are the same, ignoring letter case. std::for_each(s1.begin(), s1.end(), tolower); std::for_each(s2.begin(), s2.end(), tolower); return s1 == s2; } static std::string send_request(int skt, const url_parts & parts) { // Send a request along skt to the server in parts. const std::string msg = "GET /" + parts.directory + " HTTP/1.0\r\n\r\n"; ssize_t e = write(skt, msg.data(), msg.size()); if (e < 0) return std::string("host write failed, ") + strerror(errno); if (static_cast(e) != msg.size()) { const size_t ems = 100; char emsg[ems]; snprintf(emsg, ems, "tried to write %u bytes to the host, only wrote %u", msg.size(), e); return std::string(emsg); } if (shutdown(skt, 1)) return std::string("socket shutdown failed, ") + strerror(errno); return ""; } static bool split_string( std::string str, const char * const pattern, string_pair & spair) { // Split str at the left-most occurrence of pattern, storing the left and // right parts of the split string in spair (the pattern is part of neither // part). Return true if str was split, false otherwise (in which case the // contents of spair is undefined). std::string::size_type i = str.find(pattern); if (std::string::npos == i) { spair.first = str; spair.second = ""; return false; } spair.first = str.substr(0, i); i += strlen(pattern); spair.second = str.substr(i, str.size() - i); return true; } #ifdef GET_TESTING // g++ -o test-get -gstabs -ansi -pedantic -DGET_TESTING get.cc -lsocket -lnsl && ./test-get int main() { string_pair spair; assert(split_string("a:b", ":", spair)); assert(spair.first == "a"); assert(spair.second == "b"); assert(!split_string("a:b", "*", spair)); assert(spair.first == "a:b"); assert(spair.second == ""); url_parts parts = parse_url("http://www.awl.com"); assert(parts.protocol == "http"); assert(parts.host == "www.awl.com"); assert(parts.port == ""); assert(parts.directory == ""); parts = parse_url("file://www.awl.com:8888/joe:blow?lang=en"); assert(parts.protocol == "file"); assert(parts.host == "www.awl.com"); assert(parts.port == "8888"); assert(parts.directory == "/joe:blow?lang=en"); } #endif // $Log: get.cc,v $ // Revision 1.2 2003/09/20 21:33:45 rclayton // Define parse_url(). // // Revision 1.1 2003/09/03 22:30:07 rclayton // Initial revision //