root/trunk/whisperlib/net/http/http_request.h

Revision 7, 17.7 kB (checked in by whispercastorg, 2 years ago)

version 0.2.0

Line 
1 // Copyright (c) 2009, Whispersoft s.r.l.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Whispersoft s.r.l. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // Author: Catalin Popescu
31
32
33 #ifndef __NET_HTTP_HTTP_REQUEST_H__
34 #define __NET_HTTP_HTTP_REQUEST_H__
35
36 #include <string>
37
38 #include <whisperlib/common/base/types.h>
39 #include <whisperlib/net/http/http_consts.h>
40 #include <whisperlib/net/http/http_header.h>
41 #include <whisperlib/common/io/buffer/memory_stream.h>
42 #include <whisperlib/common/io/zlib/zlibwrapper.h>
43 #include <whisperlib/net/url/url.h>
44
45 namespace http {
46
47 //
48 // This class encompasses a request to an HTTP server (or from an HTTP server)
49 //
50 // client_* members - things set by the client, the HttpClientConnection would
51 //                    send them (header / content) to the server. The server
52 //                    connection sets these as per client request.
53 // server_* members - things set by the server - the HttpServerConnection
54 //                    would send these to the client.
55 //
56
57 class Request {
58  public:
59   Request(bool strict_headers = true,
60           common::ByteOrder client_byte_order = common::kByteOrder,
61           int32 client_block_size = io::DataBlock::kDefaultBufferSize,
62           common::ByteOrder server_byte_order = common::kByteOrder,
63           int32 server_block_size = io::DataBlock::kDefaultBufferSize);
64   ~Request();
65
66   // Normal accessors (mutable and const flavors; they return pointers to members)
67   io::MemoryStream* client_data() { return &client_data_; }
68   http::Header* client_header() { return &client_header_; }
69   io::MemoryStream* server_data() { return &server_data_; }
70   http::Header* server_header() { return &server_header_; }
71   const io::MemoryStream* client_data() const { return &client_data_; }
72   const http::Header* client_header() const { return &client_header_; }
73   const io::MemoryStream* server_data() const { return &server_data_; }
74   const http::Header* server_header() const { return &server_header_; }
75
76   URL* url() { return url_; }   // NULL -> Invalid
77   const URL* url() const { return url_; }   // NULL -> Invalid
78
79   // This initializes the internal URL from the client request and returns it
80   // (presumably absolute_root supplies the protocol to use, unless the URI specifies one - TODO confirm)
81   URL* InitializeUrlFromClientRequest(const URL* absolute_root);
82
83   // Appends the client request to the given memory stream
84   // (does chunking / gzip encoding as required).
85   //
86   // * You need to set the proper headers for what you want to do
87   // w/ the request (e.g. Content-Encoding, Transfer-Encoding,
88   // beforehand) - These are also affected by the version specified
89   // in the protocol.
90   //
91   // This *may* modify the header (e.g. fields such as Content-Length,
92   // Transfer-Encoding or Content-Encoding).
93   // Will also clear the client_data_ :)
94   //
95   void AppendClientRequest(io::MemoryStream* out, int64 max_chunk_size = -1);
96
97   // Appends the current content of client_data_ as another http chunk
98   // to the given memory stream (and clears the client_data_).
99   // Called with an empty client_data_, this will close the chunk stream.
100   //
101   // IMPORTANT NOTE: do not try to break the protocol specification w/
102   //   this call (i.e. no chunks if Content-Length is set and Transfer-Encoding
103   //   is not chunked)
104   //
105   // NOTE / TODO: we do not append trailing http headers..
106   bool AppendClientChunk(io::MemoryStream* out, int64 max_chunk_size = -1);
107
108   // Appends the server reply to the given memory stream
109   // (does chunking / gzip encoding as required).
110   // This *may* modify the header (e.g. fields such as Content-Length,
111   //  or Content-Encoding) - These are also affected by the version specified
112   // in the protocol.
113   //
114   // NOTE: if you want to encode chunked stuff, you need to enable
115   //       it by setting the Transfer-Encoding to chunked. In this
116   //       case Content-Length is not sent (normally).
117   // If the client accepts gzip encoding and you enable it w/
118   // set_server_use_gzip_encoding we use it and set the proper header.
119   void AppendServerReply(io::MemoryStream* out,
120                          bool streaming,
121                          bool do_chunks,
122                          int64 max_chunk_size = -1);
123
124   // Appends the current content of server_data_ as another http chunk
125   // to the given memory stream (and clears the server_data_).
126   // Called with an empty server_data_, this presumably closes the chunk stream (TODO confirm).
127   //
128   // IMPORTANT NOTE: do not try to break the protocol specification w/
129   //   this call (i.e. no chunks if Content-Length is set and Transfer-Encoding
130   //   is not chunked in server header or if this http status is set to
131   //   NO_CONTENT or other stuff - or in response to a HEAD request
132   //   (we check the client_header_ !) this will result in an assertion
133   //   failure !)
134   //
135   // This does not affect the headers.
136   // NOTE: Chunked encoding needs to be enabled in the server_header.
137   bool AppendServerChunk(io::MemoryStream* out,
138                          bool do_chunks,
139                          int64 max_chunk_size = -1);
140
141   // When this is turned on we will try to use gzip encoding whenever possible
142   bool server_use_gzip_encoding() const {
143     return server_use_gzip_encoding_;
144   }
145   void set_server_use_gzip_encoding(bool use_gzip_encoding) {
146     server_use_gzip_encoding_ = use_gzip_encoding;
147   }
148
149   // True when no reply body must be transmitted: HEAD request, 1xx status, NO_CONTENT or NOT_MODIFIED
150   bool NoServerBodyTransmitted() {   // NOTE(review): only reads members - could be declared const
151     const HttpReturnCode code = server_header_.status_code();
152     return (client_header_.method() == METHOD_HEAD ||
153             (code >= 100 && code < 200) ||
154             code == NO_CONTENT ||
155             code == NOT_MODIFIED);
156   }
157
158  private:
159   // Utility function that really does the chunk appending. Returns true
160   // iff the last chunk and trailer was appended.
161   bool AppendChunkHelper(const http::Header* src_header,
162                          io::MemoryStream* src_data,
163                          io::MemoryStream* out,
164                          bool add_decorations,
165                          int64 max_chunk_size = -1);
166
167   // The data sent / to be sent by the client (normally the payload of
168   // a POST or a PUT)
169   io::MemoryStream client_data_;
170   // The HTTP header sent / to be sent by the client
171   http::Header client_header_;
172
173   // The data sent / to be sent by the server (the reply body)
174   io::MemoryStream server_data_;
175   // The HTTP header sent / to be sent by the server
176   http::Header server_header_;
177
178   // The URL of the request (NULL means invalid - see url())
179   URL* url_;
180
181   // We were expecting some end of chunk encoding
182   bool in_chunk_encoding_;
183
184   // Used for compression (one wrapper per CompressOption other than NONE)
185   io::ZlibDeflateWrapper* deflate_zwrapper_;
186   bool gzip_state_begin_;
187   io::ZlibGzipEncodeWrapper* gzip_zwrapper_;
188
189   bool server_use_gzip_encoding_;
190   enum CompressOption {
191     COMPRESS_NONE,
192     COMPRESS_GZIP,
193     COMPRESS_DEFLATE
194   };
195   CompressOption compress_option_;
196
197   DISALLOW_EVIL_CONSTRUCTORS(Request);
198 };
199
200 ////////////////////////////////////////////////////////////////////////////////
201 //
202 // Class that knows to parse requests from a memory stream. Can be used
203 // 'hand-in-hand' with the requests. We expect for a connection to instantiate
204 // a single RequestParser and parse the incoming requests, while using the
205 // Request serialization methods to put the output data on the wire.
206 //
207 class RequestParser {
208  public:
209   RequestParser(
210       const char* name,
211       int32 max_header_size       = 16384,
212       int64 max_body_size         = 4 << 20,   // = 4194304
213       int64 max_chunk_size        = 1 << 20,   // = 1048576
214       int64 max_num_chunks        = -1,        // presumably -1 means "no limit" - TODO confirm
215       bool accept_wrong_method    = false,
216       bool accept_wrong_version   = false,
217       bool accept_no_content_length = false,
218       http::Header::ParseError
219       worst_accepted_header_error = Header::READ_NO_STATUS_REASON);
220   ~RequestParser();
221
222   enum ParseState {
223     // Fresh new parsing state
224     STATE_INITIALIZED                           = 0,
225
226     // Not states, but state range separators (see InFinalState / InErrorState)
227     FIRST_FINAL_STATE                           = 100,   // same value as STATE_END_OF_HEADER_FINAL
228     FIRST_ERROR_STATE                           = 200,   // same value as ERROR_HEADER_BAD
229
230     // In process of reading the headers
231     STATE_HEADER_READING                        = 1,
232     // Got the header fully
233     STATE_END_OF_HEADER                         = 2,
234     // Got the header fully and we do not need to read the body
235     // (e.g. HEAD request etc)
236     STATE_END_OF_HEADER_FINAL                   = 100,
237
238     // In the process of reading a normal (not chunked) message body
239     STATE_BODY_READING                          = 10,
240     // Fully got the body.
241     STATE_BODY_END                              = 110,
242
243     // Waiting to read a chunk header (size)
244     STATE_CHUNK_HEAD_READING                    = 21,
245     // In the process of reading chunk data
246     STATE_CHUNK_READING                         = 22,
247     // At end of chunk data - waiting for the \r\n at end of chunk
248     STATE_END_OF_CHUNK                          = 23,
249     // After the last (empty) chunk - need to read the trailing header
250     STATE_LAST_CHUNK_READ                       = 24,
251     // Final state at the end of all chunks and chunk trail header
252     STATE_END_OF_TRAIL_HEADER                   = 120,
253
254     // Errors that can appear in the processing of the message header
255     ERROR_HEADER_BAD                            = 200,
256     ERROR_HEADER_BAD_CONTENT_LEN                = 201,
257     ERROR_HEADER_TOO_LONG                       = 202,
258     ERROR_HEADER_LINE                           = 203,
259
260     // Errors that can appear in the processing of regular message body
261     ERROR_CONTENT_TOO_LONG                      = 210,
262     ERROR_TRANSFER_ENCODING_UNKNOWN             = 211,
263     ERROR_CONTENT_ENCODING_UNKNOWN              = 212,
264     ERROR_CONTENT_GZIP_TOO_LONG                 = 213,
265     ERROR_CONTENT_GZIP_ERROR                    = 214,
266     ERROR_CONTENT_GZIP_UNFINISHED               = 215,
267
268     // Errors that can appear in the processing of chunks
269     ERROR_CHUNK_HEADER_TOO_LONG                 = 220,
270     ERROR_CHUNK_TOO_LONG                        = 221,
271     ERROR_CHUNK_TOO_MANY                        = 222,
272     ERROR_CHUNK_TRAIL_HEADER                    = 223,
273     ERROR_CHUNK_BAD_CHUNK_LENGTH                = 224,
274     ERROR_CHUNK_BAD_CHUNK_TERMINATION           = 225,
275     ERROR_CHUNK_BIGGER_THEN_DECLARED            = 226,
276     ERROR_CHUNK_UNFINISHED_GZIP_CONTENT         = 227,
277     ERROR_CHUNK_CONTINUED_FINISHED_GZIP_CONTENT = 228,
278     ERROR_CHUNK_CONTENT_GZIP_TOO_LONG           = 229,
279     ERROR_CHUNK_CONTENT_GZIP_ERROR              = 230,
280     ERROR_CHUNK_TRAILER_TOO_LONG                = 231,
281   };
282   const char* ParseStateName() { return ParseStateName(parse_state_); }   // name of the current state
283   static const char* ParseStateName(ParseState state);
284
285   // Call this before starting to parse a new request - and you better do it !
286   void Clear();
287
288   enum ReadState {   // bit flags - the Parse* functions return an OR of these
289     HEADER_READ  = 1,   // header fully parsed
290     BODY_READING = 2,   // in the state of reading the body (some already in
291     // the body data MemoryStream - and valid).
292     // Attention - we don't turn this on if body needs
293     // post-processing (like gzip decoding)
294     CHUNKED_BODY_READING = 4,    // same as BODY_READING - but for chunked body.
295     CHUNKED_TRAILER_READING = 8,  // reading the trailing header of a chunked body
296     BODY_FINISHED = 16,        // Body is fully finished.
297     CHUNKS_FINISHED = 32,      // Chunked body is fully finished.
298     REQUEST_FINISHED = 64,     // The whole request is fully finished
299     CONTINUE = 128,            // informs the caller to call us again soon
300   };
301   static string ReadStateName(int32 read_state);
302
303   // VERY IMPORTANT: once started the parsing of a request / reply - continue
304   // it with subsequent calls
305   //
306   // Example of use (you may want to add some timeout provisions):
307   //
308   // bool HttpClientConnection::HandleRead() {
309   //   if ( !BufferedConnection::HandleRead() ) {
310   //     return false;
311   //   }
312   //   if ( !(parser_.ParseServerReply(in_stream, &req_) &
313   //          http::RequestParser::REQUEST_FINISHED) ) {
314   //     // Wait for more data ...
315   //     return true;
316   //   }
317   //   if ( parser_.InErrorState() ) {
318   //     LOG_WARNING << "Error parsing server response";
319   //     parser_.Clear();
320   //     return false;
321   //   }
322   //   parser_.Clear();
323   //   return ProcessValidRequest(req_);
324   // }
325   //
326
327   // Parses a client request from the memory stream `out` (presumably the input, despite its name - TODO confirm) and updates the data from req.
328   // Returns an OR on the values in ReadState.
329   // (e.g. HEADER_READ | REQUEST_FINISHED may be for a normal GET request,
330   //  while HEADER_READ | BODY_READING | BODY_FINISHED may be at the end
331   //  of a parsed POST request).
332   int32 ParseClientRequest(io::MemoryStream* out, Request* req);
333
334   // Parses a server reply from the memory stream `out` (presumably the input, despite its name - TODO confirm) and updates the data from req.
335   // Returns an OR on the values in ReadState.
336   int32 ParseServerReply(io::MemoryStream* out, Request* req);
337
338   ParseState parse_state() const {
339     return parse_state_;
340   }
341   bool InFinalState() const {   // also true for every error state (those are >= 200)
342     return parse_state_ >= FIRST_FINAL_STATE;
343   }
344   bool InErrorState() const {
345     return parse_state_ >= FIRST_ERROR_STATE;
346   }
347   bool dlog_level() const {
348     return dlog_level_;
349   }
350   void set_dlog_level(bool dlog_level) {
351     dlog_level_ = dlog_level;
352   }
353   const string& name() const {
354     return name_;
355   }
356   void set_name(const string& s) {
357     name_ = s;
358   }
359
360   // Returns true if we know to parse the content encoding specified in the
361   // given header (we know only identity and gzip)
362   static bool IsKnownContentEncoding(const http::Header* header);
363
364   // Returns true if we know to parse the transfer encoding specified in the
365   // given header (we know only identity and chunked)
366   static bool IsKnownTransferEncoding(const http::Header* header);
367
368   // We may need to change this parameter as we can accept different #
369   // of chunks depending on request
370   void set_max_num_chunks(int64 max_num_chunks) {
371     max_num_chunks_ = max_num_chunks;
372   }
373   void set_max_body_size(int64 max_body_size) {
374     max_body_size_ = max_body_size;
375   }
376  private:
377   void set_parse_state(ParseState state) {   // logs "old => new" first when dlog_level_ is set
378     if ( dlog_level_ ) {
379       LOG_INFO << name() << " State change: " << ParseStateName()
380                << " => " << ParseStateName(state);
381     }
382     parse_state_ = state;
383   }
384
385   //////////////////////////////////////////////////////////////////////
386   //
387   // Various parse helper functions
388   //
389
390   // Parses the payload of a message (body..)
391   int32 ParsePayloadInternal(io::MemoryStream* in, http::Header* header,
392                              io::MemoryStream* out);
393   // Parses the body of a normally encoded transmission
394   int32 ParseBodyInternal(io::MemoryStream* in, http::Header* header,
395                           io::MemoryStream* out);
396   // Parses the chunks of a chunked encoded transmission
397   int32 ParseChunksInternal(io::MemoryStream* in, http::Header* header,
398                             io::MemoryStream* out);
399   // Parses the trail header of a chunked encoded transmission
400   int32 ParseTrailHeader(io::MemoryStream* in, http::Header* header,
401                          io::MemoryStream* out);
402
403   // Protocol limits (the non-const ones have public setters above):
404   const int32 max_header_size_;
405   int64 max_body_size_;
406   const int64 max_chunk_size_;
407   int64 max_num_chunks_;
408   const bool accept_wrong_method_;
409   const bool accept_wrong_version_;
410   const bool accept_no_content_length_;
411   const http::Header::ParseError worst_accepted_header_error_;
412
413   // A name for this parser (good to distinguish at log time)
414   string name_;
415   // Shall this parser log more ?
416   bool dlog_level_;
417
418
419   // The next members hold the current parsing state. Call Clear() before
420   // starting a new parsing "session"
421   enum NextChunkExpectation {
422     EXPECT_CHUNK_NONE,       // we have no expectation for the next chunk
423     EXPECT_CHUNK_EMPTY,      // we want an empty chunk next (i.e. eos)
424     EXPECT_CHUNK_NON_EMPTY,  // we want a non-empty chunk next (ie. NO eos)
425   };
426   ParseState parse_state_;          // the state we are in (do not continue
427                                     // parsing from a final state)
428   int64 body_size_to_read_;         // how much body is left to be read ?
429   int64 chunk_size_to_read_;        // how much data is left to be read in the
430                                     // current chunk ?
431   int64 num_chunks_read_;           // how many chunks were read so far ?
432   NextChunkExpectation next_chunk_expectation_;
433                                     // what to expect from the next chunk ?
434                                     // (depends on gzip decompression state).
435   io::MemoryStream partial_data_;   // intermediate data holder
436   http::Header trail_header_;       // we parse the chunk trailing header here
437
438   // Used for decompression:
439   io::ZlibInflateWrapper* inflate_zwrapper_;
440   io::ZlibGzipDecodeWrapper* gzip_zwrapper_;
441
442  private:
443   DISALLOW_EVIL_CONSTRUCTORS(RequestParser);
444 };
445 }
446
447 #endif  // __NET_HTTP_HTTP_REQUEST_H__
Note: See TracBrowser for help on using the browser.