root/trunk/whisperlib/common/io/buffer/data_block.h

Revision 7, 13.5 kB (checked in by whispercastorg, 2 years ago)

version 0.2.0

Line 
1 // Copyright (c) 2009, Whispersoft s.r.l.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Whispersoft s.r.l. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // Author: Catalin Popescu
31
32 //////////////////////////////////////////////////////////////////////
33 //
34 // VERY IMPORTANT : ********  Not Thread Safe  ********
35 //
36 //////////////////////////////////////////////////////////////////////
37
38 #ifndef __COMMON_IO_BUFFER_DATA_BUFFER_H__
39 #define __COMMON_IO_BUFFER_DATA_BUFFER_H__
40
41 #define __USE_VECTOR_FOR_BLOCK_DQUEUE__
42 #ifdef __USE_VECTOR_FOR_BLOCK_DQUEUE__
43 #include <vector>
44 #else
45 #include <deque>
46 #endif
47
48 #include <string>
49 #include <whisperlib/common/base/types.h>
50 #include <whisperlib/common/base/free_list.h>
51 #include <whisperlib/common/io/input_stream.h>
52 #include <whisperlib/common/io/output_stream.h>
53 #include <whisperlib/common/io/iomarker.h>
54 #include <whisperlib/common/io/seeker.h>
55
56 namespace io {
57
58 typedef int32 BlockId;
59 typedef int32 BlockSize;
60
// Result codes returned by DataBlockPointer::ReadNextAsciiToken().
// NOTE(review): the exact condition behind each value is decided by the
// implementation, which is not part of this header - confirm there.
enum TokenReadError {
  TOKEN_OK         = 0,
  TOKEN_QUOTED_OK  = 1,
  TOKEN_SEP_OK     = 2,
  TOKEN_ERROR_CHAR = 3,
  TOKEN_NO_DATA    = 4
};
68
69 class DataBlock;
70
71 #ifdef __USE_VECTOR_FOR_BLOCK_DQUEUE__
72
73 const int32 kResizeThreshold = 25;
74 class BlockDqueue : public vector<DataBlock*> {
75  public:
76   BlockDqueue()
77     : vector<DataBlock*>(),
78       begin_id_(0),
79       correction_(0) {
80     // This reserve would make the performance slightly worse in whispercast
81     // reserve(kResizeThreshold);
82   }
83   const DataBlock* block(BlockId id) const {
84     return *(buffer_it(id));
85   }
86   DataBlock* mutable_block(BlockId id) const {
87     return *(buffer_it(id));
88   }
89   BlockId begin_id() const {
90     return begin_id_ + correction_;
91   }
92   BlockId end_id() const {
93     return vector<DataBlock*>::size() + correction_;
94   }
95   void pop_front() {
96     begin_id_++;
97   }
98   BlockDqueue::const_iterator buffer_it(BlockId id) const {
99     return vector<DataBlock*>::begin() + (id - correction_);
100   }
101   const DataBlock* front() const {
102     return *begin();
103   }
104   DataBlock* front() {
105     return *begin();
106   }
107   BlockDqueue::const_iterator begin() const {
108     return vector<DataBlock*>::begin() + begin_id_;
109   }
110   BlockDqueue::iterator begin() {
111     return vector<DataBlock*>::begin() + begin_id_;
112   }
113   int32 size() const {
114     return vector<DataBlock*>::size() - begin_id_;
115   }
116   bool empty() const {
117     return begin_id_ == vector<DataBlock*>::size();
118   }
119   void clear() {
120     vector<DataBlock*>::clear();
121     correction_ += begin_id_;
122     begin_id_ = 0;
123   }
124   DataBlock* operator[](size_t n) {
125     return *(begin() + n);
126   }
127   const DataBlock* operator[](size_t n) const {
128     return *(begin() + n);
129   }
130
131   void correct_buffer() {
132     const int32 elem_size = size();
133     if ( begin_id_ > kResizeThreshold && begin_id_ > elem_size ) {
134       vector<DataBlock*>::iterator it = vector<DataBlock*>::begin();
135       for ( int32 i = 0; i < elem_size; ++i ) {
136         *it = *(it + begin_id_);
137         ++it;
138       }
139       vector<DataBlock*>::resize(elem_size);
140       correction_ += begin_id_;
141       begin_id_ = 0;
142     }
143   }
144  private:
145   BlockId begin_id_;
146   BlockId correction_;
147   DISALLOW_EVIL_CONSTRUCTORS(BlockDqueue);
148 };
149
150
151 #else
152
153 //////////////////////////////////////////////////////////////////////
154
155 class BlockDqueue : public deque<DataBlock*> {
156  public:
157   BlockDqueue()
158     : deque<DataBlock*>(),
159       begin_id_(0) {
160   }
161   const DataBlock* block(BlockId id) const {
162     DCHECK_GE(id, begin_id_);
163     DCHECK_LT(id, end_id());
164     return *(begin() + (id - begin_id_));
165   }
166   DataBlock* mutable_block(BlockId id) const {
167     DCHECK_GE(id, begin_id_);
168     DCHECK_LT(id, end_id());
169     return *(begin() + (id - begin_id_));
170   }
171   BlockId begin_id() const {
172     return begin_id_;
173   }
174   BlockId end_id() const {
175     return begin_id_ + size();
176   }
177   void pop_front() {
178     begin_id_++;
179     deque<DataBlock*>::pop_front();
180   }
181   BlockDqueue::const_iterator buffer_it(BlockId id) const {
182     DCHECK_GE(id, begin_id_);
183     DCHECK_LT(id, end_id());
184     return begin() + (id - begin_id_);
185   }
186   void correct_buffer() {
187   }
188
189  private:
190   BlockId begin_id_;
191   DISALLOW_EVIL_CONSTRUCTORS(BlockDqueue);
192 };
193
194 #endif
195
196 //////////////////////////////////////////////////////////////////////
197
198 class DataBlock {
199  public:
200   static const BlockSize kDefaultBufferSize = 16384;
201   // Constructs a buffer that is writable w/ a given size
202   explicit DataBlock(BlockSize buffer_size = kDefaultBufferSize);
203   // Constructs a raw buffer - we do not own it so we cannot write it
204   explicit DataBlock(const char* buffer, BlockSize size,
205                      util::FreeArrayList<char>* disposer,
206                      DataBlock* alloc_block);
207
208   ~DataBlock();
209
210   // Accessors
211   BlockSize buffer_size() const {
212     CHECK_EQ(guard_1_, 0xfedcba10);
213     CHECK_EQ(guard_, 0x01abcdef);
214     return buffer_size_;
215   }
216   BlockSize size() const {
217     CHECK_EQ(guard_1_, 0xfedcba10);
218     CHECK_EQ(guard_, 0x01abcdef);
219     return size_;
220   }
221   void set_size(BlockSize size) {
222     CHECK_EQ(guard_1_, 0xfedcba10);
223     CHECK_EQ(guard_, 0x01abcdef);
224     DCHECK_LE(size, buffer_size_);
225     size_ = size;
226   }
227
228   bool is_mutable() const {
229     CHECK_EQ(guard_1_, 0xfedcba10);
230     CHECK_EQ(guard_, 0x01abcdef);
231     return writable_buffer_ != NULL;
232   }
233   const char* buffer() const {
234     CHECK_EQ(guard_1_, 0xfedcba10);
235     CHECK_EQ(guard_, 0x01abcdef);
236     return readable_buffer_;
237   }
238   char* mutable_buffer() {
239     CHECK_EQ(guard_1_, 0xfedcba10);
240     CHECK_EQ(guard_, 0x01abcdef);
241     DCHECK_EQ(writable_buffer_, readable_buffer_);
242     return writable_buffer_;
243   }
244   DataBlock* GetAllocBlock()  {
245     CHECK_EQ(guard_1_, 0xfedcba10);
246     CHECK_EQ(guard_, 0x01abcdef);
247     return (alloc_block_ == NULL) ? this : alloc_block_;
248   }
249   // Reference counting functions:
250   void IncRef() {
251     CHECK_EQ(guard_1_, 0xfedcba10);
252     CHECK_EQ(guard_, 0x01abcdef);
253     ref_count_++;
254   }
255   void DecRef(bool delete_on_zero = true) {
256     CHECK_EQ(guard_1_, 0xfedcba10);
257     CHECK_EQ(guard_, 0x01abcdef);
258     ref_count_--;
259     if ( ref_count_ == 0 ) { delete this; }
260   }
261   int32 ref_count() const {
262     CHECK_EQ(guard_1_, 0xfedcba10);
263     CHECK_EQ(guard_, 0x01abcdef);
264     return ref_count_;
265   }
266
267  private:
268   // The beginning of the writable memory buffer (normally, if writable,
269   // writable_buffer_ == readable_buffer_
270   char* writable_buffer_;
271   // The beginning of the readable memory buffer
272   const char* readable_buffer_;
273   // The allocation belongs to this guy..
274   DataBlock* const alloc_block_;
275   // Size of the buffer - total allocated memory
276   const BlockSize buffer_size_;
277   // Size of the data in the buffer
278   BlockSize size_;
279   // How many guys use the buffer ?
280   int32 ref_count_;
281
282   uint32 guard_1_;
283
284   // If we should displose raw buffer via this guy ..
285   util::FreeArrayList<char>* const disposer_;
286
287
288   uint32 guard_;
289
290   DISALLOW_EVIL_CONSTRUCTORS(DataBlock);
291 };
292
293 //////////////////////////////////////////////////////////////////////
294
295 class DataBlockPointer {
296  public:
297   DataBlockPointer(const BlockDqueue* owner,
298                    BlockId block_id,
299                    BlockSize pos)
300     : owner_(owner),
301       block_id_(block_id),
302       pos_(pos) {
303   }
304   ~DataBlockPointer() {
305   }
306
307   const BlockDqueue* owner() const { return owner_; }
308   BlockId block_id() const { return block_id_; }
309   BlockSize pos() const { return pos_; }
310   void set_pos(BlockSize pos) { pos_ = pos; }
311   void set_block_id(BlockId block_id) { block_id_ = block_id; }
312
313   //////////////////////////////////////////////////////////////////////
314   //
315   // IMPORTANT: these are expensive - use with care !!
316   //
317   const DataBlock* block() const {
318     return owner_->block(block_id_);
319   }
320   DataBlock* mutable_block() const {
321     return owner_->mutable_block(block_id_);
322   }
323   BlockDqueue::const_iterator block_it() const {
324     return owner_->buffer_it(block_id_);
325     // return owner_->begin() + (block_id_ - owner_->begin_id());
326   }
327   //////////////////////////////////////////////////////////////////////
328
329   void Clear() {
330     block_id_ = -1;
331     pos_ = -1;
332   }
333   bool IsNull() const {
334     return block_id_ == -1;
335   }
336   // Returns the space available for write in the current block
337   BlockSize AvailableForWrite() const {
338     if ( IsNull() ) return 0;
339     return block()->buffer_size() - pos_;
340   }
341   // Advances the pointer to the end of current block, marking the data
342   // in between as written.
343   BlockSize AdvanceToCurrentBlockEnd() {
344     if ( IsNull() ) return 0;
345     DataBlock* block = mutable_block();
346     const BlockSize ret = block->buffer_size() - pos_;
347     block->set_size(block->buffer_size());
348     pos_ = block->size();
349     return ret;
350   }
351   // Marks the current block size to the current pointer position
352   void MarkCurrentBlockEndAtPointer() {
353     mutable_block()->set_size(pos_);
354   }
355
356   // Operators:
357   const DataBlockPointer& operator=(const DataBlockPointer& m) {
358     CHECK(owner_ == m.owner_);
359     pos_ = m.pos_;
360     set_block_id(m.block_id());
361     return *this;
362   }
363   bool operator<(const DataBlockPointer& m) const {
364     CHECK(m.owner_ == owner_);
365     if ( block_id_ == m.block_id_ )
366       return pos_ < m.pos_;
367     return block_id_ < m.block_id_;
368   }
369   bool operator>(const DataBlockPointer& m) const {
370     CHECK(m.owner_ == owner_);
371     if ( block_id_ == m.block_id_ )
372       return pos_ > m.pos_;
373     return block_id_ > m.block_id_;
374   }
375   bool operator==(const DataBlockPointer& m) const {
376     DCHECK(m.owner_ == owner_);
377     return ( block_id_ == m.block_id_ ) && (pos_ == m.pos_);
378   }
379   bool operator<=(const DataBlockPointer& m) const {
380     return (*this < m) || (*this == m);
381   }
382   bool operator>=(const DataBlockPointer& m) const {
383     return (*this > m) || (*this == m);
384   }
385
386   // Returns how much data is available for read in the owning queue, starting
387   // from the current position
388   BlockSize ReadableSize() const;
389
390   // Returns the distance in *valid* bytes between two pointers of the
391   // same container
392   BlockSize Distance(const DataBlockPointer& m) const;
393
394   // Moves the pointer forward in the data list of block w/ cb bytes
395   // Returns how much it was advanced.
396   BlockSize Advance(BlockSize cb);
397
398   // Moves the pointer backwords in the list of block w/ cb bytes
399   // Returns how much it was devanced.
400   BlockSize Devance(BlockSize cb);
401
402   // Appends data at the current position - extending in the neighbouring blocks
403   // (if available). We update our position at the end ot the written data.
404   BlockSize WriteData(const char* buffer, BlockSize len);
405
406   // Reads data starting w/ current position and continuing until we
407   // read len bytes or we reach the end of the owner buffer chain;
408   // We return the # of read bytes.
409   BlockSize ReadData(char* buffer, BlockSize len);
410
411   // Reads at most len bytes into s (Same as above)
412   BlockSize ReadStringData(string* s, BlockSize len);
413
414   // Reads the entire available data to given string
415   void ReadToString(string* s);
416
417   // Utility to read from the pointer to a CRLF. Will leave the pointer
418   // after the CRLF or at the start position. Returns true (and reads)
419   // a line if found. On true, s will contain the line *and* the CRLF
420   bool ReadCRLFLine(string* s) {
421     return ReadToChars('\n', '\r', s);
422   }
423
424   // Same as above, but looks only for \n
425   bool ReadLFLine(string* s) {
426     return ReadToChars('\n', '\0', s);
427   }
428
429   // Utility to read a token from a string.
430   TokenReadError ReadNextAsciiToken(string* s, int* len_covered);
431
432   // This will return in buffer and len the acutal block pointer and size in
433   // the underneath data list.
434   bool ReadBlock(const char** buffer, BlockSize* len);
435
436  private:
437   // Helper - if we are at the end of a block, it advances the pointer
438   // to the beginning of the next block.
439   bool AdvanceToNextBlock(BlockDqueue::const_iterator* it);
440
441   // Helper to read from the pointer into s until the two chars are
442   // found: fin at the end and prev before that. (If prev == '\0'
443   // then the prev condition is ignored).
444   bool ReadToChars(char fin, char prev, string* s);
445
446   const BlockDqueue* const owner_;  // which container owns the iterator ?
447   BlockId block_id_;                // points to the block in owner
448   BlockSize pos_;                   // the position in the *
449 };
450
451 inline ostream& operator<<(ostream& os, const DataBlockPointer& dbp) {
452   return os << "[DBP " << dbp.block_id() << ":" << dbp.pos() << "]";
453 }
454
455 //////////////////////////////////////////////////////////////////////
456 }
457 #endif  // __COMMON_IO_BUFFER_DATA_BUFFER_H__
Note: See TracBrowser for help on using the browser.