root/trunk/whisperlib/common/io/buffer/data_block.cc

Revision 7, 13.6 kB (checked in by whispercastorg, 2 years ago)

version 0.2.0

Line 
1 // Copyright (c) 2009, Whispersoft s.r.l.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Whispersoft s.r.l. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // Author: Catalin Popescu
31
32 #include "common/base/log.h"
33 #include "common/io/buffer/data_block.h"
34
35 namespace io {
36
37 //////////////////////////////////////////////////////////////////////
38
39 DataBlock::DataBlock(BlockSize buffer_size)
40   : writable_buffer_(new char[buffer_size]),
41     readable_buffer_(writable_buffer_),
42     alloc_block_(NULL),
43     buffer_size_(buffer_size),
44     size_(0),
45     ref_count_(0),
46     guard_1_(0xfedcba10),
47     disposer_(NULL),
48     guard_(0x01abcdef) {
49   CHECK_GT(buffer_size_, 0);
50 }
51
52 // Constructs a raw buffer - we do not own it so we cannot write it
53 DataBlock::DataBlock(const char* buffer,
54                      BlockSize size,
55                      util::FreeArrayList<char>* disposer,
56                      DataBlock* alloc_block)
57   : writable_buffer_(NULL),
58     readable_buffer_(buffer),
59     alloc_block_(alloc_block),
60     buffer_size_(size),
61     size_(size),
62     ref_count_(0),
63     guard_1_(0xfedcba10),
64     disposer_(disposer),
65     guard_(0x01abcdef) {
66   CHECK_GT(size_, 0);
67 }
68
69 DataBlock::~DataBlock() {
70   CHECK_EQ(ref_count_, 0);
71   CHECK_EQ(guard_1_, 0xfedcba10);
72   CHECK_EQ(guard_, 0x01abcdef);
73   if ( alloc_block_ != NULL ) {
74     alloc_block_->DecRef();
75   } else {
76     if ( disposer_ != NULL ) {
77       disposer_->Dispose(const_cast<char*>(readable_buffer_));
78     } else {
79       delete[] readable_buffer_;
80     }
81   }
82 }
83
84
85 //////////////////////////////////////////////////////////////////////
86
87 BlockSize DataBlockPointer::ReadableSize() const {
88   BlockSize size = 0;
89   DCHECK_LE(pos_, block()->size());
90   BlockDqueue::const_iterator it = block_it();
91   for ( BlockId i = block_id_; i < owner_->end_id(); ++i ) {
92     size += (*it)->size();
93     ++it;
94   }
95   return size - pos_;
96 }
97
98 BlockSize DataBlockPointer::Distance(const DataBlockPointer& m) const {
99   DCHECK(m.owner_ == owner_);
100   if ( m < *this )
101     return m.Distance(*this);
102   BlockSize distance = 0;
103   BlockDqueue::const_iterator it = block_it();
104   for ( BlockId i = block_id_; i < m.block_id(); ++i ) {
105     distance += (*it)->size();
106     ++it;
107   }
108   return distance - pos_ + m.pos_;
109 }
110
111 BlockSize DataBlockPointer::Advance(BlockSize cb) {
112   if ( cb < 0 ) {
113     return Devance(-cb);
114   }
115   BlockSize delta = 0;
116   BlockDqueue::const_iterator it = block_it();
117   while ( cb > 0 || pos_ == (*it)->size() ) {
118     if ( pos_ == (*it)->size() ) {
119       if ( block_id_ + 1 >= owner_->end_id() ) {
120         return delta;
121       }
122       ++block_id_;
123       ++it;
124       pos_ = 0;
125     } else {
126       const BlockSize to_add = min((*it)->size() - pos_, cb);
127       pos_ += to_add;
128       cb -= to_add;
129       delta += to_add;
130       DCHECK_LE(pos_, (*it)->size());
131     }
132   }
133   return delta;
134 }
135
136 BlockSize DataBlockPointer::Devance(BlockSize cb) {
137   if ( cb < 0 ) {
138     return Advance(-cb);
139   }
140   BlockSize delta = 0;
141   BlockDqueue::const_iterator it = block_it();
142   while ( cb > 0 || pos_ == 0 ) {
143     if ( pos_ == 0 ) {
144       if ( block_id_ == owner_->begin_id() ) {
145         return delta;
146       }
147       --block_id_;
148       --it;
149       pos_ = (*it)->size();
150     } else {
151       const BlockSize to_del = min(pos_, cb);
152       pos_ -= to_del;
153       cb -= to_del;
154       delta += to_del;
155       DCHECK_GE(pos_, 0);
156     }
157   }
158   return delta;
159 }
160
161 BlockSize DataBlockPointer::WriteData(const char* buffer, BlockSize len) {
162   BlockSize cb = 0;
163   BlockDqueue::const_iterator it = block_it();
164   while ( len > 0 ) {
165     if ( pos_ == (*it)->buffer_size() ) {
166       if ( block_id_ + 1 >= owner_->end_id() ) {
167         return cb;
168       }
169       ++block_id_;
170       ++it;
171       CHECK_EQ((*it)->size(), 0) << "Stomping over data !";
172       pos_ = 0;
173     } else {
174       const BlockSize to_write = min((*it)->buffer_size() - pos_, len);
175       DCHECK_GT(to_write, 0);
176       DCHECK((*it)->is_mutable());
177       memcpy((*it)->mutable_buffer() + pos_, buffer, to_write);
178       (*it)->set_size((*it)->size() + to_write);
179       pos_ += to_write;
180       cb += to_write;
181       len -= to_write;
182       buffer += to_write;
183     }
184   }
185   return cb;
186 }
187
188 bool DataBlockPointer::ReadBlock(const char** buffer, BlockSize* len) {
189   BlockDqueue::const_iterator it = block_it();
190   while ( pos_ == (*it)->size() ) {
191     if ( !AdvanceToNextBlock(&it) ) {
192       *len = 0;
193       return false;
194     }
195   }
196   *buffer = (*it)->buffer() + pos_;
197   if ( *len > 0 ) {
198     *len = min(*len, (*it)->size() - pos_);
199   } else {
200     *len = (*it)->size() - pos_;
201   }
202   pos_ += *len;
203   return true;
204 }
205
206 BlockSize DataBlockPointer::ReadData(char* buffer, BlockSize len) {
207   BlockSize cb = 0;
208   BlockDqueue::const_iterator it = block_it();
209   while ( len > 0 ) {
210     if ( pos_ == (*it)->size() ) {
211       if ( !AdvanceToNextBlock(&it) ) {
212         return cb;
213       }
214     } else {
215       const BlockSize to_read = min((*it)->size() - pos_, len);
216       DCHECK_GT(to_read, 0);
217       memcpy(buffer, (*it)->buffer() + pos_, to_read);
218       pos_ += to_read;
219       cb += to_read;
220       len -= to_read;
221       buffer += to_read;
222     }
223   }
224   return cb;
225 }
226
227 bool DataBlockPointer::AdvanceToNextBlock(
228   BlockDqueue::const_iterator* it) {
229   DCHECK(pos_ == (**it)->size());
230   // Determine if we are at the very end ..
231   BlockDqueue::const_iterator it_next = *it + 1;
232   BlockId block_id = block_id_ + 1;
233   while ( block_id < owner_->end_id() && (*it_next)->size() == 0 ) {
234     ++it_next;
235     ++block_id;
236   }
237   if ( block_id >= owner_->end_id() ) {
238     return false;
239   }
240   block_id_ = block_id;
241   *it = it_next;
242   pos_ = 0;
243   return true;
244 }
245
246 }
namespace {

// Bit flags classifying a byte for the ASCII tokenizer. Each kCharLookup
// entry holds one flag, or QUOTE | SEP for the two quote characters.
enum CharAttr {
  ERROR = 1,    // control and high-bit bytes
  QUOTE = 2,    // " and '
  SEP   = 4,    // punctuation returned as a one-character token
  CHAR  = 8,    // may be part of an unquoted token
  SPACE = 16,   // blank, TAB, LF, CR
};

// Classification of every possible byte value, indexed by the byte.
const unsigned char kCharLookup[0x100] = {
  // 0x00 - 0x0f: NUL and control characters.
  // TAB (0x09), LF (0x0a) and CR (0x0d) count as whitespace.
  ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
  ERROR, SPACE, SPACE, ERROR, ERROR, SPACE, ERROR, ERROR,
  // 0x10 - 0x1f: control characters.
  ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
  ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
  // 0x20 - 0x27: ' ' ! " # $ % & '
  SPACE, SEP, QUOTE | SEP, SEP, SEP, SEP, SEP, QUOTE | SEP,
  // 0x28 - 0x2f: ( ) * + , - . /
  SEP, SEP, SEP, CHAR, SEP, CHAR, CHAR, CHAR,
  // 0x30 - 0x37: 0 1 2 3 4 5 6 7
  CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR,
  // 0x38 - 0x3f: 8 9 : ; < = > ?
  CHAR, CHAR, SEP, SEP, SEP, SEP, SEP, CHAR,
  // 0x40 - 0x4f: @ A B C D E F G H I J K L M N O
  CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR,
  CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR,
  // 0x50 - 0x57: P Q R S T U V W
  CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR,
  // 0x58 - 0x5f: X Y Z [ backslash ] ^ _
  CHAR, CHAR, CHAR, SEP, SEP, SEP, SEP, CHAR,
  // 0x60 - 0x6f: ` a b c d e f g h i j k l m n o
  SEP, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR,
  CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR,
  // 0x70 - 0x77: p q r s t u v w
  CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR,
  // 0x78 - 0x7f: x y z { | } ~ DEL
  // NOTE(review): 0x7f (DEL) is classified as CHAR - confirm this is
  // intended.
  CHAR, CHAR, CHAR, SEP, SEP, SEP, SEP, CHAR,
  // 0x80 - 0xff: every high-bit byte is flagged as ERROR.
  ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
  ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
  ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
  ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
  ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
  ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
  ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
  ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
  ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
  ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
  ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
  ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
  ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
  ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
  ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR,
  ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR
};

// True when the table entry for byte c has the given attribute bit set.
inline bool HasCharAttr(unsigned char c, CharAttr attr) {
  return (kCharLookup[c] & attr) != 0;
}

// Per-attribute predicates over a single byte.
inline bool AttrIsSpace(unsigned char c) {
  return HasCharAttr(c, SPACE);
}
inline bool AttrIsQuote(unsigned char c) {
  return HasCharAttr(c, QUOTE);
}
inline bool AttrIsError(unsigned char c) {
  return HasCharAttr(c, ERROR);
}
inline bool AttrIsSep(unsigned char c) {
  return HasCharAttr(c, SEP);
}
inline bool AttrIsChar(unsigned char c) {
  return HasCharAttr(c, CHAR);
}

}
300
301 namespace io {
302 TokenReadError DataBlockPointer::ReadNextAsciiToken(string* s,
303                                                     int* len_covered) {
304   *len_covered = 0;
305   BlockSize len = 0;
306   DataBlockPointer saved(*this);
307   DataBlockPointer begin(*this);
308   BlockDqueue::const_iterator it = block_it();
309   bool last_was_escape = false;
310   char quote_char = '\0';
311
312   int skipped = 0;
313   while ( true ) {
314     if ( pos_ == (*it)->size() ) {
315       if ( !AdvanceToNextBlock(&it) ) {
316         break;
317       }
318     } else {
319       const char* p = (*it)->buffer() + pos_;
320       while ( pos_ < (*it)->size() ) {
321         if ( !quote_char ) {
322           //
323           // Outside quoted stream
324           //
325           if ( AttrIsError(*p) ) {
326             // We accept these in quotes.. somehow..
327             *this = saved;
328             return TOKEN_ERROR_CHAR;
329           } else if ( len ) {
330             if ( AttrIsChar(*p) ) {
331               ++len;
332             } else {
333               *this = begin;
334               ReadStringData(s, len);
335               *len_covered = len + skipped;
336               return TOKEN_OK;
337             }
338           } else if ( AttrIsQuote(*p) ) {
339             begin = *this;
340             quote_char = *p;
341             ++len;
342           } else if ( AttrIsSep(*p) ) {
343             s->assign(1, *p);
344             Advance(1);
345             *len_covered = 1 + skipped;
346             return TOKEN_SEP_OK;
347           } else if ( AttrIsSpace(*p) ) {
348             ++skipped;
349           } else {
350             begin = *this;
351             ++len;
352           }
353         } else {
354           //
355           // In a quoted stream
356           //
357           if ( quote_char == *p && !last_was_escape ) {
358             *this = begin;
359             ReadStringData(s, len + 1);  // include the last quote
360             *len_covered = len + skipped + 1;
361             return TOKEN_QUOTED_OK;
362           } else if ( !last_was_escape && *p == '\\' ) {
363             last_was_escape = true;
364           } else {
365             last_was_escape = false;
366           }
367           ++len;
368         }
369         ++pos_;
370         ++p;
371       }
372     }
373   }
374   *this = saved;
375   *len_covered = 0;
376   return TOKEN_NO_DATA;
377 }
378
379 bool DataBlockPointer::ReadToChars(char fin, char prev, string* s) {
380   BlockSize len = 0;
381   char last = '\0';
382   DataBlockPointer saved(*this);
383   BlockDqueue::const_iterator it = block_it();
384   while ( true ) {
385     if ( pos_ == (*it)->size() ) {
386       if ( !AdvanceToNextBlock(&it) ) {
387         break;
388       }
389     } else {
390       const char* p = (*it)->buffer() + pos_;
391       while ( pos_ < (*it)->size() ) {
392         ++pos_;
393         ++len;
394         if ( *p == fin && (!prev || last == prev) ) {
395           *this = saved;
396           ReadStringData(s, len);
397           return true;
398         }
399         last = *p++;
400       }
401     }
402   }
403   *this = saved;
404   return false;
405 }
406
407 BlockSize DataBlockPointer::ReadStringData(string* s, BlockSize len) {
408   string tmp;
409   tmp.reserve(len);
410   const BlockSize cb = ReadData(&tmp[0], len);
411   s->assign(tmp.c_str(), cb);
412   return cb;
413 }
414
415 void DataBlockPointer::ReadToString(string* s) {
416   const BlockSize len = ReadableSize();
417   CHECK_EQ(ReadStringData(s, len), len);
418 }
419 //////////////////////////////////////////////////////////////////////
420 }
Note: See TracBrowser for help on using the browser.