/** @file rutz/bzip2stream.cc handle bzip2-encoding through a c++ iostreams interface */

///////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2005-2007 University of Southern California
// Rob Peters <rjpeters at usc dot edu>
//
// created: Wed Jun 14 15:02:40 2006
// commit: $Id: bzip2stream.cc 8249 2007-04-12 06:03:40Z rjpeters $
// $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/rutz/bzip2stream.cc $
//
// --------------------------------------------------------------------
//
// This file is part of GroovX.
// [http://www.klab.caltech.edu/rjpeters/groovx/]
//
// GroovX is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// GroovX is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with GroovX; if not, write to the Free Software Foundation,
// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
00030 // 00031 /////////////////////////////////////////////////////////////////////// 00032 00033 #ifndef GROOVX_RUTZ_BZIP2STREAM_CC_UTC20060614220240_DEFINED 00034 #define GROOVX_RUTZ_BZIP2STREAM_CC_UTC20060614220240_DEFINED 00035 00036 #include "rutz/bzip2stream.h" 00037 00038 #include "rutz/error.h" 00039 #include "rutz/fstring.h" 00040 #include "rutz/sfmt.h" 00041 #include "rutz/shared_ptr.h" 00042 00043 #ifdef HAVE_BZLIB_H 00044 #include <bzlib.h> 00045 #endif 00046 #include <fstream> 00047 #include <iostream> 00048 00049 #include "rutz/trace.h" 00050 00051 using rutz::sfmt; 00052 using rutz::error; 00053 using rutz::fstring; 00054 using rutz::shared_ptr; 00055 00056 #ifndef HAVE_BZLIB_H 00057 00058 shared_ptr<std::ostream> rutz::obzip2open(const fstring& filename, 00059 std::ios::openmode flags) 00060 { 00061 fstring bzip2_ext(".bz2"); 00062 00063 if (filename.ends_with(bzip2_ext)) 00064 { 00065 throw error(sfmt("couldn't open file '%s' for writing: " 00066 "bzip2 libraries must be installed", 00067 filename.c_str()), SRC_POS); 00068 } 00069 else 00070 { 00071 shared_ptr<std::ostream> result = 00072 make_shared(new std::ofstream(filename.c_str(), flags)); 00073 if (result->fail()) 00074 throw error(sfmt("couldn't open file '%s' for writing", 00075 filename.c_str()), SRC_POS); 00076 00077 return result; 00078 } 00079 } 00080 00081 shared_ptr<std::istream> rutz::ibzip2open(const fstring& filename, 00082 std::ios::openmode flags) 00083 { 00084 fstring bzip2_ext(".bz2"); 00085 00086 if (filename.ends_with(bzip2_ext)) 00087 { 00088 throw error(sfmt("couldn't open file '%s' for reading: " 00089 "bzip2 libraries must be installed", 00090 filename.c_str()), SRC_POS); 00091 } 00092 else 00093 { 00094 shared_ptr<std::istream> result = 00095 make_shared(new std::ifstream(filename.c_str(), flags)); 00096 if (result->fail()) 00097 throw error(sfmt("couldn't open file '%s' for reading", 00098 filename.c_str()), SRC_POS); 00099 00100 return result; 00101 } 00102 } 00103 
00104 #else 00105 00106 namespace 00107 { 00108 class bzip2streambuf : public std::streambuf 00109 { 00110 private: 00111 bool m_opened; 00112 int m_mode; 00113 FILE* m_file; 00114 BZFILE* m_bzfile; 00115 00116 bzip2streambuf(const bzip2streambuf&); 00117 bzip2streambuf& operator=(const bzip2streambuf&); 00118 00119 static const int s_buf_size = 4092; 00120 static const int s_pback_size = 4; 00121 char m_buf[s_buf_size]; 00122 00123 int flushoutput(); 00124 00125 public: 00126 bzip2streambuf(const char* name, int om); 00127 ~bzip2streambuf() { close(); } 00128 00129 bool is_open() { return m_opened; } 00130 00131 void ensure_open(); 00132 00133 void close(); 00134 00135 virtual int underflow(); 00136 00137 virtual int overflow(int c); 00138 00139 virtual int sync(); 00140 }; 00141 00142 class bzip2stream : public std::iostream 00143 { 00144 private: 00145 bzip2streambuf m_buf; 00146 public: 00147 bzip2stream(const char* filename_cstr, 00148 std::ios::openmode mode) 00149 : 00150 std::iostream(0), 00151 m_buf(filename_cstr, mode) 00152 { 00153 rdbuf(&m_buf); 00154 } 00155 }; 00156 00157 bzip2streambuf::bzip2streambuf(const char* name, int om) 00158 : 00159 m_opened(false), 00160 m_mode(0), 00161 m_file(0), 00162 m_bzfile(0) 00163 { 00164 // no append nor read/write mode 00165 if ( (om & std::ios::ate) || (om & std::ios::app) 00166 || ((om & std::ios::in) && (om & std::ios::out)) ) 00167 { 00168 /* do nothing -- opening fails */; 00169 } 00170 else 00171 { 00172 if (om & std::ios::in) 00173 { 00174 m_file = fopen(name, "rb"); 00175 00176 if (m_file == 0) 00177 throw error(sfmt("couldn't open file '%s' for reading", 00178 name), SRC_POS); 00179 00180 int bzerror = BZ_OK; 00181 m_bzfile = BZ2_bzReadOpen(&bzerror, m_file, 00182 /*verbosity*/ 0, 00183 /*small*/ 0, 00184 /*unused_memory*/ 0, 00185 /*nunused*/ 0); 00186 00187 if (m_bzfile == 0) 00188 { 00189 fclose(m_file); 00190 00191 throw error(sfmt("couldn't open file '%s' for " 00192 "bzip2 decompression", name), 
SRC_POS); 00193 } 00194 00195 setg(m_buf+s_pback_size, 00196 m_buf+s_pback_size, 00197 m_buf+s_pback_size); 00198 } 00199 else if (om & std::ios::out) 00200 { 00201 m_file = fopen(name, "wb"); 00202 00203 if (m_file == 0) 00204 throw error(sfmt("couldn't open file '%s' for writing", 00205 name), SRC_POS); 00206 00207 int bzerror = BZ_OK; 00208 m_bzfile = BZ2_bzWriteOpen(&bzerror, m_file, 00209 /*blockSize100k*/ 9, 00210 /*verbosity*/ 0, 00211 /*workFactor*/ 30); 00212 00213 if (m_bzfile == 0) 00214 { 00215 fclose(m_file); 00216 00217 throw error(sfmt("couldn't open file '%s' for " 00218 "bzip2 compression", name), SRC_POS); 00219 } 00220 00221 setp(m_buf, m_buf+(s_buf_size-1)); 00222 } 00223 00224 if (m_bzfile != NULL) 00225 { 00226 m_opened = true; 00227 m_mode = om; 00228 } 00229 } 00230 00231 if (!m_opened) 00232 { 00233 if (om & std::ios::in) 00234 { 00235 throw error(sfmt("couldn't open file '%s' for reading", 00236 name), SRC_POS); 00237 } 00238 else if (om & std::ios::out) 00239 { 00240 throw error(sfmt("couldn't open file '%s' for writing", 00241 name), SRC_POS); 00242 } 00243 } 00244 } 00245 00246 void bzip2streambuf::close() 00247 { 00248 if (m_opened) 00249 { 00250 sync(); 00251 m_opened = false; 00252 00253 int bzerror = BZ_OK; 00254 00255 if (m_mode & std::ios::in) 00256 BZ2_bzReadClose(&bzerror, m_bzfile); 00257 else 00258 BZ2_bzWriteClose(&bzerror, m_bzfile, 0, 0, 0); 00259 00260 fclose(m_file); 00261 } 00262 } 00263 00264 int bzip2streambuf::underflow() // with help from Josuttis, p. 678 00265 { 00266 GVX_TRACE("bzip2streambuf::underflow"); 00267 // is read position before end of buffer? 
00268 if (gptr() < egptr()) 00269 return *gptr(); 00270 00271 int numPutback = 0; 00272 if (s_pback_size > 0) 00273 { 00274 // process size of putback area 00275 // -use number of characters read 00276 // -but at most four 00277 numPutback = gptr() - eback(); 00278 if (numPutback > 4) 00279 numPutback = 4; 00280 00281 // copy up to four characters previously read into the putback 00282 // buffer (area of first four characters) 00283 std::memcpy (m_buf+(4-numPutback), gptr()-numPutback, 00284 numPutback); 00285 } 00286 00287 // read new characters 00288 int bzerror = BZ_OK; 00289 const int num = 00290 BZ2_bzRead(&bzerror, m_bzfile, 00291 m_buf+s_pback_size, s_buf_size-s_pback_size); 00292 00293 if (num <= 0) // error (0) or end-of-file (-1) 00294 return EOF; 00295 00296 // BZ_STREAM_END isn't really an error as long as we still read 00297 // more than zero bytes... in that case, we don't want to return 00298 // EOF yet 00299 00300 if (bzerror != BZ_OK && bzerror != BZ_STREAM_END) 00301 return EOF; 00302 00303 // reset buffer pointers 00304 setg (m_buf+s_pback_size-numPutback, 00305 m_buf+s_pback_size, 00306 m_buf+s_pback_size+num); 00307 00308 // return next character Hrmph. We have to cast to unsigned char to 00309 // avoid problems with eof. Problem is, -1 is a valid char value to 00310 // return. However, without a cast, char(-1) (0xff) gets converted 00311 // to int(-1), which is 0xffffffff, which is EOF! What we want is 00312 // int(0x000000ff), which we have to get by int(unsigned char(-1)). 
00313 return static_cast<unsigned char>(*gptr()); 00314 } 00315 00316 int bzip2streambuf::overflow(int c) 00317 { 00318 GVX_TRACE("bzip2streambuf::overflow"); 00319 if (!(m_mode & std::ios::out) || !m_opened) return EOF; 00320 00321 if (c != EOF) 00322 { 00323 // insert the character into the buffer 00324 *pptr() = c; 00325 pbump(1); 00326 } 00327 00328 if (flushoutput() == EOF) 00329 { 00330 return -1; // ERROR 00331 } 00332 00333 return c; 00334 } 00335 00336 int bzip2streambuf::sync() 00337 { 00338 if (flushoutput() == EOF) 00339 { 00340 return -1; // ERROR 00341 } 00342 return 0; 00343 } 00344 00345 int bzip2streambuf::flushoutput() 00346 { 00347 if (!(m_mode & std::ios::out) || !m_opened) return EOF; 00348 00349 const int num = pptr()-pbase(); 00350 int bzerror = BZ_OK; 00351 BZ2_bzWrite(&bzerror, m_bzfile, pbase(), num); 00352 00353 if (bzerror != BZ_OK) 00354 return EOF; // ERROR 00355 00356 pbump(-num); 00357 return num; 00358 } 00359 } 00360 00361 shared_ptr<std::ostream> rutz::obzip2open(const fstring& filename, 00362 std::ios::openmode flags) 00363 { 00364 fstring bzip2_ext(".bz2"); 00365 00366 if (filename.ends_with(bzip2_ext)) 00367 { 00368 return shared_ptr<std::ostream> 00369 (new bzip2stream(filename.c_str(), std::ios::out|flags)); 00370 } 00371 else 00372 { 00373 shared_ptr<std::ostream> result = 00374 make_shared(new std::ofstream(filename.c_str(), flags)); 00375 if (result->fail()) 00376 throw error(sfmt("couldn't open file '%s' for writing", 00377 filename.c_str()), SRC_POS); 00378 00379 return result; 00380 } 00381 } 00382 00383 shared_ptr<std::istream> rutz::ibzip2open(const fstring& filename, 00384 std::ios::openmode flags) 00385 { 00386 fstring bzip2_ext(".bz2"); 00387 00388 if (filename.ends_with(bzip2_ext)) 00389 { 00390 return shared_ptr<std::istream> 00391 (new bzip2stream(filename.c_str(), std::ios::in|flags)); 00392 } 00393 else 00394 { 00395 shared_ptr<std::istream> result = 00396 make_shared(new std::ifstream(filename.c_str(), 
flags)); 00397 if (result->fail()) 00398 throw error(sfmt("couldn't open file '%s' for reading", 00399 filename.c_str()), SRC_POS); 00400 00401 return result; 00402 } 00403 } 00404 00405 #endif // defined(HAVE_BZLIB_H) 00406 00407 shared_ptr<std::ostream> rutz::obzip2open(const char* filename, 00408 std::ios::openmode flags) 00409 { 00410 return obzip2open(fstring(filename), flags); 00411 } 00412 00413 shared_ptr<std::istream> rutz::ibzip2open(const char* filename, 00414 std::ios::openmode flags) 00415 { 00416 return ibzip2open(fstring(filename), flags); 00417 } 00418 00419 static const char vcid_groovx_rutz_bzip2stream_cc_utc20060614220240[] = "$Id: bzip2stream.cc 8249 2007-04-12 06:03:40Z rjpeters $ $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/rutz/bzip2stream.cc $"; 00420 #endif // !GROOVX_RUTZ_BZIP2STREAM_CC_UTC20060614220240DEFINED