/** @file rutz/gzstreambuf.cc handle gzip-encoding through a c++
    iostreams interface */
///////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2001-2004 California Institute of Technology
// Copyright (c) 2004-2007 University of Southern California
// Rob Peters <rjpeters at usc dot edu>
//
// created: Fri Jul 20 13:13:22 2001
// commit: $Id: gzstreambuf.cc 8249 2007-04-12 06:03:40Z rjpeters $
// $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/rutz/gzstreambuf.cc $
//
// --------------------------------------------------------------------
//
// This file is part of GroovX.
//   [http://ilab.usc.edu/rjpeters/groovx/]
//
// GroovX is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// GroovX is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with GroovX; if not, write to the Free Software Foundation,
// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
00031 // 00032 /////////////////////////////////////////////////////////////////////// 00033 00034 #ifndef GROOVX_RUTZ_GZSTREAMBUF_CC_UTC20050626084020_DEFINED 00035 #define GROOVX_RUTZ_GZSTREAMBUF_CC_UTC20050626084020_DEFINED 00036 00037 #include "rutz/gzstreambuf.h" 00038 00039 #include "rutz/error.h" 00040 #include "rutz/fstring.h" 00041 #include "rutz/sfmt.h" 00042 #include "rutz/shared_ptr.h" 00043 00044 #include <fstream> 00045 00046 #include "rutz/trace.h" 00047 00048 using rutz::fstring; 00049 using rutz::shared_ptr; 00050 00051 rutz::gzstreambuf::gzstreambuf(const char* name, int om, 00052 bool throw_exception) 00053 : 00054 m_opened(false), 00055 m_mode(0), 00056 m_gzfile(0) 00057 { 00058 // no append nor read/write mode 00059 if ( (om & std::ios::ate) || (om & std::ios::app) 00060 || ((om & std::ios::in) && (om & std::ios::out)) ) 00061 { 00062 /* do nothing -- opening fails */; 00063 } 00064 else 00065 { 00066 char fmode[10]; 00067 char* fmodeptr = fmode; 00068 00069 if (om & std::ios::in) 00070 { 00071 *fmodeptr++ = 'r'; 00072 setg(m_buf+s_pback_size, 00073 m_buf+s_pback_size, 00074 m_buf+s_pback_size); 00075 } 00076 else if (om & std::ios::out) 00077 { 00078 *fmodeptr++ = 'w'; 00079 setp(m_buf, m_buf+(s_buf_size-1)); 00080 } 00081 00082 *fmodeptr++ = 'b'; 00083 *fmodeptr = '\0'; 00084 00085 m_gzfile = gzopen(name,fmode); 00086 00087 if (m_gzfile != NULL) 00088 { 00089 m_opened = true; 00090 m_mode = om; 00091 } 00092 } 00093 00094 if (throw_exception && !m_opened) 00095 { 00096 if (om & std::ios::in) 00097 { 00098 throw rutz::error(rutz::sfmt("couldn't open file '%s' " 00099 "for reading", name), SRC_POS); 00100 } 00101 else if (om & std::ios::out) 00102 { 00103 throw rutz::error(rutz::sfmt("couldn't open file '%s' " 00104 "for writing", name), SRC_POS); 00105 } 00106 } 00107 } 00108 00109 void rutz::gzstreambuf::close() 00110 { 00111 if (m_opened) 00112 { 00113 sync(); 00114 m_opened = false; 00115 gzclose(m_gzfile); 00116 } 00117 } 00118 00119 int 
rutz::gzstreambuf::underflow() // with help from Josuttis, p. 678 00120 { 00121 GVX_TRACE("rutz::gzstreambuf::underflow"); 00122 // is read position before end of buffer? 00123 if (gptr() < egptr()) 00124 return *gptr(); 00125 00126 int numPutback = 0; 00127 if (s_pback_size > 0) 00128 { 00129 // process size of putback area 00130 // -use number of characters read 00131 // -but at most four 00132 numPutback = gptr() - eback(); 00133 if (numPutback > 4) 00134 numPutback = 4; 00135 00136 // copy up to four characters previously read into the putback 00137 // buffer (area of first four characters) 00138 std::memcpy (m_buf+(4-numPutback), gptr()-numPutback, 00139 numPutback); 00140 } 00141 00142 // read new characters 00143 const int num = 00144 gzread(m_gzfile, m_buf+s_pback_size, s_buf_size-s_pback_size); 00145 00146 if (num <= 0) // error (0) or end-of-file (-1) 00147 return EOF; 00148 00149 // reset buffer pointers 00150 setg (m_buf+s_pback_size-numPutback, 00151 m_buf+s_pback_size, 00152 m_buf+s_pback_size+num); 00153 00154 // return next character Hrmph. We have to cast to unsigned char to 00155 // avoid problems with eof. Problem is, -1 is a valid char value to 00156 // return. However, without a cast, char(-1) (0xff) gets converted 00157 // to int(-1), which is 0xffffffff, which is EOF! What we want is 00158 // int(0x000000ff), which we have to get by int(unsigned char(-1)). 
00159 return static_cast<unsigned char>(*gptr()); 00160 } 00161 00162 int rutz::gzstreambuf::overflow(int c) 00163 { 00164 GVX_TRACE("rutz::gzstreambuf::overflow"); 00165 if (!(m_mode & std::ios::out) || !m_opened) return EOF; 00166 00167 if (c != EOF) 00168 { 00169 // insert the character into the buffer 00170 *pptr() = c; 00171 pbump(1); 00172 } 00173 00174 if (flushoutput() == EOF) 00175 { 00176 return -1; // ERROR 00177 } 00178 00179 return c; 00180 } 00181 00182 int rutz::gzstreambuf::sync() 00183 { 00184 if (flushoutput() == EOF) 00185 { 00186 return -1; // ERROR 00187 } 00188 return 0; 00189 } 00190 00191 int rutz::gzstreambuf::flushoutput() 00192 { 00193 if (!(m_mode & std::ios::out) || !m_opened) return EOF; 00194 00195 int num = pptr()-pbase(); 00196 if ( gzwrite(m_gzfile, pbase(), num) != num ) 00197 { 00198 return EOF; 00199 } 00200 00201 pbump(-num); 00202 return num; 00203 } 00204 00205 namespace 00206 { 00207 class gzstream : public std::iostream 00208 { 00209 private: 00210 rutz::gzstreambuf m_buf; 00211 public: 00212 gzstream(const char* filename_cstr, 00213 std::ios::openmode mode, 00214 bool throw_exception) 00215 : 00216 std::iostream(0), 00217 m_buf(filename_cstr, mode, throw_exception) 00218 { 00219 rdbuf(&m_buf); 00220 } 00221 }; 00222 } 00223 00224 shared_ptr<std::ostream> rutz::ogzopen(const fstring& filename, 00225 std::ios::openmode flags) 00226 { 00227 static fstring gz_ext(".gz"); 00228 00229 if (filename.ends_with(gz_ext)) 00230 { 00231 return shared_ptr<std::ostream> 00232 (new gzstream(filename.c_str(), std::ios::out|flags, true)); 00233 } 00234 else 00235 { 00236 shared_ptr<std::ostream> result = 00237 make_shared(new std::ofstream(filename.c_str(), flags)); 00238 if (result->fail()) 00239 throw rutz::error(rutz::sfmt("couldn't open file '%s' " 00240 "for writing", filename.c_str()), 00241 SRC_POS); 00242 00243 return result; 00244 } 00245 } 00246 00247 shared_ptr<std::ostream> rutz::ogzopen(const char* filename, 00248 
std::ios::openmode flags) 00249 { 00250 return ogzopen(fstring(filename), flags); 00251 } 00252 00253 shared_ptr<std::istream> rutz::igzopen(const char* filename, 00254 std::ios::openmode flags) 00255 { 00256 return shared_ptr<std::iostream> 00257 (new gzstream(filename, std::ios::in|flags, true)); 00258 } 00259 00260 shared_ptr<std::istream> rutz::igzopen(const fstring& filename, 00261 std::ios::openmode flags) 00262 { 00263 return igzopen(filename.c_str(), flags); 00264 } 00265 00266 // sample test code 00267 00268 // #include "src/util/gzstreambuf.h" 00269 00270 // int main() { 00271 // { 00272 // rutz::gzstreambuf buf("test.gz", std::ios::out); 00273 00274 // std::ostream os(&buf); 00275 00276 // os << "Hello, World!\n"; 00277 // } 00278 00279 // { 00280 // rutz::gzstreambuf buf2("test.gz", std::ios::in); 00281 00282 // std::istream is(&buf2); 00283 00284 // int c; 00285 // while ( (c=is.get()) != EOF ) 00286 // { 00287 // std::cout << char(c); 00288 // } 00289 // } 00290 00291 // return 0; 00292 // } 00293 00294 static const char __attribute__((used)) vcid_groovx_rutz_gzstreambuf_cc_utc20050626084020[] = "$Id: gzstreambuf.cc 8249 2007-04-12 06:03:40Z rjpeters $ $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/rutz/gzstreambuf.cc $"; 00295 #endif // !GROOVX_RUTZ_GZSTREAMBUF_CC_UTC20050626084020_DEFINED