svm.cpp

00001 /*
00002 Copyright (c) 2000-2007 Chih-Chung Chang and Chih-Jen Lin
00003 All rights reserved.
00004 
00005 Redistribution and use in source and binary forms, with or without
00006 modification, are permitted provided that the following conditions
00007 are met:
00008 
00009 1. Redistributions of source code must retain the above copyright
00010 notice, this list of conditions and the following disclaimer.
00011 
00012 2. Redistributions in binary form must reproduce the above copyright
00013 notice, this list of conditions and the following disclaimer in the
00014 documentation and/or other materials provided with the distribution.
00015 
00016 3. Neither name of copyright holders nor the names of its contributors
00017 may be used to endorse or promote products derived from this software
00018 without specific prior written permission.
00019 
00020 
00021 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00022 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00023 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00024 A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR
00025 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00026 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00027 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00028 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00029 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00030 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00031 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00032 */
00033 
00034 #include <math.h>
00035 #include <stdio.h>
00036 #include <stdlib.h>
00037 #include <ctype.h>
00038 #include <float.h>
00039 #include <string.h>
00040 #include <stdarg.h>
00041 #include "svm.h"
00042 typedef float Qfloat;
00043 typedef signed char schar;
00044 #ifndef min
00045 template <class T> inline T min(T x,T y) { return (x<y)?x:y; }
00046 #endif
00047 #ifndef max
00048 template <class T> inline T max(T x,T y) { return (x>y)?x:y; }
00049 #endif
00050 template <class T> inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
00051 template <class S, class T> inline void clone(T*& dst, S* src, int n)
00052 {
00053         dst = new T[n];
00054         memcpy((void *)dst,(void *)src,sizeof(T)*n);
00055 }
00056 inline double powi(double base, int times)
00057 {
00058         double tmp = base, ret = 1.0;
00059 
00060         for(int t=times; t>0; t/=2)
00061         {
00062                 if(t%2==1) ret*=tmp;
00063                 tmp = tmp * tmp;
00064         }
00065         return ret;
00066 }
00067 #define INF HUGE_VAL
00068 #define TAU 1e-12
00069 #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
00070 #if 1
00071 void info(const char *fmt,...)
00072 {
00073         va_list ap;
00074         va_start(ap,fmt);
00075         vprintf(fmt,ap);
00076         va_end(ap);
00077 }
00078 void info_flush()
00079 {
00080         fflush(stdout);
00081 }
00082 #else
00083 void info(char *fmt,...) {}
00084 void info_flush() {}
00085 #endif
00086 
00087 //
00088 // Kernel Cache
00089 //
00090 // l is the number of total data items
00091 // size is the cache size limit in bytes
00092 //
00093 class Cache
00094 {
00095 public:
00096         Cache(int l,long int size);
00097         ~Cache();
00098 
00099         // request data [0,len)
00100         // return some position p where [p,len) need to be filled
00101         // (p >= len if nothing needs to be filled)
00102         int get_data(const int index, Qfloat **data, int len);
00103         void swap_index(int i, int j);        // future_option
00104 private:
00105         int l;
00106         long int size;
00107         struct head_t
00108         {
00109                 head_t *prev, *next;        // a cicular list
00110                 Qfloat *data;
00111                 int len;                // data[0,len) is cached in this entry
00112         };
00113 
00114         head_t *head;
00115         head_t lru_head;
00116         void lru_delete(head_t *h);
00117         void lru_insert(head_t *h);
00118 };
00119 
00120 Cache::Cache(int l_,long int size_):l(l_),size(size_)
00121 {
00122         head = (head_t *)calloc(l,sizeof(head_t));        // initialized to 0
00123         size /= sizeof(Qfloat);
00124         size -= l * sizeof(head_t) / sizeof(Qfloat);
00125         size = max(size, 2 * (long int) l);        // cache must be large enough for two columns
00126         lru_head.next = lru_head.prev = &lru_head;
00127 }
00128 
00129 Cache::~Cache()
00130 {
00131         for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
00132                 free(h->data);
00133         free(head);
00134 }
00135 
00136 void Cache::lru_delete(head_t *h)
00137 {
00138         // delete from current location
00139         h->prev->next = h->next;
00140         h->next->prev = h->prev;
00141 }
00142 
00143 void Cache::lru_insert(head_t *h)
00144 {
00145         // insert to last position
00146         h->next = &lru_head;
00147         h->prev = lru_head.prev;
00148         h->prev->next = h;
00149         h->next->prev = h;
00150 }
00151 
00152 int Cache::get_data(const int index, Qfloat **data, int len)
00153 {
00154         head_t *h = &head[index];
00155         if(h->len) lru_delete(h);
00156         int more = len - h->len;
00157 
00158         if(more > 0)
00159         {
00160                 // free old space
00161                 while(size < more)
00162                 {
00163                         head_t *old = lru_head.next;
00164                         lru_delete(old);
00165                         free(old->data);
00166                         size += old->len;
00167                         old->data = 0;
00168                         old->len = 0;
00169                 }
00170 
00171                 // allocate new space
00172                 h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len);
00173                 size -= more;
00174                 swap(h->len,len);
00175         }
00176 
00177         lru_insert(h);
00178         *data = h->data;
00179         return len;
00180 }
00181 
00182 void Cache::swap_index(int i, int j)
00183 {
00184         if(i==j) return;
00185 
00186         if(head[i].len) lru_delete(&head[i]);
00187         if(head[j].len) lru_delete(&head[j]);
00188         swap(head[i].data,head[j].data);
00189         swap(head[i].len,head[j].len);
00190         if(head[i].len) lru_insert(&head[i]);
00191         if(head[j].len) lru_insert(&head[j]);
00192 
00193         if(i>j) swap(i,j);
00194         for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
00195         {
00196                 if(h->len > i)
00197                 {
00198                         if(h->len > j)
00199                                 swap(h->data[i],h->data[j]);
00200                         else
00201                         {
00202                                 // give up
00203                                 lru_delete(h);
00204                                 free(h->data);
00205                                 size += h->len;
00206                                 h->data = 0;
00207                                 h->len = 0;
00208                         }
00209                 }
00210         }
00211 }
00212 
00213 //
00214 // Kernel evaluation
00215 //
00216 // the static method k_function is for doing single kernel evaluation
00217 // the constructor of Kernel prepares to calculate the l*l kernel matrix
00218 // the member function get_Q is for getting one column from the Q Matrix
00219 //
00220 class QMatrix {
00221 public:
00222         virtual Qfloat *get_Q(int column, int len) const = 0;
00223         virtual Qfloat *get_QD() const = 0;
00224         virtual void swap_index(int i, int j) const = 0;
00225         virtual ~QMatrix() {}
00226 };
00227 
00228 class Kernel: public QMatrix {
00229 public:
00230         Kernel(int l, svm_node * const * x, const svm_parameter& param);
00231         virtual ~Kernel();
00232 
00233         static double k_function(const svm_node *x, const svm_node *y,
00234                                  const svm_parameter& param);
00235         virtual Qfloat *get_Q(int column, int len) const = 0;
00236         virtual Qfloat *get_QD() const = 0;
00237         virtual void swap_index(int i, int j) const        // no so const...
00238         {
00239                 swap(x[i],x[j]);
00240                 if(x_square) swap(x_square[i],x_square[j]);
00241         }
00242 protected:
00243 
00244         double (Kernel::*kernel_function)(int i, int j) const;
00245 
00246 private:
00247         const svm_node **x;
00248         double *x_square;
00249 
00250         // svm_parameter
00251         const int kernel_type;
00252         const int degree;
00253         const double gamma;
00254         const double coef0;
00255 
00256         static double dot(const svm_node *px, const svm_node *py);
00257         double kernel_linear(int i, int j) const
00258         {
00259                 return dot(x[i],x[j]);
00260         }
00261         double kernel_poly(int i, int j) const
00262         {
00263                 return powi(gamma*dot(x[i],x[j])+coef0,degree);
00264         }
00265         double kernel_rbf(int i, int j) const
00266         {
00267                 return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j])));
00268         }
00269         double kernel_sigmoid(int i, int j) const
00270         {
00271                 return tanh(gamma*dot(x[i],x[j])+coef0);
00272         }
00273         double kernel_precomputed(int i, int j) const
00274         {
00275                 return x[i][(int)(x[j][0].value)].value;
00276         }
00277 };
00278 
00279 Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param)
00280 :kernel_type(param.kernel_type), degree(param.degree),
00281  gamma(param.gamma), coef0(param.coef0)
00282 {
00283         switch(kernel_type)
00284         {
00285                 case LINEAR:
00286                         kernel_function = &Kernel::kernel_linear;
00287                         break;
00288                 case POLY:
00289                         kernel_function = &Kernel::kernel_poly;
00290                         break;
00291                 case RBF:
00292                         kernel_function = &Kernel::kernel_rbf;
00293                         break;
00294                 case SIGMOID:
00295                         kernel_function = &Kernel::kernel_sigmoid;
00296                         break;
00297                 case PRECOMPUTED:
00298                         kernel_function = &Kernel::kernel_precomputed;
00299                         break;
00300         }
00301 
00302         clone(x,x_,l);
00303 
00304         if(kernel_type == RBF)
00305         {
00306                 x_square = new double[l];
00307                 for(int i=0;i<l;i++)
00308                         x_square[i] = dot(x[i],x[i]);
00309         }
00310         else
00311                 x_square = 0;
00312 }
00313 
00314 Kernel::~Kernel()
00315 {
00316         delete[] x;
00317         delete[] x_square;
00318 }
00319 
00320 double Kernel::dot(const svm_node *px, const svm_node *py)
00321 {
00322         double sum = 0;
00323         while(px->index != -1 && py->index != -1)
00324         {
00325                 if(px->index == py->index)
00326                 {
00327                         sum += px->value * py->value;
00328                         ++px;
00329                         ++py;
00330                 }
00331                 else
00332                 {
00333                         if(px->index > py->index)
00334                                 ++py;
00335                         else
00336                                 ++px;
00337                 }
00338         }
00339         return sum;
00340 }
00341 
00342 double Kernel::k_function(const svm_node *x, const svm_node *y,
00343                           const svm_parameter& param)
00344 {
00345         switch(param.kernel_type)
00346         {
00347                 case LINEAR:
00348                         return dot(x,y);
00349                 case POLY:
00350                         return powi(param.gamma*dot(x,y)+param.coef0,param.degree);
00351                 case RBF:
00352                 {
00353                         double sum = 0;
00354                         while(x->index != -1 && y->index !=-1)
00355                         {
00356                                 if(x->index == y->index)
00357                                 {
00358                                         double d = x->value - y->value;
00359                                         sum += d*d;
00360                                         ++x;
00361                                         ++y;
00362                                 }
00363                                 else
00364                                 {
00365                                         if(x->index > y->index)
00366                                         {
00367                                                 sum += y->value * y->value;
00368                                                 ++y;
00369                                         }
00370                                         else
00371                                         {
00372                                                 sum += x->value * x->value;
00373                                                 ++x;
00374                                         }
00375                                 }
00376                         }
00377 
00378                         while(x->index != -1)
00379                         {
00380                                 sum += x->value * x->value;
00381                                 ++x;
00382                         }
00383 
00384                         while(y->index != -1)
00385                         {
00386                                 sum += y->value * y->value;
00387                                 ++y;
00388                         }
00389 
00390                         return exp(-param.gamma*sum);
00391                 }
00392                 case SIGMOID:
00393                         return tanh(param.gamma*dot(x,y)+param.coef0);
00394                 case PRECOMPUTED:  //x: test (validation), y: SV
00395                         return x[(int)(y->value)].value;
00396                 default:
00397                         return 0;  // Unreachable
00398         }
00399 }
00400 
00401 // An SMO algorithm in Fan et al., JMLR 6(2005), p. 1889--1918
00402 // Solves:
00403 //
00404 //        min 0.5(\alpha^T Q \alpha) + p^T \alpha
00405 //
00406 //                y^T \alpha = \delta
00407 //                y_i = +1 or -1
00408 //                0 <= alpha_i <= Cp for y_i = 1
00409 //                0 <= alpha_i <= Cn for y_i = -1
00410 //
00411 // Given:
00412 //
00413 //        Q, p, y, Cp, Cn, and an initial feasible point \alpha
00414 //        l is the size of vectors and matrices
00415 //        eps is the stopping tolerance
00416 //
00417 // solution will be put in \alpha, objective value will be put in obj
00418 //
00419 class Solver {
00420 public:
00421         Solver() {};
00422         virtual ~Solver() {};
00423 
00424         struct SolutionInfo {
00425                 double obj;
00426                 double rho;
00427                 double upper_bound_p;
00428                 double upper_bound_n;
00429                 double r;        // for Solver_NU
00430         };
00431 
00432         void Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
00433                    double *alpha_, double Cp, double Cn, double eps,
00434                    SolutionInfo* si, int shrinking);
00435 protected:
00436         int active_size;
00437         schar *y;
00438         double *G;                // gradient of objective function
00439         enum { LOWER_BOUND, UPPER_BOUND, FREE };
00440         char *alpha_status;        // LOWER_BOUND, UPPER_BOUND, FREE
00441         double *alpha;
00442         const QMatrix *Q;
00443         const Qfloat *QD;
00444         double eps;
00445         double Cp,Cn;
00446         double *p;
00447         int *active_set;
00448         double *G_bar;                // gradient, if we treat free variables as 0
00449         int l;
00450         bool unshrinked;        // XXX
00451 
00452         double get_C(int i)
00453         {
00454                 return (y[i] > 0)? Cp : Cn;
00455         }
00456         void update_alpha_status(int i)
00457         {
00458                 if(alpha[i] >= get_C(i))
00459                         alpha_status[i] = UPPER_BOUND;
00460                 else if(alpha[i] <= 0)
00461                         alpha_status[i] = LOWER_BOUND;
00462                 else alpha_status[i] = FREE;
00463         }
00464         bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
00465         bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
00466         bool is_free(int i) { return alpha_status[i] == FREE; }
00467         void swap_index(int i, int j);
00468         void reconstruct_gradient();
00469         virtual int select_working_set(int &i, int &j);
00470         virtual double calculate_rho();
00471         virtual void do_shrinking();
00472 private:
00473         bool be_shrunken(int i, double Gmax1, double Gmax2);
00474 };
00475 
00476 void Solver::swap_index(int i, int j)
00477 {
00478         Q->swap_index(i,j);
00479         swap(y[i],y[j]);
00480         swap(G[i],G[j]);
00481         swap(alpha_status[i],alpha_status[j]);
00482         swap(alpha[i],alpha[j]);
00483         swap(p[i],p[j]);
00484         swap(active_set[i],active_set[j]);
00485         swap(G_bar[i],G_bar[j]);
00486 }
00487 
00488 void Solver::reconstruct_gradient()
00489 {
00490         // reconstruct inactive elements of G from G_bar and free variables
00491 
00492         if(active_size == l) return;
00493 
00494         int i;
00495         for(i=active_size;i<l;i++)
00496                 G[i] = G_bar[i] + p[i];
00497 
00498         for(i=0;i<active_size;i++)
00499                 if(is_free(i))
00500                 {
00501                         const Qfloat *Q_i = Q->get_Q(i,l);
00502                         double alpha_i = alpha[i];
00503                         for(int j=active_size;j<l;j++)
00504                                 G[j] += alpha_i * Q_i[j];
00505                 }
00506 }
00507 
00508 void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
00509                    double *alpha_, double Cp, double Cn, double eps,
00510                    SolutionInfo* si, int shrinking)
00511 {
00512         this->l = l;
00513         this->Q = &Q;
00514         QD=Q.get_QD();
00515         clone(p, p_,l);
00516         clone(y, y_,l);
00517         clone(alpha,alpha_,l);
00518         this->Cp = Cp;
00519         this->Cn = Cn;
00520         this->eps = eps;
00521         unshrinked = false;
00522 
00523         // initialize alpha_status
00524         {
00525                 alpha_status = new char[l];
00526                 for(int i=0;i<l;i++)
00527                         update_alpha_status(i);
00528         }
00529 
00530         // initialize active set (for shrinking)
00531         {
00532                 active_set = new int[l];
00533                 for(int i=0;i<l;i++)
00534                         active_set[i] = i;
00535                 active_size = l;
00536         }
00537 
00538         // initialize gradient
00539         {
00540                 G = new double[l];
00541                 G_bar = new double[l];
00542                 int i;
00543                 for(i=0;i<l;i++)
00544                 {
00545                         G[i] = p[i];
00546                         G_bar[i] = 0;
00547                 }
00548                 for(i=0;i<l;i++)
00549                         if(!is_lower_bound(i))
00550                         {
00551                                 const Qfloat *Q_i = Q.get_Q(i,l);
00552                                 double alpha_i = alpha[i];
00553                                 int j;
00554                                 for(j=0;j<l;j++)
00555                                         G[j] += alpha_i*Q_i[j];
00556                                 if(is_upper_bound(i))
00557                                         for(j=0;j<l;j++)
00558                                                 G_bar[j] += get_C(i) * Q_i[j];
00559                         }
00560         }
00561 
00562         // optimization step
00563 
00564         int iter = 0;
00565         int counter = min(l,1000)+1;
00566 
00567         while(1)
00568         {
00569                 // show progress and do shrinking
00570 
00571                 if(--counter == 0)
00572                 {
00573                         counter = min(l,1000);
00574                         if(shrinking) do_shrinking();
00575                         info("."); info_flush();
00576                 }
00577 
00578                 int i,j;
00579                 if(select_working_set(i,j)!=0)
00580                 {
00581                         // reconstruct the whole gradient
00582                         reconstruct_gradient();
00583                         // reset active set size and check
00584                         active_size = l;
00585                         info("*"); info_flush();
00586                         if(select_working_set(i,j)!=0)
00587                                 break;
00588                         else
00589                                 counter = 1;        // do shrinking next iteration
00590                 }
00591 
00592                 ++iter;
00593 
00594                 // update alpha[i] and alpha[j], handle bounds carefully
00595 
00596                 const Qfloat *Q_i = Q.get_Q(i,active_size);
00597                 const Qfloat *Q_j = Q.get_Q(j,active_size);
00598 
00599                 double C_i = get_C(i);
00600                 double C_j = get_C(j);
00601 
00602                 double old_alpha_i = alpha[i];
00603                 double old_alpha_j = alpha[j];
00604 
00605                 if(y[i]!=y[j])
00606                 {
00607                         double quad_coef = Q_i[i]+Q_j[j]+2*Q_i[j];
00608                         if (quad_coef <= 0)
00609                                 quad_coef = TAU;
00610                         double delta = (-G[i]-G[j])/quad_coef;
00611                         double diff = alpha[i] - alpha[j];
00612                         alpha[i] += delta;
00613                         alpha[j] += delta;
00614 
00615                         if(diff > 0)
00616                         {
00617                                 if(alpha[j] < 0)
00618                                 {
00619                                         alpha[j] = 0;
00620                                         alpha[i] = diff;
00621                                 }
00622                         }
00623                         else
00624                         {
00625                                 if(alpha[i] < 0)
00626                                 {
00627                                         alpha[i] = 0;
00628                                         alpha[j] = -diff;
00629                                 }
00630                         }
00631                         if(diff > C_i - C_j)
00632                         {
00633                                 if(alpha[i] > C_i)
00634                                 {
00635                                         alpha[i] = C_i;
00636                                         alpha[j] = C_i - diff;
00637                                 }
00638                         }
00639                         else
00640                         {
00641                                 if(alpha[j] > C_j)
00642                                 {
00643                                         alpha[j] = C_j;
00644                                         alpha[i] = C_j + diff;
00645                                 }
00646                         }
00647                 }
00648                 else
00649                 {
00650                         double quad_coef = Q_i[i]+Q_j[j]-2*Q_i[j];
00651                         if (quad_coef <= 0)
00652                                 quad_coef = TAU;
00653                         double delta = (G[i]-G[j])/quad_coef;
00654                         double sum = alpha[i] + alpha[j];
00655                         alpha[i] -= delta;
00656                         alpha[j] += delta;
00657 
00658                         if(sum > C_i)
00659                         {
00660                                 if(alpha[i] > C_i)
00661                                 {
00662                                         alpha[i] = C_i;
00663                                         alpha[j] = sum - C_i;
00664                                 }
00665                         }
00666                         else
00667                         {
00668                                 if(alpha[j] < 0)
00669                                 {
00670                                         alpha[j] = 0;
00671                                         alpha[i] = sum;
00672                                 }
00673                         }
00674                         if(sum > C_j)
00675                         {
00676                                 if(alpha[j] > C_j)
00677                                 {
00678                                         alpha[j] = C_j;
00679                                         alpha[i] = sum - C_j;
00680                                 }
00681                         }
00682                         else
00683                         {
00684                                 if(alpha[i] < 0)
00685                                 {
00686                                         alpha[i] = 0;
00687                                         alpha[j] = sum;
00688                                 }
00689                         }
00690                 }
00691 
00692                 // update G
00693 
00694                 double delta_alpha_i = alpha[i] - old_alpha_i;
00695                 double delta_alpha_j = alpha[j] - old_alpha_j;
00696 
00697                 for(int k=0;k<active_size;k++)
00698                 {
00699                         G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
00700                 }
00701 
00702                 // update alpha_status and G_bar
00703 
00704                 {
00705                         bool ui = is_upper_bound(i);
00706                         bool uj = is_upper_bound(j);
00707                         update_alpha_status(i);
00708                         update_alpha_status(j);
00709                         int k;
00710                         if(ui != is_upper_bound(i))
00711                         {
00712                                 Q_i = Q.get_Q(i,l);
00713                                 if(ui)
00714                                         for(k=0;k<l;k++)
00715                                                 G_bar[k] -= C_i * Q_i[k];
00716                                 else
00717                                         for(k=0;k<l;k++)
00718                                                 G_bar[k] += C_i * Q_i[k];
00719                         }
00720 
00721                         if(uj != is_upper_bound(j))
00722                         {
00723                                 Q_j = Q.get_Q(j,l);
00724                                 if(uj)
00725                                         for(k=0;k<l;k++)
00726                                                 G_bar[k] -= C_j * Q_j[k];
00727                                 else
00728                                         for(k=0;k<l;k++)
00729                                                 G_bar[k] += C_j * Q_j[k];
00730                         }
00731                 }
00732         }
00733 
00734         // calculate rho
00735 
00736         si->rho = calculate_rho();
00737 
00738         // calculate objective value
00739         {
00740                 double v = 0;
00741                 int i;
00742                 for(i=0;i<l;i++)
00743                         v += alpha[i] * (G[i] + p[i]);
00744 
00745                 si->obj = v/2;
00746         }
00747 
00748         // put back the solution
00749         {
00750                 for(int i=0;i<l;i++)
00751                         alpha_[active_set[i]] = alpha[i];
00752         }
00753 
00754         // juggle everything back
00755         /*{
00756                 for(int i=0;i<l;i++)
00757                         while(active_set[i] != i)
00758                                 swap_index(i,active_set[i]);
00759                                 // or Q.swap_index(i,active_set[i]);
00760         }*/
00761 
00762         si->upper_bound_p = Cp;
00763         si->upper_bound_n = Cn;
00764 
00765         info("\noptimization finished, #iter = %d\n",iter);
00766 
00767         delete[] p;
00768         delete[] y;
00769         delete[] alpha;
00770         delete[] alpha_status;
00771         delete[] active_set;
00772         delete[] G;
00773         delete[] G_bar;
00774 }
00775 
00776 // return 1 if already optimal, return 0 otherwise
00777 int Solver::select_working_set(int &out_i, int &out_j)
00778 {
00779         // return i,j such that
00780         // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
00781         // j: minimizes the decrease of obj value
00782         //    (if quadratic coefficeint <= 0, replace it with tau)
00783         //    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
00784 
00785         double Gmax = -INF;
00786         double Gmax2 = -INF;
00787         int Gmax_idx = -1;
00788         int Gmin_idx = -1;
00789         double obj_diff_min = INF;
00790 
00791         for(int t=0;t<active_size;t++)
00792                 if(y[t]==+1)
00793                 {
00794                         if(!is_upper_bound(t))
00795                                 if(-G[t] >= Gmax)
00796                                 {
00797                                         Gmax = -G[t];
00798                                         Gmax_idx = t;
00799                                 }
00800                 }
00801                 else
00802                 {
00803                         if(!is_lower_bound(t))
00804                                 if(G[t] >= Gmax)
00805                                 {
00806                                         Gmax = G[t];
00807                                         Gmax_idx = t;
00808                                 }
00809                 }
00810 
00811         int i = Gmax_idx;
00812         const Qfloat *Q_i = NULL;
00813         if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1
00814                 Q_i = Q->get_Q(i,active_size);
00815 
00816         for(int j=0;j<active_size;j++)
00817         {
00818                 if(y[j]==+1)
00819                 {
00820                         if (!is_lower_bound(j))
00821                         {
00822                                 double grad_diff=Gmax+G[j];
00823                                 if (G[j] >= Gmax2)
00824                                         Gmax2 = G[j];
00825                                 if (grad_diff > 0)
00826                                 {
00827                                         double obj_diff;
00828                                         double quad_coef=Q_i[i]+QD[j]-2*y[i]*Q_i[j];
00829                                         if (quad_coef > 0)
00830                                                 obj_diff = -(grad_diff*grad_diff)/quad_coef;
00831                                         else
00832                                                 obj_diff = -(grad_diff*grad_diff)/TAU;
00833 
00834                                         if (obj_diff <= obj_diff_min)
00835                                         {
00836                                                 Gmin_idx=j;
00837                                                 obj_diff_min = obj_diff;
00838                                         }
00839                                 }
00840                         }
00841                 }
00842                 else
00843                 {
00844                         if (!is_upper_bound(j))
00845                         {
00846                                 double grad_diff= Gmax-G[j];
00847                                 if (-G[j] >= Gmax2)
00848                                         Gmax2 = -G[j];
00849                                 if (grad_diff > 0)
00850                                 {
00851                                         double obj_diff;
00852                                         double quad_coef=Q_i[i]+QD[j]+2*y[i]*Q_i[j];
00853                                         if (quad_coef > 0)
00854                                                 obj_diff = -(grad_diff*grad_diff)/quad_coef;
00855                                         else
00856                                                 obj_diff = -(grad_diff*grad_diff)/TAU;
00857 
00858                                         if (obj_diff <= obj_diff_min)
00859                                         {
00860                                                 Gmin_idx=j;
00861                                                 obj_diff_min = obj_diff;
00862                                         }
00863                                 }
00864                         }
00865                 }
00866         }
00867 
00868         if(Gmax+Gmax2 < eps)
00869                 return 1;
00870 
00871         out_i = Gmax_idx;
00872         out_j = Gmin_idx;
00873         return 0;
00874 }
00875 
00876 bool Solver::be_shrunken(int i, double Gmax1, double Gmax2)
00877 {
00878         if(is_upper_bound(i))
00879         {
00880                 if(y[i]==+1)
00881                         return(-G[i] > Gmax1);
00882                 else
00883                         return(-G[i] > Gmax2);
00884         }
00885         else if(is_lower_bound(i))
00886         {
00887                 if(y[i]==+1)
00888                         return(G[i] > Gmax2);
00889                 else
00890                         return(G[i] > Gmax1);
00891         }
00892         else
00893                 return(false);
00894 }
00895 
00896 void Solver::do_shrinking()
00897 {
00898         int i;
00899         double Gmax1 = -INF;                // max { -y_i * grad(f)_i | i in I_up(\alpha) }
00900         double Gmax2 = -INF;                // max { y_i * grad(f)_i | i in I_low(\alpha) }
00901 
00902         // find maximal violating pair first
00903         for(i=0;i<active_size;i++)
00904         {
00905                 if(y[i]==+1)
00906                 {
00907                         if(!is_upper_bound(i))
00908                         {
00909                                 if(-G[i] >= Gmax1)
00910                                         Gmax1 = -G[i];
00911                         }
00912                         if(!is_lower_bound(i))
00913                         {
00914                                 if(G[i] >= Gmax2)
00915                                         Gmax2 = G[i];
00916                         }
00917                 }
00918                 else
00919                 {
00920                         if(!is_upper_bound(i))
00921                         {
00922                                 if(-G[i] >= Gmax2)
00923                                         Gmax2 = -G[i];
00924                         }
00925                         if(!is_lower_bound(i))
00926                         {
00927                                 if(G[i] >= Gmax1)
00928                                         Gmax1 = G[i];
00929                         }
00930                 }
00931         }
00932 
00933         // shrink
00934 
00935         for(i=0;i<active_size;i++)
00936                 if (be_shrunken(i, Gmax1, Gmax2))
00937                 {
00938                         active_size--;
00939                         while (active_size > i)
00940                         {
00941                                 if (!be_shrunken(active_size, Gmax1, Gmax2))
00942                                 {
00943                                         swap_index(i,active_size);
00944                                         break;
00945                                 }
00946                                 active_size--;
00947                         }
00948                 }
00949 
00950         // unshrink, check all variables again before final iterations
00951 
00952         if(unshrinked || Gmax1 + Gmax2 > eps*10) return;
00953 
00954         unshrinked = true;
00955         reconstruct_gradient();
00956 
00957         for(i=l-1;i>=active_size;i--)
00958                 if (!be_shrunken(i, Gmax1, Gmax2))
00959                 {
00960                         while (active_size < i)
00961                         {
00962                                 if (be_shrunken(active_size, Gmax1, Gmax2))
00963                                 {
00964                                         swap_index(i,active_size);
00965                                         break;
00966                                 }
00967                                 active_size++;
00968                         }
00969                         active_size++;
00970                 }
00971 }
00972 
00973 double Solver::calculate_rho()
00974 {
00975         double r;
00976         int nr_free = 0;
00977         double ub = INF, lb = -INF, sum_free = 0;
00978         for(int i=0;i<active_size;i++)
00979         {
00980                 double yG = y[i]*G[i];
00981 
00982                 if(is_upper_bound(i))
00983                 {
00984                         if(y[i]==-1)
00985                                 ub = min(ub,yG);
00986                         else
00987                                 lb = max(lb,yG);
00988                 }
00989                 else if(is_lower_bound(i))
00990                 {
00991                         if(y[i]==+1)
00992                                 ub = min(ub,yG);
00993                         else
00994                                 lb = max(lb,yG);
00995                 }
00996                 else
00997                 {
00998                         ++nr_free;
00999                         sum_free += yG;
01000                 }
01001         }
01002 
01003         if(nr_free>0)
01004                 r = sum_free/nr_free;
01005         else
01006                 r = (ub+lb)/2;
01007 
01008         return r;
01009 }
01010 
01011 //
01012 // Solver for nu-svm classification and regression
01013 //
01014 // additional constraint: e^T \alpha = constant
01015 //
01016 class Solver_NU : public Solver
01017 {
01018 public:
01019         Solver_NU() {}
01020         void Solve(int l, const QMatrix& Q, const double *p, const schar *y,
01021                    double *alpha, double Cp, double Cn, double eps,
01022                    SolutionInfo* si, int shrinking)
01023         {
01024                 this->si = si;
01025                 Solver::Solve(l,Q,p,y,alpha,Cp,Cn,eps,si,shrinking);
01026         }
01027 private:
01028         SolutionInfo *si;
01029         int select_working_set(int &i, int &j);
01030         double calculate_rho();
01031         bool be_shrunken(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4);
01032         void do_shrinking();
01033 };
01034 
01035 // return 1 if already optimal, return 0 otherwise
01036 int Solver_NU::select_working_set(int &out_i, int &out_j)
01037 {
01038         // return i,j such that y_i = y_j and
01039         // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
01040         // j: minimizes the decrease of obj value
01041         //    (if quadratic coefficeint <= 0, replace it with tau)
01042         //    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
01043 
01044         double Gmaxp = -INF;
01045         double Gmaxp2 = -INF;
01046         int Gmaxp_idx = -1;
01047 
01048         double Gmaxn = -INF;
01049         double Gmaxn2 = -INF;
01050         int Gmaxn_idx = -1;
01051 
01052         int Gmin_idx = -1;
01053         double obj_diff_min = INF;
01054 
01055         for(int t=0;t<active_size;t++)
01056                 if(y[t]==+1)
01057                 {
01058                         if(!is_upper_bound(t))
01059                                 if(-G[t] >= Gmaxp)
01060                                 {
01061                                         Gmaxp = -G[t];
01062                                         Gmaxp_idx = t;
01063                                 }
01064                 }
01065                 else
01066                 {
01067                         if(!is_lower_bound(t))
01068                                 if(G[t] >= Gmaxn)
01069                                 {
01070                                         Gmaxn = G[t];
01071                                         Gmaxn_idx = t;
01072                                 }
01073                 }
01074 
01075         int ip = Gmaxp_idx;
01076         int in = Gmaxn_idx;
01077         const Qfloat *Q_ip = NULL;
01078         const Qfloat *Q_in = NULL;
01079         if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
01080                 Q_ip = Q->get_Q(ip,active_size);
01081         if(in != -1)
01082                 Q_in = Q->get_Q(in,active_size);
01083 
01084         for(int j=0;j<active_size;j++)
01085         {
01086                 if(y[j]==+1)
01087                 {
01088                         if (!is_lower_bound(j))
01089                         {
01090                                 double grad_diff=Gmaxp+G[j];
01091                                 if (G[j] >= Gmaxp2)
01092                                         Gmaxp2 = G[j];
01093                                 if (grad_diff > 0)
01094                                 {
01095                                         double obj_diff;
01096                                         double quad_coef = Q_ip[ip]+QD[j]-2*Q_ip[j];
01097                                         if (quad_coef > 0)
01098                                                 obj_diff = -(grad_diff*grad_diff)/quad_coef;
01099                                         else
01100                                                 obj_diff = -(grad_diff*grad_diff)/TAU;
01101 
01102                                         if (obj_diff <= obj_diff_min)
01103                                         {
01104                                                 Gmin_idx=j;
01105                                                 obj_diff_min = obj_diff;
01106                                         }
01107                                 }
01108                         }
01109                 }
01110                 else
01111                 {
01112                         if (!is_upper_bound(j))
01113                         {
01114                                 double grad_diff=Gmaxn-G[j];
01115                                 if (-G[j] >= Gmaxn2)
01116                                         Gmaxn2 = -G[j];
01117                                 if (grad_diff > 0)
01118                                 {
01119                                         double obj_diff;
01120                                         double quad_coef = Q_in[in]+QD[j]-2*Q_in[j];
01121                                         if (quad_coef > 0)
01122                                                 obj_diff = -(grad_diff*grad_diff)/quad_coef;
01123                                         else
01124                                                 obj_diff = -(grad_diff*grad_diff)/TAU;
01125 
01126                                         if (obj_diff <= obj_diff_min)
01127                                         {
01128                                                 Gmin_idx=j;
01129                                                 obj_diff_min = obj_diff;
01130                                         }
01131                                 }
01132                         }
01133                 }
01134         }
01135 
01136         if(max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) < eps)
01137                  return 1;
01138 
01139         if (y[Gmin_idx] == +1)
01140                 out_i = Gmaxp_idx;
01141         else
01142                 out_i = Gmaxn_idx;
01143         out_j = Gmin_idx;
01144 
01145         return 0;
01146 }
01147 
01148 bool Solver_NU::be_shrunken(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4)
01149 {
01150         if(is_upper_bound(i))
01151         {
01152                 if(y[i]==+1)
01153                         return(-G[i] > Gmax1);
01154                 else
01155                         return(-G[i] > Gmax4);
01156         }
01157         else if(is_lower_bound(i))
01158         {
01159                 if(y[i]==+1)
01160                         return(G[i] > Gmax2);
01161                 else
01162                         return(G[i] > Gmax3);
01163         }
01164         else
01165                 return(false);
01166 }
01167 
01168 void Solver_NU::do_shrinking()
01169 {
01170         double Gmax1 = -INF;        // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) }
01171         double Gmax2 = -INF;        // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) }
01172         double Gmax3 = -INF;        // max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) }
01173         double Gmax4 = -INF;        // max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) }
01174 
01175         // find maximal violating pair first
01176         int i;
01177         for(i=0;i<active_size;i++)
01178         {
01179                 if(!is_upper_bound(i))
01180                 {
01181                         if(y[i]==+1)
01182                         {
01183                                 if(-G[i] > Gmax1) Gmax1 = -G[i];
01184                         }
01185                         else        if(-G[i] > Gmax4) Gmax4 = -G[i];
01186                 }
01187                 if(!is_lower_bound(i))
01188                 {
01189                         if(y[i]==+1)
01190                         {
01191                                 if(G[i] > Gmax2) Gmax2 = G[i];
01192                         }
01193                         else        if(G[i] > Gmax3) Gmax3 = G[i];
01194                 }
01195         }
01196 
01197         // shrinking
01198 
01199         for(i=0;i<active_size;i++)
01200                 if (be_shrunken(i, Gmax1, Gmax2, Gmax3, Gmax4))
01201                 {
01202                         active_size--;
01203                         while (active_size > i)
01204                         {
01205                                 if (!be_shrunken(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
01206                                 {
01207                                         swap_index(i,active_size);
01208                                         break;
01209                                 }
01210                                 active_size--;
01211                         }
01212                 }
01213 
01214         // unshrink, check all variables again before final iterations
01215 
01216         if(unshrinked || max(Gmax1+Gmax2,Gmax3+Gmax4) > eps*10) return;
01217 
01218         unshrinked = true;
01219         reconstruct_gradient();
01220 
01221         for(i=l-1;i>=active_size;i--)
01222                 if (!be_shrunken(i, Gmax1, Gmax2, Gmax3, Gmax4))
01223                 {
01224                         while (active_size < i)
01225                         {
01226                                 if (be_shrunken(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
01227                                 {
01228                                         swap_index(i,active_size);
01229                                         break;
01230                                 }
01231                                 active_size++;
01232                         }
01233                         active_size++;
01234                 }
01235 }
01236 
01237 double Solver_NU::calculate_rho()
01238 {
01239         int nr_free1 = 0,nr_free2 = 0;
01240         double ub1 = INF, ub2 = INF;
01241         double lb1 = -INF, lb2 = -INF;
01242         double sum_free1 = 0, sum_free2 = 0;
01243 
01244         for(int i=0;i<active_size;i++)
01245         {
01246                 if(y[i]==+1)
01247                 {
01248                         if(is_upper_bound(i))
01249                                 lb1 = max(lb1,G[i]);
01250                         else if(is_lower_bound(i))
01251                                 ub1 = min(ub1,G[i]);
01252                         else
01253                         {
01254                                 ++nr_free1;
01255                                 sum_free1 += G[i];
01256                         }
01257                 }
01258                 else
01259                 {
01260                         if(is_upper_bound(i))
01261                                 lb2 = max(lb2,G[i]);
01262                         else if(is_lower_bound(i))
01263                                 ub2 = min(ub2,G[i]);
01264                         else
01265                         {
01266                                 ++nr_free2;
01267                                 sum_free2 += G[i];
01268                         }
01269                 }
01270         }
01271 
01272         double r1,r2;
01273         if(nr_free1 > 0)
01274                 r1 = sum_free1/nr_free1;
01275         else
01276                 r1 = (ub1+lb1)/2;
01277 
01278         if(nr_free2 > 0)
01279                 r2 = sum_free2/nr_free2;
01280         else
01281                 r2 = (ub2+lb2)/2;
01282 
01283         si->r = (r1+r2)/2;
01284         return (r1-r2)/2;
01285 }
01286 
01287 //
01288 // Q matrices for various formulations
01289 //
01290 class SVC_Q: public Kernel
01291 {
01292 public:
01293         SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_)
01294         :Kernel(prob.l, prob.x, param)
01295         {
01296                 clone(y,y_,prob.l);
01297                 cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
01298                 QD = new Qfloat[prob.l];
01299                 for(int i=0;i<prob.l;i++)
01300                         QD[i]= (Qfloat)(this->*kernel_function)(i,i);
01301         }
01302 
01303         Qfloat *get_Q(int i, int len) const
01304         {
01305                 Qfloat *data;
01306                 int start;
01307                 if((start = cache->get_data(i,&data,len)) < len)
01308                 {
01309                         for(int j=start;j<len;j++)
01310                                 data[j] = (Qfloat)(y[i]*y[j]*(this->*kernel_function)(i,j));
01311                 }
01312                 return data;
01313         }
01314 
01315         Qfloat *get_QD() const
01316         {
01317                 return QD;
01318         }
01319 
01320         void swap_index(int i, int j) const
01321         {
01322                 cache->swap_index(i,j);
01323                 Kernel::swap_index(i,j);
01324                 swap(y[i],y[j]);
01325                 swap(QD[i],QD[j]);
01326         }
01327 
01328         ~SVC_Q()
01329         {
01330                 delete[] y;
01331                 delete cache;
01332                 delete[] QD;
01333         }
01334 private:
01335         schar *y;
01336         Cache *cache;
01337         Qfloat *QD;
01338 };
01339 
01340 class ONE_CLASS_Q: public Kernel
01341 {
01342 public:
01343         ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
01344         :Kernel(prob.l, prob.x, param)
01345         {
01346                 cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
01347                 QD = new Qfloat[prob.l];
01348                 for(int i=0;i<prob.l;i++)
01349                         QD[i]= (Qfloat)(this->*kernel_function)(i,i);
01350         }
01351 
01352         Qfloat *get_Q(int i, int len) const
01353         {
01354                 Qfloat *data;
01355                 int start;
01356                 if((start = cache->get_data(i,&data,len)) < len)
01357                 {
01358                         for(int j=start;j<len;j++)
01359                                 data[j] = (Qfloat)(this->*kernel_function)(i,j);
01360                 }
01361                 return data;
01362         }
01363 
01364         Qfloat *get_QD() const
01365         {
01366                 return QD;
01367         }
01368 
01369         void swap_index(int i, int j) const
01370         {
01371                 cache->swap_index(i,j);
01372                 Kernel::swap_index(i,j);
01373                 swap(QD[i],QD[j]);
01374         }
01375 
01376         ~ONE_CLASS_Q()
01377         {
01378                 delete cache;
01379                 delete[] QD;
01380         }
01381 private:
01382         Cache *cache;
01383         Qfloat *QD;
01384 };
01385 
01386 class SVR_Q: public Kernel
01387 {
01388 public:
01389         SVR_Q(const svm_problem& prob, const svm_parameter& param)
01390         :Kernel(prob.l, prob.x, param)
01391         {
01392                 l = prob.l;
01393                 cache = new Cache(l,(long int)(param.cache_size*(1<<20)));
01394                 QD = new Qfloat[2*l];
01395                 sign = new schar[2*l];
01396                 index = new int[2*l];
01397                 for(int k=0;k<l;k++)
01398                 {
01399                         sign[k] = 1;
01400                         sign[k+l] = -1;
01401                         index[k] = k;
01402                         index[k+l] = k;
01403                         QD[k]= (Qfloat)(this->*kernel_function)(k,k);
01404                         QD[k+l]=QD[k];
01405                 }
01406                 buffer[0] = new Qfloat[2*l];
01407                 buffer[1] = new Qfloat[2*l];
01408                 next_buffer = 0;
01409         }
01410 
01411         void swap_index(int i, int j) const
01412         {
01413                 swap(sign[i],sign[j]);
01414                 swap(index[i],index[j]);
01415                 swap(QD[i],QD[j]);
01416         }
01417 
01418         Qfloat *get_Q(int i, int len) const
01419         {
01420                 Qfloat *data;
01421                 int real_i = index[i];
01422                 if(cache->get_data(real_i,&data,l) < l)
01423                 {
01424                         for(int j=0;j<l;j++)
01425                                 data[j] = (Qfloat)(this->*kernel_function)(real_i,j);
01426                 }
01427 
01428                 // reorder and copy
01429                 Qfloat *buf = buffer[next_buffer];
01430                 next_buffer = 1 - next_buffer;
01431                 schar si = sign[i];
01432                 for(int j=0;j<len;j++)
01433                         buf[j] = si * sign[j] * data[index[j]];
01434                 return buf;
01435         }
01436 
01437         Qfloat *get_QD() const
01438         {
01439                 return QD;
01440         }
01441 
01442         ~SVR_Q()
01443         {
01444                 delete cache;
01445                 delete[] sign;
01446                 delete[] index;
01447                 delete[] buffer[0];
01448                 delete[] buffer[1];
01449                 delete[] QD;
01450         }
01451 private:
01452         int l;
01453         Cache *cache;
01454         schar *sign;
01455         int *index;
01456         mutable int next_buffer;
01457         Qfloat *buffer[2];
01458         Qfloat *QD;
01459 };
01460 
01461 //
01462 // construct and solve various formulations
01463 //
01464 static void solve_c_svc(
01465         const svm_problem *prob, const svm_parameter* param,
01466         double *alpha, Solver::SolutionInfo* si, double Cp, double Cn)
01467 {
01468         int l = prob->l;
01469         double *minus_ones = new double[l];
01470         schar *y = new schar[l];
01471 
01472         int i;
01473 
01474         for(i=0;i<l;i++)
01475         {
01476                 alpha[i] = 0;
01477                 minus_ones[i] = -1;
01478                 if(prob->y[i] > 0) y[i] = +1; else y[i]=-1;
01479         }
01480 
01481         Solver s;
01482         s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
01483                 alpha, Cp, Cn, param->eps, si, param->shrinking);
01484 
01485         double sum_alpha=0;
01486         for(i=0;i<l;i++)
01487                 sum_alpha += alpha[i];
01488 
01489         if (Cp==Cn)
01490                 info("nu = %f\n", sum_alpha/(Cp*prob->l));
01491 
01492         for(i=0;i<l;i++)
01493                 alpha[i] *= y[i];
01494 
01495         delete[] minus_ones;
01496         delete[] y;
01497 }
01498 
01499 static void solve_nu_svc(
01500         const svm_problem *prob, const svm_parameter *param,
01501         double *alpha, Solver::SolutionInfo* si)
01502 {
01503         int i;
01504         int l = prob->l;
01505         double nu = param->nu;
01506 
01507         schar *y = new schar[l];
01508 
01509         for(i=0;i<l;i++)
01510                 if(prob->y[i]>0)
01511                         y[i] = +1;
01512                 else
01513                         y[i] = -1;
01514 
01515         double sum_pos = nu*l/2;
01516         double sum_neg = nu*l/2;
01517 
01518         for(i=0;i<l;i++)
01519                 if(y[i] == +1)
01520                 {
01521                         alpha[i] = min(1.0,sum_pos);
01522                         sum_pos -= alpha[i];
01523                 }
01524                 else
01525                 {
01526                         alpha[i] = min(1.0,sum_neg);
01527                         sum_neg -= alpha[i];
01528                 }
01529 
01530         double *zeros = new double[l];
01531 
01532         for(i=0;i<l;i++)
01533                 zeros[i] = 0;
01534 
01535         Solver_NU s;
01536         s.Solve(l, SVC_Q(*prob,*param,y), zeros, y,
01537                 alpha, 1.0, 1.0, param->eps, si,  param->shrinking);
01538         double r = si->r;
01539 
01540         info("C = %f\n",1/r);
01541 
01542         for(i=0;i<l;i++)
01543                 alpha[i] *= y[i]/r;
01544 
01545         si->rho /= r;
01546         si->obj /= (r*r);
01547         si->upper_bound_p = 1/r;
01548         si->upper_bound_n = 1/r;
01549 
01550         delete[] y;
01551         delete[] zeros;
01552 }
01553 
01554 static void solve_one_class(
01555         const svm_problem *prob, const svm_parameter *param,
01556         double *alpha, Solver::SolutionInfo* si)
01557 {
01558         int l = prob->l;
01559         double *zeros = new double[l];
01560         schar *ones = new schar[l];
01561         int i;
01562 
01563         int n = (int)(param->nu*prob->l);        // # of alpha's at upper bound
01564 
01565         for(i=0;i<n;i++)
01566                 alpha[i] = 1;
01567         if(n<prob->l)
01568                 alpha[n] = param->nu * prob->l - n;
01569         for(i=n+1;i<l;i++)
01570                 alpha[i] = 0;
01571 
01572         for(i=0;i<l;i++)
01573         {
01574                 zeros[i] = 0;
01575                 ones[i] = 1;
01576         }
01577 
01578         Solver s;
01579         s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones,
01580                 alpha, 1.0, 1.0, param->eps, si, param->shrinking);
01581 
01582         delete[] zeros;
01583         delete[] ones;
01584 }
01585 
01586 static void solve_epsilon_svr(
01587         const svm_problem *prob, const svm_parameter *param,
01588         double *alpha, Solver::SolutionInfo* si)
01589 {
01590         int l = prob->l;
01591         double *alpha2 = new double[2*l];
01592         double *linear_term = new double[2*l];
01593         schar *y = new schar[2*l];
01594         int i;
01595 
01596         for(i=0;i<l;i++)
01597         {
01598                 alpha2[i] = 0;
01599                 linear_term[i] = param->p - prob->y[i];
01600                 y[i] = 1;
01601 
01602                 alpha2[i+l] = 0;
01603                 linear_term[i+l] = param->p + prob->y[i];
01604                 y[i+l] = -1;
01605         }
01606 
01607         Solver s;
01608         s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
01609                 alpha2, param->C, param->C, param->eps, si, param->shrinking);
01610 
01611         double sum_alpha = 0;
01612         for(i=0;i<l;i++)
01613         {
01614                 alpha[i] = alpha2[i] - alpha2[i+l];
01615                 sum_alpha += fabs(alpha[i]);
01616         }
01617         info("nu = %f\n",sum_alpha/(param->C*l));
01618 
01619         delete[] alpha2;
01620         delete[] linear_term;
01621         delete[] y;
01622 }
01623 
01624 static void solve_nu_svr(
01625         const svm_problem *prob, const svm_parameter *param,
01626         double *alpha, Solver::SolutionInfo* si)
01627 {
01628         int l = prob->l;
01629         double C = param->C;
01630         double *alpha2 = new double[2*l];
01631         double *linear_term = new double[2*l];
01632         schar *y = new schar[2*l];
01633         int i;
01634 
01635         double sum = C * param->nu * l / 2;
01636         for(i=0;i<l;i++)
01637         {
01638                 alpha2[i] = alpha2[i+l] = min(sum,C);
01639                 sum -= alpha2[i];
01640 
01641                 linear_term[i] = - prob->y[i];
01642                 y[i] = 1;
01643 
01644                 linear_term[i+l] = prob->y[i];
01645                 y[i+l] = -1;
01646         }
01647 
01648         Solver_NU s;
01649         s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
01650                 alpha2, C, C, param->eps, si, param->shrinking);
01651 
01652         info("epsilon = %f\n",-si->r);
01653 
01654         for(i=0;i<l;i++)
01655                 alpha[i] = alpha2[i] - alpha2[i+l];
01656 
01657         delete[] alpha2;
01658         delete[] linear_term;
01659         delete[] y;
01660 }
01661 
01662 //
01663 // decision_function
01664 //
01665 struct decision_function
01666 {
01667         double *alpha;
01668         double rho;
01669 };
01670 
01671 decision_function svm_train_one(
01672         const svm_problem *prob, const svm_parameter *param,
01673         double Cp, double Cn)
01674 {
01675         double *alpha = Malloc(double,prob->l);
01676         Solver::SolutionInfo si;
01677         switch(param->svm_type)
01678         {
01679                 case C_SVC:
01680                         solve_c_svc(prob,param,alpha,&si,Cp,Cn);
01681                         break;
01682                 case NU_SVC:
01683                         solve_nu_svc(prob,param,alpha,&si);
01684                         break;
01685                 case ONE_CLASS:
01686                         solve_one_class(prob,param,alpha,&si);
01687                         break;
01688                 case EPSILON_SVR:
01689                         solve_epsilon_svr(prob,param,alpha,&si);
01690                         break;
01691                 case NU_SVR:
01692                         solve_nu_svr(prob,param,alpha,&si);
01693                         break;
01694         }
01695 
01696         info("obj = %f, rho = %f\n",si.obj,si.rho);
01697 
01698         // output SVs
01699 
01700         int nSV = 0;
01701         int nBSV = 0;
01702         for(int i=0;i<prob->l;i++)
01703         {
01704                 if(fabs(alpha[i]) > 0)
01705                 {
01706                         ++nSV;
01707                         if(prob->y[i] > 0)
01708                         {
01709                                 if(fabs(alpha[i]) >= si.upper_bound_p)
01710                                         ++nBSV;
01711                         }
01712                         else
01713                         {
01714                                 if(fabs(alpha[i]) >= si.upper_bound_n)
01715                                         ++nBSV;
01716                         }
01717                 }
01718         }
01719 
01720         info("nSV = %d, nBSV = %d\n",nSV,nBSV);
01721 
01722         decision_function f;
01723         f.alpha = alpha;
01724         f.rho = si.rho;
01725         return f;
01726 }
01727 
01728 //
01729 // svm_model
01730 //
01731 struct svm_model
01732 {
01733         svm_parameter param;        // parameter
01734         int nr_class;                // number of classes, = 2 in regression/one class svm
01735         int l;                        // total #SV
01736         svm_node **SV;                // SVs (SV[l])
01737         double **sv_coef;        // coefficients for SVs in decision functions (sv_coef[k-1][l])
01738         double *rho;                // constants in decision functions (rho[k*(k-1)/2])
01739         double *probA;          // pariwise probability information
01740         double *probB;
01741 
01742         // for classification only
01743 
01744         int *label;                // label of each class (label[k])
01745         int *nSV;                // number of SVs for each class (nSV[k])
01746                                 // nSV[0] + nSV[1] + ... + nSV[k-1] = l
01747         // XXX
01748         int free_sv;                // 1 if svm_model is created by svm_load_model
01749                                 // 0 if svm_model is created by svm_train
01750 };
01751 
01752 // Platt's binary SVM Probablistic Output: an improvement from Lin et al.
01753 void sigmoid_train(
01754         int l, const double *dec_values, const double *labels,
01755         double& A, double& B)
01756 {
01757         double prior1=0, prior0 = 0;
01758         int i;
01759 
01760         for (i=0;i<l;i++)
01761                 if (labels[i] > 0) prior1+=1;
01762                 else prior0+=1;
01763 
01764         int max_iter=100;         // Maximal number of iterations
01765         double min_step=1e-10;        // Minimal step taken in line search
01766         double sigma=1e-12;        // For numerically strict PD of Hessian
01767         double eps=1e-5;
01768         double hiTarget=(prior1+1.0)/(prior1+2.0);
01769         double loTarget=1/(prior0+2.0);
01770         double *t=Malloc(double,l);
01771         double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize;
01772         double newA,newB,newf,d1,d2;
01773         int iter;
01774 
01775         // Initial Point and Initial Fun Value
01776         A=0.0; B=log((prior0+1.0)/(prior1+1.0));
01777         double fval = 0.0;
01778 
01779         for (i=0;i<l;i++)
01780         {
01781                 if (labels[i]>0) t[i]=hiTarget;
01782                 else t[i]=loTarget;
01783                 fApB = dec_values[i]*A+B;
01784                 if (fApB>=0)
01785                         fval += t[i]*fApB + log(1+exp(-fApB));
01786                 else
01787                         fval += (t[i] - 1)*fApB +log(1+exp(fApB));
01788         }
01789         for (iter=0;iter<max_iter;iter++)
01790         {
01791                 // Update Gradient and Hessian (use H' = H + sigma I)
01792                 h11=sigma; // numerically ensures strict PD
01793                 h22=sigma;
01794                 h21=0.0;g1=0.0;g2=0.0;
01795                 for (i=0;i<l;i++)
01796                 {
01797                         fApB = dec_values[i]*A+B;
01798                         if (fApB >= 0)
01799                         {
01800                                 p=exp(-fApB)/(1.0+exp(-fApB));
01801                                 q=1.0/(1.0+exp(-fApB));
01802                         }
01803                         else
01804                         {
01805                                 p=1.0/(1.0+exp(fApB));
01806                                 q=exp(fApB)/(1.0+exp(fApB));
01807                         }
01808                         d2=p*q;
01809                         h11+=dec_values[i]*dec_values[i]*d2;
01810                         h22+=d2;
01811                         h21+=dec_values[i]*d2;
01812                         d1=t[i]-p;
01813                         g1+=dec_values[i]*d1;
01814                         g2+=d1;
01815                 }
01816 
01817                 // Stopping Criteria
01818                 if (fabs(g1)<eps && fabs(g2)<eps)
01819                         break;
01820 
01821                 // Finding Newton direction: -inv(H') * g
01822                 det=h11*h22-h21*h21;
01823                 dA=-(h22*g1 - h21 * g2) / det;
01824                 dB=-(-h21*g1+ h11 * g2) / det;
01825                 gd=g1*dA+g2*dB;
01826 
01827 
01828                 stepsize = 1;                 // Line Search
01829                 while (stepsize >= min_step)
01830                 {
01831                         newA = A + stepsize * dA;
01832                         newB = B + stepsize * dB;
01833 
01834                         // New function value
01835                         newf = 0.0;
01836                         for (i=0;i<l;i++)
01837                         {
01838                                 fApB = dec_values[i]*newA+newB;
01839                                 if (fApB >= 0)
01840                                         newf += t[i]*fApB + log(1+exp(-fApB));
01841                                 else
01842                                         newf += (t[i] - 1)*fApB +log(1+exp(fApB));
01843                         }
01844                         // Check sufficient decrease
01845                         if (newf<fval+0.0001*stepsize*gd)
01846                         {
01847                                 A=newA;B=newB;fval=newf;
01848                                 break;
01849                         }
01850                         else
01851                                 stepsize = stepsize / 2.0;
01852                 }
01853 
01854                 if (stepsize < min_step)
01855                 {
01856                         info("Line search fails in two-class probability estimates\n");
01857                         break;
01858                 }
01859         }
01860 
01861         if (iter>=max_iter)
01862                 info("Reaching maximal iterations in two-class probability estimates\n");
01863         free(t);
01864 }
01865 
01866 double sigmoid_predict(double decision_value, double A, double B)
01867 {
01868         double fApB = decision_value*A+B;
01869         if (fApB >= 0)
01870                 return exp(-fApB)/(1.0+exp(-fApB));
01871         else
01872                 return 1.0/(1+exp(fApB)) ;
01873 }
01874 
01875 // Method 2 from the multiclass_prob paper by Wu, Lin, and Weng
01876 void multiclass_probability(int k, double **r, double *p)
01877 {
01878         int t,j;
01879         int iter = 0, max_iter=max(100,k);
01880         double **Q=Malloc(double *,k);
01881         double *Qp=Malloc(double,k);
01882         double pQp, eps=0.005/k;
01883 
01884         for (t=0;t<k;t++)
01885         {
01886                 p[t]=1.0/k;  // Valid if k = 1
01887                 Q[t]=Malloc(double,k);
01888                 Q[t][t]=0;
01889                 for (j=0;j<t;j++)
01890                 {
01891                         Q[t][t]+=r[j][t]*r[j][t];
01892                         Q[t][j]=Q[j][t];
01893                 }
01894                 for (j=t+1;j<k;j++)
01895                 {
01896                         Q[t][t]+=r[j][t]*r[j][t];
01897                         Q[t][j]=-r[j][t]*r[t][j];
01898                 }
01899         }
01900         for (iter=0;iter<max_iter;iter++)
01901         {
01902                 // stopping condition, recalculate QP,pQP for numerical accuracy
01903                 pQp=0;
01904                 for (t=0;t<k;t++)
01905                 {
01906                         Qp[t]=0;
01907                         for (j=0;j<k;j++)
01908                                 Qp[t]+=Q[t][j]*p[j];
01909                         pQp+=p[t]*Qp[t];
01910                 }
01911                 double max_error=0;
01912                 for (t=0;t<k;t++)
01913                 {
01914                         double error=fabs(Qp[t]-pQp);
01915                         if (error>max_error)
01916                                 max_error=error;
01917                 }
01918                 if (max_error<eps) break;
01919 
01920                 for (t=0;t<k;t++)
01921                 {
01922                         double diff=(-Qp[t]+pQp)/Q[t][t];
01923                         p[t]+=diff;
01924                         pQp=(pQp+diff*(diff*Q[t][t]+2*Qp[t]))/(1+diff)/(1+diff);
01925                         for (j=0;j<k;j++)
01926                         {
01927                                 Qp[j]=(Qp[j]+diff*Q[t][j])/(1+diff);
01928                                 p[j]/=(1+diff);
01929                         }
01930                 }
01931         }
01932         if (iter>=max_iter)
01933                 info("Exceeds max_iter in multiclass_prob\n");
01934         for(t=0;t<k;t++) free(Q[t]);
01935         free(Q);
01936         free(Qp);
01937 }
01938 
01939 // Cross-validation decision values for probability estimates
01940 void svm_binary_svc_probability(
01941         const svm_problem *prob, const svm_parameter *param,
01942         double Cp, double Cn, double& probA, double& probB)
01943 {
01944         int i;
01945         int nr_fold = 5;
01946         int *perm = Malloc(int,prob->l);
01947         double *dec_values = Malloc(double,prob->l);
01948 
01949         // random shuffle
01950         for(i=0;i<prob->l;i++) perm[i]=i;
01951         for(i=0;i<prob->l;i++)
01952         {
01953                 int j = i+rand()%(prob->l-i);
01954                 swap(perm[i],perm[j]);
01955         }
01956         for(i=0;i<nr_fold;i++)
01957         {
01958                 int begin = i*prob->l/nr_fold;
01959                 int end = (i+1)*prob->l/nr_fold;
01960                 int j,k;
01961                 struct svm_problem subprob;
01962 
01963                 subprob.l = prob->l-(end-begin);
01964                 subprob.x = Malloc(struct svm_node*,subprob.l);
01965                 subprob.y = Malloc(double,subprob.l);
01966 
01967                 k=0;
01968                 for(j=0;j<begin;j++)
01969                 {
01970                         subprob.x[k] = prob->x[perm[j]];
01971                         subprob.y[k] = prob->y[perm[j]];
01972                         ++k;
01973                 }
01974                 for(j=end;j<prob->l;j++)
01975                 {
01976                         subprob.x[k] = prob->x[perm[j]];
01977                         subprob.y[k] = prob->y[perm[j]];
01978                         ++k;
01979                 }
01980                 int p_count=0,n_count=0;
01981                 for(j=0;j<k;j++)
01982                         if(subprob.y[j]>0)
01983                                 p_count++;
01984                         else
01985                                 n_count++;
01986 
01987                 if(p_count==0 && n_count==0)
01988                         for(j=begin;j<end;j++)
01989                                 dec_values[perm[j]] = 0;
01990                 else if(p_count > 0 && n_count == 0)
01991                         for(j=begin;j<end;j++)
01992                                 dec_values[perm[j]] = 1;
01993                 else if(p_count == 0 && n_count > 0)
01994                         for(j=begin;j<end;j++)
01995                                 dec_values[perm[j]] = -1;
01996                 else
01997                 {
01998                         svm_parameter subparam = *param;
01999                         subparam.probability=0;
02000                         subparam.C=1.0;
02001                         subparam.nr_weight=2;
02002                         subparam.weight_label = Malloc(int,2);
02003                         subparam.weight = Malloc(double,2);
02004                         subparam.weight_label[0]=+1;
02005                         subparam.weight_label[1]=-1;
02006                         subparam.weight[0]=Cp;
02007                         subparam.weight[1]=Cn;
02008                         struct svm_model *submodel = svm_train(&subprob,&subparam);
02009                         for(j=begin;j<end;j++)
02010                         {
02011                                 svm_predict_values(submodel,prob->x[perm[j]],&(dec_values[perm[j]]));
02012                                 // ensure +1 -1 order; reason not using CV subroutine
02013                                 dec_values[perm[j]] *= submodel->label[0];
02014                         }
02015                         svm_destroy_model(submodel);
02016                         svm_destroy_param(&subparam);
02017                 }
02018                 free(subprob.x);
02019                 free(subprob.y);
02020         }
02021         sigmoid_train(prob->l,dec_values,prob->y,probA,probB);
02022         free(dec_values);
02023         free(perm);
02024 }
02025 
02026 // Return parameter of a Laplace distribution
02027 double svm_svr_probability(
02028         const svm_problem *prob, const svm_parameter *param)
02029 {
02030         int i;
02031         int nr_fold = 5;
02032         double *ymv = Malloc(double,prob->l);
02033         double mae = 0;
02034 
02035         svm_parameter newparam = *param;
02036         newparam.probability = 0;
02037         svm_cross_validation(prob,&newparam,nr_fold,ymv);
02038         for(i=0;i<prob->l;i++)
02039         {
02040                 ymv[i]=prob->y[i]-ymv[i];
02041                 mae += fabs(ymv[i]);
02042         }
02043         mae /= prob->l;
02044         double std=sqrt(2*mae*mae);
02045         int count=0;
02046         mae=0;
02047         for(i=0;i<prob->l;i++)
02048                 if (fabs(ymv[i]) > 5*std)
02049                         count=count+1;
02050                 else
02051                         mae+=fabs(ymv[i]);
02052         mae /= (prob->l-count);
02053         info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae);
02054         free(ymv);
02055         return mae;
02056 }
02057 
02058 
02059 // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
02060 // perm, length l, must be allocated before calling this subroutine
02061 void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
02062 {
02063         int l = prob->l;
02064         int max_nr_class = 16;
02065         int nr_class = 0;
02066         int *label = Malloc(int,max_nr_class);
02067         int *count = Malloc(int,max_nr_class);
02068         int *data_label = Malloc(int,l);
02069         int i;
02070 
02071         for(i=0;i<l;i++)
02072         {
02073                 int this_label = (int)prob->y[i];
02074                 int j;
02075                 for(j=0;j<nr_class;j++)
02076                 {
02077                         if(this_label == label[j])
02078                         {
02079                                 ++count[j];
02080                                 break;
02081                         }
02082                 }
02083                 data_label[i] = j;
02084                 if(j == nr_class)
02085                 {
02086                         if(nr_class == max_nr_class)
02087                         {
02088                                 max_nr_class *= 2;
02089                                 label = (int *)realloc(label,max_nr_class*sizeof(int));
02090                                 count = (int *)realloc(count,max_nr_class*sizeof(int));
02091                         }
02092                         label[nr_class] = this_label;
02093                         count[nr_class] = 1;
02094                         ++nr_class;
02095                 }
02096         }
02097 
02098         int *start = Malloc(int,nr_class);
02099         start[0] = 0;
02100         for(i=1;i<nr_class;i++)
02101                 start[i] = start[i-1]+count[i-1];
02102         for(i=0;i<l;i++)
02103         {
02104                 perm[start[data_label[i]]] = i;
02105                 ++start[data_label[i]];
02106         }
02107         start[0] = 0;
02108         for(i=1;i<nr_class;i++)
02109                 start[i] = start[i-1]+count[i-1];
02110 
02111         *nr_class_ret = nr_class;
02112         *label_ret = label;
02113         *start_ret = start;
02114         *count_ret = count;
02115         free(data_label);
02116 }
02117 
02118 //
02119 // Interface functions
02120 //
02121 svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
02122 {
02123         svm_model *model = Malloc(svm_model,1);
02124         model->param = *param;
02125         model->free_sv = 0;        // XXX
02126 
02127         if(param->svm_type == ONE_CLASS ||
02128            param->svm_type == EPSILON_SVR ||
02129            param->svm_type == NU_SVR)
02130         {
02131                 // regression or one-class-svm
02132                 model->nr_class = 2;
02133                 model->label = NULL;
02134                 model->nSV = NULL;
02135                 model->probA = NULL; model->probB = NULL;
02136                 model->sv_coef = Malloc(double *,1);
02137 
02138                 if(param->probability &&
02139                    (param->svm_type == EPSILON_SVR ||
02140                     param->svm_type == NU_SVR))
02141                 {
02142                         model->probA = Malloc(double,1);
02143                         model->probA[0] = svm_svr_probability(prob,param);
02144                 }
02145 
02146                 decision_function f = svm_train_one(prob,param,0,0);
02147                 model->rho = Malloc(double,1);
02148                 model->rho[0] = f.rho;
02149 
02150                 int nSV = 0;
02151                 int i;
02152                 for(i=0;i<prob->l;i++)
02153                         if(fabs(f.alpha[i]) > 0) ++nSV;
02154                 model->l = nSV;
02155                 model->SV = Malloc(svm_node *,nSV);
02156                 model->sv_coef[0] = Malloc(double,nSV);
02157                 int j = 0;
02158                 for(i=0;i<prob->l;i++)
02159                         if(fabs(f.alpha[i]) > 0)
02160                         {
02161                                 model->SV[j] = prob->x[i];
02162                                 model->sv_coef[0][j] = f.alpha[i];
02163                                 ++j;
02164                         }
02165 
02166                 free(f.alpha);
02167         }
02168         else
02169         {
02170                 // classification
02171                 int l = prob->l;
02172                 int nr_class;
02173                 int *label = NULL;
02174                 int *start = NULL;
02175                 int *count = NULL;
02176                 int *perm = Malloc(int,l);
02177 
02178                 // group training data of the same class
02179                 svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
02180                 svm_node **x = Malloc(svm_node *,l);
02181                 int i;
02182                 for(i=0;i<l;i++)
02183                         x[i] = prob->x[perm[i]];
02184 
02185                 // calculate weighted C
02186 
02187                 double *weighted_C = Malloc(double, nr_class);
02188                 for(i=0;i<nr_class;i++)
02189                         weighted_C[i] = param->C;
02190                 for(i=0;i<param->nr_weight;i++)
02191                 {
02192                         int j;
02193                         for(j=0;j<nr_class;j++)
02194                                 if(param->weight_label[i] == label[j])
02195                                         break;
02196                         if(j == nr_class)
02197                                 fprintf(stderr,"warning: class label %d specified in weight is not found\n", param->weight_label[i]);
02198                         else
02199                                 weighted_C[j] *= param->weight[i];
02200                 }
02201 
02202                 // train k*(k-1)/2 models
02203 
02204                 bool *nonzero = Malloc(bool,l);
02205                 for(i=0;i<l;i++)
02206                         nonzero[i] = false;
02207                 decision_function *f = Malloc(decision_function,nr_class*(nr_class-1)/2);
02208 
02209                 double *probA=NULL,*probB=NULL;
02210                 if (param->probability)
02211                 {
02212                         probA=Malloc(double,nr_class*(nr_class-1)/2);
02213                         probB=Malloc(double,nr_class*(nr_class-1)/2);
02214                 }
02215 
02216                 int p = 0;
02217                 for(i=0;i<nr_class;i++)
02218                         for(int j=i+1;j<nr_class;j++)
02219                         {
02220                                 svm_problem sub_prob;
02221                                 int si = start[i], sj = start[j];
02222                                 int ci = count[i], cj = count[j];
02223                                 sub_prob.l = ci+cj;
02224                                 sub_prob.x = Malloc(svm_node *,sub_prob.l);
02225                                 sub_prob.y = Malloc(double,sub_prob.l);
02226                                 int k;
02227                                 for(k=0;k<ci;k++)
02228                                 {
02229                                         sub_prob.x[k] = x[si+k];
02230                                         sub_prob.y[k] = +1;
02231                                 }
02232                                 for(k=0;k<cj;k++)
02233                                 {
02234                                         sub_prob.x[ci+k] = x[sj+k];
02235                                         sub_prob.y[ci+k] = -1;
02236                                 }
02237 
02238                                 if(param->probability)
02239                                         svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p]);
02240 
02241                                 f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
02242                                 for(k=0;k<ci;k++)
02243                                         if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
02244                                                 nonzero[si+k] = true;
02245                                 for(k=0;k<cj;k++)
02246                                         if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
02247                                                 nonzero[sj+k] = true;
02248                                 free(sub_prob.x);
02249                                 free(sub_prob.y);
02250                                 ++p;
02251                         }
02252 
02253                 // build output
02254 
02255                 model->nr_class = nr_class;
02256 
02257                 model->label = Malloc(int,nr_class);
02258                 for(i=0;i<nr_class;i++)
02259                         model->label[i] = label[i];
02260 
02261                 model->rho = Malloc(double,nr_class*(nr_class-1)/2);
02262                 for(i=0;i<nr_class*(nr_class-1)/2;i++)
02263                         model->rho[i] = f[i].rho;
02264 
02265                 if(param->probability)
02266                 {
02267                         model->probA = Malloc(double,nr_class*(nr_class-1)/2);
02268                         model->probB = Malloc(double,nr_class*(nr_class-1)/2);
02269                         for(i=0;i<nr_class*(nr_class-1)/2;i++)
02270                         {
02271                                 model->probA[i] = probA[i];
02272                                 model->probB[i] = probB[i];
02273                         }
02274                 }
02275                 else
02276                 {
02277                         model->probA=NULL;
02278                         model->probB=NULL;
02279                 }
02280 
02281                 int total_sv = 0;
02282                 int *nz_count = Malloc(int,nr_class);
02283                 model->nSV = Malloc(int,nr_class);
02284                 for(i=0;i<nr_class;i++)
02285                 {
02286                         int nSV = 0;
02287                         for(int j=0;j<count[i];j++)
02288                                 if(nonzero[start[i]+j])
02289                                 {
02290                                         ++nSV;
02291                                         ++total_sv;
02292                                 }
02293                         model->nSV[i] = nSV;
02294                         nz_count[i] = nSV;
02295                 }
02296 
02297                 info("Total nSV = %d\n",total_sv);
02298 
02299                 model->l = total_sv;
02300                 model->SV = Malloc(svm_node *,total_sv);
02301                 p = 0;
02302                 for(i=0;i<l;i++)
02303                         if(nonzero[i]) model->SV[p++] = x[i];
02304 
02305                 int *nz_start = Malloc(int,nr_class);
02306                 nz_start[0] = 0;
02307                 for(i=1;i<nr_class;i++)
02308                         nz_start[i] = nz_start[i-1]+nz_count[i-1];
02309 
02310                 model->sv_coef = Malloc(double *,nr_class-1);
02311                 for(i=0;i<nr_class-1;i++)
02312                         model->sv_coef[i] = Malloc(double,total_sv);
02313 
02314                 p = 0;
02315                 for(i=0;i<nr_class;i++)
02316                         for(int j=i+1;j<nr_class;j++)
02317                         {
02318                                 // classifier (i,j): coefficients with
02319                                 // i are in sv_coef[j-1][nz_start[i]...],
02320                                 // j are in sv_coef[i][nz_start[j]...]
02321 
02322                                 int si = start[i];
02323                                 int sj = start[j];
02324                                 int ci = count[i];
02325                                 int cj = count[j];
02326 
02327                                 int q = nz_start[i];
02328                                 int k;
02329                                 for(k=0;k<ci;k++)
02330                                         if(nonzero[si+k])
02331                                                 model->sv_coef[j-1][q++] = f[p].alpha[k];
02332                                 q = nz_start[j];
02333                                 for(k=0;k<cj;k++)
02334                                         if(nonzero[sj+k])
02335                                                 model->sv_coef[i][q++] = f[p].alpha[ci+k];
02336                                 ++p;
02337                         }
02338 
02339                 free(label);
02340                 free(probA);
02341                 free(probB);
02342                 free(count);
02343                 free(perm);
02344                 free(start);
02345                 free(x);
02346                 free(weighted_C);
02347                 free(nonzero);
02348                 for(i=0;i<nr_class*(nr_class-1)/2;i++)
02349                         free(f[i].alpha);
02350                 free(f);
02351                 free(nz_count);
02352                 free(nz_start);
02353         }
02354         return model;
02355 }
02356 
02357 // Stratified cross validation
02358 void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
02359 {
02360         int i;
02361         int *fold_start = Malloc(int,nr_fold+1);
02362         int l = prob->l;
02363         int *perm = Malloc(int,l);
02364         int nr_class;
02365 
02366         // stratified cv may not give leave-one-out rate
02367         // Each class to l folds -> some folds may have zero elements
02368         if((param->svm_type == C_SVC ||
02369             param->svm_type == NU_SVC) && nr_fold < l)
02370         {
02371                 int *start = NULL;
02372                 int *label = NULL;
02373                 int *count = NULL;
02374                 svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
02375 
02376                 // random shuffle and then data grouped by fold using the array perm
02377                 int *fold_count = Malloc(int,nr_fold);
02378                 int c;
02379                 int *index = Malloc(int,l);
02380                 for(i=0;i<l;i++)
02381                         index[i]=perm[i];
02382                 for (c=0; c<nr_class; c++)
02383                         for(i=0;i<count[c];i++)
02384                         {
02385                                 int j = i+rand()%(count[c]-i);
02386                                 swap(index[start[c]+j],index[start[c]+i]);
02387                         }
02388                 for(i=0;i<nr_fold;i++)
02389                 {
02390                         fold_count[i] = 0;
02391                         for (c=0; c<nr_class;c++)
02392                                 fold_count[i]+=(i+1)*count[c]/nr_fold-i*count[c]/nr_fold;
02393                 }
02394                 fold_start[0]=0;
02395                 for (i=1;i<=nr_fold;i++)
02396                         fold_start[i] = fold_start[i-1]+fold_count[i-1];
02397                 for (c=0; c<nr_class;c++)
02398                         for(i=0;i<nr_fold;i++)
02399                         {
02400                                 int begin = start[c]+i*count[c]/nr_fold;
02401                                 int end = start[c]+(i+1)*count[c]/nr_fold;
02402                                 for(int j=begin;j<end;j++)
02403                                 {
02404                                         perm[fold_start[i]] = index[j];
02405                                         fold_start[i]++;
02406                                 }
02407                         }
02408                 fold_start[0]=0;
02409                 for (i=1;i<=nr_fold;i++)
02410                         fold_start[i] = fold_start[i-1]+fold_count[i-1];
02411                 free(start);
02412                 free(label);
02413                 free(count);
02414                 free(index);
02415                 free(fold_count);
02416         }
02417         else
02418         {
02419                 for(i=0;i<l;i++) perm[i]=i;
02420                 for(i=0;i<l;i++)
02421                 {
02422                         int j = i+rand()%(l-i);
02423                         swap(perm[i],perm[j]);
02424                 }
02425                 for(i=0;i<=nr_fold;i++)
02426                         fold_start[i]=i*l/nr_fold;
02427         }
02428 
02429         for(i=0;i<nr_fold;i++)
02430         {
02431                 int begin = fold_start[i];
02432                 int end = fold_start[i+1];
02433                 int j,k;
02434                 struct svm_problem subprob;
02435 
02436                 subprob.l = l-(end-begin);
02437                 subprob.x = Malloc(struct svm_node*,subprob.l);
02438                 subprob.y = Malloc(double,subprob.l);
02439 
02440                 k=0;
02441                 for(j=0;j<begin;j++)
02442                 {
02443                         subprob.x[k] = prob->x[perm[j]];
02444                         subprob.y[k] = prob->y[perm[j]];
02445                         ++k;
02446                 }
02447                 for(j=end;j<l;j++)
02448                 {
02449                         subprob.x[k] = prob->x[perm[j]];
02450                         subprob.y[k] = prob->y[perm[j]];
02451                         ++k;
02452                 }
02453                 struct svm_model *submodel = svm_train(&subprob,param);
02454                 if(param->probability &&
02455                    (param->svm_type == C_SVC || param->svm_type == NU_SVC))
02456                 {
02457                         double *prob_estimates=Malloc(double,svm_get_nr_class(submodel));
02458                         for(j=begin;j<end;j++)
02459                                 target[perm[j]] = svm_predict_probability(submodel,prob->x[perm[j]],prob_estimates);
02460                         free(prob_estimates);
02461                 }
02462                 else
02463                         for(j=begin;j<end;j++)
02464                                 target[perm[j]] = svm_predict(submodel,prob->x[perm[j]]);
02465                 svm_destroy_model(submodel);
02466                 free(subprob.x);
02467                 free(subprob.y);
02468         }
02469         free(fold_start);
02470         free(perm);
02471 }
02472 
02473 
02474 int svm_get_svm_type(const svm_model *model)
02475 {
02476         return model->param.svm_type;
02477 }
02478 
02479 int svm_get_nr_class(const svm_model *model)
02480 {
02481         return model->nr_class;
02482 }
02483 
02484 void svm_get_labels(const svm_model *model, int* label)
02485 {
02486         if (model->label != NULL)
02487                 for(int i=0;i<model->nr_class;i++)
02488                         label[i] = model->label[i];
02489 }
02490 
02491 double svm_get_svr_probability(const svm_model *model)
02492 {
02493         if ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
02494             model->probA!=NULL)
02495                 return model->probA[0];
02496         else
02497         {
02498                 info("Model doesn't contain information for SVR probability inference\n");
02499                 return 0;
02500         }
02501 }
02502 
02503 void svm_predict_values(const svm_model *model, const svm_node *x, double* dec_values)
02504 {
02505         if(model->param.svm_type == ONE_CLASS ||
02506            model->param.svm_type == EPSILON_SVR ||
02507            model->param.svm_type == NU_SVR)
02508         {
02509                 double *sv_coef = model->sv_coef[0];
02510                 double sum = 0;
02511                 for(int i=0;i<model->l;i++)
02512                         sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param);
02513                 sum -= model->rho[0];
02514                 *dec_values = sum;
02515         }
02516         else
02517         {
02518                 int i;
02519                 int nr_class = model->nr_class;
02520                 int l = model->l;
02521 
02522                 double *kvalue = Malloc(double,l);
02523                 for(i=0;i<l;i++)
02524                         kvalue[i] = Kernel::k_function(x,model->SV[i],model->param);
02525 
02526                 int *start = Malloc(int,nr_class);
02527                 start[0] = 0;
02528                 for(i=1;i<nr_class;i++)
02529                         start[i] = start[i-1]+model->nSV[i-1];
02530 
02531                 int p=0;
02532                 for(i=0;i<nr_class;i++)
02533                         for(int j=i+1;j<nr_class;j++)
02534                         {
02535                                 double sum = 0;
02536                                 int si = start[i];
02537                                 int sj = start[j];
02538                                 int ci = model->nSV[i];
02539                                 int cj = model->nSV[j];
02540 
02541                                 int k;
02542                                 double *coef1 = model->sv_coef[j-1];
02543                                 double *coef2 = model->sv_coef[i];
02544                                 for(k=0;k<ci;k++)
02545                                         sum += coef1[si+k] * kvalue[si+k];
02546                                 for(k=0;k<cj;k++)
02547                                         sum += coef2[sj+k] * kvalue[sj+k];
02548                                 sum -= model->rho[p];
02549                                 dec_values[p] = sum;
02550                                 p++;
02551                         }
02552 
02553                 free(kvalue);
02554                 free(start);
02555         }
02556 }
02557 
02558 double svm_predict(const svm_model *model, const svm_node *x)
02559 {
02560         if(model->param.svm_type == ONE_CLASS ||
02561            model->param.svm_type == EPSILON_SVR ||
02562            model->param.svm_type == NU_SVR)
02563         {
02564                 double res;
02565                 svm_predict_values(model, x, &res);
02566 
02567                 if(model->param.svm_type == ONE_CLASS)
02568                         return (res>0)?1:-1;
02569                 else
02570                         return res;
02571         }
02572         else
02573         {
02574                 int i;
02575                 int nr_class = model->nr_class;
02576                 double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
02577                 svm_predict_values(model, x, dec_values);
02578 
02579                 int *vote = Malloc(int,nr_class);
02580                 for(i=0;i<nr_class;i++)
02581                         vote[i] = 0;
02582                 int pos=0;
02583                 for(i=0;i<nr_class;i++)
02584                         for(int j=i+1;j<nr_class;j++)
02585                         {
02586                                 if(dec_values[pos++] > 0)
02587                                         ++vote[i];
02588                                 else
02589                                         ++vote[j];
02590                         }
02591 
02592                 int vote_max_idx = 0;
02593                 for(i=1;i<nr_class;i++)
02594                         if(vote[i] > vote[vote_max_idx])
02595                                 vote_max_idx = i;
02596                 free(vote);
02597                 free(dec_values);
02598                 return model->label[vote_max_idx];
02599         }
02600 }
02601 
02602 double svm_predict_probability(
02603         const svm_model *model, const svm_node *x, double *prob_estimates)
02604 {
02605         if ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
02606             model->probA!=NULL && model->probB!=NULL)
02607         {
02608                 int i;
02609                 int nr_class = model->nr_class;
02610                 double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
02611                 svm_predict_values(model, x, dec_values);
02612 
02613                 double min_prob=1e-7;
02614                 double **pairwise_prob=Malloc(double *,nr_class);
02615                 for(i=0;i<nr_class;i++)
02616                         pairwise_prob[i]=Malloc(double,nr_class);
02617                 int k=0;
02618                 for(i=0;i<nr_class;i++)
02619                         for(int j=i+1;j<nr_class;j++)
02620                         {
02621                                 pairwise_prob[i][j]=min(max(sigmoid_predict(dec_values[k],model->probA[k],model->probB[k]),min_prob),1-min_prob);
02622                                 pairwise_prob[j][i]=1-pairwise_prob[i][j];
02623                                 k++;
02624                         }
02625                 multiclass_probability(nr_class,pairwise_prob,prob_estimates);
02626 
02627                 int prob_max_idx = 0;
02628                 for(i=1;i<nr_class;i++)
02629                         if(prob_estimates[i] > prob_estimates[prob_max_idx])
02630                                 prob_max_idx = i;
02631                 for(i=0;i<nr_class;i++)
02632                         free(pairwise_prob[i]);
02633                 free(dec_values);
02634                 free(pairwise_prob);
02635                 return model->label[prob_max_idx];
02636         }
02637         else
02638                 return svm_predict(model, x);
02639 }
02640 
02641 const char *svm_type_table[] =
02642 {
02643         "c_svc","nu_svc","one_class","epsilon_svr","nu_svr",NULL
02644 };
02645 
02646 const char *kernel_type_table[]=
02647 {
02648         "linear","polynomial","rbf","sigmoid","precomputed",NULL
02649 };
02650 
02651 void ignore_retval_gcc46(int ret) { /* keep gcc46 happy */ }
02652 
02653 int svm_save_model(const char *model_file_name, const svm_model *model)
02654 {
02655         FILE *fp = fopen(model_file_name,"w");
02656         if(fp==NULL) return -1;
02657 
02658         const svm_parameter& param = model->param;
02659 
02660         fprintf(fp,"svm_type %s\n", svm_type_table[param.svm_type]);
02661         fprintf(fp,"kernel_type %s\n", kernel_type_table[param.kernel_type]);
02662 
02663         if(param.kernel_type == POLY)
02664                 fprintf(fp,"degree %d\n", param.degree);
02665 
02666         if(param.kernel_type == POLY || param.kernel_type == RBF || param.kernel_type == SIGMOID)
02667                 fprintf(fp,"gamma %g\n", param.gamma);
02668 
02669         if(param.kernel_type == POLY || param.kernel_type == SIGMOID)
02670                 fprintf(fp,"coef0 %g\n", param.coef0);
02671 
02672         int nr_class = model->nr_class;
02673         int l = model->l;
02674         fprintf(fp, "nr_class %d\n", nr_class);
02675         fprintf(fp, "total_sv %d\n",l);
02676 
02677         {
02678                 fprintf(fp, "rho");
02679                 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02680                         fprintf(fp," %g",model->rho[i]);
02681                 fprintf(fp, "\n");
02682         }
02683 
02684         if(model->label)
02685         {
02686                 fprintf(fp, "label");
02687                 for(int i=0;i<nr_class;i++)
02688                         fprintf(fp," %d",model->label[i]);
02689                 fprintf(fp, "\n");
02690         }
02691 
02692         if(model->probA) // regression has probA only
02693         {
02694                 fprintf(fp, "probA");
02695                 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02696                         fprintf(fp," %g",model->probA[i]);
02697                 fprintf(fp, "\n");
02698         }
02699         if(model->probB)
02700         {
02701                 fprintf(fp, "probB");
02702                 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02703                         fprintf(fp," %g",model->probB[i]);
02704                 fprintf(fp, "\n");
02705         }
02706 
02707         if(model->nSV)
02708         {
02709                 fprintf(fp, "nr_sv");
02710                 for(int i=0;i<nr_class;i++)
02711                         fprintf(fp," %d",model->nSV[i]);
02712                 fprintf(fp, "\n");
02713         }
02714 
02715         fprintf(fp, "SV\n");
02716         const double * const *sv_coef = model->sv_coef;
02717         const svm_node * const *SV = model->SV;
02718 
02719         for(int i=0;i<l;i++)
02720         {
02721                 for(int j=0;j<nr_class-1;j++)
02722                         fprintf(fp, "%.16g ",sv_coef[j][i]);
02723 
02724                 const svm_node *p = SV[i];
02725 
02726                 if(param.kernel_type == PRECOMPUTED)
02727                         fprintf(fp,"0:%d ",(int)(p->value));
02728                 else
02729                         while(p->index != -1)
02730                         {
02731                                 fprintf(fp,"%d:%.8g ",p->index,p->value);
02732                                 p++;
02733                         }
02734                 fprintf(fp, "\n");
02735         }
02736         if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
02737         else return 0;
02738 }
02739 
02740 svm_model *svm_load_model(const char *model_file_name)
02741 {
02742         FILE *fp = fopen(model_file_name,"r");
02743         if(fp==NULL) return NULL;
02744 
02745         // read parameters
02746 
02747         svm_model *model = Malloc(svm_model,1);
02748         svm_parameter& param = model->param;
02749         model->rho = NULL;
02750         model->probA = NULL;
02751         model->probB = NULL;
02752         model->label = NULL;
02753         model->nSV = NULL;
02754 
02755         char cmd[81];
02756         int ret;
02757         while(1)
02758         {
02759                 ret = fscanf(fp,"%80s",cmd);
02760 
02761                 if(strcmp(cmd,"svm_type")==0)
02762                 {
02763                         ret = fscanf(fp,"%80s",cmd);
02764                         int i;
02765                         for(i=0;svm_type_table[i];i++)
02766                         {
02767                                 if(strcmp(svm_type_table[i],cmd)==0)
02768                                 {
02769                                         param.svm_type=i;
02770                                         break;
02771                                 }
02772                         }
02773                         if(svm_type_table[i] == NULL)
02774                         {
02775                                 fprintf(stderr,"unknown svm type.\n");
02776                                 free(model->rho);
02777                                 free(model->label);
02778                                 free(model->nSV);
02779                                 free(model);
02780                                 return NULL;
02781                         }
02782                 }
02783                 else if(strcmp(cmd,"kernel_type")==0)
02784                 {
02785                         ret = fscanf(fp,"%80s",cmd);
02786                         int i;
02787                         for(i=0;kernel_type_table[i];i++)
02788                         {
02789                                 if(strcmp(kernel_type_table[i],cmd)==0)
02790                                 {
02791                                         param.kernel_type=i;
02792                                         break;
02793                                 }
02794                         }
02795                         if(kernel_type_table[i] == NULL)
02796                         {
02797                                 fprintf(stderr,"unknown kernel function.\n");
02798                                 free(model->rho);
02799                                 free(model->label);
02800                                 free(model->nSV);
02801                                 free(model);
02802                                 return NULL;
02803                         }
02804                 }
02805                 else if(strcmp(cmd,"degree")==0)
02806                         ret = fscanf(fp,"%d",&param.degree);
02807                 else if(strcmp(cmd,"gamma")==0)
02808                         ret = fscanf(fp,"%lf",&param.gamma);
02809                 else if(strcmp(cmd,"coef0")==0)
02810                         ret = fscanf(fp,"%lf",&param.coef0);
02811                 else if(strcmp(cmd,"nr_class")==0)
02812                         ret = fscanf(fp,"%d",&model->nr_class);
02813                 else if(strcmp(cmd,"total_sv")==0)
02814                         ret = fscanf(fp,"%d",&model->l);
02815                 else if(strcmp(cmd,"rho")==0)
02816                 {
02817                         int n = model->nr_class * (model->nr_class-1)/2;
02818                         model->rho = Malloc(double,n);
02819                         for(int i=0;i<n;i++)
02820                                 ret = fscanf(fp,"%lf",&model->rho[i]);
02821                 }
02822                 else if(strcmp(cmd,"label")==0)
02823                 {
02824                         int n = model->nr_class;
02825                         model->label = Malloc(int,n);
02826                         for(int i=0;i<n;i++)
02827                                 ret = fscanf(fp,"%d",&model->label[i]);
02828                 }
02829                 else if(strcmp(cmd,"probA")==0)
02830                 {
02831                         int n = model->nr_class * (model->nr_class-1)/2;
02832                         model->probA = Malloc(double,n);
02833                         for(int i=0;i<n;i++)
02834                                 ret = fscanf(fp,"%lf",&model->probA[i]);
02835                 }
02836                 else if(strcmp(cmd,"probB")==0)
02837                 {
02838                         int n = model->nr_class * (model->nr_class-1)/2;
02839                         model->probB = Malloc(double,n);
02840                         for(int i=0;i<n;i++)
02841                                 ret = fscanf(fp,"%lf",&model->probB[i]);
02842                 }
02843                 else if(strcmp(cmd,"nr_sv")==0)
02844                 {
02845                         int n = model->nr_class;
02846                         model->nSV = Malloc(int,n);
02847                         for(int i=0;i<n;i++)
02848                                 ret = fscanf(fp,"%d",&model->nSV[i]);
02849                 }
02850                 else if(strcmp(cmd,"SV")==0)
02851                 {
02852                         while(1)
02853                         {
02854                                 int c = getc(fp);
02855                                 if(c==EOF || c=='\n') break;
02856                         }
02857                         break;
02858                 }
02859                 else
02860                 {
02861                         fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
02862                         free(model->rho);
02863                         free(model->label);
02864                         free(model->nSV);
02865                         free(model);
02866                         return NULL;
02867                 }
02868         }
02869 
02870         // read sv_coef and SV
02871 
02872         int elements = 0;
02873         long pos = ftell(fp);
02874 
02875         while(1)
02876         {
02877                 int c = fgetc(fp);
02878                 switch(c)
02879                 {
02880                         case '\n':
02881                                 // count the '-1' element
02882                         case ':':
02883                                 ++elements;
02884                                 break;
02885                         case EOF:
02886                                 goto out;
02887                         default:
02888                                 ;
02889                 }
02890         }
02891 out:
02892         fseek(fp,pos,SEEK_SET);
02893 
02894         int m = model->nr_class - 1;
02895         int l = model->l;
02896         model->sv_coef = Malloc(double *,m);
02897         int i;
02898         for(i=0;i<m;i++)
02899                 model->sv_coef[i] = Malloc(double,l);
02900         model->SV = Malloc(svm_node*,l);
02901         svm_node *x_space=NULL;
02902         if(l>0) x_space = Malloc(svm_node,elements);
02903 
02904         int j=0;
02905         for(i=0;i<l;i++)
02906         {
02907                 model->SV[i] = &x_space[j];
02908                 for(int k=0;k<m;k++)
02909                         ret = fscanf(fp,"%lf",&model->sv_coef[k][i]);
02910                 while(1)
02911                 {
02912                         int c;
02913                         do {
02914                                 c = getc(fp);
02915                                 if(c=='\n') goto out2;
02916                         } while(isspace(c));
02917                         ungetc(c,fp);
02918                         ret = fscanf(fp,"%d:%lf",&(x_space[j].index),&(x_space[j].value));
02919                         ++j;
02920                 }
02921 out2:
02922                 x_space[j++].index = -1;
02923         }
02924         if (ferror(fp) != 0 || fclose(fp) != 0) return NULL;
02925 
02926         model->free_sv = 1;        // XXX
02927 
02928         ignore_retval_gcc46(ret);
02929 
02930         return model;
02931 }
02932 
02933 void svm_destroy_model(svm_model* model)
02934 {
02935         if(model->free_sv && model->l > 0)
02936                 free((void *)(model->SV[0]));
02937         for(int i=0;i<model->nr_class-1;i++)
02938                 free(model->sv_coef[i]);
02939         free(model->SV);
02940         free(model->sv_coef);
02941         free(model->rho);
02942         free(model->label);
02943         free(model->probA);
02944         free(model->probB);
02945         free(model->nSV);
02946         free(model);
02947 }
02948 
02949 void svm_destroy_param(svm_parameter* param)
02950 {
02951         free(param->weight_label);
02952         free(param->weight);
02953 }
02954 
02955 const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
02956 {
02957         // svm_type
02958 
02959         int svm_type = param->svm_type;
02960         if(svm_type != C_SVC &&
02961            svm_type != NU_SVC &&
02962            svm_type != ONE_CLASS &&
02963            svm_type != EPSILON_SVR &&
02964            svm_type != NU_SVR)
02965                 return "unknown svm type";
02966 
02967         // kernel_type, degree
02968 
02969         int kernel_type = param->kernel_type;
02970         if(kernel_type != LINEAR &&
02971            kernel_type != POLY &&
02972            kernel_type != RBF &&
02973            kernel_type != SIGMOID &&
02974            kernel_type != PRECOMPUTED)
02975                 return "unknown kernel type";
02976 
02977         if(param->degree < 0)
02978                 return "degree of polynomial kernel < 0";
02979 
02980         // cache_size,eps,C,nu,p,shrinking
02981 
02982         if(param->cache_size <= 0)
02983                 return "cache_size <= 0";
02984 
02985         if(param->eps <= 0)
02986                 return "eps <= 0";
02987 
02988         if(svm_type == C_SVC ||
02989            svm_type == EPSILON_SVR ||
02990            svm_type == NU_SVR)
02991                 if(param->C <= 0)
02992                         return "C <= 0";
02993 
02994         if(svm_type == NU_SVC ||
02995            svm_type == ONE_CLASS ||
02996            svm_type == NU_SVR)
02997                 if(param->nu <= 0 || param->nu > 1)
02998                         return "nu <= 0 or nu > 1";
02999 
03000         if(svm_type == EPSILON_SVR)
03001                 if(param->p < 0)
03002                         return "p < 0";
03003 
03004         if(param->shrinking != 0 &&
03005            param->shrinking != 1)
03006                 return "shrinking != 0 and shrinking != 1";
03007 
03008         if(param->probability != 0 &&
03009            param->probability != 1)
03010                 return "probability != 0 and probability != 1";
03011 
03012         if(param->probability == 1 &&
03013            svm_type == ONE_CLASS)
03014                 return "one-class SVM probability output not supported yet";
03015 
03016 
03017         // check whether nu-svc is feasible
03018 
03019         if(svm_type == NU_SVC)
03020         {
03021                 int l = prob->l;
03022                 int max_nr_class = 16;
03023                 int nr_class = 0;
03024                 int *label = Malloc(int,max_nr_class);
03025                 int *count = Malloc(int,max_nr_class);
03026 
03027                 int i;
03028                 for(i=0;i<l;i++)
03029                 {
03030                         int this_label = (int)prob->y[i];
03031                         int j;
03032                         for(j=0;j<nr_class;j++)
03033                                 if(this_label == label[j])
03034                                 {
03035                                         ++count[j];
03036                                         break;
03037                                 }
03038                         if(j == nr_class)
03039                         {
03040                                 if(nr_class == max_nr_class)
03041                                 {
03042                                         max_nr_class *= 2;
03043                                         label = (int *)realloc(label,max_nr_class*sizeof(int));
03044                                         count = (int *)realloc(count,max_nr_class*sizeof(int));
03045                                 }
03046                                 label[nr_class] = this_label;
03047                                 count[nr_class] = 1;
03048                                 ++nr_class;
03049                         }
03050                 }
03051 
03052                 for(i=0;i<nr_class;i++)
03053                 {
03054                         int n1 = count[i];
03055                         for(int j=i+1;j<nr_class;j++)
03056                         {
03057                                 int n2 = count[j];
03058                                 if(param->nu*(n1+n2)/2 > min(n1,n2))
03059                                 {
03060                                         free(label);
03061                                         free(count);
03062                                         return "specified nu is infeasible";
03063                                 }
03064                         }
03065                 }
03066                 free(label);
03067                 free(count);
03068         }
03069 
03070         return NULL;
03071 }
03072 
03073 int svm_check_probability_model(const svm_model *model)
03074 {
03075         return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
03076                 model->probA!=NULL && model->probB!=NULL) ||
03077                 ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
03078                  model->probA!=NULL);
03079 }