00001 /* 00002 Copyright (c) 2000-2007 Chih-Chung Chang and Chih-Jen Lin 00003 All rights reserved. 00004 00005 Redistribution and use in source and binary forms, with or without 00006 modification, are permitted provided that the following conditions 00007 are met: 00008 00009 1. Redistributions of source code must retain the above copyright 00010 notice, this list of conditions and the following disclaimer. 00011 00012 2. Redistributions in binary form must reproduce the above copyright 00013 notice, this list of conditions and the following disclaimer in the 00014 documentation and/or other materials provided with the distribution. 00015 00016 3. Neither name of copyright holders nor the names of its contributors 00017 may be used to endorse or promote products derived from this software 00018 without specific prior written permission. 00019 00020 00021 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00022 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00023 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 00024 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR 00025 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00026 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00027 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00028 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00029 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00030 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00031 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00032 */ 00033 00034 #include <math.h> 00035 #include <stdio.h> 00036 #include <stdlib.h> 00037 #include <ctype.h> 00038 #include <float.h> 00039 #include <string.h> 00040 #include <stdarg.h> 00041 #include "svm.h" 00042 typedef float Qfloat; 00043 typedef signed char schar; 00044 #ifndef min 00045 template <class T> inline T min(T x,T y) { return (x<y)?x:y; } 00046 #endif 00047 #ifndef max 00048 template <class T> inline T max(T x,T y) { return (x>y)?x:y; } 00049 #endif 00050 template <class T> inline void swap(T& x, T& y) { T t=x; x=y; y=t; } 00051 template <class S, class T> inline void clone(T*& dst, S* src, int n) 00052 { 00053 dst = new T[n]; 00054 memcpy((void *)dst,(void *)src,sizeof(T)*n); 00055 } 00056 inline double powi(double base, int times) 00057 { 00058 double tmp = base, ret = 1.0; 00059 00060 for(int t=times; t>0; t/=2) 00061 { 00062 if(t%2==1) ret*=tmp; 00063 tmp = tmp * tmp; 00064 } 00065 return ret; 00066 } 00067 #define INF HUGE_VAL 00068 #define TAU 1e-12 00069 #define Malloc(type,n) (type *)malloc((n)*sizeof(type)) 00070 #if 1 00071 void info(const char *fmt,...) 00072 { 00073 va_list ap; 00074 va_start(ap,fmt); 00075 vprintf(fmt,ap); 00076 va_end(ap); 00077 } 00078 void info_flush() 00079 { 00080 fflush(stdout); 00081 } 00082 #else 00083 void info(char *fmt,...) 
{} 00084 void info_flush() {} 00085 #endif 00086 00087 // 00088 // Kernel Cache 00089 // 00090 // l is the number of total data items 00091 // size is the cache size limit in bytes 00092 // 00093 class Cache 00094 { 00095 public: 00096 Cache(int l,long int size); 00097 ~Cache(); 00098 00099 // request data [0,len) 00100 // return some position p where [p,len) need to be filled 00101 // (p >= len if nothing needs to be filled) 00102 int get_data(const int index, Qfloat **data, int len); 00103 void swap_index(int i, int j); // future_option 00104 private: 00105 int l; 00106 long int size; 00107 struct head_t 00108 { 00109 head_t *prev, *next; // a cicular list 00110 Qfloat *data; 00111 int len; // data[0,len) is cached in this entry 00112 }; 00113 00114 head_t *head; 00115 head_t lru_head; 00116 void lru_delete(head_t *h); 00117 void lru_insert(head_t *h); 00118 }; 00119 00120 Cache::Cache(int l_,long int size_):l(l_),size(size_) 00121 { 00122 head = (head_t *)calloc(l,sizeof(head_t)); // initialized to 0 00123 size /= sizeof(Qfloat); 00124 size -= l * sizeof(head_t) / sizeof(Qfloat); 00125 size = max(size, 2 * (long int) l); // cache must be large enough for two columns 00126 lru_head.next = lru_head.prev = &lru_head; 00127 } 00128 00129 Cache::~Cache() 00130 { 00131 for(head_t *h = lru_head.next; h != &lru_head; h=h->next) 00132 free(h->data); 00133 free(head); 00134 } 00135 00136 void Cache::lru_delete(head_t *h) 00137 { 00138 // delete from current location 00139 h->prev->next = h->next; 00140 h->next->prev = h->prev; 00141 } 00142 00143 void Cache::lru_insert(head_t *h) 00144 { 00145 // insert to last position 00146 h->next = &lru_head; 00147 h->prev = lru_head.prev; 00148 h->prev->next = h; 00149 h->next->prev = h; 00150 } 00151 00152 int Cache::get_data(const int index, Qfloat **data, int len) 00153 { 00154 head_t *h = &head[index]; 00155 if(h->len) lru_delete(h); 00156 int more = len - h->len; 00157 00158 if(more > 0) 00159 { 00160 // free old space 00161 while(size < more) 00162 { 00163 head_t *old = lru_head.next; 00164 lru_delete(old); 00165 free(old->data); 00166 size += old->len; 00167 old->data = 0; 00168 old->len = 0; 00169 } 00170 00171 // allocate new space 00172 h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len); 00173 size -= more; 00174 swap(h->len,len); 00175 } 00176 00177 lru_insert(h); 00178 *data = h->data; 00179 return len; 00180 } 00181 00182 void Cache::swap_index(int i, int j) 00183 { 00184 if(i==j) return; 00185 00186 if(head[i].len) lru_delete(&head[i]); 00187 if(head[j].len) lru_delete(&head[j]); 00188 swap(head[i].data,head[j].data); 00189 swap(head[i].len,head[j].len); 00190 if(head[i].len) lru_insert(&head[i]); 00191 if(head[j].len) lru_insert(&head[j]); 00192 00193 if(i>j) swap(i,j); 00194 for(head_t *h = lru_head.next; h!=&lru_head; h=h->next) 00195 { 00196 if(h->len > i) 00197 { 00198 if(h->len > j) 00199 swap(h->data[i],h->data[j]); 00200 else 00201 { 00202 // give up 00203 lru_delete(h); 00204 free(h->data); 00205 size += h->len; 00206 h->data = 0; 00207 h->len = 0; 00208 } 00209 } 00210 } 00211 } 00212 00213 // 00214 // Kernel evaluation 00215 // 00216 // the static method k_function is for doing single kernel evaluation 00217 // the constructor of Kernel prepares to calculate the l*l kernel matrix 00218 // the member function get_Q is for getting one column from the Q Matrix 00219 // 00220 class QMatrix { 00221 public: 00222 virtual Qfloat *get_Q(int column, int len) const = 0; 00223 virtual Qfloat *get_QD() const = 0; 00224 virtual void 
swap_index(int i, int j) const = 0; 00225 virtual ~QMatrix() {} 00226 }; 00227 00228 class Kernel: public QMatrix { 00229 public: 00230 Kernel(int l, svm_node * const * x, const svm_parameter& param); 00231 virtual ~Kernel(); 00232 00233 static double k_function(const svm_node *x, const svm_node *y, 00234 const svm_parameter& param); 00235 virtual Qfloat *get_Q(int column, int len) const = 0; 00236 virtual Qfloat *get_QD() const = 0; 00237 virtual void swap_index(int i, int j) const // no so const... 00238 { 00239 swap(x[i],x[j]); 00240 if(x_square) swap(x_square[i],x_square[j]); 00241 } 00242 protected: 00243 00244 double (Kernel::*kernel_function)(int i, int j) const; 00245 00246 private: 00247 const svm_node **x; 00248 double *x_square; 00249 00250 // svm_parameter 00251 const int kernel_type; 00252 const int degree; 00253 const double gamma; 00254 const double coef0; 00255 00256 static double dot(const svm_node *px, const svm_node *py); 00257 double kernel_linear(int i, int j) const 00258 { 00259 return dot(x[i],x[j]); 00260 } 00261 double kernel_poly(int i, int j) const 00262 { 00263 return powi(gamma*dot(x[i],x[j])+coef0,degree); 00264 } 00265 double kernel_rbf(int i, int j) const 00266 { 00267 return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j]))); 00268 } 00269 double kernel_sigmoid(int i, int j) const 00270 { 00271 return tanh(gamma*dot(x[i],x[j])+coef0); 00272 } 00273 double kernel_precomputed(int i, int j) const 00274 { 00275 return x[i][(int)(x[j][0].value)].value; 00276 } 00277 }; 00278 00279 Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param) 00280 :kernel_type(param.kernel_type), degree(param.degree), 00281 gamma(param.gamma), coef0(param.coef0) 00282 { 00283 switch(kernel_type) 00284 { 00285 case LINEAR: 00286 kernel_function = &Kernel::kernel_linear; 00287 break; 00288 case POLY: 00289 kernel_function = &Kernel::kernel_poly; 00290 break; 00291 case RBF: 00292 kernel_function = &Kernel::kernel_rbf; 00293 break; 00294 case SIGMOID: 00295 kernel_function = &Kernel::kernel_sigmoid; 00296 break; 00297 case PRECOMPUTED: 00298 kernel_function = &Kernel::kernel_precomputed; 00299 break; 00300 } 00301 00302 clone(x,x_,l); 00303 00304 if(kernel_type == RBF) 00305 { 00306 x_square = new double[l]; 00307 for(int i=0;i<l;i++) 00308 x_square[i] = dot(x[i],x[i]); 00309 } 00310 else 00311 x_square = 0; 00312 } 00313 00314 Kernel::~Kernel() 00315 { 00316 delete[] x; 00317 delete[] x_square; 00318 } 00319 00320 double Kernel::dot(const svm_node *px, const svm_node *py) 00321 { 00322 double sum = 0; 00323 while(px->index != -1 && py->index != -1) 00324 { 00325 if(px->index == py->index) 00326 { 00327 sum += px->value * py->value; 00328 ++px; 00329 ++py; 00330 } 00331 else 00332 { 00333 if(px->index > py->index) 00334 ++py; 00335 else 00336 ++px; 00337 } 00338 } 00339 return sum; 00340 } 00341 00342 double Kernel::k_function(const svm_node *x, const svm_node *y, 00343 const svm_parameter& param) 00344 { 00345 switch(param.kernel_type) 00346 { 00347 case LINEAR: 00348 return dot(x,y); 00349 case POLY: 00350 return powi(param.gamma*dot(x,y)+param.coef0,param.degree); 00351 case RBF: 00352 { 00353 double sum = 0; 00354 while(x->index != -1 && y->index !=-1) 00355 { 00356 if(x->index == y->index) 00357 { 00358 double d = x->value - y->value; 00359 sum += d*d; 00360 ++x; 00361 ++y; 00362 } 00363 else 00364 { 00365 if(x->index > y->index) 00366 { 00367 sum += y->value * y->value; 00368 ++y; 00369 } 00370 else 00371 { 00372 sum += x->value * x->value; 00373 ++x; 00374 } 
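// (Note on this merge loop: it accumulates the sparse squared distance
// ||x-y||^2. A matching index contributes (x->value - y->value)^2; an
// index present in only one vector contributes its value squared, the
// other coordinate being an implicit zero. The two trailing loops below
// flush whichever vector has not yet reached its -1 terminator, and the
// total is used as exp(-param.gamma*sum).)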
00375 } 00376 } 00377 00378 while(x->index != -1) 00379 { 00380 sum += x->value * x->value; 00381 ++x; 00382 } 00383 00384 while(y->index != -1) 00385 { 00386 sum += y->value * y->value; 00387 ++y; 00388 } 00389 00390 return exp(-param.gamma*sum); 00391 } 00392 case SIGMOID: 00393 return tanh(param.gamma*dot(x,y)+param.coef0); 00394 case PRECOMPUTED: //x: test (validation), y: SV 00395 return x[(int)(y->value)].value; 00396 default: 00397 return 0; // Unreachable 00398 } 00399 } 00400 00401 // An SMO algorithm in Fan et al., JMLR 6(2005), p. 1889--1918 00402 // Solves: 00403 // 00404 // min 0.5(\alpha^T Q \alpha) + p^T \alpha 00405 // 00406 // y^T \alpha = \delta 00407 // y_i = +1 or -1 00408 // 0 <= alpha_i <= Cp for y_i = 1 00409 // 0 <= alpha_i <= Cn for y_i = -1 00410 // 00411 // Given: 00412 // 00413 // Q, p, y, Cp, Cn, and an initial feasible point \alpha 00414 // l is the size of vectors and matrices 00415 // eps is the stopping tolerance 00416 // 00417 // solution will be put in \alpha, objective value will be put in obj 00418 // 00419 class Solver { 00420 public: 00421 Solver() {}; 00422 virtual ~Solver() {}; 00423 00424 struct SolutionInfo { 00425 double obj; 00426 double rho; 00427 double upper_bound_p; 00428 double upper_bound_n; 00429 double r; // for Solver_NU 00430 }; 00431 00432 void Solve(int l, const QMatrix& Q, const double *p_, const schar *y_, 00433 double *alpha_, double Cp, double Cn, double eps, 00434 SolutionInfo* si, int shrinking); 00435 protected: 00436 int active_size; 00437 schar *y; 00438 double *G; // gradient of objective function 00439 enum { LOWER_BOUND, UPPER_BOUND, FREE }; 00440 char *alpha_status; // LOWER_BOUND, UPPER_BOUND, FREE 00441 double *alpha; 00442 const QMatrix *Q; 00443 const Qfloat *QD; 00444 double eps; 00445 double Cp,Cn; 00446 double *p; 00447 int *active_set; 00448 double *G_bar; // gradient, if we treat free variables as 0 00449 int l; 00450 bool unshrinked; // XXX 00451 00452 double get_C(int i) 00453 { 00454 return (y[i] > 0)? 
Cp : Cn; 00455 } 00456 void update_alpha_status(int i) 00457 { 00458 if(alpha[i] >= get_C(i)) 00459 alpha_status[i] = UPPER_BOUND; 00460 else if(alpha[i] <= 0) 00461 alpha_status[i] = LOWER_BOUND; 00462 else alpha_status[i] = FREE; 00463 } 00464 bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; } 00465 bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; } 00466 bool is_free(int i) { return alpha_status[i] == FREE; } 00467 void swap_index(int i, int j); 00468 void reconstruct_gradient(); 00469 virtual int select_working_set(int &i, int &j); 00470 virtual double calculate_rho(); 00471 virtual void do_shrinking(); 00472 private: 00473 bool be_shrunken(int i, double Gmax1, double Gmax2); 00474 }; 00475 00476 void Solver::swap_index(int i, int j) 00477 { 00478 Q->swap_index(i,j); 00479 swap(y[i],y[j]); 00480 swap(G[i],G[j]); 00481 swap(alpha_status[i],alpha_status[j]); 00482 swap(alpha[i],alpha[j]); 00483 swap(p[i],p[j]); 00484 swap(active_set[i],active_set[j]); 00485 swap(G_bar[i],G_bar[j]); 00486 } 00487 00488 void Solver::reconstruct_gradient() 00489 { 00490 // reconstruct inactive elements of G from G_bar and free variables 00491 00492 if(active_size == l) return; 00493 00494 int i; 00495 for(i=active_size;i<l;i++) 00496 G[i] = G_bar[i] + p[i]; 00497 00498 for(i=0;i<active_size;i++) 00499 if(is_free(i)) 00500 { 00501 const Qfloat *Q_i = Q->get_Q(i,l); 00502 double alpha_i = alpha[i]; 00503 for(int j=active_size;j<l;j++) 00504 G[j] += alpha_i * Q_i[j]; 00505 } 00506 } 00507 00508 void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_, 00509 double *alpha_, double Cp, double Cn, double eps, 00510 SolutionInfo* si, int shrinking) 00511 { 00512 this->l = l; 00513 this->Q = &Q; 00514 QD=Q.get_QD(); 00515 clone(p, p_,l); 00516 clone(y, y_,l); 00517 clone(alpha,alpha_,l); 00518 this->Cp = Cp; 00519 this->Cn = Cn; 00520 this->eps = eps; 00521 unshrinked = false; 00522 00523 // initialize alpha_status 00524 { 00525 alpha_status = new char[l]; 00526 for(int i=0;i<l;i++) 00527 update_alpha_status(i); 00528 } 00529 00530 // initialize active set (for shrinking) 00531 { 00532 active_set = new int[l]; 00533 for(int i=0;i<l;i++) 00534 active_set[i] = i; 00535 active_size = l; 00536 } 00537 00538 // initialize gradient 00539 { 00540 G = new double[l]; 00541 G_bar = new double[l]; 00542 int i; 00543 for(i=0;i<l;i++) 00544 { 00545 G[i] = p[i]; 00546 G_bar[i] = 0; 00547 } 00548 for(i=0;i<l;i++) 00549 if(!is_lower_bound(i)) 00550 { 00551 const Qfloat *Q_i = Q.get_Q(i,l); 00552 double alpha_i = alpha[i]; 00553 int j; 00554 for(j=0;j<l;j++) 00555 G[j] += alpha_i*Q_i[j]; 00556 if(is_upper_bound(i)) 00557 for(j=0;j<l;j++) 00558 G_bar[j] += get_C(i) * Q_i[j]; 00559 } 00560 } 00561 00562 // optimization step 00563 00564 int iter = 0; 00565 int counter = min(l,1000)+1; 00566 00567 while(1) 00568 { 00569 // show progress and do shrinking 00570 00571 if(--counter == 0) 00572 { 00573 counter = min(l,1000); 00574 if(shrinking) do_shrinking(); 00575 info("."); info_flush(); 00576 } 00577 00578 int i,j; 00579 if(select_working_set(i,j)!=0) 00580 { 00581 // reconstruct the whole gradient 00582 reconstruct_gradient(); 00583 // reset active set size and check 00584 active_size = l; 00585 info("*"); info_flush(); 00586 if(select_working_set(i,j)!=0) 00587 break; 00588 else 00589 counter = 1; // do shrinking next iteration 00590 } 00591 00592 ++iter; 00593 00594 // update alpha[i] and alpha[j], handle bounds carefully 00595 00596 const Qfloat *Q_i = 
Q.get_Q(i,active_size); 00597 const Qfloat *Q_j = Q.get_Q(j,active_size); 00598 00599 double C_i = get_C(i); 00600 double C_j = get_C(j); 00601 00602 double old_alpha_i = alpha[i]; 00603 double old_alpha_j = alpha[j]; 00604 00605 if(y[i]!=y[j]) 00606 { 00607 double quad_coef = Q_i[i]+Q_j[j]+2*Q_i[j]; 00608 if (quad_coef <= 0) 00609 quad_coef = TAU; 00610 double delta = (-G[i]-G[j])/quad_coef; 00611 double diff = alpha[i] - alpha[j]; 00612 alpha[i] += delta; 00613 alpha[j] += delta; 00614 00615 if(diff > 0) 00616 { 00617 if(alpha[j] < 0) 00618 { 00619 alpha[j] = 0; 00620 alpha[i] = diff; 00621 } 00622 } 00623 else 00624 { 00625 if(alpha[i] < 0) 00626 { 00627 alpha[i] = 0; 00628 alpha[j] = -diff; 00629 } 00630 } 00631 if(diff > C_i - C_j) 00632 { 00633 if(alpha[i] > C_i) 00634 { 00635 alpha[i] = C_i; 00636 alpha[j] = C_i - diff; 00637 } 00638 } 00639 else 00640 { 00641 if(alpha[j] > C_j) 00642 { 00643 alpha[j] = C_j; 00644 alpha[i] = C_j + diff; 00645 } 00646 } 00647 } 00648 else 00649 { 00650 double quad_coef = Q_i[i]+Q_j[j]-2*Q_i[j]; 00651 if (quad_coef <= 0) 00652 quad_coef = TAU; 00653 double delta = (G[i]-G[j])/quad_coef; 00654 double sum = alpha[i] + alpha[j]; 00655 alpha[i] -= delta; 00656 alpha[j] += delta; 00657 00658 if(sum > C_i) 00659 { 00660 if(alpha[i] > C_i) 00661 { 00662 alpha[i] = C_i; 00663 alpha[j] = sum - C_i; 00664 } 00665 } 00666 else 00667 { 00668 if(alpha[j] < 0) 00669 { 00670 alpha[j] = 0; 00671 alpha[i] = sum; 00672 } 00673 } 00674 if(sum > C_j) 00675 { 00676 if(alpha[j] > C_j) 00677 { 00678 alpha[j] = C_j; 00679 alpha[i] = sum - C_j; 00680 } 00681 } 00682 else 00683 { 00684 if(alpha[i] < 0) 00685 { 00686 alpha[i] = 0; 00687 alpha[j] = sum; 00688 } 00689 } 00690 } 00691 00692 // update G 00693 00694 double delta_alpha_i = alpha[i] - old_alpha_i; 00695 double delta_alpha_j = alpha[j] - old_alpha_j; 00696 00697 for(int k=0;k<active_size;k++) 00698 { 00699 G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j; 00700 } 00701 00702 // update alpha_status and G_bar 00703 00704 { 00705 bool ui = is_upper_bound(i); 00706 bool uj = is_upper_bound(j); 00707 update_alpha_status(i); 00708 update_alpha_status(j); 00709 int k; 00710 if(ui != is_upper_bound(i)) 00711 { 00712 Q_i = Q.get_Q(i,l); 00713 if(ui) 00714 for(k=0;k<l;k++) 00715 G_bar[k] -= C_i * Q_i[k]; 00716 else 00717 for(k=0;k<l;k++) 00718 G_bar[k] += C_i * Q_i[k]; 00719 } 00720 00721 if(uj != is_upper_bound(j)) 00722 { 00723 Q_j = Q.get_Q(j,l); 00724 if(uj) 00725 for(k=0;k<l;k++) 00726 G_bar[k] -= C_j * Q_j[k]; 00727 else 00728 for(k=0;k<l;k++) 00729 G_bar[k] += C_j * Q_j[k]; 00730 } 00731 } 00732 } 00733 00734 // calculate rho 00735 00736 si->rho = calculate_rho(); 00737 00738 // calculate objective value 00739 { 00740 double v = 0; 00741 int i; 00742 for(i=0;i<l;i++) 00743 v += alpha[i] * (G[i] + p[i]); 00744 00745 si->obj = v/2; 00746 } 00747 00748 // put back the solution 00749 { 00750 for(int i=0;i<l;i++) 00751 alpha_[active_set[i]] = alpha[i]; 00752 } 00753 00754 // juggle everything back 00755 /*{ 00756 for(int i=0;i<l;i++) 00757 while(active_set[i] != i) 00758 swap_index(i,active_set[i]); 00759 // or Q.swap_index(i,active_set[i]); 00760 }*/ 00761 00762 si->upper_bound_p = Cp; 00763 si->upper_bound_n = Cn; 00764 00765 info("\noptimization finished, #iter = %d\n",iter); 00766 00767 delete[] p; 00768 delete[] y; 00769 delete[] alpha; 00770 delete[] alpha_status; 00771 delete[] active_set; 00772 delete[] G; 00773 delete[] G_bar; 00774 } 00775 00776 // return 1 if already optimal, return 0 otherwise 00777 int 
Solver::select_working_set(int &out_i, int &out_j) 00778 { 00779 // return i,j such that 00780 // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha) 00781 // j: minimizes the decrease of obj value 00782 // (if quadratic coefficeint <= 0, replace it with tau) 00783 // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha) 00784 00785 double Gmax = -INF; 00786 double Gmax2 = -INF; 00787 int Gmax_idx = -1; 00788 int Gmin_idx = -1; 00789 double obj_diff_min = INF; 00790 00791 for(int t=0;t<active_size;t++) 00792 if(y[t]==+1) 00793 { 00794 if(!is_upper_bound(t)) 00795 if(-G[t] >= Gmax) 00796 { 00797 Gmax = -G[t]; 00798 Gmax_idx = t; 00799 } 00800 } 00801 else 00802 { 00803 if(!is_lower_bound(t)) 00804 if(G[t] >= Gmax) 00805 { 00806 Gmax = G[t]; 00807 Gmax_idx = t; 00808 } 00809 } 00810 00811 int i = Gmax_idx; 00812 const Qfloat *Q_i = NULL; 00813 if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1 00814 Q_i = Q->get_Q(i,active_size); 00815 00816 for(int j=0;j<active_size;j++) 00817 { 00818 if(y[j]==+1) 00819 { 00820 if (!is_lower_bound(j)) 00821 { 00822 double grad_diff=Gmax+G[j]; 00823 if (G[j] >= Gmax2) 00824 Gmax2 = G[j]; 00825 if (grad_diff > 0) 00826 { 00827 double obj_diff; 00828 double quad_coef=Q_i[i]+QD[j]-2*y[i]*Q_i[j]; 00829 if (quad_coef > 0) 00830 obj_diff = -(grad_diff*grad_diff)/quad_coef; 00831 else 00832 obj_diff = -(grad_diff*grad_diff)/TAU; 00833 00834 if (obj_diff <= obj_diff_min) 00835 { 00836 Gmin_idx=j; 00837 obj_diff_min = obj_diff; 00838 } 00839 } 00840 } 00841 } 00842 else 00843 { 00844 if (!is_upper_bound(j)) 00845 { 00846 double grad_diff= Gmax-G[j]; 00847 if (-G[j] >= Gmax2) 00848 Gmax2 = -G[j]; 00849 if (grad_diff > 0) 00850 { 00851 double obj_diff; 00852 double quad_coef=Q_i[i]+QD[j]+2*y[i]*Q_i[j]; 00853 if (quad_coef > 0) 00854 obj_diff = -(grad_diff*grad_diff)/quad_coef; 00855 else 00856 obj_diff = -(grad_diff*grad_diff)/TAU; 00857 00858 if (obj_diff <= obj_diff_min) 00859 { 00860 Gmin_idx=j; 00861 obj_diff_min = obj_diff; 00862 } 00863 } 00864 } 00865 } 00866 } 00867 00868 if(Gmax+Gmax2 < eps) 00869 return 1; 00870 00871 out_i = Gmax_idx; 00872 out_j = Gmin_idx; 00873 return 0; 00874 } 00875 00876 bool Solver::be_shrunken(int i, double Gmax1, double Gmax2) 00877 { 00878 if(is_upper_bound(i)) 00879 { 00880 if(y[i]==+1) 00881 return(-G[i] > Gmax1); 00882 else 00883 return(-G[i] > Gmax2); 00884 } 00885 else if(is_lower_bound(i)) 00886 { 00887 if(y[i]==+1) 00888 return(G[i] > Gmax2); 00889 else 00890 return(G[i] > Gmax1); 00891 } 00892 else 00893 return(false); 00894 } 00895 00896 void Solver::do_shrinking() 00897 { 00898 int i; 00899 double Gmax1 = -INF; // max { -y_i * grad(f)_i | i in I_up(\alpha) } 00900 double Gmax2 = -INF; // max { y_i * grad(f)_i | i in I_low(\alpha) } 00901 00902 // find maximal violating pair first 00903 for(i=0;i<active_size;i++) 00904 { 00905 if(y[i]==+1) 00906 { 00907 if(!is_upper_bound(i)) 00908 { 00909 if(-G[i] >= Gmax1) 00910 Gmax1 = -G[i]; 00911 } 00912 if(!is_lower_bound(i)) 00913 { 00914 if(G[i] >= Gmax2) 00915 Gmax2 = G[i]; 00916 } 00917 } 00918 else 00919 { 00920 if(!is_upper_bound(i)) 00921 { 00922 if(-G[i] >= Gmax2) 00923 Gmax2 = -G[i]; 00924 } 00925 if(!is_lower_bound(i)) 00926 { 00927 if(G[i] >= Gmax1) 00928 Gmax1 = G[i]; 00929 } 00930 } 00931 } 00932 00933 // shrink 00934 00935 for(i=0;i<active_size;i++) 00936 if (be_shrunken(i, Gmax1, Gmax2)) 00937 { 00938 active_size--; 00939 while (active_size > i) 00940 { 00941 if (!be_shrunken(active_size, Gmax1, Gmax2)) 00942 { 00943 swap_index(i,active_size); 00944 break; 00945 } 
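// (Shrinking compaction: position i was flagged for removal from the
// active set; this loop walks active_size backwards until it finds an
// element that should remain active and swaps it into slot i, so
// [0,active_size) always holds the variables still being optimized.
// The mirror-image pass after reconstruct_gradient() below grows the
// active set back once the stopping tolerance is nearly reached.)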
00946 active_size--; 00947 } 00948 } 00949 00950 // unshrink, check all variables again before final iterations 00951 00952 if(unshrinked || Gmax1 + Gmax2 > eps*10) return; 00953 00954 unshrinked = true; 00955 reconstruct_gradient(); 00956 00957 for(i=l-1;i>=active_size;i--) 00958 if (!be_shrunken(i, Gmax1, Gmax2)) 00959 { 00960 while (active_size < i) 00961 { 00962 if (be_shrunken(active_size, Gmax1, Gmax2)) 00963 { 00964 swap_index(i,active_size); 00965 break; 00966 } 00967 active_size++; 00968 } 00969 active_size++; 00970 } 00971 } 00972 00973 double Solver::calculate_rho() 00974 { 00975 double r; 00976 int nr_free = 0; 00977 double ub = INF, lb = -INF, sum_free = 0; 00978 for(int i=0;i<active_size;i++) 00979 { 00980 double yG = y[i]*G[i]; 00981 00982 if(is_upper_bound(i)) 00983 { 00984 if(y[i]==-1) 00985 ub = min(ub,yG); 00986 else 00987 lb = max(lb,yG); 00988 } 00989 else if(is_lower_bound(i)) 00990 { 00991 if(y[i]==+1) 00992 ub = min(ub,yG); 00993 else 00994 lb = max(lb,yG); 00995 } 00996 else 00997 { 00998 ++nr_free; 00999 sum_free += yG; 01000 } 01001 } 01002 01003 if(nr_free>0) 01004 r = sum_free/nr_free; 01005 else 01006 r = (ub+lb)/2; 01007 01008 return r; 01009 } 01010 01011 // 01012 // Solver for nu-svm classification and regression 01013 // 01014 // additional constraint: e^T \alpha = constant 01015 // 01016 class Solver_NU : public Solver 01017 { 01018 public: 01019 Solver_NU() {} 01020 void Solve(int l, const QMatrix& Q, const double *p, const schar *y, 01021 double *alpha, double Cp, double Cn, double eps, 01022 SolutionInfo* si, int shrinking) 01023 { 01024 this->si = si; 01025 Solver::Solve(l,Q,p,y,alpha,Cp,Cn,eps,si,shrinking); 01026 } 01027 private: 01028 SolutionInfo *si; 01029 int select_working_set(int &i, int &j); 01030 double calculate_rho(); 01031 bool be_shrunken(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4); 01032 void do_shrinking(); 01033 }; 01034 01035 // return 1 if already optimal, return 0 otherwise 01036 int Solver_NU::select_working_set(int &out_i, int &out_j) 01037 { 01038 // return i,j such that y_i = y_j and 01039 // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha) 01040 // j: minimizes the decrease of obj value 01041 // (if quadratic coefficeint <= 0, replace it with tau) 01042 // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha) 01043 01044 double Gmaxp = -INF; 01045 double Gmaxp2 = -INF; 01046 int Gmaxp_idx = -1; 01047 01048 double Gmaxn = -INF; 01049 double Gmaxn2 = -INF; 01050 int Gmaxn_idx = -1; 01051 01052 int Gmin_idx = -1; 01053 double obj_diff_min = INF; 01054 01055 for(int t=0;t<active_size;t++) 01056 if(y[t]==+1) 01057 { 01058 if(!is_upper_bound(t)) 01059 if(-G[t] >= Gmaxp) 01060 { 01061 Gmaxp = -G[t]; 01062 Gmaxp_idx = t; 01063 } 01064 } 01065 else 01066 { 01067 if(!is_lower_bound(t)) 01068 if(G[t] >= Gmaxn) 01069 { 01070 Gmaxn = G[t]; 01071 Gmaxn_idx = t; 01072 } 01073 } 01074 01075 int ip = Gmaxp_idx; 01076 int in = Gmaxn_idx; 01077 const Qfloat *Q_ip = NULL; 01078 const Qfloat *Q_in = NULL; 01079 if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1 01080 Q_ip = Q->get_Q(ip,active_size); 01081 if(in != -1) 01082 Q_in = Q->get_Q(in,active_size); 01083 01084 for(int j=0;j<active_size;j++) 01085 { 01086 if(y[j]==+1) 01087 { 01088 if (!is_lower_bound(j)) 01089 { 01090 double grad_diff=Gmaxp+G[j]; 01091 if (G[j] >= Gmaxp2) 01092 Gmaxp2 = G[j]; 01093 if (grad_diff > 0) 01094 { 01095 double obj_diff; 01096 double quad_coef = Q_ip[ip]+QD[j]-2*Q_ip[j]; 01097 if (quad_coef > 0) 01098 obj_diff = 
-(grad_diff*grad_diff)/quad_coef; 01099 else 01100 obj_diff = -(grad_diff*grad_diff)/TAU; 01101 01102 if (obj_diff <= obj_diff_min) 01103 { 01104 Gmin_idx=j; 01105 obj_diff_min = obj_diff; 01106 } 01107 } 01108 } 01109 } 01110 else 01111 { 01112 if (!is_upper_bound(j)) 01113 { 01114 double grad_diff=Gmaxn-G[j]; 01115 if (-G[j] >= Gmaxn2) 01116 Gmaxn2 = -G[j]; 01117 if (grad_diff > 0) 01118 { 01119 double obj_diff; 01120 double quad_coef = Q_in[in]+QD[j]-2*Q_in[j]; 01121 if (quad_coef > 0) 01122 obj_diff = -(grad_diff*grad_diff)/quad_coef; 01123 else 01124 obj_diff = -(grad_diff*grad_diff)/TAU; 01125 01126 if (obj_diff <= obj_diff_min) 01127 { 01128 Gmin_idx=j; 01129 obj_diff_min = obj_diff; 01130 } 01131 } 01132 } 01133 } 01134 } 01135 01136 if(max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) < eps) 01137 return 1; 01138 01139 if (y[Gmin_idx] == +1) 01140 out_i = Gmaxp_idx; 01141 else 01142 out_i = Gmaxn_idx; 01143 out_j = Gmin_idx; 01144 01145 return 0; 01146 } 01147 01148 bool Solver_NU::be_shrunken(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4) 01149 { 01150 if(is_upper_bound(i)) 01151 { 01152 if(y[i]==+1) 01153 return(-G[i] > Gmax1); 01154 else 01155 return(-G[i] > Gmax4); 01156 } 01157 else if(is_lower_bound(i)) 01158 { 01159 if(y[i]==+1) 01160 return(G[i] > Gmax2); 01161 else 01162 return(G[i] > Gmax3); 01163 } 01164 else 01165 return(false); 01166 } 01167 01168 void Solver_NU::do_shrinking() 01169 { 01170 double Gmax1 = -INF; // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) } 01171 double Gmax2 = -INF; // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) } 01172 double Gmax3 = -INF; // max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) } 01173 double Gmax4 = -INF; // max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) } 01174 01175 // find maximal violating pair first 01176 int i; 01177 for(i=0;i<active_size;i++) 01178 { 01179 if(!is_upper_bound(i)) 01180 { 01181 if(y[i]==+1) 01182 { 01183 if(-G[i] > Gmax1) Gmax1 = -G[i]; 01184 } 01185 else if(-G[i] > Gmax4) Gmax4 = -G[i]; 01186 } 01187 if(!is_lower_bound(i)) 01188 { 01189 if(y[i]==+1) 01190 { 01191 if(G[i] > Gmax2) Gmax2 = G[i]; 01192 } 01193 else if(G[i] > Gmax3) Gmax3 = G[i]; 01194 } 01195 } 01196 01197 // shrinking 01198 01199 for(i=0;i<active_size;i++) 01200 if (be_shrunken(i, Gmax1, Gmax2, Gmax3, Gmax4)) 01201 { 01202 active_size--; 01203 while (active_size > i) 01204 { 01205 if (!be_shrunken(active_size, Gmax1, Gmax2, Gmax3, Gmax4)) 01206 { 01207 swap_index(i,active_size); 01208 break; 01209 } 01210 active_size--; 01211 } 01212 } 01213 01214 // unshrink, check all variables again before final iterations 01215 01216 if(unshrinked || max(Gmax1+Gmax2,Gmax3+Gmax4) > eps*10) return; 01217 01218 unshrinked = true; 01219 reconstruct_gradient(); 01220 01221 for(i=l-1;i>=active_size;i--) 01222 if (!be_shrunken(i, Gmax1, Gmax2, Gmax3, Gmax4)) 01223 { 01224 while (active_size < i) 01225 { 01226 if (be_shrunken(active_size, Gmax1, Gmax2, Gmax3, Gmax4)) 01227 { 01228 swap_index(i,active_size); 01229 break; 01230 } 01231 active_size++; 01232 } 01233 active_size++; 01234 } 01235 } 01236 01237 double Solver_NU::calculate_rho() 01238 { 01239 int nr_free1 = 0,nr_free2 = 0; 01240 double ub1 = INF, ub2 = INF; 01241 double lb1 = -INF, lb2 = -INF; 01242 double sum_free1 = 0, sum_free2 = 0; 01243 01244 for(int i=0;i<active_size;i++) 01245 { 01246 if(y[i]==+1) 01247 { 01248 if(is_upper_bound(i)) 01249 lb1 = max(lb1,G[i]); 01250 else if(is_lower_bound(i)) 01251 ub1 = min(ub1,G[i]); 01252 else 01253 { 01254 ++nr_free1; 01255 sum_free1 += 
G[i]; 01256 } 01257 } 01258 else 01259 { 01260 if(is_upper_bound(i)) 01261 lb2 = max(lb2,G[i]); 01262 else if(is_lower_bound(i)) 01263 ub2 = min(ub2,G[i]); 01264 else 01265 { 01266 ++nr_free2; 01267 sum_free2 += G[i]; 01268 } 01269 } 01270 } 01271 01272 double r1,r2; 01273 if(nr_free1 > 0) 01274 r1 = sum_free1/nr_free1; 01275 else 01276 r1 = (ub1+lb1)/2; 01277 01278 if(nr_free2 > 0) 01279 r2 = sum_free2/nr_free2; 01280 else 01281 r2 = (ub2+lb2)/2; 01282 01283 si->r = (r1+r2)/2; 01284 return (r1-r2)/2; 01285 } 01286 01287 // 01288 // Q matrices for various formulations 01289 // 01290 class SVC_Q: public Kernel 01291 { 01292 public: 01293 SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_) 01294 :Kernel(prob.l, prob.x, param) 01295 { 01296 clone(y,y_,prob.l); 01297 cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20))); 01298 QD = new Qfloat[prob.l]; 01299 for(int i=0;i<prob.l;i++) 01300 QD[i]= (Qfloat)(this->*kernel_function)(i,i); 01301 } 01302 01303 Qfloat *get_Q(int i, int len) const 01304 { 01305 Qfloat *data; 01306 int start; 01307 if((start = cache->get_data(i,&data,len)) < len) 01308 { 01309 for(int j=start;j<len;j++) 01310 data[j] = (Qfloat)(y[i]*y[j]*(this->*kernel_function)(i,j)); 01311 } 01312 return data; 01313 } 01314 01315 Qfloat *get_QD() const 01316 { 01317 return QD; 01318 } 01319 01320 void swap_index(int i, int j) const 01321 { 01322 cache->swap_index(i,j); 01323 Kernel::swap_index(i,j); 01324 swap(y[i],y[j]); 01325 swap(QD[i],QD[j]); 01326 } 01327 01328 ~SVC_Q() 01329 { 01330 delete[] y; 01331 delete cache; 01332 delete[] QD; 01333 } 01334 private: 01335 schar *y; 01336 Cache *cache; 01337 Qfloat *QD; 01338 }; 01339 01340 class ONE_CLASS_Q: public Kernel 01341 { 01342 public: 01343 ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param) 01344 :Kernel(prob.l, prob.x, param) 01345 { 01346 cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20))); 01347 QD = new Qfloat[prob.l]; 01348 for(int i=0;i<prob.l;i++) 01349 QD[i]= (Qfloat)(this->*kernel_function)(i,i); 01350 } 01351 01352 Qfloat *get_Q(int i, int len) const 01353 { 01354 Qfloat *data; 01355 int start; 01356 if((start = cache->get_data(i,&data,len)) < len) 01357 { 01358 for(int j=start;j<len;j++) 01359 data[j] = (Qfloat)(this->*kernel_function)(i,j); 01360 } 01361 return data; 01362 } 01363 01364 Qfloat *get_QD() const 01365 { 01366 return QD; 01367 } 01368 01369 void swap_index(int i, int j) const 01370 { 01371 cache->swap_index(i,j); 01372 Kernel::swap_index(i,j); 01373 swap(QD[i],QD[j]); 01374 } 01375 01376 ~ONE_CLASS_Q() 01377 { 01378 delete cache; 01379 delete[] QD; 01380 } 01381 private: 01382 Cache *cache; 01383 Qfloat *QD; 01384 }; 01385 01386 class SVR_Q: public Kernel 01387 { 01388 public: 01389 SVR_Q(const svm_problem& prob, const svm_parameter& param) 01390 :Kernel(prob.l, prob.x, param) 01391 { 01392 l = prob.l; 01393 cache = new Cache(l,(long int)(param.cache_size*(1<<20))); 01394 QD = new Qfloat[2*l]; 01395 sign = new schar[2*l]; 01396 index = new int[2*l]; 01397 for(int k=0;k<l;k++) 01398 { 01399 sign[k] = 1; 01400 sign[k+l] = -1; 01401 index[k] = k; 01402 index[k+l] = k; 01403 QD[k]= (Qfloat)(this->*kernel_function)(k,k); 01404 QD[k+l]=QD[k]; 01405 } 01406 buffer[0] = new Qfloat[2*l]; 01407 buffer[1] = new Qfloat[2*l]; 01408 next_buffer = 0; 01409 } 01410 01411 void swap_index(int i, int j) const 01412 { 01413 swap(sign[i],sign[j]); 01414 swap(index[i],index[j]); 01415 swap(QD[i],QD[j]); 01416 } 01417 01418 Qfloat *get_Q(int i, int len) const 01419 { 
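// SVR_Q presents the 2l x 2l matrix of the expanded regression problem
// without storing it: entry (s,t) is
//     sign[s] * sign[t] * K(index[s], index[t])
// where index[] maps each duplicated variable back to one of the l
// original points and sign[] is +1 for the first copy, -1 for the
// second. Only the l kernel values of the underlying row are cached;
// the re-signed, re-ordered row of length len is assembled in one of
// two ping-pong buffers, so a returned pointer is overwritten two
// calls later.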
01420 Qfloat *data; 01421 int real_i = index[i]; 01422 if(cache->get_data(real_i,&data,l) < l) 01423 { 01424 for(int j=0;j<l;j++) 01425 data[j] = (Qfloat)(this->*kernel_function)(real_i,j); 01426 } 01427 01428 // reorder and copy 01429 Qfloat *buf = buffer[next_buffer]; 01430 next_buffer = 1 - next_buffer; 01431 schar si = sign[i]; 01432 for(int j=0;j<len;j++) 01433 buf[j] = si * sign[j] * data[index[j]]; 01434 return buf; 01435 } 01436 01437 Qfloat *get_QD() const 01438 { 01439 return QD; 01440 } 01441 01442 ~SVR_Q() 01443 { 01444 delete cache; 01445 delete[] sign; 01446 delete[] index; 01447 delete[] buffer[0]; 01448 delete[] buffer[1]; 01449 delete[] QD; 01450 } 01451 private: 01452 int l; 01453 Cache *cache; 01454 schar *sign; 01455 int *index; 01456 mutable int next_buffer; 01457 Qfloat *buffer[2]; 01458 Qfloat *QD; 01459 }; 01460 01461 // 01462 // construct and solve various formulations 01463 // 01464 static void solve_c_svc( 01465 const svm_problem *prob, const svm_parameter* param, 01466 double *alpha, Solver::SolutionInfo* si, double Cp, double Cn) 01467 { 01468 int l = prob->l; 01469 double *minus_ones = new double[l]; 01470 schar *y = new schar[l]; 01471 01472 int i; 01473 01474 for(i=0;i<l;i++) 01475 { 01476 alpha[i] = 0; 01477 minus_ones[i] = -1; 01478 if(prob->y[i] > 0) y[i] = +1; else y[i]=-1; 01479 } 01480 01481 Solver s; 01482 s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y, 01483 alpha, Cp, Cn, param->eps, si, param->shrinking); 01484 01485 double sum_alpha=0; 01486 for(i=0;i<l;i++) 01487 sum_alpha += alpha[i]; 01488 01489 if (Cp==Cn) 01490 info("nu = %f\n", sum_alpha/(Cp*prob->l)); 01491 01492 for(i=0;i<l;i++) 01493 alpha[i] *= y[i]; 01494 01495 delete[] minus_ones; 01496 delete[] y; 01497 } 01498 01499 static void solve_nu_svc( 01500 const svm_problem *prob, const svm_parameter *param, 01501 double *alpha, Solver::SolutionInfo* si) 01502 { 01503 int i; 01504 int l = prob->l; 01505 double nu = param->nu; 01506 01507 schar *y = new schar[l]; 01508 01509 for(i=0;i<l;i++) 01510 if(prob->y[i]>0) 01511 y[i] = +1; 01512 else 01513 y[i] = -1; 01514 01515 double sum_pos = nu*l/2; 01516 double sum_neg = nu*l/2; 01517 01518 for(i=0;i<l;i++) 01519 if(y[i] == +1) 01520 { 01521 alpha[i] = min(1.0,sum_pos); 01522 sum_pos -= alpha[i]; 01523 } 01524 else 01525 { 01526 alpha[i] = min(1.0,sum_neg); 01527 sum_neg -= alpha[i]; 01528 } 01529 01530 double *zeros = new double[l]; 01531 01532 for(i=0;i<l;i++) 01533 zeros[i] = 0; 01534 01535 Solver_NU s; 01536 s.Solve(l, SVC_Q(*prob,*param,y), zeros, y, 01537 alpha, 1.0, 1.0, param->eps, si, param->shrinking); 01538 double r = si->r; 01539 01540 info("C = %f\n",1/r); 01541 01542 for(i=0;i<l;i++) 01543 alpha[i] *= y[i]/r; 01544 01545 si->rho /= r; 01546 si->obj /= (r*r); 01547 si->upper_bound_p = 1/r; 01548 si->upper_bound_n = 1/r; 01549 01550 delete[] y; 01551 delete[] zeros; 01552 } 01553 01554 static void solve_one_class( 01555 const svm_problem *prob, const svm_parameter *param, 01556 double *alpha, Solver::SolutionInfo* si) 01557 { 01558 int l = prob->l; 01559 double *zeros = new double[l]; 01560 schar *ones = new schar[l]; 01561 int i; 01562 01563 int n = (int)(param->nu*prob->l); // # of alpha's at upper bound 01564 01565 for(i=0;i<n;i++) 01566 alpha[i] = 1; 01567 if(n<prob->l) 01568 alpha[n] = param->nu * prob->l - n; 01569 for(i=n+1;i<l;i++) 01570 alpha[i] = 0; 01571 01572 for(i=0;i<l;i++) 01573 { 01574 zeros[i] = 0; 01575 ones[i] = 1; 01576 } 01577 01578 Solver s; 01579 s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones, 01580 alpha, 
1.0, 1.0, param->eps, si, param->shrinking); 01581 01582 delete[] zeros; 01583 delete[] ones; 01584 } 01585 01586 static void solve_epsilon_svr( 01587 const svm_problem *prob, const svm_parameter *param, 01588 double *alpha, Solver::SolutionInfo* si) 01589 { 01590 int l = prob->l; 01591 double *alpha2 = new double[2*l]; 01592 double *linear_term = new double[2*l]; 01593 schar *y = new schar[2*l]; 01594 int i; 01595 01596 for(i=0;i<l;i++) 01597 { 01598 alpha2[i] = 0; 01599 linear_term[i] = param->p - prob->y[i]; 01600 y[i] = 1; 01601 01602 alpha2[i+l] = 0; 01603 linear_term[i+l] = param->p + prob->y[i]; 01604 y[i+l] = -1; 01605 } 01606 01607 Solver s; 01608 s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y, 01609 alpha2, param->C, param->C, param->eps, si, param->shrinking); 01610 01611 double sum_alpha = 0; 01612 for(i=0;i<l;i++) 01613 { 01614 alpha[i] = alpha2[i] - alpha2[i+l]; 01615 sum_alpha += fabs(alpha[i]); 01616 } 01617 info("nu = %f\n",sum_alpha/(param->C*l)); 01618 01619 delete[] alpha2; 01620 delete[] linear_term; 01621 delete[] y; 01622 } 01623 01624 static void solve_nu_svr( 01625 const svm_problem *prob, const svm_parameter *param, 01626 double *alpha, Solver::SolutionInfo* si) 01627 { 01628 int l = prob->l; 01629 double C = param->C; 01630 double *alpha2 = new double[2*l]; 01631 double *linear_term = new double[2*l]; 01632 schar *y = new schar[2*l]; 01633 int i; 01634 01635 double sum = C * param->nu * l / 2; 01636 for(i=0;i<l;i++) 01637 { 01638 alpha2[i] = alpha2[i+l] = min(sum,C); 01639 sum -= alpha2[i]; 01640 01641 linear_term[i] = - prob->y[i]; 01642 y[i] = 1; 01643 01644 linear_term[i+l] = prob->y[i]; 01645 y[i+l] = -1; 01646 } 01647 01648 Solver_NU s; 01649 s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y, 01650 alpha2, C, C, param->eps, si, param->shrinking); 01651 01652 info("epsilon = %f\n",-si->r); 01653 01654 for(i=0;i<l;i++) 01655 alpha[i] = alpha2[i] - alpha2[i+l]; 01656 01657 delete[] alpha2; 01658 delete[] linear_term; 01659 delete[] y; 01660 } 01661 01662 // 01663 // decision_function 01664 // 01665 struct decision_function 01666 { 01667 double *alpha; 01668 double rho; 01669 }; 01670 01671 decision_function svm_train_one( 01672 const svm_problem *prob, const svm_parameter *param, 01673 double Cp, double Cn) 01674 { 01675 double *alpha = Malloc(double,prob->l); 01676 Solver::SolutionInfo si; 01677 switch(param->svm_type) 01678 { 01679 case C_SVC: 01680 solve_c_svc(prob,param,alpha,&si,Cp,Cn); 01681 break; 01682 case NU_SVC: 01683 solve_nu_svc(prob,param,alpha,&si); 01684 break; 01685 case ONE_CLASS: 01686 solve_one_class(prob,param,alpha,&si); 01687 break; 01688 case EPSILON_SVR: 01689 solve_epsilon_svr(prob,param,alpha,&si); 01690 break; 01691 case NU_SVR: 01692 solve_nu_svr(prob,param,alpha,&si); 01693 break; 01694 } 01695 01696 info("obj = %f, rho = %f\n",si.obj,si.rho); 01697 01698 // output SVs 01699 01700 int nSV = 0; 01701 int nBSV = 0; 01702 for(int i=0;i<prob->l;i++) 01703 { 01704 if(fabs(alpha[i]) > 0) 01705 { 01706 ++nSV; 01707 if(prob->y[i] > 0) 01708 { 01709 if(fabs(alpha[i]) >= si.upper_bound_p) 01710 ++nBSV; 01711 } 01712 else 01713 { 01714 if(fabs(alpha[i]) >= si.upper_bound_n) 01715 ++nBSV; 01716 } 01717 } 01718 } 01719 01720 info("nSV = %d, nBSV = %d\n",nSV,nBSV); 01721 01722 decision_function f; 01723 f.alpha = alpha; 01724 f.rho = si.rho; 01725 return f; 01726 } 01727 01728 // 01729 // svm_model 01730 // 01731 struct svm_model 01732 { 01733 svm_parameter param; // parameter 01734 int nr_class; // number of classes, = 2 in regression/one 
class svm 01735 int l; // total #SV 01736 svm_node **SV; // SVs (SV[l]) 01737 double **sv_coef; // coefficients for SVs in decision functions (sv_coef[k-1][l]) 01738 double *rho; // constants in decision functions (rho[k*(k-1)/2]) 01739 double *probA; // pariwise probability information 01740 double *probB; 01741 01742 // for classification only 01743 01744 int *label; // label of each class (label[k]) 01745 int *nSV; // number of SVs for each class (nSV[k]) 01746 // nSV[0] + nSV[1] + ... + nSV[k-1] = l 01747 // XXX 01748 int free_sv; // 1 if svm_model is created by svm_load_model 01749 // 0 if svm_model is created by svm_train 01750 }; 01751 01752 // Platt's binary SVM Probablistic Output: an improvement from Lin et al. 01753 void sigmoid_train( 01754 int l, const double *dec_values, const double *labels, 01755 double& A, double& B) 01756 { 01757 double prior1=0, prior0 = 0; 01758 int i; 01759 01760 for (i=0;i<l;i++) 01761 if (labels[i] > 0) prior1+=1; 01762 else prior0+=1; 01763 01764 int max_iter=100; // Maximal number of iterations 01765 double min_step=1e-10; // Minimal step taken in line search 01766 double sigma=1e-12; // For numerically strict PD of Hessian 01767 double eps=1e-5; 01768 double hiTarget=(prior1+1.0)/(prior1+2.0); 01769 double loTarget=1/(prior0+2.0); 01770 double *t=Malloc(double,l); 01771 double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize; 01772 double newA,newB,newf,d1,d2; 01773 int iter; 01774 01775 // Initial Point and Initial Fun Value 01776 A=0.0; B=log((prior0+1.0)/(prior1+1.0)); 01777 double fval = 0.0; 01778 01779 for (i=0;i<l;i++) 01780 { 01781 if (labels[i]>0) t[i]=hiTarget; 01782 else t[i]=loTarget; 01783 fApB = dec_values[i]*A+B; 01784 if (fApB>=0) 01785 fval += t[i]*fApB + log(1+exp(-fApB)); 01786 else 01787 fval += (t[i] - 1)*fApB +log(1+exp(fApB)); 01788 } 01789 for (iter=0;iter<max_iter;iter++) 01790 { 01791 // Update Gradient and Hessian (use H' = H + sigma I) 01792 h11=sigma; // numerically ensures strict PD 01793 h22=sigma; 01794 h21=0.0;g1=0.0;g2=0.0; 01795 for (i=0;i<l;i++) 01796 { 01797 fApB = dec_values[i]*A+B; 01798 if (fApB >= 0) 01799 { 01800 p=exp(-fApB)/(1.0+exp(-fApB)); 01801 q=1.0/(1.0+exp(-fApB)); 01802 } 01803 else 01804 { 01805 p=1.0/(1.0+exp(fApB)); 01806 q=exp(fApB)/(1.0+exp(fApB)); 01807 } 01808 d2=p*q; 01809 h11+=dec_values[i]*dec_values[i]*d2; 01810 h22+=d2; 01811 h21+=dec_values[i]*d2; 01812 d1=t[i]-p; 01813 g1+=dec_values[i]*d1; 01814 g2+=d1; 01815 } 01816 01817 // Stopping Criteria 01818 if (fabs(g1)<eps && fabs(g2)<eps) 01819 break; 01820 01821 // Finding Newton direction: -inv(H') * g 01822 det=h11*h22-h21*h21; 01823 dA=-(h22*g1 - h21 * g2) / det; 01824 dB=-(-h21*g1+ h11 * g2) / det; 01825 gd=g1*dA+g2*dB; 01826 01827 01828 stepsize = 1; // Line Search 01829 while (stepsize >= min_step) 01830 { 01831 newA = A + stepsize * dA; 01832 newB = B + stepsize * dB; 01833 01834 // New function value 01835 newf = 0.0; 01836 for (i=0;i<l;i++) 01837 { 01838 fApB = dec_values[i]*newA+newB; 01839 if (fApB >= 0) 01840 newf += t[i]*fApB + log(1+exp(-fApB)); 01841 else 01842 newf += (t[i] - 1)*fApB +log(1+exp(fApB)); 01843 } 01844 // Check sufficient decrease 01845 if (newf<fval+0.0001*stepsize*gd) 01846 { 01847 A=newA;B=newB;fval=newf; 01848 break; 01849 } 01850 else 01851 stepsize = stepsize / 2.0; 01852 } 01853 01854 if (stepsize < min_step) 01855 { 01856 info("Line search fails in two-class probability estimates\n"); 01857 break; 01858 } 01859 } 01860 01861 if (iter>=max_iter) 01862 info("Reaching maximal iterations in two-class 
probability estimates\n"); 01863 free(t); 01864 } 01865 01866 double sigmoid_predict(double decision_value, double A, double B) 01867 { 01868 double fApB = decision_value*A+B; 01869 if (fApB >= 0) 01870 return exp(-fApB)/(1.0+exp(-fApB)); 01871 else 01872 return 1.0/(1+exp(fApB)) ; 01873 } 01874 01875 // Method 2 from the multiclass_prob paper by Wu, Lin, and Weng 01876 void multiclass_probability(int k, double **r, double *p) 01877 { 01878 int t,j; 01879 int iter = 0, max_iter=max(100,k); 01880 double **Q=Malloc(double *,k); 01881 double *Qp=Malloc(double,k); 01882 double pQp, eps=0.005/k; 01883 01884 for (t=0;t<k;t++) 01885 { 01886 p[t]=1.0/k; // Valid if k = 1 01887 Q[t]=Malloc(double,k); 01888 Q[t][t]=0; 01889 for (j=0;j<t;j++) 01890 { 01891 Q[t][t]+=r[j][t]*r[j][t]; 01892 Q[t][j]=Q[j][t]; 01893 } 01894 for (j=t+1;j<k;j++) 01895 { 01896 Q[t][t]+=r[j][t]*r[j][t]; 01897 Q[t][j]=-r[j][t]*r[t][j]; 01898 } 01899 } 01900 for (iter=0;iter<max_iter;iter++) 01901 { 01902 // stopping condition, recalculate QP,pQP for numerical accuracy 01903 pQp=0; 01904 for (t=0;t<k;t++) 01905 { 01906 Qp[t]=0; 01907 for (j=0;j<k;j++) 01908 Qp[t]+=Q[t][j]*p[j]; 01909 pQp+=p[t]*Qp[t]; 01910 } 01911 double max_error=0; 01912 for (t=0;t<k;t++) 01913 { 01914 double error=fabs(Qp[t]-pQp); 01915 if (error>max_error) 01916 max_error=error; 01917 } 01918 if (max_error<eps) break; 01919 01920 for (t=0;t<k;t++) 01921 { 01922 double diff=(-Qp[t]+pQp)/Q[t][t]; 01923 p[t]+=diff; 01924 pQp=(pQp+diff*(diff*Q[t][t]+2*Qp[t]))/(1+diff)/(1+diff); 01925 for (j=0;j<k;j++) 01926 { 01927 Qp[j]=(Qp[j]+diff*Q[t][j])/(1+diff); 01928 p[j]/=(1+diff); 01929 } 01930 } 01931 } 01932 if (iter>=max_iter) 01933 info("Exceeds max_iter in multiclass_prob\n"); 01934 for(t=0;t<k;t++) free(Q[t]); 01935 free(Q); 01936 free(Qp); 01937 } 01938 01939 // Cross-validation decision values for probability estimates 01940 void svm_binary_svc_probability( 01941 const svm_problem *prob, const svm_parameter *param, 01942 double Cp, double Cn, double& probA, double& probB) 01943 { 01944 int i; 01945 int nr_fold = 5; 01946 int *perm = Malloc(int,prob->l); 01947 double *dec_values = Malloc(double,prob->l); 01948 01949 // random shuffle 01950 for(i=0;i<prob->l;i++) perm[i]=i; 01951 for(i=0;i<prob->l;i++) 01952 { 01953 int j = i+rand()%(prob->l-i); 01954 swap(perm[i],perm[j]); 01955 } 01956 for(i=0;i<nr_fold;i++) 01957 { 01958 int begin = i*prob->l/nr_fold; 01959 int end = (i+1)*prob->l/nr_fold; 01960 int j,k; 01961 struct svm_problem subprob; 01962 01963 subprob.l = prob->l-(end-begin); 01964 subprob.x = Malloc(struct svm_node*,subprob.l); 01965 subprob.y = Malloc(double,subprob.l); 01966 01967 k=0; 01968 for(j=0;j<begin;j++) 01969 { 01970 subprob.x[k] = prob->x[perm[j]]; 01971 subprob.y[k] = prob->y[perm[j]]; 01972 ++k; 01973 } 01974 for(j=end;j<prob->l;j++) 01975 { 01976 subprob.x[k] = prob->x[perm[j]]; 01977 subprob.y[k] = prob->y[perm[j]]; 01978 ++k; 01979 } 01980 int p_count=0,n_count=0; 01981 for(j=0;j<k;j++) 01982 if(subprob.y[j]>0) 01983 p_count++; 01984 else 01985 n_count++; 01986 01987 if(p_count==0 && n_count==0) 01988 for(j=begin;j<end;j++) 01989 dec_values[perm[j]] = 0; 01990 else if(p_count > 0 && n_count == 0) 01991 for(j=begin;j<end;j++) 01992 dec_values[perm[j]] = 1; 01993 else if(p_count == 0 && n_count > 0) 01994 for(j=begin;j<end;j++) 01995 dec_values[perm[j]] = -1; 01996 else 01997 { 01998 svm_parameter subparam = *param; 01999 subparam.probability=0; 02000 subparam.C=1.0; 02001 subparam.nr_weight=2; 02002 subparam.weight_label = Malloc(int,2); 
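// (Each fold's sub-model is trained with the +1 class weighted by Cp and
// the -1 class by Cn, matching the final model, and its raw decision
// values on the held-out fold are stored in dec_values. sigmoid_train()
// then fits Platt's sigmoid
//     P(y=+1 | f) = 1 / (1 + exp(A*f + B))
// to these out-of-fold values by Newton's method with a backtracking
// line search.)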
02003 subparam.weight = Malloc(double,2); 02004 subparam.weight_label[0]=+1; 02005 subparam.weight_label[1]=-1; 02006 subparam.weight[0]=Cp; 02007 subparam.weight[1]=Cn; 02008 struct svm_model *submodel = svm_train(&subprob,&subparam); 02009 for(j=begin;j<end;j++) 02010 { 02011 svm_predict_values(submodel,prob->x[perm[j]],&(dec_values[perm[j]])); 02012 // ensure +1 -1 order; reason not using CV subroutine 02013 dec_values[perm[j]] *= submodel->label[0]; 02014 } 02015 svm_destroy_model(submodel); 02016 svm_destroy_param(&subparam); 02017 } 02018 free(subprob.x); 02019 free(subprob.y); 02020 } 02021 sigmoid_train(prob->l,dec_values,prob->y,probA,probB); 02022 free(dec_values); 02023 free(perm); 02024 } 02025 02026 // Return parameter of a Laplace distribution 02027 double svm_svr_probability( 02028 const svm_problem *prob, const svm_parameter *param) 02029 { 02030 int i; 02031 int nr_fold = 5; 02032 double *ymv = Malloc(double,prob->l); 02033 double mae = 0; 02034 02035 svm_parameter newparam = *param; 02036 newparam.probability = 0; 02037 svm_cross_validation(prob,&newparam,nr_fold,ymv); 02038 for(i=0;i<prob->l;i++) 02039 { 02040 ymv[i]=prob->y[i]-ymv[i]; 02041 mae += fabs(ymv[i]); 02042 } 02043 mae /= prob->l; 02044 double std=sqrt(2*mae*mae); 02045 int count=0; 02046 mae=0; 02047 for(i=0;i<prob->l;i++) 02048 if (fabs(ymv[i]) > 5*std) 02049 count=count+1; 02050 else 02051 mae+=fabs(ymv[i]); 02052 mae /= (prob->l-count); 02053 info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae); 02054 free(ymv); 02055 return mae; 02056 } 02057 02058 02059 // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data 02060 // perm, length l, must be allocated before calling this subroutine 02061 void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm) 02062 { 02063 int l = prob->l; 02064 int max_nr_class = 16; 02065 int nr_class = 0; 02066 int *label = Malloc(int,max_nr_class); 02067 int *count = Malloc(int,max_nr_class); 02068 int *data_label = Malloc(int,l); 02069 int i; 02070 02071 for(i=0;i<l;i++) 02072 { 02073 int this_label = (int)prob->y[i]; 02074 int j; 02075 for(j=0;j<nr_class;j++) 02076 { 02077 if(this_label == label[j]) 02078 { 02079 ++count[j]; 02080 break; 02081 } 02082 } 02083 data_label[i] = j; 02084 if(j == nr_class) 02085 { 02086 if(nr_class == max_nr_class) 02087 { 02088 max_nr_class *= 2; 02089 label = (int *)realloc(label,max_nr_class*sizeof(int)); 02090 count = (int *)realloc(count,max_nr_class*sizeof(int)); 02091 } 02092 label[nr_class] = this_label; 02093 count[nr_class] = 1; 02094 ++nr_class; 02095 } 02096 } 02097 02098 int *start = Malloc(int,nr_class); 02099 start[0] = 0; 02100 for(i=1;i<nr_class;i++) 02101 start[i] = start[i-1]+count[i-1]; 02102 for(i=0;i<l;i++) 02103 { 02104 perm[start[data_label[i]]] = i; 02105 ++start[data_label[i]]; 02106 } 02107 start[0] = 0; 02108 for(i=1;i<nr_class;i++) 02109 start[i] = start[i-1]+count[i-1]; 02110 02111 *nr_class_ret = nr_class; 02112 *label_ret = label; 02113 *start_ret = start; 02114 *count_ret = count; 02115 free(data_label); 02116 } 02117 02118 // 02119 // Interface functions 02120 // 02121 svm_model *svm_train(const svm_problem *prob, const svm_parameter *param) 02122 { 02123 svm_model *model = Malloc(svm_model,1); 02124 model->param = *param; 02125 model->free_sv = 0; // XXX 02126 02127 if(param->svm_type == ONE_CLASS || 02128 
param->svm_type == EPSILON_SVR || 02129 param->svm_type == NU_SVR) 02130 { 02131 // regression or one-class-svm 02132 model->nr_class = 2; 02133 model->label = NULL; 02134 model->nSV = NULL; 02135 model->probA = NULL; model->probB = NULL; 02136 model->sv_coef = Malloc(double *,1); 02137 02138 if(param->probability && 02139 (param->svm_type == EPSILON_SVR || 02140 param->svm_type == NU_SVR)) 02141 { 02142 model->probA = Malloc(double,1); 02143 model->probA[0] = svm_svr_probability(prob,param); 02144 } 02145 02146 decision_function f = svm_train_one(prob,param,0,0); 02147 model->rho = Malloc(double,1); 02148 model->rho[0] = f.rho; 02149 02150 int nSV = 0; 02151 int i; 02152 for(i=0;i<prob->l;i++) 02153 if(fabs(f.alpha[i]) > 0) ++nSV; 02154 model->l = nSV; 02155 model->SV = Malloc(svm_node *,nSV); 02156 model->sv_coef[0] = Malloc(double,nSV); 02157 int j = 0; 02158 for(i=0;i<prob->l;i++) 02159 if(fabs(f.alpha[i]) > 0) 02160 { 02161 model->SV[j] = prob->x[i]; 02162 model->sv_coef[0][j] = f.alpha[i]; 02163 ++j; 02164 } 02165 02166 free(f.alpha); 02167 } 02168 else 02169 { 02170 // classification 02171 int l = prob->l; 02172 int nr_class; 02173 int *label = NULL; 02174 int *start = NULL; 02175 int *count = NULL; 02176 int *perm = Malloc(int,l); 02177 02178 // group training data of the same class 02179 svm_group_classes(prob,&nr_class,&label,&start,&count,perm); 02180 svm_node **x = Malloc(svm_node *,l); 02181 int i; 02182 for(i=0;i<l;i++) 02183 x[i] = prob->x[perm[i]]; 02184 02185 // calculate weighted C 02186 02187 double *weighted_C = Malloc(double, nr_class); 02188 for(i=0;i<nr_class;i++) 02189 weighted_C[i] = param->C; 02190 for(i=0;i<param->nr_weight;i++) 02191 { 02192 int j; 02193 for(j=0;j<nr_class;j++) 02194 if(param->weight_label[i] == label[j]) 02195 break; 02196 if(j == nr_class) 02197 fprintf(stderr,"warning: class label %d specified in weight is not found\n", param->weight_label[i]); 02198 else 02199 weighted_C[j] *= param->weight[i]; 02200 } 02201 02202 // train k*(k-1)/2 models 02203 02204 bool *nonzero = Malloc(bool,l); 02205 for(i=0;i<l;i++) 02206 nonzero[i] = false; 02207 decision_function *f = Malloc(decision_function,nr_class*(nr_class-1)/2); 02208 02209 double *probA=NULL,*probB=NULL; 02210 if (param->probability) 02211 { 02212 probA=Malloc(double,nr_class*(nr_class-1)/2); 02213 probB=Malloc(double,nr_class*(nr_class-1)/2); 02214 } 02215 02216 int p = 0; 02217 for(i=0;i<nr_class;i++) 02218 for(int j=i+1;j<nr_class;j++) 02219 { 02220 svm_problem sub_prob; 02221 int si = start[i], sj = start[j]; 02222 int ci = count[i], cj = count[j]; 02223 sub_prob.l = ci+cj; 02224 sub_prob.x = Malloc(svm_node *,sub_prob.l); 02225 sub_prob.y = Malloc(double,sub_prob.l); 02226 int k; 02227 for(k=0;k<ci;k++) 02228 { 02229 sub_prob.x[k] = x[si+k]; 02230 sub_prob.y[k] = +1; 02231 } 02232 for(k=0;k<cj;k++) 02233 { 02234 sub_prob.x[ci+k] = x[sj+k]; 02235 sub_prob.y[ci+k] = -1; 02236 } 02237 02238 if(param->probability) 02239 svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p]); 02240 02241 f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]); 02242 for(k=0;k<ci;k++) 02243 if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0) 02244 nonzero[si+k] = true; 02245 for(k=0;k<cj;k++) 02246 if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0) 02247 nonzero[sj+k] = true; 02248 free(sub_prob.x); 02249 free(sub_prob.y); 02250 ++p; 02251 } 02252 02253 // build output 02254 02255 model->nr_class = nr_class; 02256 02257 model->label = Malloc(int,nr_class); 02258 
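// (Layout of the one-vs-one model assembled below: there are
// nr_class*(nr_class-1)/2 pairwise decision functions sharing one pool
// of total_sv support vectors; rho[p] is the offset of pair p. For the
// (i,j) classifier, coefficients of class-i SVs are stored in
// sv_coef[j-1][nz_start[i]...] and those of class-j SVs in
// sv_coef[i][nz_start[j]...], as noted again where they are filled in.)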
for(i=0;i<nr_class;i++) 02259 model->label[i] = label[i]; 02260 02261 model->rho = Malloc(double,nr_class*(nr_class-1)/2); 02262 for(i=0;i<nr_class*(nr_class-1)/2;i++) 02263 model->rho[i] = f[i].rho; 02264 02265 if(param->probability) 02266 { 02267 model->probA = Malloc(double,nr_class*(nr_class-1)/2); 02268 model->probB = Malloc(double,nr_class*(nr_class-1)/2); 02269 for(i=0;i<nr_class*(nr_class-1)/2;i++) 02270 { 02271 model->probA[i] = probA[i]; 02272 model->probB[i] = probB[i]; 02273 } 02274 } 02275 else 02276 { 02277 model->probA=NULL; 02278 model->probB=NULL; 02279 } 02280 02281 int total_sv = 0; 02282 int *nz_count = Malloc(int,nr_class); 02283 model->nSV = Malloc(int,nr_class); 02284 for(i=0;i<nr_class;i++) 02285 { 02286 int nSV = 0; 02287 for(int j=0;j<count[i];j++) 02288 if(nonzero[start[i]+j]) 02289 { 02290 ++nSV; 02291 ++total_sv; 02292 } 02293 model->nSV[i] = nSV; 02294 nz_count[i] = nSV; 02295 } 02296 02297 info("Total nSV = %d\n",total_sv); 02298 02299 model->l = total_sv; 02300 model->SV = Malloc(svm_node *,total_sv); 02301 p = 0; 02302 for(i=0;i<l;i++) 02303 if(nonzero[i]) model->SV[p++] = x[i]; 02304 02305 int *nz_start = Malloc(int,nr_class); 02306 nz_start[0] = 0; 02307 for(i=1;i<nr_class;i++) 02308 nz_start[i] = nz_start[i-1]+nz_count[i-1]; 02309 02310 model->sv_coef = Malloc(double *,nr_class-1); 02311 for(i=0;i<nr_class-1;i++) 02312 model->sv_coef[i] = Malloc(double,total_sv); 02313 02314 p = 0; 02315 for(i=0;i<nr_class;i++) 02316 for(int j=i+1;j<nr_class;j++) 02317 { 02318 // classifier (i,j): coefficients with 02319 // i are in sv_coef[j-1][nz_start[i]...], 02320 // j are in sv_coef[i][nz_start[j]...] 02321 02322 int si = start[i]; 02323 int sj = start[j]; 02324 int ci = count[i]; 02325 int cj = count[j]; 02326 02327 int q = nz_start[i]; 02328 int k; 02329 for(k=0;k<ci;k++) 02330 if(nonzero[si+k]) 02331 model->sv_coef[j-1][q++] = f[p].alpha[k]; 02332 q = nz_start[j]; 02333 for(k=0;k<cj;k++) 02334 if(nonzero[sj+k]) 02335 model->sv_coef[i][q++] = f[p].alpha[ci+k]; 02336 ++p; 02337 } 02338 02339 free(label); 02340 free(probA); 02341 free(probB); 02342 free(count); 02343 free(perm); 02344 free(start); 02345 free(x); 02346 free(weighted_C); 02347 free(nonzero); 02348 for(i=0;i<nr_class*(nr_class-1)/2;i++) 02349 free(f[i].alpha); 02350 free(f); 02351 free(nz_count); 02352 free(nz_start); 02353 } 02354 return model; 02355 } 02356 02357 // Stratified cross validation 02358 void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target) 02359 { 02360 int i; 02361 int *fold_start = Malloc(int,nr_fold+1); 02362 int l = prob->l; 02363 int *perm = Malloc(int,l); 02364 int nr_class; 02365 02366 // stratified cv may not give leave-one-out rate 02367 // Each class to l folds -> some folds may have zero elements 02368 if((param->svm_type == C_SVC || 02369 param->svm_type == NU_SVC) && nr_fold < l) 02370 { 02371 int *start = NULL; 02372 int *label = NULL; 02373 int *count = NULL; 02374 svm_group_classes(prob,&nr_class,&label,&start,&count,perm); 02375 02376 // random shuffle and then data grouped by fold using the array perm 02377 int *fold_count = Malloc(int,nr_fold); 02378 int c; 02379 int *index = Malloc(int,l); 02380 for(i=0;i<l;i++) 02381 index[i]=perm[i]; 02382 for (c=0; c<nr_class; c++) 02383 for(i=0;i<count[c];i++) 02384 { 02385 int j = i+rand()%(count[c]-i); 02386 swap(index[start[c]+j],index[start[c]+i]); 02387 } 02388 for(i=0;i<nr_fold;i++) 02389 { 02390 fold_count[i] = 0; 02391 for (c=0; c<nr_class;c++) 02392 
// Stratified cross validation
void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
{
    int i;
    int *fold_start = Malloc(int,nr_fold+1);
    int l = prob->l;
    int *perm = Malloc(int,l);
    int nr_class;

    // stratified cv may not give leave-one-out rate
    // each class is split into nr_fold folds -> some folds may have zero elements
    if((param->svm_type == C_SVC ||
        param->svm_type == NU_SVC) && nr_fold < l)
    {
        int *start = NULL;
        int *label = NULL;
        int *count = NULL;
        svm_group_classes(prob,&nr_class,&label,&start,&count,perm);

        // random shuffle, then group the data by fold using the array perm
        int *fold_count = Malloc(int,nr_fold);
        int c;
        int *index = Malloc(int,l);
        for(i=0;i<l;i++)
            index[i]=perm[i];
        for (c=0; c<nr_class; c++)
            for(i=0;i<count[c];i++)
            {
                int j = i+rand()%(count[c]-i);
                swap(index[start[c]+j],index[start[c]+i]);
            }
        for(i=0;i<nr_fold;i++)
        {
            fold_count[i] = 0;
            for (c=0; c<nr_class;c++)
                fold_count[i]+=(i+1)*count[c]/nr_fold-i*count[c]/nr_fold;
        }
        fold_start[0]=0;
        for (i=1;i<=nr_fold;i++)
            fold_start[i] = fold_start[i-1]+fold_count[i-1];
        for (c=0; c<nr_class;c++)
            for(i=0;i<nr_fold;i++)
            {
                int begin = start[c]+i*count[c]/nr_fold;
                int end = start[c]+(i+1)*count[c]/nr_fold;
                for(int j=begin;j<end;j++)
                {
                    perm[fold_start[i]] = index[j];
                    fold_start[i]++;
                }
            }
        fold_start[0]=0;
        for (i=1;i<=nr_fold;i++)
            fold_start[i] = fold_start[i-1]+fold_count[i-1];
        free(start);
        free(label);
        free(count);
        free(index);
        free(fold_count);
    }
    else
    {
        for(i=0;i<l;i++) perm[i]=i;
        for(i=0;i<l;i++)
        {
            int j = i+rand()%(l-i);
            swap(perm[i],perm[j]);
        }
        for(i=0;i<=nr_fold;i++)
            fold_start[i]=i*l/nr_fold;
    }

    for(i=0;i<nr_fold;i++)
    {
        int begin = fold_start[i];
        int end = fold_start[i+1];
        int j,k;
        struct svm_problem subprob;

        subprob.l = l-(end-begin);
        subprob.x = Malloc(struct svm_node*,subprob.l);
        subprob.y = Malloc(double,subprob.l);

        k=0;
        for(j=0;j<begin;j++)
        {
            subprob.x[k] = prob->x[perm[j]];
            subprob.y[k] = prob->y[perm[j]];
            ++k;
        }
        for(j=end;j<l;j++)
        {
            subprob.x[k] = prob->x[perm[j]];
            subprob.y[k] = prob->y[perm[j]];
            ++k;
        }
        struct svm_model *submodel = svm_train(&subprob,param);
        if(param->probability &&
           (param->svm_type == C_SVC || param->svm_type == NU_SVC))
        {
            double *prob_estimates=Malloc(double,svm_get_nr_class(submodel));
            for(j=begin;j<end;j++)
                target[perm[j]] = svm_predict_probability(submodel,prob->x[perm[j]],prob_estimates);
            free(prob_estimates);
        }
        else
            for(j=begin;j<end;j++)
                target[perm[j]] = svm_predict(submodel,prob->x[perm[j]]);
        svm_destroy_model(submodel);
        free(subprob.x);
        free(subprob.y);
    }
    free(fold_start);
    free(perm);
}


int svm_get_svm_type(const svm_model *model)
{
    return model->param.svm_type;
}

int svm_get_nr_class(const svm_model *model)
{
    return model->nr_class;
}

void svm_get_labels(const svm_model *model, int* label)
{
    if (model->label != NULL)
        for(int i=0;i<model->nr_class;i++)
            label[i] = model->label[i];
}

double svm_get_svr_probability(const svm_model *model)
{
    if ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
        model->probA!=NULL)
        return model->probA[0];
    else
    {
        info("Model doesn't contain information for SVR probability inference\n");
        return 0;
    }
}

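//
// svm_predict_values() below writes raw decision values into dec_values:
// a single value for one-class and regression models, and
// nr_class*(nr_class-1)/2 values for classification models, one per
// one-against-one classifier in the same pair order used by svm_train.
// The caller must allocate dec_values accordingly.
//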
void svm_predict_values(const svm_model *model, const svm_node *x, double* dec_values)
{
    if(model->param.svm_type == ONE_CLASS ||
       model->param.svm_type == EPSILON_SVR ||
       model->param.svm_type == NU_SVR)
    {
        double *sv_coef = model->sv_coef[0];
        double sum = 0;
        for(int i=0;i<model->l;i++)
            sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param);
        sum -= model->rho[0];
        *dec_values = sum;
    }
    else
    {
        int i;
        int nr_class = model->nr_class;
        int l = model->l;

        double *kvalue = Malloc(double,l);
        for(i=0;i<l;i++)
            kvalue[i] = Kernel::k_function(x,model->SV[i],model->param);

        int *start = Malloc(int,nr_class);
        start[0] = 0;
        for(i=1;i<nr_class;i++)
            start[i] = start[i-1]+model->nSV[i-1];

        int p=0;
        for(i=0;i<nr_class;i++)
            for(int j=i+1;j<nr_class;j++)
            {
                double sum = 0;
                int si = start[i];
                int sj = start[j];
                int ci = model->nSV[i];
                int cj = model->nSV[j];

                int k;
                double *coef1 = model->sv_coef[j-1];
                double *coef2 = model->sv_coef[i];
                for(k=0;k<ci;k++)
                    sum += coef1[si+k] * kvalue[si+k];
                for(k=0;k<cj;k++)
                    sum += coef2[sj+k] * kvalue[sj+k];
                sum -= model->rho[p];
                dec_values[p] = sum;
                p++;
            }

        free(kvalue);
        free(start);
    }
}

double svm_predict(const svm_model *model, const svm_node *x)
{
    if(model->param.svm_type == ONE_CLASS ||
       model->param.svm_type == EPSILON_SVR ||
       model->param.svm_type == NU_SVR)
    {
        double res;
        svm_predict_values(model, x, &res);

        if(model->param.svm_type == ONE_CLASS)
            return (res>0)?1:-1;
        else
            return res;
    }
    else
    {
        int i;
        int nr_class = model->nr_class;
        double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
        svm_predict_values(model, x, dec_values);

        int *vote = Malloc(int,nr_class);
        for(i=0;i<nr_class;i++)
            vote[i] = 0;
        int pos=0;
        for(i=0;i<nr_class;i++)
            for(int j=i+1;j<nr_class;j++)
            {
                if(dec_values[pos++] > 0)
                    ++vote[i];
                else
                    ++vote[j];
            }

        int vote_max_idx = 0;
        for(i=1;i<nr_class;i++)
            if(vote[i] > vote[vote_max_idx])
                vote_max_idx = i;
        free(vote);
        free(dec_values);
        return model->label[vote_max_idx];
    }
}

double svm_predict_probability(
    const svm_model *model, const svm_node *x, double *prob_estimates)
{
    if ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
        model->probA!=NULL && model->probB!=NULL)
    {
        int i;
        int nr_class = model->nr_class;
        double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
        svm_predict_values(model, x, dec_values);

        double min_prob=1e-7;
        double **pairwise_prob=Malloc(double *,nr_class);
        for(i=0;i<nr_class;i++)
            pairwise_prob[i]=Malloc(double,nr_class);
        int k=0;
        for(i=0;i<nr_class;i++)
            for(int j=i+1;j<nr_class;j++)
            {
                pairwise_prob[i][j]=min(max(sigmoid_predict(dec_values[k],model->probA[k],model->probB[k]),min_prob),1-min_prob);
                pairwise_prob[j][i]=1-pairwise_prob[i][j];
                k++;
            }
        multiclass_probability(nr_class,pairwise_prob,prob_estimates);

        int prob_max_idx = 0;
        for(i=1;i<nr_class;i++)
            if(prob_estimates[i] > prob_estimates[prob_max_idx])
                prob_max_idx = i;
        for(i=0;i<nr_class;i++)
            free(pairwise_prob[i]);
        free(dec_values);
        free(pairwise_prob);
        return model->label[prob_max_idx];
    }
    else
        return svm_predict(model, x);
}

const char *svm_type_table[] =
{
    "c_svc","nu_svc","one_class","epsilon_svr","nu_svr",NULL
};

const char *kernel_type_table[]=
{
    "linear","polynomial","rbf","sigmoid","precomputed",NULL
};

void ignore_retval_gcc46(int ret) { /* keep gcc46 happy */ }

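//
// svm_save_model() below writes the model as plain text: a header of
// "keyword value(s)" lines followed by one line per support vector.
// A sketch of the layout (the numbers are only an illustration):
//
//   svm_type c_svc
//   kernel_type rbf
//   gamma 0.5
//   nr_class 2
//   total_sv 3
//   rho 0.42
//   label 1 -1
//   nr_sv 2 1
//   SV
//   <sv_coef ...> <index>:<value> <index>:<value> ...
//
// degree/coef0 and probA/probB appear only when the kernel or the
// probability information requires them; svm_load_model() parses the
// same format back.
//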
int svm_save_model(const char *model_file_name, const svm_model *model)
{
    FILE *fp = fopen(model_file_name,"w");
    if(fp==NULL) return -1;

    const svm_parameter& param = model->param;

    fprintf(fp,"svm_type %s\n", svm_type_table[param.svm_type]);
    fprintf(fp,"kernel_type %s\n", kernel_type_table[param.kernel_type]);

    if(param.kernel_type == POLY)
        fprintf(fp,"degree %d\n", param.degree);

    if(param.kernel_type == POLY || param.kernel_type == RBF || param.kernel_type == SIGMOID)
        fprintf(fp,"gamma %g\n", param.gamma);

    if(param.kernel_type == POLY || param.kernel_type == SIGMOID)
        fprintf(fp,"coef0 %g\n", param.coef0);

    int nr_class = model->nr_class;
    int l = model->l;
    fprintf(fp, "nr_class %d\n", nr_class);
    fprintf(fp, "total_sv %d\n",l);

    {
        fprintf(fp, "rho");
        for(int i=0;i<nr_class*(nr_class-1)/2;i++)
            fprintf(fp," %g",model->rho[i]);
        fprintf(fp, "\n");
    }

    if(model->label)
    {
        fprintf(fp, "label");
        for(int i=0;i<nr_class;i++)
            fprintf(fp," %d",model->label[i]);
        fprintf(fp, "\n");
    }

    if(model->probA) // regression has probA only
    {
        fprintf(fp, "probA");
        for(int i=0;i<nr_class*(nr_class-1)/2;i++)
            fprintf(fp," %g",model->probA[i]);
        fprintf(fp, "\n");
    }
    if(model->probB)
    {
        fprintf(fp, "probB");
        for(int i=0;i<nr_class*(nr_class-1)/2;i++)
            fprintf(fp," %g",model->probB[i]);
        fprintf(fp, "\n");
    }

    if(model->nSV)
    {
        fprintf(fp, "nr_sv");
        for(int i=0;i<nr_class;i++)
            fprintf(fp," %d",model->nSV[i]);
        fprintf(fp, "\n");
    }

    fprintf(fp, "SV\n");
    const double * const *sv_coef = model->sv_coef;
    const svm_node * const *SV = model->SV;

    for(int i=0;i<l;i++)
    {
        for(int j=0;j<nr_class-1;j++)
            fprintf(fp, "%.16g ",sv_coef[j][i]);

        const svm_node *p = SV[i];

        if(param.kernel_type == PRECOMPUTED)
            fprintf(fp,"0:%d ",(int)(p->value));
        else
            while(p->index != -1)
            {
                fprintf(fp,"%d:%.8g ",p->index,p->value);
                p++;
            }
        fprintf(fp, "\n");
    }
    if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
    else return 0;
}

svm_model *svm_load_model(const char *model_file_name)
{
    FILE *fp = fopen(model_file_name,"r");
    if(fp==NULL) return NULL;

    // read parameters

    svm_model *model = Malloc(svm_model,1);
    svm_parameter& param = model->param;
    model->rho = NULL;
    model->probA = NULL;
    model->probB = NULL;
    model->label = NULL;
    model->nSV = NULL;

    char cmd[81];
    int ret;
    while(1)
    {
        ret = fscanf(fp,"%80s",cmd);

        if(strcmp(cmd,"svm_type")==0)
        {
            ret = fscanf(fp,"%80s",cmd);
            int i;
            for(i=0;svm_type_table[i];i++)
            {
                if(strcmp(svm_type_table[i],cmd)==0)
                {
                    param.svm_type=i;
                    break;
                }
            }
            if(svm_type_table[i] == NULL)
            {
                fprintf(stderr,"unknown svm type.\n");
                free(model->rho);
                free(model->label);
                free(model->nSV);
                free(model);
                return NULL;
            }
        }
        else if(strcmp(cmd,"kernel_type")==0)
        {
            ret = fscanf(fp,"%80s",cmd);
            int i;
            for(i=0;kernel_type_table[i];i++)
            {
                if(strcmp(kernel_type_table[i],cmd)==0)
                {
                    param.kernel_type=i;
                    break;
                }
            }
            if(kernel_type_table[i] == NULL)
            {
                fprintf(stderr,"unknown kernel function.\n");
                free(model->rho);
                free(model->label);
                free(model->nSV);
                free(model);
                return NULL;
            }
        }
        else if(strcmp(cmd,"degree")==0)
            ret = fscanf(fp,"%d",&param.degree);
        else if(strcmp(cmd,"gamma")==0)
            ret = fscanf(fp,"%lf",&param.gamma);
        else if(strcmp(cmd,"coef0")==0)
            ret = fscanf(fp,"%lf",&param.coef0);
        else if(strcmp(cmd,"nr_class")==0)
            ret = fscanf(fp,"%d",&model->nr_class);
        else if(strcmp(cmd,"total_sv")==0)
            ret = fscanf(fp,"%d",&model->l);
        else if(strcmp(cmd,"rho")==0)
        {
            int n = model->nr_class * (model->nr_class-1)/2;
            model->rho = Malloc(double,n);
            for(int i=0;i<n;i++)
                ret = fscanf(fp,"%lf",&model->rho[i]);
        }
        else if(strcmp(cmd,"label")==0)
        {
            int n = model->nr_class;
            model->label = Malloc(int,n);
            for(int i=0;i<n;i++)
                ret = fscanf(fp,"%d",&model->label[i]);
        }
        else if(strcmp(cmd,"probA")==0)
        {
            int n = model->nr_class * (model->nr_class-1)/2;
            model->probA = Malloc(double,n);
            for(int i=0;i<n;i++)
                ret = fscanf(fp,"%lf",&model->probA[i]);
        }
        else if(strcmp(cmd,"probB")==0)
        {
            int n = model->nr_class * (model->nr_class-1)/2;
            model->probB = Malloc(double,n);
            for(int i=0;i<n;i++)
                ret = fscanf(fp,"%lf",&model->probB[i]);
        }
        else if(strcmp(cmd,"nr_sv")==0)
        {
            int n = model->nr_class;
            model->nSV = Malloc(int,n);
            for(int i=0;i<n;i++)
                ret = fscanf(fp,"%d",&model->nSV[i]);
        }
        else if(strcmp(cmd,"SV")==0)
        {
            while(1)
            {
                int c = getc(fp);
                if(c==EOF || c=='\n') break;
            }
            break;
        }
        else
        {
            fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
            free(model->rho);
            free(model->label);
            free(model->nSV);
            free(model);
            return NULL;
        }
    }

    // read sv_coef and SV

    int elements = 0;
    long pos = ftell(fp);

    while(1)
    {
        int c = fgetc(fp);
        switch(c)
        {
            case '\n':
                // count the '-1' element
            case ':':
                ++elements;
                break;
            case EOF:
                goto out;
            default:
                ;
        }
    }
out:
    fseek(fp,pos,SEEK_SET);

    int m = model->nr_class - 1;
    int l = model->l;
    model->sv_coef = Malloc(double *,m);
    int i;
    for(i=0;i<m;i++)
        model->sv_coef[i] = Malloc(double,l);
    model->SV = Malloc(svm_node*,l);
    svm_node *x_space=NULL;
    if(l>0) x_space = Malloc(svm_node,elements);

    int j=0;
    for(i=0;i<l;i++)
    {
        model->SV[i] = &x_space[j];
        for(int k=0;k<m;k++)
            ret = fscanf(fp,"%lf",&model->sv_coef[k][i]);
        while(1)
        {
            int c;
            do {
                c = getc(fp);
                if(c=='\n') goto out2;
            } while(isspace(c));
            ungetc(c,fp);
            ret = fscanf(fp,"%d:%lf",&(x_space[j].index),&(x_space[j].value));
            ++j;
        }
out2:
        x_space[j++].index = -1;
    }
    if (ferror(fp) != 0 || fclose(fp) != 0) return NULL;

    model->free_sv = 1; // XXX: SV[] points into x_space, freed via SV[0] in svm_destroy_model

    ignore_retval_gcc46(ret);

    return model;
}

void svm_destroy_model(svm_model* model)
{
    if(model->free_sv && model->l > 0)
        free((void *)(model->SV[0]));
    for(int i=0;i<model->nr_class-1;i++)
        free(model->sv_coef[i]);
    free(model->SV);
    free(model->sv_coef);
    free(model->rho);
    free(model->label);
    free(model->probA);
    free(model->probB);
    free(model->nSV);
    free(model);
}

void svm_destroy_param(svm_parameter* param)
{
    free(param->weight_label);
    free(param->weight);
}

const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
{
    // svm_type

    int svm_type = param->svm_type;
    if(svm_type != C_SVC &&
       svm_type != NU_SVC &&
       svm_type != ONE_CLASS &&
       svm_type != EPSILON_SVR &&
       svm_type != NU_SVR)
        return "unknown svm type";

    // kernel_type, degree

    int kernel_type = param->kernel_type;
    if(kernel_type != LINEAR &&
       kernel_type != POLY &&
       kernel_type != RBF &&
       kernel_type != SIGMOID &&
       kernel_type != PRECOMPUTED)
        return "unknown kernel type";

    if(param->degree < 0)
        return "degree of polynomial kernel < 0";

    // cache_size,eps,C,nu,p,shrinking

    if(param->cache_size <= 0)
        return "cache_size <= 0";

    if(param->eps <= 0)
        return "eps <= 0";

    if(svm_type == C_SVC ||
       svm_type == EPSILON_SVR ||
       svm_type == NU_SVR)
        if(param->C <= 0)
            return "C <= 0";

    if(svm_type == NU_SVC ||
       svm_type == ONE_CLASS ||
       svm_type == NU_SVR)
        if(param->nu <= 0 || param->nu > 1)
            return "nu <= 0 or nu > 1";

    if(svm_type == EPSILON_SVR)
        if(param->p < 0)
            return "p < 0";

    if(param->shrinking != 0 &&
       param->shrinking != 1)
        return "shrinking != 0 and shrinking != 1";

    if(param->probability != 0 &&
       param->probability != 1)
        return "probability != 0 and probability != 1";

    if(param->probability == 1 &&
       svm_type == ONE_CLASS)
        return "one-class SVM probability output not supported yet";


    // check whether nu-svc is feasible

    if(svm_type == NU_SVC)
    {
        int l = prob->l;
        int max_nr_class = 16;
        int nr_class = 0;
        int *label = Malloc(int,max_nr_class);
        int *count = Malloc(int,max_nr_class);

        int i;
        for(i=0;i<l;i++)
        {
            int this_label = (int)prob->y[i];
            int j;
            for(j=0;j<nr_class;j++)
                if(this_label == label[j])
                {
                    ++count[j];
                    break;
                }
            if(j == nr_class)
            {
                if(nr_class == max_nr_class)
                {
                    max_nr_class *= 2;
                    label = (int *)realloc(label,max_nr_class*sizeof(int));
                    count = (int *)realloc(count,max_nr_class*sizeof(int));
                }
                label[nr_class] = this_label;
                count[nr_class] = 1;
                ++nr_class;
            }
        }

        for(i=0;i<nr_class;i++)
        {
            int n1 = count[i];
            for(int j=i+1;j<nr_class;j++)
            {
                int n2 = count[j];
                if(param->nu*(n1+n2)/2 > min(n1,n2))
                {
                    free(label);
                    free(count);
                    return "specified nu is infeasible";
                }
            }
        }
        free(label);
        free(count);
    }

    return NULL;
}

int svm_check_probability_model(const svm_model *model)
{
    return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
            model->probA!=NULL && model->probB!=NULL) ||
           ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
            model->probA!=NULL);
}
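
//
// Illustrative end-to-end sketch (not part of the LIBSVM API): train a tiny
// two-class problem, save the model, reload it and predict one point.
// The helper name example_usage, the guard LIBSVM_USAGE_EXAMPLE and the file
// name "example.model" are hypothetical.
//
#ifdef LIBSVM_USAGE_EXAMPLE
static void example_usage()
{
    // four 2-d training points, each row terminated by an index of -1
    static svm_node x0[] = {{1,0.0},{2,0.0},{-1,0.0}};
    static svm_node x1[] = {{1,1.0},{2,1.0},{-1,0.0}};
    static svm_node x2[] = {{1,0.1},{2,0.1},{-1,0.0}};
    static svm_node x3[] = {{1,0.9},{2,0.9},{-1,0.0}};
    svm_node *x[] = {x0,x1,x2,x3};
    double y[] = {-1,+1,-1,+1};

    svm_problem prob;
    prob.l = 4; prob.x = x; prob.y = y;

    svm_parameter param;
    memset(&param,0,sizeof(param));
    param.svm_type = C_SVC;
    param.kernel_type = RBF;
    param.gamma = 0.5;
    param.C = 1;
    param.cache_size = 10;    // MB
    param.eps = 1e-3;
    param.shrinking = 1;

    if(svm_check_parameter(&prob,&param) == NULL)
    {
        svm_model *model = svm_train(&prob,&param);
        svm_save_model("example.model",model);
        svm_destroy_model(model);

        model = svm_load_model("example.model");
        if(model != NULL)
        {
            info("predicted label for (0.1,0.1): %g\n", svm_predict(model,x2));
            svm_destroy_model(model);
        }
    }
    svm_destroy_param(&param);
}
#endif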