00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034 #include <math.h>
00035 #include <stdio.h>
00036 #include <stdlib.h>
00037 #include <ctype.h>
00038 #include <float.h>
00039 #include <string.h>
00040 #include <stdarg.h>
00041 #include "svm.h"
00042 typedef float Qfloat;
00043 typedef signed char schar;
00044 #ifndef min
00045 template <class T> inline T min(T x,T y) { return (x<y)?x:y; }
00046 #endif
00047 #ifndef max
00048 template <class T> inline T max(T x,T y) { return (x>y)?x:y; }
00049 #endif
00050 template <class T> inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
00051 template <class S, class T> inline void clone(T*& dst, S* src, int n)
00052 {
00053 dst = new T[n];
00054 memcpy((void *)dst,(void *)src,sizeof(T)*n);
00055 }
00056 inline double powi(double base, int times)
00057 {
00058 double tmp = base, ret = 1.0;
00059
00060 for(int t=times; t>0; t/=2)
00061 {
00062 if(t%2==1) ret*=tmp;
00063 tmp = tmp * tmp;
00064 }
00065 return ret;
00066 }
00067 #define INF HUGE_VAL
00068 #define TAU 1e-12
00069 #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
00070 #if 1
00071 void info(const char *fmt,...)
00072 {
00073 va_list ap;
00074 va_start(ap,fmt);
00075 vprintf(fmt,ap);
00076 va_end(ap);
00077 }
00078 void info_flush()
00079 {
00080 fflush(stdout);
00081 }
00082 #else
00083 void info(char *fmt,...) {}
00084 void info_flush() {}
00085 #endif
00086
00087
00088
00089
00090
00091
00092
00093 class Cache
00094 {
00095 public:
00096 Cache(int l,long int size);
00097 ~Cache();
00098
00099
00100
00101
00102 int get_data(const int index, Qfloat **data, int len);
00103 void swap_index(int i, int j);
00104 private:
00105 int l;
00106 long int size;
00107 struct head_t
00108 {
00109 head_t *prev, *next;
00110 Qfloat *data;
00111 int len;
00112 };
00113
00114 head_t *head;
00115 head_t lru_head;
00116 void lru_delete(head_t *h);
00117 void lru_insert(head_t *h);
00118 };
00119
00120 Cache::Cache(int l_,long int size_):l(l_),size(size_)
00121 {
00122 head = (head_t *)calloc(l,sizeof(head_t));
00123 size /= sizeof(Qfloat);
00124 size -= l * sizeof(head_t) / sizeof(Qfloat);
00125 size = max(size, 2 * (long int) l);
00126 lru_head.next = lru_head.prev = &lru_head;
00127 }
00128
00129 Cache::~Cache()
00130 {
00131 for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
00132 free(h->data);
00133 free(head);
00134 }
00135
00136 void Cache::lru_delete(head_t *h)
00137 {
00138
00139 h->prev->next = h->next;
00140 h->next->prev = h->prev;
00141 }
00142
00143 void Cache::lru_insert(head_t *h)
00144 {
00145
00146 h->next = &lru_head;
00147 h->prev = lru_head.prev;
00148 h->prev->next = h;
00149 h->next->prev = h;
00150 }
00151
00152 int Cache::get_data(const int index, Qfloat **data, int len)
00153 {
00154 head_t *h = &head[index];
00155 if(h->len) lru_delete(h);
00156 int more = len - h->len;
00157
00158 if(more > 0)
00159 {
00160
00161 while(size < more)
00162 {
00163 head_t *old = lru_head.next;
00164 lru_delete(old);
00165 free(old->data);
00166 size += old->len;
00167 old->data = 0;
00168 old->len = 0;
00169 }
00170
00171
00172 h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len);
00173 size -= more;
00174 swap(h->len,len);
00175 }
00176
00177 lru_insert(h);
00178 *data = h->data;
00179 return len;
00180 }
00181
00182 void Cache::swap_index(int i, int j)
00183 {
00184 if(i==j) return;
00185
00186 if(head[i].len) lru_delete(&head[i]);
00187 if(head[j].len) lru_delete(&head[j]);
00188 swap(head[i].data,head[j].data);
00189 swap(head[i].len,head[j].len);
00190 if(head[i].len) lru_insert(&head[i]);
00191 if(head[j].len) lru_insert(&head[j]);
00192
00193 if(i>j) swap(i,j);
00194 for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
00195 {
00196 if(h->len > i)
00197 {
00198 if(h->len > j)
00199 swap(h->data[i],h->data[j]);
00200 else
00201 {
00202
00203 lru_delete(h);
00204 free(h->data);
00205 size += h->len;
00206 h->data = 0;
00207 h->len = 0;
00208 }
00209 }
00210 }
00211 }
00212
00213
00214
00215
00216
00217
00218
00219
00220 class QMatrix {
00221 public:
00222 virtual Qfloat *get_Q(int column, int len) const = 0;
00223 virtual Qfloat *get_QD() const = 0;
00224 virtual void swap_index(int i, int j) const = 0;
00225 virtual ~QMatrix() {}
00226 };
00227
00228 class Kernel: public QMatrix {
00229 public:
00230 Kernel(int l, svm_node * const * x, const svm_parameter& param);
00231 virtual ~Kernel();
00232
00233 static double k_function(const svm_node *x, const svm_node *y,
00234 const svm_parameter& param);
00235 virtual Qfloat *get_Q(int column, int len) const = 0;
00236 virtual Qfloat *get_QD() const = 0;
00237 virtual void swap_index(int i, int j) const
00238 {
00239 swap(x[i],x[j]);
00240 if(x_square) swap(x_square[i],x_square[j]);
00241 }
00242 protected:
00243
00244 double (Kernel::*kernel_function)(int i, int j) const;
00245
00246 private:
00247 const svm_node **x;
00248 double *x_square;
00249
00250
00251 const int kernel_type;
00252 const int degree;
00253 const double gamma;
00254 const double coef0;
00255
00256 static double dot(const svm_node *px, const svm_node *py);
00257 double kernel_linear(int i, int j) const
00258 {
00259 return dot(x[i],x[j]);
00260 }
00261 double kernel_poly(int i, int j) const
00262 {
00263 return powi(gamma*dot(x[i],x[j])+coef0,degree);
00264 }
00265 double kernel_rbf(int i, int j) const
00266 {
00267 return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j])));
00268 }
00269 double kernel_sigmoid(int i, int j) const
00270 {
00271 return tanh(gamma*dot(x[i],x[j])+coef0);
00272 }
00273 double kernel_precomputed(int i, int j) const
00274 {
00275 return x[i][(int)(x[j][0].value)].value;
00276 }
00277 };
00278
00279 Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param)
00280 :kernel_type(param.kernel_type), degree(param.degree),
00281 gamma(param.gamma), coef0(param.coef0)
00282 {
00283 switch(kernel_type)
00284 {
00285 case LINEAR:
00286 kernel_function = &Kernel::kernel_linear;
00287 break;
00288 case POLY:
00289 kernel_function = &Kernel::kernel_poly;
00290 break;
00291 case RBF:
00292 kernel_function = &Kernel::kernel_rbf;
00293 break;
00294 case SIGMOID:
00295 kernel_function = &Kernel::kernel_sigmoid;
00296 break;
00297 case PRECOMPUTED:
00298 kernel_function = &Kernel::kernel_precomputed;
00299 break;
00300 }
00301
00302 clone(x,x_,l);
00303
00304 if(kernel_type == RBF)
00305 {
00306 x_square = new double[l];
00307 for(int i=0;i<l;i++)
00308 x_square[i] = dot(x[i],x[i]);
00309 }
00310 else
00311 x_square = 0;
00312 }
00313
00314 Kernel::~Kernel()
00315 {
00316 delete[] x;
00317 delete[] x_square;
00318 }
00319
00320 double Kernel::dot(const svm_node *px, const svm_node *py)
00321 {
00322 double sum = 0;
00323 while(px->index != -1 && py->index != -1)
00324 {
00325 if(px->index == py->index)
00326 {
00327 sum += px->value * py->value;
00328 ++px;
00329 ++py;
00330 }
00331 else
00332 {
00333 if(px->index > py->index)
00334 ++py;
00335 else
00336 ++px;
00337 }
00338 }
00339 return sum;
00340 }
00341
00342 double Kernel::k_function(const svm_node *x, const svm_node *y,
00343 const svm_parameter& param)
00344 {
00345 switch(param.kernel_type)
00346 {
00347 case LINEAR:
00348 return dot(x,y);
00349 case POLY:
00350 return powi(param.gamma*dot(x,y)+param.coef0,param.degree);
00351 case RBF:
00352 {
00353 double sum = 0;
00354 while(x->index != -1 && y->index !=-1)
00355 {
00356 if(x->index == y->index)
00357 {
00358 double d = x->value - y->value;
00359 sum += d*d;
00360 ++x;
00361 ++y;
00362 }
00363 else
00364 {
00365 if(x->index > y->index)
00366 {
00367 sum += y->value * y->value;
00368 ++y;
00369 }
00370 else
00371 {
00372 sum += x->value * x->value;
00373 ++x;
00374 }
00375 }
00376 }
00377
00378 while(x->index != -1)
00379 {
00380 sum += x->value * x->value;
00381 ++x;
00382 }
00383
00384 while(y->index != -1)
00385 {
00386 sum += y->value * y->value;
00387 ++y;
00388 }
00389
00390 return exp(-param.gamma*sum);
00391 }
00392 case SIGMOID:
00393 return tanh(param.gamma*dot(x,y)+param.coef0);
00394 case PRECOMPUTED:
00395 return x[(int)(y->value)].value;
00396 default:
00397 return 0;
00398 }
00399 }
00400
00401
00402
00403
00404
00405
00406
00407
00408
00409
00410
00411
00412
00413
00414
00415
00416
00417
00418
00419 class Solver {
00420 public:
00421 Solver() {};
00422 virtual ~Solver() {};
00423
00424 struct SolutionInfo {
00425 double obj;
00426 double rho;
00427 double upper_bound_p;
00428 double upper_bound_n;
00429 double r;
00430 };
00431
00432 void Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
00433 double *alpha_, double Cp, double Cn, double eps,
00434 SolutionInfo* si, int shrinking);
00435 protected:
00436 int active_size;
00437 schar *y;
00438 double *G;
00439 enum { LOWER_BOUND, UPPER_BOUND, FREE };
00440 char *alpha_status;
00441 double *alpha;
00442 const QMatrix *Q;
00443 const Qfloat *QD;
00444 double eps;
00445 double Cp,Cn;
00446 double *p;
00447 int *active_set;
00448 double *G_bar;
00449 int l;
00450 bool unshrinked;
00451
00452 double get_C(int i)
00453 {
00454 return (y[i] > 0)? Cp : Cn;
00455 }
00456 void update_alpha_status(int i)
00457 {
00458 if(alpha[i] >= get_C(i))
00459 alpha_status[i] = UPPER_BOUND;
00460 else if(alpha[i] <= 0)
00461 alpha_status[i] = LOWER_BOUND;
00462 else alpha_status[i] = FREE;
00463 }
00464 bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
00465 bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
00466 bool is_free(int i) { return alpha_status[i] == FREE; }
00467 void swap_index(int i, int j);
00468 void reconstruct_gradient();
00469 virtual int select_working_set(int &i, int &j);
00470 virtual double calculate_rho();
00471 virtual void do_shrinking();
00472 private:
00473 bool be_shrunken(int i, double Gmax1, double Gmax2);
00474 };
00475
00476 void Solver::swap_index(int i, int j)
00477 {
00478 Q->swap_index(i,j);
00479 swap(y[i],y[j]);
00480 swap(G[i],G[j]);
00481 swap(alpha_status[i],alpha_status[j]);
00482 swap(alpha[i],alpha[j]);
00483 swap(p[i],p[j]);
00484 swap(active_set[i],active_set[j]);
00485 swap(G_bar[i],G_bar[j]);
00486 }
00487
00488 void Solver::reconstruct_gradient()
00489 {
00490
00491
00492 if(active_size == l) return;
00493
00494 int i;
00495 for(i=active_size;i<l;i++)
00496 G[i] = G_bar[i] + p[i];
00497
00498 for(i=0;i<active_size;i++)
00499 if(is_free(i))
00500 {
00501 const Qfloat *Q_i = Q->get_Q(i,l);
00502 double alpha_i = alpha[i];
00503 for(int j=active_size;j<l;j++)
00504 G[j] += alpha_i * Q_i[j];
00505 }
00506 }
00507
00508 void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
00509 double *alpha_, double Cp, double Cn, double eps,
00510 SolutionInfo* si, int shrinking)
00511 {
00512 this->l = l;
00513 this->Q = &Q;
00514 QD=Q.get_QD();
00515 clone(p, p_,l);
00516 clone(y, y_,l);
00517 clone(alpha,alpha_,l);
00518 this->Cp = Cp;
00519 this->Cn = Cn;
00520 this->eps = eps;
00521 unshrinked = false;
00522
00523
00524 {
00525 alpha_status = new char[l];
00526 for(int i=0;i<l;i++)
00527 update_alpha_status(i);
00528 }
00529
00530
00531 {
00532 active_set = new int[l];
00533 for(int i=0;i<l;i++)
00534 active_set[i] = i;
00535 active_size = l;
00536 }
00537
00538
00539 {
00540 G = new double[l];
00541 G_bar = new double[l];
00542 int i;
00543 for(i=0;i<l;i++)
00544 {
00545 G[i] = p[i];
00546 G_bar[i] = 0;
00547 }
00548 for(i=0;i<l;i++)
00549 if(!is_lower_bound(i))
00550 {
00551 const Qfloat *Q_i = Q.get_Q(i,l);
00552 double alpha_i = alpha[i];
00553 int j;
00554 for(j=0;j<l;j++)
00555 G[j] += alpha_i*Q_i[j];
00556 if(is_upper_bound(i))
00557 for(j=0;j<l;j++)
00558 G_bar[j] += get_C(i) * Q_i[j];
00559 }
00560 }
00561
00562
00563
00564 int iter = 0;
00565 int counter = min(l,1000)+1;
00566
00567 while(1)
00568 {
00569
00570
00571 if(--counter == 0)
00572 {
00573 counter = min(l,1000);
00574 if(shrinking) do_shrinking();
00575 info("."); info_flush();
00576 }
00577
00578 int i,j;
00579 if(select_working_set(i,j)!=0)
00580 {
00581
00582 reconstruct_gradient();
00583
00584 active_size = l;
00585 info("*"); info_flush();
00586 if(select_working_set(i,j)!=0)
00587 break;
00588 else
00589 counter = 1;
00590 }
00591
00592 ++iter;
00593
00594
00595
00596 const Qfloat *Q_i = Q.get_Q(i,active_size);
00597 const Qfloat *Q_j = Q.get_Q(j,active_size);
00598
00599 double C_i = get_C(i);
00600 double C_j = get_C(j);
00601
00602 double old_alpha_i = alpha[i];
00603 double old_alpha_j = alpha[j];
00604
00605 if(y[i]!=y[j])
00606 {
00607 double quad_coef = Q_i[i]+Q_j[j]+2*Q_i[j];
00608 if (quad_coef <= 0)
00609 quad_coef = TAU;
00610 double delta = (-G[i]-G[j])/quad_coef;
00611 double diff = alpha[i] - alpha[j];
00612 alpha[i] += delta;
00613 alpha[j] += delta;
00614
00615 if(diff > 0)
00616 {
00617 if(alpha[j] < 0)
00618 {
00619 alpha[j] = 0;
00620 alpha[i] = diff;
00621 }
00622 }
00623 else
00624 {
00625 if(alpha[i] < 0)
00626 {
00627 alpha[i] = 0;
00628 alpha[j] = -diff;
00629 }
00630 }
00631 if(diff > C_i - C_j)
00632 {
00633 if(alpha[i] > C_i)
00634 {
00635 alpha[i] = C_i;
00636 alpha[j] = C_i - diff;
00637 }
00638 }
00639 else
00640 {
00641 if(alpha[j] > C_j)
00642 {
00643 alpha[j] = C_j;
00644 alpha[i] = C_j + diff;
00645 }
00646 }
00647 }
00648 else
00649 {
00650 double quad_coef = Q_i[i]+Q_j[j]-2*Q_i[j];
00651 if (quad_coef <= 0)
00652 quad_coef = TAU;
00653 double delta = (G[i]-G[j])/quad_coef;
00654 double sum = alpha[i] + alpha[j];
00655 alpha[i] -= delta;
00656 alpha[j] += delta;
00657
00658 if(sum > C_i)
00659 {
00660 if(alpha[i] > C_i)
00661 {
00662 alpha[i] = C_i;
00663 alpha[j] = sum - C_i;
00664 }
00665 }
00666 else
00667 {
00668 if(alpha[j] < 0)
00669 {
00670 alpha[j] = 0;
00671 alpha[i] = sum;
00672 }
00673 }
00674 if(sum > C_j)
00675 {
00676 if(alpha[j] > C_j)
00677 {
00678 alpha[j] = C_j;
00679 alpha[i] = sum - C_j;
00680 }
00681 }
00682 else
00683 {
00684 if(alpha[i] < 0)
00685 {
00686 alpha[i] = 0;
00687 alpha[j] = sum;
00688 }
00689 }
00690 }
00691
00692
00693
00694 double delta_alpha_i = alpha[i] - old_alpha_i;
00695 double delta_alpha_j = alpha[j] - old_alpha_j;
00696
00697 for(int k=0;k<active_size;k++)
00698 {
00699 G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
00700 }
00701
00702
00703
00704 {
00705 bool ui = is_upper_bound(i);
00706 bool uj = is_upper_bound(j);
00707 update_alpha_status(i);
00708 update_alpha_status(j);
00709 int k;
00710 if(ui != is_upper_bound(i))
00711 {
00712 Q_i = Q.get_Q(i,l);
00713 if(ui)
00714 for(k=0;k<l;k++)
00715 G_bar[k] -= C_i * Q_i[k];
00716 else
00717 for(k=0;k<l;k++)
00718 G_bar[k] += C_i * Q_i[k];
00719 }
00720
00721 if(uj != is_upper_bound(j))
00722 {
00723 Q_j = Q.get_Q(j,l);
00724 if(uj)
00725 for(k=0;k<l;k++)
00726 G_bar[k] -= C_j * Q_j[k];
00727 else
00728 for(k=0;k<l;k++)
00729 G_bar[k] += C_j * Q_j[k];
00730 }
00731 }
00732 }
00733
00734
00735
00736 si->rho = calculate_rho();
00737
00738
00739 {
00740 double v = 0;
00741 int i;
00742 for(i=0;i<l;i++)
00743 v += alpha[i] * (G[i] + p[i]);
00744
00745 si->obj = v/2;
00746 }
00747
00748
00749 {
00750 for(int i=0;i<l;i++)
00751 alpha_[active_set[i]] = alpha[i];
00752 }
00753
00754
00755
00756
00757
00758
00759
00760
00761
00762 si->upper_bound_p = Cp;
00763 si->upper_bound_n = Cn;
00764
00765 info("\noptimization finished, #iter = %d\n",iter);
00766
00767 delete[] p;
00768 delete[] y;
00769 delete[] alpha;
00770 delete[] alpha_status;
00771 delete[] active_set;
00772 delete[] G;
00773 delete[] G_bar;
00774 }
00775
00776
00777 int Solver::select_working_set(int &out_i, int &out_j)
00778 {
00779
00780
00781
00782
00783
00784
00785 double Gmax = -INF;
00786 double Gmax2 = -INF;
00787 int Gmax_idx = -1;
00788 int Gmin_idx = -1;
00789 double obj_diff_min = INF;
00790
00791 for(int t=0;t<active_size;t++)
00792 if(y[t]==+1)
00793 {
00794 if(!is_upper_bound(t))
00795 if(-G[t] >= Gmax)
00796 {
00797 Gmax = -G[t];
00798 Gmax_idx = t;
00799 }
00800 }
00801 else
00802 {
00803 if(!is_lower_bound(t))
00804 if(G[t] >= Gmax)
00805 {
00806 Gmax = G[t];
00807 Gmax_idx = t;
00808 }
00809 }
00810
00811 int i = Gmax_idx;
00812 const Qfloat *Q_i = NULL;
00813 if(i != -1)
00814 Q_i = Q->get_Q(i,active_size);
00815
00816 for(int j=0;j<active_size;j++)
00817 {
00818 if(y[j]==+1)
00819 {
00820 if (!is_lower_bound(j))
00821 {
00822 double grad_diff=Gmax+G[j];
00823 if (G[j] >= Gmax2)
00824 Gmax2 = G[j];
00825 if (grad_diff > 0)
00826 {
00827 double obj_diff;
00828 double quad_coef=Q_i[i]+QD[j]-2*y[i]*Q_i[j];
00829 if (quad_coef > 0)
00830 obj_diff = -(grad_diff*grad_diff)/quad_coef;
00831 else
00832 obj_diff = -(grad_diff*grad_diff)/TAU;
00833
00834 if (obj_diff <= obj_diff_min)
00835 {
00836 Gmin_idx=j;
00837 obj_diff_min = obj_diff;
00838 }
00839 }
00840 }
00841 }
00842 else
00843 {
00844 if (!is_upper_bound(j))
00845 {
00846 double grad_diff= Gmax-G[j];
00847 if (-G[j] >= Gmax2)
00848 Gmax2 = -G[j];
00849 if (grad_diff > 0)
00850 {
00851 double obj_diff;
00852 double quad_coef=Q_i[i]+QD[j]+2*y[i]*Q_i[j];
00853 if (quad_coef > 0)
00854 obj_diff = -(grad_diff*grad_diff)/quad_coef;
00855 else
00856 obj_diff = -(grad_diff*grad_diff)/TAU;
00857
00858 if (obj_diff <= obj_diff_min)
00859 {
00860 Gmin_idx=j;
00861 obj_diff_min = obj_diff;
00862 }
00863 }
00864 }
00865 }
00866 }
00867
00868 if(Gmax+Gmax2 < eps)
00869 return 1;
00870
00871 out_i = Gmax_idx;
00872 out_j = Gmin_idx;
00873 return 0;
00874 }
00875
00876 bool Solver::be_shrunken(int i, double Gmax1, double Gmax2)
00877 {
00878 if(is_upper_bound(i))
00879 {
00880 if(y[i]==+1)
00881 return(-G[i] > Gmax1);
00882 else
00883 return(-G[i] > Gmax2);
00884 }
00885 else if(is_lower_bound(i))
00886 {
00887 if(y[i]==+1)
00888 return(G[i] > Gmax2);
00889 else
00890 return(G[i] > Gmax1);
00891 }
00892 else
00893 return(false);
00894 }
00895
00896 void Solver::do_shrinking()
00897 {
00898 int i;
00899 double Gmax1 = -INF;
00900 double Gmax2 = -INF;
00901
00902
00903 for(i=0;i<active_size;i++)
00904 {
00905 if(y[i]==+1)
00906 {
00907 if(!is_upper_bound(i))
00908 {
00909 if(-G[i] >= Gmax1)
00910 Gmax1 = -G[i];
00911 }
00912 if(!is_lower_bound(i))
00913 {
00914 if(G[i] >= Gmax2)
00915 Gmax2 = G[i];
00916 }
00917 }
00918 else
00919 {
00920 if(!is_upper_bound(i))
00921 {
00922 if(-G[i] >= Gmax2)
00923 Gmax2 = -G[i];
00924 }
00925 if(!is_lower_bound(i))
00926 {
00927 if(G[i] >= Gmax1)
00928 Gmax1 = G[i];
00929 }
00930 }
00931 }
00932
00933
00934
00935 for(i=0;i<active_size;i++)
00936 if (be_shrunken(i, Gmax1, Gmax2))
00937 {
00938 active_size--;
00939 while (active_size > i)
00940 {
00941 if (!be_shrunken(active_size, Gmax1, Gmax2))
00942 {
00943 swap_index(i,active_size);
00944 break;
00945 }
00946 active_size--;
00947 }
00948 }
00949
00950
00951
00952 if(unshrinked || Gmax1 + Gmax2 > eps*10) return;
00953
00954 unshrinked = true;
00955 reconstruct_gradient();
00956
00957 for(i=l-1;i>=active_size;i--)
00958 if (!be_shrunken(i, Gmax1, Gmax2))
00959 {
00960 while (active_size < i)
00961 {
00962 if (be_shrunken(active_size, Gmax1, Gmax2))
00963 {
00964 swap_index(i,active_size);
00965 break;
00966 }
00967 active_size++;
00968 }
00969 active_size++;
00970 }
00971 }
00972
00973 double Solver::calculate_rho()
00974 {
00975 double r;
00976 int nr_free = 0;
00977 double ub = INF, lb = -INF, sum_free = 0;
00978 for(int i=0;i<active_size;i++)
00979 {
00980 double yG = y[i]*G[i];
00981
00982 if(is_upper_bound(i))
00983 {
00984 if(y[i]==-1)
00985 ub = min(ub,yG);
00986 else
00987 lb = max(lb,yG);
00988 }
00989 else if(is_lower_bound(i))
00990 {
00991 if(y[i]==+1)
00992 ub = min(ub,yG);
00993 else
00994 lb = max(lb,yG);
00995 }
00996 else
00997 {
00998 ++nr_free;
00999 sum_free += yG;
01000 }
01001 }
01002
01003 if(nr_free>0)
01004 r = sum_free/nr_free;
01005 else
01006 r = (ub+lb)/2;
01007
01008 return r;
01009 }
01010
01011
01012
01013
01014
01015
01016 class Solver_NU : public Solver
01017 {
01018 public:
01019 Solver_NU() {}
01020 void Solve(int l, const QMatrix& Q, const double *p, const schar *y,
01021 double *alpha, double Cp, double Cn, double eps,
01022 SolutionInfo* si, int shrinking)
01023 {
01024 this->si = si;
01025 Solver::Solve(l,Q,p,y,alpha,Cp,Cn,eps,si,shrinking);
01026 }
01027 private:
01028 SolutionInfo *si;
01029 int select_working_set(int &i, int &j);
01030 double calculate_rho();
01031 bool be_shrunken(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4);
01032 void do_shrinking();
01033 };
01034
01035
01036 int Solver_NU::select_working_set(int &out_i, int &out_j)
01037 {
01038
01039
01040
01041
01042
01043
01044 double Gmaxp = -INF;
01045 double Gmaxp2 = -INF;
01046 int Gmaxp_idx = -1;
01047
01048 double Gmaxn = -INF;
01049 double Gmaxn2 = -INF;
01050 int Gmaxn_idx = -1;
01051
01052 int Gmin_idx = -1;
01053 double obj_diff_min = INF;
01054
01055 for(int t=0;t<active_size;t++)
01056 if(y[t]==+1)
01057 {
01058 if(!is_upper_bound(t))
01059 if(-G[t] >= Gmaxp)
01060 {
01061 Gmaxp = -G[t];
01062 Gmaxp_idx = t;
01063 }
01064 }
01065 else
01066 {
01067 if(!is_lower_bound(t))
01068 if(G[t] >= Gmaxn)
01069 {
01070 Gmaxn = G[t];
01071 Gmaxn_idx = t;
01072 }
01073 }
01074
01075 int ip = Gmaxp_idx;
01076 int in = Gmaxn_idx;
01077 const Qfloat *Q_ip = NULL;
01078 const Qfloat *Q_in = NULL;
01079 if(ip != -1)
01080 Q_ip = Q->get_Q(ip,active_size);
01081 if(in != -1)
01082 Q_in = Q->get_Q(in,active_size);
01083
01084 for(int j=0;j<active_size;j++)
01085 {
01086 if(y[j]==+1)
01087 {
01088 if (!is_lower_bound(j))
01089 {
01090 double grad_diff=Gmaxp+G[j];
01091 if (G[j] >= Gmaxp2)
01092 Gmaxp2 = G[j];
01093 if (grad_diff > 0)
01094 {
01095 double obj_diff;
01096 double quad_coef = Q_ip[ip]+QD[j]-2*Q_ip[j];
01097 if (quad_coef > 0)
01098 obj_diff = -(grad_diff*grad_diff)/quad_coef;
01099 else
01100 obj_diff = -(grad_diff*grad_diff)/TAU;
01101
01102 if (obj_diff <= obj_diff_min)
01103 {
01104 Gmin_idx=j;
01105 obj_diff_min = obj_diff;
01106 }
01107 }
01108 }
01109 }
01110 else
01111 {
01112 if (!is_upper_bound(j))
01113 {
01114 double grad_diff=Gmaxn-G[j];
01115 if (-G[j] >= Gmaxn2)
01116 Gmaxn2 = -G[j];
01117 if (grad_diff > 0)
01118 {
01119 double obj_diff;
01120 double quad_coef = Q_in[in]+QD[j]-2*Q_in[j];
01121 if (quad_coef > 0)
01122 obj_diff = -(grad_diff*grad_diff)/quad_coef;
01123 else
01124 obj_diff = -(grad_diff*grad_diff)/TAU;
01125
01126 if (obj_diff <= obj_diff_min)
01127 {
01128 Gmin_idx=j;
01129 obj_diff_min = obj_diff;
01130 }
01131 }
01132 }
01133 }
01134 }
01135
01136 if(max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) < eps)
01137 return 1;
01138
01139 if (y[Gmin_idx] == +1)
01140 out_i = Gmaxp_idx;
01141 else
01142 out_i = Gmaxn_idx;
01143 out_j = Gmin_idx;
01144
01145 return 0;
01146 }
01147
01148 bool Solver_NU::be_shrunken(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4)
01149 {
01150 if(is_upper_bound(i))
01151 {
01152 if(y[i]==+1)
01153 return(-G[i] > Gmax1);
01154 else
01155 return(-G[i] > Gmax4);
01156 }
01157 else if(is_lower_bound(i))
01158 {
01159 if(y[i]==+1)
01160 return(G[i] > Gmax2);
01161 else
01162 return(G[i] > Gmax3);
01163 }
01164 else
01165 return(false);
01166 }
01167
01168 void Solver_NU::do_shrinking()
01169 {
01170 double Gmax1 = -INF;
01171 double Gmax2 = -INF;
01172 double Gmax3 = -INF;
01173 double Gmax4 = -INF;
01174
01175
01176 int i;
01177 for(i=0;i<active_size;i++)
01178 {
01179 if(!is_upper_bound(i))
01180 {
01181 if(y[i]==+1)
01182 {
01183 if(-G[i] > Gmax1) Gmax1 = -G[i];
01184 }
01185 else if(-G[i] > Gmax4) Gmax4 = -G[i];
01186 }
01187 if(!is_lower_bound(i))
01188 {
01189 if(y[i]==+1)
01190 {
01191 if(G[i] > Gmax2) Gmax2 = G[i];
01192 }
01193 else if(G[i] > Gmax3) Gmax3 = G[i];
01194 }
01195 }
01196
01197
01198
01199 for(i=0;i<active_size;i++)
01200 if (be_shrunken(i, Gmax1, Gmax2, Gmax3, Gmax4))
01201 {
01202 active_size--;
01203 while (active_size > i)
01204 {
01205 if (!be_shrunken(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
01206 {
01207 swap_index(i,active_size);
01208 break;
01209 }
01210 active_size--;
01211 }
01212 }
01213
01214
01215
01216 if(unshrinked || max(Gmax1+Gmax2,Gmax3+Gmax4) > eps*10) return;
01217
01218 unshrinked = true;
01219 reconstruct_gradient();
01220
01221 for(i=l-1;i>=active_size;i--)
01222 if (!be_shrunken(i, Gmax1, Gmax2, Gmax3, Gmax4))
01223 {
01224 while (active_size < i)
01225 {
01226 if (be_shrunken(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
01227 {
01228 swap_index(i,active_size);
01229 break;
01230 }
01231 active_size++;
01232 }
01233 active_size++;
01234 }
01235 }
01236
01237 double Solver_NU::calculate_rho()
01238 {
01239 int nr_free1 = 0,nr_free2 = 0;
01240 double ub1 = INF, ub2 = INF;
01241 double lb1 = -INF, lb2 = -INF;
01242 double sum_free1 = 0, sum_free2 = 0;
01243
01244 for(int i=0;i<active_size;i++)
01245 {
01246 if(y[i]==+1)
01247 {
01248 if(is_upper_bound(i))
01249 lb1 = max(lb1,G[i]);
01250 else if(is_lower_bound(i))
01251 ub1 = min(ub1,G[i]);
01252 else
01253 {
01254 ++nr_free1;
01255 sum_free1 += G[i];
01256 }
01257 }
01258 else
01259 {
01260 if(is_upper_bound(i))
01261 lb2 = max(lb2,G[i]);
01262 else if(is_lower_bound(i))
01263 ub2 = min(ub2,G[i]);
01264 else
01265 {
01266 ++nr_free2;
01267 sum_free2 += G[i];
01268 }
01269 }
01270 }
01271
01272 double r1,r2;
01273 if(nr_free1 > 0)
01274 r1 = sum_free1/nr_free1;
01275 else
01276 r1 = (ub1+lb1)/2;
01277
01278 if(nr_free2 > 0)
01279 r2 = sum_free2/nr_free2;
01280 else
01281 r2 = (ub2+lb2)/2;
01282
01283 si->r = (r1+r2)/2;
01284 return (r1-r2)/2;
01285 }
01286
01287
01288
01289
01290 class SVC_Q: public Kernel
01291 {
01292 public:
01293 SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_)
01294 :Kernel(prob.l, prob.x, param)
01295 {
01296 clone(y,y_,prob.l);
01297 cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
01298 QD = new Qfloat[prob.l];
01299 for(int i=0;i<prob.l;i++)
01300 QD[i]= (Qfloat)(this->*kernel_function)(i,i);
01301 }
01302
01303 Qfloat *get_Q(int i, int len) const
01304 {
01305 Qfloat *data;
01306 int start;
01307 if((start = cache->get_data(i,&data,len)) < len)
01308 {
01309 for(int j=start;j<len;j++)
01310 data[j] = (Qfloat)(y[i]*y[j]*(this->*kernel_function)(i,j));
01311 }
01312 return data;
01313 }
01314
01315 Qfloat *get_QD() const
01316 {
01317 return QD;
01318 }
01319
01320 void swap_index(int i, int j) const
01321 {
01322 cache->swap_index(i,j);
01323 Kernel::swap_index(i,j);
01324 swap(y[i],y[j]);
01325 swap(QD[i],QD[j]);
01326 }
01327
01328 ~SVC_Q()
01329 {
01330 delete[] y;
01331 delete cache;
01332 delete[] QD;
01333 }
01334 private:
01335 schar *y;
01336 Cache *cache;
01337 Qfloat *QD;
01338 };
01339
01340 class ONE_CLASS_Q: public Kernel
01341 {
01342 public:
01343 ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
01344 :Kernel(prob.l, prob.x, param)
01345 {
01346 cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
01347 QD = new Qfloat[prob.l];
01348 for(int i=0;i<prob.l;i++)
01349 QD[i]= (Qfloat)(this->*kernel_function)(i,i);
01350 }
01351
01352 Qfloat *get_Q(int i, int len) const
01353 {
01354 Qfloat *data;
01355 int start;
01356 if((start = cache->get_data(i,&data,len)) < len)
01357 {
01358 for(int j=start;j<len;j++)
01359 data[j] = (Qfloat)(this->*kernel_function)(i,j);
01360 }
01361 return data;
01362 }
01363
01364 Qfloat *get_QD() const
01365 {
01366 return QD;
01367 }
01368
01369 void swap_index(int i, int j) const
01370 {
01371 cache->swap_index(i,j);
01372 Kernel::swap_index(i,j);
01373 swap(QD[i],QD[j]);
01374 }
01375
01376 ~ONE_CLASS_Q()
01377 {
01378 delete cache;
01379 delete[] QD;
01380 }
01381 private:
01382 Cache *cache;
01383 Qfloat *QD;
01384 };
01385
01386 class SVR_Q: public Kernel
01387 {
01388 public:
01389 SVR_Q(const svm_problem& prob, const svm_parameter& param)
01390 :Kernel(prob.l, prob.x, param)
01391 {
01392 l = prob.l;
01393 cache = new Cache(l,(long int)(param.cache_size*(1<<20)));
01394 QD = new Qfloat[2*l];
01395 sign = new schar[2*l];
01396 index = new int[2*l];
01397 for(int k=0;k<l;k++)
01398 {
01399 sign[k] = 1;
01400 sign[k+l] = -1;
01401 index[k] = k;
01402 index[k+l] = k;
01403 QD[k]= (Qfloat)(this->*kernel_function)(k,k);
01404 QD[k+l]=QD[k];
01405 }
01406 buffer[0] = new Qfloat[2*l];
01407 buffer[1] = new Qfloat[2*l];
01408 next_buffer = 0;
01409 }
01410
01411 void swap_index(int i, int j) const
01412 {
01413 swap(sign[i],sign[j]);
01414 swap(index[i],index[j]);
01415 swap(QD[i],QD[j]);
01416 }
01417
01418 Qfloat *get_Q(int i, int len) const
01419 {
01420 Qfloat *data;
01421 int real_i = index[i];
01422 if(cache->get_data(real_i,&data,l) < l)
01423 {
01424 for(int j=0;j<l;j++)
01425 data[j] = (Qfloat)(this->*kernel_function)(real_i,j);
01426 }
01427
01428
01429 Qfloat *buf = buffer[next_buffer];
01430 next_buffer = 1 - next_buffer;
01431 schar si = sign[i];
01432 for(int j=0;j<len;j++)
01433 buf[j] = si * sign[j] * data[index[j]];
01434 return buf;
01435 }
01436
01437 Qfloat *get_QD() const
01438 {
01439 return QD;
01440 }
01441
01442 ~SVR_Q()
01443 {
01444 delete cache;
01445 delete[] sign;
01446 delete[] index;
01447 delete[] buffer[0];
01448 delete[] buffer[1];
01449 delete[] QD;
01450 }
01451 private:
01452 int l;
01453 Cache *cache;
01454 schar *sign;
01455 int *index;
01456 mutable int next_buffer;
01457 Qfloat *buffer[2];
01458 Qfloat *QD;
01459 };
01460
01461
01462
01463
01464 static void solve_c_svc(
01465 const svm_problem *prob, const svm_parameter* param,
01466 double *alpha, Solver::SolutionInfo* si, double Cp, double Cn)
01467 {
01468 int l = prob->l;
01469 double *minus_ones = new double[l];
01470 schar *y = new schar[l];
01471
01472 int i;
01473
01474 for(i=0;i<l;i++)
01475 {
01476 alpha[i] = 0;
01477 minus_ones[i] = -1;
01478 if(prob->y[i] > 0) y[i] = +1; else y[i]=-1;
01479 }
01480
01481 Solver s;
01482 s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
01483 alpha, Cp, Cn, param->eps, si, param->shrinking);
01484
01485 double sum_alpha=0;
01486 for(i=0;i<l;i++)
01487 sum_alpha += alpha[i];
01488
01489 if (Cp==Cn)
01490 info("nu = %f\n", sum_alpha/(Cp*prob->l));
01491
01492 for(i=0;i<l;i++)
01493 alpha[i] *= y[i];
01494
01495 delete[] minus_ones;
01496 delete[] y;
01497 }
01498
01499 static void solve_nu_svc(
01500 const svm_problem *prob, const svm_parameter *param,
01501 double *alpha, Solver::SolutionInfo* si)
01502 {
01503 int i;
01504 int l = prob->l;
01505 double nu = param->nu;
01506
01507 schar *y = new schar[l];
01508
01509 for(i=0;i<l;i++)
01510 if(prob->y[i]>0)
01511 y[i] = +1;
01512 else
01513 y[i] = -1;
01514
01515 double sum_pos = nu*l/2;
01516 double sum_neg = nu*l/2;
01517
01518 for(i=0;i<l;i++)
01519 if(y[i] == +1)
01520 {
01521 alpha[i] = min(1.0,sum_pos);
01522 sum_pos -= alpha[i];
01523 }
01524 else
01525 {
01526 alpha[i] = min(1.0,sum_neg);
01527 sum_neg -= alpha[i];
01528 }
01529
01530 double *zeros = new double[l];
01531
01532 for(i=0;i<l;i++)
01533 zeros[i] = 0;
01534
01535 Solver_NU s;
01536 s.Solve(l, SVC_Q(*prob,*param,y), zeros, y,
01537 alpha, 1.0, 1.0, param->eps, si, param->shrinking);
01538 double r = si->r;
01539
01540 info("C = %f\n",1/r);
01541
01542 for(i=0;i<l;i++)
01543 alpha[i] *= y[i]/r;
01544
01545 si->rho /= r;
01546 si->obj /= (r*r);
01547 si->upper_bound_p = 1/r;
01548 si->upper_bound_n = 1/r;
01549
01550 delete[] y;
01551 delete[] zeros;
01552 }
01553
01554 static void solve_one_class(
01555 const svm_problem *prob, const svm_parameter *param,
01556 double *alpha, Solver::SolutionInfo* si)
01557 {
01558 int l = prob->l;
01559 double *zeros = new double[l];
01560 schar *ones = new schar[l];
01561 int i;
01562
01563 int n = (int)(param->nu*prob->l);
01564
01565 for(i=0;i<n;i++)
01566 alpha[i] = 1;
01567 if(n<prob->l)
01568 alpha[n] = param->nu * prob->l - n;
01569 for(i=n+1;i<l;i++)
01570 alpha[i] = 0;
01571
01572 for(i=0;i<l;i++)
01573 {
01574 zeros[i] = 0;
01575 ones[i] = 1;
01576 }
01577
01578 Solver s;
01579 s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones,
01580 alpha, 1.0, 1.0, param->eps, si, param->shrinking);
01581
01582 delete[] zeros;
01583 delete[] ones;
01584 }
01585
01586 static void solve_epsilon_svr(
01587 const svm_problem *prob, const svm_parameter *param,
01588 double *alpha, Solver::SolutionInfo* si)
01589 {
01590 int l = prob->l;
01591 double *alpha2 = new double[2*l];
01592 double *linear_term = new double[2*l];
01593 schar *y = new schar[2*l];
01594 int i;
01595
01596 for(i=0;i<l;i++)
01597 {
01598 alpha2[i] = 0;
01599 linear_term[i] = param->p - prob->y[i];
01600 y[i] = 1;
01601
01602 alpha2[i+l] = 0;
01603 linear_term[i+l] = param->p + prob->y[i];
01604 y[i+l] = -1;
01605 }
01606
01607 Solver s;
01608 s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
01609 alpha2, param->C, param->C, param->eps, si, param->shrinking);
01610
01611 double sum_alpha = 0;
01612 for(i=0;i<l;i++)
01613 {
01614 alpha[i] = alpha2[i] - alpha2[i+l];
01615 sum_alpha += fabs(alpha[i]);
01616 }
01617 info("nu = %f\n",sum_alpha/(param->C*l));
01618
01619 delete[] alpha2;
01620 delete[] linear_term;
01621 delete[] y;
01622 }
01623
01624 static void solve_nu_svr(
01625 const svm_problem *prob, const svm_parameter *param,
01626 double *alpha, Solver::SolutionInfo* si)
01627 {
01628 int l = prob->l;
01629 double C = param->C;
01630 double *alpha2 = new double[2*l];
01631 double *linear_term = new double[2*l];
01632 schar *y = new schar[2*l];
01633 int i;
01634
01635 double sum = C * param->nu * l / 2;
01636 for(i=0;i<l;i++)
01637 {
01638 alpha2[i] = alpha2[i+l] = min(sum,C);
01639 sum -= alpha2[i];
01640
01641 linear_term[i] = - prob->y[i];
01642 y[i] = 1;
01643
01644 linear_term[i+l] = prob->y[i];
01645 y[i+l] = -1;
01646 }
01647
01648 Solver_NU s;
01649 s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
01650 alpha2, C, C, param->eps, si, param->shrinking);
01651
01652 info("epsilon = %f\n",-si->r);
01653
01654 for(i=0;i<l;i++)
01655 alpha[i] = alpha2[i] - alpha2[i+l];
01656
01657 delete[] alpha2;
01658 delete[] linear_term;
01659 delete[] y;
01660 }
01661
01662
01663
01664
01665 struct decision_function
01666 {
01667 double *alpha;
01668 double rho;
01669 };
01670
01671 decision_function svm_train_one(
01672 const svm_problem *prob, const svm_parameter *param,
01673 double Cp, double Cn)
01674 {
01675 double *alpha = Malloc(double,prob->l);
01676 Solver::SolutionInfo si;
01677 switch(param->svm_type)
01678 {
01679 case C_SVC:
01680 solve_c_svc(prob,param,alpha,&si,Cp,Cn);
01681 break;
01682 case NU_SVC:
01683 solve_nu_svc(prob,param,alpha,&si);
01684 break;
01685 case ONE_CLASS:
01686 solve_one_class(prob,param,alpha,&si);
01687 break;
01688 case EPSILON_SVR:
01689 solve_epsilon_svr(prob,param,alpha,&si);
01690 break;
01691 case NU_SVR:
01692 solve_nu_svr(prob,param,alpha,&si);
01693 break;
01694 }
01695
01696 info("obj = %f, rho = %f\n",si.obj,si.rho);
01697
01698
01699
01700 int nSV = 0;
01701 int nBSV = 0;
01702 for(int i=0;i<prob->l;i++)
01703 {
01704 if(fabs(alpha[i]) > 0)
01705 {
01706 ++nSV;
01707 if(prob->y[i] > 0)
01708 {
01709 if(fabs(alpha[i]) >= si.upper_bound_p)
01710 ++nBSV;
01711 }
01712 else
01713 {
01714 if(fabs(alpha[i]) >= si.upper_bound_n)
01715 ++nBSV;
01716 }
01717 }
01718 }
01719
01720 info("nSV = %d, nBSV = %d\n",nSV,nBSV);
01721
01722 decision_function f;
01723 f.alpha = alpha;
01724 f.rho = si.rho;
01725 return f;
01726 }
01727
01728
01729
01730
01731 struct svm_model
01732 {
01733 svm_parameter param;
01734 int nr_class;
01735 int l;
01736 svm_node **SV;
01737 double **sv_coef;
01738 double *rho;
01739 double *probA;
01740 double *probB;
01741
01742
01743
01744 int *label;
01745 int *nSV;
01746
01747
01748 int free_sv;
01749
01750 };
01751
01752
01753 void sigmoid_train(
01754 int l, const double *dec_values, const double *labels,
01755 double& A, double& B)
01756 {
01757 double prior1=0, prior0 = 0;
01758 int i;
01759
01760 for (i=0;i<l;i++)
01761 if (labels[i] > 0) prior1+=1;
01762 else prior0+=1;
01763
01764 int max_iter=100;
01765 double min_step=1e-10;
01766 double sigma=1e-12;
01767 double eps=1e-5;
01768 double hiTarget=(prior1+1.0)/(prior1+2.0);
01769 double loTarget=1/(prior0+2.0);
01770 double *t=Malloc(double,l);
01771 double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize;
01772 double newA,newB,newf,d1,d2;
01773 int iter;
01774
01775
01776 A=0.0; B=log((prior0+1.0)/(prior1+1.0));
01777 double fval = 0.0;
01778
01779 for (i=0;i<l;i++)
01780 {
01781 if (labels[i]>0) t[i]=hiTarget;
01782 else t[i]=loTarget;
01783 fApB = dec_values[i]*A+B;
01784 if (fApB>=0)
01785 fval += t[i]*fApB + log(1+exp(-fApB));
01786 else
01787 fval += (t[i] - 1)*fApB +log(1+exp(fApB));
01788 }
01789 for (iter=0;iter<max_iter;iter++)
01790 {
01791
01792 h11=sigma;
01793 h22=sigma;
01794 h21=0.0;g1=0.0;g2=0.0;
01795 for (i=0;i<l;i++)
01796 {
01797 fApB = dec_values[i]*A+B;
01798 if (fApB >= 0)
01799 {
01800 p=exp(-fApB)/(1.0+exp(-fApB));
01801 q=1.0/(1.0+exp(-fApB));
01802 }
01803 else
01804 {
01805 p=1.0/(1.0+exp(fApB));
01806 q=exp(fApB)/(1.0+exp(fApB));
01807 }
01808 d2=p*q;
01809 h11+=dec_values[i]*dec_values[i]*d2;
01810 h22+=d2;
01811 h21+=dec_values[i]*d2;
01812 d1=t[i]-p;
01813 g1+=dec_values[i]*d1;
01814 g2+=d1;
01815 }
01816
01817
01818 if (fabs(g1)<eps && fabs(g2)<eps)
01819 break;
01820
01821
01822 det=h11*h22-h21*h21;
01823 dA=-(h22*g1 - h21 * g2) / det;
01824 dB=-(-h21*g1+ h11 * g2) / det;
01825 gd=g1*dA+g2*dB;
01826
01827
01828 stepsize = 1;
01829 while (stepsize >= min_step)
01830 {
01831 newA = A + stepsize * dA;
01832 newB = B + stepsize * dB;
01833
01834
01835 newf = 0.0;
01836 for (i=0;i<l;i++)
01837 {
01838 fApB = dec_values[i]*newA+newB;
01839 if (fApB >= 0)
01840 newf += t[i]*fApB + log(1+exp(-fApB));
01841 else
01842 newf += (t[i] - 1)*fApB +log(1+exp(fApB));
01843 }
01844
01845 if (newf<fval+0.0001*stepsize*gd)
01846 {
01847 A=newA;B=newB;fval=newf;
01848 break;
01849 }
01850 else
01851 stepsize = stepsize / 2.0;
01852 }
01853
01854 if (stepsize < min_step)
01855 {
01856 info("Line search fails in two-class probability estimates\n");
01857 break;
01858 }
01859 }
01860
01861 if (iter>=max_iter)
01862 info("Reaching maximal iterations in two-class probability estimates\n");
01863 free(t);
01864 }
01865
01866 double sigmoid_predict(double decision_value, double A, double B)
01867 {
01868 double fApB = decision_value*A+B;
01869 if (fApB >= 0)
01870 return exp(-fApB)/(1.0+exp(-fApB));
01871 else
01872 return 1.0/(1+exp(fApB)) ;
01873 }
01874
01875
01876 void multiclass_probability(int k, double **r, double *p)
01877 {
01878 int t,j;
01879 int iter = 0, max_iter=max(100,k);
01880 double **Q=Malloc(double *,k);
01881 double *Qp=Malloc(double,k);
01882 double pQp, eps=0.005/k;
01883
01884 for (t=0;t<k;t++)
01885 {
01886 p[t]=1.0/k;
01887 Q[t]=Malloc(double,k);
01888 Q[t][t]=0;
01889 for (j=0;j<t;j++)
01890 {
01891 Q[t][t]+=r[j][t]*r[j][t];
01892 Q[t][j]=Q[j][t];
01893 }
01894 for (j=t+1;j<k;j++)
01895 {
01896 Q[t][t]+=r[j][t]*r[j][t];
01897 Q[t][j]=-r[j][t]*r[t][j];
01898 }
01899 }
01900 for (iter=0;iter<max_iter;iter++)
01901 {
01902
01903 pQp=0;
01904 for (t=0;t<k;t++)
01905 {
01906 Qp[t]=0;
01907 for (j=0;j<k;j++)
01908 Qp[t]+=Q[t][j]*p[j];
01909 pQp+=p[t]*Qp[t];
01910 }
01911 double max_error=0;
01912 for (t=0;t<k;t++)
01913 {
01914 double error=fabs(Qp[t]-pQp);
01915 if (error>max_error)
01916 max_error=error;
01917 }
01918 if (max_error<eps) break;
01919
01920 for (t=0;t<k;t++)
01921 {
01922 double diff=(-Qp[t]+pQp)/Q[t][t];
01923 p[t]+=diff;
01924 pQp=(pQp+diff*(diff*Q[t][t]+2*Qp[t]))/(1+diff)/(1+diff);
01925 for (j=0;j<k;j++)
01926 {
01927 Qp[j]=(Qp[j]+diff*Q[t][j])/(1+diff);
01928 p[j]/=(1+diff);
01929 }
01930 }
01931 }
01932 if (iter>=max_iter)
01933 info("Exceeds max_iter in multiclass_prob\n");
01934 for(t=0;t<k;t++) free(Q[t]);
01935 free(Q);
01936 free(Qp);
01937 }
01938
01939
01940 void svm_binary_svc_probability(
01941 const svm_problem *prob, const svm_parameter *param,
01942 double Cp, double Cn, double& probA, double& probB)
01943 {
01944 int i;
01945 int nr_fold = 5;
01946 int *perm = Malloc(int,prob->l);
01947 double *dec_values = Malloc(double,prob->l);
01948
01949
01950 for(i=0;i<prob->l;i++) perm[i]=i;
01951 for(i=0;i<prob->l;i++)
01952 {
01953 int j = i+rand()%(prob->l-i);
01954 swap(perm[i],perm[j]);
01955 }
01956 for(i=0;i<nr_fold;i++)
01957 {
01958 int begin = i*prob->l/nr_fold;
01959 int end = (i+1)*prob->l/nr_fold;
01960 int j,k;
01961 struct svm_problem subprob;
01962
01963 subprob.l = prob->l-(end-begin);
01964 subprob.x = Malloc(struct svm_node*,subprob.l);
01965 subprob.y = Malloc(double,subprob.l);
01966
01967 k=0;
01968 for(j=0;j<begin;j++)
01969 {
01970 subprob.x[k] = prob->x[perm[j]];
01971 subprob.y[k] = prob->y[perm[j]];
01972 ++k;
01973 }
01974 for(j=end;j<prob->l;j++)
01975 {
01976 subprob.x[k] = prob->x[perm[j]];
01977 subprob.y[k] = prob->y[perm[j]];
01978 ++k;
01979 }
01980 int p_count=0,n_count=0;
01981 for(j=0;j<k;j++)
01982 if(subprob.y[j]>0)
01983 p_count++;
01984 else
01985 n_count++;
01986
01987 if(p_count==0 && n_count==0)
01988 for(j=begin;j<end;j++)
01989 dec_values[perm[j]] = 0;
01990 else if(p_count > 0 && n_count == 0)
01991 for(j=begin;j<end;j++)
01992 dec_values[perm[j]] = 1;
01993 else if(p_count == 0 && n_count > 0)
01994 for(j=begin;j<end;j++)
01995 dec_values[perm[j]] = -1;
01996 else
01997 {
01998 svm_parameter subparam = *param;
01999 subparam.probability=0;
02000 subparam.C=1.0;
02001 subparam.nr_weight=2;
02002 subparam.weight_label = Malloc(int,2);
02003 subparam.weight = Malloc(double,2);
02004 subparam.weight_label[0]=+1;
02005 subparam.weight_label[1]=-1;
02006 subparam.weight[0]=Cp;
02007 subparam.weight[1]=Cn;
02008 struct svm_model *submodel = svm_train(&subprob,&subparam);
02009 for(j=begin;j<end;j++)
02010 {
02011 svm_predict_values(submodel,prob->x[perm[j]],&(dec_values[perm[j]]));
02012
02013 dec_values[perm[j]] *= submodel->label[0];
02014 }
02015 svm_destroy_model(submodel);
02016 svm_destroy_param(&subparam);
02017 }
02018 free(subprob.x);
02019 free(subprob.y);
02020 }
02021 sigmoid_train(prob->l,dec_values,prob->y,probA,probB);
02022 free(dec_values);
02023 free(perm);
02024 }
02025
02026
02027 double svm_svr_probability(
02028 const svm_problem *prob, const svm_parameter *param)
02029 {
02030 int i;
02031 int nr_fold = 5;
02032 double *ymv = Malloc(double,prob->l);
02033 double mae = 0;
02034
02035 svm_parameter newparam = *param;
02036 newparam.probability = 0;
02037 svm_cross_validation(prob,&newparam,nr_fold,ymv);
02038 for(i=0;i<prob->l;i++)
02039 {
02040 ymv[i]=prob->y[i]-ymv[i];
02041 mae += fabs(ymv[i]);
02042 }
02043 mae /= prob->l;
02044 double std=sqrt(2*mae*mae);
02045 int count=0;
02046 mae=0;
02047 for(i=0;i<prob->l;i++)
02048 if (fabs(ymv[i]) > 5*std)
02049 count=count+1;
02050 else
02051 mae+=fabs(ymv[i]);
02052 mae /= (prob->l-count);
02053 info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae);
02054 free(ymv);
02055 return mae;
02056 }
02057
02058
02059
02060
02061 void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
02062 {
02063 int l = prob->l;
02064 int max_nr_class = 16;
02065 int nr_class = 0;
02066 int *label = Malloc(int,max_nr_class);
02067 int *count = Malloc(int,max_nr_class);
02068 int *data_label = Malloc(int,l);
02069 int i;
02070
02071 for(i=0;i<l;i++)
02072 {
02073 int this_label = (int)prob->y[i];
02074 int j;
02075 for(j=0;j<nr_class;j++)
02076 {
02077 if(this_label == label[j])
02078 {
02079 ++count[j];
02080 break;
02081 }
02082 }
02083 data_label[i] = j;
02084 if(j == nr_class)
02085 {
02086 if(nr_class == max_nr_class)
02087 {
02088 max_nr_class *= 2;
02089 label = (int *)realloc(label,max_nr_class*sizeof(int));
02090 count = (int *)realloc(count,max_nr_class*sizeof(int));
02091 }
02092 label[nr_class] = this_label;
02093 count[nr_class] = 1;
02094 ++nr_class;
02095 }
02096 }
02097
02098 int *start = Malloc(int,nr_class);
02099 start[0] = 0;
02100 for(i=1;i<nr_class;i++)
02101 start[i] = start[i-1]+count[i-1];
02102 for(i=0;i<l;i++)
02103 {
02104 perm[start[data_label[i]]] = i;
02105 ++start[data_label[i]];
02106 }
02107 start[0] = 0;
02108 for(i=1;i<nr_class;i++)
02109 start[i] = start[i-1]+count[i-1];
02110
02111 *nr_class_ret = nr_class;
02112 *label_ret = label;
02113 *start_ret = start;
02114 *count_ret = count;
02115 free(data_label);
02116 }
02117
02118
02119
02120
02121 svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
02122 {
02123 svm_model *model = Malloc(svm_model,1);
02124 model->param = *param;
02125 model->free_sv = 0;
02126
02127 if(param->svm_type == ONE_CLASS ||
02128 param->svm_type == EPSILON_SVR ||
02129 param->svm_type == NU_SVR)
02130 {
02131
02132 model->nr_class = 2;
02133 model->label = NULL;
02134 model->nSV = NULL;
02135 model->probA = NULL; model->probB = NULL;
02136 model->sv_coef = Malloc(double *,1);
02137
02138 if(param->probability &&
02139 (param->svm_type == EPSILON_SVR ||
02140 param->svm_type == NU_SVR))
02141 {
02142 model->probA = Malloc(double,1);
02143 model->probA[0] = svm_svr_probability(prob,param);
02144 }
02145
02146 decision_function f = svm_train_one(prob,param,0,0);
02147 model->rho = Malloc(double,1);
02148 model->rho[0] = f.rho;
02149
02150 int nSV = 0;
02151 int i;
02152 for(i=0;i<prob->l;i++)
02153 if(fabs(f.alpha[i]) > 0) ++nSV;
02154 model->l = nSV;
02155 model->SV = Malloc(svm_node *,nSV);
02156 model->sv_coef[0] = Malloc(double,nSV);
02157 int j = 0;
02158 for(i=0;i<prob->l;i++)
02159 if(fabs(f.alpha[i]) > 0)
02160 {
02161 model->SV[j] = prob->x[i];
02162 model->sv_coef[0][j] = f.alpha[i];
02163 ++j;
02164 }
02165
02166 free(f.alpha);
02167 }
02168 else
02169 {
02170
02171 int l = prob->l;
02172 int nr_class;
02173 int *label = NULL;
02174 int *start = NULL;
02175 int *count = NULL;
02176 int *perm = Malloc(int,l);
02177
02178
02179 svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
02180 svm_node **x = Malloc(svm_node *,l);
02181 int i;
02182 for(i=0;i<l;i++)
02183 x[i] = prob->x[perm[i]];
02184
02185
02186
02187 double *weighted_C = Malloc(double, nr_class);
02188 for(i=0;i<nr_class;i++)
02189 weighted_C[i] = param->C;
02190 for(i=0;i<param->nr_weight;i++)
02191 {
02192 int j;
02193 for(j=0;j<nr_class;j++)
02194 if(param->weight_label[i] == label[j])
02195 break;
02196 if(j == nr_class)
02197 fprintf(stderr,"warning: class label %d specified in weight is not found\n", param->weight_label[i]);
02198 else
02199 weighted_C[j] *= param->weight[i];
02200 }
02201
02202
02203
02204 bool *nonzero = Malloc(bool,l);
02205 for(i=0;i<l;i++)
02206 nonzero[i] = false;
02207 decision_function *f = Malloc(decision_function,nr_class*(nr_class-1)/2);
02208
02209 double *probA=NULL,*probB=NULL;
02210 if (param->probability)
02211 {
02212 probA=Malloc(double,nr_class*(nr_class-1)/2);
02213 probB=Malloc(double,nr_class*(nr_class-1)/2);
02214 }
02215
02216 int p = 0;
02217 for(i=0;i<nr_class;i++)
02218 for(int j=i+1;j<nr_class;j++)
02219 {
02220 svm_problem sub_prob;
02221 int si = start[i], sj = start[j];
02222 int ci = count[i], cj = count[j];
02223 sub_prob.l = ci+cj;
02224 sub_prob.x = Malloc(svm_node *,sub_prob.l);
02225 sub_prob.y = Malloc(double,sub_prob.l);
02226 int k;
02227 for(k=0;k<ci;k++)
02228 {
02229 sub_prob.x[k] = x[si+k];
02230 sub_prob.y[k] = +1;
02231 }
02232 for(k=0;k<cj;k++)
02233 {
02234 sub_prob.x[ci+k] = x[sj+k];
02235 sub_prob.y[ci+k] = -1;
02236 }
02237
02238 if(param->probability)
02239 svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p]);
02240
02241 f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
02242 for(k=0;k<ci;k++)
02243 if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
02244 nonzero[si+k] = true;
02245 for(k=0;k<cj;k++)
02246 if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
02247 nonzero[sj+k] = true;
02248 free(sub_prob.x);
02249 free(sub_prob.y);
02250 ++p;
02251 }
02252
02253
02254
02255 model->nr_class = nr_class;
02256
02257 model->label = Malloc(int,nr_class);
02258 for(i=0;i<nr_class;i++)
02259 model->label[i] = label[i];
02260
02261 model->rho = Malloc(double,nr_class*(nr_class-1)/2);
02262 for(i=0;i<nr_class*(nr_class-1)/2;i++)
02263 model->rho[i] = f[i].rho;
02264
02265 if(param->probability)
02266 {
02267 model->probA = Malloc(double,nr_class*(nr_class-1)/2);
02268 model->probB = Malloc(double,nr_class*(nr_class-1)/2);
02269 for(i=0;i<nr_class*(nr_class-1)/2;i++)
02270 {
02271 model->probA[i] = probA[i];
02272 model->probB[i] = probB[i];
02273 }
02274 }
02275 else
02276 {
02277 model->probA=NULL;
02278 model->probB=NULL;
02279 }
02280
02281 int total_sv = 0;
02282 int *nz_count = Malloc(int,nr_class);
02283 model->nSV = Malloc(int,nr_class);
02284 for(i=0;i<nr_class;i++)
02285 {
02286 int nSV = 0;
02287 for(int j=0;j<count[i];j++)
02288 if(nonzero[start[i]+j])
02289 {
02290 ++nSV;
02291 ++total_sv;
02292 }
02293 model->nSV[i] = nSV;
02294 nz_count[i] = nSV;
02295 }
02296
02297 info("Total nSV = %d\n",total_sv);
02298
02299 model->l = total_sv;
02300 model->SV = Malloc(svm_node *,total_sv);
02301 p = 0;
02302 for(i=0;i<l;i++)
02303 if(nonzero[i]) model->SV[p++] = x[i];
02304
02305 int *nz_start = Malloc(int,nr_class);
02306 nz_start[0] = 0;
02307 for(i=1;i<nr_class;i++)
02308 nz_start[i] = nz_start[i-1]+nz_count[i-1];
02309
02310 model->sv_coef = Malloc(double *,nr_class-1);
02311 for(i=0;i<nr_class-1;i++)
02312 model->sv_coef[i] = Malloc(double,total_sv);
02313
02314 p = 0;
02315 for(i=0;i<nr_class;i++)
02316 for(int j=i+1;j<nr_class;j++)
02317 {
02318
02319
02320
02321
02322 int si = start[i];
02323 int sj = start[j];
02324 int ci = count[i];
02325 int cj = count[j];
02326
02327 int q = nz_start[i];
02328 int k;
02329 for(k=0;k<ci;k++)
02330 if(nonzero[si+k])
02331 model->sv_coef[j-1][q++] = f[p].alpha[k];
02332 q = nz_start[j];
02333 for(k=0;k<cj;k++)
02334 if(nonzero[sj+k])
02335 model->sv_coef[i][q++] = f[p].alpha[ci+k];
02336 ++p;
02337 }
02338
02339 free(label);
02340 free(probA);
02341 free(probB);
02342 free(count);
02343 free(perm);
02344 free(start);
02345 free(x);
02346 free(weighted_C);
02347 free(nonzero);
02348 for(i=0;i<nr_class*(nr_class-1)/2;i++)
02349 free(f[i].alpha);
02350 free(f);
02351 free(nz_count);
02352 free(nz_start);
02353 }
02354 return model;
02355 }
02356
02357
02358 void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
02359 {
02360 int i;
02361 int *fold_start = Malloc(int,nr_fold+1);
02362 int l = prob->l;
02363 int *perm = Malloc(int,l);
02364 int nr_class;
02365
02366
02367
02368 if((param->svm_type == C_SVC ||
02369 param->svm_type == NU_SVC) && nr_fold < l)
02370 {
02371 int *start = NULL;
02372 int *label = NULL;
02373 int *count = NULL;
02374 svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
02375
02376
02377 int *fold_count = Malloc(int,nr_fold);
02378 int c;
02379 int *index = Malloc(int,l);
02380 for(i=0;i<l;i++)
02381 index[i]=perm[i];
02382 for (c=0; c<nr_class; c++)
02383 for(i=0;i<count[c];i++)
02384 {
02385 int j = i+rand()%(count[c]-i);
02386 swap(index[start[c]+j],index[start[c]+i]);
02387 }
02388 for(i=0;i<nr_fold;i++)
02389 {
02390 fold_count[i] = 0;
02391 for (c=0; c<nr_class;c++)
02392 fold_count[i]+=(i+1)*count[c]/nr_fold-i*count[c]/nr_fold;
02393 }
02394 fold_start[0]=0;
02395 for (i=1;i<=nr_fold;i++)
02396 fold_start[i] = fold_start[i-1]+fold_count[i-1];
02397 for (c=0; c<nr_class;c++)
02398 for(i=0;i<nr_fold;i++)
02399 {
02400 int begin = start[c]+i*count[c]/nr_fold;
02401 int end = start[c]+(i+1)*count[c]/nr_fold;
02402 for(int j=begin;j<end;j++)
02403 {
02404 perm[fold_start[i]] = index[j];
02405 fold_start[i]++;
02406 }
02407 }
02408 fold_start[0]=0;
02409 for (i=1;i<=nr_fold;i++)
02410 fold_start[i] = fold_start[i-1]+fold_count[i-1];
02411 free(start);
02412 free(label);
02413 free(count);
02414 free(index);
02415 free(fold_count);
02416 }
02417 else
02418 {
02419 for(i=0;i<l;i++) perm[i]=i;
02420 for(i=0;i<l;i++)
02421 {
02422 int j = i+rand()%(l-i);
02423 swap(perm[i],perm[j]);
02424 }
02425 for(i=0;i<=nr_fold;i++)
02426 fold_start[i]=i*l/nr_fold;
02427 }
02428
02429 for(i=0;i<nr_fold;i++)
02430 {
02431 int begin = fold_start[i];
02432 int end = fold_start[i+1];
02433 int j,k;
02434 struct svm_problem subprob;
02435
02436 subprob.l = l-(end-begin);
02437 subprob.x = Malloc(struct svm_node*,subprob.l);
02438 subprob.y = Malloc(double,subprob.l);
02439
02440 k=0;
02441 for(j=0;j<begin;j++)
02442 {
02443 subprob.x[k] = prob->x[perm[j]];
02444 subprob.y[k] = prob->y[perm[j]];
02445 ++k;
02446 }
02447 for(j=end;j<l;j++)
02448 {
02449 subprob.x[k] = prob->x[perm[j]];
02450 subprob.y[k] = prob->y[perm[j]];
02451 ++k;
02452 }
02453 struct svm_model *submodel = svm_train(&subprob,param);
02454 if(param->probability &&
02455 (param->svm_type == C_SVC || param->svm_type == NU_SVC))
02456 {
02457 double *prob_estimates=Malloc(double,svm_get_nr_class(submodel));
02458 for(j=begin;j<end;j++)
02459 target[perm[j]] = svm_predict_probability(submodel,prob->x[perm[j]],prob_estimates);
02460 free(prob_estimates);
02461 }
02462 else
02463 for(j=begin;j<end;j++)
02464 target[perm[j]] = svm_predict(submodel,prob->x[perm[j]]);
02465 svm_destroy_model(submodel);
02466 free(subprob.x);
02467 free(subprob.y);
02468 }
02469 free(fold_start);
02470 free(perm);
02471 }
02472
02473
02474 int svm_get_svm_type(const svm_model *model)
02475 {
02476 return model->param.svm_type;
02477 }
02478
02479 int svm_get_nr_class(const svm_model *model)
02480 {
02481 return model->nr_class;
02482 }
02483
02484 void svm_get_labels(const svm_model *model, int* label)
02485 {
02486 if (model->label != NULL)
02487 for(int i=0;i<model->nr_class;i++)
02488 label[i] = model->label[i];
02489 }
02490
02491 double svm_get_svr_probability(const svm_model *model)
02492 {
02493 if ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
02494 model->probA!=NULL)
02495 return model->probA[0];
02496 else
02497 {
02498 info("Model doesn't contain information for SVR probability inference\n");
02499 return 0;
02500 }
02501 }
02502
02503 void svm_predict_values(const svm_model *model, const svm_node *x, double* dec_values)
02504 {
02505 if(model->param.svm_type == ONE_CLASS ||
02506 model->param.svm_type == EPSILON_SVR ||
02507 model->param.svm_type == NU_SVR)
02508 {
02509 double *sv_coef = model->sv_coef[0];
02510 double sum = 0;
02511 for(int i=0;i<model->l;i++)
02512 sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param);
02513 sum -= model->rho[0];
02514 *dec_values = sum;
02515 }
02516 else
02517 {
02518 int i;
02519 int nr_class = model->nr_class;
02520 int l = model->l;
02521
02522 double *kvalue = Malloc(double,l);
02523 for(i=0;i<l;i++)
02524 kvalue[i] = Kernel::k_function(x,model->SV[i],model->param);
02525
02526 int *start = Malloc(int,nr_class);
02527 start[0] = 0;
02528 for(i=1;i<nr_class;i++)
02529 start[i] = start[i-1]+model->nSV[i-1];
02530
02531 int p=0;
02532 for(i=0;i<nr_class;i++)
02533 for(int j=i+1;j<nr_class;j++)
02534 {
02535 double sum = 0;
02536 int si = start[i];
02537 int sj = start[j];
02538 int ci = model->nSV[i];
02539 int cj = model->nSV[j];
02540
02541 int k;
02542 double *coef1 = model->sv_coef[j-1];
02543 double *coef2 = model->sv_coef[i];
02544 for(k=0;k<ci;k++)
02545 sum += coef1[si+k] * kvalue[si+k];
02546 for(k=0;k<cj;k++)
02547 sum += coef2[sj+k] * kvalue[sj+k];
02548 sum -= model->rho[p];
02549 dec_values[p] = sum;
02550 p++;
02551 }
02552
02553 free(kvalue);
02554 free(start);
02555 }
02556 }
02557
02558 double svm_predict(const svm_model *model, const svm_node *x)
02559 {
02560 if(model->param.svm_type == ONE_CLASS ||
02561 model->param.svm_type == EPSILON_SVR ||
02562 model->param.svm_type == NU_SVR)
02563 {
02564 double res;
02565 svm_predict_values(model, x, &res);
02566
02567 if(model->param.svm_type == ONE_CLASS)
02568 return (res>0)?1:-1;
02569 else
02570 return res;
02571 }
02572 else
02573 {
02574 int i;
02575 int nr_class = model->nr_class;
02576 double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
02577 svm_predict_values(model, x, dec_values);
02578
02579 int *vote = Malloc(int,nr_class);
02580 for(i=0;i<nr_class;i++)
02581 vote[i] = 0;
02582 int pos=0;
02583 for(i=0;i<nr_class;i++)
02584 for(int j=i+1;j<nr_class;j++)
02585 {
02586 if(dec_values[pos++] > 0)
02587 ++vote[i];
02588 else
02589 ++vote[j];
02590 }
02591
02592 int vote_max_idx = 0;
02593 for(i=1;i<nr_class;i++)
02594 if(vote[i] > vote[vote_max_idx])
02595 vote_max_idx = i;
02596 free(vote);
02597 free(dec_values);
02598 return model->label[vote_max_idx];
02599 }
02600 }
02601
02602 double svm_predict_probability(
02603 const svm_model *model, const svm_node *x, double *prob_estimates)
02604 {
02605 if ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
02606 model->probA!=NULL && model->probB!=NULL)
02607 {
02608 int i;
02609 int nr_class = model->nr_class;
02610 double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
02611 svm_predict_values(model, x, dec_values);
02612
02613 double min_prob=1e-7;
02614 double **pairwise_prob=Malloc(double *,nr_class);
02615 for(i=0;i<nr_class;i++)
02616 pairwise_prob[i]=Malloc(double,nr_class);
02617 int k=0;
02618 for(i=0;i<nr_class;i++)
02619 for(int j=i+1;j<nr_class;j++)
02620 {
02621 pairwise_prob[i][j]=min(max(sigmoid_predict(dec_values[k],model->probA[k],model->probB[k]),min_prob),1-min_prob);
02622 pairwise_prob[j][i]=1-pairwise_prob[i][j];
02623 k++;
02624 }
02625 multiclass_probability(nr_class,pairwise_prob,prob_estimates);
02626
02627 int prob_max_idx = 0;
02628 for(i=1;i<nr_class;i++)
02629 if(prob_estimates[i] > prob_estimates[prob_max_idx])
02630 prob_max_idx = i;
02631 for(i=0;i<nr_class;i++)
02632 free(pairwise_prob[i]);
02633 free(dec_values);
02634 free(pairwise_prob);
02635 return model->label[prob_max_idx];
02636 }
02637 else
02638 return svm_predict(model, x);
02639 }
02640
02641 const char *svm_type_table[] =
02642 {
02643 "c_svc","nu_svc","one_class","epsilon_svr","nu_svr",NULL
02644 };
02645
02646 const char *kernel_type_table[]=
02647 {
02648 "linear","polynomial","rbf","sigmoid","precomputed",NULL
02649 };
02650
02651 void ignore_retval_gcc46(int ret) { }
02652
02653 int svm_save_model(const char *model_file_name, const svm_model *model)
02654 {
02655 FILE *fp = fopen(model_file_name,"w");
02656 if(fp==NULL) return -1;
02657
02658 const svm_parameter& param = model->param;
02659
02660 fprintf(fp,"svm_type %s\n", svm_type_table[param.svm_type]);
02661 fprintf(fp,"kernel_type %s\n", kernel_type_table[param.kernel_type]);
02662
02663 if(param.kernel_type == POLY)
02664 fprintf(fp,"degree %d\n", param.degree);
02665
02666 if(param.kernel_type == POLY || param.kernel_type == RBF || param.kernel_type == SIGMOID)
02667 fprintf(fp,"gamma %g\n", param.gamma);
02668
02669 if(param.kernel_type == POLY || param.kernel_type == SIGMOID)
02670 fprintf(fp,"coef0 %g\n", param.coef0);
02671
02672 int nr_class = model->nr_class;
02673 int l = model->l;
02674 fprintf(fp, "nr_class %d\n", nr_class);
02675 fprintf(fp, "total_sv %d\n",l);
02676
02677 {
02678 fprintf(fp, "rho");
02679 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02680 fprintf(fp," %g",model->rho[i]);
02681 fprintf(fp, "\n");
02682 }
02683
02684 if(model->label)
02685 {
02686 fprintf(fp, "label");
02687 for(int i=0;i<nr_class;i++)
02688 fprintf(fp," %d",model->label[i]);
02689 fprintf(fp, "\n");
02690 }
02691
02692 if(model->probA)
02693 {
02694 fprintf(fp, "probA");
02695 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02696 fprintf(fp," %g",model->probA[i]);
02697 fprintf(fp, "\n");
02698 }
02699 if(model->probB)
02700 {
02701 fprintf(fp, "probB");
02702 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02703 fprintf(fp," %g",model->probB[i]);
02704 fprintf(fp, "\n");
02705 }
02706
02707 if(model->nSV)
02708 {
02709 fprintf(fp, "nr_sv");
02710 for(int i=0;i<nr_class;i++)
02711 fprintf(fp," %d",model->nSV[i]);
02712 fprintf(fp, "\n");
02713 }
02714
02715 fprintf(fp, "SV\n");
02716 const double * const *sv_coef = model->sv_coef;
02717 const svm_node * const *SV = model->SV;
02718
02719 for(int i=0;i<l;i++)
02720 {
02721 for(int j=0;j<nr_class-1;j++)
02722 fprintf(fp, "%.16g ",sv_coef[j][i]);
02723
02724 const svm_node *p = SV[i];
02725
02726 if(param.kernel_type == PRECOMPUTED)
02727 fprintf(fp,"0:%d ",(int)(p->value));
02728 else
02729 while(p->index != -1)
02730 {
02731 fprintf(fp,"%d:%.8g ",p->index,p->value);
02732 p++;
02733 }
02734 fprintf(fp, "\n");
02735 }
02736 if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
02737 else return 0;
02738 }
02739
02740 svm_model *svm_load_model(const char *model_file_name)
02741 {
02742 FILE *fp = fopen(model_file_name,"r");
02743 if(fp==NULL) return NULL;
02744
02745
02746
02747 svm_model *model = Malloc(svm_model,1);
02748 svm_parameter& param = model->param;
02749 model->rho = NULL;
02750 model->probA = NULL;
02751 model->probB = NULL;
02752 model->label = NULL;
02753 model->nSV = NULL;
02754
02755 char cmd[81];
02756 int ret;
02757 while(1)
02758 {
02759 ret = fscanf(fp,"%80s",cmd);
02760
02761 if(strcmp(cmd,"svm_type")==0)
02762 {
02763 ret = fscanf(fp,"%80s",cmd);
02764 int i;
02765 for(i=0;svm_type_table[i];i++)
02766 {
02767 if(strcmp(svm_type_table[i],cmd)==0)
02768 {
02769 param.svm_type=i;
02770 break;
02771 }
02772 }
02773 if(svm_type_table[i] == NULL)
02774 {
02775 fprintf(stderr,"unknown svm type.\n");
02776 free(model->rho);
02777 free(model->label);
02778 free(model->nSV);
02779 free(model);
02780 return NULL;
02781 }
02782 }
02783 else if(strcmp(cmd,"kernel_type")==0)
02784 {
02785 ret = fscanf(fp,"%80s",cmd);
02786 int i;
02787 for(i=0;kernel_type_table[i];i++)
02788 {
02789 if(strcmp(kernel_type_table[i],cmd)==0)
02790 {
02791 param.kernel_type=i;
02792 break;
02793 }
02794 }
02795 if(kernel_type_table[i] == NULL)
02796 {
02797 fprintf(stderr,"unknown kernel function.\n");
02798 free(model->rho);
02799 free(model->label);
02800 free(model->nSV);
02801 free(model);
02802 return NULL;
02803 }
02804 }
02805 else if(strcmp(cmd,"degree")==0)
02806 ret = fscanf(fp,"%d",¶m.degree);
02807 else if(strcmp(cmd,"gamma")==0)
02808 ret = fscanf(fp,"%lf",¶m.gamma);
02809 else if(strcmp(cmd,"coef0")==0)
02810 ret = fscanf(fp,"%lf",¶m.coef0);
02811 else if(strcmp(cmd,"nr_class")==0)
02812 ret = fscanf(fp,"%d",&model->nr_class);
02813 else if(strcmp(cmd,"total_sv")==0)
02814 ret = fscanf(fp,"%d",&model->l);
02815 else if(strcmp(cmd,"rho")==0)
02816 {
02817 int n = model->nr_class * (model->nr_class-1)/2;
02818 model->rho = Malloc(double,n);
02819 for(int i=0;i<n;i++)
02820 ret = fscanf(fp,"%lf",&model->rho[i]);
02821 }
02822 else if(strcmp(cmd,"label")==0)
02823 {
02824 int n = model->nr_class;
02825 model->label = Malloc(int,n);
02826 for(int i=0;i<n;i++)
02827 ret = fscanf(fp,"%d",&model->label[i]);
02828 }
02829 else if(strcmp(cmd,"probA")==0)
02830 {
02831 int n = model->nr_class * (model->nr_class-1)/2;
02832 model->probA = Malloc(double,n);
02833 for(int i=0;i<n;i++)
02834 ret = fscanf(fp,"%lf",&model->probA[i]);
02835 }
02836 else if(strcmp(cmd,"probB")==0)
02837 {
02838 int n = model->nr_class * (model->nr_class-1)/2;
02839 model->probB = Malloc(double,n);
02840 for(int i=0;i<n;i++)
02841 ret = fscanf(fp,"%lf",&model->probB[i]);
02842 }
02843 else if(strcmp(cmd,"nr_sv")==0)
02844 {
02845 int n = model->nr_class;
02846 model->nSV = Malloc(int,n);
02847 for(int i=0;i<n;i++)
02848 ret = fscanf(fp,"%d",&model->nSV[i]);
02849 }
02850 else if(strcmp(cmd,"SV")==0)
02851 {
02852 while(1)
02853 {
02854 int c = getc(fp);
02855 if(c==EOF || c=='\n') break;
02856 }
02857 break;
02858 }
02859 else
02860 {
02861 fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
02862 free(model->rho);
02863 free(model->label);
02864 free(model->nSV);
02865 free(model);
02866 return NULL;
02867 }
02868 }
02869
02870
02871
02872 int elements = 0;
02873 long pos = ftell(fp);
02874
02875 while(1)
02876 {
02877 int c = fgetc(fp);
02878 switch(c)
02879 {
02880 case '\n':
02881
02882 case ':':
02883 ++elements;
02884 break;
02885 case EOF:
02886 goto out;
02887 default:
02888 ;
02889 }
02890 }
02891 out:
02892 fseek(fp,pos,SEEK_SET);
02893
02894 int m = model->nr_class - 1;
02895 int l = model->l;
02896 model->sv_coef = Malloc(double *,m);
02897 int i;
02898 for(i=0;i<m;i++)
02899 model->sv_coef[i] = Malloc(double,l);
02900 model->SV = Malloc(svm_node*,l);
02901 svm_node *x_space=NULL;
02902 if(l>0) x_space = Malloc(svm_node,elements);
02903
02904 int j=0;
02905 for(i=0;i<l;i++)
02906 {
02907 model->SV[i] = &x_space[j];
02908 for(int k=0;k<m;k++)
02909 ret = fscanf(fp,"%lf",&model->sv_coef[k][i]);
02910 while(1)
02911 {
02912 int c;
02913 do {
02914 c = getc(fp);
02915 if(c=='\n') goto out2;
02916 } while(isspace(c));
02917 ungetc(c,fp);
02918 ret = fscanf(fp,"%d:%lf",&(x_space[j].index),&(x_space[j].value));
02919 ++j;
02920 }
02921 out2:
02922 x_space[j++].index = -1;
02923 }
02924 if (ferror(fp) != 0 || fclose(fp) != 0) return NULL;
02925
02926 model->free_sv = 1;
02927
02928 ignore_retval_gcc46(ret);
02929
02930 return model;
02931 }
02932
02933 void svm_destroy_model(svm_model* model)
02934 {
02935 if(model->free_sv && model->l > 0)
02936 free((void *)(model->SV[0]));
02937 for(int i=0;i<model->nr_class-1;i++)
02938 free(model->sv_coef[i]);
02939 free(model->SV);
02940 free(model->sv_coef);
02941 free(model->rho);
02942 free(model->label);
02943 free(model->probA);
02944 free(model->probB);
02945 free(model->nSV);
02946 free(model);
02947 }
02948
02949 void svm_destroy_param(svm_parameter* param)
02950 {
02951 free(param->weight_label);
02952 free(param->weight);
02953 }
02954
02955 const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
02956 {
02957
02958
02959 int svm_type = param->svm_type;
02960 if(svm_type != C_SVC &&
02961 svm_type != NU_SVC &&
02962 svm_type != ONE_CLASS &&
02963 svm_type != EPSILON_SVR &&
02964 svm_type != NU_SVR)
02965 return "unknown svm type";
02966
02967
02968
02969 int kernel_type = param->kernel_type;
02970 if(kernel_type != LINEAR &&
02971 kernel_type != POLY &&
02972 kernel_type != RBF &&
02973 kernel_type != SIGMOID &&
02974 kernel_type != PRECOMPUTED)
02975 return "unknown kernel type";
02976
02977 if(param->degree < 0)
02978 return "degree of polynomial kernel < 0";
02979
02980
02981
02982 if(param->cache_size <= 0)
02983 return "cache_size <= 0";
02984
02985 if(param->eps <= 0)
02986 return "eps <= 0";
02987
02988 if(svm_type == C_SVC ||
02989 svm_type == EPSILON_SVR ||
02990 svm_type == NU_SVR)
02991 if(param->C <= 0)
02992 return "C <= 0";
02993
02994 if(svm_type == NU_SVC ||
02995 svm_type == ONE_CLASS ||
02996 svm_type == NU_SVR)
02997 if(param->nu <= 0 || param->nu > 1)
02998 return "nu <= 0 or nu > 1";
02999
03000 if(svm_type == EPSILON_SVR)
03001 if(param->p < 0)
03002 return "p < 0";
03003
03004 if(param->shrinking != 0 &&
03005 param->shrinking != 1)
03006 return "shrinking != 0 and shrinking != 1";
03007
03008 if(param->probability != 0 &&
03009 param->probability != 1)
03010 return "probability != 0 and probability != 1";
03011
03012 if(param->probability == 1 &&
03013 svm_type == ONE_CLASS)
03014 return "one-class SVM probability output not supported yet";
03015
03016
03017
03018
03019 if(svm_type == NU_SVC)
03020 {
03021 int l = prob->l;
03022 int max_nr_class = 16;
03023 int nr_class = 0;
03024 int *label = Malloc(int,max_nr_class);
03025 int *count = Malloc(int,max_nr_class);
03026
03027 int i;
03028 for(i=0;i<l;i++)
03029 {
03030 int this_label = (int)prob->y[i];
03031 int j;
03032 for(j=0;j<nr_class;j++)
03033 if(this_label == label[j])
03034 {
03035 ++count[j];
03036 break;
03037 }
03038 if(j == nr_class)
03039 {
03040 if(nr_class == max_nr_class)
03041 {
03042 max_nr_class *= 2;
03043 label = (int *)realloc(label,max_nr_class*sizeof(int));
03044 count = (int *)realloc(count,max_nr_class*sizeof(int));
03045 }
03046 label[nr_class] = this_label;
03047 count[nr_class] = 1;
03048 ++nr_class;
03049 }
03050 }
03051
03052 for(i=0;i<nr_class;i++)
03053 {
03054 int n1 = count[i];
03055 for(int j=i+1;j<nr_class;j++)
03056 {
03057 int n2 = count[j];
03058 if(param->nu*(n1+n2)/2 > min(n1,n2))
03059 {
03060 free(label);
03061 free(count);
03062 return "specified nu is infeasible";
03063 }
03064 }
03065 }
03066 free(label);
03067 free(count);
03068 }
03069
03070 return NULL;
03071 }
03072
03073 int svm_check_probability_model(const svm_model *model)
03074 {
03075 return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
03076 model->probA!=NULL && model->probB!=NULL) ||
03077 ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
03078 model->probA!=NULL);
03079 }