diff --git a/CHANGES.txt b/CHANGES.txt
index 5bee6c2..99e0c3a 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,6 @@
 1.4.1-SNAPSHOT
-- updated to liblinear 1.7
+- upgraded libsvm to 3.0, now relying on the external lib instead of modified code
+- upgraded to liblinear 1.7
 - can specify weighting schemes per field
 - added TestWeightingSchemes
 - can remap the attribute numbers before generating the vector file
diff --git a/lib/libsvm-3.0.jar b/lib/libsvm-3.0.jar
new file mode 100644
index 0000000..6ed1a25
Binary files /dev/null and b/lib/libsvm-3.0.jar differ
diff --git a/libLinear.copyright.txt b/libLinear.copyright.txt
index 65ad0ec..c319013 100644
--- a/libLinear.copyright.txt
+++ b/libLinear.copyright.txt
@@ -1,5 +1,4 @@
-
-Copyright (c) 2007-2008 The LIBLINEAR Project.
+Copyright (c) 2007-2010 The LIBLINEAR Project.
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
@@ -29,3 +28,4 @@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/libSVM.copyright.txt b/libSVM.copyright.txt
index 5af6ebb..58a2d82 100644
--- a/libSVM.copyright.txt
+++ b/libSVM.copyright.txt
@@ -1,4 +1,4 @@
-Copyright (c) 2000-2006 Chih-Chung Chang and Chih-Jen Lin
+Copyright (c) 2000-2010 Chih-Chung Chang and Chih-Jen Lin
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
diff --git a/src/java/com/digitalpebble/classification/libsvm/LibSVMModelCreator.java b/src/java/com/digitalpebble/classification/libsvm/LibSVMModelCreator.java
index a015555..c364cc7 100755
--- a/src/java/com/digitalpebble/classification/libsvm/LibSVMModelCreator.java
+++ b/src/java/com/digitalpebble/classification/libsvm/LibSVMModelCreator.java
@@ -29,7 +29,6 @@
 import libsvm.svm_node;
 import libsvm.svm_parameter;
 import libsvm.svm_problem;
-import libsvm.svm_problem_impl;
 
 import com.digitalpebble.classification.Document;
 import com.digitalpebble.classification.Learner;
@@ -94,12 +93,6 @@ public void internal_learn() throws Exception {
         } else {
             model = svm.svm_train(prob, param);
             svm.svm_save_model(model_file_name, model);
-            // dump linear weights in lexicon
-            try {
-                double[] weights = model.getLinearWeights();
-                this.lexicon.setLinearWeight(weights);
-            } catch (Exception e) {
-            }
         }
     }
 
@@ -221,12 +214,15 @@ private void read_problem(File learningFile) throws IOException {
             max_index = Math.max(max_index, x[m - 1].index);
             vx.addElement(x);
         }
-        prob = new svm_problem_impl(vy.size());
-        for (int i = 0; i < prob.size(); i++)
-            prob.setNodes(i, (svm_node[]) vx.elementAt(i));
-        for (int i = 0; i < prob.size(); i++) {
+        prob = new svm_problem();
+        prob.l=vy.size();
+        prob.y = new double[prob.l];
+        prob.x = new svm_node[prob.l][];
+        for (int i = 0; i < prob.l; i++)
+            prob.x[i]= (svm_node[]) vx.elementAt(i);
+        for (int i = 0; i < prob.l; i++) {
             double labell = Double.parseDouble((String) vy.elementAt(i));
-            prob.setLabel(i, labell);
+            prob.y[i]= labell;
         }
         if (param.gamma == 0)
             param.gamma = 1.0 / max_index;
@@ -246,13 +242,13 @@ private void do_cross_validation() {
         int total_correct = 0;
         double total_error = 0;
         double sumv = 0, sumy = 0, sumvv = 0, sumyy = 0, sumvy = 0;
-        double size = prob.size();
-        double[] target = new double[prob.size()];
+        double size = prob.l;
+        double[] target = new double[prob.l];
         svm.svm_cross_validation(prob, param, this.nfold, target);
         if
(param.svm_type == svm_parameter.EPSILON_SVR || param.svm_type == svm_parameter.NU_SVR) { - for (i = 0; i < prob.size(); i++) { - double y = prob.getLabel(i); + for (i = 0; i < prob.l; i++) { + double y = prob.y[i]; double v = target[i]; total_error += (v - y) * (v - y); sumv += v; @@ -274,7 +270,7 @@ private void do_cross_validation() { int numclasses = lexicon.getLabelNum(); double[][] confMatrix = new double[numclasses][numclasses]; for (i = 0; i < size; i++) { - double expected = prob.getLabel(i); + double expected = prob.y[i]; if (target[i] == expected) ++total_correct; confMatrix[(int) target[i]][(int) expected]++; @@ -328,13 +324,13 @@ private void do_cross_validation() { Map inverted = lexicon.getInvertedIndex(); for (i = 0; i < size; i++) { StringBuffer sb = new StringBuffer(); - double expected = prob.getLabel(i); + double expected = prob.y[i]; if (target[i] == expected) continue; sb.append("expected: ").append(lexicon.getLabel((int) expected)) .append("\tfound:").append( lexicon.getLabel((int) target[i])); - svm_node[] nodes = prob.getNodes(i); + svm_node[] nodes = prob.x[i]; for (svm_node node : nodes) { String attLabel = inverted.get(new Integer(node.index)); if (attLabel == null) diff --git a/src/java/libsvm/svm.java b/src/java/libsvm/svm.java deleted file mode 100644 index 3434ac9..0000000 --- a/src/java/libsvm/svm.java +++ /dev/null @@ -1,2811 +0,0 @@ -package libsvm; -import java.io.BufferedReader; -import java.io.DataOutputStream; -import java.io.FileOutputStream; -import java.io.FileReader; -import java.io.IOException; -import java.util.StringTokenizer; - -// -// Kernel Cache -// -// l is the number of total data items -// size is the cache size limit in bytes -// -class Cache { - private final int l; - private int size; - private final class head_t - { - head_t prev, next; // a cicular list - float[] data; - int len; // data[0,len) is cached in this entry - } - private final head_t[] head; - private head_t lru_head; - - Cache(int l_, int size_) - { - l = l_; - size = size_; - head = new head_t[l]; - for(int i=0;i= len if nothing needs to be filled) - // java: simulate pointer using single-element array - int get_data(int index, float[][] data, int len) - { - head_t h = head[index]; - if(h.len > 0) lru_delete(h); - int more = len - h.len; - - if(more > 0) - { - // free old space - while(size < more) - { - head_t old = lru_head.next; - lru_delete(old); - size += old.len; - old.data = null; - old.len = 0; - } - - // allocate new space - float[] new_data = new float[len]; - if(h.data != null) System.arraycopy(h.data,0,new_data,0,h.len); - h.data = new_data; - size -= more; - do {int _=h.len; h.len=len; len=_;} while(false); - } - - lru_insert(h); - data[0] = h.data; - return len; - } - - void swap_index(int i, int j) - { - if(i==j) return; - - if(head[i].len > 0) lru_delete(head[i]); - if(head[j].len > 0) lru_delete(head[j]); - do {float[] _=head[i].data; head[i].data=head[j].data; head[j].data=_;} while(false); - do {int _=head[i].len; head[i].len=head[j].len; head[j].len=_;} while(false); - if(head[i].len > 0) lru_insert(head[i]); - if(head[j].len > 0) lru_insert(head[j]); - - if(i>j) do {int _=i; i=j; j=_;} while(false); - for(head_t h = lru_head.next; h!=lru_head; h=h.next) - { - if(h.len > i) - { - if(h.len > j) - do {float _=h.data[i]; h.data[i]=h.data[j]; h.data[j]=_;} while(false); - else - { - // give up - lru_delete(h); - size += h.len; - h.data = null; - h.len = 0; - } - } - } - } -} - -// -// Kernel evaluation -// -// the static method k_function is for doing 
single kernel evaluation -// the constructor of Kernel prepares to calculate the l*l kernel matrix -// the member function get_Q is for getting one column from the Q Matrix -// -abstract class QMatrix { - abstract float[] get_Q(int column, int len); - abstract float[] get_QD(); - abstract void swap_index(int i, int j); -}; - -abstract class Kernel extends QMatrix { - private svm_node[][] x; - private final double[] x_square; - - // svm_parameter - private final int kernel_type; - private final int degree; - private final double gamma; - private final double coef0; - - abstract float[] get_Q(int column, int len); - abstract float[] get_QD(); - - void swap_index(int i, int j) - { - do {svm_node[] _=x[i]; x[i]=x[j]; x[j]=_;} while(false); - if(x_square != null) do {double _=x_square[i]; x_square[i]=x_square[j]; x_square[j]=_;} while(false); - } - - private static double powi(double base, int times) - { - double tmp = base, ret = 1.0; - - for(int t=times; t>0; t/=2) - { - if(t%2==1) ret*=tmp; - tmp = tmp * tmp; - } - return ret; - } - - private static double tanh(double x) - { - double e = Math.exp(x); - return 1.0-2.0/(e*e+1); - } - - double kernel_function(int i, int j) - { - switch(kernel_type) - { - case svm_parameter.LINEAR: - return dot(x[i],x[j]); - case svm_parameter.POLY: - return powi(gamma*dot(x[i],x[j])+coef0,degree); - case svm_parameter.RBF: - return Math.exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j]))); - case svm_parameter.SIGMOID: - return tanh(gamma*dot(x[i],x[j])+coef0); - case svm_parameter.PRECOMPUTED: - return x[i][(int)(x[j][0].value)].value; - default: - return 0; // java - } - } - - Kernel(int l, svm_node[][] x_, svm_parameter param) - { - this.kernel_type = param.kernel_type; - this.degree = param.degree; - this.gamma = param.gamma; - this.coef0 = param.coef0; - - x = (svm_node[][])x_.clone(); - - if(kernel_type == svm_parameter.RBF) - { - x_square = new double[l]; - for(int i=0;i y[j].index) - ++j; - else - ++i; - } - } - return sum; - } - - final static double k_function(svm_node[] x, svm_node[] y, - svm_parameter param) - { - switch(param.kernel_type) - { - case svm_parameter.LINEAR: - return dot(x,y); - case svm_parameter.POLY: - return powi(param.gamma*dot(x,y)+param.coef0,param.degree); - case svm_parameter.RBF: - { - double sum = 0; - int xlen = x.length; - int ylen = y.length; - int i = 0; - int j = 0; - while(i < xlen && j < ylen) - { - if(x[i].index == y[j].index) - { - double d = x[i++].value - y[j++].value; - sum += d*d; - } - else if(x[i].index > y[j].index) - { - sum += y[j].value * y[j].value; - ++j; - } - else - { - sum += x[i].value * x[i].value; - ++i; - } - } - - while(i < xlen) - { - sum += x[i].value * x[i].value; - ++i; - } - - while(j < ylen) - { - sum += y[j].value * y[j].value; - ++j; - } - - return Math.exp(-param.gamma*sum); - } - case svm_parameter.SIGMOID: - return tanh(param.gamma*dot(x,y)+param.coef0); - case svm_parameter.PRECOMPUTED: - return x[(int)(y[0].value)].value; - default: - return 0; // java - } - } -} - -// Generalized SMO+SVMlight algorithm -// Solves: -// -// min 0.5(\alpha^T Q \alpha) + b^T \alpha -// -// y^T \alpha = \delta -// y_i = +1 or -1 -// 0 <= alpha_i <= Cp for y_i = 1 -// 0 <= alpha_i <= Cn for y_i = -1 -// -// Given: -// -// Q, b, y, Cp, Cn, and an initial feasible point \alpha -// l is the size of vectors and matrices -// eps is the stopping criterion -// -// solution will be put in \alpha, objective value will be put in obj -// -class Solver { - int active_size; - byte[] y; - double[] G; // gradient of 
objective function - static final byte LOWER_BOUND = 0; - static final byte UPPER_BOUND = 1; - static final byte FREE = 2; - byte[] alpha_status; // LOWER_BOUND, UPPER_BOUND, FREE - double[] alpha; - QMatrix Q; - float[] QD; - double eps; - double Cp,Cn; - double[] b; - int[] active_set; - double[] G_bar; // gradient, if we treat free variables as 0 - int l; - boolean unshrinked; // XXX - - static final double INF = java.lang.Double.POSITIVE_INFINITY; - - double get_C(int i) - { - return (y[i] > 0)? Cp : Cn; - } - void update_alpha_status(int i) - { - if(alpha[i] >= get_C(i)) - alpha_status[i] = UPPER_BOUND; - else if(alpha[i] <= 0) - alpha_status[i] = LOWER_BOUND; - else alpha_status[i] = FREE; - } - boolean is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; } - boolean is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; } - boolean is_free(int i) { return alpha_status[i] == FREE; } - - // java: information about solution except alpha, - // because we cannot return multiple values otherwise... - static class SolutionInfo { - double obj; - double rho; - double upper_bound_p; - double upper_bound_n; - double r; // for Solver_NU - } - - void swap_index(int i, int j) - { - Q.swap_index(i,j); - do {byte _=y[i]; y[i]=y[j]; y[j]=_;} while(false); - do {double _=G[i]; G[i]=G[j]; G[j]=_;} while(false); - do {byte _=alpha_status[i]; alpha_status[i]=alpha_status[j]; alpha_status[j]=_;} while(false); - do {double _=alpha[i]; alpha[i]=alpha[j]; alpha[j]=_;} while(false); - do {double _=b[i]; b[i]=b[j]; b[j]=_;} while(false); - do {int _=active_set[i]; active_set[i]=active_set[j]; active_set[j]=_;} while(false); - do {double _=G_bar[i]; G_bar[i]=G_bar[j]; G_bar[j]=_;} while(false); - } - - void reconstruct_gradient() - { - // reconstruct inactive elements of G from G_bar and free variables - - if(active_size == l) return; - - int i; - for(i=active_size;i 0) - { - if(alpha[j] < 0) - { - alpha[j] = 0; - alpha[i] = diff; - } - } - else - { - if(alpha[i] < 0) - { - alpha[i] = 0; - alpha[j] = -diff; - } - } - if(diff > C_i - C_j) - { - if(alpha[i] > C_i) - { - alpha[i] = C_i; - alpha[j] = C_i - diff; - } - } - else - { - if(alpha[j] > C_j) - { - alpha[j] = C_j; - alpha[i] = C_j + diff; - } - } - } - else - { - double quad_coef = Q_i[i]+Q_j[j]-2*Q_i[j]; - if (quad_coef <= 0) - quad_coef = 1e-12; - double delta = (G[i]-G[j])/quad_coef; - double sum = alpha[i] + alpha[j]; - alpha[i] -= delta; - alpha[j] += delta; - - if(sum > C_i) - { - if(alpha[i] > C_i) - { - alpha[i] = C_i; - alpha[j] = sum - C_i; - } - } - else - { - if(alpha[j] < 0) - { - alpha[j] = 0; - alpha[i] = sum; - } - } - if(sum > C_j) - { - if(alpha[j] > C_j) - { - alpha[j] = C_j; - alpha[i] = sum - C_j; - } - } - else - { - if(alpha[i] < 0) - { - alpha[i] = 0; - alpha[j] = sum; - } - } - } - - // update G - - double delta_alpha_i = alpha[i] - old_alpha_i; - double delta_alpha_j = alpha[j] - old_alpha_j; - - for(int k=0;k= Gmax) - { - Gmax = -G[t]; - Gmax_idx = t; - } - } - else - { - if(!is_lower_bound(t)) - if(G[t] >= Gmax) - { - Gmax = G[t]; - Gmax_idx = t; - } - } - - int i = Gmax_idx; - float[] Q_i = null; - if(i != -1) // null Q_i not accessed: Gmax=-INF if i=-1 - Q_i = Q.get_Q(i,active_size); - - for(int j=0;j= Gmax2) - Gmax2 = G[j]; - if (grad_diff > 0) - { - double obj_diff; - double quad_coef=Q_i[i]+QD[j]-2*y[i]*Q_i[j]; - if (quad_coef > 0) - obj_diff = -(grad_diff*grad_diff)/quad_coef; - else - obj_diff = -(grad_diff*grad_diff)/1e-12; - - if (obj_diff <= obj_diff_min) - { - Gmin_idx=j; - obj_diff_min = obj_diff; 
- } - } - } - } - else - { - if (!is_upper_bound(j)) - { - double grad_diff= Gmax-G[j]; - if (-G[j] >= Gmax2) - Gmax2 = -G[j]; - if (grad_diff > 0) - { - double obj_diff; - double quad_coef=Q_i[i]+QD[j]+2*y[i]*Q_i[j]; - if (quad_coef > 0) - obj_diff = -(grad_diff*grad_diff)/quad_coef; - else - obj_diff = -(grad_diff*grad_diff)/1e-12; - - if (obj_diff <= obj_diff_min) - { - Gmin_idx=j; - obj_diff_min = obj_diff; - } - } - } - } - } - - if(Gmax+Gmax2 < eps) - return 1; - - working_set[0] = Gmax_idx; - working_set[1] = Gmin_idx; - return 0; - } - - // return 1 if already optimal, return 0 otherwise - int max_violating_pair(int[] working_set) - { - // return i,j which maximize -grad(f)^T d , under constraint - // if alpha_i == C, d != +1 - // if alpha_i == 0, d != -1 - - double Gmax1 = -INF; // max { -y_i * grad(f)_i | i in I_up(\alpha) } - int Gmax1_idx = -1; - - int Gmax2_idx = -1; - double Gmax2 = -INF; // max { y_i * grad(f)_i | i in I_low(\alpha) } - - for(int i=0;i= Gmax1) - { - Gmax1 = -G[i]; - Gmax1_idx = i; - } - } - if(!is_lower_bound(i)) // d = -1 - { - if(G[i] >= Gmax2) - { - Gmax2 = G[i]; - Gmax2_idx = i; - } - } - } - else // y = -1 - { - if(!is_upper_bound(i)) // d = +1 - { - if(-G[i] >= Gmax2) - { - Gmax2 = -G[i]; - Gmax2_idx = i; - } - } - if(!is_lower_bound(i)) // d = -1 - { - if(G[i] >= Gmax1) - { - Gmax1 = G[i]; - Gmax1_idx = i; - } - } - } - } - - if(Gmax1+Gmax2 < eps) - return 1; - - working_set[0] = Gmax1_idx; - working_set[1] = Gmax2_idx; - return 0; - } - - void do_shrinking() - { - int i,j,k; - int[] working_set = new int[2]; - if(max_violating_pair(working_set)!=0) return; - i = working_set[0]; - j = working_set[1]; - double Gm1 = -y[j]*G[j]; - double Gm2 = y[i]*G[i]; - - // shrink - - for(k=0;k= Gm1) continue; - } - else if(-G[k] >= Gm2) continue; - } - else if(is_upper_bound(k)) - { - if(y[k]==+1) - { - if(G[k] >= Gm2) continue; - } - else if(G[k] >= Gm1) continue; - } - else continue; - - --active_size; - swap_index(k,active_size); - --k; // look at the newcomer - } - - // unshrink, check all variables again before final iterations - - if(unshrinked || -(Gm1 + Gm2) > eps*10) return; - - unshrinked = true; - reconstruct_gradient(); - - for(k=l-1;k>=active_size;k--) - { - if(is_lower_bound(k)) - { - if(y[k]==+1) - { - if(-G[k] < Gm1) continue; - } - else if(-G[k] < Gm2) continue; - } - else if(is_upper_bound(k)) - { - if(y[k]==+1) - { - if(G[k] < Gm2) continue; - } - else if(G[k] < Gm1) continue; - } - else continue; - - swap_index(k,active_size); - active_size++; - ++k; // look at the newcomer - } - } - - double calculate_rho() - { - double r; - int nr_free = 0; - double ub = INF, lb = -INF, sum_free = 0; - for(int i=0;i 0) - ub = Math.min(ub,yG); - else - lb = Math.max(lb,yG); - } - else if(is_upper_bound(i)) - { - if(y[i] < 0) - ub = Math.min(ub,yG); - else - lb = Math.max(lb,yG); - } - else - { - ++nr_free; - sum_free += yG; - } - } - - if(nr_free>0) - r = sum_free/nr_free; - else - r = (ub+lb)/2; - - return r; - } - -} - -// -// Solver for nu-svm classification and regression -// -// additional constraint: e^T \alpha = constant -// -final class Solver_NU extends Solver -{ - private SolutionInfo si; - - void Solve(int l, QMatrix Q, double[] b, byte[] y, - double[] alpha, double Cp, double Cn, double eps, - SolutionInfo si, int shrinking) - { - this.si = si; - super.Solve(l,Q,b,y,alpha,Cp,Cn,eps,si,shrinking); - } - - // return 1 if already optimal, return 0 otherwise - int select_working_set(int[] working_set) - { - // return i,j such that y_i = y_j and - // i: 
maximizes -y_i * grad(f)_i, i in I_up(\alpha) - // j: minimizes the decrease of obj value - // (if quadratic coefficeint <= 0, replace it with tau) - // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha) - - double Gmaxp = -INF; - double Gmaxp2 = -INF; - int Gmaxp_idx = -1; - - double Gmaxn = -INF; - double Gmaxn2 = -INF; - int Gmaxn_idx = -1; - - int Gmin_idx = -1; - double obj_diff_min = INF; - - for(int t=0;t= Gmaxp) - { - Gmaxp = -G[t]; - Gmaxp_idx = t; - } - } - else - { - if(!is_lower_bound(t)) - if(G[t] >= Gmaxn) - { - Gmaxn = G[t]; - Gmaxn_idx = t; - } - } - - int ip = Gmaxp_idx; - int in = Gmaxn_idx; - float[] Q_ip = null; - float[] Q_in = null; - if(ip != -1) // null Q_ip not accessed: Gmaxp=-INF if ip=-1 - Q_ip = Q.get_Q(ip,active_size); - if(in != -1) - Q_in = Q.get_Q(in,active_size); - - for(int j=0;j= Gmaxp2) - Gmaxp2 = G[j]; - if (grad_diff > 0) - { - double obj_diff; - double quad_coef = Q_ip[ip]+QD[j]-2*Q_ip[j]; - if (quad_coef > 0) - obj_diff = -(grad_diff*grad_diff)/quad_coef; - else - obj_diff = -(grad_diff*grad_diff)/1e-12; - - if (obj_diff <= obj_diff_min) - { - Gmin_idx=j; - obj_diff_min = obj_diff; - } - } - } - } - else - { - if (!is_upper_bound(j)) - { - double grad_diff=Gmaxn-G[j]; - if (-G[j] >= Gmaxn2) - Gmaxn2 = -G[j]; - if (grad_diff > 0) - { - double obj_diff; - double quad_coef = Q_in[in]+QD[j]-2*Q_in[j]; - if (quad_coef > 0) - obj_diff = -(grad_diff*grad_diff)/quad_coef; - else - obj_diff = -(grad_diff*grad_diff)/1e-12; - - if (obj_diff <= obj_diff_min) - { - Gmin_idx=j; - obj_diff_min = obj_diff; - } - } - } - } - } - - if(Math.max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) < eps) - return 1; - - if(y[Gmin_idx] == +1) - working_set[0] = Gmaxp_idx; - else - working_set[0] = Gmaxn_idx; - working_set[1] = Gmin_idx; - - return 0; - } - - void do_shrinking() - { - double Gmax1 = -INF; // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) } - double Gmax2 = -INF; // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) } - double Gmax3 = -INF; // max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) } - double Gmax4 = -INF; // max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) } - - // find maximal violating pair first - int k; - for(k=0;k Gmax1) Gmax1 = -G[k]; - } - else if(-G[k] > Gmax3) Gmax3 = -G[k]; - } - if(!is_lower_bound(k)) - { - if(y[k]==+1) - { - if(G[k] > Gmax2) Gmax2 = G[k]; - } - else if(G[k] > Gmax4) Gmax4 = G[k]; - } - } - - // shrinking - - double Gm1 = -Gmax2; - double Gm2 = -Gmax1; - double Gm3 = -Gmax4; - double Gm4 = -Gmax3; - - for(k=0;k= Gm1) continue; - } - else if(-G[k] >= Gm3) continue; - } - else if(is_upper_bound(k)) - { - if(y[k]==+1) - { - if(G[k] >= Gm2) continue; - } - else if(G[k] >= Gm4) continue; - } - else continue; - - --active_size; - swap_index(k,active_size); - --k; // look at the newcomer - } - - // unshrink, check all variables again before final iterations - - if(unshrinked || Math.max(-(Gm1+Gm2),-(Gm3+Gm4)) > eps*10) return; - - unshrinked = true; - reconstruct_gradient(); - - for(k=l-1;k>=active_size;k--) - { - if(is_lower_bound(k)) - { - if(y[k]==+1) - { - if(-G[k] < Gm1) continue; - } - else if(-G[k] < Gm3) continue; - } - else if(is_upper_bound(k)) - { - if(y[k]==+1) - { - if(G[k] < Gm2) continue; - } - else if(G[k] < Gm4) continue; - } - else continue; - - swap_index(k,active_size); - active_size++; - ++k; // look at the newcomer - } - } - - double calculate_rho() - { - int nr_free1 = 0,nr_free2 = 0; - double ub1 = INF, ub2 = INF; - double lb1 = -INF, lb2 = -INF; - double sum_free1 = 0, sum_free2 = 0; - - for(int i=0;i 0) - 
r1 = sum_free1/nr_free1; - else - r1 = (ub1+lb1)/2; - - if(nr_free2 > 0) - r2 = sum_free2/nr_free2; - else - r2 = (ub2+lb2)/2; - - si.r = (r1+r2)/2; - return (r1-r2)/2; - } -} - -// -// Q matrices for various formulations -// -class SVC_Q extends Kernel -{ - private final byte[] y; - private final Cache cache; - private final float[] QD; - - SVC_Q(svm_problem prob, svm_parameter param, byte[] y_) - { - super(prob.size(), prob.getMatrix(), param); - y = (byte[])y_.clone(); - cache = new Cache(prob.size(),(int)(param.cache_size*(1<<20))); - QD = new float[prob.size()]; - for(int i=0;i 0) y[i] = +1; else y[i]=-1; - } - - Solver s = new Solver(); - s.Solve(l, new SVC_Q(prob,param,y), minus_ones, y, - alpha, Cp, Cn, param.eps, si, param.shrinking); - - double sum_alpha=0; - for(i=0;i0) - y[i] = +1; - else - y[i] = -1; - - double sum_pos = nu*l/2; - double sum_neg = nu*l/2; - - for(i=0;i 0) - { - ++nSV; - if(prob.getLabel(i) > 0) - { - if(Math.abs(alpha[i]) >= si.upper_bound_p) - ++nBSV; - } - else - { - if(Math.abs(alpha[i]) >= si.upper_bound_n) - ++nBSV; - } - } - } - - // System.out.print("nSV = "+nSV+", nBSV = "+nBSV+"\n"); - - decision_function f = new decision_function(); - f.alpha = alpha; - f.rho = si.rho; - return f; - } - - // Platt's binary SVM Probablistic Output: an improvement from Lin et al. - private static void sigmoid_train(int l, double[] dec_values, double[] labels, - double[] probAB) - { - double A, B; - double prior1=0, prior0 = 0; - int i; - - for (i=0;i 0) prior1+=1; - else prior0+=1; - - int max_iter=100; // Maximal number of iterations - double min_step=1e-10; // Minimal step taken in line search - double sigma=1e-3; // For numerically strict PD of Hessian - double eps=1e-5; - double hiTarget=(prior1+1.0)/(prior1+2.0); - double loTarget=1/(prior0+2.0); - double[] t= new double[l]; - double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize; - double newA,newB,newf,d1,d2; - int iter; - - // Initial Point and Initial Fun Value - A=0.0; B=Math.log((prior0+1.0)/(prior1+1.0)); - double fval = 0.0; - - for (i=0;i0) t[i]=hiTarget; - else t[i]=loTarget; - fApB = dec_values[i]*A+B; - if (fApB>=0) - fval += t[i]*fApB + Math.log(1+Math.exp(-fApB)); - else - fval += (t[i] - 1)*fApB +Math.log(1+Math.exp(fApB)); - } - for (iter=0;iter= 0) - { - p=Math.exp(-fApB)/(1.0+Math.exp(-fApB)); - q=1.0/(1.0+Math.exp(-fApB)); - } - else - { - p=1.0/(1.0+Math.exp(fApB)); - q=Math.exp(fApB)/(1.0+Math.exp(fApB)); - } - d2=p*q; - h11+=dec_values[i]*dec_values[i]*d2; - h22+=d2; - h21+=dec_values[i]*d2; - d1=t[i]-p; - g1+=dec_values[i]*d1; - g2+=d1; - } - - // Stopping Criteria - if (Math.abs(g1)= min_step) - { - newA = A + stepsize * dA; - newB = B + stepsize * dB; - - // New function value - newf = 0.0; - for (i=0;i= 0) - newf += t[i]*fApB + Math.log(1+Math.exp(-fApB)); - else - newf += (t[i] - 1)*fApB +Math.log(1+Math.exp(fApB)); - } - // Check sufficient decrease - if (newf=max_iter) - System.err.print("Reaching maximal iterations in two-class probability estimates\n"); - probAB[0]=A;probAB[1]=B; - } - - private static double sigmoid_predict(double decision_value, double A, double B) - { - double fApB = decision_value*A+B; - if (fApB >= 0) - return Math.exp(-fApB)/(1.0+Math.exp(-fApB)); - else - return 1.0/(1+Math.exp(fApB)) ; - } - - // Method 2 from the multiclass_prob paper by Wu, Lin, and Weng - private static void multiclass_probability(int k, double[][] r, double[] p) - { - int t,j; - int iter = 0, max_iter=Math.max(100,k); - double[][] Q=new double[k][k]; - double[] Qp= new double[k]; - 
double pQp, eps=0.005/k; - - for (t=0;tmax_error) - max_error=error; - } - if (max_error=max_iter) - System.err.print("Exceeds max_iter in multiclass_prob\n"); - } - - // Cross-validation decision values for probability estimates - private static void svm_binary_svc_probability(svm_problem prob, svm_parameter param, double Cp, double Cn, double[] probAB) - { - int i; - int nr_fold = 5; - int[] perm = new int[prob.size()]; - double[] dec_values = new double[prob.size()]; - - // random shuffle - for(i=0;i0) - p_count++; - else - n_count++; - - if(p_count==0 && n_count==0) - for(j=begin;j 0 && n_count == 0) - for(j=begin;j 0) - for(j=begin;j 5*std) - count=count+1; - else - mae+=Math.abs(ymv[i]); - mae /= (prob.size()-count); - System.err.print("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma="+mae+"\n"); - return mae; - } - - // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data - // perm, length l, must be allocated before calling this subroutine - private static void svm_group_classes(svm_problem prob, int[] nr_class_ret, int[][] label_ret, int[][] start_ret, int[][] count_ret, int[] perm) - { - int l = prob.size(); - int max_nr_class = 16; - int nr_class = 0; - int[] label = new int[max_nr_class]; - int[] count = new int[max_nr_class]; - int[] data_label = new int[l]; - int i; - - for(i=0;i 0) ++nSV; - model.l = nSV; - model.SV = new svm_node[nSV][]; - model.sv_coef[0] = new double[nSV]; - int j = 0; - for(i=0;i 0) - { - model.SV[j] = prob.getNodes(i); - model.sv_coef[0][j] = f.alpha[i]; - ++j; - } - } - else - { - // classification - int l = prob.size(); - int[] tmp_nr_class = new int[1]; - int[][] tmp_label = new int[1][]; - int[][] tmp_start = new int[1][]; - int[][] tmp_count = new int[1][]; - int[] perm = new int[l]; - - // group training data of the same class - svm_group_classes(prob,tmp_nr_class,tmp_label,tmp_start,tmp_count,perm); - int nr_class = tmp_nr_class[0]; - int[] label = tmp_label[0]; - int[] start = tmp_start[0]; - int[] count = tmp_count[0]; - svm_node[][] x = new svm_node[l][]; - int i; - for(i=0;i 0) - nonzero[si+k] = true; - for(k=0;k 0) - nonzero[sj+k] = true; - ++p; - } - - // build output - - model.nr_class = nr_class; - - model.label = new int[nr_class]; - for(i=0;i some folds may have zero elements - if((param.svm_type == svm_parameter.C_SVC || - param.svm_type == svm_parameter.NU_SVC) && nr_fold < l) - { - int[] tmp_nr_class = new int[1]; - int[][] tmp_label = new int[1][]; - int[][] tmp_start = new int[1][]; - int[][] tmp_count = new int[1][]; - - svm_group_classes(prob,tmp_nr_class,tmp_label,tmp_start,tmp_count,perm); - - int nr_class = tmp_nr_class[0]; - int[] label = tmp_label[0]; - int[] start = tmp_start[0]; - int[] count = tmp_count[0]; - - // random shuffle and then data grouped by fold using the array perm - int[] fold_count = new int[nr_fold]; - int c; - int[] index = new int[l]; - for(i=0;i0)?1:-1; - else - return res[0]; - } - else - { - int i; - int nr_class = model.nr_class; - double[] dec_values = new double[nr_class*(nr_class-1)/2]; - svm_predict_values(model, x, dec_values); - - int[] vote = new int[nr_class]; - for(i=0;i 0) - ++vote[i]; - else - ++vote[j]; - } - - int vote_max_idx = 0; - for(i=1;i vote[vote_max_idx]) - vote_max_idx = i; - return model.label[vote_max_idx]; - } - } - - public static double svm_predict_probability(svm_model model, svm_node[] x, double[] prob_estimates) - { - if ((model.param.svm_type == 
svm_parameter.C_SVC || model.param.svm_type == svm_parameter.NU_SVC) && - model.probA!=null && model.probB!=null) - { - int i; - int nr_class = model.nr_class; - double[] dec_values = new double[nr_class*(nr_class-1)/2]; - svm_predict_values(model, x, dec_values); - - double min_prob=1e-7; - double[][] pairwise_prob=new double[nr_class][nr_class]; - - int k=0; - for(i=0;i prob_estimates[prob_max_idx]) - prob_max_idx = i; - return model.label[prob_max_idx]; - } - else - return svm_predict(model, x); - } - - static final String svm_type_table[] = - { - "c_svc","nu_svc","one_class","epsilon_svr","nu_svr", - }; - - static final String kernel_type_table[]= - { - "linear","polynomial","rbf","sigmoid","precomputed" - }; - - public static void svm_save_model(String model_file_name, svm_model model) throws IOException - { - DataOutputStream fp = new DataOutputStream(new FileOutputStream(model_file_name)); - - svm_parameter param = model.param; - - fp.writeBytes("svm_type "+svm_type_table[param.svm_type]+"\n"); - fp.writeBytes("kernel_type "+kernel_type_table[param.kernel_type]+"\n"); - - if(param.kernel_type == svm_parameter.POLY) - fp.writeBytes("degree "+param.degree+"\n"); - - if(param.kernel_type == svm_parameter.POLY || - param.kernel_type == svm_parameter.RBF || - param.kernel_type == svm_parameter.SIGMOID) - fp.writeBytes("gamma "+param.gamma+"\n"); - - if(param.kernel_type == svm_parameter.POLY || - param.kernel_type == svm_parameter.SIGMOID) - fp.writeBytes("coef0 "+param.coef0+"\n"); - - int nr_class = model.nr_class; - int l = model.l; - fp.writeBytes("nr_class "+nr_class+"\n"); - fp.writeBytes("total_sv "+l+"\n"); - - { - fp.writeBytes("rho"); - for(int i=0;i 1) - return "nu <= 0 or nu > 1"; - - if(svm_type == svm_parameter.EPSILON_SVR) - if(param.p < 0) - return "p < 0"; - - if(param.shrinking != 0 && - param.shrinking != 1) - return "shrinking != 0 and shrinking != 1"; - - if(param.probability != 0 && - param.probability != 1) - return "probability != 0 and probability != 1"; - - if(param.probability == 1 && - svm_type == svm_parameter.ONE_CLASS) - return "one-class SVM probability output not supported yet"; - - // check whether nu-svc is feasible - - if(svm_type == svm_parameter.NU_SVC) - { - int l = prob.size(); - int max_nr_class = 16; - int nr_class = 0; - int[] label = new int[max_nr_class]; - int[] count = new int[max_nr_class]; - - int i; - for(i=0;i Math.min(n1,n2)) - return "specified nu is infeasible"; - } - } - } - - return null; - } - - public static int svm_check_probability_model(svm_model model) - { - if (((model.param.svm_type == svm_parameter.C_SVC || model.param.svm_type == svm_parameter.NU_SVC) && - model.probA!=null && model.probB!=null) || - ((model.param.svm_type == svm_parameter.EPSILON_SVR || model.param.svm_type == svm_parameter.NU_SVR) && - model.probA!=null)) - return 1; - else - return 0; - } -} diff --git a/src/java/libsvm/svm_model.java b/src/java/libsvm/svm_model.java deleted file mode 100644 index 49028a6..0000000 --- a/src/java/libsvm/svm_model.java +++ /dev/null @@ -1,55 +0,0 @@ -// -// svm_model -// -package libsvm; - -public class svm_model implements java.io.Serializable { - svm_parameter param; // parameter - int nr_class; // number of classes, = 2 in regression/one class svm - int l; // total #SV - svm_node[][] SV; // SVs (SV[l]) - double[][] sv_coef; // coefficients for SVs in decision functions - // (sv_coef[n-1][l]) - double[] rho; // constants in decision functions (rho[n*(n-1)/2]) - double[] probA; // pariwise probability information - 
double[] probB; - // for classification only - int[] label; // label of each class (label[n]) - int[] nSV; // number of SVs for each class (nSV[n]) - - // nSV[0] + nSV[1] + ... + nSV[n-1] = l - double[] linWeights; - - - // returns the weights for each attribute in a linear model - public double[] getLinearWeights() throws Exception { - if(l == 0) throw new Exception("Model not trained"); - if(param.kernel_type != svm_parameter.LINEAR) throw new Exception("Model is not a linear kernel"); - if(nr_class != 2) throw new Exception("Model is not binary"); - return _getLinearWeights(); - } - - private double[] _getLinearWeights() { - if(this.linWeights != null) return this.linWeights; - int highestIndex = 0; - // find the highest index in the SVs - for(int i = 0; i < SV.length; i++) { - svm_node[] currentnodes = SV[i]; - for(int j = 0; j < currentnodes.length; j++) { - if (highestIndex
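
For reference, the pattern that LibSVMModelCreator.read_problem() now follows with the stock libsvm 3.0 API is to fill svm_problem directly through its public fields (l, y, x) instead of going through the removed svm_problem_impl wrapper, and to call the same static svm entry points now provided by libsvm-3.0.jar. The following is a minimal, self-contained sketch of that usage; the class name, toy feature vectors, labels, parameter values and output path are made up for illustration and are not part of this patch.

import libsvm.svm;
import libsvm.svm_model;
import libsvm.svm_node;
import libsvm.svm_parameter;
import libsvm.svm_problem;

public class LibSVM30Sketch {

    public static void main(String[] args) throws java.io.IOException {
        // Toy training data: four vectors with two attributes each (hypothetical values).
        double[][] features = { { 0.0, 1.0 }, { 0.2, 0.8 }, { 1.0, 0.1 }, { 0.9, 0.0 } };
        double[] labels = { 0, 0, 1, 1 };

        // libsvm 3.0: svm_problem is populated through its public fields, no setters.
        svm_problem prob = new svm_problem();
        prob.l = labels.length;
        prob.y = labels;
        prob.x = new svm_node[prob.l][];
        int max_index = 0;
        for (int i = 0; i < prob.l; i++) {
            svm_node[] nodes = new svm_node[features[i].length];
            for (int j = 0; j < features[i].length; j++) {
                nodes[j] = new svm_node();
                nodes[j].index = j + 1; // attribute indices are 1-based
                nodes[j].value = features[i][j];
            }
            prob.x[i] = nodes;
            max_index = Math.max(max_index, nodes[nodes.length - 1].index);
        }

        // Minimal parameter setup; real values come from the learner configuration.
        svm_parameter param = new svm_parameter();
        param.svm_type = svm_parameter.C_SVC;
        param.kernel_type = svm_parameter.LINEAR;
        param.C = 1;
        param.eps = 1e-3;
        param.cache_size = 40;
        if (param.gamma == 0)
            param.gamma = 1.0 / max_index; // same default as read_problem()

        // Training and model persistence through the external library.
        svm_model model = svm.svm_train(prob, param);
        svm.svm_save_model("example.model", model); // hypothetical output path

        // n-fold cross-validation fills target[] with the predicted label per example.
        double[] target = new double[prob.l];
        svm.svm_cross_validation(prob, param, 2, target);
    }
}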