diff --git a/ID3Tree.cpp b/ID3Tree.cpp new file mode 100644 index 0000000..d727f83 --- /dev/null +++ b/ID3Tree.cpp @@ -0,0 +1,435 @@ +/*** +¾ö²ßÊ÷£¬ID3,C4.5,Ê÷»Ø¹é + +¿¼ÂÇ + +1ÐÅÏ¢ÔöÒ棬ÐÅÏ¢ÔöÒæÂÊ£¬×îС¾ù·½²î(Ê÷»Ø¹éÖÐÓÖÓÐÖµ»Ø¹éºÍÄ£Ðͻعé) +2ÊôÐÔÀàÐÍ£¨double£¬string£©Á¬ÐøÖµºÍȱËðÖµ +3¼ôÖ¦ +**/ + + + +#include +#include +#include +#include +#include +#include +#include +#define MAX_SIZE_OF_TRAINING_SET 100 +#define ATTR_NUM 5 +#define JC_ATTR_KIND 10 +#define FET_ATTR_KIND 10 +using namespace std; +struct data +{ + int id; + string attr[ATTR_NUM];//ÓÃÓÚ×Ö·û´®ÊôÐÔ + double attr_double[ATTR_NUM];//ÓÃÓÚÊýÖµÐÍÊôÐÔ + data *next; +}; +struct matrixTree +{ + int id; + int splitfet; + string fetvalue; + string jc_attr; + int len; + int kind; + int parent; + matrixTree *next; + struct data *data;//¿ÉÓÃÓڼǼdataµÄÖ¸ÕëÍ· +}; +matrixTree matrixtree[MAX_SIZE_OF_TRAINING_SET];//ÓÃ×÷´æ´¢ºóÐò±éÀúÉú³ÉÊ÷µÄÐòÁÐ +stack s; +int node=0;//ÓÃ×÷´æ´¢ºóÐò±éÀúÉú³ÉÊ÷µÄÐòÁÐÊý×éµÄϱê +int fetflag[ATTR_NUM]= {0,0,0,0,0}; //³õʼ»¯Îª0,×îºóÒ»¸öÓÃÓÚͳ¼Æ»¹Ê£¶àÉÙÌØÕ÷δʹÓà +// ¼ÆËã»®·Öºó×Ó¼¯µÄÐÅÏ¢ìØ£¬¸ÃÐÅÏ¢ìØÓÉ×Ó¼¯Öоö²ßÊôÐÔ³öÏֵĸÅÂʾö¶¨ +string* getkindattr(data *dataSet,int axis) +{ + int i,j,kind=0; + data *p; + p=dataSet->next; + string attr_kind_sum[JC_ATTR_KIND]; + string *att=new string[JC_ATTR_KIND]; + for(i=0; p!=NULL; i++) + { + for(j=0; jattr[axis]==attr_kind_sum[j]) + { + break; + } + } + if(j==kind) + { + attr_kind_sum[kind]=p->attr[axis]; + kind++; + } + p=p->next; + } + attr_kind_sum[kind]="0"; + i=0; + while(attr_kind_sum[i]!="0") + { + att[i]=attr_kind_sum[i]; + i++; + } + att[i]="0"; + //¼ì²éµÃµ½µ±Ç°ÌØÕ÷ËùÓпÉÄܵÄÈ¡ÖµÊÇ·ñÕýÈ· + /* + for(i=0; attr_kind_sum[i]!="0"; i++) + { + cout<next; + string *jc_attr_kind=(string *)malloc(sizeof(string)*JC_ATTR_KIND); + jc_attr_kind=getkindattr(dataSet,ATTR_NUM-1); + double jc_attr_kind_sum[JC_ATTR_KIND]; + double prob=0; + double shannoEnt=0; + for(i=0; iattr[ATTR_NUM-1]==jc_attr_kind[j])//³õʼ»¯Îª0¼ÇµÃ + { + jc_attr_kind_sum[j]++; + } + } + len++; + p=p->next; + } + //ͳ¼Æ¸Ã×ÓÊý¾Ý¼¯ÉϵÄËùÓÐÑù±¾µÄ¾ö²ßÊôÐÔÒÔ¼°²»Í¬¾ö²ßÊôÐÔÉÏÑù±¾Êý + for(i=0; jc_attr_kind[i]!="0"; i++) + { + cout<<"subdatasetLen="<id; + q=q->next; + } + cout<next; + string *fet_attr_kind[ATTR_NUM-1]; + int *fet_attr_kind_sum[ATTR_NUM-1];//³õʼ»¯Îª0¼ÇµÃ + for(i=0; iattr[j]==fet_attr_kind[j][k]) + { + fet_attr_kind_sum[j][k]++; + } + } + } + } + len++; + p=p->next; + } + for(i=0; inext; + while(p!=NULL) + { + sublen++; + p=p->next; + } + if(sublennext->attr[ATTR_NUM-1]; + matrixtree[node].splitfet=-1; + matrixtree[node].fetvalue=dataSet->next->attr[bbestFet]; + matrixtree[node].parent=bbestFet; + matrixtree[node].len=len; + matrixtree[node].id=node; + //cout<<"fetflag=1 "<next->attr[bbestFet]; + matrixtree[node].parent=bbestFet; + matrixtree[node].jc_attr="-1"; + matrixtree[node].len=len; + matrixtree[node].kind=kind; + matrixtree[node].id=node; + node++; + return matrixtree[node]; +} +void loadData(data *dataSet) +{ + ifstream infile; + string tmpstrline; + string tmpstr; + data *p; + p=dataSet; + data *datatmp; + infile.open("data\\id3.txt",ios::in); + int i=0,j=0,yblen=0,fetlen=0; + while(!infile.eof()&&iid=i; + datatmp->next=NULL; + while(input>>tmpstr) + { + datatmp->attr[j]=tmpstr; + j++; + fetlen=j; + } + i++; + p->next=datatmp; + p=p->next; + } + } + //¼ì²âÊý¾Ý¼ÓÔØÊÇ·ñÕýÈ· + /* + yblen=i; + p=dataSet->next; + for(i=0; p!=NULL; i++) + { + for(j=0; jattr[j]<<" "; + } + p=p->next; + cout<next=child; + p=p->next; + } + s.push(matrixtree[i]); + } + } + s.pop(); + return 0; +} +int main() +{ + data *dataSet=new data; + dataSet->next=NULL; + loadData(dataSet); + dataToTree(dataSet,0);//³õÖµºÜÖØÒª£¬µ±·µ»Øµ½¸ù½Úµãʱ£¬ÈçÈ¡-1£¬Ôò»áÎÞ·¨·ÃÎʸù½ÚµãÉϵÄÊôÐÔÌØÕ÷Öµ + //createTree(); + //µÚ¶þ¸ö²ÎÊýµÄÈ¡Öµ¼´±íʾ¸ù½ÚµãµÄ¸¸½Úµã + for(int i=0; inext; + } + cout< +using namespace std; +#define size 12 +int main() +{ + int matrix[size][size]= + { + {0,1,0,0,1,0,0,0,0,0,0,0}, + {1,0,1,0,0,0,0,0,0,0,0,0}, + {0,1,0,1,0,0,1,0,0,0,0,0}, + {0,0,0,0,0,0,0,0,0,0,0,0}, + {1,0,0,0,0,0,0,0,1,0,0,0}, + {0,0,0,0,0,0,0,0,0,0,0,0}, + {0,0,1,0,0,0,0,1,0,0,1,0}, + {0,0,0,0,0,0,0,0,0,0,0,0}, + {0,0,0,0,1,0,0,0,0,1,0,0}, + {0,0,0,0,0,0,0,0,1,0,1,0}, + {0,0,0,0,0,0,1,0,0,1,0,1}, + {0,0,0,0,0,0,0,1,0,0,1,0} + }; + double reward[size]= + { + -0.02,-0.02,-0.02,1, + -0.02,0,-0.02,-1, + -0.02,-0.02,-0.02,-0.02 + }; + double maxreward[size]= {0,0,0,0,0,0,0,0,0,0,0,0}; + int action[size]= {4,0,1,-1,8,-1,10,-1,9,8,9,10}; //ÉÏÓÒÏÂ×ó{1,2,3,4} + int i=0,j=0,count=0; + bool flag=0; + for(i=0;i0||action[i]==0) + maxreward[i]=reward[i]+maxreward[action[i]]; + else + maxreward[i]=reward[i]; + }//·Åµ½ÕâÒâζ×Åͬ²½¸üУ¬count=1008ÊÇ12*12µÄ7±¶£¬¼´É¨ÁË7±é + for(i=0; imaxreward[i]-reward[i]+0.0001)//¸üÐÂÀÛ»ý»Ø±¨ + { + action[i]=j; + //if(action[i]>0||action[i]==0) + //maxreward[i]=reward[i]+maxreward[action[i]];//·Åµ½ÕâÊÇÒì²½¸üУ¬ + //else + // maxreward[i]=reward[i]; + flag=0;//µ±ÀÛ»ý»Ø±¨²»ÔÙ¸üУ¬¼´²»½øÈë¸Ãif£¬ÄÇô¾Í½áÊøµü´ú + } + count++; + } + } + } + for(i=0; imaxreward[i]-reward[i]+0.0001) + { + action[i]=j; + //if(reward[i]!=1&&reward[i]!=-1) + maxreward[i]=reward[i]+maxreward[ac[j]+i]; + //else + // maxreward[i]=reward[i]; + flag=0; + } + count++; + } + } + } + for(i=0; i +#include +#include +using namespace std; +#define MAX 1000000 +#define MIN -100000 +//SMO²ÎÊý½á¹¹Ìå +struct OS +{ + Matrix x; + Matrix y; + double C; + double soft; + int m; + Matrix alphas; + double b; + Matrix eCache; + Matrix kernel; + bool k; +}; +//ºËº¯ÊýµÄ½á¹¹Ìå +struct kTup +{ + int type;//0,1 + double arg; +}; +class SMOP +{ + //·Ç³£ÖµµÃ×¢ÒâµÄÊÇsvmÖÐѵÁ·Ñù±¾°´ÁÐÅÅÁУ¬¼´Ã¿Ò»ÁÐÊÇÒ»¸öÑù±¾£¬ËùÒÔµ¼ÖÂwÊÇÐÐÏòÁ¿ +public: + OS os; +public: + /** + ¸ù¾Ý²ÎÊý£¬À´Éú³É²»Í¬µÄºËº¯Êý + */ + Matrix kernelTran(Matrix x,Matrix xOneCol,kTup ktup) + { + Matrix K; + K.initMatrix(&K,x.col,1); + Matrix xOneColT; + xOneColT.initMatrix(&xOneColT,xOneCol.row,xOneCol.col); + xOneColT.transposematrix(xOneCol,&xOneColT); + if(ktup.type==1) + { + K.multsmatrix(&K,x,xOneColT); + } + if(ktup.type==2) + { + //¸ß˹ºË + } + return K; + } + /** + ½á¹¹ÌåOSµÄ³õʼ»¯£¬ÓÃÓÚ±£´æËùÒÔSMOËã·¨ÖÐÐèÒªÓõ½µÄ²ÎÊý + */ + int initOs(Matrix x,Matrix y,double C,double soft,kTup ktup) + { + os.x.initMatrix(&os.x,x.col,x.row); + os.x.copy(x,&os.x); + os.y.initMatrix(&os.y,y.col,y.row); + os.y.copy(y,&os.y); + os.eCache.initMatrix(&os.eCache,x.col,2); + os.alphas.initMatrix(&os.alphas,x.col,1); + + os.b=0; + os.C=C; + os.m=x.col; + os.kernel.initMatrix(&os.kernel,os.m,os.m); + os.soft=soft; + if(ktup.type!=0) + { + int i=0,j=0; + Matrix xOneCol; + xOneCol.initMatrix(&xOneCol,1,os.x.row); + Matrix kOneRow; + kOneRow.initMatrix(&kOneRow,os.m,1); + cout<<"-----------"<H) + return H; + if(alphamaxDeltaE) + { + maxK=k; + maxDeltaE=deltaE; + Ej=Ek; + } + } + //Ëæ»úÑ¡ÔñÒ»¸öj, ÀíÂÛÉÏÖ»ÓеÚÒ»´ÎʱËùÓеÄek¶¼Ã»¼ÆËã¹ý£¬ËùÒÔÈÎÑ¡Ò»¸ö¼´¿É + if(maxK==-1) + { + maxK=(i*2+1)%100; + Ej=calcEk(maxK); + } + } + return maxK; + } + + /** + ÄÚ²ãÑ­»·ÊµÏÖÁ½¸öalphaµÄÓÅ»¯£¬ÓÉÍâ²ãÑ­»·º¯ÊýÌṩalpha1µÄ¿ÉÑ¡¼¯ºÏ£¨ËùÓÐÑù±¾»òÕßÊÇÖ§³ÖÏòÁ¿£©£¬ + ±éÀú¼¯ºÏÉϵÄÿһ¸öalpha£¬¶ÔÆä½øÐÐÅжÏÊÇ·ñÎ¥·´KKTÌõ¼þ£¬Èç¹ûÎ¥·´ÔòѡΪµÚÒ»¸öalpha1£¬ + ͬÑùµ÷ÓÃselctJº¯ÊýÀ´Æô·¢Ê½Ñ¡ÔñʹµÃalpha1¸Ä±ä×î´óµÄalpha2£¬ + Ñ¡Ôñ³öºÏÊÊijÖ®ºó£¬»¹µÃ¸ù¾Ýµ±Ç°alpha1ºÍalpha2À´È·¶¨ÓÅ»¯µÄÉÏÏÂÏÞ¡£ + È»ºóͨ¹ýµü´ú¹«Ê½£¬À´¼ÆËãalpha1ºÍalpha2£¬¼ÆËãµÄʱºòÅжÏÊÇ·ñ²ÉÓú˷½·¨¡£ + ²¢¶Ô¼ÆËã½á¹û½øÐÐÉÏÏÂÏÞ¼ô¼­£¬×îÖÕÈ·¶¨ÓÅ»¯ºóµÄalpha1ºÍalpha2£¬Í¬Ê±¶Ôµ±Ç°ÓÅ»¯ºó½á¹û¶ÔbÒ²½øÐÐÐÞ¸Ä + ¿¼Âǵ½Ã¿Ò»´ÎÑ¡ÔñµÚ¶þ¸öalpha2ʱ£¬¶¼ÐèÒª±È½ÏÒ»¸öÎó²îÖµ£¬¶øÕâ¸öÎó²îÖµÔÚÿ´ÎÓÅ»¯¹ý³ÌÖж¼Ö»ÓжÔÓ¦ÐÞ¸ÄalphaµÄÎó²îÖµÓб仯 + ¶øÆäËûµÄÊDz»±äµÄ£¬ËùÒÔÓÃÒ»¸ö¾ØÕóÀ´±£´æÓÐЧµÄÎó²îÖµ¡£ + */ + int innerL(int i) + { + double Ei; + double Ej; + int j; + double alphaIOld; + double alphaJOld; + double L; + double H; + double eta; + int n; + double temp[3];//ii£¬jj£¬ij + double b1,b2; + if(os.y.mat[i][0]*(Ei-os.y.mat[i][0]*os.soft)<0&&os.alphas.mat[i][0]0&&os.alphas.mat[i][0]>0) + { + Ei=calcEk(i); + j=selectJ(i,Ei); + Ej=calcEk(j); + alphaIOld=os.alphas.mat[i][0]; + alphaJOld=os.alphas.mat[j][0]; + if(os.y.mat[i][0]!=os.y.mat[j][0]) + { + L=max(0.0,os.alphas.mat[j][0]-os.alphas.mat[i][0]); + H=min(os.C,os.alphas.mat[j][0]-os.alphas.mat[i][0]+os.C); + } + else + { + L=max(0.0,os.alphas.mat[j][0]+os.alphas.mat[i][0]-os.C); + H=min(os.C,os.alphas.mat[j][0]+os.alphas.mat[i][0]); + } + if(L==H) + { + cout<<"l=h------------------"<os.alphas.mat[i][0]) + os.b=b1; + else if(0os.alphas.mat[j][0]) + os.b=b2; + else + os.b=(b1+b2)/2; + return 1; + } + cout<<"kkt--------------------------"<0||entireSet); iter++) + {//Ñ­»·½áÊø±ê־Ϊµü´ú´ÎÊýÒѵ½Ô¤ÉèÖµ£¬»òÕßÊDz»ÄÜÔÙ¼ÌÐøÓÅ»¯£¨¶ÔÓÚËùÓеÄÖ§³ÖÏòÁ¿Éϵĵ㶼ÕÒ²»µ½µÚ¶þ¸öalpha¶ÔµÚÒ»¸öalpha½øÐÐÓÅ»¯ºó£¬ÖØÐÂÔÙ±éÀúËùÓеĵãÑ°ÕÒ¿ÉÓÅ»¯µÄ²ÎÊý¶Ô£© + //»¹ÊÇÕÒ²»µ½ÔòÔٴαéÀúÖ§³ÖÏòÁ¿Éϵĵ㣬Õâ´Î±ØȻҲÊÇÕÒ²»µ½£¬²Å½áÊøµü´ú + alphaPairsChanged=0; + if(entireSet) + { + for(i=0; i0) + cout<<1-os.y.mat[i][0]< +#include +#include +#include +#include +#include +#include +#define MAX_SIZE_OF_TRAINING_SET 100 +#define MAX_NUMIT 100 +#define ATTR_NUM 22 +#define MAX 1000000 +#define MIN -100000 +using namespace std; +struct data +{ + int id; + string attr[ATTR_NUM];//ÓÃÓÚ×Ö·û´®ÊôÐÔ + double attr_double[ATTR_NUM];//ÓÃÓÚÊýÖµÐÍÊôÐÔ + double weight; + data *next; +}; +struct twoSubData +{ + data *left; + data *right; +}; +struct Stump +{ + double minErr; + int bestIndex;//±íʾ×îºÃµÄ·ÖÀàÊôÐÔ£¬µ±·ÇÒ¶×Ó½Úµãʱ£¬¼´±íʾ·ÖÁÑÊôÐÔϱ꣬·ñÔòΪ-1£¬±íʾΪҶ×Ó½Úµã±ê¼Ç + string ltOrgt;//ÓÃÓÚ±íʾÊÇСÓÚµÈÓÚΪ-1£¬»¹ÊÇ´óÓÚµÈÓÚΪ-1 + double threshVal; + struct twoSubData twosubdata;//¿ÉÓÃÓڼǼdataµÄÖ¸ÕëÍ· + int predict[MAX_SIZE_OF_TRAINING_SET]; + double alpha; +}; +//matrixTree matrixtree[MAX_SIZE_OF_TRAINING_SET];//ÓÃ×÷´æ´¢ºóÐò±éÀúÉú³ÉÊ÷µÄÐòÁÐ +int node=0;//ÓÃ×÷´æ´¢ºóÐò±éÀúÉú³ÉÊ÷µÄÐòÁÐÊý×éµÄϱê +int fetflag[ATTR_NUM]= {0,0,0,0,0}; //³õʼ»¯Îª0,×îºóÒ»¸öÓÃÓÚͳ¼Æ»¹Ê£¶àÉÙÌØÕ÷δʹÓà +// ¼ÆËã»®·Öºó×Ó¼¯µÄÐÅÏ¢ìØ£¬¸ÃÐÅÏ¢ìØÓÉ×Ó¼¯Öоö²ßÊôÐÔ³öÏֵĸÅÂʾö¶¨ + +void loadData(data *dataSet) +{ + ifstream infile; + string tmpstrline; + data *p; + p=dataSet; + data *datatmp; + infile.open("data\\adaboost.txt",ios::in); + int i=0,j=0,yblen=0,fetlen=0; + while(!infile.eof()&&iid=i; + datatmp->next=NULL; + while(input>>datatmp->attr_double[j]) + { + j++; + fetlen=j; + } + i++; + p->next=datatmp; + p=p->next; + } + } + //¼ì²âÊý¾Ý¼ÓÔØÊÇ·ñÕýÈ· + yblen=i; + p=dataSet->next; + for(i=0; p!=NULL; i++) + { + for(j=0; jattr_double[j]<<" "; + } + p=p->next; + cout<next; + while(p!=NULL) + { + if(p->attr_double[axis]>rangemax) + rangemax=p->attr_double[axis]; + if(p->attr_double[axis]attr_double[axis]; + p=p->next; + } + range[0]=rangemin; + range[1]=rangemax; + return range; +} +int* stumpClassify(data *trainData,int axis,double threshVal,string threshIneq) +{ + data *p; + p=trainData->next; + int *predict=new int[MAX_SIZE_OF_TRAINING_SET]; + int i=0; + while(p!=NULL) + { + predict[i]=1; + if(threshIneq=="lt") + {if(p->attr_double[axis]<=threshVal) + predict[i]=-1; + } + else + {if(p->attr_double[axis]>threshVal) + predict[i]=-1; + } + p=p->next; + i++; + } + return predict; +} +Stump buildStump(data *dataSet,double *w) +{ + Stump sp; + int len; + int i,j,k,l; + twoSubData twosubdata; + twosubdata.left=new data; + twosubdata.right=new data; + twosubdata.left->next=NULL; + twosubdata.right->next=NULL; + data *left=twosubdata.left;//=(data *)malloc(sizeof(data)*MAX_SIZE_OF_TRAINING_SET); + data *right=twosubdata.right; + data *p; + data *datatmp; + int *predict=new int[MAX_SIZE_OF_TRAINING_SET]; + double errArr[MAX_SIZE_OF_TRAINING_SET]; + double weightError=0; + double threshVal; + double minErr=MAX; + double *range=(double *)malloc(sizeof(double)*2); + int numSteps=MAX_SIZE_OF_TRAINING_SET/10; + double stepSize; + double rangemin,rangemax; + string threshIneq[2]={"lt","gt"}; + for(i=0; inext; + l=0; + weightError=0; + while(p!=NULL&&l<100) + { + //cout<attr_double[l]<attr_double[ATTR_NUM-1]>0?(predict[l]-p->attr_double[ATTR_NUM-1])/2:-(predict[l]-p->attr_double[ATTR_NUM-1])/2; + weightError+=errArr[l]*w[l]; + p=p->next; + l++; + } + if(weightErrornext; + left=twosubdata.left; + right=twosubdata.right; + l=0; + while(p!=NULL) + { + sp.predict[l]=predict[l]; + datatmp=new data;//Æäʵ¿ÉÒÔ²ÉÓÃÐÞ¸ÄÖ¸ÕëÖ¸ÏòµÄ·½Ê½À´¼Ç¼×Ó¼¯ÖеÄÊý¾Ý£¬¶ø²»ÊÇÁíÍ⿪±Ù¿Õ¼ä + datatmp->next=NULL; + datatmp->id=p->id; + int j=0; + for(j=0; jattr_double[j]=p->attr_double[j]; + datatmp->attr_double[ATTR_NUM-1]=predict[l]; + if(predict[l]>0) + { + left->next=datatmp; + left=left->next; + } + if(predict[l]<0) + { + right->next=datatmp; + right=right->next; + } + p=p->next; + l++; + } + sp.bestIndex=i; + sp.minErr=minErr; + sp.threshVal=threshVal; + sp.twosubdata=twosubdata; + sp.ltOrgt=threshIneq[k]; + cout<<"minErr="<next; + int i,j,k; + double len=0; + double w[MAX_SIZE_OF_TRAINING_SET]; + double wSum=0; + double aggErr[MAX_SIZE_OF_TRAINING_SET]; + double sumErr; + while(p!=NULL) + { + len++; + p=p->next; + } + for(i=0;inext=NULL; + loadData(trainData); + adaBoostTrainDS(trainData,50); + return 0; +} diff --git a/bayes.cpp b/bayes.cpp new file mode 100644 index 0000000..2434cc7 --- /dev/null +++ b/bayes.cpp @@ -0,0 +1,341 @@ +#include +#include +#include +#include +#include +#include +#include +#include "matrix.h" +#define EMAILSUM 50 //·Ç³£ÌرðµÄÒ»¸ö³£Á¿£¬ÊÊÓ÷¶Î§ÓÐÏÞ +#define FILE_MAX_NUM 99 +#define FILE_NAME_LEN 30 +#define FILE_WORD_LEN 1000 +#define CLASS_SUM 2 +#define VEC_LEN 1000 +using namespace std; + +struct Bayes +{ + Matrix pXY; + Matrix pY; + Matrix pX; +}; +Bayes bayes;//±£´æbayes·ÖÀàÆ÷ËùÓеIJÎÊý +char file[CLASS_SUM][FILE_MAX_NUM][FILE_NAME_LEN];//¼ÓÔØÊý¾ÝµÄÎļþÃû + +/** +Éú³Éµ¥´Ê×Öµä +*/ +dataToMatrix createVocabList(dataToMatrix *dtm) +{ + int i,j,k,vl; + int vec_len=0; + string dic[VEC_LEN]; + Data *p; + for(k=0; knext; + for(j=0; jattr_string[j].compare(dic[vl])) + break; + } + if(vl==vec_len) + { + dic[vec_len]=p->attr_string[j]; + vec_len++; + } + } + p=p->next; + } + } + dataToMatrix dicMat; + dicMat.dataSet=new Data; + dicMat.dataSet->next=NULL; + p=dicMat.dataSet; + Data *datatmp=new Data; + datatmp->next=NULL; + for(i=0; iattr_string[i]=dic[i]; + } + p->next=datatmp; + dicMat.col=1; + dicMat.row=vec_len; + cout<<"vec_len="<next;//×ÖµäͬÑùÒ²ÊÇÒ»ÐÐ + Matrix vecX; + vecX.initMatrix(&vecX,EMAILSUM,dicMat.row); + for(k=0; knext; + for(j=0; jattr_string[j].compare(q->attr_string[vl])) + { + vecX.mat[k][vl]=1; + break; + } + } + } + } + } + /* + for(i=0;inext; + q=dicMat.dataSet->next;//×ÖµäͬÑùÒ²ÊÇÒ»ÐÐ + Matrix testVecX; + testVecX.initMatrix(&testVecX,testX.col,dicMat.row); + for(i=0; iattr_string[j].compare(q->attr_string[vl])) + { + testVecX.mat[0][vl]=1; + break; + } + } + } + } + return testVecX; +} + +/** +ѵÁ·º¯ÊýµÄʵÏÖ£¬×¢Òâ¶Ô²ÎÊý½øÐÐƽ»¬´¦Àí +**/ +int trainNB(Matrix vecX,Matrix vecY) +{ + bayes.pY.initMatrix(&bayes.pY,2,1);//Á½Àà³õʼ»¯Îª2ÐеÄÁÐÏòÁ¿ + bayes.pX.initMatrix(&bayes.pX,1,vecX.row); + bayes.pXY.initMatrix(&bayes.pXY,2,vecX.row); + + double dicY[CLASS_SUM]= {0,0}; + int i,j,k,len=0; + for(k=0; k0) + { + file[k][i][j]=48+((i+1)/10); + file[k][i][j+1]=48+(i+1)%10; + j++; + } + else + { + file[k][i][j]=48+i+1; + } + } + } + //cout< +#include +#include +#include +#include +#include +#define MAX_SIZE_OF_TRAINING_SET 1000 +#define ATTR_NUM 3 +#define MAX 1024000 +#define MIN 0.0000001 +using namespace std; +struct data +{ + int id; + double attr_double[ATTR_NUM];//ÓÃÓÚÊýÖµÐÍÊôÐÔ + data *next; +}; +struct twoSubData +{ + data *left; + data *right; +}; +struct split +{ + int bestIndex;//±íʾ×îºÃµÄ·ÖÀàÊôÐÔ£¬µ±·ÇÒ¶×Ó½Úµãʱ£¬¼´±íʾ·ÖÁÑÊôÐÔϱ꣬·ñÔòΪ-1£¬±íʾΪҶ×Ó½Úµã±ê¼Ç + double value;//ÈôΪ·ÖÁѽڵ㣬Ôò±íʾ·ÖÁÑãÐÖµ£¬·ñÔòΪҶ×ӽڵ㣬ÓÃÀ´¼Ç¼Ҷ×Ó½ÚµãµÄ¾ùÖµ +}; +typedef struct bitnode +{ + struct bitnode *left;//СÓÚµÈÓÚãÐÖµµÄ×ó×ÓÊ÷ + struct bitnode *right;//´óÓÚãÐÖµµÄÓÒ×ÓÊ÷ + int leafType;//Ò¶×Ó½ÚµãÀàÐÍ.0:Öµ£¬1:Ä£ÐÍ + int feature;//Ö»ÓзÇÒ¶×Ó½Úµã²ÅÓзÖÁÑÊôÐÔ + double value;//Ò¶×Ó½ÚµãΪֵÐÍ,·ÇÒ¶×Ó½ÚµãΪãÐÖµ + int modle[ATTR_NUM];//Ò¶×Ó½ÚµãΪmodelÐÍ + struct data *data;//¿ÉÓÃÓڼǼdataµÄÖ¸ÕëÍ· + int len;//¼Ç¼¸Ã½áµã×Ó¼¯µÄ³¤¶È +}bitnode,*bitree; +twoSubData binSplitDataSet(data *dataSet,int axis,double value) +{ + twoSubData twosubdata; + twosubdata.left=new data; + twosubdata.right=new data; + twosubdata.left->next=NULL; + twosubdata.right->next=NULL; + data *left=twosubdata.left;//=(data *)malloc(sizeof(data)*MAX_SIZE_OF_TRAINING_SET); + data *right=twosubdata.right; + data *p; + data *datatmp; + p=dataSet->next; + int i,j,k; + for(i=0;p!=NULL;i++) + { + if(p->attr_double[axis]<=value) + { + datatmp=new data; + datatmp->next=NULL; + datatmp->id=p->id; + for(j=0;jattr_double[j]=p->attr_double[j]; + left->next=datatmp; + left=left->next; + } + else + { + datatmp=new data; + datatmp->next=NULL; + datatmp->id=p->id; + for(j=0;jattr_double[j]=p->attr_double[j]; + right->next=datatmp; + right=right->next; + } + p=p->next; + } + return twosubdata; +} +double mean(data *dataSet) +{ + int i; + double meanvalue=0; + double meanErr=0; + data *p; + p=dataSet->next; + for(i=0;p!=NULL;i++) + { + meanvalue+=p->attr_double[ATTR_NUM-1]; + p=p->next; + } + meanvalue/=(i);//ÕâÀï×¢Òâi¼´±íʾ³¤¶È£¬ÒòΪiÊÇ´Ó0¿ªÊ¼ËãµÄ£¬ËùÒÔ×îºóµÄ¼Ó1²»ÄܼõÈ¥ + return meanvalue; +} +double MeanErr(data *dataSet) +{ + int i; + double meanvalue=0; + double meanErr=0; + data *p; + meanvalue=mean(dataSet); + p=dataSet->next; + for(i=0;p!=NULL;i++) + { + meanErr+=(p->attr_double[ATTR_NUM-1]-meanvalue)*(p->attr_double[ATTR_NUM-1]-meanvalue); + p=p->next; + } + meanErr=sqrt(meanErr/(i));//ÕâÀï×¢Òâi¼´±íʾ³¤¶È£¬ÒòΪiÊÇ´Ó0¿ªÊ¼ËãµÄ£¬ËùÒÔ×îºóµÄ¼Ó1²»ÄܼõÈ¥ + //cout<<"meanErr="<next; + while(p!=NULL) + { + p + p=p->next; + } +}*/ +split chooseBestSplit(data *dataSet,int leafType,double minErr,int minLen) +{ + int signvalue=1; + twoSubData twosubdata; + data *p; + data *left; + data *right; + split sp; + int len; + int i,j; + double oldMeanErr=MeanErr(dataSet); + double bestMeanErr=MAX; + double newMeanErr; + p=dataSet->next; + double value=p->attr_double[ATTR_NUM-1]; + for(i=0;p!=NULL;i++) + { + signvalue=0; + if(p->attr_double[ATTR_NUM-1]!=value) + { + signvalue=0; + } + len++; + p=p->next; + } + if(signvalue||len==1) + { + cout<<"signvalue+len"<next; + for(j=0;p!=NULL;j++) + { + twosubdata=binSplitDataSet(dataSet,i,p->attr_double[i]); + left=twosubdata.left->next; + right=twosubdata.right->next; + len=0;//len¼ÇµÃÔÚ½øÈëÏ´ÎÑ­»·ÊÇÇå0 + while(left!=NULL&&right!=NULL) + { + left=left->next; + right=right->next; + len++; + } + //cout<<"len===="<next;//ÌáÇ°½áÊøµ±Ç°Ñ­»·Ö®Ç°»¹µÃ°ÑÖ¸ÕëÖ¸ÏòÏÂÒ»¸ö + continue; + } + newMeanErr=MeanErr(twosubdata.left)+MeanErr(twosubdata.right); + //cout<<"id="<next; + if(!(t=(bitnode *)malloc(sizeof(bitnode)))) exit(-1); + + int len=0; + while(p!=NULL) + { + len++; + //cout<<"data: "<attr_double[0]<<" "<attr_double[1]<<" "<attr_double[2]<next; + } + cout<<"len="<id=i; + datatmp->next=NULL; + j=0; + while(input>>datatmp->attr_double[j]) + { + j++; + fetlen=j; + } + p->next=datatmp; + p=p->next; + } + i++; + } + //¼ì²âÊý¾Ý¼ÓÔØÊÇ·ñÕýÈ· + /*yblen=i; + p=dataSet->next; + for(i=0; p!=NULL; i++) + { + for(j=0; jattr_double[j]<<" "; + } + p=p->next; + cout<feature<<" "<value<<" len="<len<next; + if(t->left!=NULL)//ÕâÀïÖ®ËùÒÔ²»ÔÙÏÂÒ»´ÎµÝ¹éʱ¼ì²ât->leftÊÇ·ñΪNULL£¬ÊÇÒòΪµÝ¹éº¯ÊýÖÐÁíÍâÒ»¸ö²ÎÊýÓõ½ÁËleft->data + //³£¹æµÄÏÈÐò±éÀúÒ»°ãÔÚÏÂÒ»´ÎµÝ¹éΪNULL·µ»Ø + preorder(t->left,t->left->data); + if(t->right!=NULL) + preorder(t->right,t->right->data); + } + return 0; +} +int prune(bitree &t,data *testData) +{ + data *p=testData; + int len=0; + while(p!=NULL) + { + p=p->next; + len++; + } + if(len==0) + return 0; + if(t==NULL)//¼ì²â×ÓÊ÷ÊÇ·ñΪNULL,²»È»ºóÃæµÄ²Ù×÷Ö´Ðв»ÁË + return 0; + if(t->left->feature>-1||t->right->feature>-1) + { + twoSubData twosubdata=binSplitDataSet(testData,t->feature,t->value); + if(t->left->feature>-1) + prune(t->left,twosubdata.left); + if(t->right->feature>-1) + prune(t->right,twosubdata.right); + } + else + { + twoSubData twosubdata=binSplitDataSet(testData,t->feature,t->value); + double errortwo=MeanErr(twosubdata.left)+MeanErr(twosubdata.right); + double errorone=MeanErr(testData); + if(erroroneleft=NULL; + t->right=NULL; + t->feature=-1; + t->value=mean(testData); + return 0; + } + } + +} +int main() +{ + data *dataSet=new data; + dataSet->next=NULL; + loadData(dataSet,0); + //MeanErr(dataSet); + //chooseBestSplit(dataSet,0,0,1); + bitree t; + if(!(t=(bitree)malloc(sizeof(bitnode)))) exit(-1); + //t=NULL; + createBinTree(t,dataSet); + cout<feature< +#include +#include +#include +#define max_vertex_num 20 +#define error -1 +#define ok 1 +#define size 12 +using namespace std; +typedef struct arccell +{ + int adj;//½áµã¼äµÄ¹Øϵ¼´È¨ + char *info; +} arccell,adjmatrix[max_vertex_num][max_vertex_num]; +typedef struct mgraph +{ + int vexs[max_vertex_num];//¸÷¶¥µãÃû³Æ + adjmatrix arcs;//ÁÚ½Ó¾ØÕó + int vexnum,arcnum;//ͼµ±Ç°µÄ¶¥µãÊýºÍ»¡Êý + double reward[max_vertex_num];//ÿ¸ö״̬µÄ»Ø±¨ +} mgraph; +bool visited[max_vertex_num]; +int matrix[size][size]= +{ + {0,1,0,0,1,0,0,0,0,0,0,0}, + {1,0,1,0,0,0,0,0,0,0,0,0}, + {0,1,0,1,0,0,1,0,0,0,0,0}, + {0,0,1,0,0,0,0,1,0,0,0,0}, + {1,0,0,0,0,0,0,0,1,0,0,0}, + {0,0,0,0,0,0,0,0,0,0,0,0}, + {0,0,1,0,0,0,0,1,0,0,1,0}, + {0,0,0,1,0,0,1,0,0,0,0,1}, + {0,0,0,0,1,0,0,0,0,1,0,0}, + {0,0,0,0,0,0,0,0,1,0,1,0}, + {0,0,0,0,0,0,1,0,0,1,0,1}, + {0,0,0,0,0,0,0,1,0,0,1,0} +}; +double reward[size]= +{ + -0.02,-0.02,-0.02,1, + -0.02,0,-0.02,-1, + -0.02,-0.02,-0.02,-0.02 +}; +double sumreward[size]; +double maxreward[size]={0,0,0,0,0,0,0,0,0,0,0,0}; +int count=0; +int createudn(mgraph &g); +void dfstraverse(mgraph &g);//Éî¶È±éÀúͼ +void dfs(mgraph &g,int i); +stack s;//¸ÃÕ»¶¨ÒåΪȫ¾Ö±äÁ¿µÄÔ­ÒòÊÇÈç¹û¶¨ÒåÔÚÓеݹ麯ÊýµÄÌåÄÚ²¿£¬ +//Ôòÿһ´Î¶¼»áÉêÇëÒ»´ÎÕ»£¬Ã¿Ò»´Î²Ù×÷Ò²²»ÊǶÔͬһ¸öÕ»½øÐвÙ×÷ +stackcopys; +int main() +{ + mgraph g; + createudn(g); + int i=0; + dfstraverse(g);//Éî¶È±éÀúºÍ¹ã¶È±éÀú¶øÑ¡ÆäÒ»£¬ÒòΪ¹«ÓÃÁËÈ«¾Ö±äÁ¿ + for(i=0; i0; j--) + { + sumreward[lujin[start]-1]+=g.reward[lujin[j-1]-1]; + cout< +#include +#include +#include +#define max_vertex_num 20 +#define error -1 +#define ok 1 +#define overflow -2 +#define int_max 9999 +#define size 16 +using namespace std; +typedef struct arccell +{ + int adj;//½áµã¼äµÄ¹Øϵ¼´È¨ + char *info; +} arccell,adjmatrix[max_vertex_num][max_vertex_num]; +typedef struct mgraph +{ + int vexs[max_vertex_num];//¸÷¶¥µãÃû³Æ + adjmatrix arcs;//ÁÚ½Ó¾ØÕó + int vexnum,arcnum;//ͼµ±Ç°µÄ¶¥µãÊýºÍ»¡Êý + double reward[max_vertex_num];//ÿ¸ö״̬µÄ»Ø±¨ +} mgraph; +bool visited[max_vertex_num]; +int matrix[size][size]= +{ + {0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0}, + {1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0}, + {0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0}, + {0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0}, + {1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0}, + {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, + {0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0}, + {0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0}, + {0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0}, + {0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0}, + {0,0,0,0,0,0,1,0,0,1,0,1,0,0,1,0}, + {0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1}, + + {0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0}, + {0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0}, + {0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1}, + {0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0} + +}; +double reward[size]= +{ + -0.02,-0.02,-0.02,1, + -0.02,0,-0.02,-1, + -0.02,-0.02,-0.02,-0.02, + -0.02,-0.02,-0.02,-0.02 +}; +double sumreward[size]; +double maxreward[size]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; +int createudn(mgraph &g); +void dfstraverse(mgraph &g);//Éî¶È±éÀúͼ +void dfs(mgraph &g,int i); +stack s;//¸ÃÕ»¶¨ÒåΪȫ¾Ö±äÁ¿µÄÔ­ÒòÊÇÈç¹û¶¨ÒåÔÚÓеݹ麯ÊýµÄÌåÄÚ²¿£¬ +//Ôòÿһ´Î¶¼»áÉêÇëÒ»´ÎÕ»£¬Ã¿Ò»´Î²Ù×÷Ò²²»ÊǶÔͬһ¸öÕ»½øÐвÙ×÷ +stackcopys; +int main() +{ + mgraph g; + createudn(g); + int i=0; + dfstraverse(g);//Éî¶È±éÀúºÍ¹ã¶È±éÀú¶øÑ¡ÆäÒ»£¬ÒòΪ¹«ÓÃÁËÈ«¾Ö±äÁ¿ + //bfstraverse(g); + for(i=0; i0; j--) + { + sumreward[lujin[start]-1]+=g.reward[lujin[j-1]-1]; + cout< +#include +#include +#include +#define max_vertex_num 20 +#define error -1 +#define ok 1 +#define overflow -2 +#define int_max 9999 +#define size 12 +using namespace std; +typedef struct arccell +{ + int adj;//½áµã¼äµÄ¹Øϵ¼´È¨ + char *info; +} arccell,adjmatrix[max_vertex_num][max_vertex_num]; +typedef struct mgraph +{ + int vexs[max_vertex_num];//¸÷¶¥µãÃû³Æ + adjmatrix arcs;//ÁÚ½Ó¾ØÕó + int vexnum,arcnum;//ͼµ±Ç°µÄ¶¥µãÊýºÍ»¡Êý + double reward[max_vertex_num];//ÿ¸ö״̬µÄ»Ø±¨ +} mgraph; +bool visited[max_vertex_num]; +int matrix[size][size]= +{ + {0,1,0,0,1,0,0,0,0,0,0,0}, + {1,0,1,0,0,0,0,0,0,0,0,0}, + {0,1,0,1,0,0,1,0,0,0,0,0}, + {0,0,1,0,0,0,0,1,0,0,0,0}, + {1,0,0,0,0,0,0,0,1,0,0,0}, + {0,0,0,0,0,0,0,0,0,0,0,0}, + {0,0,1,0,0,0,0,1,0,0,1,0}, + {0,0,0,1,0,0,1,0,0,0,0,1}, + {0,0,0,0,1,0,0,0,0,1,0,0}, + {0,0,0,0,0,0,0,0,1,0,1,0}, + {0,0,0,0,0,0,1,0,0,1,0,1}, + {0,0,0,0,0,0,0,1,0,0,1,0} +}; +double reward[size]= +{ + -0.02,-0.02,-0.02,1, + -0.02,0,-0.02,-1, + -0.02,-0.02,-0.02,-0.02 +}; +double sumreward[size]; +double maxreward[size]= {0,0,0,0,0,0,0,0,0,0,0,0}; +int count=0; +int createudn(mgraph &g); +void dfstraverse(mgraph &g);//Éî¶È±éÀúͼ +void dfs(mgraph &g,int i); +stack s;//¸ÃÕ»¶¨ÒåΪȫ¾Ö±äÁ¿µÄÔ­ÒòÊÇÈç¹û¶¨ÒåÔÚÓеݹ麯ÊýµÄÌåÄÚ²¿£¬ +//Ôòÿһ´Î¶¼»áÉêÇëÒ»´ÎÕ»£¬Ã¿Ò»´Î²Ù×÷Ò²²»ÊǶÔͬһ¸öÕ»½øÐвÙ×÷ +stackcopys; +int maiwSum=0;n() +{ + mgraph g; + createudn(g); + int i=0; + dfstraverse(g);//Éî¶È±éÀúºÍ¹ã¶È±éÀú¶øÑ¡ÆäÒ»£¬ÒòΪ¹«ÓÃÁËÈ«¾Ö±äÁ¿ + for(i=0; i0+0.001) + continue; + for(j=0; j=0; j--) + { + sumreward[lujin[start-k]-1]+=g.reward[lujin[j]-1];//¼Ç¼ÀÛ»ý»Ø±¨ + //cout<=0; j--) + { + //sumreward[lujin[start-k]-1]+=g.reward[lujin[j]-1]; + cout< +using namespace std; +#include +#define size 12 +typedef struct subset +{ + int data; + struct subset* next; +} subset; +int main() +{ + int matrix[size][size]= + { + {0,1,0,0,1,0,0,0,0,0,0,0}, + {1,0,1,0,0,0,0,0,0,0,0,0}, + {0,1,0,1,0,0,1,0,0,0,0,0}, + {0,0,0,0,0,0,0,0,0,0,0,0}, + {1,0,0,0,0,0,0,0,1,0,0,0}, + {0,0,0,0,0,0,0,0,0,0,0,0}, + {0,0,1,0,0,0,0,1,0,0,1,0}, + {0,0,0,0,0,0,0,0,0,0,0,0}, + {0,0,0,0,1,0,0,0,0,1,0,0}, + {0,0,0,0,0,0,0,0,1,0,1,0}, + {0,0,0,0,0,0,1,0,0,1,0,1}, + {0,0,0,0,0,0,0,1,0,0,1,0} + }; + double reward[size]= + { + -0.02,-0.02,-0.02,1, + -0.02,0,-0.02,-1, + -0.02,-0.02,-0.02,-0.02 + }; + double maxreward[size]= {0,0,0,0,0,0,0,0,0,0,0,0}; + int i=0,j=0,count=0; + struct subset *maxsubset; + maxsubset=(subset *)malloc(sizeof(subset)*1); + maxsubset->data=-1; + maxsubset->next=NULL; + struct subset *p,*q; + for(i=0; idata=i; + p->next=NULL; + q=maxsubset; + while((q->next)!=NULL) + q=q->next; + q->next=p; + } + } + q=maxsubset->next; + while(q!=NULL)//ÕâÀïͼµÄ¹ã¶È±éÀúûÓÐÓõ½¶ÓÁУ¬µ«Ò²Óõ½Á˶ÓÁеÄ˼Ïë + //¶ÔÓÚµ±Ç°²½Äܵ½´ïµÄ½ÚµãÓÃÁ´±íÁ¬½ÓÆðÀ´£¬È»ºóÖð½¥½øÐÐÏÂÒ»²½µÄÄܵ½´ïµÄ½Úµã½øÐÐÈëÁ´£¨Èë¶ÓÁУ©£¬Í¬ÑùÊÇÒ»ÖÖÏȽøÏȳö˼Ïë + { + for(i=0; idata]==1&&maxreward[i]<0)//doubleÀàÐͱȽϴóСµÄÆ«²î£¬¼ÓÉÏÒ»¸öСÊý×÷Ϊ¾«¶È + { + maxreward[i]=reward[i]+maxreward[q->data]; + p=(subset *)malloc(sizeof(subset)*1); + p->data=i; + p->next=NULL; + q=maxsubset; + while((q->next)!=NULL) + q=q->next; + q->next=p; + } + count++; + } + maxsubset->next=maxsubset->next->next;//ɾ³ýµ±Ç°½Úµã£¬¼´µ±Ç°²½ÏÂÄܵ½´ïµÄ½Úµã¶¼ÒѾ­×ßÍêÁË£¬¿É³ö¶ÓÁÐÁË + q=maxsubset->next;// + } + for(i=0; imax[j]) + { + max[j]=x.mat[i][j]; + } + } + } + /** + Ëæ»ú´ÓÑù±¾ÖÐÑ¡Ôñk¸öÑù±¾×÷ΪÀàÖÐÐÄ + */ + for(k=0; k +#include +#include +#include +#include +#include +#include "matrix.h" +using namespace std; + +/***Êý¾Ý¹éÒ»»¯´¦Àí£¬data[i][j]-min[j]/range[j]**/ +int autoNorm(Matrix x) +{ + int j=0,i=0; + + Matrix minVals; + minVals.initMatrix(&minVals,1,x.row,MAX); + + Matrix maxVals; + maxVals.initMatrix(&maxVals,1,x.row,MIN); + + Matrix ranges; + ranges.initMatrix(&ranges,1,x.row); + + for(j=0; jmaxVals.mat[0][j]) + maxVals.mat[0][j]=x.mat[i][j]; + } + } + for(i=0; isumf) + cout<<"juece="<<"1"<<"&"<<"shiji="< +#include +#include +#include +#include +#include +#include +#include +using namespace std; +#define MAX_SIZE_OF_TRAINING_SET 1000 +#define ATTR_NUM 1000 + + +struct Data +{ + //¿ÉÒÔÓù²ÓÃÌåÀàÐÍ + int id; + double attr_double[ATTR_NUM];//ÓÃÓÚÊýÖµÐÍÊôÐÔ + string attr_string[ATTR_NUM];//ÓÃÓÚÊýÖµÐÍÊôÐÔ + double weight; + Data *next; +}; +class dataToMatrix +{ +public: + Data *dataSet; + int col; + int row; +public: + + /** + ¼ÓÔØÊýÖµÐÍÑù±¾Êý¾Ý + */ + int loadData(dataToMatrix *dtm,char *file) + { + int i=0,j=0; + ifstream infile; + string tmpstrline; + Data *p; + dtm->dataSet=new Data; + dtm->dataSet->next=NULL; + p=dtm->dataSet; + Data *datatmp; + dtm->col=0; + cout<id=i; + datatmp->next=NULL; + j=0; + while(input>>datatmp->attr_double[j])j++; + p->next=datatmp; + p=p->next; + dtm->col++; + } + } + dtm->row=j; + infile.close(); + return 0; + } + /** + ¼ÓÔØ×Ö·ûÐÍÑù±¾Êý¾Ý + */ + int loadData(dataToMatrix *dtm,char *file,int type) + { + int i=0,j=0; + ifstream infile; + string tmpstrline; + Data *p; + dtm->dataSet=new Data; + dtm->dataSet->next=NULL; + p=dtm->dataSet; + Data *datatmp; + dtm->col=1; + cout<next=NULL; + while(!infile.eof()&&i>datatmp->attr_string[j]){j++;}; + } + } + p->next=datatmp; + dtm->row=j; + infile.close(); + return 0; + } + int print(dataToMatrix dtm) + { + //¼ì²âÊý¾Ý¼ÓÔØÊÇ·ñÕýÈ· + int i,j; + Data *p=dtm.dataSet->next; + for(i=0; iattr_string[j]!="") + cout<attr_string[j]<<" "; + else + cout<attr_double[j]<<" "; + } + p=p->next; + cout< +#include +#include +#include +#include "matrix.h" +#include +#include +#include +using namespace std; +#define MAX 1000000 +#define MIN -100000 + +/** +sigmadº¯Êý,ÕâÀïûÓÐÖ±½Ó½«Ô¤²âÀà±ðת»»ÎªÕûÐÍ£¬¶øÊÇ»¹ÊÇ·µ»ØÒ»¸ödoubleÖµ +*/ +double sigmoid(double z) +{ + return 1.0/(1+exp(-z)); +} +/** +ÌݶÈϽµËã·¨£¬Ö÷ÒªÊÇÈ·¶¨¸ºÌݶȷ½Ïò£¬²½³¤£¬²ÉÓõü´úµÄ˼Ïëµü´únÖÁÊÕÁ²£¬ +µ±Ä¿±êº¯ÊýÊÇ͹¹æ»®ÎÊÌ⣬ÄÇô¾Ö²¿×îСֵ¾ÍÊÇÈ«¾Ö×îСֵ + +**/ +int gradAscent(Matrix x,Matrix y) +{ + Matrix weights; + weights.initMatrix(&weights,x.row,1,1);///³õʼ»¯ÓÅ»¯²ÎÊýÏòÁ¿Îª1 + + Matrix xT; + xT.initMatrix(&xT,x.row,x.col); + xT.transposematrix(x,&xT); + + Matrix z; + z.initMatrix(&z,x.col,1); + + Matrix w1; + w1.initMatrix(&w1,x.row,y.row); + + double alpha=0.001;///µü´ú²½³¤ + double error;///¼Ç¼´íÎóÂÊ + int k,c=0; + int i,j; + for(c=0; c<1000; c++) + { + z.multsmatrix(&z,x,weights); + for(i=0; i0) + cout<<1-y.mat[i][0]<0) + cout<<1-y.mat[i][0]< +#include +#include +#include +#include "loadData.h" +#include +#include +#include +#define MAX 1000000 +#define MIN -100000 +#define MAX_MATRIX_COL 1000 +#define MAX_MATRIX_ROW 100 +using namespace std; +class Matrix +{ +public: + double **mat; + int col,row; +public: + int loadMatrix(Matrix *matrix,dataToMatrix dtm) + { + int i,j; + Data *p; + p=dtm.dataSet->next; + matrix->mat=(double **)malloc(sizeof(double*)*dtm.col); + if(!matrix->mat) + { + cout<<"loadMatrix fail"<mat[i]=(double *)malloc(sizeof(double)*dtm.row); + if(!matrix->mat[i]) + { + cout<<"loadmatrix fail"<mat[i][j]=p->attr_double[j]; + } + p=p->next; + } + matrix->row=dtm.row; + matrix->col=dtm.col; + return 0; + } + + /** + newºÍmallocµÄ±¾ÖÊÇø±ðÔÚÓÚ£¬Ç°ÕßÊÇÕë¶Ô¶ÔÏó¶øÑÔ£¬ÎÞÂÛÊÇ»ù±¾ÀàÐÍ»¹ÊÇÀàÀàÐÍ£¬½á¹¹ÌåÀàÐͶ¼¿ÉÒÔ¿´×÷¶ÔÏó£¬ + ʹÓÃnewÊÇ»áµ÷ÓöÔÏóµÄ¹¹Ô캯Êý£¬³õʼ»¯¶ÔÏ󣬼´Ê×Ïȵ÷ÖÅäÄڴ棬ºó³õʼ»¯£¬·ÖÅäÄÚ´æÕâÊÂÄÚ²¿ÊµÏÖ¿ÉÄÜÊǵ÷ÓÃmalloc + ¶ømallocÖ»ÊÇ·ÖÅäÄڴ棬¶ÔÓÚ»ù±¾ÀàÐͶøÑÔ£¬ÕâÊÇûÎÊÌâµÄ£¬ºóÆÚ¿ÉÒÔ¼ÌÐø³õʼ»¯£¬¶ø¶ÔÓÚ¶ÔÏó¶øÑÔ£¬mallocÖ»·ÖÅäÄÚ´æ + ²»µ÷Óù¹Ô캯Êý£¬ËùÒÔ²»»á¶Ô¶ÔÏó½øÐгõʼ»¯ + **/ + + int initMatrix(Matrix *matrix,int col,int row) + { + initMatrix(matrix,col,row,0); + } + int initMatrix(Matrix *matrix,int col,int row,double lam) + { + if(col==0||row==0) + { + cout<<"matrix row or col no can 0"<col=col; + matrix->row=row; + matrix->mat=(double **)malloc(sizeof(double*)*col); + if(!matrix->mat) + { + cout<<"initMatrix fail"<mat[i]=(double *)malloc(sizeof(double)*row); + if(!matrix->mat[i]) + { + cout<<"initMatrix fail"<mat[i][j]=0; + if(i==j) + matrix->mat[i][j]=lam; + } + } + return 0; + } + int print(Matrix matrix) + { + int i,j; + for(i=0; icol!=matrixA.col||matrixB->row!=matrixA.row) + { + cout<<"matrixA matrixB is no "<mat[i][j]=matrixA.mat[i][j]; + } + } + return 0; + } + Matrix getOneRow(Matrix matrix,int iRow) + { + Matrix oneRow; + oneRow.col=matrix.col; + oneRow.row=1; + initMatrix(&oneRow,oneRow.col,oneRow.row); + int i=0; + for(i=0; irow==1) + { + cout<<"matrix is vec"<row; i++) + { + for(j=0;jcol;j++) + { + matrix->mat[j][i]=matrix->mat[j][i+1]; + } + } + matrix->row--; + return 0; + } + void transposematrix(Matrix matrix,Matrix *matrixT)//¾ØÕóÐÎʽµÄתÖà + { + if(matrixT->col!=matrix.row||matrixT->row!=matrix.col) + { + cout<<"matrix matrixT is no "<col; i++) + { + for(j=0; jrow; j++) + { + matrixT->mat[i][j]=matrix.mat[j][i]; + } + } + } + int addmatrix(Matrix *addMatrix,Matrix matrix1,Matrix matrix2) + { + if(matrix1.col!=matrix2.col||matrix1.row!=matrix2.row||addMatrix->col!=matrix1.col||addMatrix->row!=matrix1.row) + { + cout<<"addMatrix matrix1 matrix2 is no"<mat[i][j]=matrix1.mat[i][j]+matrix2.mat[i][j]; + } + } + return 0; + } + + int submatrix(Matrix *subMatrix,Matrix matrix1,Matrix matrix2) + { + if(matrix1.col!=matrix2.col||matrix1.row!=matrix2.row||subMatrix->col!=matrix1.col||subMatrix->row!=matrix1.row) + { + cout<<"subMatrix matrix1 matrix2 is no"<col=matrix1.col; + subMatrix->row=matrix1.row; + for(i=0; imat[i][j]=matrix1.mat[i][j]-matrix2.mat[i][j]; + } + } + return 0; + } + + int multsmatrix(Matrix *multsMatrix,Matrix matrix1,Matrix matrix2)//¾ØÕóÐÎʽµÄÏà³Ë + { + if(matrix1.row!=matrix2.col||multsMatrix->col!=matrix1.col||multsMatrix->row!=matrix2.row) + { + cout<<"multsmatrix error"<mat[i][j]=0; + } + } + for(i=0; imat[i][j]+=matrix1.mat[i][k]*matrix2.mat[k][j]; + } + } + } + return 0; + } + //ÐÐÁÐʽ + double detmatrix(Matrix matrix) + { + if(matrix.col!=matrix.row) + { + cout<<"matrix det is no"<max)//ÿһ´ÎÏûÔª¶¼ÊDZȽϵÚkÁеÄÔªËØ£¬Ñ¡³öµÚkÁÐÖÐ×î´óµÄÒ»ÐÐ + { + swap=i; + } + }//ÕÒµ½µÚk´ÎÁÐÖ÷ÔªÏûÈ¥µÄ×î´óÐеÄϱê + if(swap==-1||matrixCopy.mat[swap][k]==0) + return -1;//×î´óÖ÷ԪΪ0 + for(j=0; ji) + lMatrix.mat[i][j]=0; + } + } + cout<<"lMatrix"<=0; i--) + { + if(i==j) + uniMatrix.mat[i][j]=1/uMatrix.mat[i][j]; + else + { + temp=0; + for(k=j; k>i; k--) + { + temp+=uMatrix.mat[i][k]*uniMatrix.mat[k][j]; + } + uniMatrix.mat[i][j]=-1/uMatrix.mat[i][i]*temp; + } + } + } + cout<<"uniMatrix"< +#include +#include +#include +#include "matrix.h" +#include +#include +#include +using namespace std; +#define MAX 1000000 +#define MIN -100000 + + +/** + +ÏßÐԻع麯ÊýµÄʵÏÖ£¬¿¼ÂÇÒ»°ãµÄÏßÐԻع飬×îСƽ·½ºÍ×÷ΪËðʧº¯Êý£¬ÔòÄ¿±êº¯ÊýÊÇÒ»¸öÎÞÔ¼ÊøµÄ͹¶þ´Î¹æ»®ÎÊÌ⣬ +ÓÉ͹¶þ´Î¹æ»®ÎÊÌâµÄ¼«Ð¡ÖµÔÚµ¼ÊýΪ0´¦È¡µ½£¬ÇÒ¼«Ð¡ÖµÎªÈ«¾Ö×îСֵ£¬ÇÒÓбÕʽ½â¡£¸ù¾ÝÊýѧ±í´ïʽʵÏÖ¾ØÕóÖ®¼äµÄÔËËãÇóµÃ²ÎÊýw¡£ +**/ +int regression(Matrix x,Matrix y) +{ + int i=0; + Matrix xT; + xT.initMatrix(&xT,x.row,x.col); + x.transposematrix(x,&xT); + //xT.print(xT); + + Matrix xTx; + xTx.initMatrix(&xTx,xT.col,x.row); + xTx.multsmatrix(&xTx,xT,x);///xµÄתÖóËÉÏx + //xTx.print(xTx); + + Matrix xTx_1; + xTx_1.initMatrix(&xTx_1,xTx.row,xTx.col); + xTx_1.nimatrix(&xTx_1,xTx);///xµÄתÖóËÉÏxÖ®ºóÇóÄæ + //xTx_1.print(xTx_1); + + Matrix xTx_1xT; + xTx_1xT.initMatrix(&xTx_1xT,xTx_1.col,xT.row); + xTx_1xT.multsmatrix(&xTx_1xT,xTx_1,xT);///ÄæÔÙ³ËÉÏxµÄתÖà + //xTx_1.print(xTx_1xT); + + Matrix ws; + ws.initMatrix(&ws,xTx_1xT.col,y.row); + ws.multsmatrix(&ws,xTx_1xT,y);///ÔÙ³ËÉÏy¾ÍÊÇȨÖØȨÖØÏòÁ¿£¬¶øÇÒÊÇÒ»¸öÁÐÏòÁ¿ + + cout<<"ws"<