From 673a64c4e3d7e2a02cf2893f069f99261e32a42d Mon Sep 17 00:00:00 2001 From: Madhuparna04 Date: Thu, 24 May 2018 03:19:03 +0530 Subject: [PATCH 1/4] Assignment4 added --- .../Week-3/Madhuparna/Assignment4.txt | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 Introduction-to-Data-Science/Week-3/Madhuparna/Assignment4.txt diff --git a/Introduction-to-Data-Science/Week-3/Madhuparna/Assignment4.txt b/Introduction-to-Data-Science/Week-3/Madhuparna/Assignment4.txt new file mode 100644 index 0000000..564977d --- /dev/null +++ b/Introduction-to-Data-Science/Week-3/Madhuparna/Assignment4.txt @@ -0,0 +1,56 @@ +Q1: +Solution: +ab+ mode opens a binary file for appending and reading, creating it if it does not exist; writes are always made +at the end of the file. + +Q2: +Solution: +When a file is read, the data is transferred to a buffer. +Instead of accessing the file from where it is saved, which might +take a longer time, it is better to load the file into a buffer in memory. +While opening a file, a negative buffering value selects the default buffer size, +while zero indicates no buffering. 
+ +Q3: +Solution: + a) try: + file=open("hh.txt","r") + file.readline() + except: + print("Could not open") + + b) + try: + a=a+10 + except: + print("Name 'a' not defined") + +Q4: +Solution: +code: + +import os +f=open("nn.txt","w+") +print(f.name) +f.write("Hello\n File Handling \n in Python\n") +f.close() +f=open("nn.txt","r+") +s=f.readlines() +print(s) +s.reverse() + +f.close() +f=open("nn.txt","w+") +​ +for item in s: + f.write(item) +f.close() +f=open("nn.txt","r+") +print(f.readlines()) +f.close() + + +Output: +nn.txt +['Hello\n', ' File Handling \n', ' in Python\n'] +[' in Python\n', ' File Handling \n', 'Hello\n'] \ No newline at end of file From 2b49c2fb3f723d6072161622b410c7586aa7ce98 Mon Sep 17 00:00:00 2001 From: Madhuparna04 Date: Wed, 30 May 2018 23:31:36 +0530 Subject: [PATCH 2/4] Assignment5 added --- .../Week-3/Madhuparna/Assignment5.ipynb | 114 ++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 Introduction-to-Data-Science/Week-3/Madhuparna/Assignment5.ipynb diff --git a/Introduction-to-Data-Science/Week-3/Madhuparna/Assignment5.ipynb b/Introduction-to-Data-Science/Week-3/Madhuparna/Assignment5.ipynb new file mode 100644 index 0000000..b743621 --- /dev/null +++ b/Introduction-to-Data-Science/Week-3/Madhuparna/Assignment5.ipynb @@ -0,0 +1,114 @@ +Question1: +Solution: +A class is a code template for creating objects. +Objects have member variables and have behaviour associated with them. + +Question2: +Solution: +A variable of class type which has all its attributes is called an object. + +Question3: +Solution: +A class defines the properties and behavior for the objects represented by the abstraction. + Abstraction is a property of object oriented programming. It denotes the essential properties + and behaviors of an object. It hides code and data. A class thus denotes a category of objects + and act as a blueprint for creating such objects. + An object exhibits the property and behaviors defined by its class. 
+ + Question4: + Solution: + Syntax: + class NEWCLASS: + def printhello(self): + print(hello) + +Question 5: +Solution: +A method is a function that takes a class instance as its first parameter. +Methods are members of classes. +e.g. printhello(self) is a method. + +Question6: +Solution: +Self refers to the object whose method was called. + +Question7: +Solution: +__init__ is a constructor. It is automatically called when an object +of the class is created. + +Question8: +Solution: +When a class inherits from another class, its objects can access methods from +the inherited class also. +Thus we need not write methods again for the inheriting class. + + +Question 9: +Solution: +import random +import numpy as np +class deck_of_card: + + class card: + def __init__(self,suit,num): + self.suit=suit + self.num=num + cards = [] + def __init__(self): + for i in range(13): + self.cards.append(self.card('ace',i)) + + for i in range(13): + self.cards.append(self.card('heart',i)) + for i in range(13): + self.cards.append(self.card('diamond',i)) + for i in range(13): + self.cards.append(self.card('club',i)) + print(len(self.cards)) + print(self.cards[51].suit) + def deal(self,suit,num): + i=0 + for i in range(51): + + if self.cards[i].suit==suit and self.cards[i].num == num: + del self.cards[i] + def shuffle(self): + if (len(self.cards))==52: + random.shuffle(self.cards) + for i in range(52): + print(self.cards[i].suit, self.cards[i].num) + else: + print('All cards are not there',len(self.cards)) + +Question 10: +Solution: + class person: + def __init__(self,first='',last='',phno='',el=[]): + self.first_name=first + self.last_name=last + self.phone_num=phno + self.email=el + def cont(self): + print(self.first_name) + print(self.last_name) + print(self.phone_num) + print(self.email) + +class address_book: + def __init__(self): + self.adbook=[] + def add_contact(self,f,l,ph,em): + self.adbook.append(person(f,l,ph,em)) + def look_up_contact(self,l,f=''): + for i in 
range(len(self.adbook)): + if self.adbook[i].last_name==l: + print(self.adbook[i].cont()); + +a=address_book() +a.add_contact('madhu','parna',6888,'ms@gmail.com') +a.look_up_contact('parna') + + + + From 2c5fd46abf1624787f5957364103c0f02b5a9139 Mon Sep 17 00:00:00 2001 From: Madhuparna04 Date: Thu, 14 Jun 2018 19:01:54 +0530 Subject: [PATCH 3/4] Pandas Assignment --- .../Madhuparna/Assignment 6-Pandas.ipynb | 283 ++++++++++++++++++ 1 file changed, 283 insertions(+) create mode 100644 Introduction-to-Data-Science/Week-5/Madhuparna/Assignment 6-Pandas.ipynb diff --git a/Introduction-to-Data-Science/Week-5/Madhuparna/Assignment 6-Pandas.ipynb b/Introduction-to-Data-Science/Week-5/Madhuparna/Assignment 6-Pandas.ipynb new file mode 100644 index 0000000..7a65250 --- /dev/null +++ b/Introduction-to-Data-Science/Week-5/Madhuparna/Assignment 6-Pandas.ipynb @@ -0,0 +1,283 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 248, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "player=pd.read_csv(\"DIM_PLAYER.csv\",delimiter=',',encoding=\"ISO-8859-1\")\n", + "player_match=pd.read_csv(\"DIM_PLAYER_MATCH.csv\",delimiter=',',encoding=\"ISO-8859-1\",skiprows=[1])\n", + "team=pd.read_csv(\"DIM_TEAM.csv\",delimiter=',',encoding=\"ISO-8859-1\")" + ] + }, + { + "cell_type": "code", + "execution_count": 249, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SEASON 2008 :\n", + "\n", + "Total number of players: 163\n", + "Total number of young players : 71\n", + "Percentage of young players is : 43.558282208588956 %\n", + "Team with max number of young players is : Mumbai Indians \n", + "\n", + "SEASON 2009 :\n", + "\n", + "Total number of players: 165\n", + "Total number of young players : 72\n", + "Percentage of young players is : 43.63636363636363 %\n", + "Team with max number of young players is : Royal Challengers Bangalore \n", + "\n", + "SEASON 2010 :\n", + 
"\n", + "Total number of players: 181\n", + "Total number of young players : 66\n", + "Percentage of young players is : 36.46408839779006 %\n", + "Team with max number of young players is : Kolkata Knight Riders \n", + "\n", + "SEASON 2011 :\n", + "\n", + "Total number of players: 199\n", + "Total number of young players : 61\n", + "Percentage of young players is : 30.65326633165829 %\n", + "Team with max number of young players is : Pune Warriors \n", + "\n", + "SEASON 2012 :\n", + "\n", + "Total number of players: 190\n", + "Total number of young players : 64\n", + "Percentage of young players is : 33.68421052631579 %\n", + "Team with max number of young players is : Delhi Daredevils \n", + "\n", + "SEASON 2013 :\n", + "\n", + "Total number of players: 197\n", + "Total number of young players : 65\n", + "Percentage of young players is : 32.99492385786802 %\n", + "Team with max number of young players is : Pune Warriors \n", + "\n", + "SEASON 2014 :\n", + "\n", + "Total number of players: 152\n", + "Total number of young players : 50\n", + "Percentage of young players is : 32.89473684210527 %\n", + "Team with max number of young players is : Rajasthan Royals \n", + "\n", + "SEASON 2015 :\n", + "\n", + "Total number of players: 145\n", + "Total number of young players : 45\n", + "Percentage of young players is : 31.03448275862069 %\n", + "Team with max number of young players is : Kings XI Punjab \n", + "\n", + "SEASON 2016 :\n", + "\n", + "Total number of players: 159\n", + "Total number of young players : 43\n", + "Percentage of young players is : 27.044025157232703 %\n", + "Team with max number of young players is : Delhi Daredevils \n", + "\n", + "SEASON 2017 :\n", + "\n", + "Total number of players: 161\n", + "Total number of young players : 40\n", + "Percentage of young players is : 24.84472049689441 %\n", + "Team with max number of young players is : Gujarat Lions \n", + "\n", + "Percentage of young players won Man of the match is 24.722662440570524 %\n" + ] 
+ } + ], + "source": [ + "#QUESTION 1\n", + "#part a:\n", + "teams_list=player_match['Player_team'].unique()\n", + "for i in range(2008,2018):\n", + " print('SEASON ',i,':\\n')\n", + " a = player_match[(player_match.Age_As_on_match<=25)&(player_match.Season_year==i)]['Player_Name'].unique()\n", + " b=player_match[(player_match.Season_year==i)]['Player_Name'].unique()\n", + " print('Total number of players:',len(b))\n", + " print('Total number of young players :',len(a))\n", + " print('Percentage of young players is : ',(len(a)/len(b))*100,'%')\n", + " maxnum=0;\n", + " maxteam=0\n", + " for j in range(len(teams_list)):\n", + " nu=player_match[(player_match.Age_As_on_match<=25)&(player_match.Season_year==i)&(player_match.Player_team==teams_list[j])]['Player_Name'].unique()\n", + " num=len(nu)\n", + " \n", + " if num>maxnum:\n", + " maxnum=num\n", + " maxteam=j\n", + " print('Team with max number of young players is :',teams_list[maxteam],'\\n')\n", + " \n", + "#part b:\n", + "motm=player_match[player_match.is_manofThematch==1]\n", + "motm_young=player_match[(player_match.is_manofThematch==1)&(player_match.Age_As_on_match<=25)]\n", + "print(\"Percentage of young players won Man of the match is \",(len(motm_young)/len(motm))*100,'%')" + ] + }, + { + "cell_type": "code", + "execution_count": 250, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of times a team having a Right handed player won is 632 \n", + "\n", + "Number of times a team having a Left handed player won is 610 \n", + "\n", + "Number of times a right handed player won Man of the Match 430 \n", + "\n", + "Number of times a left handed player won Man of the Match 199 \n", + "\n" + ] + } + ], + "source": [ + "#QUESTION 2\n", + "if len(player_match['Batting_hand'].value_counts())==5 :# So that if you run it multiple times it doesn't change the value\n", + " player_match['Batting_hand'] = player_match['Batting_hand'].map({'Right-hand 
bat':'Right-handed',\n", + " '\\xa0Right-hand bat':'Right-handed',\n", + " 'Left-hand bat' : 'Left-handed',\n", + " '\\xa0Left-hand bat' : 'Left-handed'}) \n", + "\n", + "rb=player_match[(player_match.Batting_hand=='Right-handed')&(player_match.IsPlayers_Team_won==1)]['Match_Id'].unique()\n", + "lb=player_match[(player_match.Batting_hand=='Left-handed')&(player_match.IsPlayers_Team_won==1)]['Match_Id'].unique()\n", + "\n", + "print('Number of times a team having a Right handed player won is ',len(rb),'\\n')\n", + "print('Number of times a team having a Left handed player won is ',len(lb),'\\n')\n", + "\n", + "rbm=player_match[(player_match.Batting_hand=='Right-handed')&(player_match.is_manofThematch==1)]\n", + "lbm=player_match[(player_match.Batting_hand=='Left-handed')&(player_match.is_manofThematch==1)]\n", + "print('Number of times a right handed player won Man of the Match',len(rbm),'\\n')\n", + "print('Number of times a left handed player won Man of the Match',len(lbm),'\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": 251, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "BEST ELEVEN : \n", + "\n", + "YK Pathan 16\n", + "AB de Villiers 15\n", + "Name: Player_Name, dtype: int64\n", + "CH Gayle 18\n", + "DA Warner 15\n", + "Name: Player_Name, dtype: int64\n", + "MS Dhoni 13\n", + "Name: Player_Name, dtype: int64\n", + "SK Raina 14\n", + "Name: Player_Name, dtype: int64\n", + "AM Rahane 12\n", + "MEK Hussey 12\n", + "V Kohli 11\n", + "Name: Player_Name, dtype: int64\n", + "RG Sharma 14\n", + "Name: Player_Name, dtype: int64\n", + "G Gambhir 13\n", + "Name: Player_Name, dtype: int64\n" + ] + } + ], + "source": [ + "#QUESTION 3\n", + "print(\"BEST ELEVEN :\",'\\n')\n", + "\n", + "rpp=player_match[(player_match.Batting_hand=='Right-handed')&(player_match.is_manofThematch==1)]['Player_Name'].value_counts()\n", + "print(rpp[0:2])\n", + 
"lpp=player_match[(player_match.Batting_hand=='Left-handed')&(player_match.is_manofThematch==1)]['Player_Name'].value_counts()\n", + "print(lpp[0:2])\n", + "wk=player_match[((player_match.Player_Name)==(player_match.Player_keeper))&(player_match.is_manofThematch==1)]['Player_Name'].value_counts()\n", + "print(wk[0:1])\n", + "\n", + "ar=player_match[(player_match.Bowling_skill!='Is Null value')&(player_match.is_manofThematch==1)]['Player_Name'].value_counts()\n", + "print(ar[4:5])\n", + "p=player_match[((player_match.Bowling_skill.str.contains('fast'))|(player_match.Bowling_skill.str.contains('medium')))&(player_match.is_manofThematch==1)]['Player_Name'].value_counts()\n", + "print(p[2:5])\n", + "pp=player_match[(((player_match.Bowling_skill.str.contains('fast')==False)& ((player_match.Bowling_skill.str.contains('medium')==False)))&(player_match.is_manofThematch==1))]['Player_Name'].value_counts()\n", + "print(pp[3:4])\n", + "print(pp[5:6])" + ] + }, + { + "cell_type": "code", + "execution_count": 252, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mumbai Indians won 16 times vs Kolkata Knight Riders \n", + "\n" + ] + } + ], + "source": [ + "#QUESTION 4\n", + "maxwon=0\n", + "for i in range(0,len(teams_list)):\n", + " for j in range(0,len(teams_list)):\n", + " vs=player_match[(player_match.Player_team==teams_list[i])&(player_match.Opposit_Team==teams_list[j])&(player_match.IsPlayers_Team_won==1)]['Match_Id'].unique()\n", + " if len(vs)>maxwon:\n", + " maxwon=len(vs)\n", + " winteam=teams_list[i]\n", + " lteam=teams_list[j]\n", + "\n", + "print(winteam,' won ',maxwon,' times vs ',lteam,'\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": 
"python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 4c5eeda1d5ab468e1f59272754ca990ec6ee978c Mon Sep 17 00:00:00 2001 From: Madhuparna04 Date: Wed, 20 Jun 2018 22:33:51 +0530 Subject: [PATCH 4/4] Classification using sklearn --- .../Classifcation using sklearn.ipynb | 343 ++++++++++++++++++ 1 file changed, 343 insertions(+) create mode 100644 Introduction-to-Data-Science/Week-6/Madhuparna/Classifcation using sklearn.ipynb diff --git a/Introduction-to-Data-Science/Week-6/Madhuparna/Classifcation using sklearn.ipynb b/Introduction-to-Data-Science/Week-6/Madhuparna/Classifcation using sklearn.ipynb new file mode 100644 index 0000000..b0073ed --- /dev/null +++ b/Introduction-to-Data-Science/Week-6/Madhuparna/Classifcation using sklearn.ipynb @@ -0,0 +1,343 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from sklearn import datasets\n", + "from sklearn.datasets import load_breast_cancer\n", + "cancer = load_breast_cancer()\n", + "import matplotlib.pyplot as plt\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.neighbors import KNeighborsClassifier\n", + "from sklearn.svm import SVC\n", + "from sklearn.neural_network import MLPClassifier\n", + "from sklearn import preprocessing\n", + "from sklearn.model_selection import train_test_split" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Breast Cancer Wisconsin (Diagnostic) Database\n", + "=============================================\n", + "\n", + "Notes\n", + "-----\n", + "Data Set 
Characteristics:\n", + " :Number of Instances: 569\n", + "\n", + " :Number of Attributes: 30 numeric, predictive attributes and the class\n", + "\n", + " :Attribute Information:\n", + " - radius (mean of distances from center to points on the perimeter)\n", + " - texture (standard deviation of gray-scale values)\n", + " - perimeter\n", + " - area\n", + " - smoothness (local variation in radius lengths)\n", + " - compactness (perimeter^2 / area - 1.0)\n", + " - concavity (severity of concave portions of the contour)\n", + " - concave points (number of concave portions of the contour)\n", + " - symmetry \n", + " - fractal dimension (\"coastline approximation\" - 1)\n", + "\n", + " The mean, standard error, and \"worst\" or largest (mean of the three\n", + " largest values) of these features were computed for each image,\n", + " resulting in 30 features. For instance, field 3 is Mean Radius, field\n", + " 13 is Radius SE, field 23 is Worst Radius.\n", + "\n", + " - class:\n", + " - WDBC-Malignant\n", + " - WDBC-Benign\n", + "\n", + " :Summary Statistics:\n", + "\n", + " ===================================== ====== ======\n", + " Min Max\n", + " ===================================== ====== ======\n", + " radius (mean): 6.981 28.11\n", + " texture (mean): 9.71 39.28\n", + " perimeter (mean): 43.79 188.5\n", + " area (mean): 143.5 2501.0\n", + " smoothness (mean): 0.053 0.163\n", + " compactness (mean): 0.019 0.345\n", + " concavity (mean): 0.0 0.427\n", + " concave points (mean): 0.0 0.201\n", + " symmetry (mean): 0.106 0.304\n", + " fractal dimension (mean): 0.05 0.097\n", + " radius (standard error): 0.112 2.873\n", + " texture (standard error): 0.36 4.885\n", + " perimeter (standard error): 0.757 21.98\n", + " area (standard error): 6.802 542.2\n", + " smoothness (standard error): 0.002 0.031\n", + " compactness (standard error): 0.002 0.135\n", + " concavity (standard error): 0.0 0.396\n", + " concave points (standard error): 0.0 0.053\n", + " symmetry (standard 
error): 0.008 0.079\n", + " fractal dimension (standard error): 0.001 0.03\n", + " radius (worst): 7.93 36.04\n", + " texture (worst): 12.02 49.54\n", + " perimeter (worst): 50.41 251.2\n", + " area (worst): 185.2 4254.0\n", + " smoothness (worst): 0.071 0.223\n", + " compactness (worst): 0.027 1.058\n", + " concavity (worst): 0.0 1.252\n", + " concave points (worst): 0.0 0.291\n", + " symmetry (worst): 0.156 0.664\n", + " fractal dimension (worst): 0.055 0.208\n", + " ===================================== ====== ======\n", + "\n", + " :Missing Attribute Values: None\n", + "\n", + " :Class Distribution: 212 - Malignant, 357 - Benign\n", + "\n", + " :Creator: Dr. William H. Wolberg, W. Nick Street, Olvi L. Mangasarian\n", + "\n", + " :Donor: Nick Street\n", + "\n", + " :Date: November, 1995\n", + "\n", + "This is a copy of UCI ML Breast Cancer Wisconsin (Diagnostic) datasets.\n", + "https://goo.gl/U2Uwz2\n", + "\n", + "Features are computed from a digitized image of a fine needle\n", + "aspirate (FNA) of a breast mass. They describe\n", + "characteristics of the cell nuclei present in the image.\n", + "\n", + "Separating plane described above was obtained using\n", + "Multisurface Method-Tree (MSM-T) [K. P. Bennett, \"Decision Tree\n", + "Construction Via Linear Programming.\" Proceedings of the 4th\n", + "Midwest Artificial Intelligence and Cognitive Science Society,\n", + "pp. 97-101, 1992], a classification method which uses linear\n", + "programming to construct a decision tree. Relevant features\n", + "were selected using an exhaustive search in the space of 1-4\n", + "features and 1-3 separating planes.\n", + "\n", + "The actual linear program used to obtain the separating plane\n", + "in the 3-dimensional space is that described in:\n", + "[K. P. Bennett and O. L. 
Mangasarian: \"Robust Linear\n", + "Programming Discrimination of Two Linearly Inseparable Sets\",\n", + "Optimization Methods and Software 1, 1992, 23-34].\n", + "\n", + "This database is also available through the UW CS ftp server:\n", + "\n", + "ftp ftp.cs.wisc.edu\n", + "cd math-prog/cpo-dataset/machine-learn/WDBC/\n", + "\n", + "References\n", + "----------\n", + " - W.N. Street, W.H. Wolberg and O.L. Mangasarian. Nuclear feature extraction \n", + " for breast tumor diagnosis. IS&T/SPIE 1993 International Symposium on \n", + " Electronic Imaging: Science and Technology, volume 1905, pages 861-870,\n", + " San Jose, CA, 1993.\n", + " - O.L. Mangasarian, W.N. Street and W.H. Wolberg. Breast cancer diagnosis and \n", + " prognosis via linear programming. Operations Research, 43(4), pages 570-577, \n", + " July-August 1995.\n", + " - W.H. Wolberg, W.N. Street, and O.L. Mangasarian. Machine learning techniques\n", + " to diagnose breast cancer from fine-needle aspirates. Cancer Letters 77 (1994) \n", + " 163-171.\n", + "\n" + ] + } + ], + "source": [ + "print(cancer.DESCR)" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['mean radius' 'mean texture' 'mean perimeter' 'mean area'\n", + " 'mean smoothness' 'mean compactness' 'mean concavity'\n", + " 'mean concave points' 'mean symmetry' 'mean fractal dimension'\n", + " 'radius error' 'texture error' 'perimeter error' 'area error'\n", + " 'smoothness error' 'compactness error' 'concavity error'\n", + " 'concave points error' 'symmetry error' 'fractal dimension error'\n", + " 'worst radius' 'worst texture' 'worst perimeter' 'worst area'\n", + " 'worst smoothness' 'worst compactness' 'worst concavity'\n", + " 'worst concave points' 'worst symmetry' 'worst fractal dimension']\n" + ] + }, + { + "data": { + "text/plain": [ + "(569, 30)" + ] + }, + "execution_count": 91, + "metadata": {}, + "output_type": 
"execute_result" + } + ], + "source": [ + "print(cancer.feature_names)\n", + "x_train, x_test, y_train, y_test = train_test_split(cancer.data,cancer.target, random_state=0)\n", + "#Feature scaling\n", + "x_train = preprocessing.scale(x_train)\n", + "x_test=preprocessing.scale(x_test)\n", + "cancer.data.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Traning set accuracy is: 0.9906103286384976\n", + "Test set accuracy is: 0.958041958041958\n" + ] + } + ], + "source": [ + "#Using Logistic Regression\n", + "log_reg = LogisticRegression()\n", + "log_reg.fit(x_train, y_train)\n", + "print('Traning set accuracy is:',format(log_reg.score(x_train,y_train)))\n", + "print('Test set accuracy is:',format(log_reg.score(x_test,y_test)))" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Traning set accuracy is: 0.9859154929577465\n", + "Test set accuracy is: 0.965034965034965\n" + ] + } + ], + "source": [ + "#Using SVM\n", + "svm= SVC()\n", + "svm.fit(x_train, y_train)\n", + "\n", + "print('Traning set accuracy is:',format(svm.score(x_train,y_train)))\n", + "print('Test set accuracy is:',format(svm.score(x_test,y_test)))" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Maximum Accuracy obtained for k= 3\n", + "Train Accuracy = 0.9835680751173709\n", + "Test Accuracy= 0.951048951048951\n" + ] + } + ], + "source": [ + "#Using KNN\n", + "max_accuracy=0;\n", + "train_accu=0;\n", + "k_neighbour=0\n", + "for i in range(1,10):\n", + " knn = KNeighborsClassifier(n_neighbors=i)\n", + " knn.fit(x_train,y_train);\n", + " tscore=knn.score(x_train,y_train)\n", + " score=knn.score(x_test,y_test);\n", + " if(score>max_accuracy):\n", + " max_accuracy=score\n", 
+ " train_accu=tscore\n", + " k_neighbour=i\n", + "\n", + "print('Maximum Accuracy obtained for k=',k_neighbour)\n", + "print('Train Accuracy =',train_accu)\n", + "print('Test Accuracy=',max_accuracy)" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Maximum accuracy obtained for alpha = 0.2\n", + "Traning set accuracy is: 0.9906103286384976\n", + "Test set accuracy is: 0.972027972027972\n" + ] + } + ], + "source": [ + "#Using Neural Network\n", + "#choosing value of alpha\n", + "i=0.1\n", + "best_alpha=0\n", + "max_accu=0\n", + "train_accu=0\n", + "while i<2:\n", + " mlp = MLPClassifier(max_iter=1000,alpha=i,random_state=42)\n", + " mlp.fit(x_train, y_train)\n", + " score=(mlp.score(x_train,y_train))\n", + " test_score=(mlp.score(x_test,y_test))\n", + " if max_accu