From 673a64c4e3d7e2a02cf2893f069f99261e32a42d Mon Sep 17 00:00:00 2001
From: Madhuparna04 <madhuparnabhowmik04@gmail.com>
Date: Thu, 24 May 2018 03:19:03 +0530
Subject: [PATCH 1/4] Assignment4 added

---
 .../Week-3/Madhuparna/Assignment4.txt         | 56 +++++++++++++++++++
 1 file changed, 56 insertions(+)
 create mode 100644 Introduction-to-Data-Science/Week-3/Madhuparna/Assignment4.txt

diff --git a/Introduction-to-Data-Science/Week-3/Madhuparna/Assignment4.txt b/Introduction-to-Data-Science/Week-3/Madhuparna/Assignment4.txt
new file mode 100644
index 0000000..564977d
--- /dev/null
+++ b/Introduction-to-Data-Science/Week-3/Madhuparna/Assignment4.txt
@@ -0,0 +1,56 @@
+Q1:
+Solution:
+ab+ mode opens a binary file for both appending and reading, creating the file if it
+does not exist; all writes go to the end of the file.
+
+Q2:
+Solution:
+When a file is read, its data is first transferred to a buffer in memory.
+Accessing the storage device directly for every read or write is slow,
+so it is faster to work on a chunk of the file held in a memory buffer.
+When opening a file, a negative buffering value selects the default buffer size,
+while zero disables buffering (allowed only in binary mode).
+
+Q3:
+Solution:
+	a) 			try:
+					with open("hh.txt","r") as file:  # closes the file even on error
+						file.readline()
+				except OSError:  # e.g. FileNotFoundError, PermissionError
+					print("Could not open")
+
+	b)
+				try:
+					a=a+10  # 'a' was never assigned, so reading it raises NameError
+				except NameError:
+					print("Name 'a' not defined")
+					
+Q4:
+Solution:
+code:
+
+import os
+# Create the file and write three lines to it.
+with open("nn.txt", "w") as f:
+    print(f.name)
+    f.write("Hello\n File Handling \n in Python\n")
+
+# Read the lines back and reverse their order in memory.
+with open("nn.txt", "r") as f:
+    s = f.readlines()
+print(s)
+s.reverse()
+
+# Overwrite the file with the reversed lines.
+with open("nn.txt", "w") as f:
+    f.writelines(s)
+
+# Re-read the file to confirm the new order.
+with open("nn.txt", "r") as f:
+    print(f.readlines())
+
+
+Output:
+nn.txt
+['Hello\n', ' File Handling \n', ' in Python\n']
+[' in Python\n', ' File Handling \n', 'Hello\n']
\ No newline at end of file

From 2b49c2fb3f723d6072161622b410c7586aa7ce98 Mon Sep 17 00:00:00 2001
From: Madhuparna04 <madhuparnabhowmik04@gmail.com>
Date: Wed, 30 May 2018 23:31:36 +0530
Subject: [PATCH 2/4] Assignment5 added

---
 .../Week-3/Madhuparna/Assignment5.ipynb       | 114 ++++++++++++++++++
 1 file changed, 114 insertions(+)
 create mode 100644 Introduction-to-Data-Science/Week-3/Madhuparna/Assignment5.ipynb

diff --git a/Introduction-to-Data-Science/Week-3/Madhuparna/Assignment5.ipynb b/Introduction-to-Data-Science/Week-3/Madhuparna/Assignment5.ipynb
new file mode 100644
index 0000000..b743621
--- /dev/null
+++ b/Introduction-to-Data-Science/Week-3/Madhuparna/Assignment5.ipynb
@@ -0,0 +1,114 @@
+Question1:
+Solution:
+A class is a code template for creating objects.
+Objects have member variables and have behaviour associated with them.
+
+Question2:
+Solution:
+An object is an instance of a class; it carries the attributes and methods that the class defines.
+
+Question3:
+Solution:
+A class defines the properties and behavior of the objects represented by the abstraction.
+Abstraction is a property of object-oriented programming. It denotes the essential properties
+and behaviors of an object while hiding the underlying code and data. A class thus denotes a category of objects
+and acts as a blueprint for creating such objects.
+An object exhibits the properties and behaviors defined by its class.
+
+ Question4:
+ Solution:
+ Syntax:
+class NEWCLASS:
+	def printhello(self):  # a method: receives the instance as 'self'
+		print("hello")  # quoted: a bare hello would be an undefined name
+		
+Question 5:
+Solution:
+A method is a function that takes a class instance as its first parameter. 
+Methods are members of classes.
+eg. printhello(self) is a method.
+
+Question6:
+Solution:
+Self refers to the object whose method was called.
+
+Question7:
+Solution:
+__init__ is the constructor. It is automatically called when an object
+of the class is created.
+
+Question8:
+Solution:
+When a class inherits from another class, its objects can also access the methods of
+the inherited (parent) class.
+Thus we need not write methods again for the inheriting class.
+
+
+Question 9:
+Solution:
+import random
+import numpy as np
+class deck_of_card:
+    """A standard 52-card deck: four suits, each with ranks numbered 0..12."""
+    class card:
+        """A single playing card identified by suit and rank number."""
+        def __init__(self,suit,num):
+            self.suit=suit
+            self.num=num
+
+    SUITS = ('spade','heart','diamond','club')
+
+    def __init__(self):
+        # Build the list per instance; a class-level list is shared by all decks.
+        self.cards = [self.card(s,n) for s in self.SUITS for n in range(13)]
+        print(len(self.cards))
+        print(self.cards[51].suit)
+
+    def deal(self,suit,num):
+        """Remove the matching card and return it; return None if it is absent."""
+        for i, c in enumerate(self.cards):
+            if c.suit==suit and c.num == num:
+                # Return right after removal so the list is never scanned while shrinking.
+                return self.cards.pop(i)
+        return None
+
+    def shuffle(self):
+        """Shuffle and print the deck, but only if all 52 cards are present."""
+        if len(self.cards)==52:
+            random.shuffle(self.cards)
+            for c in self.cards:
+                print(c.suit, c.num)
+        else:
+            print('All cards are not there',len(self.cards))
+
+Question 10:
+Solution:
+class person:
+    """A contact record; cont() prints its fields one per line."""
+    def __init__(self,first='',last='',phno='',el=None):
+        self.first_name=first
+        self.last_name=last
+        self.phone_num=phno
+        # None stands for "no e-mail"; a [] default would be shared by every instance.
+        self.email=[] if el is None else el
+    def cont(self):
+        for value in (self.first_name,self.last_name,self.phone_num,self.email):
+            print(value)
+    
+class address_book:
+    def __init__(self):
+        self.adbook=[]  # person objects in insertion order
+    def add_contact(self,f,l,ph,em):
+        self.adbook.append(person(f,l,ph,em))
+    def look_up_contact(self,l,f=''):  # f='' matches any first name
+        for p in self.adbook:
+            if p.last_name==l and f in ('',p.first_name):
+                p.cont()  # cont() prints the contact itself and returns None
+        
+a=address_book()  # demo: build an address book holding one contact
+a.add_contact('madhu','parna',6888,'ms@gmail.com')
+a.look_up_contact('parna')  # look the contact up by last name
+ 
+
+
+ 

From 2c5fd46abf1624787f5957364103c0f02b5a9139 Mon Sep 17 00:00:00 2001
From: Madhuparna04 <madhuparnabhowmik04@gmail.com>
Date: Thu, 14 Jun 2018 19:01:54 +0530
Subject: [PATCH 3/4] Pandas Assignment

---
 .../Madhuparna/Assignment 6-Pandas.ipynb      | 283 ++++++++++++++++++
 1 file changed, 283 insertions(+)
 create mode 100644 Introduction-to-Data-Science/Week-5/Madhuparna/Assignment 6-Pandas.ipynb

diff --git a/Introduction-to-Data-Science/Week-5/Madhuparna/Assignment 6-Pandas.ipynb b/Introduction-to-Data-Science/Week-5/Madhuparna/Assignment 6-Pandas.ipynb
new file mode 100644
index 0000000..7a65250
--- /dev/null
+++ b/Introduction-to-Data-Science/Week-5/Madhuparna/Assignment 6-Pandas.ipynb	
@@ -0,0 +1,283 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 248,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "player=pd.read_csv(\"DIM_PLAYER.csv\",delimiter=',',encoding=\"ISO-8859-1\")\n",
+    "player_match=pd.read_csv(\"DIM_PLAYER_MATCH.csv\",delimiter=',',encoding=\"ISO-8859-1\",skiprows=[1])\n",
+    "team=pd.read_csv(\"DIM_TEAM.csv\",delimiter=',',encoding=\"ISO-8859-1\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 249,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "SEASON  2008 :\n",
+      "\n",
+      "Total number of players: 163\n",
+      "Total number of young players : 71\n",
+      "Percentage of young players is :  43.558282208588956 %\n",
+      "Team with max number of young players is : Mumbai Indians \n",
+      "\n",
+      "SEASON  2009 :\n",
+      "\n",
+      "Total number of players: 165\n",
+      "Total number of young players : 72\n",
+      "Percentage of young players is :  43.63636363636363 %\n",
+      "Team with max number of young players is : Royal Challengers Bangalore \n",
+      "\n",
+      "SEASON  2010 :\n",
+      "\n",
+      "Total number of players: 181\n",
+      "Total number of young players : 66\n",
+      "Percentage of young players is :  36.46408839779006 %\n",
+      "Team with max number of young players is : Kolkata Knight Riders \n",
+      "\n",
+      "SEASON  2011 :\n",
+      "\n",
+      "Total number of players: 199\n",
+      "Total number of young players : 61\n",
+      "Percentage of young players is :  30.65326633165829 %\n",
+      "Team with max number of young players is : Pune Warriors \n",
+      "\n",
+      "SEASON  2012 :\n",
+      "\n",
+      "Total number of players: 190\n",
+      "Total number of young players : 64\n",
+      "Percentage of young players is :  33.68421052631579 %\n",
+      "Team with max number of young players is : Delhi Daredevils \n",
+      "\n",
+      "SEASON  2013 :\n",
+      "\n",
+      "Total number of players: 197\n",
+      "Total number of young players : 65\n",
+      "Percentage of young players is :  32.99492385786802 %\n",
+      "Team with max number of young players is : Pune Warriors \n",
+      "\n",
+      "SEASON  2014 :\n",
+      "\n",
+      "Total number of players: 152\n",
+      "Total number of young players : 50\n",
+      "Percentage of young players is :  32.89473684210527 %\n",
+      "Team with max number of young players is : Rajasthan Royals \n",
+      "\n",
+      "SEASON  2015 :\n",
+      "\n",
+      "Total number of players: 145\n",
+      "Total number of young players : 45\n",
+      "Percentage of young players is :  31.03448275862069 %\n",
+      "Team with max number of young players is : Kings XI Punjab \n",
+      "\n",
+      "SEASON  2016 :\n",
+      "\n",
+      "Total number of players: 159\n",
+      "Total number of young players : 43\n",
+      "Percentage of young players is :  27.044025157232703 %\n",
+      "Team with max number of young players is : Delhi Daredevils \n",
+      "\n",
+      "SEASON  2017 :\n",
+      "\n",
+      "Total number of players: 161\n",
+      "Total number of young players : 40\n",
+      "Percentage of young players is :  24.84472049689441 %\n",
+      "Team with max number of young players is : Gujarat Lions \n",
+      "\n",
+      "Percentage of young players won Man of the match is  24.722662440570524 %\n"
+     ]
+    }
+   ],
+   "source": [
+    "#QUESTION 1\n",
+    "#part a:\n",
+    "teams_list=player_match['Player_team'].unique()\n",
+    "for i in range(2008,2018):\n",
+    "    print('SEASON ',i,':\\n')\n",
+    "    a = player_match[(player_match.Age_As_on_match<=25)&(player_match.Season_year==i)]['Player_Name'].unique()\n",
+    "    b=player_match[(player_match.Season_year==i)]['Player_Name'].unique()\n",
+    "    print('Total number of players:',len(b))\n",
+    "    print('Total number of young players :',len(a))\n",
+    "    print('Percentage of young players is : ',(len(a)/len(b))*100,'%')\n",
+    "    maxnum=0;\n",
+    "    maxteam=0\n",
+    "    for j in range(len(teams_list)):\n",
+    "        nu=player_match[(player_match.Age_As_on_match<=25)&(player_match.Season_year==i)&(player_match.Player_team==teams_list[j])]['Player_Name'].unique()\n",
+    "        num=len(nu)\n",
+    "        \n",
+    "        if num>maxnum:\n",
+    "            maxnum=num\n",
+    "            maxteam=j\n",
+    "    print('Team with max number of young players is :',teams_list[maxteam],'\\n')\n",
+    "    \n",
+    "#part b:\n",
+    "motm=player_match[player_match.is_manofThematch==1]\n",
+    "motm_young=player_match[(player_match.is_manofThematch==1)&(player_match.Age_As_on_match<=25)]\n",
+    "print(\"Percentage of young players won Man of the match is \",(len(motm_young)/len(motm))*100,'%')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 250,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Number of times a team having a Right handed player won is  632 \n",
+      "\n",
+      "Number of times a team having a Left handed player won is  610 \n",
+      "\n",
+      "Number of times a right handed player won Man of the Match 430 \n",
+      "\n",
+      "Number of times a left handed player won Man of the Match 199 \n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "#QUESTION 2\n",
+    "if len(player_match['Batting_hand'].value_counts())==5 :# So that if you run it multiple times it doesn't change the value\n",
+    "    player_match['Batting_hand'] = player_match['Batting_hand'].map({'Right-hand bat':'Right-handed',\n",
+    "                                                                 '\\xa0Right-hand bat':'Right-handed',\n",
+    "                                                                'Left-hand bat' : 'Left-handed',\n",
+    "                                                                '\\xa0Left-hand bat' : 'Left-handed'})  \n",
+    "\n",
+    "rb=player_match[(player_match.Batting_hand=='Right-handed')&(player_match.IsPlayers_Team_won==1)]['Match_Id'].unique()\n",
+    "lb=player_match[(player_match.Batting_hand=='Left-handed')&(player_match.IsPlayers_Team_won==1)]['Match_Id'].unique()\n",
+    "\n",
+    "print('Number of times a team having a Right handed player won is ',len(rb),'\\n')\n",
+    "print('Number of times a team having a Left handed player won is ',len(lb),'\\n')\n",
+    "\n",
+    "rbm=player_match[(player_match.Batting_hand=='Right-handed')&(player_match.is_manofThematch==1)]\n",
+    "lbm=player_match[(player_match.Batting_hand=='Left-handed')&(player_match.is_manofThematch==1)]\n",
+    "print('Number of times a right handed player won Man of the Match',len(rbm),'\\n')\n",
+    "print('Number of times a left handed player won Man of the Match',len(lbm),'\\n')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 251,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "BEST ELEVEN : \n",
+      "\n",
+      "YK Pathan         16\n",
+      "AB de Villiers    15\n",
+      "Name: Player_Name, dtype: int64\n",
+      "CH Gayle     18\n",
+      "DA Warner    15\n",
+      "Name: Player_Name, dtype: int64\n",
+      "MS Dhoni    13\n",
+      "Name: Player_Name, dtype: int64\n",
+      "SK Raina    14\n",
+      "Name: Player_Name, dtype: int64\n",
+      "AM Rahane     12\n",
+      "MEK Hussey    12\n",
+      "V Kohli       11\n",
+      "Name: Player_Name, dtype: int64\n",
+      "RG Sharma    14\n",
+      "Name: Player_Name, dtype: int64\n",
+      "G Gambhir    13\n",
+      "Name: Player_Name, dtype: int64\n"
+     ]
+    }
+   ],
+   "source": [
+    "#QUESTION 3\n",
+    "print(\"BEST ELEVEN :\",'\\n')\n",
+    "\n",
+    "rpp=player_match[(player_match.Batting_hand=='Right-handed')&(player_match.is_manofThematch==1)]['Player_Name'].value_counts()\n",
+    "print(rpp[0:2])\n",
+    "lpp=player_match[(player_match.Batting_hand=='Left-handed')&(player_match.is_manofThematch==1)]['Player_Name'].value_counts()\n",
+    "print(lpp[0:2])\n",
+    "wk=player_match[((player_match.Player_Name)==(player_match.Player_keeper))&(player_match.is_manofThematch==1)]['Player_Name'].value_counts()\n",
+    "print(wk[0:1])\n",
+    "\n",
+    "ar=player_match[(player_match.Bowling_skill!='Is Null value')&(player_match.is_manofThematch==1)]['Player_Name'].value_counts()\n",
+    "print(ar[4:5])\n",
+    "p=player_match[((player_match.Bowling_skill.str.contains('fast'))|(player_match.Bowling_skill.str.contains('medium')))&(player_match.is_manofThematch==1)]['Player_Name'].value_counts()\n",
+    "print(p[2:5])\n",
+    "pp=player_match[(((player_match.Bowling_skill.str.contains('fast')==False)& ((player_match.Bowling_skill.str.contains('medium')==False)))&(player_match.is_manofThematch==1))]['Player_Name'].value_counts()\n",
+    "print(pp[3:4])\n",
+    "print(pp[5:6])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 252,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Mumbai Indians  won  16  times vs  Kolkata Knight Riders \n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "#QUESTION 4\n",
+    "maxwon=0\n",
+    "for i in range(0,len(teams_list)):\n",
+    "    for j in range(0,len(teams_list)):\n",
+    "        vs=player_match[(player_match.Player_team==teams_list[i])&(player_match.Opposit_Team==teams_list[j])&(player_match.IsPlayers_Team_won==1)]['Match_Id'].unique()\n",
+    "        if len(vs)>maxwon:\n",
+    "            maxwon=len(vs)\n",
+    "            winteam=teams_list[i]\n",
+    "            lteam=teams_list[j]\n",
+    "\n",
+    "print(winteam,' won ',maxwon,' times vs ',lteam,'\\n')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

From 4c5eeda1d5ab468e1f59272754ca990ec6ee978c Mon Sep 17 00:00:00 2001
From: Madhuparna04 <madhuparnabhowmik04@gmail.com>
Date: Wed, 20 Jun 2018 22:33:51 +0530
Subject: [PATCH 4/4] Classification using sklearn

---
 .../Classifcation using sklearn.ipynb         | 343 ++++++++++++++++++
 1 file changed, 343 insertions(+)
 create mode 100644 Introduction-to-Data-Science/Week-6/Madhuparna/Classifcation using sklearn.ipynb

diff --git a/Introduction-to-Data-Science/Week-6/Madhuparna/Classifcation using sklearn.ipynb b/Introduction-to-Data-Science/Week-6/Madhuparna/Classifcation using sklearn.ipynb
new file mode 100644
index 0000000..b0073ed
--- /dev/null
+++ b/Introduction-to-Data-Science/Week-6/Madhuparna/Classifcation using sklearn.ipynb	
@@ -0,0 +1,343 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 76,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from sklearn import datasets\n",
+    "from sklearn.datasets import load_breast_cancer\n",
+    "cancer = load_breast_cancer()\n",
+    "import matplotlib.pyplot as plt\n",
+    "from sklearn.linear_model import LogisticRegression\n",
+    "from sklearn.neighbors import KNeighborsClassifier\n",
+    "from sklearn.svm import SVC\n",
+    "from sklearn.neural_network import MLPClassifier\n",
+    "from sklearn import preprocessing\n",
+    "from sklearn.model_selection import train_test_split"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 69,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Breast Cancer Wisconsin (Diagnostic) Database\n",
+      "=============================================\n",
+      "\n",
+      "Notes\n",
+      "-----\n",
+      "Data Set Characteristics:\n",
+      "    :Number of Instances: 569\n",
+      "\n",
+      "    :Number of Attributes: 30 numeric, predictive attributes and the class\n",
+      "\n",
+      "    :Attribute Information:\n",
+      "        - radius (mean of distances from center to points on the perimeter)\n",
+      "        - texture (standard deviation of gray-scale values)\n",
+      "        - perimeter\n",
+      "        - area\n",
+      "        - smoothness (local variation in radius lengths)\n",
+      "        - compactness (perimeter^2 / area - 1.0)\n",
+      "        - concavity (severity of concave portions of the contour)\n",
+      "        - concave points (number of concave portions of the contour)\n",
+      "        - symmetry \n",
+      "        - fractal dimension (\"coastline approximation\" - 1)\n",
+      "\n",
+      "        The mean, standard error, and \"worst\" or largest (mean of the three\n",
+      "        largest values) of these features were computed for each image,\n",
+      "        resulting in 30 features.  For instance, field 3 is Mean Radius, field\n",
+      "        13 is Radius SE, field 23 is Worst Radius.\n",
+      "\n",
+      "        - class:\n",
+      "                - WDBC-Malignant\n",
+      "                - WDBC-Benign\n",
+      "\n",
+      "    :Summary Statistics:\n",
+      "\n",
+      "    ===================================== ====== ======\n",
+      "                                           Min    Max\n",
+      "    ===================================== ====== ======\n",
+      "    radius (mean):                        6.981  28.11\n",
+      "    texture (mean):                       9.71   39.28\n",
+      "    perimeter (mean):                     43.79  188.5\n",
+      "    area (mean):                          143.5  2501.0\n",
+      "    smoothness (mean):                    0.053  0.163\n",
+      "    compactness (mean):                   0.019  0.345\n",
+      "    concavity (mean):                     0.0    0.427\n",
+      "    concave points (mean):                0.0    0.201\n",
+      "    symmetry (mean):                      0.106  0.304\n",
+      "    fractal dimension (mean):             0.05   0.097\n",
+      "    radius (standard error):              0.112  2.873\n",
+      "    texture (standard error):             0.36   4.885\n",
+      "    perimeter (standard error):           0.757  21.98\n",
+      "    area (standard error):                6.802  542.2\n",
+      "    smoothness (standard error):          0.002  0.031\n",
+      "    compactness (standard error):         0.002  0.135\n",
+      "    concavity (standard error):           0.0    0.396\n",
+      "    concave points (standard error):      0.0    0.053\n",
+      "    symmetry (standard error):            0.008  0.079\n",
+      "    fractal dimension (standard error):   0.001  0.03\n",
+      "    radius (worst):                       7.93   36.04\n",
+      "    texture (worst):                      12.02  49.54\n",
+      "    perimeter (worst):                    50.41  251.2\n",
+      "    area (worst):                         185.2  4254.0\n",
+      "    smoothness (worst):                   0.071  0.223\n",
+      "    compactness (worst):                  0.027  1.058\n",
+      "    concavity (worst):                    0.0    1.252\n",
+      "    concave points (worst):               0.0    0.291\n",
+      "    symmetry (worst):                     0.156  0.664\n",
+      "    fractal dimension (worst):            0.055  0.208\n",
+      "    ===================================== ====== ======\n",
+      "\n",
+      "    :Missing Attribute Values: None\n",
+      "\n",
+      "    :Class Distribution: 212 - Malignant, 357 - Benign\n",
+      "\n",
+      "    :Creator:  Dr. William H. Wolberg, W. Nick Street, Olvi L. Mangasarian\n",
+      "\n",
+      "    :Donor: Nick Street\n",
+      "\n",
+      "    :Date: November, 1995\n",
+      "\n",
+      "This is a copy of UCI ML Breast Cancer Wisconsin (Diagnostic) datasets.\n",
+      "https://goo.gl/U2Uwz2\n",
+      "\n",
+      "Features are computed from a digitized image of a fine needle\n",
+      "aspirate (FNA) of a breast mass.  They describe\n",
+      "characteristics of the cell nuclei present in the image.\n",
+      "\n",
+      "Separating plane described above was obtained using\n",
+      "Multisurface Method-Tree (MSM-T) [K. P. Bennett, \"Decision Tree\n",
+      "Construction Via Linear Programming.\" Proceedings of the 4th\n",
+      "Midwest Artificial Intelligence and Cognitive Science Society,\n",
+      "pp. 97-101, 1992], a classification method which uses linear\n",
+      "programming to construct a decision tree.  Relevant features\n",
+      "were selected using an exhaustive search in the space of 1-4\n",
+      "features and 1-3 separating planes.\n",
+      "\n",
+      "The actual linear program used to obtain the separating plane\n",
+      "in the 3-dimensional space is that described in:\n",
+      "[K. P. Bennett and O. L. Mangasarian: \"Robust Linear\n",
+      "Programming Discrimination of Two Linearly Inseparable Sets\",\n",
+      "Optimization Methods and Software 1, 1992, 23-34].\n",
+      "\n",
+      "This database is also available through the UW CS ftp server:\n",
+      "\n",
+      "ftp ftp.cs.wisc.edu\n",
+      "cd math-prog/cpo-dataset/machine-learn/WDBC/\n",
+      "\n",
+      "References\n",
+      "----------\n",
+      "   - W.N. Street, W.H. Wolberg and O.L. Mangasarian. Nuclear feature extraction \n",
+      "     for breast tumor diagnosis. IS&T/SPIE 1993 International Symposium on \n",
+      "     Electronic Imaging: Science and Technology, volume 1905, pages 861-870,\n",
+      "     San Jose, CA, 1993.\n",
+      "   - O.L. Mangasarian, W.N. Street and W.H. Wolberg. Breast cancer diagnosis and \n",
+      "     prognosis via linear programming. Operations Research, 43(4), pages 570-577, \n",
+      "     July-August 1995.\n",
+      "   - W.H. Wolberg, W.N. Street, and O.L. Mangasarian. Machine learning techniques\n",
+      "     to diagnose breast cancer from fine-needle aspirates. Cancer Letters 77 (1994) \n",
+      "     163-171.\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(cancer.DESCR)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 91,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['mean radius' 'mean texture' 'mean perimeter' 'mean area'\n",
+      " 'mean smoothness' 'mean compactness' 'mean concavity'\n",
+      " 'mean concave points' 'mean symmetry' 'mean fractal dimension'\n",
+      " 'radius error' 'texture error' 'perimeter error' 'area error'\n",
+      " 'smoothness error' 'compactness error' 'concavity error'\n",
+      " 'concave points error' 'symmetry error' 'fractal dimension error'\n",
+      " 'worst radius' 'worst texture' 'worst perimeter' 'worst area'\n",
+      " 'worst smoothness' 'worst compactness' 'worst concavity'\n",
+      " 'worst concave points' 'worst symmetry' 'worst fractal dimension']\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "(569, 30)"
+      ]
+     },
+     "execution_count": 91,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "print(cancer.feature_names)\n",
+    "x_train, x_test, y_train, y_test = train_test_split(cancer.data,cancer.target, random_state=0)\n",
+    "#Feature scaling\n",
+    "x_train = preprocessing.scale(x_train)\n",
+    "x_test=preprocessing.scale(x_test)\n",
+    "cancer.data.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 72,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Traning set accuracy is: 0.9906103286384976\n",
+      "Test set accuracy is: 0.958041958041958\n"
+     ]
+    }
+   ],
+   "source": [
+    "#Using Logistic Regression\n",
+    "log_reg = LogisticRegression()\n",
+    "log_reg.fit(x_train, y_train)\n",
+    "print('Traning set accuracy is:',format(log_reg.score(x_train,y_train)))\n",
+    "print('Test set accuracy is:',format(log_reg.score(x_test,y_test)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 73,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Traning set accuracy is: 0.9859154929577465\n",
+      "Test set accuracy is: 0.965034965034965\n"
+     ]
+    }
+   ],
+   "source": [
+    "#Using SVM\n",
+    "svm= SVC()\n",
+    "svm.fit(x_train, y_train)\n",
+    "\n",
+    "print('Traning set accuracy is:',format(svm.score(x_train,y_train)))\n",
+    "print('Test set accuracy is:',format(svm.score(x_test,y_test)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 74,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Maximum Accuracy obtained for k= 3\n",
+      "Train Accuracy = 0.9835680751173709\n",
+      "Test Accuracy= 0.951048951048951\n"
+     ]
+    }
+   ],
+   "source": [
+    "#Using KNN\n",
+    "max_accuracy=0;\n",
+    "train_accu=0;\n",
+    "k_neighbour=0\n",
+    "for i in range(1,10):\n",
+    "    knn = KNeighborsClassifier(n_neighbors=i)\n",
+    "    knn.fit(x_train,y_train);\n",
+    "    tscore=knn.score(x_train,y_train)\n",
+    "    score=knn.score(x_test,y_test);\n",
+    "    if(score>max_accuracy):\n",
+    "        max_accuracy=score\n",
+    "        train_accu=tscore\n",
+    "        k_neighbour=i\n",
+    "\n",
+    "print('Maximum Accuracy obtained for k=',k_neighbour)\n",
+    "print('Train Accuracy =',train_accu)\n",
+    "print('Test Accuracy=',max_accuracy)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 90,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Maximum accuracy obtained for alpha = 0.2\n",
+      "Traning set accuracy is: 0.9906103286384976\n",
+      "Test set accuracy is: 0.972027972027972\n"
+     ]
+    }
+   ],
+   "source": [
+    "#Using Neural Network\n",
+    "#choosing value of alpha\n",
+    "i=0.1\n",
+    "best_alpha=0\n",
+    "max_accu=0\n",
+    "train_accu=0\n",
+    "while i<2:\n",
+    "    mlp = MLPClassifier(max_iter=1000,alpha=i,random_state=42)\n",
+    "    mlp.fit(x_train, y_train)\n",
+    "    score=(mlp.score(x_train,y_train))\n",
+    "    test_score=(mlp.score(x_test,y_test))\n",
+    "    if max_accu<test_score:\n",
+    "        max_accu=test_score\n",
+    "        train_accu=score\n",
+    "        best_alpha=i\n",
+    "    i=i+0.1\n",
+    "print('Maximum accuracy obtained for alpha =',best_alpha)\n",
+    "print('Traning set accuracy is:',train_accu)\n",
+    "print('Test set accuracy is:',max_accu)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}