-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPython uploads
84 lines (63 loc) · 3.09 KB
/
Python uploads
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import numpy as np
import pandas as pd

# NOTE(review): the model code further down uses `tf` (TensorFlow), which is
# not imported anywhere in the visible file -- confirm where it comes from.

# Load the California housing training set from the public bucket.
df = pd.read_csv(
    "https://storage.googleapis.com/ml_universities/california_housing_train.csv",
    sep=",",
)

# Reference snippets for other upload formats. They used to be executed with
# placeholder file names, which fails unless such files exist, and the Excel
# variant would have replaced the housing data held in `df`.
#   pd.read_csv('nome_do_arquivo.csv', encoding='ISO-8859-1')
#   xlsx = pd.ExcelFile('seu_arquivo_excel.xlsx')
#   df = pd.read_excel(xlsx, 'Planilha 1')

# Quick inspection; these bare expressions only display in a notebook/REPL.
df.head()
df.describe()
len(df)

# Now, split the data into two parts -- training and evaluation:
np.random.seed(seed=1)  # makes result reproducible
msk = np.random.rand(len(df)) < 0.8
# Explicit copies so that adding columns to either part later does not trigger
# pandas' SettingWithCopyWarning.
traindf = df[msk].copy()
evaldf = df[~msk].copy()
# Roughly 80% of the rows become training data and the rest evaluation data;
# each row is assigned at random by the mask above.
len(traindf)
len(evaldf)
#we'll be trying to predict median_house_value
#We divide total_rooms by households to get avg_rooms_per_house which we expect to positively correlate with median_house_value
#We also divide population by total_rooms to get avg_persons_per_room which we expect to negatively correlate with median_house_value
def add_more_features(df):
    """Return a copy of ``df`` with two engineered ratio columns added.

    The input frame is left untouched: the new columns are written to a copy,
    so callers passing a slice of another DataFrame neither get their data
    modified nor trigger pandas' SettingWithCopyWarning.

    Args:
        df: DataFrame containing the ``total_rooms``, ``households`` and
            ``population`` columns.

    Returns:
        A new DataFrame with every original column plus
        ``avg_rooms_per_house`` (``total_rooms / households``) and
        ``avg_persons_per_room`` (``population / total_rooms``).

    Raises:
        KeyError: If one of the required columns is missing.
    """
    enriched = df.copy()
    # Expected to correlate positively with median_house_value.
    enriched['avg_rooms_per_house'] = enriched['total_rooms'] / enriched['households']
    # Expected to correlate negatively with median_house_value.
    enriched['avg_persons_per_room'] = enriched['population'] / enriched['total_rooms']
    return enriched
# Create pandas input function
# Takes the DataFrame and the number of epochs to train for.
def make_input_fn(df, num_epochs):
    """Build an Estimator input function that reads batches from ``df``.

    Args:
        df: DataFrame with the raw housing columns, including
            ``median_house_value`` (used as the label).
        num_epochs: Passed through unchanged as ``num_epochs`` of
            ``pandas_input_fn``.

    Returns:
        Whatever ``tf.estimator.inputs.pandas_input_fn`` returns for the
        features and labels assembled here.
    """
    # Engineered columns are computed first, then the label is read from `df`,
    # keeping the same evaluation order as the keyword arguments had.
    features = add_more_features(df)
    # Label is scaled down by 100000 (the original author defers the rationale
    # to later course material).
    labels = df['median_house_value'] / 100000
    return tf.estimator.inputs.pandas_input_fn(
        x=features,
        y=labels,
        batch_size=128,
        num_epochs=num_epochs,
        shuffle=True,
        queue_capacity=1000,
        num_threads=1,
    )
# Define your feature columns
def create_feature_cols():
    """Return the list of feature columns fed to the estimator.

    Four columns are plain numeric columns; ``latitude`` is bucketized using
    boundaries at every whole number from 32.0 through 41.0.
    """
    numeric = tf.feature_column.numeric_column
    latitude_edges = np.arange(32.0, 42, 1).tolist()
    latitude_buckets = tf.feature_column.bucketized_column(
        numeric('latitude'),
        boundaries=latitude_edges,
    )
    return [
        numeric('housing_median_age'),
        latitude_buckets,
        numeric('avg_rooms_per_house'),
        numeric('avg_persons_per_room'),
        numeric('median_income'),
    ]
# Create estimator train and evaluate function
def train_and_evaluate(output_dir, num_train_steps):
    """Train a linear regressor and evaluate it alongside training.

    Reads the module-level ``traindf`` and ``evaldf`` DataFrames.

    Args:
        output_dir: Passed as ``model_dir`` of the ``LinearRegressor``.
        num_train_steps: Passed as ``max_steps`` of the ``TrainSpec``.
    """
    model = tf.estimator.LinearRegressor(
        model_dir=output_dir,
        feature_columns=create_feature_cols(),
    )
    # Training input is built with num_epochs=None; the TrainSpec carries the
    # max_steps limit.
    training = tf.estimator.TrainSpec(
        input_fn=make_input_fn(traindf, None),
        max_steps=num_train_steps,
    )
    # Evaluation input is built with num_epochs=1.
    evaluation = tf.estimator.EvalSpec(
        input_fn=make_input_fn(evaldf, 1),
        steps=None,
        start_delay_secs=1,  # start evaluating after N seconds
        throttle_secs=5,  # evaluate every N seconds
    )
    tf.estimator.train_and_evaluate(model, training, evaluation)
# Launch tensorboard
import shutil  # needed by rmtree below; the visible file never imported it

from google.datalab.ml import TensorBoard

OUTDIR = './trained_model'
TensorBoard().start(OUTDIR)

# Run the model
shutil.rmtree(OUTDIR, ignore_errors=True)  # start fresh each time
tf.summary.FileWriterCache.clear()  # ensure filewriter cache is clear for TensorBoard events file
train_and_evaluate(OUTDIR, 2000)