data-01-test-score.csv
# EXAM1, EXAM2, EXAM3, FINAL
73,80,75,152
93,88,93,185
89,91,90,180
96,98,100,196
73,66,70,142
53,46,55,101
import tensorflow as tf
import numpy as np
# Multi-variable linear regression trained on exam scores loaded from a CSV
# file with NumPy (TensorFlow 1.x graph/session API).

# Fix the graph-level seed so the random initial weights are reproducible.
tf.set_random_seed(777)

# Load the whole file as one float32 matrix; every column must share a dtype.
# np.loadtxt treats lines starting with '#' as comments by default, so a
# header line such as "# EXAM1, EXAM2, EXAM3, FINAL" is skipped.
xy = np.loadtxt('data-01-test-score.csv', delimiter=',', dtype=np.float32)
x_data = xy[:, 0:-1]  # every column except the last -> features
y_data = xy[:, [-1]]  # last column only, kept 2-D by indexing with a list

# Verify that the data was sliced correctly.
print(x_data.shape, x_data, len(x_data))
print(y_data.shape, y_data)

# Placeholders accept any number of rows (None) with 3 features / 1 target.
X = tf.placeholder(tf.float32, shape=[None, 3])
Y = tf.placeholder(tf.float32, shape=[None, 1])

# Weight shape is [3 (number of X values), 1 (number of Y values)].
W = tf.Variable(tf.random_normal([3, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Linear hypothesis and mean-squared-error cost.
hypothesis = tf.matmul(X, W) + b
cost = tf.reduce_mean(tf.square(hypothesis - Y))

# Minimize the cost with plain gradient descent.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

for step in range(2001):
    # One training step on the full data set; also fetch cost and predictions.
    cost_val, hy_val, _ = sess.run(
        [cost, hypothesis, train], feed_dict={X: x_data, Y: y_data})
    if step % 10 == 0:
        print(step, "Cost: ", cost_val, "\nPrediction:\n", hy_val)

# Ask for predictions on unseen exam scores using the trained weights.
print("Your score will be ",
      sess.run(hypothesis, feed_dict={X: [[100, 70, 101]]}))
print("Other scores will be ",
      sess.run(hypothesis, feed_dict={X: [[60, 70, 110], [90, 100, 80]]}))
파일에 있는 전체 데이터가 같은 타입이어야 한다. (예제에서는 float32)
참고. Python - Slicing 기능
# Demonstrates Python list slicing: reading sub-lists and assigning to a slice.
# range() is lazy in Python 3, so materialize it as a list; a range object
# does not support slice assignment.
nums = list(range(5))
print(nums)        # Prints "[0, 1, 2, 3, 4]"
print(nums[2:4])   # Prints "[2, 3]"        (index 2 up to, not including, 4)
print(nums[2:])    # Prints "[2, 3, 4]"     (index 2 to the end)
print(nums[:2])    # Prints "[0, 1]"        (start up to, not including, 2)
print(nums[:])     # Prints "[0, 1, 2, 3, 4]" (shallow copy of the whole list)
print(nums[:-1])   # Prints "[0, 1, 2, 3]"  (everything except the last item)
nums[2:4] = [8, 9]  # Replace the elements at index 2 and 3 in place
print(nums)        # Prints "[0, 1, 8, 9, 4]"
Indexing, Slicing, Iterating
# Demonstrates NumPy indexing and slicing on 1-D and 2-D arrays.
# The bare expressions below are meant to be evaluated interactively; the
# comment next to each one shows the value it produces.
a = np.array([1, 2, 3, 4, 5])
# array([1, 2, 3, 4, 5])
a[1:3]
# array([2, 3])
a[-1]
# 5
a[0:2] = 9  # a scalar assigned to a slice is broadcast to every element
a
# array([9, 9, 3, 4, 5])

b = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
# array([[ 1,  2,  3,  4],
#        [ 5,  6,  7,  8],
#        [ 9, 10, 11, 12]])
b[:, 1]     # array([ 2,  6, 10])      -> column 1 of every row
b[-1]       # array([ 9, 10, 11, 12])  -> last row
b[-1, :]    # array([ 9, 10, 11, 12])  -> last row, all columns
b[-1, ...]  # array([ 9, 10, 11, 12])  -> last row, Ellipsis covers the rest
b[0:2, :]
# array([[1, 2, 3, 4],
#        [5, 6, 7, 8]])
.2. 파일 개수가 여러개인 경우
import tensorflow as tf
# Multi-variable linear regression fed by a TensorFlow 1.x input queue, for
# the case where the data is spread over several files.

# Queue of input file names; add more paths to the list to read more files.
filename_queue = tf.train.string_input_producer(
    ['data-01-test-score.csv'], shuffle=False, name='filename_queue')

# Read the queued files one text line at a time.
# NOTE(review): TextLineReader does not skip '#' comment lines by default; if
# the CSV begins with a header line, pass skip_header_lines=1 — confirm
# against the actual data file.
reader = tf.TextLineReader()
key, value = reader.read(filename_queue)

# One default per column; the float defaults also make every column float32.
record_defaults = [[0.], [0.], [0.], [0.]]
xy = tf.decode_csv(value, record_defaults=record_defaults)

# Collect decoded CSV records into batches: tf.train.batch([X values, Y values]).
# Think of the batch op as a pump that pulls in a little data at a time.
# tf.train.shuffle_batch is available when records should come in random order.
train_x_batch, train_y_batch = tf.train.batch(
    [xy[0:-1], xy[-1:]], batch_size=10)

# Placeholders accept any number of rows (None) with 3 features / 1 target.
X = tf.placeholder(tf.float32, shape=[None, 3])
Y = tf.placeholder(tf.float32, shape=[None, 1])

W = tf.Variable(tf.random_normal([3, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Linear hypothesis and mean-squared-error cost.
hypothesis = tf.matmul(X, W) + b
cost = tf.reduce_mean(tf.square(hypothesis - Y))

optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Start the threads that fill the filename and batch queues.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)

for step in range(2001):
    # Pull the next batch out of the queue, then train on it.
    x_batch, y_batch = sess.run([train_x_batch, train_y_batch])
    cost_val, hy_val, _ = sess.run(
        [cost, hypothesis, train], feed_dict={X: x_batch, Y: y_batch})
    if step % 10 == 0:
        print(step, "Cost: ", cost_val, "\nPrediction:\n", hy_val)

# Ask the queue-runner threads to stop and wait for them to finish.
coord.request_stop()
coord.join(threads)