[실습] TensorFlow로 파일에서 데이터 읽어오기

data-01-test-score.csv

# EXAM1, EXAM2, EXAM3, FINAL
73,80,75,152
93,88,93,185
89,91,90,180
96,98,100,196
73,66,70,142
53,46,55,101

import tensorflow as tf
import numpy as np

# Fix the graph-level random seed so the random weight initialisation below
# is reproducible between runs.
tf.set_random_seed(777)

# Load the whole CSV as a single float32 matrix. np.loadtxt skips lines that
# start with '#' by default, so a '# EXAM1, ...' header line is ignored.
# Every value in the file must be parseable as the same dtype.
xy = np.loadtxt('data-01-test-score.csv', delimiter=',', dtype=np.float32)
x_data = xy[:, 0:-1]  # every column except the last -> features
y_data = xy[:, [-1]]  # last column only; the list index keeps it 2-D

# Check that the data was sliced as intended.
print(x_data.shape, x_data, len(x_data))
print(y_data.shape, y_data)

# Placeholders: None leaves the number of rows (batch size) unconstrained.
X = tf.placeholder(tf.float32, shape=[None, 3])
Y = tf.placeholder(tf.float32, shape=[None, 1])

# Weight shape is [3 (number of X features), 1 (number of Y outputs)].
W = tf.Variable(tf.random_normal([3, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Linear model and mean-squared-error cost.
hypothesis = tf.matmul(X, W) + b
cost = tf.reduce_mean(tf.square(hypothesis - Y))

# Minimize the cost with plain gradient descent.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)

# Launch the graph and initialise W and b.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

for step in range(2001):
    # One training step on the full data set; also fetch the cost and the
    # current predictions so progress can be reported.
    cost_val, hy_val, _ = sess.run([cost, hypothesis, train], feed_dict={X: x_data, Y: y_data})
    if step % 10 == 0:
        print(step, "Cost: ", cost_val, "\nPrediction:\n", hy_val)

# Ask for predictions on unseen exam scores.
print("Your score will be ", sess.run(hypothesis, feed_dict={X: [[100, 70, 101]]}))
print("Other scores will be ", sess.run(hypothesis, feed_dict={X: [[60, 70, 110], [90, 100, 80]]}))

파일에 있는 전체 데이터가 같은 타입이어야 한다. (예제에서는 float32)

참고. Python - Slicing 기능

# Basic list slicing. range() is wrapped in list() because a Python 3 range
# object prints as "range(0, 5)" and does not support slice assignment.
nums = list(range(5))
print(nums)           # Prints "[0, 1, 2, 3, 4]"
print(nums[2:4])      # Prints "[2, 3]"
print(nums[2:])       # Prints "[2, 3, 4]"
print(nums[:2])       # Prints "[0, 1]"
print(nums[:])        # Prints "[0, 1, 2, 3, 4]"
print(nums[:-1])      # Prints "[0, 1, 2, 3]"
nums[2:4] = [8, 9]    # Assign a new sublist to a slice
print(nums)           # Prints "[0, 1, 8, 9, 4]"

Indexing, Slicing, Iterating

# 1-D array: indexing and slicing work like Python lists.
a = np.array([1, 2, 3, 4, 5])
# array([1, 2, 3, 4, 5])

a[1:3]
# array([2, 3])

a[-1]
# 5

# Assigning a scalar to a slice broadcasts it to every selected element.
a[0:2] = 9

a
# array([9, 9, 3, 4, 5])

# 2-D array: indices are given as [row, column].
b = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
# array([[ 1,  2,  3,  4],
#        [ 5,  6,  7,  8],
#        [ 9, 10, 11, 12]])

b[:, 1]    # array([2, 6, 10])       -> second column of every row
b[-1]      # array([9, 10, 11, 12])  -> last row
b[-1, :]   # array([9, 10, 11, 12])  -> last row, all columns
b[-1, ...] # array([9, 10, 11, 12])  -> last row, all remaining axes
b[0:2, :]
# array([[1, 2, 3, 4],
#        [5, 6, 7, 8]])

2. 파일 개수가 여러 개인 경우

import tensorflow as tf
# Queue of input file names; more CSV files can be added to the list.
# shuffle=False keeps the files in the order given.
filename_queue = tf.train.string_input_producer(['data-01-test-score.csv'], shuffle=False, name='filename_queue')

# Read the queued files one text line at a time.
reader = tf.TextLineReader()
key, value = reader.read(filename_queue)

# One default per CSV column; the float defaults also make every column
# decode as float32.
# NOTE(review): decode_csv parses every line it is given. If the data file
# really starts with a '# EXAM1, ...' header line, that line must be removed
# or skipped -- confirm against the actual file.
record_defaults = [[0.], [0.], [0.], [0.]]
xy = tf.decode_csv(value, record_defaults=record_defaults)

# Collect decoded rows into batches: tf.train.batch([x values, y values]).
# Think of batch as a pump that pulls in a small amount of data at a time.
# tf.train.shuffle_batch is available when rows should come in random order.
train_x_batch, train_y_batch = tf.train.batch([xy[0:-1], xy[-1:]], batch_size=10)

# Placeholders: None leaves the number of rows (batch size) unconstrained.
X = tf.placeholder(tf.float32, shape=[None, 3])
Y = tf.placeholder(tf.float32, shape=[None, 1])

# Weight shape is [3 (number of X features), 1 (number of Y outputs)].
W = tf.Variable(tf.random_normal([3, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Linear model and mean-squared-error cost.
hypothesis = tf.matmul(X, W) + b
cost = tf.reduce_mean(tf.square(hypothesis - Y))

# Minimize the cost with plain gradient descent.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)

# Launch the graph and initialise W and b.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Start the threads that fill the filename and batch queues.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)

for step in range(2001):
    # Pull the next batch out of the queue, then feed it to one training step.
    x_batch, y_batch = sess.run([train_x_batch, train_y_batch])
    cost_val, hy_val, _ = sess.run(
        [cost, hypothesis, train], feed_dict={X: x_batch, Y: y_batch})
    if step % 10 == 0:
        print(step, "Cost: ", cost_val, "\nPrediction:\n", hy_val)

# Ask the queue-runner threads to stop and wait for them to finish.
coord.request_stop()
coord.join(threads)

results for ""

    No results matching ""