Merge pull request google#77 from XericZephyr/fix/op_never_used
Auto-formatting, warning fix and small refactor.
LeegleechN authored May 13, 2019
2 parents 4ca7736 + 3348a46 commit 73aee83
Showing 6 changed files with 338 additions and 229 deletions.
6 changes: 5 additions & 1 deletion eval.py
@@ -119,11 +119,15 @@ def build_graph(reader,
"""

global_step = tf.Variable(0, trainable=False, name="global_step")
video_id_batch, model_input_raw, labels_batch, num_frames = get_input_evaluation_tensors( # pylint: disable=g-line-too-long
input_data_dict = get_input_evaluation_tensors(
reader,
eval_data_pattern,
batch_size=batch_size,
num_readers=num_readers)
video_id_batch = input_data_dict["video_ids"]
model_input_raw = input_data_dict["video_matrix"]
labels_batch = input_data_dict["labels"]
num_frames = input_data_dict["num_frames"]
tf.summary.histogram("model_input_raw", model_input_raw)

feature_dim = len(model_input_raw.get_shape()) - 1
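
A side note on the idiom above: feature_dim is simply the index of the input tensor's last axis, i.e. the feature axis, which is what the raw input is later L2-normalized over (the same pattern appears in export_model.py below). A minimal numpy sketch of that normalization, using a made-up toy batch; it is an illustration only, not part of this commit:

import numpy as np

model_input_raw = np.array([[3.0, 4.0],
                            [1.0, 0.0]])      # toy batch of two feature vectors
feature_dim = model_input_raw.ndim - 1        # last axis, analogous to len(get_shape()) - 1
norms = np.linalg.norm(model_input_raw, axis=feature_dim, keepdims=True)
print(model_input_raw / norms)                # [[0.6 0.8]
                                              #  [1.  0. ]]
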
37 changes: 25 additions & 12 deletions export_model.py
Expand Up @@ -24,6 +24,7 @@

_TOP_PREDICTIONS_IN_OUTPUT = 20


class ModelExporter(object):

def __init__(self, frame_features, model, reader):
@@ -49,11 +50,13 @@ def export_model(self, model_dir, global_step_val, last_checkpoint):
outputs=self.outputs,
method_name=signature_constants.PREDICT_METHOD_NAME)

signature_map = {signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
signature}
signature_map = {
signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature
}

model_builder = saved_model_builder.SavedModelBuilder(model_dir)
model_builder.add_meta_graph_and_variables(session,
model_builder.add_meta_graph_and_variables(
session,
tags=[tag_constants.SERVING],
signature_def_map=signature_map,
clear_devices=True)
@@ -65,28 +68,38 @@ def build_inputs_and_outputs(self):

fn = lambda x: self.build_prediction_graph(x)
video_id_output, top_indices_output, top_predictions_output = (
tf.map_fn(fn, serialized_examples,
dtype=(tf.string, tf.int32, tf.float32)))
tf.map_fn(
fn, serialized_examples, dtype=(tf.string, tf.int32, tf.float32)))

else:
serialized_examples = tf.placeholder(tf.string, shape=(None,))

video_id_output, top_indices_output, top_predictions_output = (
self.build_prediction_graph(serialized_examples))

inputs = {"example_bytes":
saved_model_utils.build_tensor_info(serialized_examples)}
inputs = {
"example_bytes":
saved_model_utils.build_tensor_info(serialized_examples)
}

outputs = {
"video_id": saved_model_utils.build_tensor_info(video_id_output),
"class_indexes": saved_model_utils.build_tensor_info(top_indices_output),
"predictions": saved_model_utils.build_tensor_info(top_predictions_output)}
"video_id":
saved_model_utils.build_tensor_info(video_id_output),
"class_indexes":
saved_model_utils.build_tensor_info(top_indices_output),
"predictions":
saved_model_utils.build_tensor_info(top_predictions_output)
}

return inputs, outputs

def build_prediction_graph(self, serialized_examples):
video_id, model_input_raw, labels_batch, num_frames = (
input_data_dict = (
self.reader.prepare_serialized_examples(serialized_examples))
video_id = input_data_dict["video_ids"]
model_input_raw = input_data_dict["video_matrix"]
labels_batch = input_data_dict["labels"]
num_frames = input_data_dict["num_frames"]

feature_dim = len(model_input_raw.get_shape()) - 1
model_input = tf.nn.l2_normalize(model_input_raw, feature_dim)
@@ -105,5 +118,5 @@ def build_prediction_graph(self, serialized_examples):
predictions = result["predictions"]

top_predictions, top_indices = tf.nn.top_k(predictions,
_TOP_PREDICTIONS_IN_OUTPUT)
_TOP_PREDICTIONS_IN_OUTPUT)
return video_id, top_indices, top_predictions
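
For reference, a small standalone sketch of what tf.nn.top_k returns for the exported class_indexes and predictions outputs. It assumes TensorFlow 1.x (as used throughout this repository) and a made-up one-video batch; it is an illustration only, not part of this commit:

import tensorflow as tf

predictions = tf.constant([[0.10, 0.70, 0.05, 0.15]])         # one video, four classes
top_predictions, top_indices = tf.nn.top_k(predictions, k=2)  # k plays the role of _TOP_PREDICTIONS_IN_OUTPUT
with tf.Session() as sess:
  scores, indices = sess.run([top_predictions, top_indices])
print(indices)  # [[1 3]]   class ids, highest score first
print(scores)   # [[0.7  0.15]]
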
153 changes: 86 additions & 67 deletions inference.py
@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Binary for generating predictions over a set of videos."""

import os
@@ -34,54 +33,55 @@

FLAGS = flags.FLAGS

if __name__ == '__main__':
if __name__ == "__main__":
# Input
flags.DEFINE_string("train_dir", "",
"The directory to load the model files from. We assume "
"that you have already run eval.py onto this, such that "
"inference_model.* files already exist.")
flags.DEFINE_string(
"train_dir", "", "The directory to load the model files from. We assume "
"that you have already run eval.py onto this, such that "
"inference_model.* files already exist.")
flags.DEFINE_string(
"input_data_pattern", "",
"File glob defining the evaluation dataset in tensorflow.SequenceExample "
"format. The SequenceExamples are expected to have an 'rgb' byte array "
"sequence feature as well as a 'labels' int64 context feature.")
flags.DEFINE_string("input_model_tgz", "",
"If given, must be path to a .tgz file that was written "
"by this binary using flag --output_model_tgz. In this "
"case, the .tgz file will be untarred to "
"--untar_model_dir and the model will be used for "
"inference.")
flags.DEFINE_string("untar_model_dir", "/tmp/yt8m-model",
"If --input_model_tgz is given, then this directory will "
"be created and the contents of the .tgz file will be "
"untarred here.")
flags.DEFINE_string(
"input_model_tgz", "",
"If given, must be path to a .tgz file that was written "
"by this binary using flag --output_model_tgz. In this "
"case, the .tgz file will be untarred to "
"--untar_model_dir and the model will be used for "
"inference.")
flags.DEFINE_string(
"untar_model_dir", "/tmp/yt8m-model",
"If --input_model_tgz is given, then this directory will "
"be created and the contents of the .tgz file will be "
"untarred here.")

# Output
flags.DEFINE_string("output_file", "",
"The file to save the predictions to.")
flags.DEFINE_string("output_model_tgz", "",
"If given, should be a filename with a .tgz extension, "
"the model graph and checkpoint will be bundled in this "
"gzip tar. This file can be uploaded to Kaggle for the "
"top 10 participants.")
flags.DEFINE_integer("top_k", 20,
"How many predictions to output per video.")
flags.DEFINE_string("output_file", "", "The file to save the predictions to.")
flags.DEFINE_string(
"output_model_tgz", "",
"If given, should be a filename with a .tgz extension, "
"the model graph and checkpoint will be bundled in this "
"gzip tar. This file can be uploaded to Kaggle for the "
"top 10 participants.")
flags.DEFINE_integer("top_k", 20, "How many predictions to output per video.")

# Other flags.
flags.DEFINE_integer(
"batch_size", 8192,
"How many examples to process per batch.")
flags.DEFINE_integer("batch_size", 8192,
"How many examples to process per batch.")
flags.DEFINE_integer("num_readers", 1,
"How many threads to use for reading input files.")


def format_lines(video_ids, predictions, top_k):
batch_size = len(video_ids)
for video_index in range(batch_size):
top_indices = numpy.argpartition(predictions[video_index], -top_k)[-top_k:]
line = [(class_index, predictions[video_index][class_index])
for class_index in top_indices]
line = sorted(line, key=lambda p: -p[1])
yield video_ids[video_index].decode('utf-8') + "," + " ".join(
yield video_ids[video_index].decode("utf-8") + "," + " ".join(
"%i %g" % (label, score) for (label, score) in line) + "\n"


@@ -110,34 +110,47 @@ def get_input_data_tensors(reader, data_pattern, batch_size, num_readers=1):
logging.info("number of input files: " + str(len(files)))
filename_queue = tf.train.string_input_producer(
files, num_epochs=1, shuffle=False)
examples_and_labels = [reader.prepare_reader(filename_queue)
for _ in range(num_readers)]

video_id_batch, video_batch, unused_labels, num_frames_batch = (
tf.train.batch_join(examples_and_labels,
batch_size=batch_size,
allow_smaller_final_batch=True,
enqueue_many=True))
examples_and_labels = [
reader.prepare_reader(filename_queue) for _ in range(num_readers)
]

input_data_dict = (
tf.train.batch_join(
examples_and_labels,
batch_size=batch_size,
allow_smaller_final_batch=True,
enqueue_many=True))
video_id_batch = input_data_dict["video_ids"]
video_batch = input_data_dict["video_matrix"]
num_frames_batch = input_data_dict["num_frames"]
return video_id_batch, video_batch, num_frames_batch

def inference(reader, train_dir, data_pattern, out_file_location, batch_size, top_k):
with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess, gfile.Open(out_file_location, "w+") as out_file:
video_id_batch, video_batch, num_frames_batch = get_input_data_tensors(reader, data_pattern, batch_size)
checkpoint_file = os.path.join(FLAGS.train_dir, "inference_model", "inference_model")

def inference(reader, train_dir, data_pattern, out_file_location, batch_size,
top_k):
with tf.Session(config=tf.ConfigProto(
allow_soft_placement=True)) as sess, gfile.Open(out_file_location,
"w+") as out_file:
video_id_batch, video_batch, num_frames_batch = get_input_data_tensors(
reader, data_pattern, batch_size)
checkpoint_file = os.path.join(FLAGS.train_dir, "inference_model",
"inference_model")
if not gfile.Exists(checkpoint_file + ".meta"):
raise IOError("Cannot find %s. Did you run eval.py?" % checkpoint_file)
meta_graph_location = checkpoint_file + ".meta"
logging.info("loading meta-graph: " + meta_graph_location)

if FLAGS.output_model_tgz:
with tarfile.open(FLAGS.output_model_tgz, "w:gz") as tar:
for model_file in glob.glob(checkpoint_file + '.*'):
for model_file in glob.glob(checkpoint_file + ".*"):
tar.add(model_file, arcname=os.path.basename(model_file))
tar.add(os.path.join(FLAGS.train_dir, "model_flags.json"),
arcname="model_flags.json")
print('Tarred model onto ' + FLAGS.output_model_tgz)
tar.add(
os.path.join(FLAGS.train_dir, "model_flags.json"),
arcname="model_flags.json")
print("Tarred model onto " + FLAGS.output_model_tgz)
with tf.device("/cpu:0"):
saver = tf.train.import_meta_graph(meta_graph_location, clear_devices=True)
saver = tf.train.import_meta_graph(
meta_graph_location, clear_devices=True)
logging.info("restoring variables from " + checkpoint_file)
saver.restore(sess, checkpoint_file)
input_tensor = tf.get_collection("input_batch_raw")[0]
@@ -154,8 +167,8 @@ def set_up_init_ops(variables):
init_op_list.append(tf.variables_initializer(variables))
return init_op_list

sess.run(set_up_init_ops(tf.get_collection_ref(
tf.GraphKeys.LOCAL_VARIABLES)))
sess.run(
set_up_init_ops(tf.get_collection_ref(tf.GraphKeys.LOCAL_VARIABLES)))

coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
@@ -165,21 +178,27 @@ def set_up_init_ops(variables):

try:
while not coord.should_stop():
video_id_batch_val, video_batch_val,num_frames_batch_val = sess.run([video_id_batch, video_batch, num_frames_batch])
predictions_val, = sess.run([predictions_tensor], feed_dict={input_tensor: video_batch_val, num_frames_tensor: num_frames_batch_val})
now = time.time()
num_examples_processed += len(video_batch_val)
num_classes = predictions_val.shape[1]
logging.info("num examples processed: " + str(num_examples_processed) + " elapsed seconds: " + "{0:.2f}".format(now-start_time))
for line in format_lines(video_id_batch_val, predictions_val, top_k):
out_file.write(line)
out_file.flush()

video_id_batch_val, video_batch_val, num_frames_batch_val = sess.run(
[video_id_batch, video_batch, num_frames_batch])
predictions_val, = sess.run([predictions_tensor],
feed_dict={
input_tensor: video_batch_val,
num_frames_tensor: num_frames_batch_val
})
now = time.time()
num_examples_processed += len(video_batch_val)
num_classes = predictions_val.shape[1]
logging.info("num examples processed: " + str(num_examples_processed) +
" elapsed seconds: " + "{0:.2f}".format(now - start_time))
for line in format_lines(video_id_batch_val, predictions_val, top_k):
out_file.write(line)
out_file.flush()

except tf.errors.OutOfRangeError:
logging.info('Done with inference. The output file was written to ' + out_file_location)
logging.info("Done with inference. The output file was written to " +
out_file_location)
finally:
coord.request_stop()
coord.request_stop()

coord.join(threads)
sess.close()
@@ -207,22 +226,22 @@ def main(unused_argv):
flags_dict["feature_names"], flags_dict["feature_sizes"])

if flags_dict["frame_features"]:
reader = readers.YT8MFrameFeatureReader(feature_names=feature_names,
feature_sizes=feature_sizes)
reader = readers.YT8MFrameFeatureReader(
feature_names=feature_names, feature_sizes=feature_sizes)
else:
reader = readers.YT8MAggregatedFeatureReader(feature_names=feature_names,
feature_sizes=feature_sizes)
reader = readers.YT8MAggregatedFeatureReader(
feature_names=feature_names, feature_sizes=feature_sizes)

if FLAGS.output_file is "":
raise ValueError("'output_file' was not specified. "
"Unable to continue with inference.")
"Unable to continue with inference.")

if FLAGS.input_data_pattern is "":
raise ValueError("'input_data_pattern' was not specified. "
"Unable to continue with inference.")
"Unable to continue with inference.")

inference(reader, FLAGS.train_dir, FLAGS.input_data_pattern,
FLAGS.output_file, FLAGS.batch_size, FLAGS.top_k)
FLAGS.output_file, FLAGS.batch_size, FLAGS.top_k)


if __name__ == "__main__":
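
For context, format_lines above yields one Kaggle-style submission line per video: the video id, a comma, then top_k space-separated "label score" pairs in descending score order. A minimal sketch of that logic for a single made-up video, using only numpy; it is an illustration only, not part of this commit:

import numpy

predictions = numpy.array([0.1, 0.9, 0.3, 0.7])   # toy scores for one video
top_k = 2
top_indices = numpy.argpartition(predictions, -top_k)[-top_k:]
line = sorted([(i, predictions[i]) for i in top_indices], key=lambda p: -p[1])
print("vid001," + " ".join("%i %g" % (label, score) for label, score in line))
# vid001,1 0.9 3 0.7
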
25 changes: 15 additions & 10 deletions losses.py
@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Provides definitions for non-regularized training or test losses."""

import tensorflow as tf
@@ -38,16 +37,21 @@ def calculate_loss(self, unused_predictions, unused_labels, **unused_params):


class CrossEntropyLoss(BaseLoss):
"""Calculate the cross entropy loss between the predictions and labels.
"""
"""Calculate the cross entropy loss between the predictions and labels."""

def calculate_loss(self, predictions, labels, **unused_params):
def calculate_loss(self,
predictions,
labels,
label_weights=None,
**unused_params):
with tf.name_scope("loss_xent"):
epsilon = 10e-6
epsilon = 1e-5
float_labels = tf.cast(labels, tf.float32)
cross_entropy_loss = float_labels * tf.log(predictions + epsilon) + (
1 - float_labels) * tf.log(1 - predictions + epsilon)
cross_entropy_loss = tf.negative(cross_entropy_loss)
if label_weights is not None:
cross_entropy_loss *= label_weights
return tf.reduce_mean(tf.reduce_sum(cross_entropy_loss, 1))


@@ -66,7 +70,8 @@ def calculate_loss(self, predictions, labels, b=1.0, **unused_params):
all_ones = tf.ones(tf.shape(float_labels), dtype=tf.float32)
sign_labels = tf.subtract(tf.scalar_mul(2, float_labels), all_ones)
hinge_loss = tf.maximum(
all_zeros, tf.scalar_mul(b, all_ones) - sign_labels * predictions)
all_zeros,
tf.scalar_mul(b, all_ones) - sign_labels * predictions)
return tf.reduce_mean(tf.reduce_sum(hinge_loss, 1))


@@ -88,10 +93,10 @@ def calculate_loss(self, predictions, labels, **unused_params):
float_labels = tf.cast(labels, tf.float32)
# l1 normalization (labels are no less than 0)
label_rowsum = tf.maximum(
tf.reduce_sum(float_labels, 1, keep_dims=True),
epsilon)
tf.reduce_sum(float_labels, 1, keep_dims=True), epsilon)
norm_float_labels = tf.div(float_labels, label_rowsum)
softmax_outputs = tf.nn.softmax(predictions)
softmax_loss = tf.negative(tf.reduce_sum(
tf.multiply(norm_float_labels, tf.log(softmax_outputs)), 1))
softmax_loss = tf.negative(
tf.reduce_sum(
tf.multiply(norm_float_labels, tf.log(softmax_outputs)), 1))
return tf.reduce_mean(softmax_loss)
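
The reworked CrossEntropyLoss above takes an optional label_weights argument and spells epsilon as 1e-5 (numerically identical to the previous 10e-6). A minimal numpy sketch of the computation it performs, with made-up toy arrays and hypothetical per-class weights; when label_weights is None the scaling step is simply skipped:

import numpy as np

epsilon = 1e-5
predictions = np.array([[0.9, 0.2],
                        [0.3, 0.8]])            # toy per-class probabilities
labels = np.array([[1.0, 0.0],
                   [0.0, 1.0]])
label_weights = np.array([1.0, 0.5])            # hypothetical per-class weights

cross_entropy = -(labels * np.log(predictions + epsilon) +
                  (1 - labels) * np.log(1 - predictions + epsilon))
cross_entropy *= label_weights                  # skipped when label_weights is None
print(np.mean(np.sum(cross_entropy, axis=1)))   # scalar batch loss
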
(Diffs for the remaining two of the six changed files are not shown.)
