chores: refactor for the new ai research, add linter, gh action, etc (#27)

Author: Marina von Steinkirch, PhD · 2025-08-13 21:49:46 +08:00 · committed by von-steinkirch
parent fb4ab80dc3 · commit d5467e559f
40 changed files with 5177 additions and 2476 deletions
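The hunk below shows only the reformatting side of the change; the linter configuration and the GitHub Action named in the commit title live in other files of this commit. The new formatting (double quotes, wrapped parentheses, isort-style import grouping) is consistent with black plus isort, so a lint entry point for such a setup might look roughly like the sketch below. The tool choices and the noxfile.py name are assumptions, not read from the repository:

# noxfile.py, a hypothetical lint session; tools inferred from the
# formatting visible in this commit (black-style quoting and wrapping,
# isort-style import ordering), not from the repository itself.
import nox


@nox.session
def lint(session):
    # A real setup would pin tool versions.
    session.install("black", "isort", "flake8")
    session.run("black", "--check", ".")
    session.run("isort", "--check-only", ".")
    session.run("flake8", ".")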

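For reference, the hunk below is the repository's Inception Score implementation: get_inception_score computes, per split, the exponentiated mean KL divergence between each image's predicted label distribution p(y|x) and the split's marginal p(y). In the notation of Salimans et al. (2016), that is the standard score (a reference formula, not text from the commit):

\[
  \mathrm{IS} = \exp\!\Big( \mathbb{E}_{x}\, D_{\mathrm{KL}}\big( p(y \mid x) \,\|\, p(y) \big) \Big)
\]

The line kl = part * (np.log(part) - np.log(np.expand_dims(np.mean(part, 0), 0))) in the code is exactly this divergence evaluated row-wise, with p(y) estimated by the column mean of each split.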
@@ -1,105 +1,112 @@
-# Code derived from tensorflow/tensorflow/models/image/imagenet/classify_image.py
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
+# Code derived from
+# tensorflow/tensorflow/models/image/imagenet/classify_image.py
+from __future__ import absolute_import, division, print_function
+import math
 import os.path
 import sys
 import tarfile
-import numpy as np
-from six.moves import urllib
-import tensorflow as tf
-import glob
-import scipy.misc
-import math
-import sys
 import horovod.tensorflow as hvd
+import numpy as np
+import tensorflow as tf
+from six.moves import urllib
-MODEL_DIR = '/tmp/imagenet'
-DATA_URL = 'http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz'
+MODEL_DIR = "/tmp/imagenet"
+DATA_URL = (
+    "http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz"
+)
 softmax = None
 config = tf.ConfigProto()
 config.gpu_options.visible_device_list = str(hvd.local_rank())
 sess = tf.Session(config=config)
 # Call this function with a list of images. Each element should be a
 # numpy array with values ranging from 0 to 255.
 def get_inception_score(images, splits=10):
-  # For convenience
-  if len(images[0].shape) != 3:
-    return 0, 0
+    # For convenience
+    if len(images[0].shape) != 3:
+        return 0, 0
-  # Bypassing all the assertions so that we don't end prematurely
-  # assert(type(images) == list)
-  # assert(type(images[0]) == np.ndarray)
-  # assert(len(images[0].shape) == 3)
-  # assert(np.max(images[0]) > 10)
-  # assert(np.min(images[0]) >= 0.0)
-  inps = []
-  for img in images:
-    img = img.astype(np.float32)
-    inps.append(np.expand_dims(img, 0))
-  bs = 1
-  preds = []
-  n_batches = int(math.ceil(float(len(inps)) / float(bs)))
-  for i in range(n_batches):
-    sys.stdout.write(".")
-    sys.stdout.flush()
-    inp = inps[(i * bs):min((i + 1) * bs, len(inps))]
-    inp = np.concatenate(inp, 0)
-    pred = sess.run(softmax, {'ExpandDims:0': inp})
-    preds.append(pred)
-  preds = np.concatenate(preds, 0)
-  scores = []
-  for i in range(splits):
-    part = preds[(i * preds.shape[0] // splits):((i + 1) * preds.shape[0] // splits), :]
-    kl = part * (np.log(part) - np.log(np.expand_dims(np.mean(part, 0), 0)))
-    kl = np.mean(np.sum(kl, 1))
-    scores.append(np.exp(kl))
-  return np.mean(scores), np.std(scores)
+    # Bypassing all the assertions so that we don't end prematurely
+    # assert(type(images) == list)
+    # assert(type(images[0]) == np.ndarray)
+    # assert(len(images[0].shape) == 3)
+    # assert(np.max(images[0]) > 10)
+    # assert(np.min(images[0]) >= 0.0)
+    inps = []
+    for img in images:
+        img = img.astype(np.float32)
+        inps.append(np.expand_dims(img, 0))
+    bs = 1
+    preds = []
+    n_batches = int(math.ceil(float(len(inps)) / float(bs)))
+    for i in range(n_batches):
+        sys.stdout.write(".")
+        sys.stdout.flush()
+        inp = inps[(i * bs) : min((i + 1) * bs, len(inps))]
+        inp = np.concatenate(inp, 0)
+        pred = sess.run(softmax, {"ExpandDims:0": inp})
+        preds.append(pred)
+    preds = np.concatenate(preds, 0)
+    scores = []
+    for i in range(splits):
+        part = preds[
+            (i * preds.shape[0] // splits) : ((i + 1) * preds.shape[0] // splits), :
+        ]
+        kl = part * (np.log(part) - np.log(np.expand_dims(np.mean(part, 0), 0)))
+        kl = np.mean(np.sum(kl, 1))
+        scores.append(np.exp(kl))
+    return np.mean(scores), np.std(scores)
 # This function is called automatically.
 def _init_inception():
-  global softmax
-  if not os.path.exists(MODEL_DIR):
-    os.makedirs(MODEL_DIR)
-  filename = DATA_URL.split('/')[-1]
-  filepath = os.path.join(MODEL_DIR, filename)
-  if not os.path.exists(filepath):
-    def _progress(count, block_size, total_size):
-      sys.stdout.write('\r>> Downloading %s %.1f%%' % (
-          filename, float(count * block_size) / float(total_size) * 100.0))
-      sys.stdout.flush()
-    filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath, _progress)
-    print()
-    statinfo = os.stat(filepath)
-    print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
-  tarfile.open(filepath, 'r:gz').extractall(MODEL_DIR)
-  with tf.gfile.FastGFile(os.path.join(
-      MODEL_DIR, 'classify_image_graph_def.pb'), 'rb') as f:
-    graph_def = tf.GraphDef()
-    graph_def.ParseFromString(f.read())
-    _ = tf.import_graph_def(graph_def, name='')
-  # Works with an arbitrary minibatch size.
-  pool3 = sess.graph.get_tensor_by_name('pool_3:0')
-  ops = pool3.graph.get_operations()
-  for op_idx, op in enumerate(ops):
-    for o in op.outputs:
-      shape = o.get_shape()
-      shape = [s.value for s in shape]
-      new_shape = []
-      for j, s in enumerate(shape):
-        if s == 1 and j == 0:
-          new_shape.append(None)
-        else:
-          new_shape.append(s)
-      o.set_shape(tf.TensorShape(new_shape))
-  w = sess.graph.get_operation_by_name("softmax/logits/MatMul").inputs[1]
-  logits = tf.matmul(tf.squeeze(pool3, [1, 2]), w)
-  softmax = tf.nn.softmax(logits)
+    global softmax
+    if not os.path.exists(MODEL_DIR):
+        os.makedirs(MODEL_DIR)
+    filename = DATA_URL.split("/")[-1]
+    filepath = os.path.join(MODEL_DIR, filename)
+    if not os.path.exists(filepath):
+        def _progress(count, block_size, total_size):
+            sys.stdout.write(
+                "\r>> Downloading %s %.1f%%"
+                % (filename, float(count * block_size) / float(total_size) * 100.0)
+            )
+            sys.stdout.flush()
+        filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath, _progress)
+        print()
+        statinfo = os.stat(filepath)
+        print("Successfully downloaded", filename, statinfo.st_size, "bytes.")
+    tarfile.open(filepath, "r:gz").extractall(MODEL_DIR)
+    with tf.gfile.FastGFile(
+        os.path.join(MODEL_DIR, "classify_image_graph_def.pb"), "rb"
+    ) as f:
+        graph_def = tf.GraphDef()
+        graph_def.ParseFromString(f.read())
+        _ = tf.import_graph_def(graph_def, name="")
+    # Works with an arbitrary minibatch size.
+    pool3 = sess.graph.get_tensor_by_name("pool_3:0")
+    ops = pool3.graph.get_operations()
+    for op_idx, op in enumerate(ops):
+        for o in op.outputs:
+            shape = o.get_shape()
+            shape = [s.value for s in shape]
+            new_shape = []
+            for j, s in enumerate(shape):
+                if s == 1 and j == 0:
+                    new_shape.append(None)
+                else:
+                    new_shape.append(s)
+            o.set_shape(tf.TensorShape(new_shape))
+    w = sess.graph.get_operation_by_name("softmax/logits/MatMul").inputs[1]
+    logits = tf.matmul(tf.squeeze(pool3, [1, 2]), w)
+    softmax = tf.nn.softmax(logits)
 if softmax is None:
-  _init_inception()
+    _init_inception()
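For orientation, a minimal sketch of how the refactored module is typically driven. The module name inception_score and the random test images are assumptions for illustration, not part of the commit. Note that the module pins its GPU with hvd.local_rank() at import time, so Horovod must be initialized before the import:

# Hypothetical driver for the file changed above (module name assumed).
import horovod.tensorflow as hvd
import numpy as np

hvd.init()  # must precede the import below, which builds a tf.Session

import inception_score  # noqa: E402  (deliberate post-init import)

# Stand-in inputs: 100 random 299x299 RGB arrays in [0, 255];
# real use would pass generator samples instead.
images = [
    np.random.randint(0, 256, (299, 299, 3)).astype(np.uint8)
    for _ in range(100)
]

mean, std = inception_score.get_inception_score(images, splits=10)
print("Inception score: %.3f +/- %.3f" % (mean, std))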