# Training and testing PhaseAwareNet (VNet) on the Miracle database

In [1]:
caffe_root = '../../' # this file should be run from {caffe_root}/examples/PhaseAware 

import sys
import lmdb

sys.path.insert(0, caffe_root + 'python')
import caffe
import os

os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES'] = '3'

#caffe.set_device(1)
caffe.set_mode_gpu()

import numpy as np
from pylab import *
%matplotlib inline
import tempfile

sys.path.append("../pycaffe/layers") # the datalayers we will use are in this directory.
sys.path.append("../pycaffe") # the tools file is in this folder

import tools #this contains some tools that we need

from __future__ import division

from caffe import layers as L
from caffe import params as P


Define the PhaseAwareNet (VNet) architecture

In [2]:
def VahidNet( train=True, num_conv0_kernels=4, batch_size = 40, learn_conv0=False, moving_average_fraction = 0.98):
    """Build the network definition and write it to a temporary prototxt file.

    Parameters
    ----------
    train : bool
        Selects the data split handed to the Python data layer
        ('train' when true, 'test' otherwise).
    num_conv0_kernels : int
        Number of output channels of the first convolution (conv0).
    batch_size : int
        Batch size passed to the data layer.
    learn_conv0 : bool
        When equal to False both conv0 blobs get lr_mult=0 (frozen); otherwise
        the conv0 weights are learnable and only its bias stays frozen.
    moving_average_fraction : float
        Forwarded unchanged to every BatchNorm layer.

    Returns
    -------
    str
        Path of the temporary file holding the serialized net.  The file is
        created with delete=False, so it is left on disk for the caller.
    """
    # conv0 learning parameters
    if learn_conv0 == False:
        param_conv0 = [dict(lr_mult=0, decay_mult=0)] * 2
    else:
        param_conv0 = [dict(lr_mult=1, decay_mult=0), dict(lr_mult=0, decay_mult=0)]

    # Shared by all BatchNorm layers: first two blobs learnable, last two frozen.
    param_bn = [dict(lr_mult=1, decay_mult=0), dict(lr_mult=1, decay_mult=0),
                dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)]

    def conv(bottom, num_output, kernel_size, pad):
        # Stride-1 convolution with learnable weights and a frozen zero bias.
        return L.Convolution(bottom, kernel_size=kernel_size, stride=1,
                             num_output=num_output, pad=pad,
                             param=[dict(lr_mult=1, decay_mult=0), dict(lr_mult=0, decay_mult=0)],
                             weight_filler=dict(type='gaussian', std=0.01),
                             bias_filler=dict(type='constant', value=0))

    def batch_norm(bottom):
        # BatchNorm configured identically for every stage of the net.
        return L.BatchNorm(bottom, param=param_bn,
                           scale_filler=dict(type='constant', value=1),
                           bias_filler=dict(type='constant', value=0),
                           moving_average_fraction=moving_average_fraction,
                           eps=1e-4)

    def avg_pool(bottom):
        # 5x5 average pooling, stride 2, pad 1.
        return L.Pooling(bottom, pool=P.Pooling.AVE, kernel_size=5, stride=2, pad=1)

    n = caffe.NetSpec()

    # Data source: Mo's stego Aug data layer (a Python layer).
    subset = 'train' if train else 'test'
    root = '/home/mchen/tmp/caffe/data/JStego/MiracleList/'
    params = dict( batch_size = batch_size, im_shape = [512, 512], split = subset, root = root )
    n.data, n.label = L.Python(module='AugStegoDataLayer', layer='AugmentDataLayerSync',
                               ntop=2, param_str=str(params))

    #n.data, n.label = L.Data(source=source, batch_size = batch_size, backend=1, ntop=2)

    # conv0 starts at all zeros; its kernels are written in after the solver is built.
    n.conv0 = L.Convolution(n.data, kernel_size=5, stride=1, num_output=num_conv0_kernels, pad=2,
                            param=param_conv0,
                            weight_filler=dict(type='constant', value=0),
                            bias_filler=dict(type='constant', value=0))

    # Stage 1: conv -> abs -> BN -> tanh (in place on the bn1 blob)
    n.conv1 = conv(n.conv0, 8, 5, 2)
    n.abs1 = L.AbsVal(n.conv1)
    n.bn1 = batch_norm(n.abs1)
    n.tanh1 = L.TanH(n.bn1, in_place=True)

    # Stage 2: conv -> BN -> tanh (in place), then the custom SplitByPhase layer
    n.conv2 = conv(n.bn1, 16, 5, 2)
    n.bn2 = batch_norm(n.conv2)
    n.tanh2 = L.TanH(n.bn2, in_place=True)
    n.sbp = L.SplitByPhase(n.tanh2)

    # Stage 3: 1x1 conv -> BN -> ReLU (in place) -> average pool
    n.conv3 = conv(n.sbp, 128, 1, 0)
    n.bn3 = batch_norm(n.conv3)
    n.relu3 = L.ReLU(n.bn3, in_place=True)
    n.pool3 = avg_pool(n.bn3)

    # Stage 4: same layout, 256 channels
    n.conv4 = conv(n.pool3, 256, 1, 0)
    n.bn4 = batch_norm(n.conv4)
    n.relu4 = L.ReLU(n.bn4, in_place=True)
    n.pool4 = avg_pool(n.bn4)

    # Stage 5: same layout, 512 channels, global average pooling
    n.conv5 = conv(n.pool4, 512, 1, 0)
    n.bn5 = batch_norm(n.conv5)
    n.relu5 = L.ReLU(n.bn5, in_place=True)
    n.pool5 = L.Pooling(n.bn5, pool=P.Pooling.AVE, global_pooling=True)

    # Two-way classifier with loss and accuracy heads
    n.fc6 = L.InnerProduct(n.pool5, num_output=2,
                           param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                           weight_filler=dict(type='xavier'),
                           bias_filler=dict(type='constant', value=0.01))
    n.loss = L.SoftmaxWithLoss(n.fc6, n.label)
    n.acc = L.Accuracy(n.fc6, n.label)

    # Serialize the net to a temporary file and hand back its filename.
    with tempfile.NamedTemporaryFile(delete=False) as proto_file:
        proto_file.write(str(n.to_proto()))
        return proto_file.name
 

Define the solver

In [3]:
from caffe.proto import caffe_pb2

def solver(train_net_path, test_net_path=None, base_lr=0.001, weight_decay = 0.01):
    """Write a Caffe SolverParameter to a temporary file and return its path.

    Parameters
    ----------
    train_net_path : str
        Path of the training net prototxt.
    test_net_path : str or None
        Path of the test net prototxt; when None no test net is registered.
    base_lr : float
        Initial SGD learning rate.
    weight_decay : float
        L2 weight-decay coefficient.

    Returns
    -------
    str
        Filename of the temporary solver file (created with delete=False).
    """
    cfg = caffe_pb2.SolverParameter()

    # Train net and, optionally, one test net.
    cfg.train_net = train_net_path
    if test_net_path is not None:
        cfg.test_net.append(test_net_path)
        # The test interval equals max_iter and a single batch is used per test
        # -- presumably to keep Caffe's own periodic testing out of the way,
        # since run_solver forwards the test net itself; confirm.
        cfg.test_interval = 1000000
        cfg.test_iter.append(1)

    # Plain SGD with momentum; iter_size=1 means no gradient accumulation.
    cfg.type = 'SGD'
    cfg.iter_size = 1
    cfg.max_iter = 1000000
    cfg.momentum = 0.9
    cfg.weight_decay = weight_decay

    # 'step' policy: multiply the learning rate by gamma every stepsize iterations.
    cfg.base_lr = base_lr
    cfg.lr_policy = 'step'
    cfg.gamma = 0.75
    cfg.stepsize = 15000  # every 20 epochs, per the original author's note

    # Log every 10 iterations and snapshot every 6000 iterations.
    cfg.display = 10
    cfg.snapshot = 6000
    #cfg.snapshot_prefix = '/home/dde/Desktop/JpegTraningSnapshots/Vnet_BOSS_JUNI_04_QF75_KVM_lr1em3_wd5em4_120ep_NoSBP'
    cfg.snapshot_prefix = '/home/mchen/tmp/caffe/examples/PhaseAwareNet/VahidNet'

    # Train on the GPU.
    cfg.solver_mode = caffe_pb2.SolverParameter.GPU

    # Persist the solver definition and return the file's name.
    with tempfile.NamedTemporaryFile(delete=False) as solver_file:
        solver_file.write(str(cfg))
        return solver_file.name

In [4]:
def run_solver(niter, solver, disp_interval=10):
    """Step Caffe solvers for `niter` iterations, logging loss and accuracy.

    Parameters
    ----------
    niter : int
        Number of single-step SGD iterations to run.
    solver : iterable of (name, solver) pairs
        The solvers to advance.  Each solver must expose `step`, `net.blobs`
        (with 'loss' and 'acc' entries), `net.save` and `test_nets`.
    disp_interval : int
        A status line is printed every `disp_interval` iterations and on the
        last iteration.

    Returns
    -------
    (loss, acc, weights)
        `loss` and `acc` map each solver name to an array of length `niter`
        holding the per-iteration training values; `weights` maps each name to
        the path of the caffemodel saved in a fresh temporary directory.
    """
    # Use the argument, not a same-named global: the function previously read
    # a module-level `solvers`, which only worked by coincidence.
    solvers = list(solver)

    blobs = ('loss', 'acc')
    loss, acc = ({name: np.zeros(niter) for name, _ in solvers}
                 for _ in blobs)
    for it in range(niter):
        for name, s in solvers:
            s.step(1)  # run a single SGD step in Caffe
            loss[name][it], acc[name][it] = (s.net.blobs[b].data.copy() for b in blobs)
        if it % disp_interval == 0 or it + 1 == niter:
            reports = []
            for name, s in solvers:
                if len(s.test_nets):
                    # One forward pass of the first test net gives the test metrics.
                    test_output = s.test_nets[0].forward()
                    test_acc = test_output['acc']
                    test_loss = test_output['loss']
                else:
                    test_acc = 0
                    test_loss = 0
                reports.append(
                    '%s: train_loss=%.3f, train_acc=%2d%%, test_loss=%.3f, test_acc=%2d%%'
                    % (name, loss[name][it], np.round(100*acc[name][it]),
                       test_loss, np.round(100*test_acc)))
            # Parenthesised single-argument print behaves the same on Python 2 and 3.
            print('%3d) %s' % (it, '; '.join(reports)))
    # Save the learned weights of every solver.
    weight_dir = tempfile.mkdtemp()
    weights = {}
    for name, s in solvers:
        filename = 'weights.%s.caffemodel' % name
        weights[name] = os.path.join(weight_dir, filename)
        s.net.save(weights[name])
    return loss, acc, weights

In [5]:
def get_vahidnet_solver(model=None, base_lr=1e-3, weight_decay=0.01,
                        num_conv0_kernels=4, learn_conv0=False, moving_average_fraction=0.95):
    """Create a Caffe solver for VahidNet and initialise its conv0 kernels.

    Parameters
    ----------
    model : str or None
        Path of a caffemodel to copy weights from.  When given, the conv0
        kernels are taken from that model and not re-initialised here.
    base_lr, weight_decay : float
        Forwarded to `solver`.
    num_conv0_kernels : int
        Number of conv0 kernels; must satisfy 0 < num_conv0_kernels < 5.
    learn_conv0 : bool
        Forwarded to `VahidNet`; also selects how conv0 is initialised when
        no `model` is supplied (see below).
    moving_average_fraction : float
        Forwarded to `VahidNet`.

    Returns
    -------
    The solver object returned by `caffe.get_solver`.

    Raises
    ------
    AssertionError
        If `num_conv0_kernels` is outside 1..4.
    """
    assert 0 < num_conv0_kernels < 5

    # Fixed 5x5 kernels written into conv0 (in this order) when it is not learned.
    KV = np.array([[-1,2,-2,2,-1],[2,-6,8,-6,2],[-2,8,-12,8,-2],[2,-6,8,-6,2],[-1,2,-2,2,-1]], dtype=np.float32)/12
    KM = np.array([[0,0,5.2,0,0],[0,23.4,36.4,23.4,0],[5.2,36.4,-261,36.4,5.2],[0,23.4,36.4,23.4,0],[0,0,5.2,0,0]], dtype=np.float32)/261
    GH = np.array([[0.0562,-0.1354,0,0.1354,-0.0562],[0.0818,-0.1970,0,0.1970,-0.0818],[0.0926,-0.2233,0,0.2233,-0.0926],[0.0818,-0.1970,0,0.1970,-0.0818],[0.0562,-0.1354,0,0.1354,-0.0562]], dtype=np.float32)
    GV = np.fliplr(GH).T.copy()

    trn_net = VahidNet(True, num_conv0_kernels=num_conv0_kernels,
                       learn_conv0=learn_conv0, moving_average_fraction=moving_average_fraction)
    val_net = VahidNet(False, num_conv0_kernels=num_conv0_kernels,
                       learn_conv0=learn_conv0, moving_average_fraction=moving_average_fraction)

    solver_path = solver(trn_net, val_net, base_lr=base_lr, weight_decay=weight_decay)
    shisnet_solver = caffe.get_solver(solver_path)

    conv0 = shisnet_solver.net.params['conv0'][0]  # conv0 weight blob

    if model is not None:
        shisnet_solver.net.copy_from(model)
    elif learn_conv0:
        for k in range(num_conv0_kernels):
            # Sum of the 24 non-centre taps (flat index 12 is the centre of a 5x5 kernel).
            norm_const = np.delete(conv0.data[k,0,:,:].flatten(), 12).sum()
            # VahidNet fills conv0 with constant zeros, so on a freshly built net
            # this sum is 0 and dividing would turn the whole kernel into NaN.
            # NOTE(review): with a zero filler the learnable kernels then start
            # with only the centre tap set; a random conv0 filler may be intended.
            if norm_const != 0:
                conv0.data[k,0,:,:] /= norm_const
            conv0.data[k,0,2,2] = -1
    else:
        for k, kernel in enumerate((KV, KM, GH, GV)[:num_conv0_kernels]):
            conv0.data[k] = kernel

    return shisnet_solver

Train the Vahid Net

In [6]:
# Training run configuration.  The log printed below this cell reports 30000
# training images at batch size 40, i.e. 750 iterations per epoch.
niter = 150000 # 200 epochs


#model = 'JPGSteganalysis_KM_NVC_40000iter_wd5e3.caffemodel'
# No pretrained weights: conv0 is initialised inside get_vahidnet_solver.
model = None

base_lr = 0.001
weight_decay = 1e-2

num_conv0_kernels = 4
learn_conv0 = False
# Forwarded through VahidNet to every BatchNorm layer.
moving_average_fraction = 0

vahid_net_solver = get_vahidnet_solver( model=model, base_lr=base_lr, weight_decay=weight_decay,
                                        num_conv0_kernels=num_conv0_kernels, learn_conv0=learn_conv0,
                                        moving_average_fraction=moving_average_fraction)

print 'Running solvers for %d iterations...' % niter
# run_solver works on (name, solver) pairs; results are keyed by that name.
solvers = [('stego', vahid_net_solver)]
loss, acc, weights = run_solver(niter, solvers)
print 'Done.'

# Per-iteration training curves and the path of the saved caffemodel.
train_loss = loss['stego']
train_acc = acc['stego']
stego_weights = weights['stego']

# Delete solvers to save memory.
del vahid_net_solver, solvers

BatchLoader initialized with 30000 images
AugmentStegoDataLayerSync initialized for split: train, with bs: 40, im_shape: [512, 512].
BatchLoader initialized with 10000 images
AugmentStegoDataLayerSync initialized for split: test, with bs: 40, im_shape: [512, 512].
Running solvers for 150000 iterations...
 0) stego: train_loss=0.707, train_acc=50%, test_loss=0.830, test_acc=50%
 10) stego: train_loss=0.694, train_acc=50%, test_loss=0.709, test_acc=50%
 20) stego: train_loss=0.693, train_acc=52%, test_loss=0.706, test_acc=50%
 30) stego: train_loss=0.688, train_acc=50%, test_loss=0.695, test_acc=55%
 40) stego: train_loss=0.685, train_acc=52%, test_loss=0.685, test_acc=52%
 50) stego: train_loss=0.680, train_acc=65%, test_loss=0.688, test_acc=52%
 60) stego: train_loss=0.676, train_acc=55%, test_loss=0.693, test_acc=50%
 70) stego: train_loss=0.685, train_acc=52%, test_loss=0.788, test_acc=50%
 80) stego: train_loss=0.657, train_acc=55%, test_loss=0.759, test_acc=55%
 90) stego: train_lo

In [7]:
# !cp {"%s" % stego_weights} JPGSteganalysis_Vnet_BOSS_UED_02_QF95_KVMGHV_lr1em3g085ep10k_wd1em2_300k_bs40_MAF095_S_OrigCS.caffemodel
!cp {"%s" % stego_weights} VahidNet_JUNI_04_Miracle_Aug.caffemodel

In [None]:
# Plot the per-iteration training loss recorded by the most recent training run.
plt.plot(train_loss)

In [None]:
# Plot the per-iteration training accuracy recorded by the most recent training run.
plt.plot(train_acc)

In [8]:
def update_progress(progress):
    """Display or update a one-line console progress bar on stdout.

    `progress` is expected to be a float between 0 and 1; an int is converted
    to a float first.  Anything that is neither is reported as an error and
    shown as 0.  A value under 0 represents a 'halt' (shown as 0); a value of
    1 or more represents 100% and is reported as done.
    """
    bar_width = 10  # modify this to change the length of the progress bar
    note = ""

    if isinstance(progress, int):
        progress = float(progress)
    if not isinstance(progress, float):
        progress, note = 0, "error: progress var must be float\r\n"
    if progress < 0:
        progress, note = 0, "Halt...\r\n"
    if progress >= 1:
        progress, note = 1, "Done...\r\n"

    filled = int(round(bar_width*progress))
    bar = "#"*filled + "-"*(bar_width-filled)

    # The leading carriage return rewrites the same console line on each call.
    sys.stdout.write("\rPercent: [{0}] {1}% {2}".format(bar, progress*100, note))
    sys.stdout.flush()

In [9]:
def compute_refinement_moments (net, collection_iters = 2000):
    """Collect BatchNorm statistics over repeated forward passes.

    For each of the layers bn1..bn5 the function reads parameter blobs 2 and 3
    (stored under the keys 'mean' and 'var') after every forward pass and
    finally takes the per-channel median over all passes.

    Parameters
    ----------
    net : caffe.Net
        Network containing BatchNorm layers named 'bn1' .. 'bn5'.
    collection_iters : int
        Number of forward passes to run.

    Returns
    -------
    tuple of five dicts (bn1, bn2, bn3, bn4, bn5), each with the keys
        'mean', 'var'        -- arrays of shape (collection_iters, channels)
        'ma_mean', 'ma_var'  -- copies of blobs 2 / 3 taken before any pass
        'rf_mean', 'rf_var'  -- per-channel medians of 'mean' / 'var'
    """
    layer_names = ('bn1', 'bn2', 'bn3', 'bn4', 'bn5')

    # Allocate per-layer storage and remember the values the net started with.
    collected = []
    for layer in layer_names:
        layer_blobs = net.params[layer]
        num_channels = layer_blobs[2].data.shape[1]
        collected.append({
            'mean': np.zeros((collection_iters, num_channels)),
            'var': np.zeros((collection_iters, num_channels)),
            'ma_mean': np.squeeze(layer_blobs[2].data).copy(),
            'ma_var': np.squeeze(layer_blobs[3].data).copy(),
        })

    # Record blobs 2 and 3 of every layer after each forward pass.
    for it in xrange(collection_iters):
        net.forward()
        for layer, stats in zip(layer_names, collected):
            layer_blobs = net.params[layer]
            stats['mean'][it, :] = np.squeeze(layer_blobs[2].data).copy()
            stats['var'][it, :] = np.squeeze(layer_blobs[3].data).copy()
        update_progress((it+1)/collection_iters)

    # Refined moments: median across the collected passes, per channel.
    for stats in collected:
        stats['rf_mean'] = np.median(stats['mean'], axis=0)
        stats['rf_var'] = np.median(stats['var'], axis=0)

    bn1, bn2, bn3, bn4, bn5 = collected
    return bn1, bn2, bn3, bn4, bn5

In [10]:
def embed_refinement_moments(net, bn1, bn2, bn3, bn4, bn5):
    """Write refined statistics into the BatchNorm layers of `net`.

    For each layer 'bn1' .. 'bn5' the 'rf_mean' entry of the matching dict is
    copied into parameter blob 2 and the 'rf_var' entry into blob 3, at index
    [0, :, 0, 0].  The net is modified in place; nothing is returned.
    """
    per_layer = (('bn1', bn1), ('bn2', bn2), ('bn3', bn3), ('bn4', bn4), ('bn5', bn5))
    for layer_name, stats in per_layer:
        layer_blobs = net.params[layer_name]
        layer_blobs[2].data[0,:,0,0] = stats['rf_mean'].copy()
        layer_blobs[3].data[0,:,0,0] = stats['rf_var'].copy()

In [11]:
def eval_stego_net( weights, test_iters=None, refine_bn=True):
    """Evaluate a trained VahidNet on the test split and return its mean accuracy.

    Parameters
    ----------
    weights : str
        Path of the caffemodel to evaluate.
    test_iters : int or None
        Number of test batches to average over.  When None it is derived from
        a fixed count of 8000 test samples and the test net's batch size.
    refine_bn : bool
        When true, BatchNorm statistics are first re-estimated on the training
        split (3000 forward passes at batch size 64) and written into the test
        net before evaluation.

    Returns
    -------
    The 'acc' output of the test net averaged over `test_iters` batches.

    Raises
    ------
    ValueError
        If `test_iters` is given but not positive.
    """
    # Reject an unusable iteration count before the expensive refinement pass;
    # previously 0 only failed at the final division.
    if test_iters is not None and test_iters <= 0:
        raise ValueError('test_iters must be a positive integer, got %r' % (test_iters,))

    if refine_bn:
        refine_proto = VahidNet( True, batch_size = 64, moving_average_fraction = 0)
        refine_net = caffe.Net(refine_proto, weights, caffe.TRAIN)
        bn1, bn2, bn3, bn4, bn5 = compute_refinement_moments(refine_net, collection_iters = 3000)
        del refine_net

    test_proto = VahidNet(False)
    test_net = caffe.Net(test_proto, weights, caffe.TEST)

    if refine_bn:
        embed_refinement_moments(test_net, bn1, bn2, bn3, bn4, bn5)

    batch_size = test_net.blobs['data'].shape[0]
    num_entries = 8000
    if test_iters is None:
        test_iters = num_entries // batch_size

    # Two spaces after the colon reproduce the original print-statement output.
    print('Number of test samples:  %d' % num_entries)
    print('Testing for %d iterations using batch size %d...' % (test_iters, batch_size))

    accuracy = 0
    for it in range(test_iters):
        accuracy += test_net.forward()['acc']
        # float() keeps this a true division without relying on
        # `from __future__ import division` from another cell.
        update_progress((it + 1) / float(test_iters))
    accuracy /= test_iters
    del test_net

    return accuracy

In [None]:
# Evaluate the model saved after the first training run (the file name matches
# the target of the `cp` cell above), with BatchNorm statistics refinement.
weights = 'VahidNet_JUNI_04_Miracle_Aug.caffemodel'

accuracy = eval_stego_net( weights, refine_bn=True)

print 'Accuracy: ', accuracy

BatchLoader initialized with 30000 images
AugmentStegoDataLayerSync initialized for split: train, with bs: 64, im_shape: [512, 512].
Percent: [##########] 100% Done...7% 
BatchLoader initialized with 10000 images
AugmentStegoDataLayerSync initialized for split: test, with bs: 40, im_shape: [512, 512].
Number of test samples: 8000
Testing for 200 iterations using batch size 40...
Percent: [##########] 96.0% 

In [None]:
# Keep training

niter = 12000 # number of iterations to train 40000

model = 'VahidNet_JUNI_04_AUG.caffemodel'

base_lr = 0.001

# Bring down the base_lr to be last one
for i in range(72000//6000 + 1):
 base_lr = base_lr * 0.75

num_conv0_kernels = 4
learn_conv0 = False
moving_average_fraction = 0

vahid_net_solver, num_trn_entries, num_val_entries = get_vahidnet_solver(
 trn_source=trn_source, val_source=val_source, model=model, 
 base_lr=base_lr, weight_decay=weight_decay,
 num_conv0_kernels=num_conv0_kernels, learn_conv0=learn_conv0, 
 moving_average_fraction=moving_average_fraction)

print 'Running solvers for %d iterations...' % niter
solvers = [('stego', vahid_net_solver)] 
loss, acc, weights = run_solver(niter, solvers)
print 'Done.'

train_loss = loss['stego']
train_acc = acc['stego']
stego_weights = weights['stego']

# Delete solvers to save memory.
del vahid_net_solver, solvers

In [None]:
!cp {"%s" % stego_weights} VahidNet_JUNI_04_AUG_more.caffemodel

In [None]:
TRN_Source = '/home/mchen/tmp/caffe/examples/StegoLibrary/JUNI_04_AUG/JUNI_0.4_trn_lmdb'
TST_Source = '/home/mchen/tmp/caffe/examples/StegoLibrary/JUNI_04_AUG/JUNI_0.4_val_lmdb'

weights = 'VahidNet_JUNI_04_AUG_more.caffemodel'

accuracy = eval_stego_net( TRN_Source, TST_Source, weights, refine_bn=True)

print 'Accuracy: ', accuracy