function net=LCA_infer_VB_rb(net, A, control,model)
% LCA_infer_VB_rb  Iterative variational inference of net.V given a trained model.
%
%   net = LCA_infer_VB_rb(net, A, control, model)
%
%   Inputs
%     net     - struct; fields read here: rB and (optionally) V.
%               NOTE(review): the type of net.rB and the semantics of
%               set(net.rB,N) are defined elsewhere - confirm.
%     A       - data matrix; only its second dimension N is used directly
%               here, plus sum(A(:)) for the perplexity.
%     control - options struct. Recognised fields and their defaults:
%                 maxiter (1000) maximum number of outer iterations
%                 xtol    (1e-5) stop when the relative change of BV is
%                                smaller than this value
%                 display (1)    stored but not used in this function
%                 eiter   (50)   iteration count forwarded to
%                                LCA_infer_VB_V_rb
%     model   - trained model; fields read here: V, U, alpha and gamma.
%
%   Output
%     net     - input struct with the fields rB, rBt, V, U, alpha, gamma,
%               loss, BV and perplexity set/updated.
%
%   NOTE(review): net.gamma is only assigned inside the main loop, so
%   control.maxiter is assumed to be at least 1 - confirm with callers.

control.maxiter=getOption(control,'maxiter',1000);
control.xtol=getOption(control,'xtol',1e-5);
control.display=getOption(control,'display',1);
% control.eiter is forwarded to LCA_infer_VB_V_rb below. It used to be read
% without a default, which raised an error when the caller did not set it.
% The default mirrors the iteration count used for the initialisation call.
control.eiter=getOption(control,'eiter',50);

[M,N] = size(A);

net.rB=set(net.rB,N); %only consider the submatrix
net.rBt=net.rB';

net.V=getOption(net,'V');
if isempty(net.V)
  % No initial V supplied: obtain one from the update routine. The extra
  % outputs are not used afterwards (BV is recomputed just below).
  [net.V,gamma,BV] = LCA_update_VB_V_rb(net, A, [], [], 50);
end

% Stack the model's V (if any) below the V being inferred before forming BV.
Vn=net.V;
if ~isempty(model.V)
    Vn=[net.V;model.V];
end
BV=net.rBt*Vn;

% U and alpha are taken from the trained model and are not changed below.
net.U=model.U;
net.alpha=model.alpha;

% Three cases are considered in the inference:
% 1. model.V is empty and net.V is only for the test data. In this case
%    net.rB is a square matrix and the training data is not considered.
%
% 2. model.V is not empty and net.V is for the test data. In this case
%    net.rB is not a square matrix; the training data is considered, but
%    the variational parameters of the training data are kept fixed.
%
% 3. model.V is empty and net.V is for both the test data and the training
%    data. In this case net.rB is not a square matrix; the training data is
%    considered and all variational parameters (training and test data) are
%    optimised.

for iter = 1:control.maxiter
  old_BV=BV;
  % approximate E-step
  % NOTE(review): the legacy comments here said "run the EM loop for beta
  % twice as often as for p" and "beta(i,j) is the Dirichlet parameter for
  % lambda_ij"; neither name appears in this function - confirm or remove.
  [net.V,net.gamma,BV] = LCA_infer_VB_V_rb(net, A, net.V, BV, control.eiter,model);

  % Convergence measure: largest absolute change of BV, relative to the
  % largest entry of the new BV. (net.U is not updated inside this loop,
  % so it cannot serve as a convergence measure.)
  change= max(abs(BV(:) - old_BV(:))/max(BV(:)));

  if change < control.xtol
    break
  end
end
% The loss is evaluated on the inferred parameters stacked with the model's.
net.loss = LCA_likelihood_VB_rb(net, A, [net.V;model.V], BV, [net.gamma;model.gamma]);
net.BV=BV;
% Perplexity: exponential of the loss divided by the sum of all entries of A.
net.perplexity=exp(net.loss/sum(A(:)));

