gpu_infer.m

Go to the documentation of this file.
00001 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%>
00002 % Updates the feature maps for a single training sample (image) using
00003 % the GPUmat libraries (and thus is FAST). This is done via
00004 % conjuagte gradient. This solves Ax=b where A is the F k matrix, x is the z feature maps
00005 % and b is the y_tilda reconstruction (which you want to make equal y).
00006 % Therefore Atb is F'y and AtAx is F'Fz (where each is a convolution).
00007 %
00008 % @file
00009 % @author Matthew Zeiler
00010 % @date Mar 11, 2010
00011 %
00012 % @inference_file @copybrief gpu_infer.m
00013 % @gpu_file @copybrief gpu_infer.m
00014 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00015 
00016 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%>
00017 % @copybrief gpu_infer.m
00018 %
00019 % @param max_it number of conjugate gradient iterations
00020 % @param z the feature maps to update (xdim+filter_size x ydim+filter_size x num_feature_maps).
00021 % @param w the auxilary variable (same size as z).
00022 % @param y the input maps for the layer (xdim x ydim x num_input_maps).
00023 % @param F the filters (Fxdim x Fydim x num_input_maps x num_feature_maps).
00024 % @param z0 the z0 feature maps (may not be used) (xdim+filter_size x
00025 % ydim+filter_size x num_input_maps).
00026 % @param z0_filter_size the size of the z0 filters (if used).
00027 % @param lambda the coefficient on the reconstruction error term.
00028 % @param beta the continuation variable on the ||z-x|| term.
00029 % @param C the connectivity matrix for the layer.
00030 % @param TRAIN_Z0 binary indicating if z0 should be used or not.
00031 % @param COMP_THREADS the number of threads to split computation over.
00032 %
00033 % @retval z the updated feature maps.
00034 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00035 function [z] = gpu_infer(max_it,z,w,y,F,z0,z0_filter_size,lambda,beta,C,TRAIN_Z0,COMP_THREADS)
00036 
00037 % Get the number of ks.
00038 % num_feature_maps = size(F,4);
00039 num_input_maps = size(F,3);
00040 % filter_size = size(F,1);
00041 % xdim = size(y,1);
00042 % ydim = size(y,2);
00043 
00044 % siny = single(y);
00045 % sinz = single(z);
00046 % sinF = single(F);
00047 % sinFflip = flipdim(flipdim(sinF,1),2);
00048 % sinC = logical(single(C));
00049 
00050 % Flipped version of F.
00051 Fflip = flipdim(flipdim(F,1),2);
00052 % Same as flipping like above.
00053 % Fflip = slice(slice(F,[END -1 1],':',':',':'),':',[END -1 1],':',':');
00054 % newFflip = reshape(Fflip,size(Fflip,1)*size(Fflip,2),size(Fflip,3)*size(Fflip,4));
00055 
00056 
00057 %%%%%%%%%%%%%%%
00058 
00059 % Initialize the running sum for each feature map.
00060 z0_filter = ones(z0_filter_size,z0_filter_size,GPUsingle)/single(z0_filter_size);
00061 % C = GPUsingle(C);
00062 
00063 % numOutputsX = xdim - filter_size + 1;
00064 % numOutputs = numOutputsX*numOutputsX;
00065 % targets = zeros(num_feature_maps*numOutputs,1, GPUsingle); %note this is
00066 
00067 %%%%%%%%%%
00068 %%Compute the right hand side (A'b) term
00069 Atb = zeros(size(z),GPUsingle);
00070 % sinAtb = single(Atb);
00071 % Do the f'y convolutions.
00072 if(TRAIN_Z0) % Also convolve flipped Fjk with z0 maps convolution
00073     % Convolve z0 map for each j with it's filter.
00074     z0conv = mycuConv2(z0,z0_filter,'valid');
00075     for j=1:num_input_maps
00076         % For all feature maps, convolve with input map j and z0's
00077         % convolution.
00078         Atb(:,:,C(j,:)) = Atb(:,:,C(j,:)) +...
00079             mycuConv2(y(:,:,j),squeeze(Fflip(:,:,j,C(j,:))),'full',COMP_THREADS) -...
00080             mycuConv2(z0conv(:,:,j),squeeze(Fflip(:,:,j,C(j,:))),'full',COMP_THREADS);
00081     end
00082 else
00083 %     'CPU edition'
00084 %     tic
00085 %     for j=1:num_input_maps
00086 %         % For all feature maps, convolve with inptu map j.
00087 %         sinAtb(:,:,sinC(j,:)) = sinAtb(:,:,sinC(j,:)) +...
00088 %             ipp_conv2(siny(:,:,j),squeeze(sinFflip(:,:,j,sinC(j,:))),'full',COMP_THREADS);
00089 %     end
00090 %     t=toc
00091 %     'GPUedition'
00092 %     tic
00093     for j=1:num_input_maps
00094         idx = GPUsingle(find(single(C(j,:))));
00095         % For all feature maps, convolve with inptu map j.
00096         Atb(:,:,idx) = Atb(:,:,idx) + gpu_conv2(y(:,:,j),Fflip(:,:,j,idx),'full');
00097     end
00098 %     t=toc
00099     
00100     %         idx = GPUsingle(find(single(C)));
00101     %     size(Atb)
00102     %
00103     %     Atb = gpu_conv2(y,Fflip(:,:,C),'full');
00104     %     sizeAtb = size(Atb);
00105     %     selector = GPUsingle(repmat(GPUsingle(eye(num_input_maps,num_input_maps)),num_feature_maps,1));
00106     %     Atb = Atb(:,:,selector);
00107     %     Atb = reshape(Atb,sizeAtb);
00108     %     'blah'
00109     %     size(Atb)
00110     %     Atb = sum(Atb,4);
00111     %     'blah2'
00112     %     size(Atb)
00113     %     Atb = reshape(Atb,size(Atb,1),size(Atb,2),num_feature_maps,num_input_maps);
00114     %     Atb = sum(Atb,4);
00115     % %     Atb = reshape(Atb,size(Atb,1),size(Atb,2),num_feature_maps,num_input_maps,size(Atb,4));
00116     % %     Atb = Atb(:,:,GPUsingle(eye(num_input_maps,num_input_maps)),:);
00117     % % %     Atb = sum(Atb,5);
00118     % %     Atb = sum(Atb,4);
00119     % %     Atb = Atb(:,:,C);
00120     %     size(C)
00121     % %     Atb(1:2,1:2,:,:);
00122     %     size(Atb)
00123     %         t=toc
00124     
00125 %     fprintf('Error: %f\n\n\n',max(sinAtb(:)-single(Atb(:))))
00126     
00127     
00128 end
00129 % This is the RHS. Only comput this once.
00130 Atb = lambda*Atb(:) + beta*w(:);
00131 %%%%%%%%%%
00132 
00133 
00134 
00135 %%%%%%%%%%
00136 %%Compute the left hand side (A'Ax) term
00137 % Initialize the running sum for each feature map.
00138 AtAx = zeros(size(z),GPUsingle);
00139 % sinAtAx = single(AtAx);
00140 % 'CPU edition'
00141 % tic
00142 % for j=1:num_input_maps
00143 %     % This is the sum over k feature maps of the zk*Fjk convolutions as the first term of teh second convolution.
00144 %     % The second convolution is the convolution of the above with F* summed over the input maps.
00145 %     %temp1 = sum(mycuConv2(z(:,:,C(j,:)),squeeze(F(:,:,j,C(j,:))),'valid',COMP_THREADS),3);
00146 %     %AtAX(:,:,C(j,:)) = AtAx(:,:,C(j,:)) + mycuConv2(temp1,squeeze(Fflip(:,:,j,C(j,:))),'full',COMP_THREADS);
00147 %     
00148 %     sinAtAx(:,:,sinC(j,:)) = sinAtAx(:,:,sinC(j,:)) + ipp_conv2(...
00149 %         sum(ipp_conv2(sinz(:,:,sinC(j,:)),squeeze(sinF(:,:,j,sinC(j,:))),'valid',COMP_THREADS),3),...
00150 %         squeeze(sinFflip(:,:,j,sinC(j,:))),'full',COMP_THREADS);
00151 %     
00152 %     
00153 %     
00154 % end
00155 % t=toc
00156 % 'GPU edition'
00157 % tic
00158 
00159 for j=1:num_input_maps
00160     % This is the sum over k feature maps of the zk*Fjk convolutions as the first term of teh second convolution.
00161     % The second convolution is the convolution of the above with F* summed over the input maps.
00162     %temp1 = sum(mycuConv2(z(:,:,C(j,:)),squeeze(F(:,:,j,C(j,:))),'valid',COMP_THREADS),3);
00163     %AtAX(:,:,C(j,:)) = AtAx(:,:,C(j,:)) + mycuConv2(temp1,squeeze(Fflip(:,:,j,C(j,:))),'full',COMP_THREADS);
00164     idx = GPUsingle(find(single(C(j,:))));
00165     AtAx(:,:,idx) = AtAx(:,:,idx) + gpu_conv2(...
00166         sum(gpu_conv2(z(:,:,idx),F(:,:,j,idx),'valid'),3),...
00167         Fflip(:,:,j,idx),'full');
00168 end
00169 % t=toc
00170 % 
00171 % fprintf('Error: %f\n\n\n',max(sinAtAx(:)-single(AtAx(:))))
00172 
00173 % This is the left hand side.
00174 AtAx = lambda*AtAx(:)+beta*z(:);
00175 %%%%%%%%%%
00176 
00177 
00178 % Compute the residual.
00179 r = Atb - AtAx;
00180 
00181 for iter = 1:max_it
00182     rho = (r(:)'*r(:));
00183     
00184     if ( iter > 1 ),                       % direction vector
00185         %         their_beta = rho / rho_1;
00186         their_beta = double(abs(rho_1) > 1e-9).*rho / rho_1;  % Added from dilips.m
00187         p(:) = r(:) + their_beta*p(:);
00188     else
00189         p = r;
00190         p = reshape(p,size(z));
00191     end
00192     
00193     %%%%%%%%%%
00194     %%Compute the left hand side (A'Ax) term
00195     % Initialize the running sum for each feature map.
00196     q = zeros(size(z),GPUsingle);
00197     %     for j=1:num_input_maps
00198     %         % This is the sum over k feature maps of the zk*Fjk convolutions as the first term of teh second convolution.
00199     %         % The second convolution is the convolution of the above with F* summed over the input maps.
00200     %         q(:,:,C(j,:)) = q(:,:,C(j,:)) + mycuConv2(...
00201     %             sum(mycuConv2(p(:,:,C(j,:)),squeeze(F(:,:,j,C(j,:))),'valid',COMP_THREADS),3),...
00202     %             squeeze(Fflip(:,:,j,C(j,:))),'full',COMP_THREADS);
00203     %     end
00204     
00205     for j=1:num_input_maps
00206         % This is the sum over k feature maps of the zk*Fjk convolutions as the first term of teh second convolution.
00207         % The second convolution is the convolution of the above with F* summed over the input maps.
00208         %temp1 = sum(mycuConv2(z(:,:,C(j,:)),squeeze(F(:,:,j,C(j,:))),'valid',COMP_THREADS),3);
00209         %AtAX(:,:,C(j,:)) = AtAx(:,:,C(j,:)) + mycuConv2(temp1,squeeze(Fflip(:,:,j,C(j,:))),'full',COMP_THREADS);
00210         idx = GPUsingle(find(single(C(j,:))));
00211         q(:,:,idx) = q(:,:,idx) + gpu_conv2(...
00212             sum(gpu_conv2(p(:,:,idx),F(:,:,j,idx),'valid'),3),...
00213             Fflip(:,:,j,idx),'full');
00214     end
00215     
00216     % This is the left hand side.
00217     q = lambda*q(:)+beta*p(:);
00218     %%%%%%%%%%
00219     
00220     %     their_alpha = rho / (p(:)'*q(:) );
00221     temp = p(:)'*q(:);
00222     their_alpha = double(abs(temp) > 1e-9).*rho / temp;
00223     z(:) = z(:) + their_alpha * p(:);           % update approximation vector
00224     r = r - their_alpha*q;                      % compute residual
00225     
00226     rho_1 = rho;
00227     %     fprintf('\nIteration %d |residual| %.3g', iter, norm(r));
00228     if(sqrt(sum(abs(r(:)).^2)) < 1e-6)
00229         %        fprintf('Convergence reached in iteration %d - breaking\n', iter);
00230         break;
00231     end;
00232 end
00233 
00234 end
Generated on Thu Apr 8 13:36:44 2010 for DeconvNetToolbox by  doxygen 1.6.3