00001 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%>
00002 % Updates the feature maps for a single training sample (image) using
00003 % the GPUmat libraries (and thus is FAST). This is done via
00004 % conjuagte gradient. This solves Ax=b where A is the F k matrix, x is the z feature maps
00005 % and b is the y_tilda reconstruction (which you want to make equal y).
00006 % Therefore Atb is F'y and AtAx is F'Fz (where each is a convolution).
00007 %
00008 % @file
00009 % @author Matthew Zeiler
00010 % @date Mar 11, 2010
00011 %
00012 % @inference_file @copybrief gpu_infer.m
00013 % @gpu_file @copybrief gpu_infer.m
00014 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00015
00016 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%>
00017 % @copybrief gpu_infer.m
00018 %
00019 % @param max_it number of conjugate gradient iterations
00020 % @param z the feature maps to update (xdim+filter_size x ydim+filter_size x num_feature_maps).
00021 % @param w the auxilary variable (same size as z).
00022 % @param y the input maps for the layer (xdim x ydim x num_input_maps).
00023 % @param F the filters (Fxdim x Fydim x num_input_maps x num_feature_maps).
00024 % @param z0 the z0 feature maps (may not be used) (xdim+filter_size x
00025 % ydim+filter_size x num_input_maps).
00026 % @param z0_filter_size the size of the z0 filters (if used).
00027 % @param lambda the coefficient on the reconstruction error term.
00028 % @param beta the continuation variable on the ||z-x|| term.
00029 % @param C the connectivity matrix for the layer.
00030 % @param TRAIN_Z0 binary indicating if z0 should be used or not.
00031 % @param COMP_THREADS the number of threads to split computation over.
00032 %
00033 % @retval z the updated feature maps.
00034 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00035 function [z] = gpu_infer(max_it,z,w,y,F,z0,z0_filter_size,lambda,beta,C,TRAIN_Z0,COMP_THREADS)
00036
00037 % Get the number of ks.
00038 % num_feature_maps = size(F,4);
00039 num_input_maps = size(F,3);
00040 % filter_size = size(F,1);
00041 % xdim = size(y,1);
00042 % ydim = size(y,2);
00043
00044 % siny = single(y);
00045 % sinz = single(z);
00046 % sinF = single(F);
00047 % sinFflip = flipdim(flipdim(sinF,1),2);
00048 % sinC = logical(single(C));
00049
00050 % Flipped version of F.
00051 Fflip = flipdim(flipdim(F,1),2);
00052 % Same as flipping like above.
00053 % Fflip = slice(slice(F,[END -1 1],':',':',':'),':',[END -1 1],':',':');
00054 % newFflip = reshape(Fflip,size(Fflip,1)*size(Fflip,2),size(Fflip,3)*size(Fflip,4));
00055
00056
00057 %%%%%%%%%%%%%%%
00058
00059 % Initialize the running sum for each feature map.
00060 z0_filter = ones(z0_filter_size,z0_filter_size,GPUsingle)/single(z0_filter_size);
00061 % C = GPUsingle(C);
00062
00063 % numOutputsX = xdim - filter_size + 1;
00064 % numOutputs = numOutputsX*numOutputsX;
00065 % targets = zeros(num_feature_maps*numOutputs,1, GPUsingle); %note this is
00066
00067 %%%%%%%%%%
00068 %%Compute the right hand side (A'b) term
00069 Atb = zeros(size(z),GPUsingle);
00070 % sinAtb = single(Atb);
00071 % Do the f'y convolutions.
00072 if(TRAIN_Z0) % Also convolve flipped Fjk with z0 maps convolution
00073 % Convolve z0 map for each j with it's filter.
00074 z0conv = mycuConv2(z0,z0_filter,'valid');
00075 for j=1:num_input_maps
00076 % For all feature maps, convolve with input map j and z0's
00077 % convolution.
00078 Atb(:,:,C(j,:)) = Atb(:,:,C(j,:)) +...
00079 mycuConv2(y(:,:,j),squeeze(Fflip(:,:,j,C(j,:))),'full',COMP_THREADS) -...
00080 mycuConv2(z0conv(:,:,j),squeeze(Fflip(:,:,j,C(j,:))),'full',COMP_THREADS);
00081 end
00082 else
00083 % 'CPU edition'
00084 % tic
00085 % for j=1:num_input_maps
00086 % % For all feature maps, convolve with inptu map j.
00087 % sinAtb(:,:,sinC(j,:)) = sinAtb(:,:,sinC(j,:)) +...
00088 % ipp_conv2(siny(:,:,j),squeeze(sinFflip(:,:,j,sinC(j,:))),'full',COMP_THREADS);
00089 % end
00090 % t=toc
00091 % 'GPUedition'
00092 % tic
00093 for j=1:num_input_maps
00094 idx = GPUsingle(find(single(C(j,:))));
00095 % For all feature maps, convolve with inptu map j.
00096 Atb(:,:,idx) = Atb(:,:,idx) + gpu_conv2(y(:,:,j),Fflip(:,:,j,idx),'full');
00097 end
00098 % t=toc
00099
00100 % idx = GPUsingle(find(single(C)));
00101 % size(Atb)
00102 %
00103 % Atb = gpu_conv2(y,Fflip(:,:,C),'full');
00104 % sizeAtb = size(Atb);
00105 % selector = GPUsingle(repmat(GPUsingle(eye(num_input_maps,num_input_maps)),num_feature_maps,1));
00106 % Atb = Atb(:,:,selector);
00107 % Atb = reshape(Atb,sizeAtb);
00108 % 'blah'
00109 % size(Atb)
00110 % Atb = sum(Atb,4);
00111 % 'blah2'
00112 % size(Atb)
00113 % Atb = reshape(Atb,size(Atb,1),size(Atb,2),num_feature_maps,num_input_maps);
00114 % Atb = sum(Atb,4);
00115 % % Atb = reshape(Atb,size(Atb,1),size(Atb,2),num_feature_maps,num_input_maps,size(Atb,4));
00116 % % Atb = Atb(:,:,GPUsingle(eye(num_input_maps,num_input_maps)),:);
00117 % % % Atb = sum(Atb,5);
00118 % % Atb = sum(Atb,4);
00119 % % Atb = Atb(:,:,C);
00120 % size(C)
00121 % % Atb(1:2,1:2,:,:);
00122 % size(Atb)
00123 % t=toc
00124
00125 % fprintf('Error: %f\n\n\n',max(sinAtb(:)-single(Atb(:))))
00126
00127
00128 end
00129 % This is the RHS. Only comput this once.
00130 Atb = lambda*Atb(:) + beta*w(:);
00131 %%%%%%%%%%
00132
00133
00134
00135 %%%%%%%%%%
00136 %%Compute the left hand side (A'Ax) term
00137 % Initialize the running sum for each feature map.
00138 AtAx = zeros(size(z),GPUsingle);
00139 % sinAtAx = single(AtAx);
00140 % 'CPU edition'
00141 % tic
00142 % for j=1:num_input_maps
00143 % % This is the sum over k feature maps of the zk*Fjk convolutions as the first term of teh second convolution.
00144 % % The second convolution is the convolution of the above with F* summed over the input maps.
00145 % %temp1 = sum(mycuConv2(z(:,:,C(j,:)),squeeze(F(:,:,j,C(j,:))),'valid',COMP_THREADS),3);
00146 % %AtAX(:,:,C(j,:)) = AtAx(:,:,C(j,:)) + mycuConv2(temp1,squeeze(Fflip(:,:,j,C(j,:))),'full',COMP_THREADS);
00147 %
00148 % sinAtAx(:,:,sinC(j,:)) = sinAtAx(:,:,sinC(j,:)) + ipp_conv2(...
00149 % sum(ipp_conv2(sinz(:,:,sinC(j,:)),squeeze(sinF(:,:,j,sinC(j,:))),'valid',COMP_THREADS),3),...
00150 % squeeze(sinFflip(:,:,j,sinC(j,:))),'full',COMP_THREADS);
00151 %
00152 %
00153 %
00154 % end
00155 % t=toc
00156 % 'GPU edition'
00157 % tic
00158
00159 for j=1:num_input_maps
00160 % This is the sum over k feature maps of the zk*Fjk convolutions as the first term of teh second convolution.
00161 % The second convolution is the convolution of the above with F* summed over the input maps.
00162 %temp1 = sum(mycuConv2(z(:,:,C(j,:)),squeeze(F(:,:,j,C(j,:))),'valid',COMP_THREADS),3);
00163 %AtAX(:,:,C(j,:)) = AtAx(:,:,C(j,:)) + mycuConv2(temp1,squeeze(Fflip(:,:,j,C(j,:))),'full',COMP_THREADS);
00164 idx = GPUsingle(find(single(C(j,:))));
00165 AtAx(:,:,idx) = AtAx(:,:,idx) + gpu_conv2(...
00166 sum(gpu_conv2(z(:,:,idx),F(:,:,j,idx),'valid'),3),...
00167 Fflip(:,:,j,idx),'full');
00168 end
00169 % t=toc
00170 %
00171 % fprintf('Error: %f\n\n\n',max(sinAtAx(:)-single(AtAx(:))))
00172
00173 % This is the left hand side.
00174 AtAx = lambda*AtAx(:)+beta*z(:);
00175 %%%%%%%%%%
00176
00177
00178 % Compute the residual.
00179 r = Atb - AtAx;
00180
00181 for iter = 1:max_it
00182 rho = (r(:)'*r(:));
00183
00184 if ( iter > 1 ), % direction vector
00185 % their_beta = rho / rho_1;
00186 their_beta = double(abs(rho_1) > 1e-9).*rho / rho_1; % Added from dilips.m
00187 p(:) = r(:) + their_beta*p(:);
00188 else
00189 p = r;
00190 p = reshape(p,size(z));
00191 end
00192
00193 %%%%%%%%%%
00194 %%Compute the left hand side (A'Ax) term
00195 % Initialize the running sum for each feature map.
00196 q = zeros(size(z),GPUsingle);
00197 % for j=1:num_input_maps
00198 % % This is the sum over k feature maps of the zk*Fjk convolutions as the first term of teh second convolution.
00199 % % The second convolution is the convolution of the above with F* summed over the input maps.
00200 % q(:,:,C(j,:)) = q(:,:,C(j,:)) + mycuConv2(...
00201 % sum(mycuConv2(p(:,:,C(j,:)),squeeze(F(:,:,j,C(j,:))),'valid',COMP_THREADS),3),...
00202 % squeeze(Fflip(:,:,j,C(j,:))),'full',COMP_THREADS);
00203 % end
00204
00205 for j=1:num_input_maps
00206 % This is the sum over k feature maps of the zk*Fjk convolutions as the first term of teh second convolution.
00207 % The second convolution is the convolution of the above with F* summed over the input maps.
00208 %temp1 = sum(mycuConv2(z(:,:,C(j,:)),squeeze(F(:,:,j,C(j,:))),'valid',COMP_THREADS),3);
00209 %AtAX(:,:,C(j,:)) = AtAx(:,:,C(j,:)) + mycuConv2(temp1,squeeze(Fflip(:,:,j,C(j,:))),'full',COMP_THREADS);
00210 idx = GPUsingle(find(single(C(j,:))));
00211 q(:,:,idx) = q(:,:,idx) + gpu_conv2(...
00212 sum(gpu_conv2(p(:,:,idx),F(:,:,j,idx),'valid'),3),...
00213 Fflip(:,:,j,idx),'full');
00214 end
00215
00216 % This is the left hand side.
00217 q = lambda*q(:)+beta*p(:);
00218 %%%%%%%%%%
00219
00220 % their_alpha = rho / (p(:)'*q(:) );
00221 temp = p(:)'*q(:);
00222 their_alpha = double(abs(temp) > 1e-9).*rho / temp;
00223 z(:) = z(:) + their_alpha * p(:); % update approximation vector
00224 r = r - their_alpha*q; % compute residual
00225
00226 rho_1 = rho;
00227 % fprintf('\nIteration %d |residual| %.3g', iter, norm(r));
00228 if(sqrt(sum(abs(r(:)).^2)) < 1e-6)
00229 % fprintf('Convergence reached in iteration %d - breaking\n', iter);
00230 break;
00231 end;
00232 end
00233
00234 end