as model: # weights with L2 normalization w_in_1 = Normal('w_in_1', 0, sd=1, shape=(n_in, n_hidden)) w_1_2 = Normal('w_1_2', 0, sd=1, shape=(n_hidden, n_hidden)) w_2_out = Normal('w_2_out', 0, sd=1, shape=(n_hidden, n_out)) # dropout d_in_1 = Bernoulli('d_in_1', p=0.5, shape=n_in) d_1_2 = Bernoulli('d_1_2', p=0.5, shape=n_hidden) d_2_out = Bernoulli('d_2_out', p=0.5, shape=n_hidden) # layers l_in = InputLayer(in_shape, input_var=X_shared) l_1 = DenseLayer(l_in, n_hidden, W=T.dot(T.nlinalg.diag(d_in_1), w_in_1), nonlinearity=tanh) l_2 = DenseLayer(l_1, n_hidden, W=T.dot(T.nlinalg.diag(d_1_2), w_1_2), nonlinearity=tanh) l_out = DenseLayer(l_2, n_out, W=T.dot(T.nlinalg.diag(d_2_out), w_2_out), nonlinearity=softmax) p = Deterministic('p', lasagne.layers.get_output(l_out)) out = Categorical('out', p=p, observed=y_shared) x y y x p μ σ