diff --git a/homework_summary/carl - 00 - summary/summary.pdf b/homework_summary/carl - 00 - summary/summary.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..31181c6813e2f31805b2af3298305c3d32ebdb7a
Binary files /dev/null and b/homework_summary/carl - 00 - summary/summary.pdf differ
diff --git a/hw10_tensorflow/carl - 04 - tensorflow/cnn_mnist.py b/hw10_tensorflow/carl - 04 - tensorflow/cnn_mnist.py
new file mode 100644
index 0000000000000000000000000000000000000000..6a8b17ea55c4c8b936506a69186a7a5bea0a18ab
--- /dev/null
+++ b/hw10_tensorflow/carl - 04 - tensorflow/cnn_mnist.py
@@ -0,0 +1,31 @@
+import tflearn  # flat script: builds a small tflearn network and trains it on MNIST
+from tflearn.layers.core import input_data, dropout, fully_connected
+from tflearn.layers.conv import conv_2d, max_pool_2d
+from tflearn.layers.normalization import local_response_normalization
+from tflearn.layers.estimator import regression
+
+# Data loading and preprocessing: labels are requested one-hot; images are reshaped to [-1, 28, 28, 1] to match input_data below
+import tflearn.datasets.mnist as mnist
+X, Y, testX, testY = mnist.load_data(one_hot=True)
+X = X.reshape([-1, 28, 28, 1])
+testX = testX.reshape([-1, 28, 28, 1])
+
+# Building convolutional network: one conv / max-pool / LRN stage, two tanh dense layers with dropout, 10-way softmax
+network = input_data(shape=[None, 28, 28, 1], name='input')
+network = conv_2d(network, 32, 3, activation='relu', regularizer='L2')
+network = max_pool_2d(network, 2)
+network = local_response_normalization(network)
+network = fully_connected(network, 128, activation='tanh')
+network = dropout(network, 0.8)  # presumably 0.8 is the keep probability - TODO confirm against the tflearn docs
+network = fully_connected(network, 256, activation='tanh')
+network = dropout(network, 0.8)
+network = fully_connected(network, 10, activation='softmax')
+network = regression(network, optimizer='adam', learning_rate=0.01,
+                     loss='categorical_crossentropy', name='target')  # 'target' is the key used for Y in model.fit
+
+# Training: 20 epochs with batches of 100; the test split is passed as the validation set
+model = tflearn.DNN(network, tensorboard_verbose=0)
+model.fit({'input': X}, {'target': Y}, n_epoch=20,
+          validation_set=({'input': testX}, {'target': testY}),
+          batch_size=100,
+          show_metric=True, run_id='cnn_mnist')  # run.sh serves /tmp/tflearn_logs/cnn_mnist; presumably named after this run_id
diff --git a/hw10_tensorflow/carl - 04 - tensorflow/run.sh
b/hw10_tensorflow/carl - 04 - tensorflow/run.sh
new file mode 100755
index 0000000000000000000000000000000000000000..600d71acd87669480f3cab8b34eba521b475189d
--- /dev/null
+++ b/hw10_tensorflow/carl - 04 - tensorflow/run.sh
@@ -0,0 +1,5 @@
+#!/usr/bin/bash
+rm /tmp/tflearn_logs/cnn_mnist/*
+python cnn_mnist.py
+xdg-open http://localhost:6006
+tensorboard --logdir=/tmp/tflearn_logs/cnn_mnist
diff --git a/hw13_optimization/carl - 06 - optimization/autoencoder.jl b/hw13_optimization/carl - 06 - optimization/autoencoder.jl
new file mode 100644
index 0000000000000000000000000000000000000000..73fe69ddf0fd89c93db510ea637677756944d85c
--- /dev/null
+++ b/hw13_optimization/carl - 06 - optimization/autoencoder.jl
@@ -0,0 +1,122 @@
+using MLDatasets
+using Plots
+
+function flatten(X)
+    dims = size(X)
+    reshape(X, prod(dims[1:end-1]), dims[end])
+end
+
+function subset(X, y, labels::AbstractArray{Int64})
+    mask = in.(y, [labels])
+    X[:, mask], y[mask]
+end
+
+function subset(X, y, N::Integer)
+    mask = rand(1:length(y), N)
+    X[:, mask], y[mask]
+end
+
+function subset(X, y, labels)
+    mask = in.(y, [labels])
+    X[:, mask], y[mask]
+end
+
+type AutoEncoder{T<:Real}
+    N::Int64
+    W_enc::Matrix{T}
+    b_enc::Vector{T}
+    W_dec::Matrix{T}
+    b_dec::Vector{T}
+    AutoEncoder(N) = new(N)
+end
+
+relu(x) = max(0, x)
+drelu(x) = x .> 0
+logsig(n) = exp( -log(1 + exp(-n)) )
+dlogsig(n) = exp( n - 2 * log(exp(n) + 1) )
+# relu(x) = identity(x)
+# d_relu(x) = 1
+
+# logsig(n) = 1 / (1 + exp(-n))
+f_enc(n) = relu(n)
+df_enc(n) = drelu(n)
+f_dec(n) = logsig(n)
+df_dec(n) = dlogsig(n)
+
+
+AutoEncoder(N) = AutoEncoder{Float64}(N)
+
+encode(ae::AutoEncoder, X) = f_enc(ae.W_enc * X .+ ae.b_enc)
+decode(ae::AutoEncoder, Z) = f_dec(ae.W_dec * Z .+ ae.b_dec)
+
+# Train `ae` in place on `X` (one sample per column) by mini-batch gradient descent
+# with momentum. All four parameter arrays are re-initialised with small Gaussian
+# noise at the start of every call, so previously learned weights are discarded.
+#
+# Keywords: max_iterations (number of updates), learning_rate, batch_size (columns
+# drawn per update with rand(1:T, batch_size)), regularization (coefficient of the
+# weight term added to each weight update), momentum (fraction of the previous
+# update that is carried over).
+# The full-data loss is printed once before training and again after every update.
+function train!(ae::AutoEncoder, X; max_iterations=4000, learning_rate=0.1, batch_size=10, regularization=0.000, momentum=0.9)
+    D, T = size(X)
+
+    ae.W_enc = 0.001 * randn(ae.N, D)
+    ae.b_enc = 0.001 * randn(ae.N)
+    ae.W_dec = 0.001 * randn(D, ae.N)
+    ae.b_dec = 0.001 * randn(D)
+
+    f = (X) -> decode(ae, encode(ae, X))
+    l = (Y) -> 0.5 / T * sumabs2(f(Y) - Y) # reconstruction error of Y against itself, scaled by the full sample count T
+
+    ΔW_enc = 0.0
+    Δb_enc = 0.0
+    ΔW_dec = 0.0
+    Δb_dec = 0.0
+
+    @show l(X)
+    for i in 1:max_iterations
+        println("iteration $i")
+        batch_idx = rand(1:T, batch_size)
+        # batch_idx = 1:T
+        X_batch = X[:,batch_idx]
+        Z_batch = encode(ae, X_batch)
+        ∇l = f(X_batch) - X_batch
+        # Error terms: each layer's delta is multiplied by the DERIVATIVE of its
+        # activation (df_dec / df_enc) evaluated at that layer's pre-activation.
+        Δ_dec = 1 / T * ∇l .* df_dec(ae.W_dec * Z_batch .+ ae.b_dec)
+        Δ_enc = ae.W_dec' * Δ_dec .* df_enc(ae.W_enc * X_batch .+ ae.b_enc)
+        ∂W_enc = Δ_enc * X_batch'
+        ∂b_enc = vec(sum(Δ_enc, 2)) # Δ_enc * ones(D)
+        ∂W_dec = Δ_dec * Z_batch'
+        ∂b_dec = vec(sum(Δ_dec, 2)) # Δ_dec * ones(ae.N)
+
+        ΔW_enc = momentum * ΔW_enc + learning_rate * ∂W_enc + regularization * ae.W_enc
+        Δb_enc = momentum * Δb_enc + learning_rate * ∂b_enc
+        ΔW_dec = momentum * ΔW_dec + learning_rate * ∂W_dec + regularization * ae.W_dec
+        Δb_dec = momentum * Δb_dec + learning_rate * ∂b_dec
+
+        ae.W_enc -= ΔW_enc
+        ae.b_enc -= Δb_enc
+        ae.W_dec -= ΔW_dec
+        ae.b_dec -= Δb_dec
+        @show l(X)
+    end
+
+end
+
+function test()
+    X, y = MNIST.traindata()
+    X = convert(Array{Float32}, X)
+    X = flatten(X)
+    # X, y = subset(X, y, [5, 2])
+    X, y = subset(X, y, 5000)
+    encoder = AutoEncoder{Float32}(100)
+    train!(encoder, X)
+
+    # feats = reshape(encoder.W_enc', 28, 28, 100)
+    # heatmap(flipdim(reshape(permutedims(reshape(feats, 28, 28, 10, 10), (2, 3, 1, 4)), 280, 280), 1))
+    encoder
+end
+
+encoder = test()
diff --git a/hw13_optimization/carl - 06 - optimization/see 03 - boltzmann.txt b/hw13_optimization/carl - 06 - optimization/see 03 - boltzmann.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/hw2_autoencoders/carl - 01 - autoencoders/ae_encoder_mnist_visualize.jl b/hw2_autoencoders/carl - 01 - autoencoders/ae_encoder_mnist_visualize.jl
new file mode 100644
index 0000000000000000000000000000000000000000..b44f02ed16f20447d2ebed78d8e6316e172480f6
---
/dev/null
+++ b/hw2_autoencoders/carl - 01 - autoencoders/ae_encoder_mnist_visualize.jl
@@ -0,0 +1,34 @@
+using MXNet
+using Plots
+plotlyjs()
+
+#--------------------------------------------------------------------------------
+# extract parameters and visualize trained model
+
+# helper function for visualizing weights (presumably defines mnist_visualize, which is called below)
+include("visualize_mnist.jl")
+
+# data provider (get_mnist_providers); only samples labelled 0, 1 or 7 are kept
+include("data_provider_mnist.jl")
+train_provider, eval_provider = get_mnist_providers(data_name=:data, label_name=:output_label, subset=[0, 1, 7])
+
+# load pre-trained model: checkpoint `prefix` at epoch `epochs` from output/models next to this file
+prefix = "ae_encoder_mnist"
+epochs = 1000
+model = cd("$(dirname(@__FILE__))/output/models") do
+    mx.load_checkpoint(prefix, epochs, MXNet.mx.FeedForward)
+end
+
+# predict model: run the encoder on the evaluation provider; W is a copy of the enc_0_1 weight array
+eval_labels = vec(convert(Array{Int64}, eval_provider.label_arrays[1]))
+predictions = mx.predict(model, eval_provider)
+W = copy(model.arg_params[:enc_0_1_weight])
+
+# plot: scatter of prediction rows 1 and 2 grouped by label, plus the weight picture; both saved as PDF in output/plots
+cd("$(dirname(@__FILE__))/output/plots") do
+    p1 = scatter(predictions[1,:], predictions[2,:], group=eval_labels)
+    savefig(p1, "$(prefix)_prediction_scatter.pdf")
+
+    p2 = mnist_visualize(W)
+    savefig(p2, "$(prefix)_weights.pdf")
+end
diff --git a/hw2_autoencoders/carl - 01 - autoencoders/ae_full_mnist_visualize.jl b/hw2_autoencoders/carl - 01 - autoencoders/ae_full_mnist_visualize.jl
new file mode 100644
index 0000000000000000000000000000000000000000..80c62f8c08a2d6ece427f8c1e56759bcaa86dcb0
--- /dev/null
+++ b/hw2_autoencoders/carl - 01 - autoencoders/ae_full_mnist_visualize.jl
@@ -0,0 +1,33 @@
+using MXNet
+using Plots
+plotlyjs()
+
+#--------------------------------------------------------------------------------
+# extract parameters and visualize trained model
+
+# helper function for visualizing weights (presumably defines mnist_visualize, which is called below)
+include("visualize_mnist.jl")
+
+# data provider (get_mnist_providers); only samples labelled 0, 1 or 7 are kept
+include("data_provider_mnist.jl")
+train_provider, eval_provider = get_mnist_providers(data_name=:data, label_name=:output_label, subset=[0, 1, 7])
+
+# load pre-trained model: checkpoint `prefix` at epoch `epochs` from output/models next to this file
+prefix = "ae_full_mnist"
+epochs = 1000
+model = cd("$(dirname(@__FILE__))/output/models") do
+    mx.load_checkpoint(prefix, epochs, MXNet.mx.FeedForward)
+end
+
+# predict model: reconstructions for the evaluation provider; eval_data is the provider's first data array
+eval_data = eval_provider.data_arrays[1]
+predictions = mx.predict(model, eval_provider)
+
+# plot: the first 100 columns of the predictions and of the inputs, each saved as its own PDF in output/plots
+cd("$(dirname(@__FILE__))/output/plots") do
+    p1 = mnist_visualize(predictions[:,1:100])
+    savefig(p1, "$(prefix)_predictions.pdf")
+
+    p2 = mnist_visualize(eval_data[:,1:100])
+    savefig(p2, "$(prefix)_original.pdf")
+end
diff --git a/hw2_autoencoders/carl - 01 - autoencoders/ae_mnist.jl b/hw2_autoencoders/carl - 01 - autoencoders/ae_mnist.jl
new file mode 100644
index 0000000000000000000000000000000000000000..3b6c51b9c6e96bcdc08ccf2e6d7545fc2f6ef514
--- /dev/null
+++ b/hw2_autoencoders/carl - 01 - autoencoders/ae_mnist.jl
@@ -0,0 +1,51 @@
+using MXNet
+
+#--------------------------------------------------------------------------------
+# define and train autoencoder
+
+# autoencoder factory functions
+include("autoencoder.jl")
+
+# data provider: the autoencoder variant uses the image itself as the label; only samples labelled 0, 1 or 7 are kept
+include("data_provider_mnist.jl")
+batch_size = 100
+train_provider, eval_provider = get_mnist_autoencoder_providers(batch_size, data_name=:data, label_name=:output_label, subset=[0, 1, 7])
+
+# build the autoencoder: encoder sizes 784 -> 64 -> 256 -> 64 -> 2; the decoder mirrors the same list
+layer_dims = [784, 64, 256, 64, 2]
+ae, encoder = autoencoder(layer_dims, data_name=:data, label_name=:output_label)
+
+# setup model
+model = mx.FeedForward(ae, context=mx.gpu.(0)) # NOTE(review): `mx.gpu.(0)` is a dot-call on a scalar; plain `mx.gpu(0)` looks intended - confirm
+
+# optimization algorithm
+optimizer = mx.ADAM()
+# optimizer = mx.SGD(lr=0.1, momentum=0.9, weight_decay=0.00001)
+
+# fit parameters
+mx.fit(model, optimizer, train_provider, n_epoch=1000, eval_metric=mx.MSE())
+
+# create the encoder partial model: reuse the trained parameters, dropping every key that starts with "dec"
+encoder_model = mx.FeedForward(encoder)
+encoder_model.arg_params = filter((k,v) -> !startswith(string(k), "dec"), model.arg_params)
+encoder_model.aux_params = model.aux_params
+
+ae_prefix = "ae_full_mnist"
+encoder_prefix = "ae_encoder_mnist"
+
+# save models: both checkpoints are written inside output/models next to this file
+cd("$(dirname(@__FILE__))/output/models") do
+    mx.save_checkpoint(model, ae_prefix, optimizer.state)
+    mx.save_checkpoint(encoder_model, encoder_prefix, optimizer.state)
+end
+
+# save visualizations of the network: write each graph as .dot, render it to PDF with `dot`, then remove the .dot file
+cd("$(dirname(@__FILE__))/output/plots") do
+    write("$ae_prefix.dot", mx.to_graphviz(ae))
+    run(`dot -Tpdf $ae_prefix.dot -o $ae_prefix.pdf`)
+    run(`rm $ae_prefix.dot`)
+
+    write("$encoder_prefix.dot", mx.to_graphviz(encoder))
+    run(`dot -Tpdf $encoder_prefix.dot -o $encoder_prefix.pdf`)
+    run(`rm $encoder_prefix.dot`)
+end
diff --git a/hw2_autoencoders/carl - 01 - autoencoders/autoencoder.jl b/hw2_autoencoders/carl - 01 - autoencoders/autoencoder.jl
new file mode 100644
index 0000000000000000000000000000000000000000..e00e5c3649fc222bd0caaaffa61e67916e8c1f03
--- /dev/null
+++ b/hw2_autoencoders/carl - 01 - autoencoders/autoencoder.jl
@@ -0,0 +1,41 @@
+# autoencoder factory functions
+
+# factory for single layer: FullyConnected with the supplied weight symbol, followed by an Activation unless act is :none
+function build_layer(input, num_hidden, weight, name, act)
+    fc = mx.FullyConnected(input, weight, num_hidden=num_hidden, name="$(name)_fc")
+    if act != :none
+        fc = mx.Activation(fc, act_type=act, name="$(name)_act")
+    end
+    # fc = mx.BatchNorm(data=fc)
+    return fc
+end
+
+# recursive factory for autoencoder: adds one encoder layer, recurses on layer_dims[2:end], then adds the matching decoder layer on the transposed encoder weight; returns (decoder, encoder)
+function build_autoencoder(input, layer_dims; inner_activation="relu", outer_activation="sigmoid", depth=0)
+    n = length(layer_dims)
+    if n < 2 # nothing left to build: the input serves as both decoder input and encoder output
+        return input, input
+    end
+    if n == 2 # innermost encoder layer is built without an activation
+        inner_activation = :none
+    end
+    name = "$(depth)_$(depth+1)"
+    W = mx.Variable("enc_$(name)_weight")
+    Wt = W' # decoder layer takes the transpose of the encoder weight symbol
+    mx.set_attr(Wt, :name, "enc_$(name)_weight")
+    encoder_input = build_layer(input, layer_dims[2], W, "enc_$(depth)_$(depth+1)", inner_activation)
+    decoder_input, encoder = build_autoencoder(encoder_input, layer_dims[2:end];
+                                               inner_activation=inner_activation,
+                                               outer_activation=inner_activation, # nested decoder layers use the inner activation
+                                               depth=depth+1)
+    decoder = build_layer(decoder_input, layer_dims[1], Wt, "dec_$(depth+1)_$(depth)", outer_activation)
+    return decoder, encoder
+end
+
+function autoencoder(layer_dims; data_name="data", label_name="output_label") # returns (LinearRegressionOutput cost symbol, encoder symbol)
+    data = mx.Variable(data_name)
+    label = mx.Variable(label_name)
+    autoencoder, encoder = build_autoencoder(data, layer_dims)
+    cost = mx.LinearRegressionOutput(autoencoder, label, name="cost")
+    return cost, encoder
+end
diff --git a/hw2_autoencoders/carl - 01 - autoencoders/data_provider_mnist.jl b/hw2_autoencoders/carl - 01 - autoencoders/data_provider_mnist.jl
new file mode 100644
index 0000000000000000000000000000000000000000..f005f32551ad4507cea064eee204b42621de33b3
--- /dev/null
+++ b/hw2_autoencoders/carl - 01 - autoencoders/data_provider_mnist.jl
@@ -0,0 +1,37 @@
+using MNIST
+
+function get_mnist_autoencoder_providers(batch_size::Int; data_name=:data, label_name=:softmax_label, subset=0:9) # providers whose label array is the scaled image data itself
+    train_data, train_label = MNIST.traindata()
+    idx = BitArray(indexin(train_label, subset)) # mask of samples whose label is in `subset`; NOTE(review): relies on BitArray accepting indices > 1 as true - confirm for the Julia version in use
+    train_data = train_data[:,idx]
+    # train_label = train_data
+    scale!(train_data, 1/255) # in-place rescale by 1/255
+    train_provider = mx.ArrayDataProvider(data_name=>train_data, label_name=>train_data, batch_size=batch_size, shuffle=true)
+
+    eval_data, eval_label = MNIST.testdata()
+    idx = BitArray(indexin(eval_label, subset))
+    eval_data = eval_data[:,idx]
+    # eval_label = eval_data
+    scale!(eval_data, 1/255)
+    eval_provider = mx.ArrayDataProvider(data_name=>eval_data, label_name=>eval_data, batch_size=batch_size, shuffle=false)
+
+    return train_provider, eval_provider
+end
+
+function get_mnist_providers(batch_size::Int=0; data_name=:data, label_name=:softmax_label, subset=0:9) # same filtering and scaling, but the label array holds the digit labels
+    train_data, train_label = MNIST.traindata()
+    idx = BitArray(indexin(train_label, subset))
+    train_data = train_data[:,idx]
+    train_label = train_label[idx]
+    scale!(train_data, 1/255)
+    train_provider = mx.ArrayDataProvider(data_name=>train_data, label_name=>train_label, batch_size=batch_size, shuffle=true)
+
+    eval_data, eval_label = MNIST.testdata()
+    idx = BitArray(indexin(eval_label, subset))
+    eval_data = eval_data[:,idx]
+    eval_label = eval_label[idx]
+    scale!(eval_data, 1/255)
+    eval_provider = 
mx.ArrayDataProvider(data_name=>eval_data, label_name=>eval_label, batch_size=batch_size, shuffle=false) + + return train_provider, eval_provider +end diff --git a/hw2_autoencoders/carl - 01 - autoencoders/output/models/ae_encoder_mnist-1000.params b/hw2_autoencoders/carl - 01 - autoencoders/output/models/ae_encoder_mnist-1000.params new file mode 100644 index 0000000000000000000000000000000000000000..d7240a33a2ca7f1de78b54d2266b71cecee76bdb Binary files /dev/null and b/hw2_autoencoders/carl - 01 - autoencoders/output/models/ae_encoder_mnist-1000.params differ diff --git a/hw2_autoencoders/carl - 01 - autoencoders/output/models/ae_encoder_mnist-symbol.json b/hw2_autoencoders/carl - 01 - autoencoders/output/models/ae_encoder_mnist-symbol.json new file mode 100644 index 0000000000000000000000000000000000000000..5acb71dcc9dff1cf364407632faf28322601ecca --- /dev/null +++ b/hw2_autoencoders/carl - 01 - autoencoders/output/models/ae_encoder_mnist-symbol.json @@ -0,0 +1,117 @@ +{ + "nodes": [ + { + "op": "null", + "name": "data", + "inputs": [] + }, + { + "op": "null", + "name": "enc_0_1_weight", + "inputs": [] + }, + { + "op": "null", + "name": "enc_0_1_fc_bias", + "attr": {"num_hidden": "64"}, + "inputs": [] + }, + { + "op": "FullyConnected", + "name": "enc_0_1_fc", + "attr": {"num_hidden": "64"}, + "inputs": [[0, 0, 0], [1, 0, 0], [2, 0, 0]] + }, + { + "op": "Activation", + "name": "enc_0_1_act", + "attr": {"act_type": "relu"}, + "inputs": [[3, 0, 0]] + }, + { + "op": "null", + "name": "enc_1_2_weight", + "inputs": [] + }, + { + "op": "null", + "name": "enc_1_2_fc_bias", + "attr": {"num_hidden": "256"}, + "inputs": [] + }, + { + "op": "FullyConnected", + "name": "enc_1_2_fc", + "attr": {"num_hidden": "256"}, + "inputs": [[4, 0, 0], [5, 0, 0], [6, 0, 0]] + }, + { + "op": "Activation", + "name": "enc_1_2_act", + "attr": {"act_type": "relu"}, + "inputs": [[7, 0, 0]] + }, + { + "op": "null", + "name": "enc_2_3_weight", + "inputs": [] + }, + { + "op": "null", + "name": 
"enc_2_3_fc_bias", + "attr": {"num_hidden": "64"}, + "inputs": [] + }, + { + "op": "FullyConnected", + "name": "enc_2_3_fc", + "attr": {"num_hidden": "64"}, + "inputs": [[8, 0, 0], [9, 0, 0], [10, 0, 0]] + }, + { + "op": "Activation", + "name": "enc_2_3_act", + "attr": {"act_type": "relu"}, + "inputs": [[11, 0, 0]] + }, + { + "op": "null", + "name": "enc_3_4_weight", + "inputs": [] + }, + { + "op": "null", + "name": "enc_3_4_fc_bias", + "attr": {"num_hidden": "2"}, + "inputs": [] + }, + { + "op": "FullyConnected", + "name": "enc_3_4_fc", + "attr": {"num_hidden": "2"}, + "inputs": [[12, 0, 0], [13, 0, 0], [14, 0, 0]] + } + ], + "arg_nodes": [0, 1, 2, 5, 6, 9, 10, 13, 14], + "node_row_ptr": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16 + ], + "heads": [[15, 0, 0]], + "attrs": {"mxnet_version": ["int", 903]} +} \ No newline at end of file diff --git a/hw2_autoencoders/carl - 01 - autoencoders/output/models/ae_full_mnist-1000.params b/hw2_autoencoders/carl - 01 - autoencoders/output/models/ae_full_mnist-1000.params new file mode 100644 index 0000000000000000000000000000000000000000..b08d308e1f29be27fb637595632201e3cb14006a Binary files /dev/null and b/hw2_autoencoders/carl - 01 - autoencoders/output/models/ae_full_mnist-1000.params differ diff --git a/hw2_autoencoders/carl - 01 - autoencoders/output/models/ae_full_mnist-symbol.json b/hw2_autoencoders/carl - 01 - autoencoders/output/models/ae_full_mnist-symbol.json new file mode 100644 index 0000000000000000000000000000000000000000..9a1337e19c8db65a1b77ff60266a4771ca2265f8 --- /dev/null +++ b/hw2_autoencoders/carl - 01 - autoencoders/output/models/ae_full_mnist-symbol.json @@ -0,0 +1,252 @@ +{ + "nodes": [ + { + "op": "null", + "name": "data", + "inputs": [] + }, + { + "op": "null", + "name": "enc_0_1_weight", + "inputs": [] + }, + { + "op": "null", + "name": "enc_0_1_fc_bias", + "attr": {"num_hidden": "64"}, + "inputs": [] + }, + { + "op": "FullyConnected", + "name": 
"enc_0_1_fc", + "attr": {"num_hidden": "64"}, + "inputs": [[0, 0, 0], [1, 0, 0], [2, 0, 0]] + }, + { + "op": "Activation", + "name": "enc_0_1_act", + "attr": {"act_type": "relu"}, + "inputs": [[3, 0, 0]] + }, + { + "op": "null", + "name": "enc_1_2_weight", + "inputs": [] + }, + { + "op": "null", + "name": "enc_1_2_fc_bias", + "attr": {"num_hidden": "256"}, + "inputs": [] + }, + { + "op": "FullyConnected", + "name": "enc_1_2_fc", + "attr": {"num_hidden": "256"}, + "inputs": [[4, 0, 0], [5, 0, 0], [6, 0, 0]] + }, + { + "op": "Activation", + "name": "enc_1_2_act", + "attr": {"act_type": "relu"}, + "inputs": [[7, 0, 0]] + }, + { + "op": "null", + "name": "enc_2_3_weight", + "inputs": [] + }, + { + "op": "null", + "name": "enc_2_3_fc_bias", + "attr": {"num_hidden": "64"}, + "inputs": [] + }, + { + "op": "FullyConnected", + "name": "enc_2_3_fc", + "attr": {"num_hidden": "64"}, + "inputs": [[8, 0, 0], [9, 0, 0], [10, 0, 0]] + }, + { + "op": "Activation", + "name": "enc_2_3_act", + "attr": {"act_type": "relu"}, + "inputs": [[11, 0, 0]] + }, + { + "op": "null", + "name": "enc_3_4_weight", + "inputs": [] + }, + { + "op": "null", + "name": "enc_3_4_fc_bias", + "attr": {"num_hidden": "2"}, + "inputs": [] + }, + { + "op": "FullyConnected", + "name": "enc_3_4_fc", + "attr": {"num_hidden": "2"}, + "inputs": [[12, 0, 0], [13, 0, 0], [14, 0, 0]] + }, + { + "op": "transpose", + "name": "enc_3_4_weight", + "inputs": [[13, 0, 0]] + }, + { + "op": "null", + "name": "dec_4_3_fc_bias", + "attr": {"num_hidden": "64"}, + "inputs": [] + }, + { + "op": "FullyConnected", + "name": "dec_4_3_fc", + "attr": {"num_hidden": "64"}, + "inputs": [[15, 0, 0], [16, 0, 0], [17, 0, 0]] + }, + { + "op": "Activation", + "name": "dec_4_3_act", + "attr": {"act_type": "relu"}, + "inputs": [[18, 0, 0]] + }, + { + "op": "transpose", + "name": "enc_2_3_weight", + "inputs": [[9, 0, 0]] + }, + { + "op": "null", + "name": "dec_3_2_fc_bias", + "attr": {"num_hidden": "256"}, + "inputs": [] + }, + { + "op": 
"FullyConnected", + "name": "dec_3_2_fc", + "attr": {"num_hidden": "256"}, + "inputs": [[19, 0, 0], [20, 0, 0], [21, 0, 0]] + }, + { + "op": "Activation", + "name": "dec_3_2_act", + "attr": {"act_type": "relu"}, + "inputs": [[22, 0, 0]] + }, + { + "op": "transpose", + "name": "enc_1_2_weight", + "inputs": [[5, 0, 0]] + }, + { + "op": "null", + "name": "dec_2_1_fc_bias", + "attr": {"num_hidden": "64"}, + "inputs": [] + }, + { + "op": "FullyConnected", + "name": "dec_2_1_fc", + "attr": {"num_hidden": "64"}, + "inputs": [[23, 0, 0], [24, 0, 0], [25, 0, 0]] + }, + { + "op": "Activation", + "name": "dec_2_1_act", + "attr": {"act_type": "relu"}, + "inputs": [[26, 0, 0]] + }, + { + "op": "transpose", + "name": "enc_0_1_weight", + "inputs": [[1, 0, 0]] + }, + { + "op": "null", + "name": "dec_1_0_fc_bias", + "attr": {"num_hidden": "784"}, + "inputs": [] + }, + { + "op": "FullyConnected", + "name": "dec_1_0_fc", + "attr": {"num_hidden": "784"}, + "inputs": [[27, 0, 0], [28, 0, 0], [29, 0, 0]] + }, + { + "op": "Activation", + "name": "dec_1_0_act", + "attr": {"act_type": "sigmoid"}, + "inputs": [[30, 0, 0]] + }, + { + "op": "null", + "name": "output_label", + "inputs": [] + }, + { + "op": "LinearRegressionOutput", + "name": "cost", + "inputs": [[31, 0, 0], [32, 0, 0]] + } + ], + "arg_nodes": [ + 0, + 1, + 2, + 5, + 6, + 9, + 10, + 13, + 14, + 17, + 21, + 25, + 29, + 32 + ], + "node_row_ptr": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34 + ], + "heads": [[33, 0, 0]], + "attrs": {"mxnet_version": ["int", 903]} +} \ No newline at end of file diff --git a/hw2_autoencoders/carl - 01 - autoencoders/output/plots/ae_encoder_mnist.pdf b/hw2_autoencoders/carl - 01 - autoencoders/output/plots/ae_encoder_mnist.pdf new file mode 100644 index 0000000000000000000000000000000000000000..8cd6c25e1e5d802f61d4c62f9d69c6967ba52ef7 Binary files 
/dev/null and b/hw2_autoencoders/carl - 01 - autoencoders/output/plots/ae_encoder_mnist.pdf differ diff --git a/hw2_autoencoders/carl - 01 - autoencoders/output/plots/ae_encoder_mnist_prediction_scatter.pdf b/hw2_autoencoders/carl - 01 - autoencoders/output/plots/ae_encoder_mnist_prediction_scatter.pdf new file mode 100644 index 0000000000000000000000000000000000000000..23bfa783d3dca5179a39ccb4318c9eef72d4dc61 Binary files /dev/null and b/hw2_autoencoders/carl - 01 - autoencoders/output/plots/ae_encoder_mnist_prediction_scatter.pdf differ diff --git a/hw2_autoencoders/carl - 01 - autoencoders/output/plots/ae_encoder_mnist_weights.pdf b/hw2_autoencoders/carl - 01 - autoencoders/output/plots/ae_encoder_mnist_weights.pdf new file mode 100644 index 0000000000000000000000000000000000000000..186616711174de911e532fbea76ec6c4ac5b7095 Binary files /dev/null and b/hw2_autoencoders/carl - 01 - autoencoders/output/plots/ae_encoder_mnist_weights.pdf differ diff --git a/hw2_autoencoders/carl - 01 - autoencoders/output/plots/ae_full_mnist.pdf b/hw2_autoencoders/carl - 01 - autoencoders/output/plots/ae_full_mnist.pdf new file mode 100644 index 0000000000000000000000000000000000000000..4b2fe21127b1c6c0c2a311b3d1c3708d6e9f13cf Binary files /dev/null and b/hw2_autoencoders/carl - 01 - autoencoders/output/plots/ae_full_mnist.pdf differ diff --git a/hw2_autoencoders/carl - 01 - autoencoders/output/plots/ae_full_mnist_original.pdf b/hw2_autoencoders/carl - 01 - autoencoders/output/plots/ae_full_mnist_original.pdf new file mode 100644 index 0000000000000000000000000000000000000000..e065d89deb2a8af1e33358b73142b9b33dda6a38 Binary files /dev/null and b/hw2_autoencoders/carl - 01 - autoencoders/output/plots/ae_full_mnist_original.pdf differ diff --git a/hw2_autoencoders/carl - 01 - autoencoders/output/plots/ae_full_mnist_predictions.pdf b/hw2_autoencoders/carl - 01 - autoencoders/output/plots/ae_full_mnist_predictions.pdf new file mode 100644 index 
0000000000000000000000000000000000000000..b60d675f9b43abee7af83e8be509f75642046f5b Binary files /dev/null and b/hw2_autoencoders/carl - 01 - autoencoders/output/plots/ae_full_mnist_predictions.pdf differ diff --git a/hw2_autoencoders/carl - 01 - autoencoders/output_old/models/ae_encoder_mnist-0100.params b/hw2_autoencoders/carl - 01 - autoencoders/output_old/models/ae_encoder_mnist-0100.params new file mode 100644 index 0000000000000000000000000000000000000000..cad5729959cd2a169737d6d0e1b738d10d6f3c43 Binary files /dev/null and b/hw2_autoencoders/carl - 01 - autoencoders/output_old/models/ae_encoder_mnist-0100.params differ diff --git a/hw2_autoencoders/carl - 01 - autoencoders/output_old/models/ae_encoder_mnist-1000.params b/hw2_autoencoders/carl - 01 - autoencoders/output_old/models/ae_encoder_mnist-1000.params new file mode 100644 index 0000000000000000000000000000000000000000..2f1b067007249d39ef60ce58a07219cec25f7079 Binary files /dev/null and b/hw2_autoencoders/carl - 01 - autoencoders/output_old/models/ae_encoder_mnist-1000.params differ diff --git a/hw2_autoencoders/carl - 01 - autoencoders/output_old/models/ae_encoder_mnist-symbol.json b/hw2_autoencoders/carl - 01 - autoencoders/output_old/models/ae_encoder_mnist-symbol.json new file mode 100644 index 0000000000000000000000000000000000000000..be07b7d5f2f1b7544bb7244acc56eac7af2e6cec --- /dev/null +++ b/hw2_autoencoders/carl - 01 - autoencoders/output_old/models/ae_encoder_mnist-symbol.json @@ -0,0 +1,90 @@ +{ + "nodes": [ + { + "op": "null", + "name": "data", + "inputs": [] + }, + { + "op": "null", + "name": "enc_0_1_weight", + "inputs": [] + }, + { + "op": "null", + "name": "enc_0_1_fc_bias", + "attr": {"num_hidden": "256"}, + "inputs": [] + }, + { + "op": "FullyConnected", + "name": "enc_0_1_fc", + "attr": {"num_hidden": "256"}, + "inputs": [[0, 0, 0], [1, 0, 0], [2, 0, 0]] + }, + { + "op": "Activation", + "name": "enc_0_1_act", + "attr": {"act_type": "relu"}, + "inputs": [[3, 0, 0]] + }, + { + "op": 
"null", + "name": "enc_1_2_weight", + "inputs": [] + }, + { + "op": "null", + "name": "enc_1_2_fc_bias", + "attr": {"num_hidden": "64"}, + "inputs": [] + }, + { + "op": "FullyConnected", + "name": "enc_1_2_fc", + "attr": {"num_hidden": "64"}, + "inputs": [[4, 0, 0], [5, 0, 0], [6, 0, 0]] + }, + { + "op": "Activation", + "name": "enc_1_2_act", + "attr": {"act_type": "relu"}, + "inputs": [[7, 0, 0]] + }, + { + "op": "null", + "name": "enc_2_3_weight", + "inputs": [] + }, + { + "op": "null", + "name": "enc_2_3_fc_bias", + "attr": {"num_hidden": "2"}, + "inputs": [] + }, + { + "op": "FullyConnected", + "name": "enc_2_3_fc", + "attr": {"num_hidden": "2"}, + "inputs": [[8, 0, 0], [9, 0, 0], [10, 0, 0]] + } + ], + "arg_nodes": [0, 1, 2, 5, 6, 9, 10], + "node_row_ptr": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12 + ], + "heads": [[11, 0, 0]], + "attrs": {"mxnet_version": ["int", 903]} +} \ No newline at end of file diff --git a/hw2_autoencoders/carl - 01 - autoencoders/output_old/models/ae_full_mnist-0100.params b/hw2_autoencoders/carl - 01 - autoencoders/output_old/models/ae_full_mnist-0100.params new file mode 100644 index 0000000000000000000000000000000000000000..5f2805f79c02ce9097e477092dc4e6beda2830bb Binary files /dev/null and b/hw2_autoencoders/carl - 01 - autoencoders/output_old/models/ae_full_mnist-0100.params differ diff --git a/hw2_autoencoders/carl - 01 - autoencoders/output_old/models/ae_full_mnist-1000.params b/hw2_autoencoders/carl - 01 - autoencoders/output_old/models/ae_full_mnist-1000.params new file mode 100644 index 0000000000000000000000000000000000000000..5f81d0b0f73b89b55219128ddecfbb12e4984a16 Binary files /dev/null and b/hw2_autoencoders/carl - 01 - autoencoders/output_old/models/ae_full_mnist-1000.params differ diff --git a/hw2_autoencoders/carl - 01 - autoencoders/output_old/models/ae_full_mnist-symbol.json b/hw2_autoencoders/carl - 01 - autoencoders/output_old/models/ae_full_mnist-symbol.json new file mode 100644 index 
0000000000000000000000000000000000000000..04eeaa65fa4e3770f28b830c2d7498b1fdc951e7 --- /dev/null +++ b/hw2_autoencoders/carl - 01 - autoencoders/output_old/models/ae_full_mnist-symbol.json @@ -0,0 +1,195 @@ +{ + "nodes": [ + { + "op": "null", + "name": "data", + "inputs": [] + }, + { + "op": "null", + "name": "enc_0_1_weight", + "inputs": [] + }, + { + "op": "null", + "name": "enc_0_1_fc_bias", + "attr": {"num_hidden": "256"}, + "inputs": [] + }, + { + "op": "FullyConnected", + "name": "enc_0_1_fc", + "attr": {"num_hidden": "256"}, + "inputs": [[0, 0, 0], [1, 0, 0], [2, 0, 0]] + }, + { + "op": "Activation", + "name": "enc_0_1_act", + "attr": {"act_type": "relu"}, + "inputs": [[3, 0, 0]] + }, + { + "op": "null", + "name": "enc_1_2_weight", + "inputs": [] + }, + { + "op": "null", + "name": "enc_1_2_fc_bias", + "attr": {"num_hidden": "64"}, + "inputs": [] + }, + { + "op": "FullyConnected", + "name": "enc_1_2_fc", + "attr": {"num_hidden": "64"}, + "inputs": [[4, 0, 0], [5, 0, 0], [6, 0, 0]] + }, + { + "op": "Activation", + "name": "enc_1_2_act", + "attr": {"act_type": "relu"}, + "inputs": [[7, 0, 0]] + }, + { + "op": "null", + "name": "enc_2_3_weight", + "inputs": [] + }, + { + "op": "null", + "name": "enc_2_3_fc_bias", + "attr": {"num_hidden": "2"}, + "inputs": [] + }, + { + "op": "FullyConnected", + "name": "enc_2_3_fc", + "attr": {"num_hidden": "2"}, + "inputs": [[8, 0, 0], [9, 0, 0], [10, 0, 0]] + }, + { + "op": "transpose", + "name": "enc_2_3_weight", + "inputs": [[9, 0, 0]] + }, + { + "op": "null", + "name": "dec_3_2_fc_bias", + "attr": {"num_hidden": "64"}, + "inputs": [] + }, + { + "op": "FullyConnected", + "name": "dec_3_2_fc", + "attr": {"num_hidden": "64"}, + "inputs": [[11, 0, 0], [12, 0, 0], [13, 0, 0]] + }, + { + "op": "Activation", + "name": "dec_3_2_act", + "attr": {"act_type": "relu"}, + "inputs": [[14, 0, 0]] + }, + { + "op": "transpose", + "name": "enc_1_2_weight", + "inputs": [[5, 0, 0]] + }, + { + "op": "null", + "name": "dec_2_1_fc_bias", + 
"attr": {"num_hidden": "256"}, + "inputs": [] + }, + { + "op": "FullyConnected", + "name": "dec_2_1_fc", + "attr": {"num_hidden": "256"}, + "inputs": [[15, 0, 0], [16, 0, 0], [17, 0, 0]] + }, + { + "op": "Activation", + "name": "dec_2_1_act", + "attr": {"act_type": "relu"}, + "inputs": [[18, 0, 0]] + }, + { + "op": "transpose", + "name": "enc_0_1_weight", + "inputs": [[1, 0, 0]] + }, + { + "op": "null", + "name": "dec_1_0_fc_bias", + "attr": {"num_hidden": "784"}, + "inputs": [] + }, + { + "op": "FullyConnected", + "name": "dec_1_0_fc", + "attr": {"num_hidden": "784"}, + "inputs": [[19, 0, 0], [20, 0, 0], [21, 0, 0]] + }, + { + "op": "Activation", + "name": "dec_1_0_act", + "attr": {"act_type": "sigmoid"}, + "inputs": [[22, 0, 0]] + }, + { + "op": "null", + "name": "output_label", + "inputs": [] + }, + { + "op": "LinearRegressionOutput", + "name": "cost", + "inputs": [[23, 0, 0], [24, 0, 0]] + } + ], + "arg_nodes": [ + 0, + 1, + 2, + 5, + 6, + 9, + 10, + 13, + 17, + 21, + 24 + ], + "node_row_ptr": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26 + ], + "heads": [[25, 0, 0]], + "attrs": {"mxnet_version": ["int", 903]} +} \ No newline at end of file diff --git a/hw2_autoencoders/carl - 01 - autoencoders/output_old/models/simple_ae_mnist-0100.params b/hw2_autoencoders/carl - 01 - autoencoders/output_old/models/simple_ae_mnist-0100.params new file mode 100644 index 0000000000000000000000000000000000000000..3e23ef36bd241052f70ddb32dd3d63f87fc5914b Binary files /dev/null and b/hw2_autoencoders/carl - 01 - autoencoders/output_old/models/simple_ae_mnist-0100.params differ diff --git a/hw2_autoencoders/carl - 01 - autoencoders/output_old/models/simple_ae_mnist-symbol.json b/hw2_autoencoders/carl - 01 - autoencoders/output_old/models/simple_ae_mnist-symbol.json new file mode 100644 index 0000000000000000000000000000000000000000..a88001aaa922830bc55b919a2fa7f55ef1fec52e --- 
/dev/null +++ b/hw2_autoencoders/carl - 01 - autoencoders/output_old/models/simple_ae_mnist-symbol.json @@ -0,0 +1,204 @@ +{ + "nodes": [ + { + "op": "null", + "name": "data", + "inputs": [] + }, + { + "op": "null", + "name": "fullyconnected0_weight", + "attr": {"num_hidden": "100"}, + "inputs": [] + }, + { + "op": "null", + "name": "fullyconnected0_bias", + "attr": {"num_hidden": "100"}, + "inputs": [] + }, + { + "op": "FullyConnected", + "name": "fullyconnected0", + "attr": {"num_hidden": "100"}, + "inputs": [[0, 0, 0], [1, 0, 0], [2, 0, 0]] + }, + { + "op": "Activation", + "name": "activation0", + "attr": {"act_type": "relu"}, + "inputs": [[3, 0, 0]] + }, + { + "op": "null", + "name": "fullyconnected1_weight", + "attr": {"num_hidden": "50"}, + "inputs": [] + }, + { + "op": "null", + "name": "fullyconnected1_bias", + "attr": {"num_hidden": "50"}, + "inputs": [] + }, + { + "op": "FullyConnected", + "name": "fullyconnected1", + "attr": {"num_hidden": "50"}, + "inputs": [[4, 0, 0], [5, 0, 0], [6, 0, 0]] + }, + { + "op": "Activation", + "name": "activation1", + "attr": {"act_type": "relu"}, + "inputs": [[7, 0, 0]] + }, + { + "op": "null", + "name": "fullyconnected2_weight", + "attr": {"num_hidden": "10"}, + "inputs": [] + }, + { + "op": "null", + "name": "fullyconnected2_bias", + "attr": {"num_hidden": "10"}, + "inputs": [] + }, + { + "op": "FullyConnected", + "name": "fullyconnected2", + "attr": {"num_hidden": "10"}, + "inputs": [[8, 0, 0], [9, 0, 0], [10, 0, 0]] + }, + { + "op": "null", + "name": "fullyconnected3_weight", + "attr": {"num_hidden": "50"}, + "inputs": [] + }, + { + "op": "null", + "name": "fullyconnected3_bias", + "attr": {"num_hidden": "50"}, + "inputs": [] + }, + { + "op": "FullyConnected", + "name": "fullyconnected3", + "attr": {"num_hidden": "50"}, + "inputs": [[11, 0, 0], [12, 0, 0], [13, 0, 0]] + }, + { + "op": "Activation", + "name": "activation2", + "attr": {"act_type": "relu"}, + "inputs": [[14, 0, 0]] + }, + { + "op": "null", + "name": 
"fullyconnected4_weight", + "attr": {"num_hidden": "100"}, + "inputs": [] + }, + { + "op": "null", + "name": "fullyconnected4_bias", + "attr": {"num_hidden": "100"}, + "inputs": [] + }, + { + "op": "FullyConnected", + "name": "fullyconnected4", + "attr": {"num_hidden": "100"}, + "inputs": [[15, 0, 0], [16, 0, 0], [17, 0, 0]] + }, + { + "op": "Activation", + "name": "activation3", + "attr": {"act_type": "relu"}, + "inputs": [[18, 0, 0]] + }, + { + "op": "null", + "name": "fullyconnected5_weight", + "attr": {"num_hidden": "784"}, + "inputs": [] + }, + { + "op": "null", + "name": "fullyconnected5_bias", + "attr": {"num_hidden": "784"}, + "inputs": [] + }, + { + "op": "FullyConnected", + "name": "fullyconnected5", + "attr": {"num_hidden": "784"}, + "inputs": [[19, 0, 0], [20, 0, 0], [21, 0, 0]] + }, + { + "op": "Activation", + "name": "activation4", + "attr": {"act_type": "sigmoid"}, + "inputs": [[22, 0, 0]] + }, + { + "op": "null", + "name": "output_label", + "inputs": [] + }, + { + "op": "LinearRegressionOutput", + "name": "linearregressionoutput0", + "inputs": [[23, 0, 0], [24, 0, 0]] + } + ], + "arg_nodes": [ + 0, + 1, + 2, + 5, + 6, + 9, + 10, + 12, + 13, + 16, + 17, + 20, + 21, + 24 + ], + "node_row_ptr": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26 + ], + "heads": [[25, 0, 0]], + "attrs": {"mxnet_version": ["int", 903]} +} \ No newline at end of file diff --git a/hw2_autoencoders/carl - 01 - autoencoders/output_old/plots/ae_encoder_mnist.pdf b/hw2_autoencoders/carl - 01 - autoencoders/output_old/plots/ae_encoder_mnist.pdf new file mode 100644 index 0000000000000000000000000000000000000000..b91cee15b1e05e153c8bb2010b27f3310e62936b Binary files /dev/null and b/hw2_autoencoders/carl - 01 - autoencoders/output_old/plots/ae_encoder_mnist.pdf differ diff --git a/hw2_autoencoders/carl - 01 - autoencoders/output_old/plots/ae_encoder_mnist_prediction_scatter.pdf 
b/hw2_autoencoders/carl - 01 - autoencoders/output_old/plots/ae_encoder_mnist_prediction_scatter.pdf new file mode 100644 index 0000000000000000000000000000000000000000..cab253e155ba90315d7d70feb3d1e1798f57005d Binary files /dev/null and b/hw2_autoencoders/carl - 01 - autoencoders/output_old/plots/ae_encoder_mnist_prediction_scatter.pdf differ diff --git a/hw2_autoencoders/carl - 01 - autoencoders/output_old/plots/ae_encoder_mnist_weights.pdf b/hw2_autoencoders/carl - 01 - autoencoders/output_old/plots/ae_encoder_mnist_weights.pdf new file mode 100644 index 0000000000000000000000000000000000000000..3432c791e629b8ee6d89aab9ad9520679e6e46dc Binary files /dev/null and b/hw2_autoencoders/carl - 01 - autoencoders/output_old/plots/ae_encoder_mnist_weights.pdf differ diff --git a/hw2_autoencoders/carl - 01 - autoencoders/output_old/plots/ae_full_mnist.pdf b/hw2_autoencoders/carl - 01 - autoencoders/output_old/plots/ae_full_mnist.pdf new file mode 100644 index 0000000000000000000000000000000000000000..bad12141f89789462dca2b44441640440c10bcdb Binary files /dev/null and b/hw2_autoencoders/carl - 01 - autoencoders/output_old/plots/ae_full_mnist.pdf differ diff --git a/hw2_autoencoders/carl - 01 - autoencoders/output_old/plots/ae_full_mnist_original.pdf b/hw2_autoencoders/carl - 01 - autoencoders/output_old/plots/ae_full_mnist_original.pdf new file mode 100644 index 0000000000000000000000000000000000000000..e065d89deb2a8af1e33358b73142b9b33dda6a38 Binary files /dev/null and b/hw2_autoencoders/carl - 01 - autoencoders/output_old/plots/ae_full_mnist_original.pdf differ diff --git a/hw2_autoencoders/carl - 01 - autoencoders/output_old/plots/ae_full_mnist_predictions.pdf b/hw2_autoencoders/carl - 01 - autoencoders/output_old/plots/ae_full_mnist_predictions.pdf new file mode 100644 index 0000000000000000000000000000000000000000..d756ae5e82297bb96b29b8d65c9974097319fd20 Binary files /dev/null and b/hw2_autoencoders/carl - 01 - 
autoencoders/output_old/plots/ae_full_mnist_predictions.pdf differ diff --git a/hw2_autoencoders/carl - 01 - autoencoders/run.sh b/hw2_autoencoders/carl - 01 - autoencoders/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..8547edcc4b77eb15634570e939472a23fdb67b6e --- /dev/null +++ b/hw2_autoencoders/carl - 01 - autoencoders/run.sh @@ -0,0 +1,6 @@ +#!/usr/bin/bash + +current_dir=$(dirname "$0") +julia "$current_dir/ae_mnist.jl" +julia "$current_dir/ae_encoder_mnist_visualize.jl" +julia "$current_dir/ae_full_mnist_visualize.jl" diff --git a/hw2_autoencoders/carl - 01 - autoencoders/simple_ae_mnist.jl b/hw2_autoencoders/carl - 01 - autoencoders/simple_ae_mnist.jl new file mode 100644 index 0000000000000000000000000000000000000000..4e2c33eb2ca05695991fb22cb83af9cb2c442e1b --- /dev/null +++ b/hw2_autoencoders/carl - 01 - autoencoders/simple_ae_mnist.jl @@ -0,0 +1,49 @@ +using MXNet + +# data provider +include("data_provider_mnist.jl") +batch_size = 10000 +train_provider, eval_provider = get_mnist_autoencoder_providers(batch_size, data_name=:data, label_name=:output_label) + +# build the autoencoder +# ae = @mx.chain mx.Variable(:data) => +# mx.FullyConnected(num_hidden=100) => mx.Activation(act_type=:relu) => +# mx.FullyConnected(num_hidden=50) => mx.Activation(act_type=:relu) => +# mx.FullyConnected(num_hidden=10) => +# mx.FullyConnected(num_hidden=50) => mx.Activation(act_type=:relu) => +# mx.FullyConnected(num_hidden=100) => mx.Activation(act_type=:relu) => +# mx.FullyConnected(num_hidden=784) => mx.Activation(act_type=:sigmoid) => +# mx.LinearRegressionOutput(name=:output) + +data = mx.Variable(:data) + +encoder = @mx.chain mx.FullyConnected(data, num_hidden=100, name="enc_0_1_fc") => + mx.Activation(act_type=:relu) => + mx.FullyConnected(num_hidden=50, name="enc_1_2_fc") => + mx.Activation(act_type=:relu) => + mx.FullyConnected(num_hidden=10, name="enc_2_3_fc") + +decoder = @mx.chain mx.FullyConnected(encoder, num_hidden=50, 
name="dec_3_2_fc") => + mx.Activation(act_type=:relu) => + mx.FullyConnected(num_hidden=100, name="dec_2_1_fc") => + mx.Activation(act_type=:relu) => + mx.FullyConnected(num_hidden=784, name="dec_1_0_fc") => + mx.Activation(act_type=:sigmoid) + +label = mx.Variable(:output_label) + +ae = mx.LinearRegressionOutput(decoder, label) + +# setup model +model = mx.FeedForward(ae, context=mx.gpu.(0)) + +# optimization algorithm +optimizer = mx.ADAM() + +# fit parameters +mx.fit(model, optimizer, train_provider, n_epoch=100, eval_metric=mx.MSE(), eval_data=eval_provider) + +# save model +cd("$(dirname(@__FILE__))/output/models") do + mx.save_checkpoint(model, "simple_ae_mnist", optimizer.state) +end diff --git a/hw2_autoencoders/carl - 01 - autoencoders/visualize_mnist.jl b/hw2_autoencoders/carl - 01 - autoencoders/visualize_mnist.jl new file mode 100644 index 0000000000000000000000000000000000000000..a20d8d39e1bb14dee1bc0b85b4fbfd02a59a5d00 --- /dev/null +++ b/hw2_autoencoders/carl - 01 - autoencoders/visualize_mnist.jl @@ -0,0 +1,10 @@ +function mnist_visualize(X) + N = last(size(X)) + columns = ceil(Int64, sqrt(N)) + rows = ceil(Int64, N / columns) + Z = fill(NaN, 28, 28, rows, columns) + copy!(Z, X) + Z = permutedims(Z, (2, 3, 1, 4)) + Z = reshape(Z, 28 * rows, 28 * columns) + heatmap(Z', yflip=true, color=:grays, aspect_ratio=1, legend=false) +end diff --git a/hw3_convolutional_networks/carl - 02 - convolutional/cnn_mnist.jl b/hw3_convolutional_networks/carl - 02 - convolutional/cnn_mnist.jl new file mode 100644 index 0000000000000000000000000000000000000000..a8005263d4d49b2b8fb8e694afcabb7de763bee7 --- /dev/null +++ b/hw3_convolutional_networks/carl - 02 - convolutional/cnn_mnist.jl @@ -0,0 +1,54 @@ +using MXNet + +#-------------------------------------------------------------------------------- +# define model + +# input +data = mx.Variable(:data) + +# first conv +conv1 = @mx.chain mx.Convolution(data, kernel=(5,5), num_filter=20) => + mx.Activation(act_type=:tanh) 
=> + mx.Pooling(pool_type=:max, kernel=(2,2), stride=(2,2)) + +# second conv +conv2 = @mx.chain mx.Convolution(conv1, kernel=(5,5), num_filter=50) => + mx.Activation(act_type=:tanh) => + mx.Pooling(pool_type=:max, kernel=(2,2), stride=(2,2)) + +# first fully-connected +fc1 = @mx.chain mx.Flatten(conv2) => + mx.FullyConnected(num_hidden=500) => + mx.Activation(act_type=:tanh) + +# second fully-connected +fc2 = mx.FullyConnected(fc1, num_hidden=10) + +# softmax loss +network = mx.SoftmaxOutput(fc2, name=:softmax) + + +#-------------------------------------------------------------------------------- +# load data +batch_size = 10000 +include("data_provider_mnist.jl") +train_provider, eval_provider = get_mnist_providers(batch_size; flat=false) + +#-------------------------------------------------------------------------------- +# fit model +# model = mx.FeedForward(network, context=mx.cpu()) +# model = mx.FeedForward(network, context=MXNet.mx.gpu(0)) +# model = mx.FeedForward(network, context=MXNet.mx.gpu.(0:1)) +model = mx.FeedForward(network, context=MXNet.mx.gpu.(0:2)) + +# optimizer +# optimizer = mx.SGD(lr=0.05, momentum=0.9, weight_decay=0.00001) +optimizer = mx.ADAM() + +# fit parameters +mx.fit(model, optimizer, train_provider, n_epoch=1000, eval_data=eval_provider) + +# save model +cd("$(dirname(@__FILE__))/output") do + mx.save_checkpoint(model, "cnn_mnist", optimizer.state) +end diff --git a/hw3_convolutional_networks/carl - 02 - convolutional/data_provider_mnist.jl b/hw3_convolutional_networks/carl - 02 - convolutional/data_provider_mnist.jl new file mode 100644 index 0000000000000000000000000000000000000000..3d6d092bdfbba382873ba5b5d70bc70f160cd725 --- /dev/null +++ b/hw3_convolutional_networks/carl - 02 - convolutional/data_provider_mnist.jl @@ -0,0 +1,16 @@ + +function get_mnist_providers(batch_size::Int; data_name=:data, label_name=:softmax_label, flat=true) + # download MNIST into Pkg.dir("MXNet")/data/mnist if not exist + filenames = 
mx.get_mnist_ubyte() + + # data provider + train_provider = mx.MNISTProvider(image=filenames[:train_data], + label=filenames[:train_label], + data_name=data_name, label_name=label_name, + batch_size=batch_size, shuffle=true, flat=flat, silent=true) + eval_provider = mx.MNISTProvider(image=filenames[:test_data], + label=filenames[:test_label], + data_name=data_name, label_name=label_name, + batch_size=batch_size, shuffle=false, flat=flat, silent=true) + return train_provider, eval_provider +end diff --git a/hw3_convolutional_networks/carl - 02 - convolutional/output/cnn_mnist-0020.params b/hw3_convolutional_networks/carl - 02 - convolutional/output/cnn_mnist-0020.params new file mode 100644 index 0000000000000000000000000000000000000000..df78c792e7fbbfc4b37bc8e1d0efa915a1f4a2b2 Binary files /dev/null and b/hw3_convolutional_networks/carl - 02 - convolutional/output/cnn_mnist-0020.params differ diff --git a/hw3_convolutional_networks/carl - 02 - convolutional/output/cnn_mnist-0200.params b/hw3_convolutional_networks/carl - 02 - convolutional/output/cnn_mnist-0200.params new file mode 100644 index 0000000000000000000000000000000000000000..f8ae7f253ac7fa52f2013e95470a3b8284fd3d68 Binary files /dev/null and b/hw3_convolutional_networks/carl - 02 - convolutional/output/cnn_mnist-0200.params differ diff --git a/hw3_convolutional_networks/carl - 02 - convolutional/output/cnn_mnist-1000.params b/hw3_convolutional_networks/carl - 02 - convolutional/output/cnn_mnist-1000.params new file mode 100644 index 0000000000000000000000000000000000000000..cfe37e6bd7b3a264eb93980d99736721cc0bf2f1 Binary files /dev/null and b/hw3_convolutional_networks/carl - 02 - convolutional/output/cnn_mnist-1000.params differ diff --git a/hw3_convolutional_networks/carl - 02 - convolutional/output/cnn_mnist-symbol.json b/hw3_convolutional_networks/carl - 02 - convolutional/output/cnn_mnist-symbol.json new file mode 100644 index 
0000000000000000000000000000000000000000..ef0c1f7a9e8fca135f85f62f40ce9a56832d44cf --- /dev/null +++ b/hw3_convolutional_networks/carl - 02 - convolutional/output/cnn_mnist-symbol.json @@ -0,0 +1,179 @@ +{ + "nodes": [ + { + "op": "null", + "name": "data", + "inputs": [] + }, + { + "op": "null", + "name": "convolution18_weight", + "attr": { + "kernel": "(5,5)", + "num_filter": "20" + }, + "inputs": [] + }, + { + "op": "null", + "name": "convolution18_bias", + "attr": { + "kernel": "(5,5)", + "num_filter": "20" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "convolution18", + "attr": { + "kernel": "(5,5)", + "num_filter": "20" + }, + "inputs": [[0, 0, 0], [1, 0, 0], [2, 0, 0]] + }, + { + "op": "Activation", + "name": "activation27", + "attr": {"act_type": "tanh"}, + "inputs": [[3, 0, 0]] + }, + { + "op": "Pooling", + "name": "pooling18", + "attr": { + "kernel": "(2,2)", + "pool_type": "max", + "stride": "(2,2)" + }, + "inputs": [[4, 0, 0]] + }, + { + "op": "null", + "name": "convolution19_weight", + "attr": { + "kernel": "(5,5)", + "num_filter": "50" + }, + "inputs": [] + }, + { + "op": "null", + "name": "convolution19_bias", + "attr": { + "kernel": "(5,5)", + "num_filter": "50" + }, + "inputs": [] + }, + { + "op": "Convolution", + "name": "convolution19", + "attr": { + "kernel": "(5,5)", + "num_filter": "50" + }, + "inputs": [[5, 0, 0], [6, 0, 0], [7, 0, 0]] + }, + { + "op": "Activation", + "name": "activation28", + "attr": {"act_type": "tanh"}, + "inputs": [[8, 0, 0]] + }, + { + "op": "Pooling", + "name": "pooling19", + "attr": { + "kernel": "(2,2)", + "pool_type": "max", + "stride": "(2,2)" + }, + "inputs": [[9, 0, 0]] + }, + { + "op": "Flatten", + "name": "flatten9", + "inputs": [[10, 0, 0]] + }, + { + "op": "null", + "name": "fullyconnected18_weight", + "attr": {"num_hidden": "500"}, + "inputs": [] + }, + { + "op": "null", + "name": "fullyconnected18_bias", + "attr": {"num_hidden": "500"}, + "inputs": [] + }, + { + "op": "FullyConnected", + 
"name": "fullyconnected18", + "attr": {"num_hidden": "500"}, + "inputs": [[11, 0, 0], [12, 0, 0], [13, 0, 0]] + }, + { + "op": "Activation", + "name": "activation29", + "attr": {"act_type": "tanh"}, + "inputs": [[14, 0, 0]] + }, + { + "op": "null", + "name": "fullyconnected19_weight", + "attr": {"num_hidden": "10"}, + "inputs": [] + }, + { + "op": "null", + "name": "fullyconnected19_bias", + "attr": {"num_hidden": "10"}, + "inputs": [] + }, + { + "op": "FullyConnected", + "name": "fullyconnected19", + "attr": {"num_hidden": "10"}, + "inputs": [[15, 0, 0], [16, 0, 0], [17, 0, 0]] + }, + { + "op": "null", + "name": "softmax_label", + "inputs": [] + }, + { + "op": "SoftmaxOutput", + "name": "softmax", + "inputs": [[18, 0, 0], [19, 0, 0]] + } + ], + "arg_nodes": [0, 1, 2, 6, 7, 12, 13, 16, 17, 19], + "node_row_ptr": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21 + ], + "heads": [[20, 0, 0]], + "attrs": {"mxnet_version": ["int", 903]} +} \ No newline at end of file diff --git a/hw4_structured_probabilistic_models/carl - 03 - boltzmann/output/dream.gif b/hw4_structured_probabilistic_models/carl - 03 - boltzmann/output/dream.gif new file mode 100644 index 0000000000000000000000000000000000000000..46caaf123e922b9ad1d5bb11195f926caf0ffea4 Binary files /dev/null and b/hw4_structured_probabilistic_models/carl - 03 - boltzmann/output/dream.gif differ diff --git a/hw4_structured_probabilistic_models/carl - 03 - boltzmann/output/origin.png b/hw4_structured_probabilistic_models/carl - 03 - boltzmann/output/origin.png new file mode 100644 index 0000000000000000000000000000000000000000..c7dcc7927ac398fb13dc123d243c8064e7aee35d Binary files /dev/null and b/hw4_structured_probabilistic_models/carl - 03 - boltzmann/output/origin.png differ diff --git a/hw4_structured_probabilistic_models/carl - 03 - boltzmann/output/prob.png b/hw4_structured_probabilistic_models/carl - 03 - boltzmann/output/prob.png new file 
mode 100644 index 0000000000000000000000000000000000000000..9638b110838fecbccb78a20643b06b93acc8560e Binary files /dev/null and b/hw4_structured_probabilistic_models/carl - 03 - boltzmann/output/prob.png differ diff --git a/hw4_structured_probabilistic_models/carl - 03 - boltzmann/output/rbm.gif b/hw4_structured_probabilistic_models/carl - 03 - boltzmann/output/rbm.gif new file mode 100644 index 0000000000000000000000000000000000000000..6a57189b40212a3398087a5e836337784496e475 Binary files /dev/null and b/hw4_structured_probabilistic_models/carl - 03 - boltzmann/output/rbm.gif differ diff --git a/hw4_structured_probabilistic_models/carl - 03 - boltzmann/output/rbm2.gif b/hw4_structured_probabilistic_models/carl - 03 - boltzmann/output/rbm2.gif new file mode 100644 index 0000000000000000000000000000000000000000..b29d9a8d6b49573051ed46b43a2fe5c5902cf328 Binary files /dev/null and b/hw4_structured_probabilistic_models/carl - 03 - boltzmann/output/rbm2.gif differ diff --git a/hw4_structured_probabilistic_models/carl - 03 - boltzmann/output/shortdream.gif b/hw4_structured_probabilistic_models/carl - 03 - boltzmann/output/shortdream.gif new file mode 100644 index 0000000000000000000000000000000000000000..7c4f7b048a8048dd3ae5a3a04fb2304182a7d7b2 Binary files /dev/null and b/hw4_structured_probabilistic_models/carl - 03 - boltzmann/output/shortdream.gif differ diff --git a/hw4_structured_probabilistic_models/carl - 03 - boltzmann/playground/rbm.jl b/hw4_structured_probabilistic_models/carl - 03 - boltzmann/playground/rbm.jl new file mode 100644 index 0000000000000000000000000000000000000000..b9f7bfb499d9ad80c4cadf45d78ce6c396a6392a --- /dev/null +++ b/hw4_structured_probabilistic_models/carl - 03 - boltzmann/playground/rbm.jl @@ -0,0 +1,110 @@ +using Distributions +using MLDatasets +using Plots + +function visualize(X) + N = last(size(X)) + columns = ceil(Int64, sqrt(N)) + rows = ceil(Int64, N / columns) + Z = fill(NaN, 28, 28, rows, columns) + copy!(Z, X) + Z = 
permutedims(Z, (2, 3, 1, 4)) + Z = reshape(Z, 28 * rows, 28 * columns) + heatmap(Z, yflip=true, color=:grays, aspect_ratio=1, legend=false) +end + +σ(x) = 1 / (1 + exp(-x)) + +function test() + X, y = MNIST.traindata() + X = reshape(X, 784, 60000) + X = X .>= 0.35 + X = convert(Array{Float32}, X) + + T = size(X, 2) + + # learning rate + ε = Float32(0.1) + batch_size = 20 + num_epochs = 15 + + + # dimensions of visible and hidden layer + N = 784 + M = 100 + + # initialize parameters + W = zeros(Float32, M, N) + hbias = zeros(Float32, M) + vbias = zeros(Float32, N) + + # pre-allocation + v = Array(Float32, N) + h = Array(Float32, M) + vhat = Array(Float32, N) + hhat = Array(Float32, M) + zN = Array(Float32, N) # used as temp for rand numbers + zM = Array(Float32, M) # used as temp for rand numbers + ΔW = zeros(Float32, M, N) + Δhbias = zeros(Float32, M) + Δvbias = zeros(Float32, N) + + I = DiscreteUniform(1, T) + + # iterate training algorithm https://en.wikipedia.org/wiki/Restricted_Boltzmann_machine#Training_algorithm + anim = @animate for epoch in 1:num_epochs + fill!(ΔW, 0) + fill!(Δhbias, 0) + fill!(Δvbias, 0) + println("epoch $i") + for batch in 1:num_batches + idx = rand(I) + # idx = j + v = @view X[:,idx] + + Base.LinAlg.copy!(h, hbias) + Base.LinAlg.BLAS.gemv!('N', one(Float32), W, v, one(Float32), h) + asd!(h, zM) + + copy!(vhat, v) + copy!(hhat, h) + + for k in 1:1 + Base.LinAlg.copy!(vhat, vbias) + Base.LinAlg.BLAS.gemv!('T', one(Float32), W, hhat, one(Float32), vhat) + asd!(vhat, zN) + + Base.LinAlg.copy!(hhat, hbias) + Base.LinAlg.BLAS.gemv!('N', one(Float32), W, vhat, one(Float32), hhat) + asd!(hhat, zM) + end + + Base.LinAlg.BLAS.ger!(ε, h, v, ΔW) # ΔW += ε * h * v' + Base.LinAlg.BLAS.ger!(-ε, hhat, vhat, ΔW) # ΔW -= ε * hhat * vhat' + + Base.LinAlg.axpy!(ε, h, Δhbias) + Base.LinAlg.axpy!(-ε, hhat, Δhbias) + Base.LinAlg.axpy!(ε, v, Δvbias) + Base.LinAlg.axpy!(-ε, vhat, Δvbias) + end + W += ΔW / batch_size + hbias += Δhbias / batch_size + vbias += 
Δvbias / batch_size + + visualize(W') + end + + gif(anim, "rbm.gif", fps=5) + + W, hbias, vbias +end + +function asd!(p, z) + rand!(z) + for i in eachindex(p) + @inbounds p[i] = z[i] < σ(p[i]) + end +end + +W, hbias, vbias = test() +# visualize(W') diff --git a/hw4_structured_probabilistic_models/carl - 03 - boltzmann/playground/rbm1.jl b/hw4_structured_probabilistic_models/carl - 03 - boltzmann/playground/rbm1.jl new file mode 100644 index 0000000000000000000000000000000000000000..b873dd348ebac60d99677efa85f6de0dd8029c85 --- /dev/null +++ b/hw4_structured_probabilistic_models/carl - 03 - boltzmann/playground/rbm1.jl @@ -0,0 +1,106 @@ +using Distributions +using MLDatasets +using Plots + +function visualize(X) + N = last(size(X)) + columns = ceil(Int64, sqrt(N)) + rows = ceil(Int64, N / columns) + Z = fill(NaN, 28, 28, rows, columns) + copy!(Z, X) + Z = permutedims(Z, (2, 3, 1, 4)) + Z = reshape(Z, 28 * rows, 28 * columns) + heatmap(Z, yflip=true, color=:grays, aspect_ratio=1, legend=false) +end + +σ(x) = 1 / (1 + exp(-x)) + +function test() + X, y = MNIST.traindata() + X = reshape(X, 784, 60000) + X = X .>= 0.35 + X = convert(Array{Float32}, X) + + T = size(X, 2) + + # learning rate + ε = Float32(0.001) + + # dimensions of visible and hidden layer + N = 784 + M = 100 + + # initialize parameters + W = zeros(Float32, M, N) + hbias = zeros(Float32, M) + vbias = zeros(Float32, N) + + # pre-allocation + v = Array(Float32, N) + h = Array(Float32, M) + vhat = Array(Float32, N) + hhat = Array(Float32, M) + zN = Array(Float32, N) # used as temp for rand numbers + zM = Array(Float32, M) # used as temp for rand numbers + + I = DiscreteUniform(1, T) + + # iterate training algorithm https://en.wikipedia.org/wiki/Restricted_Boltzmann_machine#Training_algorithm + # anim = Animation() + @time for i in 1:300 + if mod(i, 100000) == 0 + println("iteration $i") + error = sumabs2(σ.(W' * σ.(W * X .+ hbias) .+ vbias) - X) + println("error $error") + # visualize(W') + # frame(anim) + 
end + + k = rand(I) + # Base.LinAlg.copy!(v, 1, X, N * (k - 1) + 1, N) + v = @view X[:,k] + + # h .= rand!(h) .< σ.(W * v + b) + # vhat .= rand!(vhat) .< σ.(W' * h + c) + # hhat .= rand!(hhat) .< σ.(W * vhat + b) + + Base.LinAlg.copy!(h, hbias) + Base.LinAlg.BLAS.gemv!('N', one(Float32), W, v, one(Float32), h) + # h .= (W * v + b) + asd!(h, zM) + Base.LinAlg.copy!(vhat, vbias) + Base.LinAlg.BLAS.gemv!('T', one(Float32), W, h, one(Float32), vhat) + # vhat .= (W' * h + c) + asd!(vhat, zN) + Base.LinAlg.copy!(hhat, hbias) + Base.LinAlg.BLAS.gemv!('N', one(Float32), W, vhat, one(Float32), hhat) + # hhat .= (W * vhat + b) + asd!(hhat, zM) + + Base.LinAlg.BLAS.ger!(ε, h, v, W) # W += ε * h * v' + Base.LinAlg.BLAS.ger!(-ε, hhat, vhat, W) # W -= ε * hhat * vhat' + + # b += ε * (h - hhat) + # c += ε * (v - vhat) + Base.LinAlg.axpy!(ε, h, hbias) + Base.LinAlg.axpy!(-ε, hhat, hbias) + Base.LinAlg.axpy!(ε, v, vbias) + Base.LinAlg.axpy!(-ε, vhat, vbias) + end + + # gif(anim, "rbm.gif", fps=15) + + W, hbias, vbias +end + + + +function asd!(p, z) + rand!(z) + for i in eachindex(p) + @inbounds p[i] = z[i] < σ(p[i]) + end +end + +W, hbias, vbias = test() +# visualize(W') diff --git a/hw4_structured_probabilistic_models/carl - 03 - boltzmann/playground/rbm2.jl b/hw4_structured_probabilistic_models/carl - 03 - boltzmann/playground/rbm2.jl new file mode 100644 index 0000000000000000000000000000000000000000..fb7070b4c48cc34169e7368f0faf1e9e8497f847 --- /dev/null +++ b/hw4_structured_probabilistic_models/carl - 03 - boltzmann/playground/rbm2.jl @@ -0,0 +1,74 @@ +using Distributions +using MLDatasets +using Plots + +function visualize(X) + N = last(size(X)) + columns = ceil(Int64, sqrt(N)) + rows = ceil(Int64, N / columns) + Z = fill(NaN, 28, 28, rows, columns) + copy!(Z, X) + Z = permutedims(Z, (2, 3, 1, 4)) + Z = reshape(Z, 28 * rows, 28 * columns) + heatmap(Z, yflip=true, color=:grays, aspect_ratio=1, legend=false) +end + +σ(x) = 1 / (1 + exp(-x)) + +function test() + X, y = 
MNIST.traindata() + X = reshape(X, 784, 60000) + X = X .>= 0.35 + X = convert(Array{Float32}, X) + + T = size(X, 2) + + # learning rate + ε = Float32(0.01) + + # dimensions of visible and hidden layer + N = 784 + M = 512 + + # initialize parameters + W = zeros(Float32, M, N) + b = zeros(Float32, M) + c = zeros(Float32, N) + + # pre-allocation + v = Array(Float32, N) + h = Array(Float32, M) + vhat = Array(Float32, N) + hhat = Array(Float32, M) + + I = DiscreteUniform(1, T) + + # iterate training algorithm https://en.wikipedia.org/wiki/Restricted_Boltzmann_machine#Training_algorithm + @time for i in 1:10000 + # anim = @animate for i in 1:100000 + mod(i, 1000) == 0 && println("iteration $i") + + k = rand(I) + Base.LinAlg.copy!(v, view(X, :, k)) + h .= rand!(h) .< σ.(W * v + b) + vhat .= rand!(vhat) .< σ.(W' * h + c) + hhat .= rand!(hhat) .< σ.(W * vhat + b) + Base.LinAlg.BLAS.ger!(ε, h, v, W) # W += ε * h * v' + Base.LinAlg.BLAS.ger!(-ε, hhat, vhat, W) # W -= ε * hhat * vhat' + # b += ε * (h - hhat) + # c += ε * (v - vhat) + Base.LinAlg.axpy!(ε, h, b) + Base.LinAlg.axpy!(-ε, hhat, b) + Base.LinAlg.axpy!(ε, v, c) + Base.LinAlg.axpy!(-ε, vhat, c) + # visualize(W') + # end every 1000 + end + + # gif(anim, "rbm.gif", fps=15) + + W, b, c +end + +W, b, c = test() +# visualize(W') diff --git a/hw4_structured_probabilistic_models/carl - 03 - boltzmann/playground/rbm_10000000.gif b/hw4_structured_probabilistic_models/carl - 03 - boltzmann/playground/rbm_10000000.gif new file mode 100644 index 0000000000000000000000000000000000000000..5b8d0571bbbf16416a27dc424357026b56870780 Binary files /dev/null and b/hw4_structured_probabilistic_models/carl - 03 - boltzmann/playground/rbm_10000000.gif differ diff --git a/hw4_structured_probabilistic_models/carl - 03 - boltzmann/playground/rbm_epochs.jl b/hw4_structured_probabilistic_models/carl - 03 - boltzmann/playground/rbm_epochs.jl new file mode 100644 index 
0000000000000000000000000000000000000000..b9c4529ad01641f05dbd51ba29ec73d957918b02 --- /dev/null +++ b/hw4_structured_probabilistic_models/carl - 03 - boltzmann/playground/rbm_epochs.jl @@ -0,0 +1,98 @@ +using Distributions +using MLDatasets +using Plots + +function visualize(X) + # tile the columns + N = last(size(X)) + columns = ceil(Int64, sqrt(N)) + rows = ceil(Int64, N / columns) + Z = fill(NaN, 28, 28, rows, columns) + copy!(Z, X) + Z = permutedims(Z, (2, 3, 1, 4)) + Z = reshape(Z, 28 * rows, 28 * columns) + # plot + heatmap(Z, yflip=true, color=:grays, aspect_ratio=1, legend=false) +end + +function test() + float_type = Float32 + + # read the data + X, y = MNIST.traindata() + X = reshape(X, 784, 60000) + X = X .>= 0.35 + X = convert(Array{Float32}, X) + T = size(X, 2) + + # learning parameters + learning_rate = float_type(0.01) + batch_size = 20 + num_batches = floor(Int64, T / batch_size) + num_epochs = 1000 + batches = [((i-1)*batch_size+1,i*batch_size) for i in 1:num_batches] + + # RBM parameters + N = 784 + M = 100 + W = 0.001 * randn(float_type, M, N) + hbias = zeros(float_type, M) + vbias = zeros(float_type, N) + + # pre-allocation + H = Array(float_type, M, batch_size) + Vhat = Array(float_type, N, batch_size) + Hhat = similar(H) + Hrand = similar(H) + Vrand = similar(Vhat) + + α = float_type(learning_rate / batch_size) + I = ones(float_type, batch_size) + + # run training algorithm + @time for epoch in 1:num_epochs + error = 0.0 + print("epoch $epoch: ") + for (a, b) in batches + # sample V from data + V = @view X[:,a:b] + # sample H given V + H .= hbias + Base.LinAlg.BLAS.gemm!('N', 'N', one(float_type), W, V, one(float_type), H) + rbm_sample!(H, H, Hrand) + + copy!(Hhat, H) +# for k in 1:15 + # sample Vhat given H + Vhat .= vbias + Base.LinAlg.BLAS.gemm!('T', 'N', one(float_type), W, Hhat, one(float_type), Vhat) + rbm_sample!(Vhat, Vhat, Vrand) + # sample Hhat given Vhat + Hhat .= hbias + Base.LinAlg.BLAS.gemm!('N', 'N', one(float_type), W, Vhat, 
one(float_type), Hhat) + rbm_sample!(Hhat, Hhat, Hrand) +# end + + error += sumabs2(V - Vhat) + + # update parameters + Base.LinAlg.BLAS.gemm!('N', 'T', α, H, V, one(float_type), W) + Base.LinAlg.BLAS.gemm!('N', 'T', -α, Hhat, Vhat, one(float_type), W) + Base.LinAlg.BLAS.gemv!('N', α, H, I, one(float_type), hbias) + Base.LinAlg.BLAS.gemv!('N', -α, Hhat, I, one(float_type), hbias) + Base.LinAlg.BLAS.gemv!('N', α, V, I, one(float_type), vbias) + Base.LinAlg.BLAS.gemv!('N', -α, Vhat, I, one(float_type), vbias) + end + println(error) + end + W, hbias, vbias +end + +function rbm_sample!(dest, src, rnd) + rand!(rnd) + for i in length(dest) + @inbounds dest[i] = rnd[i] < σ(src[i]) + end +end + +W, hbias, vbias = test() diff --git a/hw4_structured_probabilistic_models/carl - 03 - boltzmann/rbm_naive.jl b/hw4_structured_probabilistic_models/carl - 03 - boltzmann/rbm_naive.jl new file mode 100644 index 0000000000000000000000000000000000000000..98c6543df334aaaecfa62e89c608c9dd5c3ed90a --- /dev/null +++ b/hw4_structured_probabilistic_models/carl - 03 - boltzmann/rbm_naive.jl @@ -0,0 +1,50 @@ +using MLDatasets +using StatsFuns: logistic + +function test() + T = Float64 + + # read and pre-process data + X, y = MNIST.traindata() + X = convert(Array{T}, X) + X = reshape(X, 784, 60000) + X .= X .> 0.35 + + # define RBM model + v_size = 784 + h_size = 100 + + # define training parameters + num_epochs = 1 + batch_size = 100 + data_size = 6000 + learning_rate = 0.1 + k = 1 + + # initialize model parameters + W = 0.01 * randn(T, h_size, v_size) + vbias = zeros(T, v_size) + hbias = zeros(T, h_size) + + @time for epoch in 1:num_epochs + for batch_start in 1:batch_size:data_size + batch_end = min(batch_start + batch_size - 1, data_size) + Vpos = X[:,batch_start:batch_end] + Hpos = rand(h_size, batch_size) .<= logistic(W * Vpos .+ hbias) + Vneg = rand(v_size, batch_size) .<= logistic(W' * Hpos .+ vbias) + Hneg = rand(h_size, batch_size) .<= logistic(W * Vneg .+ hbias) + for _ in 1:k-1 + 
Vneg = rand(v_size, batch_size) .<= logistic(W' * Hneg .+ vbias) + Hneg = rand(h_size, batch_size) .<= logistic(W * Vneg .+ hbias) + end + η = learning_rate / length(batch_start:batch_end) + W += η * (Hpos * Vpos' - Hneg * Vneg') + vbias += η * vec(sum(Vpos - Vneg, 2)) + hbias += η * vec(sum(Hpos - Hneg, 2)) + end + end + + return W, vbias, hbias +end + +W, vbias, hbias = test() diff --git a/hw4_structured_probabilistic_models/carl - 03 - boltzmann/rbm_optimized.jl b/hw4_structured_probabilistic_models/carl - 03 - boltzmann/rbm_optimized.jl new file mode 100644 index 0000000000000000000000000000000000000000..6ff623f8d0ec40b576befec43745af72434d8b3f --- /dev/null +++ b/hw4_structured_probabilistic_models/carl - 03 - boltzmann/rbm_optimized.jl @@ -0,0 +1,161 @@ +using MLDatasets +using Base.LinAlg.BLAS +using StatsFuns: logistic + +include("utils.jl") + +function test() + T = Float32 + + # read and pre-process data + X, y = MNIST.traindata() + X = convert(Array{T}, X) + X = reshape(X, 784, 60000) + X .= X .> 0.35 + + # define RBM model + v_size = 784 + h_size = 500 + + # define training parameters + num_epochs = 100 + batch_size = 10 + data_size = 60000 + learning_rate = 0.1 + k = 1 + persistent = false + + # initialize model parameters + W = 0.01 * randn(T, h_size, v_size) + vbias = zeros(T, v_size) + hbias = zeros(T, h_size) + + # pre-allocation + Hpos = Array(T, h_size, batch_size) + Vneg = Array(T, v_size, batch_size) + Hneg = similar(Hpos) + Hbuffer = similar(Hneg) + Vbuffer = similar(Vneg) + η = T(learning_rate / batch_size) + I = ones(T, batch_size) + Vpersist = rand!(similar(Vneg)) + Vpersist .= Vpersist .< 0.5 + + for epoch in 1:num_epochs + # anim = @animate for epoch in 1:num_epochs + error = 0.0 + elapsed = @elapsed begin + for batch_start in 1:batch_size:data_size + batch_end = batch_start + batch_size - 1 + # sample positive + Vpos = @view X[:,batch_start:batch_end] + Hpos = logistic.(W * Vpos .+ hbias) + # sample_h_given_v!(Hpos, Vpos, W, hbias, 
Hbuffer) + # sample negative + if persistent + sample_h_given_v!(Hneg, Vpersist, W, hbias, Hbuffer) + else + copy!(Hneg, Hpos) + end + for _ in 1:k + sample_v_given_h!(Vneg, Hneg, W, vbias, Vbuffer) + sample_h_given_v!(Hneg, Vneg, W, hbias, Hbuffer) + end + if persistent + copy!(Vpersist, Vneg) + end + + # update parameters + gemm!('N', 'T', η, Hpos, Vpos, one(T), W) + gemm!('N', 'T', -η, Hneg, Vneg, one(T), W) + gemv!('N', η, Vpos, I, one(T), vbias) + gemv!('N', -η, Vneg, I, one(T), vbias) + gemv!('N', η, Hpos, I, one(T), hbias) + gemv!('N', -η, Hneg, I, one(T), hbias) + + error += sumabs2(Vpos .- Vneg) + end + end + # visualize(W') + println("epoch $epoch: $(elapsed)s $error") + end + + # gif(anim, "rbm.gif", fps=round(Int, num_epochs/5)) + + return W, vbias, hbias +end + +function sample_h_given_v!{T}(H::AbstractArray{T}, V, W, hbias, buffer) + H .= hbias + gemm!('N', 'N', one(T), W, V, one(T), H) + map!(logistic, H) + rand!(buffer) + for i in eachindex(H) + @inbounds H[i] = buffer[i] <= H[i] + end + return H +end + +function sample_v_given_h!{T}(V::AbstractArray{T}, H, W, vbias, buffer) + V .= vbias + gemm!('T', 'N', one(T), W, H, one(T), V) + map!(logistic, V) + rand!(buffer) + for i in eachindex(V) + @inbounds V[i] = buffer[i] <= V[i] + end + return V +end + +function dream{T}(W::AbstractArray{T}, vbias, hbias) + n = 16 + + # V = convert(Array{T}, rand(Bool, length(vbias), n)) + + X, y = MNIST.traindata() + X = convert(Array{T}, X) + X = reshape(X, 784, 60000) + X .= X .> 0.35 + V = X[:,1:n] + + H = Array(T, length(hbias), n) + Hbuffer = similar(H) + Vbuffer = similar(V) + anim = @animate for i in 1:100 + for j in 1:100 + sample_h_given_v!(H, V, W, hbias, Hbuffer) + sample_v_given_h!(V, H, W, vbias, Vbuffer) + end + println("iteration $i") + visualize(V) + end + gif(anim, "dream.gif") +end + +function shortdream{T}(W::AbstractArray{T}, vbias, hbias) + n = 100 + + # V = convert(Array{T}, rand(Bool, length(vbias), n)) + + X, y = MNIST.traindata() + X = 
convert(Array{T}, X) + X = reshape(X, 784, 60000) + X .= X .> 0.35 + V = X[:,1:n] + + H = Array(T, length(hbias), n) + Hbuffer = similar(H) + Vbuffer = similar(V) + anim = @animate for i in 1:100 + sample_h_given_v!(H, V, W, hbias, Hbuffer) + sample_v_given_h!(V, H, W, vbias, Vbuffer) + println("iteration $i") + visualize(V) + end + gif(anim, "shortdream.gif") +end + +W, vbias, hbias = test() + +visualize(W') +# dream(W, vbias, hbias) diff --git a/hw4_structured_probabilistic_models/carl - 03 - boltzmann/utils.jl b/hw4_structured_probabilistic_models/carl - 03 - boltzmann/utils.jl new file mode 100644 index 0000000000000000000000000000000000000000..aff8b00d468f8439e44c61fb86fd9eb31c6c5fe5 --- /dev/null +++ b/hw4_structured_probabilistic_models/carl - 03 - boltzmann/utils.jl @@ -0,0 +1,12 @@ +using Plots + +function visualize(X) + N = last(size(X)) + columns = ceil(Int64, sqrt(N)) + rows = ceil(Int64, N / columns) + Z = fill(NaN, 28, 28, rows, columns) + copy!(Z, X) + Z = permutedims(Z, (2, 3, 1, 4)) + Z = reshape(Z, 28 * rows, 28 * columns) + heatmap(Z, yflip=true, color=:grays, aspect_ratio=1, legend=false) +end diff --git a/hw9_gpu/carl - 05 - gpu/see 02 - convolutional.txt b/hw9_gpu/carl - 05 - gpu/see 02 - convolutional.txt new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391