@mahyarmirrashed
Created June 20, 2025 20:59
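/*
 * A minimal fully connected neural network in plain C: a 2-4-1 multilayer
 * perceptron with sigmoid activations, trained on XOR by backpropagation.
 *
 * Build (filename assumed): cc -O2 xor_nn.c -o xor_nn -lm
 * The -lm flag links the math library needed for exp() and sqrt().
 */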
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
// Sigmoid activation function
double sigmoid(double x) {
  return 1.0 / (1.0 + exp(-x));
}
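// Since a = sigmoid(z) satisfies sigmoid'(z) = a * (1 - a), the derivative
// below takes the already-computed activation as its argument instead of
// recomputing exp().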
// Derivative of sigmoid
double sigmoid_derivative(double x) {
  return x * (1.0 - x); // Assuming x is already sigmoid(z)
}
// Initialize matrix with random values using Xavier initialization
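// Weights are drawn uniformly from [-limit, limit] with
// limit = sqrt(6 / (fan_in + fan_out)) (Glorot/Xavier uniform). For the 2-4-1
// network built in main(), W1 (2x4) gets limit = sqrt(6/6) = 1.0 and
// W2 (4x1) gets limit = sqrt(6/5) ~ 1.1.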
void initialize_matrix(double **matrix, int rows, int cols) {
  double limit = sqrt(6.0 / (rows + cols));
  for (int i = 0; i < rows; i++) {
    for (int j = 0; j < cols; j++) {
      matrix[i][j] = ((double)rand() / RAND_MAX - 0.5) * 2.0 * limit;
    }
  }
}
// Matrix multiplication: C = A * B
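// Note: C must not alias A or B, since every C[i][j] is zeroed before the
// accumulation over k.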
void matrix_multiply(double **A, double **B, double **C, int rowsA, int colsA, int rowsB, int colsB) {
  if (colsA != rowsB) {
    fprintf(stderr, "Matrix multiply error: colsA (%d) != rowsB (%d)\n", colsA, rowsB);
    exit(1);
  }
  for (int i = 0; i < rowsA; i++) {
    for (int j = 0; j < colsB; j++) {
      C[i][j] = 0.0;
      for (int k = 0; k < colsA; k++) {
        C[i][j] += A[i][k] * B[k][j];
      }
    }
  }
}
// Matrix addition
void matrix_add(double **A, double **B, double **C, int rows, int cols) {
  for (int i = 0; i < rows; i++) {
    for (int j = 0; j < cols; j++) {
      C[i][j] = A[i][j] + B[i][j];
    }
  }
}
// Matrix subtraction
void matrix_subtract(double **A, double **B, double **C, int rows, int cols) {
  for (int i = 0; i < rows; i++) {
    for (int j = 0; j < cols; j++) {
      C[i][j] = A[i][j] - B[i][j];
    }
  }
}
// Element-wise multiplication
void elementwise_multiply(double **A, double **B, double **C, int rows, int cols) {
  for (int i = 0; i < rows; i++) {
    for (int j = 0; j < cols; j++) {
      C[i][j] = A[i][j] * B[i][j];
    }
  }
}
// Transpose matrix
void transpose(double **A, double **T, int rows, int cols) {
  for (int i = 0; i < rows; i++) {
    for (int j = 0; j < cols; j++) {
      T[j][i] = A[i][j];
    }
  }
}
// Allocate 2D matrix
double** allocate_matrix(int rows, int cols) {
  double **matrix = (double**)malloc(rows * sizeof(double*));
  if (!matrix) {
    fprintf(stderr, "Memory allocation failed for matrix rows\n");
    exit(1);
  }
  for (int i = 0; i < rows; i++) {
    matrix[i] = (double*)malloc(cols * sizeof(double));
    if (!matrix[i]) {
      fprintf(stderr, "Memory allocation failed for matrix cols\n");
      exit(1);
    }
    for (int j = 0; j < cols; j++) {
      matrix[i][j] = 0.0;
    }
  }
  return matrix;
}
// Free 2D matrix
void free_matrix(double **matrix, int rows) {
  if (!matrix) return;
  for (int i = 0; i < rows; i++) {
    if (matrix[i]) free(matrix[i]);
  }
  free(matrix);
}
// Print matrix for debugging
void print_matrix(double **matrix, int rows, int cols, const char *name) {
  printf("%s:\n", name);
  for (int i = 0; i < rows; i++) {
    for (int j = 0; j < cols; j++) {
      printf("%8.4f ", matrix[i][j]);
    }
    printf("\n");
  }
  printf("\n");
}
// Neural Network structure
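// Activations and biases are stored as 1 x n row vectors, so the forward pass
// computes a = sigmoid(x * W + b) with x on the left (row-vector convention),
// rather than W * x + b.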
typedef struct {
  int input_size;
  int hidden_size;
  int output_size;
  double **W1; // Input to hidden weights
  double **b1; // Hidden biases
  double **W2; // Hidden to output weights
  double **b2; // Output biases
  double **z1; // Hidden layer pre-activation
  double **a1; // Hidden layer activation
  double **z2; // Output layer pre-activation
  double **a2; // Output layer activation
} NeuralNetwork;
// Initialize neural network
NeuralNetwork* create_network(int input_size, int hidden_size, int output_size) {
  NeuralNetwork *nn = (NeuralNetwork*)malloc(sizeof(NeuralNetwork));
  if (!nn) {
    fprintf(stderr, "Memory allocation failed for neural network\n");
    exit(1);
  }
  nn->input_size = input_size;
  nn->hidden_size = hidden_size;
  nn->output_size = output_size;
  // Allocate weight matrices
  nn->W1 = allocate_matrix(input_size, hidden_size);
  nn->b1 = allocate_matrix(1, hidden_size);
  nn->W2 = allocate_matrix(hidden_size, output_size);
  nn->b2 = allocate_matrix(1, output_size);
  // Allocate activation matrices
  nn->z1 = allocate_matrix(1, hidden_size);
  nn->a1 = allocate_matrix(1, hidden_size);
  nn->z2 = allocate_matrix(1, output_size);
  nn->a2 = allocate_matrix(1, output_size);
  // Initialize weights
  initialize_matrix(nn->W1, input_size, hidden_size);
  initialize_matrix(nn->W2, hidden_size, output_size);
  // Initialize biases to zero
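  // (allocate_matrix() already zero-fills, so these loops are redundant but
  // make the intent explicit.)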
  for (int i = 0; i < hidden_size; i++) {
    nn->b1[0][i] = 0.0;
  }
  for (int i = 0; i < output_size; i++) {
    nn->b2[0][i] = 0.0;
  }
  return nn;
}
// Forward propagation
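// Note: matrix_add() is called with z1 (and z2) as both an input and the
// output; the aliasing is safe because addition is purely elementwise.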
void forward(NeuralNetwork *nn, double **input) {
  // Hidden layer: z1 = input * W1 + b1
  matrix_multiply(input, nn->W1, nn->z1, 1, nn->input_size, nn->input_size, nn->hidden_size);
  matrix_add(nn->z1, nn->b1, nn->z1, 1, nn->hidden_size);
  // Apply sigmoid activation: a1 = sigmoid(z1)
  for (int i = 0; i < nn->hidden_size; i++) {
    nn->a1[0][i] = sigmoid(nn->z1[0][i]);
  }
  // Output layer: z2 = a1 * W2 + b2
  matrix_multiply(nn->a1, nn->W2, nn->z2, 1, nn->hidden_size, nn->hidden_size, nn->output_size);
  matrix_add(nn->z2, nn->b2, nn->z2, 1, nn->output_size);
  // Apply sigmoid activation: a2 = sigmoid(z2)
  for (int i = 0; i < nn->output_size; i++) {
    nn->a2[0][i] = sigmoid(nn->z2[0][i]);
  }
}
// Backward propagation and weight update
void backward(NeuralNetwork *nn, double **input, double **target, double learning_rate) {
  // Allocate temporary matrices
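  // (These scratch matrices are allocated and freed on every call; that keeps
  // the code simple, though hoisting them into the network struct would avoid
  // the repeated malloc/free churn.)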
  double **dz2 = allocate_matrix(1, nn->output_size);
  double **dW2 = allocate_matrix(nn->hidden_size, nn->output_size);
  double **db2 = allocate_matrix(1, nn->output_size);
  double **dz1 = allocate_matrix(1, nn->hidden_size);
  double **dW1 = allocate_matrix(nn->input_size, nn->hidden_size);
  double **db1 = allocate_matrix(1, nn->hidden_size);
  double **a1_T = allocate_matrix(nn->hidden_size, 1);
  double **input_T = allocate_matrix(nn->input_size, 1);
  double **W2_T = allocate_matrix(nn->output_size, nn->hidden_size);
  // Output layer gradients
  // dz2 = a2 - target
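  // Note: a2 - target is the exact dL/dz2 for binary cross-entropy with a
  // sigmoid output (the a2 * (1 - a2) factor cancels). For the squared-error
  // metric reported in train(), a strict gradient would also multiply by
  // sigmoid_derivative(a2); omitting it is a common simplification that
  // avoids small output gradients.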
  matrix_subtract(nn->a2, target, dz2, 1, nn->output_size);
  // dW2 = a1^T * dz2
  transpose(nn->a1, a1_T, 1, nn->hidden_size);
  matrix_multiply(a1_T, dz2, dW2, nn->hidden_size, 1, 1, nn->output_size);
  // db2 = dz2
  for (int i = 0; i < nn->output_size; i++) {
    db2[0][i] = dz2[0][i];
  }
  // Hidden layer gradients
  // dz1 = (dz2 * W2^T) * sigmoid_derivative(a1)
  transpose(nn->W2, W2_T, nn->hidden_size, nn->output_size);
  matrix_multiply(dz2, W2_T, dz1, 1, nn->output_size, nn->output_size, nn->hidden_size);
  for (int i = 0; i < nn->hidden_size; i++) {
    dz1[0][i] *= sigmoid_derivative(nn->a1[0][i]);
  }
  // dW1 = input^T * dz1
  transpose(input, input_T, 1, nn->input_size);
  matrix_multiply(input_T, dz1, dW1, nn->input_size, 1, 1, nn->hidden_size);
  // db1 = dz1
  for (int i = 0; i < nn->hidden_size; i++) {
    db1[0][i] = dz1[0][i];
  }
  // Update weights and biases
  for (int i = 0; i < nn->input_size; i++) {
    for (int j = 0; j < nn->hidden_size; j++) {
      nn->W1[i][j] -= learning_rate * dW1[i][j];
    }
  }
  for (int i = 0; i < nn->hidden_size; i++) {
    nn->b1[0][i] -= learning_rate * db1[0][i];
  }
  for (int i = 0; i < nn->hidden_size; i++) {
    for (int j = 0; j < nn->output_size; j++) {
      nn->W2[i][j] -= learning_rate * dW2[i][j];
    }
  }
  for (int i = 0; i < nn->output_size; i++) {
    nn->b2[0][i] -= learning_rate * db2[0][i];
  }
  // Free temporary matrices
  free_matrix(dz2, 1);
  free_matrix(dW2, nn->hidden_size);
  free_matrix(db2, 1);
  free_matrix(dz1, 1);
  free_matrix(dW1, nn->input_size);
  free_matrix(db1, 1);
  free_matrix(a1_T, nn->hidden_size);
  free_matrix(input_T, nn->input_size);
  free_matrix(W2_T, nn->output_size);
}
// Train neural network
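// Weights are updated after every individual sample (online/stochastic
// gradient descent with batch size 1), not once per epoch.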
void train(NeuralNetwork *nn, double **inputs, double **targets, int num_samples, double learning_rate, int epochs) {
  double **input = allocate_matrix(1, nn->input_size);
  double **target = allocate_matrix(1, nn->output_size);
  for (int epoch = 0; epoch < epochs; epoch++) {
    double total_loss = 0.0;
    for (int sample = 0; sample < num_samples; sample++) {
      // Copy input and target for this sample
      for (int i = 0; i < nn->input_size; i++) {
        input[0][i] = inputs[sample][i];
      }
      for (int i = 0; i < nn->output_size; i++) {
        target[0][i] = targets[sample][i];
      }
      // Forward pass
      forward(nn, input);
      // Compute loss (Mean Squared Error)
      for (int i = 0; i < nn->output_size; i++) {
        double error = target[0][i] - nn->a2[0][i];
        total_loss += error * error;
      }
      // Backward pass
      backward(nn, input, target, learning_rate);
    }
    // Print average loss every 1000 epochs
    if (epoch % 1000 == 0) {
      printf("Epoch %d, Average Loss: %.6f\n", epoch, total_loss / (num_samples * nn->output_size));
    }
  }
  free_matrix(input, 1);
  free_matrix(target, 1);
}
// Test the network
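// Note: the printout below assumes a single output unit (output_size == 1).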
void test(NeuralNetwork *nn, double **inputs, double **targets, int num_samples) {
  double **input = allocate_matrix(1, nn->input_size);
  printf("\nTesting the network:\n");
  for (int i = 0; i < num_samples; i++) {
    // Copy input
    for (int j = 0; j < nn->input_size; j++) {
      input[0][j] = inputs[i][j];
    }
    // Forward pass
    forward(nn, input);
    // Print results
    printf("Input: [");
    for (int j = 0; j < nn->input_size; j++) {
      printf("%.0f", inputs[i][j]);
      if (j < nn->input_size - 1) printf(", ");
    }
    printf("], Output: %.6f, Target: %.0f\n", nn->a2[0][0], targets[i][0]);
  }
  free_matrix(input, 1);
}
// Free neural network
void free_network(NeuralNetwork *nn) {
  if (!nn) return;
  free_matrix(nn->W1, nn->input_size);
  free_matrix(nn->b1, 1);
  free_matrix(nn->W2, nn->hidden_size);
  free_matrix(nn->b2, 1);
  free_matrix(nn->z1, 1);
  free_matrix(nn->a1, 1);
  free_matrix(nn->z2, 1);
  free_matrix(nn->a2, 1);
  free(nn);
}
int main() {
  srand(time(NULL));
  // Create a 2-4-1 neural network (2 inputs, 4 hidden, 1 output)
  NeuralNetwork *nn = create_network(2, 4, 1);
  // XOR problem data
  double input_data[4][2] = {{0, 0}, {0, 1}, {1, 0}, {1, 1}};
  double target_data[4][1] = {{0}, {1}, {1}, {0}};
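  // XOR is not linearly separable, so a model with no hidden layer (a single
  // sigmoid unit) cannot fit this truth table; the hidden layer is what gives
  // the network the capacity to solve it.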
  // Allocate matrices for inputs and targets
  double **inputs = allocate_matrix(4, 2);
  double **targets = allocate_matrix(4, 1);
  for (int i = 0; i < 4; i++) {
    for (int j = 0; j < 2; j++) {
      inputs[i][j] = input_data[i][j];
    }
    targets[i][0] = target_data[i][0];
  }
  // Train the network
  printf("Starting training on XOR problem...\n");
  train(nn, inputs, targets, 4, 0.5, 10000);
  // Test the network
  test(nn, inputs, targets, 4);
  // Cleanup
  free_matrix(inputs, 4);
  free_matrix(targets, 4);
  free_network(nn);
  return 0;
}