#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
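// Build note (the file name here is illustrative): cc -O2 -o xor_nn xor_nn.c -lm
// The -lm flag links the math library needed for exp() and sqrt().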
// Sigmoid activation function
double sigmoid(double x) {
    return 1.0 / (1.0 + exp(-x));
}
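// If s = sigmoid(z), then d(sigmoid)/dz = s * (1 - s), so the derivative
// can be computed from the stored activation alone, without keeping z.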
// Derivative of sigmoid
double sigmoid_derivative(double x) {
    return x * (1.0 - x); // Assuming x is already sigmoid(z)
}
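// Glorot/Xavier uniform: limit = sqrt(6 / (fan_in + fan_out)); sampling from
// U(-limit, limit) keeps activation variance roughly constant across layers
// at the start of training.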
// Initialize matrix with random values using Xavier initialization
void initialize_matrix(double **matrix, int rows, int cols) {
    double limit = sqrt(6.0 / (rows + cols));
    for (int i = 0; i < rows; i++) {
        for (int j = 0; j < cols; j++) {
            matrix[i][j] = ((double)rand() / RAND_MAX - 0.5) * 2.0 * limit;
        }
    }
}
// Matrix multiplication: C = A * B
void matrix_multiply(double **A, double **B, double **C, int rowsA, int colsA, int rowsB, int colsB) {
    if (colsA != rowsB) {
        fprintf(stderr, "Matrix multiply error: colsA (%d) != rowsB (%d)\n", colsA, rowsB);
        exit(1);
    }
    for (int i = 0; i < rowsA; i++) {
        for (int j = 0; j < colsB; j++) {
            C[i][j] = 0.0;
            for (int k = 0; k < colsA; k++) {
                C[i][j] += A[i][k] * B[k][j];
            }
        }
    }
}
// Matrix addition
void matrix_add(double **A, double **B, double **C, int rows, int cols) {
    for (int i = 0; i < rows; i++) {
        for (int j = 0; j < cols; j++) {
            C[i][j] = A[i][j] + B[i][j];
        }
    }
}
// Matrix subtraction
void matrix_subtract(double **A, double **B, double **C, int rows, int cols) {
    for (int i = 0; i < rows; i++) {
        for (int j = 0; j < cols; j++) {
            C[i][j] = A[i][j] - B[i][j];
        }
    }
}
// Element-wise multiplication (general utility; not called in this program)
void elementwise_multiply(double **A, double **B, double **C, int rows, int cols) {
    for (int i = 0; i < rows; i++) {
        for (int j = 0; j < cols; j++) {
            C[i][j] = A[i][j] * B[i][j];
        }
    }
}
// Transpose matrix; T must be allocated as cols x rows
void transpose(double **A, double **T, int rows, int cols) {
    for (int i = 0; i < rows; i++) {
        for (int j = 0; j < cols; j++) {
            T[j][i] = A[i][j];
        }
    }
}
// Allocate 2D matrix, zero-initialized
double** allocate_matrix(int rows, int cols) {
    double **matrix = (double**)malloc(rows * sizeof(double*));
    if (!matrix) {
        fprintf(stderr, "Memory allocation failed for matrix rows\n");
        exit(1);
    }
    for (int i = 0; i < rows; i++) {
        matrix[i] = (double*)malloc(cols * sizeof(double));
        if (!matrix[i]) {
            fprintf(stderr, "Memory allocation failed for matrix cols\n");
            exit(1);
        }
        for (int j = 0; j < cols; j++) {
            matrix[i][j] = 0.0;
        }
    }
    return matrix;
}
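// Rows are allocated individually above, so freeing a matrix needs the row count.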
// Free 2D matrix
void free_matrix(double **matrix, int rows) {
    if (!matrix) return;
    for (int i = 0; i < rows; i++) {
        if (matrix[i]) free(matrix[i]);
    }
    free(matrix);
}
// Print matrix for debugging (not called in this program)
void print_matrix(double **matrix, int rows, int cols, const char *name) {
    printf("%s:\n", name);
    for (int i = 0; i < rows; i++) {
        for (int j = 0; j < cols; j++) {
            printf("%8.4f ", matrix[i][j]);
        }
        printf("\n");
    }
    printf("\n");
}
// Neural Network structure
typedef struct {
    int input_size;
    int hidden_size;
    int output_size;
    double **W1; // Input to hidden weights
    double **b1; // Hidden biases
    double **W2; // Hidden to output weights
    double **b2; // Output biases
    double **z1; // Hidden layer pre-activation
    double **a1; // Hidden layer activation
    double **z2; // Output layer pre-activation
    double **a2; // Output layer activation
} NeuralNetwork;
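// Shape convention: every activation and bias is a 1 x N row vector; the
// network processes one sample at a time (online/stochastic training).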
// Initialize neural network
NeuralNetwork* create_network(int input_size, int hidden_size, int output_size) {
    NeuralNetwork *nn = (NeuralNetwork*)malloc(sizeof(NeuralNetwork));
    if (!nn) {
        fprintf(stderr, "Memory allocation failed for neural network\n");
        exit(1);
    }
    nn->input_size = input_size;
    nn->hidden_size = hidden_size;
    nn->output_size = output_size;
    // Allocate weight matrices
    nn->W1 = allocate_matrix(input_size, hidden_size);
    nn->b1 = allocate_matrix(1, hidden_size);
    nn->W2 = allocate_matrix(hidden_size, output_size);
    nn->b2 = allocate_matrix(1, output_size);
    // Allocate activation matrices
    nn->z1 = allocate_matrix(1, hidden_size);
    nn->a1 = allocate_matrix(1, hidden_size);
    nn->z2 = allocate_matrix(1, output_size);
    nn->a2 = allocate_matrix(1, output_size);
    // Initialize weights; biases start at zero (allocate_matrix zero-fills)
    initialize_matrix(nn->W1, input_size, hidden_size);
    initialize_matrix(nn->W2, hidden_size, output_size);
    return nn;
}
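// The forward pass computes, for a 1 x input_size row vector x:
//   a1 = sigmoid(x * W1 + b1)   (hidden activations)
//   a2 = sigmoid(a1 * W2 + b2)  (network output)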
// Forward propagation
void forward(NeuralNetwork *nn, double **input) {
    // Hidden layer: z1 = input * W1 + b1
    matrix_multiply(input, nn->W1, nn->z1, 1, nn->input_size, nn->input_size, nn->hidden_size);
    matrix_add(nn->z1, nn->b1, nn->z1, 1, nn->hidden_size);
    // Apply sigmoid activation: a1 = sigmoid(z1)
    for (int i = 0; i < nn->hidden_size; i++) {
        nn->a1[0][i] = sigmoid(nn->z1[0][i]);
    }
    // Output layer: z2 = a1 * W2 + b2
    matrix_multiply(nn->a1, nn->W2, nn->z2, 1, nn->hidden_size, nn->hidden_size, nn->output_size);
    matrix_add(nn->z2, nn->b2, nn->z2, 1, nn->output_size);
    // Apply sigmoid activation: a2 = sigmoid(z2)
    for (int i = 0; i < nn->output_size; i++) {
        nn->a2[0][i] = sigmoid(nn->z2[0][i]);
    }
}
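// Gradients for the two-layer network (chain rule, .* is element-wise):
//   dz2 = a2 - target
//   dW2 = a1^T * dz2                      db2 = dz2
//   dz1 = (dz2 * W2^T) .* a1 .* (1 - a1)
//   dW1 = input^T * dz1                   db1 = dz1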
// Backward propagation and weight update
void backward(NeuralNetwork *nn, double **input, double **target, double learning_rate) {
    // Allocate temporary matrices
    double **dz2 = allocate_matrix(1, nn->output_size);
    double **dW2 = allocate_matrix(nn->hidden_size, nn->output_size);
    double **db2 = allocate_matrix(1, nn->output_size);
    double **dz1 = allocate_matrix(1, nn->hidden_size);
    double **dW1 = allocate_matrix(nn->input_size, nn->hidden_size);
    double **db1 = allocate_matrix(1, nn->hidden_size);
    double **a1_T = allocate_matrix(nn->hidden_size, 1);
    double **input_T = allocate_matrix(nn->input_size, 1);
    double **W2_T = allocate_matrix(nn->output_size, nn->hidden_size);
    // Output layer gradients
    // dz2 = a2 - target. This is the exact gradient for a sigmoid output
    // paired with binary cross-entropy; the exact gradient of the MSE loss
    // reported in train() would carry an extra sigmoid_derivative(a2) factor.
    // The simplified form preserves the sign of the update and is a common
    // shortcut in small examples like this one.
    matrix_subtract(nn->a2, target, dz2, 1, nn->output_size);
    // dW2 = a1^T * dz2
    transpose(nn->a1, a1_T, 1, nn->hidden_size);
    matrix_multiply(a1_T, dz2, dW2, nn->hidden_size, 1, 1, nn->output_size);
    // db2 = dz2
    for (int i = 0; i < nn->output_size; i++) {
        db2[0][i] = dz2[0][i];
    }
    // Hidden layer gradients
    // dz1 = (dz2 * W2^T) .* sigmoid_derivative(a1)
    transpose(nn->W2, W2_T, nn->hidden_size, nn->output_size);
    matrix_multiply(dz2, W2_T, dz1, 1, nn->output_size, nn->output_size, nn->hidden_size);
    for (int i = 0; i < nn->hidden_size; i++) {
        dz1[0][i] *= sigmoid_derivative(nn->a1[0][i]);
    }
    // dW1 = input^T * dz1
    transpose(input, input_T, 1, nn->input_size);
    matrix_multiply(input_T, dz1, dW1, nn->input_size, 1, 1, nn->hidden_size);
    // db1 = dz1
    for (int i = 0; i < nn->hidden_size; i++) {
        db1[0][i] = dz1[0][i];
    }
    // Gradient descent step: parameter -= learning_rate * gradient
    for (int i = 0; i < nn->input_size; i++) {
        for (int j = 0; j < nn->hidden_size; j++) {
            nn->W1[i][j] -= learning_rate * dW1[i][j];
        }
    }
    for (int i = 0; i < nn->hidden_size; i++) {
        nn->b1[0][i] -= learning_rate * db1[0][i];
    }
    for (int i = 0; i < nn->hidden_size; i++) {
        for (int j = 0; j < nn->output_size; j++) {
            nn->W2[i][j] -= learning_rate * dW2[i][j];
        }
    }
    for (int i = 0; i < nn->output_size; i++) {
        nn->b2[0][i] -= learning_rate * db2[0][i];
    }
    // Free temporary matrices
    free_matrix(dz2, 1);
    free_matrix(dW2, nn->hidden_size);
    free_matrix(db2, 1);
    free_matrix(dz1, 1);
    free_matrix(dW1, nn->input_size);
    free_matrix(db1, 1);
    free_matrix(a1_T, nn->hidden_size);
    free_matrix(input_T, nn->input_size);
    free_matrix(W2_T, nn->output_size);
}
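// Training is plain stochastic gradient descent: each sample triggers a
// forward pass, an MSE accumulation for reporting, and an immediate weight
// update via backward().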
// Train neural network
void train(NeuralNetwork *nn, double **inputs, double **targets, int num_samples, double learning_rate, int epochs) {
    double **input = allocate_matrix(1, nn->input_size);
    double **target = allocate_matrix(1, nn->output_size);
    for (int epoch = 0; epoch < epochs; epoch++) {
        double total_loss = 0.0;
        for (int sample = 0; sample < num_samples; sample++) {
            // Copy input and target for this sample
            for (int i = 0; i < nn->input_size; i++) {
                input[0][i] = inputs[sample][i];
            }
            for (int i = 0; i < nn->output_size; i++) {
                target[0][i] = targets[sample][i];
            }
            // Forward pass
            forward(nn, input);
            // Compute loss (Mean Squared Error)
            for (int i = 0; i < nn->output_size; i++) {
                double error = target[0][i] - nn->a2[0][i];
                total_loss += error * error;
            }
            // Backward pass
            backward(nn, input, target, learning_rate);
        }
        // Print average loss every 1000 epochs
        if (epoch % 1000 == 0) {
            printf("Epoch %d, Average Loss: %.6f\n", epoch, total_loss / (num_samples * nn->output_size));
        }
    }
    free_matrix(input, 1);
    free_matrix(target, 1);
}
// Test the network
void test(NeuralNetwork *nn, double **inputs, double **targets, int num_samples) {
    double **input = allocate_matrix(1, nn->input_size);
    printf("\nTesting the network:\n");
    for (int i = 0; i < num_samples; i++) {
        // Copy input
        for (int j = 0; j < nn->input_size; j++) {
            input[0][j] = inputs[i][j];
        }
        // Forward pass
        forward(nn, input);
        // Print results
        printf("Input: [");
        for (int j = 0; j < nn->input_size; j++) {
            printf("%.0f", inputs[i][j]);
            if (j < nn->input_size - 1) printf(", ");
        }
        printf("], Output: %.6f, Target: %.0f\n", nn->a2[0][0], targets[i][0]);
    }
    free_matrix(input, 1);
}
// Free neural network
void free_network(NeuralNetwork *nn) {
    if (!nn) return;
    free_matrix(nn->W1, nn->input_size);
    free_matrix(nn->b1, 1);
    free_matrix(nn->W2, nn->hidden_size);
    free_matrix(nn->b2, 1);
    free_matrix(nn->z1, 1);
    free_matrix(nn->a1, 1);
    free_matrix(nn->z2, 1);
    free_matrix(nn->a2, 1);
    free(nn);
}
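// XOR is the classic test case for a multilayer network: its classes are not
// linearly separable, so a single-layer perceptron cannot learn it, while one
// hidden layer of a few sigmoid units can.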
int main(void) {
    srand((unsigned)time(NULL));
    // Create a 2-4-1 neural network (2 inputs, 4 hidden, 1 output)
    NeuralNetwork *nn = create_network(2, 4, 1);
    // XOR problem data
    double input_data[4][2] = {{0, 0}, {0, 1}, {1, 0}, {1, 1}};
    double target_data[4][1] = {{0}, {1}, {1}, {0}};
    // Allocate matrices for inputs and targets
    double **inputs = allocate_matrix(4, 2);
    double **targets = allocate_matrix(4, 1);
    for (int i = 0; i < 4; i++) {
        for (int j = 0; j < 2; j++) {
            inputs[i][j] = input_data[i][j];
        }
        targets[i][0] = target_data[i][0];
    }
    // Train the network
    printf("Starting training on XOR problem...\n");
    train(nn, inputs, targets, 4, 0.5, 10000);
    // Test the network
    test(nn, inputs, targets, 4);
    // Cleanup
    free_matrix(inputs, 4);
    free_matrix(targets, 4);
    free_network(nn);
    return 0;
}