使用 C 语言编写 ANN 实现 XOR 异或函数

介绍用纯 C 语言实现一个简单的 BP 网络,并使用 XOR 数据测试。

完整代码可参考:https://github.com/howardlau1999/learning-machine-learning/blob/master/mlp_work.c

辅助函数

// Uniform pseudo-random double in [low, high] (consumes one rand() call).
double random_(double low, double high) {
    double unit = (double)rand() / RAND_MAX;  // uniform in [0, 1]
    return ((high - low) * unit) + low;
}
double sigmoid(double x) { return 1 / (1 + exp(-x)); }
double d_sigmoid(double x) { return sigmoid(x) * (1 - sigmoid(x)); }

初始化网络

实现 BP 网络需要存储网络的每一层的权重以及偏置,而实现 BP 算法则需要存储神经元的输入、激活输出以及误差,因此,定义数据结构如下:

// Network parameters for a fully-connected feed-forward network.
// Layer count and per-layer sizes come from the globals `layers` / `sizes`.
// The input layer (layer 0) has no biases or incoming weights, hence the
// `l + 1` shift in the indexing below.
struct mlp {
    // biases[l][i] is the bias of the i-th neuron in layer l + 1
    double** biases;
    // weights[l][i][j] is the weight of the edge
    // connecting the j-th neuron in layer l and i-th neuron in layer l + 1
    double*** weights;
};

// Per-sample caches used by backpropagation.
// All three arrays are indexed [layer][neuron]; layer 0 is the input layer.
struct bp {
    // input from previous layer: z[l][i] is the pre-activation
    // (weighted sum of previous layer's outputs, plus bias)
    double **z;
    // output after activation: a[l][i] = sigmoid(z[l][i])
    double **a;
    // back-prop error: delta[l][i] = dLoss/dz[l][i]
    double **delta;
};

其中 weights 权重数组需要随机初始化,其他数组初始化为全 0 即可。

// Allocate zero-initialized bias vectors, one per non-input layer
// (biases[l] belongs to layer l + 1). Relies on the globals `layers` and
// `sizes`; the caller owns the returned memory.
double** alloc_biases() {
    double** arr = malloc((layers - 1) * sizeof(double*));
    for (int l = 0; l < layers - 1; ++l) {
        // calloc zeroes the memory, replacing the manual zeroing loop.
        arr[l] = calloc(sizes[l + 1], sizeof(double));
    }
    return arr;
}

// Allocate and randomly initialize the weight matrices.
// There are only layers - 1 weight matrices (one per pair of adjacent
// layers); the original allocated `layers` pointers, leaving the last one
// unused and uninitialized. Allocate layers - 1 for consistency with
// alloc_biases(). Caller owns the returned memory.
double*** alloc_weights() {
    double*** weights = malloc((layers - 1) * sizeof(double**));
    for (int l = 0; l < layers - 1; ++l) {
        weights[l] = malloc(sizes[l + 1] * sizeof(double*));
        for (int i = 0; i < sizes[l + 1]; ++i) {
            weights[l][i] = malloc(sizes[l] * sizeof(double));
            for (int j = 0; j < sizes[l]; ++j) {
                // Wide random init helps this small net escape symmetry.
                weights[l][i][j] = random_(-5, 5);
            }
        }
    }

    return weights;
}

// Allocate one zero-initialized vector per layer (including the input and
// output layers), sized to that layer's neuron count. Used for the z, a,
// and delta members of struct bp. Caller owns the returned memory.
double** alloc_cache() {
    double** arr = malloc(layers * sizeof(double*));
    for (int l = 0; l < layers; ++l) {
        // calloc zeroes the memory, replacing the manual zeroing loop.
        arr[l] = calloc(sizes[l], sizeof(double));
    }
    return arr;
}

前向传播

// Forward pass: copies `input` into the layer-0 activations, then for each
// later layer computes z = W * a_prev + b and a = sigmoid(z), caching both
// for backprop. The network output is the raw z of the last layer — the
// output layer is linear (no activation), matching backward()'s
// output-layer delta.
void forward(double* input, double* output, struct mlp* net, struct bp* cache) {
    // Layer 0 "activation" is the raw input.
    for (int i = 0; i < sizes[0]; ++i) {
        cache->a[0][i] = input[i];
    }

    for (int l = 1; l < layers; ++l) {
        for (int i = 0; i < sizes[l]; ++i) {
            // Start from the bias; biases are shifted by one layer
            // (biases[l - 1] belongs to layer l).
            cache->z[l][i] = net->biases[l - 1][i];
            for (int j = 0; j < sizes[l - 1]; ++j) {
                cache->z[l][i] +=
                    net->weights[l - 1][i][j] * cache->a[l - 1][j];
            }
            // NOTE(review): this also stores sigmoid(z) for the output
            // layer, but that value is never read — the output copy below
            // and backward() both use z[layers - 1] directly.
            cache->a[l][i] = sigmoid(cache->z[l][i]);
        }
    }

    // Linear output layer: emit pre-activations, not sigmoid outputs.
    for (int i = 0; i < sizes[layers - 1]; ++i) {
        output[i] = cache->z[layers - 1][i];
    }
}

需要注意的是,输出层不使用激活函数:forward 最后直接输出 cache->z[layers - 1](线性输出),因此误差回传时输出层的误差项也不需要乘 sigmoid 的导数。

损失函数

// Squared-error loss over the output layer: 1/2 * sum_i (y_i - t_i)^2.
// The 1/2 factor cancels the 2 in the derivative used by backward().
double loss(double* output, double* ground_truth) {
    double total = 0;
    int n_out = sizes[layers - 1];

    for (int i = 0; i < n_out; ++i) {
        double diff = output[i] - ground_truth[i];
        total += diff * diff;
    }

    return .5 * total;
}

误差回传

// Backpropagation: fills cache->delta for every layer, assuming forward()
// has just been run for the same sample.
//   Output layer (linear): delta = -(t - z)   (squared-error loss, no
//   sigmoid-derivative factor, because forward() emits raw z).
//   Hidden layer l: delta[l] = (W[l]^T * delta[l+1]) elementwise-times
//   sigmoid'(z[l]).
void backward(double* ground_truth, struct mlp* net, struct bp* cache) {
    // Output-layer error.
    for (int i = 0; i < sizes[layers - 1]; ++i) {
        cache->delta[layers - 1][i] =
            -(ground_truth[i] - cache->z[layers - 1][i]);
    }

    // Propagate the error backwards through the hidden layers.
    // NOTE(review): the loop runs down to l = 0 and so also computes
    // delta[0] for the input layer, which optimize() never reads —
    // harmless but redundant work.
    for (int l = layers - 2; l >= 0; --l) {
        for (int i = 0; i < sizes[l]; ++i) {
            double sum = 0;
            for (int j = 0; j < sizes[l + 1]; ++j) {
                // weights[l][j][i]: edge from neuron i in layer l
                // to neuron j in layer l + 1.
                sum +=
                    cache->delta[l + 1][j] * net->weights[l][j][i];
            }
            cache->delta[l][i] = sum * d_sigmoid(cache->z[l][i]);
        }
    }
}

参数更新

void optimize(double lr, struct mlp* net, struct bp* cache) {
    for (int l = 0; l < layers - 1; ++l) {
        for (int i = 0; i < sizes[l + 1]; ++i) {
            for (int j = 0; j < sizes[l]; ++j) {
                net->weights[l][i][j] -= lr * (cache->delta[l + 1][i] * cache->a[l][j]);
            }
            net->biases[l][i] -= lr * cache->delta[l + 1][i]; 
        }
    }
}

训练代码

// input_layer, hidden_layers..., output_layer
int sizes[] = {2, 3, 4, 3, 1};
int layers = sizeof(sizes) / sizeof(int);

int main() {
    double input[4][2] = {{0, 0}, {0, 1}, {1, 0}, {1, 1}};
    double ground_truth[4][1] = {{0}, {1}, {1}, {0}};
    double output[1];

    cache.z = alloc_cache();
    cache.a = alloc_cache();
    cache.delta = alloc_cache();

    net.biases = alloc_biases();
    net.weights = alloc_weights();

    for (int epoch = 0; epoch < 50000; ++epoch) {
        for (int n = 0; n < 4; ++n) {
            forward(input[n], output, &net, &cache);
            if (epoch % 1000 == 0)
                printf("epoch %d, sample %d, loss %.6f\n", epoch, n, loss(output, ground_truth[n]));
            backward(ground_truth[n], &net, &cache);
            optimize(0.3, &net, &cache);
            zero_grad(&cache);
        }
    }
}

完整代码可参考:https://github.com/howardlau1999/learning-machine-learning/blob/master/mlp_work.c