#include <stdlib.h>
#include <string.h>

#include <stdio.h>
#include <math.h>

#include "naive.h"
#include "matrix.h"

/**
 * Computes the transitive closure of the bit-packed matrix naive->T0 with a
 * Floyd-Warshall style sweep that processes one 64-bit word at a time.
 *
 * naive->Tn and naive->temp are the two buffers the sweep alternates between;
 * T0 is only read. On return:
 *   - naive->Tn->data   points at the buffer holding the finished closure,
 *   - naive->temp->data points at the other (scratch) buffer,
 *   - naive->tc         is the number of set bits found in the first
 *                       T0->size words of the closure.
 *
 * Column k of a row is read as bit (63 - k % BPI) of word k / BPI, i.e. the
 * most significant bit of a word is the lowest-numbered column.
 *
 * NOTE(review): the literal 63 only matches BPI == 64 -- confirm in matrix.h.
 * NOTE(review): only the first ceil(n / BPI) words of each row are rewritten;
 * any further words up to rsize keep what the buffers held before the call
 * and are still included in the popcount -- confirm they are zero.
 */
void fw_transitive_closure_bit(bitnaive_t *naive) {
	uint32_t i, j, k;

	uint32_t n = naive->n;
	// Words per row that actually carry columns: ceil(n / BPI), computed
	// without a round trip through floating point.
	uint32_t m = n / BPI + (n % BPI != 0 ? 1 : 0);

	bitmatrix_t *T0 = naive->T0;
	// Row stride in words; kept in size_t so row offsets cannot wrap at 32 bits.
	size_t rsize = T0->rsize;

	uint64_t *cur = naive->Tn->data;
	uint64_t *next = naive->temp->data;

	memcpy(cur, T0->data, T0->size * sizeof(uint64_t));

	for (k = 0; k < n; k++) {
		const uint64_t *row_k = cur + (size_t)k * rsize;
		const size_t pivot_word = k / BPI;
		const uint64_t pivot_bit = ((uint64_t)1) << (63 - k % BPI);

		for (i = 0; i < n; i++) {
			const uint64_t *src = cur + (size_t)i * rsize;
			uint64_t *dst = next + (size_t)i * rsize;

			// All ones when row i has column k set, all zeros otherwise, so
			// row k is OR-ed into row i without a branch in the word loop.
			uint64_t mask = (src[pivot_word] & pivot_bit) != 0 ? ~(uint64_t)0 : 0;

			// 'j' advances one uint64_t word (BPI columns) per step.
			for (j = 0; j < m; j++) {
				dst[j] = src[j] | (mask & row_k[j]);
			}
		}

		// The freshly written buffer becomes the input of the next pivot.
		uint64_t *swap = cur;
		cur = next;
		next = swap;
	}

	uint32_t count = 0;

	for (i = 0; i < T0->size; i++) {
		count += (uint32_t)__builtin_popcountll(cur[i]);
	}

	naive->tc = count;

	// Every pivot iteration ends with a swap, so the finished closure is in
	// 'cur' and 'next' only holds the state before the last pivot. Publish
	// the closure through Tn and hand the scratch buffer back to temp.
	naive->Tn->data = cur;
	naive->temp->data = next;
}

/**
 * Computes the transitive closure of the n x n matrix naive->T0 with the
 * Floyd-Warshall recurrence
 *
 *     next[i][j] = cur[i][j] | (cur[i][k] & cur[k][j])
 *
 * applied for k = 0 .. n-1. The matrix is stored row-major with one int64_t
 * per entry; the operators are bitwise, so entries are presumably 0/1
 * (NOTE(review): confirm against the code that fills T0).
 *
 * naive->Tn and naive->temp are the two buffers the sweep alternates between;
 * T0 is only read. On return:
 *   - naive->Tn->data   points at the buffer holding the finished closure,
 *   - naive->temp->data points at the other (scratch) buffer,
 *   - naive->tc         is the number of non-zero entries of the closure.
 */
void fw_transitive_closure(naive_t *naive) {
	int i, j, k;
	int n = naive->n;
	// Widened once so the byte count and the row offsets below are computed
	// in size_t; 'n * n' in int overflows once n exceeds 46340.
	size_t dim = (size_t)n;

	int64_t *a = naive->Tn->data;
	int64_t *b = naive->temp->data;

	memcpy(a, naive->T0->data, dim * dim * sizeof(int64_t));

	// Pivots 0 .. n-2. The last pivot is handled separately below so that it
	// can count the non-zero entries while it writes them.
	for (k = 0; k < n - 1; k++) {
		const int64_t *row_k = a + (size_t)k * dim;

		for (i = 0; i < n; i++) {
			const int64_t *src = a + (size_t)i * dim;
			int64_t *dst = b + (size_t)i * dim;
			// a[i][k] does not change during the j loop: only 'b' is written.
			const int64_t via = src[k];

			for (j = 0; j < n; j++) {
				dst[j] = src[j] | (via & row_k[j]);
			}
		}

		int64_t *swap = a;
		a = b;
		b = swap;
	}

	// The loop above leaves k == n-1 whenever n >= 1, which is exactly the
	// remaining pivot. For n == 0 it leaves k == 0 and the loops below do
	// not execute.
	uint32_t counter = 0;
	const int64_t *last_row = a + (size_t)k * dim;

	for (i = 0; i < n; i++) {
		const int64_t *src = a + (size_t)i * dim;
		int64_t *dst = b + (size_t)i * dim;
		const int64_t via = src[k];

		for (j = 0; j < n; j++) {
			dst[j] = src[j] | (via & last_row[j]);

			if (dst[j] != 0) {
				counter++;
			}
		}
	}

	naive->tc = counter;
	// No swap follows the final pass, so the closure is in 'b'.
	naive->Tn->data = b;
	naive->temp->data = a;
}
