#include <stdlib.h>
#include <stdio.h>
#include <inttypes.h>
#include <math.h>

#include "dfs.h"
#include "xutil.h"

/*
 * Recursively copy array[start..end] into dfs->storage in depth-first
 * (pre-order) layout: a node is written first, followed by its entire left
 * subtree, followed by its right subtree.
 *
 * Slots are reserved as if every subtree were perfect: the right child of a
 * node at depth d is placed 2^(height-d) slots after the node, regardless of
 * how many keys the left subtree actually holds. Slots that receive no key
 * are left untouched.
 *
 * array - source keys (presumably sorted ascending, since dfs_pred descends
 *         left on "x < key" -- TODO confirm with callers)
 * start - first index of the range to place (inclusive)
 * end   - last index of the range to place (inclusive); start > end means
 *         the range is empty and nothing is written
 * index - 1-based slot in dfs->storage for the root of this range
 * d     - 1-based depth of that root (the tree root has d == 1)
 * dfs   - tree being filled; height, skew and storage must already be set
 */
void dfs_recurse(uint32_t *array, int32_t start, int32_t end, uint32_t index,
		uint32_t d, dfs_t *dfs) {
	int32_t mid;

	if (start > end) {
		return;
	}

	// Pick the split point in double precision. A float has only a 24-bit
	// significand, so casting (end-start) to float rounds ranges wider than
	// 2^24 and can move the split by one element on large arrays.
	mid = start + ceil((double)(end - start) / (1 / dfs->skew));

	// Store parent
	dfs->storage[index - 1] = array[mid];

	// Left recursion: the left child sits directly after its parent
	dfs_recurse(array, start, mid - 1, index + 1, d + 1, dfs);

	// Right recursion: skip the 2^(height-d)-1 slots reserved for the left
	// subtree. The shift is done on an unsigned 1 so it never reaches the
	// sign bit of an int.
	dfs_recurse(array, mid + 1, end,
			index + (UINT32_C(1) << (dfs->height - d)), d + 1, dfs);
}

/*
 * Recursively copy array[start..end] into dfs->storage in depth-first
 * (pre-order) layout without reserving unused slots: the right child of a
 * node is stored immediately after the last key of its left subtree.
 *
 * When the range holds exactly 2^height - 1 keys the split point is derived
 * from dfs->skew. Otherwise the range is shaped like a complete tree (all
 * levels full except the last, which is filled from the left) and the split
 * point is the root of that shape.
 *
 * array - source keys (presumably sorted ascending -- TODO confirm)
 * start - first index of the range to place (inclusive)
 * end   - last index of the range to place (inclusive); start > end means
 *         the range is empty and nothing is written
 * index - 1-based slot in dfs->storage for the root of this range
 * d     - depth of that root; only forwarded to the recursive calls
 * dfs   - tree being filled; skew and storage must already be set
 */
void dfs_recurse_blazing(uint32_t *array, int32_t start, int32_t end,
		uint32_t index, uint32_t d, dfs_t *dfs) {
	int32_t mid;
	int32_t N = end - start + 1;

	if (start > end) {
		return;
	}

	// Height of the smallest perfect tree that can hold N keys, and the
	// number of nodes such a tree has.
	uint32_t height = floor(log2(N)) + 1;
	int32_t nodes = pow(2, height) - 1;

	if (N == nodes) {
		// Perfect subtree: split according to the skew. The quotient stays
		// in double precision; narrowing it to float (24-bit significand)
		// rounds midpoints above 2^24 and shifts the split by one element.
		mid = start + ceil((double)(end - start) / (1 / dfs->skew));
	} else {
		uint32_t h = height - 1;

		// Nodes in the levels above the last (partially filled) level
		uint32_t left_size = pow(2, h) - 1;

		// Keys that spill into the last level
		uint32_t leaf_count = N - left_size;

		// Capacity of the left subtree's share of the last level
		uint32_t left_leaves = pow(2, height - 2);

		if (leaf_count < left_leaves) {
			left_leaves = leaf_count;
		}

		// Left subtree = its full upper levels + its share of the last level
		mid = start + (left_size - 1)/2 + left_leaves;
	}

	// Store parent
	dfs->storage[index - 1] = array[mid];

	// Left recursion: the left child sits directly after its parent
	dfs_recurse_blazing(array, start, mid - 1, index + 1, d + 1, dfs);

	// Right recursion: skip exactly the (mid - start) keys of the left subtree
	dfs_recurse_blazing(array, mid + 1, end, index + (mid + 1 - start), d + 1, dfs);
}

dfs_t *dfs_create(uint32_t *array, uint32_t N, double skew) {
	dfs_t *dfs = xmalloc(sizeof(dfs_t));

	dfs->height = floor(log2(N))+1;
	dfs->skew = skew;
	dfs->nodes = pow(2, dfs->height)-1;
	dfs->storage = xmalloc(dfs->nodes * sizeof(uint32_t));
	dfs->N = N;

	dfs_recurse(array, 0, N-1, 1, 1, dfs);

//	uint32_t i;
//	for (i = 0; i < N; i++){
//		printf("%"PRIu32" ", dfs->storage[i]);
//	}
//	printf("\n");

	return dfs;
}

/*
 * Predecessor search in the DFS-layout tree referenced by str->dfs.
 *
 * Walks from the root (slot 1) downwards, going left when str->x is smaller
 * than the current key and right otherwise. Every key that is <= x on the
 * way becomes the current candidate.
 *
 * Returns x itself when it is found; otherwise the last candidate seen, or
 * UINT32_MAX when no visited key was <= x.
 *
 * The remaining subtree size is halved on every step, which presumably
 * matches an even split (skew 0.5) in dfs_recurse; see the TODO below.
 */
uint32_t dfs_pred(struct dfs_search *restrict str) {
	const uint32_t x = str->x;
	dfs_t *restrict dfs = str->dfs;
	uint32_t index = 1;                  // 1-based slot of the current node
	uint32_t p = UINT32_MAX;             // best predecessor so far
	uint32_t d = dfs->height;            // levels left below the current node
	uint32_t subtree_size = dfs->N;      // keys left in the current subtree
	uint32_t j;                          // key stored at the current node

	// TODO: Fix subtree_size for skewed trees

	for (;;) {
		j = dfs->storage[index - 1];
		d -= 1;

		if (j > x) {
			// Left child is stored right after its parent.
			index += 1;
			subtree_size /= 2;
		} else {
			// j <= x: remember it, then jump over the slots reserved for
			// the left subtree to reach the right child.
			p = j;
			index += (1 << d);
			subtree_size = (subtree_size - 1) / 2;
		}

		if (j == x || subtree_size == 0) {
			break;
		}
	}

	return (j == x) ? j : p;
}

/*
//uint32_t dfs_pred(uint32_t x, dfs_t *dfs) {
uint32_t dfs_pred(struct dfs_search *str) {
	uint32_t x = str->x;
	dfs_t    *dfs = str->dfs;
	uint32_t index = 1; 						// Current index
	uint32_t p = UINT32_MAX;					// Current predecessor
	uint32_t j;									// Current element
	uint32_t h = dfs->height;					// Depths left
	uint32_t N = dfs->N;
	uint32_t cN = N;
	uint32_t nodes = dfs->nodes;
	uint32_t leaves;
	uint32_t left_size;

	do {
		j = dfs->storage[index-1];

		h--;

		left_size = (1 << h)-1;

		if (cN != nodes) {
			leaves = cN-left_size;
			left_size = (1 << (h-1));
			if (leaves < left_size) {
				left_size = leaves;
			}
			left_size += (1 << (h-1))-1;
		}

		if (x < j) {
			index = index+1;
			cN = left_size;
		} else {
			p = j;

			index = index+(left_size+1);
			cN = cN-(left_size+1);
		}

		if (cN <= (uint32_t)((1 << (h-1))-1)) {
			h--;
		}

		nodes = (1 << h)-1;
	} while(x != j && index <= N);

	if (x == j) {
		return j;
	}

	return p;
}
*/

/*
Assembly generated for dfs_pred WITH O3 enabled (x is in %edi, dfs in %rsi)

dfs_pred:
.LFB17:
    .cfi_startproc
    pushq   %rbp              # Base pointer
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movl    $-1, %eax         # p = UINT32_MAX (eax holds the return value)
    pushq   %rbx              # save callee-saved rbx (holds the constant 1 below)
    .cfi_def_cfa_offset 24
    .cfi_offset 3, -24
    movl    $1, %r8d          # index = 1
    movl    24(%rsi), %ecx    # d = height
    movl    32(%rsi), %edx    # subtree_size = N
    movq    8(%rsi), %rbp     # rbp = storage
    movl    $1, %ebx          # ebx = 1, the constant shifted to form 1 << d
    jmp .L56
    .p2align 4,,10
    .p2align 3
.L65:
    testl   %edx, %edx   # subtree_size AND itself: sets ZF when subtree_size == 0
    je  .L59             # leave the loop once the subtree is empty
.L56:
    leal    -1(%r8), %esi  # esi = index - 1
    subl    $1, %ecx       # d--
    movl    %ebx, %r11d    # r11d = 1
    movl    %edx, %r9d     # r9d = subtree_size
    leal    1(%r8), %r10d  # r10d = index + 1
    sall    %cl, %r11d     # shift 1 << d
    movl    0(%rbp,%rsi,4), %esi  #  j = storage[index -1]
    subl    $1, %edx       # edx -= 1, subtree_size -1

    shrl    %r9d           # (subtree_size) >> 1

    addl    %r11d, %r8d    # index = index + 1 << d
    shrl    %edx           # (subtree_size - 1 ) >> 1

    cmpl    %edi, %esi     # test(j - x)
    cmova   %r10d, %r8d    # index = index + 1
    cmova   %r9d, %edx     # subtree_size = (subtree_size) >> 1
    cmovbe  %esi, %eax     # p = j

    cmpl    %esi, %edi    # j != x
    jne .L65
.L59:
    cmpl    %esi, %edi    # x == j ?
    cmove   %edi, %eax    # if so, return x (== j) instead of p
    popq    %rbx          # restore callee-saved rbx
    .cfi_def_cfa_offset 16
    popq    %rbp          # restore base pointer
    .cfi_def_cfa_offset 8
    ret
    .cfi_endproc
*/


/*
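Assembly generated for dfs_pred WITH O3 enabled and without sparsity
(presumably the commented-out, gap-free dfs_pred variant above -- TODO confirm)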

dfs_pred:
.LFB33:
	.cfi_startproc
	pushq	%r15
	.cfi_def_cfa_offset 16
	.cfi_offset 15, -16
	pushq	%r14
	.cfi_def_cfa_offset 24
	.cfi_offset 14, -24
	movl	$-1, %r15d
	pushq	%r13
	.cfi_def_cfa_offset 32
	.cfi_offset 13, -32
	pushq	%r12
	.cfi_def_cfa_offset 40
	.cfi_offset 12, -40
	movl	$1, %r11d
	pushq	%rbp
	.cfi_def_cfa_offset 48
	.cfi_offset 6, -48
	pushq	%rbx
	.cfi_def_cfa_offset 56
	.cfi_offset 3, -56
	movl	$1, %r12d
	movl	24(%rsi), %r13d
	movl	16(%rsi), %r8d
	movl	20(%rsi), %r9d
	movq	(%rsi), %r14
	movl	%r13d, %ebx
	jmp	.L69
	.p2align 4,,10
	.p2align 3
.L76:
	addl	$1, %r11d
	cmpl	%r9d, %edx
	ja	.L66
	movl	%edx, %ebx
.L65:
	cmpl	%r11d, %r13d
	jb	.L74
.L69:
	leal	-1(%r8), %ebp
	movl	%r12d, %r10d
	leal	-1(%r11), %eax
	movl	%ebp, %ecx
	movl	(%r14,%rax,4), %eax
	sall	%cl, %r10d
	subl	$1, %r10d
	cmpl	%r9d, %ebx
	je	.L75
	subl	$2, %r8d
	movl	%ebx, %esi
	movl	%r12d, %edx
	movl	%r8d, %ecx
	subl	%r10d, %esi
	sall	%cl, %edx
	cmpl	%edx, %esi
	movl	%edx, %ecx
	leal	-1(%rdx), %r9d
	cmovbe	%esi, %ecx
	movl	%ecx, %edx
	addl	%r9d, %edx
.L63:
	cmpl	%edi, %eax
	ja	.L76
	subl	%edx, %ebx
	leal	1(%rdx,%r11), %r11d
	leal	-1(%rbx), %edx
	cmpl	%r9d, %edx
	jbe	.L71
	movl	%eax, %r15d
.L66:
	movl	%r10d, %r9d
	movl	%edx, %ebx
	movl	%ebp, %r8d
.L67:
	cmpl	%eax, %edi
	jne	.L65
	popq	%rbx
	.cfi_remember_state
	.cfi_def_cfa_offset 48
	popq	%rbp
	.cfi_def_cfa_offset 40
	popq	%r12
	.cfi_def_cfa_offset 32
	popq	%r13
	.cfi_def_cfa_offset 24
	popq	%r14
	.cfi_def_cfa_offset 16
	popq	%r15
	.cfi_def_cfa_offset 8
	ret
	.p2align 4,,10
	.p2align 3
.L75:
	.cfi_restore_state
	subl	$2, %r8d
	movl	%r12d, %esi
	movl	%r10d, %edx
	movl	%r8d, %ecx
	sall	%cl, %esi
	leal	-1(%rsi), %r9d
	jmp	.L63
	.p2align 4,,10
	.p2align 3
.L71:
	movl	%edx, %ebx
	movl	%eax, %r15d
	jmp	.L67
	.p2align 4,,10
	.p2align 3
*/
