Created
June 5, 2016 07:35
-
-
Save vgangireddyin/86524cee4c89658629a4cf4baf9ed6b5 to your computer and use it in GitHub Desktop.
BPlus Tree in CPP
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <vector> | |
#include <stdio.h> | |
#include <stack> | |
#include <stdlib.h> | |
#include <fstream> | |
#include <math.h> | |
#include <time.h> | |
using namespace std; | |
int nextPointer = 0, rfp, keySize; //track next pointer value, root pointer and key sizes | |
struct BplusNode | |
{ | |
public: | |
vector<int> filePointers; // these are served as file pointers and object pointers as well | |
vector<float> keys; // constructed as all keys are in sorted order | |
int next, prev; // in case of leaf these would help | |
int isLeaf; // 0 for internal node and 1 for leaf node | |
}; | |
struct splitReturn // this will combine node in memory pointer and its int pointer used in splitNode method | |
{ | |
BplusNode node; | |
int childptr; | |
}; | |
struct KNNElement | |
{ | |
float key; | |
float distance; | |
}; | |
int calcKeySize(int s) | |
{ | |
s = s - 3 * sizeof(int); // extra file pointer when overload, next and prev | |
s = s - sizeof(float); // extra key when overload | |
return (s - sizeof(int)) / (sizeof(int) + sizeof(float)) ; | |
} | |
BplusNode createNewNode(int isLeaf) | |
{ | |
BplusNode newNode; | |
newNode.next = -1; | |
newNode.prev = -1; | |
newNode.isLeaf = isLeaf; | |
return newNode; | |
} | |
//file structure: prev, next, fp 1, val 1, fp 2, val 2,..., fp n val n, fp n+1 | |
//fp1(special pointer) : +ve in case of internal node, -ve in case of leaf node | |
BplusNode loadIntoMemory(int fpt) | |
{ | |
BplusNode node; | |
char fname[33]; | |
int temp; | |
float temp2; | |
sprintf(fname, "%d.bpt", fpt); | |
FILE *infile = fopen(fname, "rb"); | |
if(infile != NULL) | |
{ | |
fread((char*)&node.prev, sizeof(node.prev), 1, infile); | |
fread((char*)&node.next, sizeof(node.next), 1, infile); | |
//special pointer | |
fread((char*)&temp, sizeof(temp), 1, infile); | |
node.filePointers.push_back(temp); | |
if(temp >= 0) | |
node.isLeaf = 0; | |
else | |
node.isLeaf = 1; | |
while(1) | |
{ | |
//printf("loading keys \n"); | |
if(fread((char*)&temp2, sizeof(temp2), 1, infile) == 0) | |
break; | |
node.keys.push_back(temp2); | |
fread((char*)&temp, sizeof(temp), 1, infile); | |
node.filePointers.push_back(temp); | |
} | |
} | |
else | |
{ | |
node = createNewNode(1); | |
} | |
fclose(infile); | |
return node; | |
} | |
void writeBackFile(BplusNode &node, int fpt) | |
{ | |
char fname[33]; | |
sprintf(fname, "%d.bpt", fpt); | |
FILE *outfile = fopen(fname, "wb"); | |
fwrite((char*)&node.prev, sizeof(node.prev), 1, outfile); | |
fwrite((char*)&node.next, sizeof(node.next), 1, outfile); | |
//write special pointer | |
fwrite((char*)&node.filePointers[0], sizeof(node.filePointers[0]), 1, outfile); | |
for(int i = 0; i < node.keys.size(); i++) | |
{ | |
fwrite((char*)&node.keys[i], sizeof(node.keys[i]), 1, outfile); | |
fwrite((char*)&node.filePointers[i+1], sizeof(node.filePointers[i+1]), 1, outfile); | |
} | |
fclose(outfile); | |
node.filePointers.clear(); | |
node.keys.clear(); | |
} | |
// if value not present it will return the index of next maximum, if multiple values are there it will return the least index | |
int findIndex(vector<float> &keys, int from, int to, float value) | |
{ | |
if(from == to) | |
{ | |
// printf("this should execute for first time\n %f\t%f\t%d\n", keys[from], value, from); | |
if(keys[from] < value) | |
{ | |
return from + 1; | |
} | |
else | |
{ | |
return from; | |
} | |
} | |
else | |
{ | |
int mid = (from + to) / 2; | |
if(keys[mid] >= value) | |
return findIndex(keys, from, mid, value); | |
else | |
return findIndex(keys, mid+1, to, value); | |
} | |
} | |
//merge two sorted lists and return best k | |
vector<float> mergeLists(vector<KNNElement> &l, vector<KNNElement> &r, int k) | |
{ | |
vector<float> ans; | |
int i = 0, j = 0, cnt = k; | |
if(i == l.size()) | |
{ | |
while(cnt) | |
{ | |
ans.push_back(r[j].key); | |
j++; | |
cnt--; | |
} | |
} | |
if(j == r.size()) | |
{ | |
while(cnt) | |
{ | |
ans.push_back(l[i].key); | |
i++; | |
cnt--; | |
} | |
} | |
while(cnt) | |
{ | |
if(l[i].distance <= r[j].distance) | |
{ | |
ans.push_back(l[i].key); | |
i++; | |
cnt--; | |
} | |
else | |
{ | |
ans.push_back(r[j].key); | |
j++; | |
cnt--; | |
} | |
if(i == l.size()) | |
{ | |
printf("it should execute \n"); | |
while(cnt) | |
{ | |
ans.push_back(r[j].key); | |
j++; | |
cnt--; | |
} | |
} | |
if(j == r.size()) | |
{ | |
while(cnt) | |
{ | |
ans.push_back(l[i].key); | |
i++; | |
cnt--; | |
} | |
} | |
} | |
return ans; | |
} | |
//split the parent of node return next split is need or not, if yes +ve number that is parent pointer need to process | |
splitReturn splitNode(BplusNode &node, int npt, int ppt) | |
{ | |
int mid = keySize / 2; | |
int spt = ++nextPointer; | |
BplusNode newSibling = createNewNode(node.isLeaf); | |
//set special pointer | |
if(node.isLeaf) | |
newSibling.filePointers.push_back(-1); | |
else | |
newSibling.filePointers.push_back(node.filePointers[mid]); | |
//share items | |
for(int i = (mid + 1); i < node.keys.size(); i++) | |
{ | |
newSibling.keys.push_back(node.keys[i]); | |
newSibling.filePointers.push_back(node.filePointers[i+1]); | |
} | |
//clean node | |
node.keys.erase(node.keys.begin() + (mid + 1), node.keys.end()); | |
node.filePointers.erase(node.filePointers.begin() + (mid + 1), node.filePointers.end()); | |
//leaf case, make pointers | |
if(node.isLeaf) | |
{ | |
int temp = node.next; | |
node.next = spt; | |
newSibling.prev = npt; | |
newSibling.next = temp; | |
if(temp != -1) | |
{ | |
BplusNode tempnode = loadIntoMemory(temp); | |
tempnode.prev = spt; | |
writeBackFile(tempnode, temp); | |
} | |
} | |
//root case | |
if(ppt == -1) | |
{ | |
int rpt = ++nextPointer; | |
BplusNode newNode = createNewNode(0); | |
newNode.keys.push_back(node.keys[mid]); | |
//update root pointers | |
newNode.filePointers.push_back(npt); | |
newNode.filePointers.push_back(spt); | |
//write back data | |
writeBackFile(node, npt); | |
writeBackFile(newSibling, spt); | |
writeBackFile(newNode, rpt); | |
//change new node as root | |
rfp = rpt; | |
BplusNode dummy; | |
splitReturn ans = {dummy, -1}; | |
return ans; | |
} | |
//non-root case, parent contains the elements and be careful to insert new key into parent | |
else | |
{ | |
BplusNode parent = loadIntoMemory(ppt); | |
//insert key-mid in perfect place | |
int index = findIndex(parent.keys, 0, parent.keys.size() - 1, node.keys[mid]); // if equal keys : no problem, if two keys are equal all its children are equal | |
parent.keys.insert(parent.keys.begin() + index, node.keys[mid]); | |
parent.filePointers.insert(parent.filePointers.begin() + (index + 1), spt); | |
//write back data | |
writeBackFile(node, npt); | |
writeBackFile(newSibling, spt); | |
if(parent.keys.size() < keySize) | |
{ | |
writeBackFile(parent, ppt); | |
BplusNode dummy; | |
splitReturn ans = {dummy, -1}; | |
return ans; | |
} | |
else | |
{ | |
splitReturn ans = {parent, ppt}; | |
return ans; | |
} | |
} | |
} | |
//using split node implement insert node, iterative version | |
void insertValue(float value, int objpointer) | |
{ | |
stack<int> path; | |
BplusNode root = loadIntoMemory(rfp); | |
int next = rfp, pfp, cfp = 0; | |
//move to corresponding leaf node | |
while(root.isLeaf == 0) | |
{ | |
path.push(next); | |
int index = findIndex(root.keys, 0, root.keys.size() - 1, value); //it will return corresponding index | |
next = root.filePointers[index]; | |
cfp = next; | |
root = loadIntoMemory(next); | |
} | |
if(root.keys.size() < keySize) | |
{ | |
//init case | |
if(root.keys.size() == 0) | |
{ | |
root.filePointers.push_back(-1); | |
root.keys.push_back(value); | |
root.filePointers.push_back(objpointer); | |
writeBackFile(root, rfp); | |
} | |
else | |
{ | |
int index = findIndex(root.keys, 0, root.keys.size() - 1, value); | |
//printf("index %d\n", index); | |
root.keys.insert(root.keys.begin() + index, value); | |
root.filePointers.insert(root.filePointers.begin() + (index + 1), objpointer); | |
//write back root | |
writeBackFile(root, cfp); | |
} | |
} | |
else | |
{ | |
// insert value in node, but call split | |
int index = findIndex(root.keys, 0, root.keys.size() - 1, value); | |
root.keys.insert(root.keys.begin() + index, value); | |
root.filePointers.insert(root.filePointers.begin() + (index + 1), objpointer); | |
BplusNode child = root; | |
//track down the path | |
do | |
{ | |
//root case | |
if(path.size() == 0) | |
{ | |
splitReturn check = splitNode(child, cfp, -1); | |
child = check.node; | |
cfp = check.childptr; | |
} | |
else | |
{ | |
pfp = path.top(); | |
path.pop(); | |
splitReturn check = splitNode(child, cfp, pfp); | |
child = check.node; | |
cfp = check.childptr; | |
} | |
}while(cfp != -1); | |
} | |
} | |
vector<float> windowQuery(float a, float b) | |
{ | |
int next = rfp, Index = 0; | |
vector<float> ans; | |
bool stopit = false, first = true;; | |
BplusNode node = loadIntoMemory(next); | |
//move to the proper leaf | |
while(node.isLeaf == 0) | |
{ | |
Index = findIndex(node.keys, 0, node.keys.size() - 1, a); | |
next = node.filePointers[Index]; | |
node = loadIntoMemory(next); | |
} | |
//sequential search from leaf | |
while(!stopit) | |
{ | |
if(first) | |
Index = findIndex(node.keys, 0, node.keys.size() - 1, a); | |
else | |
Index = 0; | |
for(int i = Index; i < node.keys.size(); i++) | |
{ | |
first = false; | |
if(node.keys[i] <= b) | |
{ | |
ans.push_back(node.keys[i]); | |
} | |
else | |
{ | |
stopit = true; | |
break; | |
} | |
} | |
if(node.next == -1) | |
break; | |
node = loadIntoMemory(node.next); | |
} | |
return ans; | |
} | |
vector<float> rangeQuery(float center, float range) | |
{ | |
return windowQuery(center-range, center+range); | |
} | |
bool pointQuery(float a) | |
{ | |
return windowQuery(a, a).size() > 0; | |
} | |
vector<float> kNNQuery(float center, int k) | |
{ | |
vector<KNNElement> left, right; | |
KNNElement temp; | |
int next = rfp, Index = 0, centerLeaf, cnt = k; | |
bool first = true, stopit = false; | |
BplusNode node = loadIntoMemory(next); | |
//move to the proper leaf | |
while(node.isLeaf == 0) | |
{ | |
Index = findIndex(node.keys, 0, node.keys.size() - 1, center); | |
next = node.filePointers[Index]; | |
node = loadIntoMemory(next); | |
} | |
centerLeaf = next; | |
//load all right | |
while(!stopit) | |
{ | |
if(first) | |
Index = findIndex(node.keys, 0, node.keys.size() - 1, center); | |
else | |
Index = 0; | |
for(int i = Index; i < node.keys.size(); i++) | |
{ | |
first = false; | |
if(cnt--) | |
{ | |
temp = {node.keys[i], fabs(node.keys[i] - center)}; | |
left.push_back(temp); | |
} | |
else | |
{ | |
stopit = true; | |
break; | |
} | |
} | |
if(node.next == -1) | |
break; | |
node = loadIntoMemory(node.next); | |
} | |
//load all left | |
cnt = k; | |
next = centerLeaf; | |
node = loadIntoMemory(next); | |
stopit = false; | |
first = true; | |
while(!stopit) | |
{ | |
if(first) | |
Index = findIndex(node.keys, 0, node.keys.size() - 1, center); | |
else | |
Index = node.keys.size(); | |
for(int i = Index - 1; i >= 0; i--) | |
{ | |
first = false; | |
if(cnt--) | |
{ | |
temp = {node.keys[i], fabs(node.keys[i] - center)}; | |
right.push_back(temp); | |
} | |
else | |
{ | |
stopit = true; | |
break; | |
} | |
} | |
if(node.prev == -1) | |
break; | |
node = loadIntoMemory(node.prev); | |
} | |
//merge left and right and pick best k | |
return mergeLists(left, right, k); | |
} | |
//for testing | |
void testTree() | |
{ | |
int next = rfp; | |
BplusNode node = loadIntoMemory(next); | |
printf("testing\n"); | |
while(node.isLeaf == 0) | |
{ | |
next = node.filePointers[0]; | |
printf("next is %d\n", next); | |
node = loadIntoMemory(next); | |
} | |
while(1) | |
{ | |
printf("%d\t%d\t%d\n", node.filePointers[0], node.prev, node.next); | |
for(int i = 0; i < node.keys.size(); i++) | |
{ | |
printf("%f\t", node.keys[i]); | |
printf("%d\n", node.filePointers[i+1]); | |
} | |
if(node.next == -1) | |
break; | |
node = loadIntoMemory(node.next); | |
} | |
} | |
int main() | |
{ | |
clock_t start; | |
float temp, x, y, t; | |
int i, objptr = 0, choice, K , j = 0, pagesize; | |
bool check = false; | |
char obj[13]; | |
vector<float> ans; | |
FILE *fpinfo = fopen("bplus.info", "a"); // bplus.info contain the root and nextpointer values | |
FILE *fp = fopen("assgn3_bplus_data.txt", "r"); | |
FILE *fpdata = fopen("bplus.data", "wb"); | |
FILE *queryFile = fopen("assgn3_bplus_querysample.txt", "r"); | |
FILE *reportFile = fopen("bplusreport.csv", "w"); | |
FILE *outputFile = fopen("bplusoutput.txt", "w"); | |
FILE *config = fopen("bplustree.config","r"); | |
if(fpinfo == NULL) | |
{ | |
rfp = 0; | |
nextPointer = 0; | |
} | |
else | |
{ | |
fscanf(fpinfo, "%d%d", &rfp, &nextPointer); | |
} | |
fscanf(config, "%d", &pagesize); | |
keySize = calcKeySize(pagesize); | |
//build tree | |
i = fscanf(fp, "%f%s", &temp, obj); | |
float time = 0; | |
while(i == 2) | |
{ | |
j++; | |
if(j % 5000 == 0) | |
{ | |
printf("inserting %d : %f\t%s\n", j, temp, obj); | |
} | |
fwrite (obj , sizeof(char), 13, fpdata); | |
start = clock(); | |
insertValue(temp, objptr); | |
time += float( clock() - start); | |
objptr += 13; | |
i = fscanf(fp, "%f%s", &temp, obj); | |
} | |
printf("Entire tree is build in %f time units", time ); | |
j = 0; | |
while(fscanf(queryFile, "%d", &choice) == 1) | |
{ | |
j++; | |
if(j % 500 == 0) | |
{ | |
printf("accessing %d query\n", j); | |
} | |
switch(choice) | |
{ | |
case 0: | |
fscanf(queryFile,"%f%s", &temp, obj); | |
fwrite (obj , sizeof(char), 13, fpdata); | |
start = clock(); | |
insertValue(temp, objptr); | |
t = float( clock() - start ) ; | |
objptr += 13; | |
fprintf(reportFile, "%d,%f\n", choice, t); | |
fprintf(outputFile,"%d\t%f\t%s\n", choice, temp, obj); | |
break; | |
case 1: | |
fscanf(queryFile,"%f", &x); | |
start = clock(); | |
check = pointQuery(x); | |
t = float( clock() - start ) ; | |
fprintf(reportFile, "%d,%f\n", choice, t); | |
fprintf(outputFile,"%d\t%f\n", choice, x); | |
if(check) | |
fprintf(outputFile,"%f\n", x); | |
break; | |
case 2: | |
ans.clear(); | |
fscanf(queryFile,"%f%f", &x, &y); | |
start = clock(); | |
ans = rangeQuery(x, y); | |
t = float( clock() - start ) ; | |
fprintf(reportFile, "%d,%f,%d\n", choice, t, ans.size()); | |
fprintf(outputFile,"%d\t%f\t%f\n", choice, x, y); | |
// printf("%d\t%f\t%f\t%f\n", choice, qurpoint.p[0], qurpoint.p[1], range); | |
for(int i = 0; i < ans.size(); i++) | |
fprintf(outputFile,"%f\n", ans[i]); | |
break; | |
case 3: | |
ans.clear(); | |
fscanf(queryFile,"%f%d", &x, &K); | |
start = clock(); | |
ans = kNNQuery(x, K); | |
t = float( clock() - start ) ; | |
fprintf(reportFile, "%d,%f,%d\n", choice, t, ans.size()); | |
fprintf(outputFile,"%d\t%f\t%d\n", choice, x, K); | |
// printf("%d\t%f\t%f\t%d\n", choice, qurpoint.p[0], qurpoint.p[1], K); | |
for(int i = 0; i < ans.size(); i++) | |
fprintf(outputFile,"%f\n", ans[i]); | |
break; | |
case 4: | |
ans.clear(); | |
fscanf(queryFile,"%f%f", &x, &y); | |
start = clock(); | |
ans = windowQuery(min(x, y), max(x,y)); | |
t = float( clock() - start ); | |
fprintf(reportFile, "%d,%f,%d\n", choice, t, ans.size()); | |
fprintf(outputFile,"%d\t%f\t%f\n", choice, x, y); | |
for(int i = 0; i < ans.size(); i++) | |
fprintf(outputFile,"%f\n", ans[i]); | |
break; | |
} | |
} | |
//closing opening files | |
fflush(fpinfo); | |
fprintf(fpinfo, "%d %d", rfp, nextPointer); | |
fclose(fpinfo); | |
fclose(fp); | |
fclose(fpdata); | |
fclose(reportFile); | |
fclose(outputFile); | |
fclose(queryFile); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment