Skip to content

Commit

Permalink
init
Browse files Browse the repository at this point in the history
  • Loading branch information
guowentian committed Feb 28, 2018
0 parents commit 6d55460
Show file tree
Hide file tree
Showing 116 changed files with 6,634 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
*.o
pagerank
encoder
workload
88 changes: 88 additions & 0 deletions Arguments.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
#ifndef __ARGUMENTS_H__
#define __ARGUMENTS_H__

#include <iostream>
#include <cassert>
#include "Meta.h"
#include "CommandLine.h"

// Prints the command-line help text to stdout: one line per supported flag,
// the numeric values of the enum-backed options (-a, -n, -o), and example
// invocations. Called by the argument checker when validation fails.
static void PrintUsage() {
    std::cout << "==========[USAGE]==========" << std::endl;

    // General options.
    std::cout << "-d: gDataFileName" << std::endl;
    std::cout << "-a: gAppType" << std::endl;
    std::cout << REVERSE_PUSH << ":rev push" << std::endl;
    std::cout << "-i: gIsDirected" << std::endl;
    std::cout << "-y: gIsDynamic" << std::endl;

    // Workload (sliding window / update stream) options.
    std::cout << "-w: gWindowRatio" << std::endl;
    std::cout << "-n: gWorkloadConfigType" << std::endl;
    std::cout << SLIDE_WINDOW_RATIO << ": SLIDE_WINDOW_RATIO, " << SLIDE_BATCH_SIZE << ": SLIDE_BATCH_SIZE" << std::endl;
    std::cout << "-r: gStreamUpdateCountVersusWindowRatio" << std::endl;
    std::cout << "-b: gStreamBatchCount" << std::endl;
    std::cout << "-c: gStreamUpdateCountPerBatch" << std::endl;
    std::cout << "-l: gStreamUpdateCountTotal" << std::endl;

    // Execution options.
    std::cout << "-s: gSourceVertexId" << std::endl;
    std::cout << "-t: gThreadNum" << std::endl;
    std::cout << "-o: gVariant" << std::endl;
    std::cout << OPTIMIZED << ": optimized, " << FAST_FRONTIER << ": fast frontier, " << EAGER << ": eager, " << VANILLA << ": VANILLA" << std::endl;
    std::cout << "-e: error tolerance" << std::endl;

    // Example invocations: one per workload configuration type, plus a legacy form.
    std::cout << "EXAMPLE: ./pagerank -d ../data/com-dblp.ungraph.bin -a 0 -i 0 -y 1 -w 0.1 -n 0 -r 0.01 -b 1000 -s 1" << std::endl;
    std::cout << "EXAMPLE: ./pagerank -d ../data/com-dblp.ungraph.bin -a 0 -i 0 -y 1 -w 0.1 -n 1 -c 100 -l 10000 -s 1" << std::endl;
    // Fixed typo in the user-facing text ("deprected" -> "deprecated").
    std::cout << "deprecated EXAMPLE: ./pagerank -d ../data/com-dblp.ungraph.bin -a 0 -i 0 -y 0" << std::endl;
}
static void PrintArguments(){
std::cout << "gAppType=" << gAppType << ",gIsDirected=" << gIsDirected << ",gIsDynamic=" << gIsDynamic << std::endl;
std::cout << "gWindowRatio=" << gWindowRatio << ",gWorkloadConfigType=" << gWorkloadConfigType << ",gStreamUpdateCountVersusWindowRatio=" << gStreamUpdateCountVersusWindowRatio << ",gStreamBatchCount=" << gStreamBatchCount << ",gStreamUpdateCountPerBatch=" << gStreamUpdateCountPerBatch << ",gStreamUpdateCountTotal=" << gStreamUpdateCountTotal << std::endl;
std::cout << "gSourceVertexId=" << gSourceVertexId << std::endl;
std::cout << "gThreadNum=" << gThreadNum << ",gVariant=" << gVariant << std::endl;
std::cout << "error=" << gTolerance << ",ALPHA=" << ALPHA << std::endl;
}

// Validates the global run parameters after parsing.
// On any violation it prints "invalid arguments" followed by the usage text
// and terminates the process with exit(-1); otherwise it returns normally.
static void ArgumentsChecker() {
    bool valid = true;

    // kAlgoTypeSize is the enum's count sentinel, so the valid algorithm ids
    // are [0, kAlgoTypeSize). The previous `>` comparison let the sentinel
    // value itself through.
    if (gAppType < 0 || gAppType >= kAlgoTypeSize){
        valid = false;
    }

    // -i and -y default to -1 and -d defaults to "" in the parser, so these
    // tests reject runs where a mandatory option was not supplied.
    if (gIsDirected < 0 || gIsDynamic < 0 || gDataFileName == ""){
        valid = false;
    }

    // Each workload configuration type needs its own pair of parameters.
    if (gWorkloadConfigType == SLIDE_WINDOW_RATIO){
        if (gStreamUpdateCountVersusWindowRatio < 0.0 || gStreamBatchCount == 0) valid = false;
    }
    else if (gWorkloadConfigType == SLIDE_BATCH_SIZE){
        if (gStreamUpdateCountPerBatch == 0 || gStreamUpdateCountTotal == 0) valid = false;
    }
    else{
        // Unknown workload configuration type.
        valid = false;
    }

    if (!valid){
        std::cout << "invalid arguments" << std::endl;
        PrintUsage();
        exit(-1);
    }
}

static void ArgumentsParser(int argc, char *argv[]) {
CommandLine commandline(argc, argv);

gDataFileName = commandline.GetOptionValue("-d", "");
gAppType = commandline.GetOptionIntValue("-a", 0);
gIsDirected = commandline.GetOptionIntValue("-i", -1);
gIsDynamic = commandline.GetOptionIntValue("-y", -1);

gWindowRatio = commandline.GetOptionDoubleValue("-w", 0.1);
gWorkloadConfigType = commandline.GetOptionIntValue("-n", SLIDE_WINDOW_RATIO);
gStreamUpdateCountVersusWindowRatio = commandline.GetOptionDoubleValue("-r", -1.0);
gStreamBatchCount = commandline.GetOptionIntValue("-b", 0);
gStreamUpdateCountPerBatch = commandline.GetOptionIntValue("-c", 0);
gStreamUpdateCountTotal = commandline.GetOptionIntValue("-l", 0);
gSourceVertexId = commandline.GetOptionIntValue("-s", 1);
gThreadNum = commandline.GetOptionIntValue("-t", 1);
gVariant = commandline.GetOptionIntValue("-o", 0);
gTolerance = commandline.GetOptionDoubleValue("-e", 0.000000001);

ArgumentsChecker();
}

#endif
32 changes: 32 additions & 0 deletions EdgeBatch.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#ifndef __EDGE_BATCH_H__
#define __EDGE_BATCH_H__

#include "Meta.h"

// Fixed-capacity buffer of edges stored as three parallel arrays.
// Entry i is described by (edge1[i], edge2[i], is_insert[i]); `size` is the
// allocated capacity and `length` the number of entries currently in use.
// The struct owns its arrays and releases them in the destructor.
struct EdgeBatch{

    // Allocates storage for `sz` entries; the batch starts empty (length == 0).
    // The array contents are left uninitialized.
    EdgeBatch(IndexType sz)
        : edge1(NULL), edge2(NULL), is_insert(NULL), length(0), size(sz){
        try{
            edge1 = new IndexType[size];
            edge2 = new IndexType[size];
            is_insert = new bool[size];
        }
        catch (...){
            // The destructor does not run when a constructor throws, so free
            // whatever was already allocated before propagating the error.
            Release();
            throw;
        }
    }
    ~EdgeBatch(){
        Release();
    }

    IndexType *edge1;
    IndexType *edge2;
    bool *is_insert;

    IndexType length;
    IndexType size;

private:
    // Copying is disabled: a member-wise copy would share the three arrays
    // and both objects would delete[] them. Declared but intentionally not
    // defined (pre-C++11 equivalent of `= delete`).
    EdgeBatch(const EdgeBatch &);
    EdgeBatch &operator=(const EdgeBatch &);

    // Frees the arrays and nulls the pointers; safe to call more than once.
    void Release(){
        delete[] edge1;
        edge1 = NULL;
        delete[] edge2;
        edge2 = NULL;
        delete[] is_insert;
        is_insert = NULL;
    }
};

#endif
136 changes: 136 additions & 0 deletions GraphVec.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
#ifndef __GRAPH_VEC_H__
#define __GRAPH_VEC_H__

#include "Meta.h"
#include "TimeMeasurer.h"
#include "EdgeBatch.h"
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

class GraphVec{
public:

GraphVec(const std::string &filename, bool directed = true){
this->directed = directed;
this->filename = filename;
ConstructGraph();
}
GraphVec(){}
~GraphVec(){}

virtual void ConstructGraph(){
// edge file
// format: a b (a and b is vertex id)
TimeMeasurer timer;
timer.StartTimer();

// initialize file
FILE* file_ptr = fopen(filename.c_str(), "rb");
assert(file_ptr != NULL);
fseek(file_ptr, 0L, SEEK_END);
size_t file_size = ftell(file_ptr);
rewind(file_ptr);

// first int indicate vertex_count
size_t res_size;
size_t file_pos = 0;
res_size = fread(&vertex_count, sizeof(IndexType), 1, file_ptr);
assert(res_size == 1);
file_pos += sizeof(IndexType);
std::cout << "vertex_count=" << vertex_count << std::endl;

col_ind.resize(vertex_count);
in_col_ind.resize(vertex_count);
deg.resize(vertex_count);

IndexType v1, v2;
edge_count = 0;
while (file_pos < file_size){
res_size += fread(&v1, sizeof(IndexType), 1, file_ptr);
res_size += fread(&v2, sizeof(IndexType), 1, file_ptr);

assert(0 <= v1 && v1 < vertex_count);
assert(0 <= v2 && v2 < vertex_count);

col_ind[v1].push_back(v2);
in_col_ind[v2].push_back(v1);
edge_count++;
if (!directed){
col_ind[v2].push_back(v1);
in_col_ind[v1].push_back(v2);
edge_count++;
}
file_pos += sizeof(IndexType) * 2;
}
IndexType edge_stream_count = directed ? edge_count : edge_count / 2;
assert(res_size / 2 == edge_stream_count);
std::cout << "edge_stream_count=" << edge_stream_count << std::endl;
std::cout << "edge_count=" << edge_count << std::endl;

for (IndexType i = 0; i < vertex_count; ++i) deg[i] = col_ind[i].size();

fclose(file_ptr);

timer.EndTimer();
std::cout << "read file elapsed time=" << timer.GetElapsedMicroSeconds() * 1.0 / 1000 << "ms" << std::endl;
}


void SortCSRColumns(){
for (IndexType i = 0; i < vertex_count; ++i){
std::sort(col_ind[i].begin(), col_ind[i].end());
}
for (IndexType i = 0; i < vertex_count; ++i){
std::sort(in_col_ind[i].begin(), in_col_ind[i].end());
}
}
virtual void SerializeEdgeStream(EdgeBatch *edge_stream){
FILE* file_ptr = fopen(filename.c_str(), "rb");
assert(file_ptr != NULL);
fseek(file_ptr, 0L, SEEK_END);
size_t file_size = ftell(file_ptr);
rewind(file_ptr);

size_t res_size, file_pos;
IndexType vcount;
res_size = fread(&vcount, sizeof(IndexType), 1, file_ptr);
assert(res_size == 1);
file_pos = sizeof(IndexType);

IndexType v1, v2;
edge_stream->length = 0;
while (file_pos < file_size){
res_size += fread(&v1, sizeof(IndexType), 1, file_ptr);
res_size += fread(&v2, sizeof(IndexType), 1, file_ptr);
edge_stream->edge1[edge_stream->length] = v1;
edge_stream->edge2[edge_stream->length] = v2;
edge_stream->length++;
file_pos += sizeof(IndexType) * 2;
}
fclose(file_ptr);
}

virtual bool StreamUpdates(const size_t stream_count){
return true;
}

public:
std::string filename;
bool directed;
IndexType vertex_count;
IndexType edge_count;

std::vector<IndexType> deg;
std::vector<std::vector<IndexType> > col_ind;
std::vector<std::vector<IndexType> > in_col_ind;

};


#endif
22 changes: 22 additions & 0 deletions Meta.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#include "Meta.h"


// Definitions of the global run parameters declared in Meta.h, listed in the
// same order as the declarations. The initial values below are placeholders
// until the command line has been parsed.

// ---- general ----
std::string gDataFileName = "";
int gAppType = -1;
int gIsDirected = -1;
int gIsDynamic = -1;

// ---- workload ----
double gWindowRatio = 0.1;
int gWorkloadConfigType = SLIDE_WINDOW_RATIO;
// used with SLIDE_WINDOW_RATIO
double gStreamUpdateCountVersusWindowRatio = -1.0;
size_t gStreamBatchCount = 0;
// used with SLIDE_BATCH_SIZE
size_t gStreamUpdateCountPerBatch = 0;
size_t gStreamUpdateCountTotal = 0;

int gSourceVertexId = 1;

// ---- execution ----
int gThreadNum = 1;
int gVariant = 0;

// ---- error tolerance ----
ValueType gTolerance = 0.000000001;

73 changes: 73 additions & 0 deletions Meta.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#ifndef __META_H__
#define __META_H__

#include <iostream>
#include <string>

// Algorithms selectable with the -a option. Enumerators are numbered from 0,
// so the trailing sentinel equals the number of algorithms.
enum AlgoType{
    REVERSE_PUSH,   // 0
    kAlgoTypeSize   // 1: count sentinel, not an algorithm
};
// Implementation variants selectable with the -o option. Enumerators are
// numbered from 0, so the trailing sentinel equals the number of variants.
enum VariantType{
    OPTIMIZED,         // 0
    FAST_FRONTIER,     // 1
    EAGER,             // 2
    VANILLA,           // 3
    kVariantTypeSize   // 4: count sentinel, not a variant
};

// How the slide size of the update stream is specified (-n option).
enum WorkloadConfigType{
    // slide size expressed as a fraction of the window size, e.g. 1%, 0.1%, 0.01%
    SLIDE_WINDOW_RATIO = 0,
    // slide size expressed as an absolute number of edges, e.g. 10^5
    SLIDE_BATCH_SIZE = 1
};
// Scalar types used throughout the project.
typedef int IndexType;      // vertex ids, edge counts, array indices
typedef double ValueType;   // numeric values such as the tolerance and ALPHA

// Bounds on vertex ids.
static const IndexType kMinVertexId = 0;
static const IndexType kMaxVertexId = 1000000000;

// which is REST_PROB
static const ValueType ALPHA = 0.15;

static const size_t kMasterThreadId = 0;

static const IndexType DEFAULT_SOURCE_VERTEX_ID = 1;

// GPU launch limits
static const size_t THREADS_PER_BLOCK = 256;
static const size_t THREADS_PER_WARP = 32;
static const size_t MAX_BLOCKS_NUM = 96 * 8;
static const size_t MAX_THREADS_NUM = MAX_BLOCKS_NUM * THREADS_PER_BLOCK;

// CPU
static const size_t VERTEX_DEGREE_THRESHOLD = 512;

// ============== parameter ==============
// Global run parameters. They are declared here, defined in Meta.cpp and
// filled in from the command line by ArgumentsParser (Arguments.h); the
// option letter that sets each one is noted next to it.
// general
extern std::string gDataFileName;   // -d
extern int gAppType;                // -a, compared against AlgoType values
extern int gIsDirected;             // -i, a negative value is rejected by the argument checker
extern int gIsDynamic;              // -y, a negative value is rejected by the argument checker
// workload
extern double gWindowRatio;         // -w
extern int gWorkloadConfigType;     // -n, a WorkloadConfigType value
// in either workload, gStreamBatchCount, gStreamUpdateCountPerBatch and gStreamUpdateCountTotal will be used in execution
// for SLIDE_WINDOW_RATIO
extern double gStreamUpdateCountVersusWindowRatio;  // -r
extern size_t gStreamBatchCount;                    // -b
// for SLIDE_BATCH_SIZE
extern size_t gStreamUpdateCountPerBatch;           // -c
extern size_t gStreamUpdateCountTotal;              // -l

extern int gSourceVertexId;         // -s
// execution
extern int gThreadNum;              // -t
extern int gVariant;                // -o, usage text lists the VariantType values
// epsilon
extern ValueType gTolerance;        // -e, error tolerance

// Number of blocks needed to cover CALC_SIZE items at ITEMS_PER_BLOCK items
// per block (ceiling division), capped at MAX_BLOCKS_NUM.
// Both arguments and the whole expansion are parenthesized so the macro can
// be given compound expressions and used inside larger expressions.
// Each argument is evaluated more than once, so do not pass expressions with
// side effects.
#define CALC_BLOCKS_NUM(ITEMS_PER_BLOCK, CALC_SIZE) \
    ((MAX_BLOCKS_NUM < (((CALC_SIZE) - 1) / (ITEMS_PER_BLOCK) + 1)) \
        ? MAX_BLOCKS_NUM \
        : (((CALC_SIZE) - 1) / (ITEMS_PER_BLOCK) + 1))
//#define CALC_THREAD_BIN_SIZE(vertex_count, max_threads_num) ((vertex_count - 1) / max_threads_num + 1)


#endif
7 changes: 7 additions & 0 deletions PapiProfiler.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#include "PapiProfiler.h"

// Out-of-class definitions of PapiProfiler's static arrays. They are only
// compiled when PAPI_PROFILE is defined. Each array has kPapiEventsNum
// elements and `= {0}` zero-initializes every element.
#if defined(PAPI_PROFILE)
long_long PapiProfiler::papi_values[PapiProfiler::kPapiEventsNum] = {0};
long_long PapiProfiler::papi_temp_values[PapiProfiler::kPapiEventsNum] = {0};
// kPapiEventsNum needs no qualification here: after the qualified declarator
// name, lookup already happens in the scope of PapiProfiler.
int PapiProfiler::papi_events[kPapiEventsNum] = {0};
#endif
Loading

0 comments on commit 6d55460

Please sign in to comment.