-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
parse paths of worker/partitions nodes from yml; add rmat, weighted email graph; add types for rdd row for mains, mirrors; add aggregation driver that unions partitioned edgelists, assigns mains, mirrors to partitions, and saves to partition paths; add rdd caching to compression script;
- Loading branch information
Showing
14 changed files
with
718 additions
and
77 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
0 1 | ||
0 2 | ||
0 3 | ||
0 6 | ||
0 32 | ||
0 34 | ||
0 48 | ||
0 57 | ||
1 0 | ||
1 4 | ||
1 8 | ||
1 17 | ||
1 32 | ||
1 34 | ||
1 40 | ||
1 42 | ||
2 32 | ||
3 0 | ||
3 32 | ||
3 48 | ||
3 56 | ||
4 20 | ||
4 32 | ||
5 0 | ||
8 0 | ||
8 2 | ||
8 3 | ||
8 4 | ||
8 32 | ||
8 36 | ||
8 48 | ||
9 0 | ||
9 1 | ||
9 36 | ||
9 40 | ||
10 50 | ||
12 48 | ||
14 0 | ||
16 0 | ||
16 2 | ||
16 32 | ||
16 43 | ||
16 48 | ||
17 4 | ||
17 33 | ||
17 41 | ||
24 38 | ||
24 51 | ||
32 0 | ||
32 3 | ||
33 4 | ||
33 8 | ||
34 0 | ||
35 32 | ||
36 0 | ||
36 38 | ||
40 0 | ||
40 4 | ||
41 36 | ||
44 15 | ||
48 0 | ||
48 8 | ||
48 17 | ||
50 0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,2 @@ | ||
Nodes: 8 | ||
Edges: 32 | ||
Nodes: 33 | ||
Edges: 64 |
File renamed without changes.
2 changes: 2 additions & 0 deletions
2
src/main/resources/graphs/email-Eu-core-random-weights/stats.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
Nodes: 986 | ||
Edges: 24929 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
workers: | ||
- src/main/resources/graphs/8rmat/partitions/hybrid/bySrc/p0 | ||
- src/main/resources/graphs/8rmat/partitions/hybrid/bySrc/p1 | ||
- src/main/resources/graphs/8rmat/partitions/hybrid/bySrc/p2 | ||
- src/main/resources/graphs/8rmat/partitions/hybrid/bySrc/p3 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
5 changes: 5 additions & 0 deletions
5
src/main/scala/com/preprocessing/aggregation/Aggregator.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
package com.preprocessing.aggregation | ||
|
||
// Placeholder for partition aggregation: the class body is currently empty, so none of the | ||
// constructor parameters (numPartitions, partitionRoot, isWeighted, sep) are used yet. | ||
// NOTE(review): presumably meant to hold the logic currently inlined in Driver.main - confirm. | ||
class Aggregator(numPartitions: Int, partitionRoot: String, isWeighted: Boolean, sep: String) { | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
package com.preprocessing.aggregation | ||
|
||
import com.preprocessing.partitioning.Util.{getDegreesByPartition, partitionAssignment, partitionMainsDF, partitionMirrorsDF, readMainPartitionDF, readMirrorPartitionDF, readPartitionsAndJoin, readWorkerPathsFromYaml} | ||
import org.apache.spark.rdd.RDD | ||
import org.apache.spark.sql.SparkSession | ||
import org.apache.spark.{SparkConf, SparkContext} | ||
|
||
|
||
// Driver program to test partition aggregation in preparation for ingestion. | ||
// Steps in main: read the partitioned edge lists, compute per-partition degree data, assign | ||
// mains and mirrors, save both via the partition map, then read each partition back and print it. | ||
// akka | ||
object Driver { | ||
|
||
// Entry point. `args` is not read; every path and setting below is hard-coded. | ||
def main(args: Array[String]): Unit = { | ||
|
||
|
||
// local spark config | ||
val appName: String = "preprocessing.aggregation.Driver" | ||
val conf = new SparkConf() | ||
.setAppName(appName) | ||
.setMaster("local[*]") | ||
val sc = new SparkContext(conf) | ||
|
||
// NOTE(review): getOrCreate is expected to reuse the SparkContext created above - confirm. | ||
val spark: SparkSession = SparkSession.builder.master("local[*]").getOrCreate | ||
|
||
// Pin the filesystem implementations used for the hdfs:// and file:// schemes. | ||
val hadoopConfig = sc.hadoopConfiguration | ||
hadoopConfig.set("fs.hdfs.impl", classOf[org.apache.hadoop.hdfs.DistributedFileSystem].getName) | ||
hadoopConfig.set("fs.file.impl", classOf[org.apache.hadoop.fs.LocalFileSystem].getName) | ||
|
||
// Passed to readPartitionsAndJoin below. | ||
val numPartitions = 4 | ||
|
||
// Location of the yaml file that is handed to readWorkerPathsFromYaml. | ||
val workerPaths = "src/main/resources/paths.yaml" | ||
|
||
// Map from partition id to the location of the mains and mirrors for that partition. | ||
// NOTE(review): the original comment says these locations are on hdfs; the paths visible in | ||
// this driver are relative local paths - confirm. The `: String` ascription is redundant. | ||
val partitionMap = readWorkerPathsFromYaml(workerPaths: String) | ||
|
||
// Root directory given to readPartitionsAndJoin. | ||
val path = "src/main/resources/graphs/8rmat/partitions/hybrid/bySrc" | ||
// NOTE(review): mainsPartitionPath and mirrorsPartitionPath are not referenced again in main. | ||
val mainsPartitionPath = path + "/mains" | ||
val mirrorsPartitionPath = path + "/mirrors" | ||
// Field separator passed to readPartitionsAndJoin (a single space). | ||
val sep = " " | ||
|
||
// (partition id, (source, destination, weight)) | ||
val edgeList: RDD[(Int, (Int, Int, Int))] = readPartitionsAndJoin(sc, path, numPartitions, sep) | ||
|
||
// Results are passed unchanged to partitionAssignment; their exact types live in Util. | ||
val (degrees, outNeighbors, inDegreesPerPartition) = getDegreesByPartition(edgeList) | ||
|
||
val (mains, mirrors) = partitionAssignment(degrees, outNeighbors, inDegreesPerPartition) | ||
|
||
// save to file | ||
// NOTE(review): presumably written beneath each partitionMap location, given the read-back | ||
// loop below reads location + "/mains" and location + "/mirrors" - confirm in Util. | ||
partitionMainsDF(mains, spark, partitionMap) | ||
partitionMirrorsDF(mirrors, spark, partitionMap) | ||
|
||
// read for testing | ||
|
||
// NOTE(review): the loop variable `path` and the inner vals `mains` / `mirrors` shadow the | ||
// outer definitions of the same names. | ||
for ((pid, path) <- partitionMap) { | ||
println(s"Reading partition ${pid} in ${path}") | ||
val mains = readMainPartitionDF(path+"/mains", spark) | ||
val mirrors = readMirrorPartitionDF(path+"/mirrors", spark) | ||
println("mains") | ||
// NOTE(review): if these are distributed collections, foreach prints on the executors; | ||
// the output is visible here only because the master is local - confirm. | ||
mains.foreach(m => println(s"\t$m")) | ||
println("mirrors") | ||
mirrors.foreach(m => println(s"\t$m")) | ||
} | ||
|
||
// NOTE(review): not wrapped in try/finally, so the context is not stopped if a step throws. | ||
sc.stop() | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.