Skip to content

Commit

Permalink
bumped to version 0.5.8
Browse files Browse the repository at this point in the history
- todo on seed based propagation of universities ...
  • Loading branch information
cosminbasca committed Sep 4, 2014
1 parent 4934bcc commit c9ffdd7
Show file tree
Hide file tree
Showing 6 changed files with 26 additions and 12 deletions.
2 changes: 1 addition & 1 deletion rdftools/__version__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__author__ = 'basca'

version = (0, 5, 7)
version = (0, 5, 8)
str_version = '.'.join(['%s' % v for v in version])
9 changes: 0 additions & 9 deletions rdftools/datagen/base.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,3 @@
"""
an Lubm data generator that follows several data modes of distribution
data mode 1: each uni is distributed to one host
data mode 2: horizontal partitioning of all data (based on stars)
data mode 3: randomly distribute a seed of resources to hosts, propagate from that point on (keep seed specific stuff on the same host)
data mode 4: choose a distribution (normal) for a university and distribute the data in the university to some machines given that uni
see substitution sampling
"""
import os
import sh
from abc import ABCMeta, abstractmethod
Expand Down
5 changes: 5 additions & 0 deletions rdftools/datagen/lubm_horizontal.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@ def get_results(self, *args, **kwargs):
return self.site_index


"""
distribution process:
1) horizontal partitioning of all data (based on stars)
"""
class LubmHorizontal(LubmGenerator):
def __init__(self, output_path, sites, permutation='s', **kwargs):
super(LubmHorizontal, self).__init__(output_path, sites, **kwargs)
Expand Down
11 changes: 9 additions & 2 deletions rdftools/datagen/lubm_seed_propagation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@

__author__ = 'basca'

"""
distribution process:
1) randomly distribute a seed of resources to hosts,
2) propagate from that point on (keep seed specific stuff on the same host)
"""
class LubmSeedPropagation(LubmGenerator):
def _create_distribution(self, universities_rdf):
pass
def _create_distribution(self, universities_rdf, **kwargs):
#TODO: implement me!
raise NotImplementedError
6 changes: 6 additions & 0 deletions rdftools/datagen/lubm_uni2many.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@
is_valid_distribution = lambda distro: np.sum(distro) == 1.0


"""
distribution process:
1) choose a distribution (normal) for a university
2) distribute the data of the university to some machines given that uni
obs: see substitution sampling
"""
class LubmUni2Many(LubmGenerator):
def __call__(self, p=None):
if p is None:
Expand Down
5 changes: 5 additions & 0 deletions rdftools/datagen/lubm_uni2one.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@
__author__ = 'basca'


"""
distribution process:
1) each uni is distributed to one host
"""
class LubmUni2One(LubmGenerator):
def _create_distribution(self, universities_rdf, **kwargs):
sites_index = np.random.random_integers(0, self._num_sites-1, len(universities_rdf))
Expand Down

0 comments on commit c9ffdd7

Please sign in to comment.