-
Notifications
You must be signed in to change notification settings - Fork 444
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
First Credit mining prototype #2064
Changes from 1 commit
7226455
168f7cc
bd87ab1
897cdff
d713895
9d3201b
24ebb7b
2184356
b7010d8
a31b923
9a87521
6600773
d7d9ccf
ae7576c
9b9c01a
c2e894b
a33b4c7
6cc9d7d
2310579
a85ab3c
66eb87f
57e8636
b11fd65
b9d1940
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
Squashed commits : - Separate Credit mining policies in different file - Refactor constant and functions to different file - Removed unused part of the code in utilities - Keep BoostingSource and BoostingManager clean - Extract lambda-variable as function in policies - Refer BoostingManager object in LaunchMany - Separate instantiation and start source Because of this, creating new instance will not make mining automatically start. Calling start function is needed. - Refactor source to have its own TaskManager - Use custom RSS parser instead of libtorrent's
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,6 @@ | ||
# -*- coding: utf-8 -*- | ||
# Written by Egbert Bouman, Mihai Capotă, Elric Milon, and Ardhi Putra Pratama H | ||
"""Manage boosting of swarms""" | ||
import errno | ||
import logging | ||
import os | ||
from binascii import hexlify, unhexlify | ||
|
@@ -16,53 +15,22 @@ | |
from Tribler.Core.DownloadConfig import DownloadStartupConfig | ||
from Tribler.Core.Libtorrent.LibtorrentDownloadImpl import LibtorrentDownloadImpl | ||
from Tribler.Core.Utilities import utilities | ||
from Tribler.Core.Utilities.install_dir import determine_install_dir | ||
from Tribler.Core.exceptions import OperationNotPossibleAtRuntimeException | ||
from Tribler.Core.simpledefs import DLSTATUS_SEEDING, NTFY_TORRENTS, NTFY_UPDATE, NTFY_CHANNELCAST | ||
from Tribler.Main.globals import DefaultDownloadStartupConfig | ||
from Tribler.Policies.BoostingPolicy import RandomPolicy, CreationDatePolicy, SeederRatioPolicy | ||
from Tribler.Policies.BoostingSource import ChannelSource | ||
from Tribler.Policies.BoostingSource import DirectorySource | ||
from Tribler.Policies.BoostingSource import RSSFeedSource | ||
from Tribler.Policies.defs import NUMBER_TYPES, SAVED_ATTR, CREDIT_MINING_FOLDER_DOWNLOAD, CONFIG_KEY_ARCHIVELIST, \ | ||
CONFIG_OP_RM, CONFIG_OP_ADD, CONFIG_KEY_SOURCELIST, CONFIG_KEY_ENABLEDLIST, CONFIG_KEY_DISABLEDLIST | ||
from Tribler.Policies.credit_mining_util import source_to_string, string_to_source, compare_torrents | ||
from Tribler.Policies.defs import SAVED_ATTR, CREDIT_MINING_FOLDER_DOWNLOAD, CONFIG_KEY_ARCHIVELIST, \ | ||
CONFIG_KEY_SOURCELIST, CONFIG_KEY_ENABLEDLIST, CONFIG_KEY_DISABLEDLIST | ||
from Tribler.dispersy.taskmanager import TaskManager | ||
|
||
|
||
def levenshtein_dist(t1_fname, t2_fname):
    """
    Compute the Levenshtein (edit) distance between two file names.

    Only one row of the dynamic-programming table is kept at a time, and the
    shorter input is used as the row, so memory stays proportional to the
    shorter of the two inputs.

    :param t1_fname: first sequence (e.g. a file name)
    :param t2_fname: second sequence
    :return: minimal number of insertions, deletions and substitutions
             needed to turn one sequence into the other
    """
    shorter, longer = t1_fname, t2_fname
    if len(shorter) > len(longer):
        # keep the row as small as possible
        shorter, longer = longer, shorter
    width = len(shorter)

    row = list(range(width + 1))
    for row_no, long_item in enumerate(longer, 1):
        above = row
        row = [row_no]
        for col, short_item in enumerate(shorter, 1):
            substitution = above[col - 1]
            if short_item != long_item:
                substitution += 1
            insertion = above[col] + 1
            deletion = row[col - 1] + 1
            row.append(min(insertion, deletion, substitution))

    return row[width]
|
||
def source_to_string(source_obj):
    """
    Convert a boosting source key into its string representation.

    A key of exactly 20 characters that is not an HTTP(S) URL is treated as
    binary data and returned hex-encoded; every other key (URL, directory
    path, ...) is returned unchanged.

    :param source_obj: the source key to convert
    :return: hex string for 20-character non-URL keys, otherwise source_obj
    """
    # Both schemes must be excluded: the previous check tested 'https://'
    # twice, so a 20-character 'http://' URL was wrongly hex-encoded.
    if len(source_obj) == 20 and not source_obj.startswith(('http://', 'https://')):
        return hexlify(source_obj)
    return source_obj
|
||
def string_to_source(source_str):
    """
    Convert a stored source string back into a source key.

    A string of exactly 40 characters that is neither an existing directory
    nor an HTTP(S) URL is treated as a hex-encoded key and decoded to its
    binary form; every other string is returned unchanged.

    :param source_str: string form of the source
    :return: decoded binary key, or source_str itself
    """
    # 'http://' has to be excluded as well as 'https://': a 40-character
    # plain-HTTP URL is not hex and would make the decoding raise.
    is_location = os.path.isdir(source_str) or source_str.startswith(('http://', 'https://'))
    if len(source_str) == 40 and not is_location:
        # unhexlify is the same conversion as the Python 2 'hex' codec
        return unhexlify(source_str)
    return source_str
|
||
|
||
class BoostingSettings(object): | ||
""" | ||
Class contains settings on boosting manager | ||
This class contains settings used by the boosting manager | ||
""" | ||
def __init__(self, session, policy=SeederRatioPolicy, load_config=True): | ||
self.session = session | ||
|
@@ -73,7 +41,7 @@ def __init__(self, session, policy=SeederRatioPolicy, load_config=True): | |
self.source_interval = 100 | ||
self.swarm_interval = 100 | ||
|
||
# Can't be changed in runtime | ||
# Can't be changed on runtime | ||
self.tracker_interval = 200 | ||
self.logging_interval = 60 | ||
self.share_mode_target = 3 | ||
|
@@ -154,7 +122,7 @@ def del_instance(): | |
|
||
def shutdown(self): | ||
""" | ||
save configuration before stopping stuffs | ||
Shutting down boosting manager. It also stops and remove all the sources. | ||
""" | ||
self.save_config() | ||
self._logger.info("Shutting down boostingmanager") | ||
|
@@ -170,8 +138,9 @@ def shutdown(self): | |
self.session.lm.threadpool.cancel_pending_task("CreditMining_log_init") | ||
|
||
self.cancel_all_pending_tasks() | ||
# for torrent in self.torrents.itervalues(): | ||
# self.stop_download(torrent) | ||
|
||
# remove credit mining downloaded data | ||
shutil.rmtree(self.settings.credit_mining_path, ignore_errors=True) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This always deletes the data, not just when not in persistent mode. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Well, currently the boosting manager only runs in non-persistent mode. I just left the note for future consideration. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. In that case, why not make a persistent mode attribute in BoostingManager and still wrap this inside an if-statement that checks this persistent mode attribute? I think it's confusing right now since the comment is not in sync with the code. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I changed the comment to |
||
|
||
def get_source_object(self, sourcekey): | ||
return self.boosting_sources.get(sourcekey, None) | ||
|
@@ -205,6 +174,7 @@ def add_source(self, source): | |
try: | ||
isdir = os.path.isdir(source) | ||
except TypeError: | ||
# this handle binary data that has null bytes '\00' | ||
isdir = False | ||
|
||
if isdir: | ||
|
@@ -242,23 +212,8 @@ def remove_source(self, source_key): | |
def on_torrent_insert(self, source, infohash, torrent): | ||
""" | ||
This function called when a source is finally determined. Fetch some torrents from it, | ||
then insert it to our data | ||
then insert it into our data | ||
""" | ||
def compare_torrents(torrent_1, torrent_2): | ||
""" | ||
comparing swarms. We don't want to download same swarm with different infohash | ||
:return: whether those t1 and t2 similar enough | ||
""" | ||
files1 = [files for files in torrent_1['metainfo'].get_files_with_length() if files[1] > 1024 * 1024] | ||
files2 = [files for files in torrent_2['metainfo'].get_files_with_length() if files[1] > 1024 * 1024] | ||
|
||
if len(files1) == len(files2): | ||
for ft1 in files1: | ||
for ft2 in files2: | ||
if ft1[1] != ft2[1] or levenshtein_dist(ft1[0], ft2[0]) > 5: | ||
return False | ||
return True | ||
return False | ||
|
||
# Remember where we got this torrent from | ||
self._logger.debug("remember torrent %s from %s", torrent, source_to_string(source)) | ||
|
@@ -546,8 +501,8 @@ def log_statistics(self): | |
|
||
if unhexlify(str(status.info_hash)) in self.torrents: | ||
self._logger.debug("Status for %s : %s %s | ul_lim : %d, max_ul %d, maxcon %d", status.info_hash, | ||
status.all_time_download, | ||
status.all_time_upload) | ||
status.all_time_download, status.all_time_upload, lt_torrent.upload_limit(), | ||
lt_torrent.max_uploads(), lt_torrent.max_connections()) | ||
|
||
# piece_priorities will fail in libtorrent 1.0.9 | ||
if lt.version == '1.0.9.0': | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does it only fail in libtorrent 1.0.9? What about older/newer versions of lt? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. AFAIK, it works, it's so weird. |
||
|
@@ -564,7 +519,10 @@ def log_statistics(self): | |
|
||
def update_torrent_stats(self, torrent_infohash_str, seeding_stats): | ||
""" | ||
function to update swarm statistics | ||
function to update swarm statistics. | ||
|
||
This function called when we get new Downloadstate for active torrents. | ||
Updated downloadstate (seeding_stats) for a particular torrent is stored here. | ||
""" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. how are they updated? Explain a bit what this function does please. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated explanation in the comment |
||
if 'time_seeding' in self.torrents[torrent_infohash_str]['last_seeding_stats']: | ||
if seeding_stats['time_seeding'] >= self.torrents[torrent_infohash_str][ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
# coding=utf-8 | ||
""" | ||
Written by Egbert Bouman, Mihai Capotă, Elric Milon, and Ardhi Putra Pratama H | ||
Supported boosting policy | ||
""" | ||
import logging | ||
import random | ||
|
||
|
||
class BoostingPolicy(object):
    """
    Base class for determining what swarm selection policy will be applied.

    Subclasses customise the selection by overriding key() (sort key of a
    torrent), key_check() (whether a torrent is eligible at all) and by
    setting self.reverse (sort direction).
    """

    def __init__(self, session):
        """
        :param session: object providing get_download(infohash), used to find
                        out whether a torrent is currently being downloaded
        """
        self.session = session
        # Sort direction handed to sorted() in apply(). It has to be a
        # bool/int: sorted() raises TypeError when given reverse=None.
        self.reverse = False

        self._logger = logging.getLogger(self.__class__.__name__)

    def apply(self, torrents, max_active, force=False):
        """
        Apply the policy to the stored torrents.

        Eligible torrents (key_check) are sorted by key(); the first
        max_active of them should be running, the remaining ones should not.

        :param torrents: dictionary whose values are torrent dictionaries
                         holding at least a "metainfo" entry
        :param max_active: maximum number of torrents that may be active
        :param force: when True, return this policy's result as is and never
                      fall back to RandomPolicy
        :return: tuple (torrents_start, torrents_stop): torrents to start
                 because they are selected but have no download yet, and
                 torrents to stop because they have a download but are not
                 selected
        """
        eligible = [torrent for torrent in torrents.itervalues() if self.key_check(torrent)]
        sorted_torrents = sorted(eligible, key=self.key, reverse=self.reverse)

        torrents_start = [torrent for torrent in sorted_torrents[:max_active]
                          if not self.session.get_download(torrent["metainfo"].get_infohash())]
        torrents_stop = [torrent for torrent in sorted_torrents[max_active:]
                         if self.session.get_download(torrent["metainfo"].get_infohash())]

        if force:
            return torrents_start, torrents_stop

        # if both results are empty for some reason (e.g, key_check too restrictive)
        # or torrent started less than half available torrent (try to keep boosting alive)
        # if it's already random, just let it be
        nothing_to_do = not torrents_start and not torrents_stop
        too_few_started = (len(torrents_start) < len(torrents) / 2 and
                           len(torrents_start) < max_active / 2)
        if not isinstance(self, RandomPolicy) and (nothing_to_do or too_few_started):
            self._logger.error("Start and stop torrent list are empty. Fallback to Random")
            # fallback to random policy
            torrents_start, torrents_stop = RandomPolicy(self.session).apply(torrents, max_active)

        return torrents_start, torrents_stop

    def key(self, key):
        """
        Return the sort key of a torrent; the base implementation returns None.

        :param key: torrent dictionary
        """
        return None

    def key_check(self, key):
        """
        Tell whether a swarm may be considered for download; the base
        implementation rejects every torrent.

        :param key: torrent dictionary
        """
        return False
|
||
class RandomPolicy(BoostingPolicy):
    """
    Credit mining policy that picks swarms in random order.

    Every torrent is eligible and each one gets a fresh random sort key, so
    the resulting order is a random shuffle.
    """

    def __init__(self, session):
        super(RandomPolicy, self).__init__(session)
        self.reverse = False

    def key(self, key):
        """Return a random float in [0, 1) as sort key; the torrent is ignored."""
        return random.random()

    def key_check(self, key):
        """Accept every torrent."""
        return True
|
||
|
||
class CreationDatePolicy(BoostingPolicy):
    """
    Credit mining policy that ranks swarms by their creation date.

    The idea is, older swarms need to be boosted.

    NOTE(review): with reverse=True the torrents with the largest
    'creation_date' value are sorted first - confirm that this matches the
    intent stated above.
    """

    def __init__(self, session):
        super(CreationDatePolicy, self).__init__(session)
        self.reverse = True

    def key(self, key):
        """Use the torrent's 'creation_date' entry as sort key."""
        return key['creation_date']

    def key_check(self, key):
        """Only accept torrents whose 'creation_date' is greater than zero."""
        return key['creation_date'] > 0
|
||
|
||
class SeederRatioPolicy(BoostingPolicy):
    """
    Default policy. Find the most underseeded swarm to boost.

    Swarms are sorted ascending by their share of seeders among all known
    peers, so the swarm with the lowest seeder ratio comes first.
    """

    def __init__(self, session):
        super(SeederRatioPolicy, self).__init__(session)
        self.reverse = False

    def key_check(self, key):
        """Only accept swarms with at least one known peer, so key() never divides by zero."""
        peers = key['num_seeders'] + key['num_leechers']
        return peers > 0

    def key(self, key):
        """Return seeders divided by (seeders + leechers) as a float."""
        peers = key['num_seeders'] + key['num_leechers']
        return key['num_seeders'] / float(peers)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If we could move
DefaultDownloadStartupConfig
to the core, we could move the BoostingManager
to the core. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thinking about it, maybe we should move this class to
Core/Modules
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree. What do you think, @whirm?
Btw, I only used
DefaultDownloadStartupConfig
for the default download location (which in this case is in TriblerDownloads
). There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@ardhipoetra I just created an issue for it, forgot to link it here, see #2243 👍