Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[New scheduler] Fix new scheduler bug #9467

Merged
merged 3 commits into from
Jul 20, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
fix new scheduler bug
  • Loading branch information
kisuke95 committed Jul 20, 2020
commit 6de02057ec547ddcfe8832bc0004affc6462a9bc
9 changes: 9 additions & 0 deletions src/ray/raylet/node_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -671,6 +671,15 @@ void NodeManager::NodeRemoved(const GcsNodeInfo &node_info) {
return;
}

// Remove the client from the resource map.
if (new_scheduler_enabled_) {
if (!new_resource_scheduler_->RemoveNode(node_id.Binary())) {
RAY_LOG(DEBUG) << "Received NodeRemoved callback for an unknown node: " << node_id
<< ".";
return;
}
}

// Remove the node manager client.
const auto client_entry = remote_node_manager_clients_.find(node_id);
if (client_entry != remote_node_manager_clients_.end()) {
Expand Down
9 changes: 8 additions & 1 deletion src/ray/raylet/scheduling/cluster_resource_scheduler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,13 @@ bool ClusterResourceScheduler::RemoveNode(int64_t node_id) {
}
}

/// Remove a node identified by the string form of its ID.
///
/// The string is translated to the scheduler's internal integer node ID through
/// string_to_int_map_, and the removal is delegated to RemoveNode(int64_t).
///
/// \param node_id_string String form of the ID of the node to be removed.
/// \return false if the string does not resolve to a known integer ID (the lookup
/// yields -1); otherwise the result of RemoveNode(int64_t) for the resolved ID.
bool ClusterResourceScheduler::RemoveNode(const std::string &node_id_string) {
  const auto node_id = string_to_int_map_.Get(node_id_string);
  // An unknown node is an expected runtime condition, not an invariant violation:
  // NodeManager::NodeRemoved inspects this return value and logs
  // "Received NodeRemoved callback for an unknown node" when it is false.
  // Aborting here (as a fatal check would) makes that handling unreachable.
  if (node_id == -1) {
    return false;
  }

  return RemoveNode(node_id);
}

int64_t ClusterResourceScheduler::IsSchedulable(const TaskRequest &task_req,
int64_t node_id,
const NodeResources &resources) {
Expand Down Expand Up @@ -929,7 +936,7 @@ bool ClusterResourceScheduler::AllocateResourceInstances(
(*allocation)[i] = remaining_demand;
return true;
} else {
(*allocation)[i] = available[i];
(*allocation)[i] += available[i];
remaining_demand -= available[i];
available[i] = 0;
}
Expand Down
1 change: 1 addition & 0 deletions src/ray/raylet/scheduling/cluster_resource_scheduler.h
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,7 @@ class ClusterResourceScheduler {
///
/// \param ID of the node to be removed.
bool RemoveNode(int64_t node_id);
/// Remove a node given the string form of its ID.
///
/// The string is translated to the scheduler's integer node ID and the request
/// is forwarded to RemoveNode(int64_t).
///
/// \param node_id_string String form of the ID of the node to be removed.
/// \return The value reported by the int64_t overload for the resolved ID.
bool RemoveNode(const std::string &node_id_string);

/// Check whether a task request can be scheduled given a node.
///
Expand Down