bug: project delete (#1762)
* bug: complete restart projects on delete

* tests: update to latest

Remove a failed test that can no longer be simulated and update a test
that is now handled successfully.

* refactor: allow provisioner to be mocked
chesedo committed May 7, 2024
1 parent 399b525 commit cf701a7
Showing 10 changed files with 130 additions and 90 deletions.
1 change: 1 addition & 0 deletions backends/src/test_utils/mod.rs
@@ -1,2 +1,3 @@
pub mod gateway;
pub mod provisioner;
pub mod resource_recorder;
51 changes: 51 additions & 0 deletions backends/src/test_utils/provisioner.rs
@@ -0,0 +1,51 @@
use std::net::{Ipv4Addr, SocketAddr};

use async_trait::async_trait;
use portpicker::pick_unused_port;
use shuttle_proto::provisioner::{
provisioner_server::{Provisioner, ProvisionerServer},
DatabaseDeletionResponse, DatabaseRequest, DatabaseResponse, Ping, Pong,
};
use tonic::transport::Server;

struct ProvisionerMock;

#[async_trait]
impl Provisioner for ProvisionerMock {
async fn provision_database(
&self,
_request: tonic::Request<DatabaseRequest>,
) -> Result<tonic::Response<DatabaseResponse>, tonic::Status> {
panic!("no run tests should request a db");
}

async fn delete_database(
&self,
_request: tonic::Request<DatabaseRequest>,
) -> Result<tonic::Response<DatabaseDeletionResponse>, tonic::Status> {
panic!("no run tests should delete a db");
}

async fn health_check(
&self,
_request: tonic::Request<Ping>,
) -> Result<tonic::Response<Pong>, tonic::Status> {
panic!("no run tests should do a health check");
}
}

/// Start a mocked provisioner and return the port it started on
pub async fn get_mocked_provisioner() -> u16 {
let provisioner = ProvisionerMock;

let port = pick_unused_port().unwrap();
let provisioner_addr = SocketAddr::new(Ipv4Addr::UNSPECIFIED.into(), port);
tokio::spawn(async move {
Server::builder()
.add_service(ProvisionerServer::new(provisioner))
.serve(provisioner_addr)
.await
});

port
}
13 changes: 2 additions & 11 deletions backends/src/test_utils/resource_recorder.rs
@@ -22,7 +22,7 @@ impl ResourceRecorder for MockedResourceRecorder {
&self,
request: Request<RecordRequest>,
) -> Result<Response<ResultResponse>, Status> {
println!("recording resources");
println!("recording resources: {request:?}");

let RecordRequest {
project_id,
@@ -149,14 +149,6 @@ impl ResourceRecorder for MockedResourceRecorder {
r#type,
} = request.into_inner();

// Fail to delete a metadata resource if requested
if r#type == "metadata" {
return Ok(Response::new(ResultResponse {
success: false,
message: Default::default(),
}));
}

self.resources.lock().unwrap().retain(|r| {
!(r.project_id == project_id && r.service_id == service_id && r.r#type == r#type)
});
@@ -169,8 +161,7 @@ impl ResourceRecorder for MockedResourceRecorder {
}

/// Start a mocked resource recorder and return the port it started on
/// This mock will function like a normal resource recorder. However, it will always fail to delete metadata resources
/// if any tests need to simulate a failure.
/// This mock will function like a normal resource recorder.
pub async fn get_mocked_resource_recorder() -> u16 {
let resource_recorder = MockedResourceRecorder {
resources: Mutex::new(Vec::new()),
2 changes: 1 addition & 1 deletion common/src/models/deployment.rs
@@ -17,7 +17,7 @@ pub const GIT_STRINGS_MAX_LENGTH: usize = 80;
pub const CREATE_SERVICE_BODY_LIMIT: usize = 50_000_000;
const GIT_OPTION_NONE_TEXT: &str = "N/A";

#[derive(Deserialize, Serialize)]
#[derive(Deserialize, Serialize, Debug)]
pub struct Response {
pub id: Uuid,
pub service_id: String,
89 changes: 32 additions & 57 deletions gateway/src/api/latest.rs
@@ -40,7 +40,7 @@ use tokio::sync::mpsc::Sender;
use tokio::sync::{Mutex, MutexGuard};
use tower::ServiceBuilder;
use tower_http::cors::CorsLayer;
use tracing::{error, field, instrument, trace, Span};
use tracing::{debug, error, field, info, instrument, trace, warn, Span};
use ttl_cache::TtlCache;
use ulid::Ulid;
use uuid::Uuid;
@@ -309,49 +309,30 @@ async fn delete_project(

let project_id = Ulid::from_string(&project.id).expect("stored project id to be a valid ULID");

// Try to startup destroyed, errored or outdated projects
let project_deletable = project.state.is_ready() || project.state.is_stopped();
let current_version: semver::Version = env!("CARGO_PKG_VERSION")
.parse()
.expect("to have a valid semver gateway version");

let version = project
.state
.container()
.and_then(|container_inspect_response| {
container_inspect_response.image.and_then(|inner| {
inner
.strip_prefix("public.ecr.aws/shuttle/deployer:v")
.and_then(|x| x.parse::<semver::Version>().ok())
})
})
// Defaulting to a version that introduced a breaking change.
// This was the last one that introduced it at the present
// moment.
.unwrap_or(semver::Version::new(0, 39, 0));

// We restart the project before deletion everytime
// we detect it is outdated, so that we avoid by default
// breaking changes that can happen on the deployer
// side in the future.
if !project_deletable || version < current_version {
let handle = state
.service
.new_task()
.project(project_name.clone())
.and_then(task::restart(project_id))
.and_then(task::run_until_done())
.send(&state.sender)
.await?;
let handle = state
.service
.new_task()
.project(project_name.clone())
.and_then(task::destroy()) // This destroy might only recover the project from an errored state
.and_then(task::run_until_destroyed())
.and_then(task::restart(project_id))
.and_then(task::run_until_ready())
.and_then(task::destroy())
.and_then(task::run_until_destroyed())
.and_then(task::restart(project_id))
.and_then(task::run_until_ready())
.send(&state.sender)
.await?;

// Wait for the project to be ready
handle.await;
// Wait for the project to be ready
handle.await;

let new_state = state.service.find_project_by_name(&project_name).await?;
let new_state = state.service.find_project_by_name(&project_name).await?;

if !new_state.state.is_ready() {
return Err(ProjectCorrupted.into());
}
if !new_state.state.is_ready() {
warn!(state = ?new_state.state, "failed to restart project");
return Err(ProjectCorrupted.into());
}

let service = state.service.clone();
@@ -360,8 +341,10 @@
let project_caller =
ProjectCaller::new(state.clone(), scoped_user.clone(), req.headers()).await?;

trace!("getting deployments");
// check that a deployment is not running
let mut deployments = project_caller.get_deployment_list().await?;
debug!(?deployments, "got deployments");
deployments.sort_by_key(|d| d.last_update);

// Make sure no deployment is in the building pipeline
@@ -376,6 +359,7 @@
});

if has_bad_state {
warn!("has bad state");
return Err(ProjectHasBuildingDeployment.into());
}

@@ -384,6 +368,7 @@
.filter(|d| d.state == deployment::State::Running);

for running_deployment in running_deployments {
info!(%running_deployment, "stopping running deployment");
let res = project_caller
.stop_deployment(&running_deployment.id)
.await?;
@@ -393,11 +378,13 @@
}
}

trace!("getting resources");
// check if any resources exist
let resources = project_caller.get_resources().await?;
let mut delete_fails = Vec::new();

for resource in resources {
info!(?resource, "deleting resource");
let resource_type = resource.r#type.to_string();
let res = project_caller.delete_resource(&resource_type).await?;

@@ -410,6 +397,7 @@
return Err(ProjectHasResources(delete_fails).into());
}

trace!("deleting container");
let task = service
.new_task()
.project(project_name.clone())
@@ -418,6 +406,7 @@
.await?;
task.await;

trace!("removing project from state");
service.delete_project(&project_name).await?;

Ok(AxumJson("project successfully deleted".to_owned()))
@@ -692,7 +681,7 @@ async fn get_status(
};

// Compute provisioner status.
let provisioner_status = if let Ok(channel) = service.provisioner_host().connect().await {
let provisioner_status = if let Ok(channel) = service.provisioner_uri().connect().await {
let channel = ServiceBuilder::new().service(channel);
let mut provisioner_client = ProvisionerClient::new(channel);
if provisioner_client.health_check(Ping {}).await.is_ok() {
@@ -1740,20 +1729,6 @@ pub mod tests {
);
}

#[test_context(TestProject)]
#[tokio::test]
async fn api_delete_project_that_has_resources_but_fails_to_remove_them(
project: &mut TestProject,
) {
project.deploy("../examples/axum/metadata").await;
project.stop_service().await;

assert_eq!(
project.router_call(Method::DELETE, "/delete").await,
StatusCode::INTERNAL_SERVER_ERROR
);
}

#[test_context(TestProject)]
#[tokio::test]
async fn api_delete_project_that_has_running_deployment(project: &mut TestProject) {
@@ -1771,11 +1746,11 @@ pub mod tests {
project.just_deploy("../examples/axum/hello-world").await;

// Wait a bit to it to progress in the queue
sleep(Duration::from_secs(2)).await;
sleep(Duration::from_secs(10)).await;

assert_eq!(
project.router_call(Method::DELETE, "/delete").await,
StatusCode::BAD_REQUEST
StatusCode::OK
);
}

4 changes: 2 additions & 2 deletions gateway/src/args.rs
@@ -60,8 +60,8 @@ pub struct ServiceArgs {
pub prefix: String,
/// The address at which an active runtime container will find
/// the provisioner service
#[arg(long, default_value = "provisioner")]
pub provisioner_host: String,
#[arg(long, default_value = "http:https://provisioner:8000")]
pub provisioner_uri: String,
/// Address to reach the authentication service at
#[arg(long, default_value = "http:https://127.0.0.1:8008")]
pub auth_uri: Uri,
8 changes: 4 additions & 4 deletions gateway/src/lib.rs
@@ -168,6 +168,7 @@ pub mod tests {
use ring::signature::{self, Ed25519KeyPair, KeyPair};
use shuttle_backends::auth::ConvertResponse;
use shuttle_backends::test_utils::gateway::PermissionsMock;
use shuttle_backends::test_utils::provisioner::get_mocked_provisioner;
use shuttle_backends::test_utils::resource_recorder::get_mocked_resource_recorder;
use shuttle_common::claims::{AccountTier, Claim};
use shuttle_common::models::deployment::DeploymentRequest;
@@ -399,6 +400,7 @@ pub mod tests {
let auth: SocketAddr = format!("0.0.0.0:{auth_port}").parse().unwrap();
let auth_uri: Uri = format!("http://{auth}").parse().unwrap();
let resource_recorder_port = get_mocked_resource_recorder().await;
let provisioner_port = get_mocked_provisioner().await;

let auth_service = AuthService::new(auth);
auth_service
@@ -418,8 +420,6 @@
let network_name =
env::var("SHUTTLE_TESTS_NETWORK").unwrap_or_else(|_| "shuttle_default".to_string());

let provisioner_host = "provisioner".to_string();

let docker_host = "/var/run/docker.sock".to_string();

let args = StartArgs {
@@ -432,9 +432,9 @@
docker_host,
image,
prefix,
provisioner_host,
provisioner_uri: format!("http://host.docker.internal:{provisioner_port}"),
// The started containers need to reach auth on the host.
// For this to work, the firewall should not be blocking traffic on the `SHUTTLE_TEST_NETWORK` interface.
// For this to work, the firewall should not be blocking traffic on the `SHUTTLE_TESTS_NETWORK` interface.
// The following command can be used on NixOs to allow traffic on the interface.
// ```
// sudo iptables -I nixos-fw -i <interface> -j nixos-fw-accept
6 changes: 3 additions & 3 deletions gateway/src/project.rs
@@ -860,7 +860,7 @@ impl ProjectCreating {
let ContainerSettings {
image: default_image,
prefix,
provisioner_host,
provisioner_uri,
auth_uri,
resource_recorder_uri,
extra_hosts,
@@ -902,7 +902,7 @@ impl ProjectCreating {
"--api-address",
format!("0.0.0.0:{RUNTIME_API_PORT}"),
"--provisioner-address",
format!("http:https://{provisioner_host}:8000"),
provisioner_uri,
"--artifacts-path",
"/opt/shuttle",
"--state",
@@ -1732,7 +1732,7 @@

#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct ProjectDestroyed {
destroyed: Option<ContainerInspectResponse>,
pub destroyed: Option<ContainerInspectResponse>,
}

#[async_trait]
