diff --git a/.circleci/config.yml b/.circleci/config.yml index cd3c922446..731e2578c7 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -18,7 +18,7 @@ jobs: build-api: working_directory: ~/marquez machine: - image: ubuntu-2004:202010-01 + image: ubuntu-2004:current environment: TESTCONTAINERS_RYUK_DISABLED: true steps: @@ -44,7 +44,8 @@ jobs: build-image-api: working_directory: ~/marquez - machine: true + machine: + image: ubuntu-2004:current steps: - checkout - run: docker build --no-cache --tag "marquezproject/marquez:${CIRCLE_SHA1}" . @@ -54,7 +55,8 @@ jobs: build-image-web: working_directory: ~/marquez/web - machine: true + machine: + image: ubuntu-2004:current steps: - *checkout_project_root - run: docker build --no-cache --tag "marquezproject/marquez-web:${CIRCLE_SHA1}" . @@ -85,7 +87,7 @@ jobs: build-client-java: working_directory: ~/marquez machine: - image: ubuntu-2004:202010-01 + image: ubuntu-2004:current steps: - checkout - restore_cache: @@ -142,7 +144,7 @@ jobs: release-java: working_directory: ~/marquez machine: - image: ubuntu-2004:202010-01 + image: ubuntu-2004:current steps: - checkout - run: ./.circleci/get-jdk17.sh @@ -165,7 +167,8 @@ jobs: release-docker: working_directory: ~/marquez - machine: true + machine: + image: ubuntu-2004:current steps: - checkout - run: ./docker/login.sh diff --git a/.github/workflows/test-chart.yaml b/.github/workflows/test-chart.yaml index 8779a0bf48..36584d84ee 100644 --- a/.github/workflows/test-chart.yaml +++ b/.github/workflows/test-chart.yaml @@ -11,20 +11,20 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: fetch-depth: 0 - name: Setup Helm - uses: azure/setup-helm@v2.0 + uses: azure/setup-helm@v2.1 - name: Setup Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v3 with: python-version: 3.7 - name: Setup chart-testing - uses: helm/chart-testing-action@v2.2.0 + uses: helm/chart-testing-action@v2.2.1 - name: Run chart-testing (list-changed) id: list-changed diff --git a/CHANGELOG.md b/CHANGELOG.md index 977d34e8c1..fbaad1b95b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,26 @@ # Changelog -## [Unreleased](https://github.com/MarquezProject/marquez/compare/0.21.0...HEAD) +## [Unreleased](https://github.com/MarquezProject/marquez/compare/0.22.0...HEAD) + +## [0.22.0](https://github.com/MarquezProject/marquez/compare/0.21.0...0.22.0) - 2022-05-16 + +### Added + +* Add support for `LifecycleStateChangeFacet` with the ability to soft-delete datasets [#1847](https://github.com/MarquezProject/marquez/pull/1847) [@pawel-big-lebowski](https://github.com/pawel-big-lebowski) +* Enable pod-specific annotations in Marquez Helm Chart via `marquez.podAnnotations` [#1945](https://github.com/MarquezProject/marquez/pull/1945) [@wslulciuc](https://github.com/wslulciuc) +* Add support for job renaming/redirection via symlink [#1947](https://github.com/MarquezProject/marquez/pull/1947) [@collado-mike](https://github.com/collado-mike) +* Add `Created by` view for dataset versions along with SQL syntax highlighting in web UI [#1929](https://github.com/MarquezProject/marquez/pull/1929) [@phixMe](https://github.com/phixMe) +* Add `operationId` to OpenAPI spec [#1978](https://github.com/MarquezProject/marquez/pull/1978) [@phixMe](https://github.com/phixMe) + +### Changed + +* Upgrade Flyway to v8.5.10 [#1974](https://github.com/MarquezProject/marquez/pull/1974) [@dakshin-k](https://github.com/dakshin-k) + +### Fixed + +* Remove size limits on namespaces, 
dataset names, and source connection URLs [#1925](https://github.com/MarquezProject/marquez/pull/1925) [@collado-mike](https://github.com/collado-mike) +* Update namespace names to allow `=`, `@`, and `;` [#1936](https://github.com/MarquezProject/marquez/pull/1936) [@mobuchowski](https://github.com/mobuchowski) +* Fix time duration display in web UI [#1950](https://github.com/MarquezProject/marquez/pull/1950) [@phixMe](https://github.com/phixMe) ## [0.21.0](https://github.com/MarquezProject/marquez/compare/0.20.0...0.21.0) - 2022-03-03 diff --git a/RELEASING.md b/RELEASING.md index 6ba83297b5..53c56f2352 100644 --- a/RELEASING.md +++ b/RELEASING.md @@ -18,4 +18,16 @@ 6. Before closing the project board for the _current_ release, move any open issues to the project board created in **step 5** 7. Draft a [new release](https://github.com/MarquezProject/marquez/releases/new) using the release notes for `X.Y.Z` in **step 1** as the release description: - ![](./docs/assets/images/new-release.png) \ No newline at end of file + ![](./docs/assets/images/new-release.png) + +# Voting on Releases + +Anyone may request a new release of the project in the #general Slack channel. + +After a release is proposed, committers have 48 hours to give a +1 or -1. + +A net total of three +1s, counting -1s against the total and excluding votes by the proposer, authorizes the release. For example, four +1s and one -1 from committers other than the proposer make a net total of three +1s, so the release is authorized. + +Alternatively, if after 48 hours the release has received at least one +1 and no -1s, the release is also authorized. + +If the proposed release receives no +1s within 48 hours, it is not authorized, and the proposer must make a new request to reset the clock. \ No newline at end of file diff --git a/api/build.gradle b/api/build.gradle index bf7435b928..03e8747158 100644 --- a/api/build.gradle +++ b/api/build.gradle @@ -21,10 +21,10 @@ plugins { } ext { - jdbi3Version = '3.27.2' + jdbi3Version = '3.28.0' prometheusVersion = '0.15.0' - testcontainersVersion = '1.16.3' - sentryVersion = '5.6.1' + testcontainersVersion = '1.17.1' + sentryVersion = '5.7.3' } dependencies { @@ -42,12 +42,12 @@ dependencies { implementation "org.jdbi:jdbi3-core:${jdbi3Version}" implementation "org.jdbi:jdbi3-postgres:${jdbi3Version}" implementation "org.jdbi:jdbi3-sqlobject:${jdbi3Version}" - implementation 'com.google.guava:guava:31.0.1-jre' - implementation 'org.dhatim:dropwizard-sentry:2.0.28-8' + implementation 'com.google.guava:guava:31.1-jre' + implementation 'org.dhatim:dropwizard-sentry:2.0.29' implementation "io.sentry:sentry:${sentryVersion}" - implementation 'org.flywaydb:flyway-core:6.5.7' + implementation 'org.flywaydb:flyway-core:8.5.10' implementation "org.postgresql:postgresql:${postgresqlVersion}" - implementation 'com.graphql-java:graphql-java:17.3' + implementation 'com.graphql-java:graphql-java:18.0' implementation 'com.graphql-java-kickstart:graphql-java-servlet:12.0.0' testImplementation "io.dropwizard:dropwizard-testing:${dropwizardVersion}" diff --git a/api/src/main/java/marquez/MarquezApp.java b/api/src/main/java/marquez/MarquezApp.java index 5ca39396f5..1697acfd61 100644 --- a/api/src/main/java/marquez/MarquezApp.java +++ b/api/src/main/java/marquez/MarquezApp.java @@ -23,6 +23,7 @@ import javax.sql.DataSource; import lombok.NonNull; import lombok.extern.slf4j.Slf4j; +import marquez.api.filter.JobRedirectFilter; import marquez.cli.SeedCommand; import marquez.common.Utils; import marquez.db.DbMigration; @@ -115,9 +116,10 @@ public void run(@NonNull MarquezConfig config, @NonNull Environment env) { env.jersey().register(new 
TracingContainerResponseFilter()); } - registerResources(config, env, source); + MarquezContext marquezContext = buildMarquezContext(config, env, (ManagedDataSource) source); + registerResources(config, env, marquezContext); registerServlets(env); - registerFilters(env); + registerFilters(env, marquezContext); } private boolean isSentryEnabled(MarquezConfig config) { @@ -126,11 +128,26 @@ private boolean isSentryEnabled(MarquezConfig config) { } public void registerResources( - @NonNull MarquezConfig config, @NonNull Environment env, @NonNull DataSource source) { + @NonNull MarquezConfig config, @NonNull Environment env, MarquezContext context) { + + if (config.getGraphql().isEnabled()) { + env.servlets() + .addServlet("api/v1-beta/graphql", context.getGraphqlServlet()) + .addMapping("/api/v1-beta/graphql", "/api/v1/schema.json"); + } + + log.debug("Registering resources..."); + for (final Object resource : context.getResources()) { + env.jersey().register(resource); + } + } + + private MarquezContext buildMarquezContext( + MarquezConfig config, Environment env, ManagedDataSource source) { final JdbiFactory factory = new JdbiFactory(); final Jdbi jdbi = factory - .build(env, config.getDataSourceFactory(), (ManagedDataSource) source, DB_POSTGRES) + .build(env, config.getDataSourceFactory(), source, DB_POSTGRES) .installPlugin(new SqlObjectPlugin()) .installPlugin(new PostgresPlugin()); SqlLogger sqlLogger = new InstrumentedSqlLogger(env.metrics()); @@ -141,17 +158,7 @@ public void registerResources( final MarquezContext context = MarquezContext.builder().jdbi(jdbi).tags(config.getTags()).build(); - - if (config.getGraphql().isEnabled()) { - env.servlets() - .addServlet("api/v1-beta/graphql", context.getGraphqlServlet()) - .addMapping("/api/v1-beta/graphql", "/api/v1/schema.json"); - } - - log.debug("Registering resources..."); - for (final Object resource : context.getResources()) { - env.jersey().register(resource); - } + return context; } private void registerServlets(@NonNull Environment env) { @@ -161,7 +168,10 @@ private void registerServlets(@NonNull Environment env) { env.servlets().addServlet(PROMETHEUS, new MetricsServlet()).addMapping(PROMETHEUS_ENDPOINT); } - private void registerFilters(@NonNull Environment env) { + private void registerFilters(@NonNull Environment env, MarquezContext marquezContext) { env.jersey().getResourceConfig().register(new LoggingMdcFilter()); + env.jersey() + .getResourceConfig() + .register(new JobRedirectFilter(marquezContext.getJobService())); } } diff --git a/api/src/main/java/marquez/api/filter/JobRedirectFilter.java b/api/src/main/java/marquez/api/filter/JobRedirectFilter.java new file mode 100644 index 0000000000..d60614b53d --- /dev/null +++ b/api/src/main/java/marquez/api/filter/JobRedirectFilter.java @@ -0,0 +1,92 @@ +package marquez.api.filter; + +import java.io.IOException; +import java.net.URI; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import javax.ws.rs.container.ContainerRequestContext; +import javax.ws.rs.container.ContainerRequestFilter; +import javax.ws.rs.core.MultivaluedHashMap; +import javax.ws.rs.core.MultivaluedMap; +import javax.ws.rs.core.Response; +import javax.ws.rs.core.UriBuilder; +import lombok.extern.slf4j.Slf4j; +import marquez.common.models.JobId; +import marquez.db.models.JobRow; +import marquez.service.JobService; +import marquez.service.models.Job; +import org.glassfish.jersey.server.ExtendedUriInfo; +import org.glassfish.jersey.uri.UriComponent; +import 
org.glassfish.jersey.uri.UriComponent.Type; +import org.glassfish.jersey.uri.UriTemplate; + +/** + * Filters requests that reference a job that has been symlinked to another job. This filter + * redirects such requests to the URL with the symlink target's name using a 301 status code. + */ +@Slf4j +public class JobRedirectFilter implements ContainerRequestFilter { + + public static final String JOB_PATH_PARAM = "job"; + public static final String NAMESPACE_PATH_PARAM = "namespace"; + private final JobService jobService; + + public JobRedirectFilter(JobService jobService) { + this.jobService = jobService; + } + + @Override + public void filter(ContainerRequestContext requestContext) throws IOException { + MultivaluedMap<String, String> pathParams = requestContext.getUriInfo().getPathParameters(); + if (!pathParams.containsKey(NAMESPACE_PATH_PARAM) || !pathParams.containsKey(JOB_PATH_PARAM)) { + return; + } + List<String> namespaceParams = pathParams.get(NAMESPACE_PATH_PARAM); + List<String> jobParams = pathParams.get(JOB_PATH_PARAM); + if (namespaceParams.isEmpty() || jobParams.isEmpty()) { + return; + } + Optional<Job> job = jobService.findJobByName(namespaceParams.get(0), jobParams.get(0)); + job.ifPresent( + j -> { + if (!j.getName().getValue().equals(jobParams.get(0))) { + log.info( + "Job {}.{} has been redirected to {}.{}", + namespaceParams.get(0), + jobParams.get(0), + j.getNamespace().getValue(), + j.getName().getValue()); + URI location = buildLocationFor(requestContext, j.getId()); + log.debug("Redirecting to url {}", location); + requestContext.abortWith(Response.status(301).location(location).build()); + } + }); + } + + /** + * Construct a URI from a request's matched resource, replacing the {@value #JOB_PATH_PARAM} and + * {@value #NAMESPACE_PATH_PARAM} parameters with the fully-qualified values from the provided + * {@link JobId}. + * + * @param ctx the request context whose matched resource and path parameters are used + * @param jobId the id of the symlink target job + * @return the location URI for the 301 redirect + */ + private URI buildLocationFor(ContainerRequestContext ctx, JobId jobId) { + Object resource = ctx.getUriInfo().getMatchedResources().get(0); + MultivaluedMap<String, String> pathParameters = ctx.getUriInfo().getPathParameters(); + MultivaluedHashMap<String, String> copy = new MultivaluedHashMap<>(pathParameters); + copy.putSingle( + JOB_PATH_PARAM, UriComponent.encode(jobId.getName().getValue(), Type.PATH_SEGMENT)); + copy.putSingle( + NAMESPACE_PATH_PARAM, + UriComponent.encode(jobId.getNamespace().getValue(), Type.PATH_SEGMENT)); + Map<String, String> singletonMap = new HashMap<>(); + copy.forEach((k, v) -> singletonMap.put(k, v.get(0))); + UriTemplate pathTemplate = ((ExtendedUriInfo) ctx.getUriInfo()).getMatchedTemplates().get(0); + String newPath = pathTemplate.createURI(singletonMap); + return UriBuilder.fromResource(resource.getClass()).path(newPath).buildFromEncodedMap(copy); + } +} diff --git a/api/src/main/java/marquez/common/Utils.java b/api/src/main/java/marquez/common/Utils.java index 2def865580..e2f79ef900 100644 --- a/api/src/main/java/marquez/common/Utils.java +++ b/api/src/main/java/marquez/common/Utils.java @@ -204,6 +204,7 @@ public static Version newJobVersionFor( * @param sourceName The source name of the dataset. * @param physicalName The physical name of the dataset. * @param datasetName The dataset name. + * @param lifecycleState The lifecycle state change of the dataset (e.g., CREATE, DROP, TRUNCATE). * @param fields The fields of the dataset. * @param runId The UUID of the run linked to the dataset. * @return A {@link Version} object based on the specified dataset meta. 
@@ -213,6 +214,7 @@ public static Version newDatasetVersionFor( String sourceName, String physicalName, String datasetName, + String lifecycleState, List fields, UUID runId) { DatasetVersionData data = @@ -221,6 +223,7 @@ public static Version newDatasetVersionFor( .sourceName(sourceName) .physicalName(physicalName) .datasetName(datasetName) + .lifecycleState(lifecycleState) .schemaFields(fields) .runId(runId) .build(); @@ -259,6 +262,7 @@ private static Version newDatasetVersionFor(DatasetVersionData data) { data.getPhysicalName(), data.getSchemaLocation(), data.getFields().stream().map(Utils::joinField).collect(joining(VERSION_DELIM)), + data.getLifecycleState(), data.getRunId()) .getBytes(UTF_8); return Version.of(UUID.nameUUIDFromBytes(bytes)); @@ -275,6 +279,7 @@ private static class DatasetVersionData { private String sourceName; private String physicalName; private String datasetName; + private String lifecycleState; private String schemaLocation; private Set> fields; private UUID runId; diff --git a/api/src/main/java/marquez/common/models/NamespaceName.java b/api/src/main/java/marquez/common/models/NamespaceName.java index 50ff36be50..0db8429bd4 100644 --- a/api/src/main/java/marquez/common/models/NamespaceName.java +++ b/api/src/main/java/marquez/common/models/NamespaceName.java @@ -21,7 +21,7 @@ public final class NamespaceName { private static final int MIN_SIZE = 1; private static final int MAX_SIZE = 1024; private static final Pattern PATTERN = - Pattern.compile(String.format("^[a-zA-Z:/0-9_\\-\\.]{%d,%d}$", MIN_SIZE, MAX_SIZE)); + Pattern.compile(String.format("^[a-zA-Z:;=/0-9_\\-\\.@]{%d,%d}$", MIN_SIZE, MAX_SIZE)); @Getter private final String value; @@ -29,8 +29,8 @@ public NamespaceName(@NonNull final String value) { checkArgument( PATTERN.matcher(value).matches(), "namespace '%s' must contain only letters (a-z, A-Z), numbers (0-9), " - + "underscores (_), dashes (-), colons (:), slashes (/) or dots (.) with a maximum " - + "length of %s characters.", + + "underscores (_), at (@), dashes (-), colons (:), equals (=), semicolons (;), slashes (/) " + + "or dots (.) 
with a maximum length of %s characters.", value, MAX_SIZE); this.value = value; diff --git a/api/src/main/java/marquez/db/Columns.java b/api/src/main/java/marquez/db/Columns.java index 2be2b56f41..64ff9b86f0 100644 --- a/api/src/main/java/marquez/db/Columns.java +++ b/api/src/main/java/marquez/db/Columns.java @@ -25,6 +25,7 @@ @Slf4j public final class Columns { + private Columns() {} private static final ObjectMapper MAPPER = Utils.getMapper(); @@ -69,13 +70,18 @@ private Columns() {} public static final String TAG_UUIDS = "tag_uuids"; public static final String TAGGED_AT = "tagged_at"; public static final String LAST_MODIFIED_AT = "last_modified_at"; + public static final String IS_DELETED = "is_deleted"; /* DATASET VERSION ROW COLUMNS */ public static final String FIELD_UUIDS = "field_uuids"; + public static final String LIFECYCLE_STATE = "lifecycle_state"; /* STREAM VERSION ROW COLUMNS */ public static final String SCHEMA_LOCATION = "schema_location"; + /* JOB ROW COLUMNS */ + public static final String SYMLINK_TARGET_UUID = "symlink_target_uuid"; + /* JOB VERSION I/O ROW COLUMNS */ public static final String INPUT_UUIDS = "input_uuids"; public static final String OUTPUT_UUIDS = "output_uuids"; @@ -160,6 +166,15 @@ public static String stringOrThrow(final ResultSet results, final String column) return results.getString(column); } + public static boolean booleanOrDefault( + final ResultSet results, final String column, final boolean defaultValue) + throws SQLException { + if (results.getObject(column) == null) { + return defaultValue; + } + return results.getBoolean(column); + } + public static int intOrThrow(final ResultSet results, final String column) throws SQLException { if (results.getObject(column) == null) { throw new IllegalArgumentException(); diff --git a/api/src/main/java/marquez/db/DatasetDao.java b/api/src/main/java/marquez/db/DatasetDao.java index 9eac597321..202b3f459e 100644 --- a/api/src/main/java/marquez/db/DatasetDao.java +++ b/api/src/main/java/marquez/db/DatasetDao.java @@ -72,7 +72,7 @@ void updateLastModifiedAt( + " WHERE d.namespace_name = :namespaceName\n" + " AND d.name = :datasetName\n" + "), dataset_runs AS (\n" - + " SELECT d.uuid, d.name, d.namespace_name, dv.run_uuid, event_time, event\n" + + " SELECT d.uuid, d.name, d.namespace_name, dv.run_uuid, dv.lifecycle_state, event_time, event\n" + " FROM selected_datasets d\n" + " INNER JOIN dataset_versions dv ON dv.uuid = d.current_version_uuid\n" + " LEFT JOIN LATERAL (\n" @@ -80,7 +80,7 @@ void updateLastModifiedAt( + " WHERE run_uuid = dv.run_uuid\n" + " ) e ON e.run_uuid = dv.run_uuid\n" + " UNION\n" - + " SELECT d.uuid, d.name, d.namespace_name, rim.run_uuid, event_time, event\n" + + " SELECT d.uuid, d.name, d.namespace_name, rim.run_uuid, lifecycle_state, event_time, event\n" + " FROM selected_datasets d\n" + " INNER JOIN dataset_versions dv ON dv.uuid = d.current_version_uuid\n" + " LEFT JOIN runs_input_mapping rim ON dv.uuid = rim.dataset_version_uuid\n" @@ -89,7 +89,7 @@ void updateLastModifiedAt( + " WHERE run_uuid = rim.run_uuid\n" + " ) e ON e.run_uuid = rim.run_uuid\n" + ")\n" - + "SELECT d.*, dv.fields, sv.schema_location, t.tags, facets\n" + + "SELECT d.*, dv.fields, dv.lifecycle_state, sv.schema_location, t.tags, facets\n" + "FROM selected_datasets d\n" + "LEFT JOIN dataset_versions dv ON d.current_version_uuid = dv.uuid\n" + "LEFT JOIN stream_versions AS sv ON sv.dataset_version_uuid = dv.uuid\n" @@ -142,7 +142,7 @@ default void setFields(Dataset ds) { + " ORDER BY d.name\n" + " LIMIT :limit 
OFFSET :offset\n" + "), dataset_runs AS (\n" - + " SELECT d.uuid, d.name, d.namespace_name, dv.run_uuid, event_time, event\n" + + " SELECT d.uuid, d.name, d.namespace_name, dv.run_uuid, dv.lifecycle_state, event_time, event\n" + " FROM selected_datasets d\n" + " INNER JOIN dataset_versions dv ON dv.uuid = d.current_version_uuid\n" + " LEFT JOIN LATERAL (\n" @@ -150,7 +150,7 @@ default void setFields(Dataset ds) { + " WHERE run_uuid = dv.run_uuid\n" + " ) e ON e.run_uuid = dv.run_uuid\n" + " UNION\n" - + " SELECT d.uuid, d.name, d.namespace_name, rim.run_uuid, event_time, event\n" + + " SELECT d.uuid, d.name, d.namespace_name, rim.run_uuid, lifecycle_state, event_time, event\n" + " FROM selected_datasets d\n" + " INNER JOIN dataset_versions dv ON dv.uuid = d.current_version_uuid\n" + " LEFT JOIN runs_input_mapping rim ON dv.uuid = rim.dataset_version_uuid\n" @@ -159,7 +159,7 @@ default void setFields(Dataset ds) { + " WHERE run_uuid = rim.run_uuid\n" + " ) e ON e.run_uuid = rim.run_uuid\n" + ")\n" - + "SELECT d.*, dv.fields, sv.schema_location, t.tags, facets\n" + + "SELECT d.*, dv.fields, dv.lifecycle_state, sv.schema_location, t.tags, facets\n" + "FROM selected_datasets d\n" + "LEFT JOIN dataset_versions dv ON d.current_version_uuid = dv.uuid\n" + "LEFT JOIN stream_versions AS sv ON sv.dataset_version_uuid = dv.uuid\n" @@ -205,7 +205,8 @@ default List findAllWithTags(String namespaceName, int limit, int offse + "source_name, " + "name, " + "physical_name, " - + "description " + + "description, " + + "is_deleted " + ") VALUES ( " + ":uuid, " + ":type, " @@ -217,13 +218,15 @@ default List findAllWithTags(String namespaceName, int limit, int offse + ":sourceName, " + ":name, " + ":physicalName, " - + ":description) " + + ":description, " + + ":isDeleted) " + "ON CONFLICT (namespace_uuid, name) " + "DO UPDATE SET " + "type = EXCLUDED.type, " + "updated_at = EXCLUDED.updated_at, " + "physical_name = EXCLUDED.physical_name, " - + "description = EXCLUDED.description " + + "description = EXCLUDED.description, " + + "is_deleted = EXCLUDED.is_deleted " + "RETURNING *") DatasetRow upsert( UUID uuid, @@ -235,7 +238,8 @@ DatasetRow upsert( String sourceName, String name, String physicalName, - String description); + String description, + boolean isDeleted); @SqlQuery( "INSERT INTO datasets (" @@ -308,7 +312,8 @@ default Dataset upsertDatasetMeta( sourceRow.getName(), datasetName.getValue(), datasetMeta.getPhysicalName().getValue(), - datasetMeta.getDescription().orElse(null)); + datasetMeta.getDescription().orElse(null), + false); } else { datasetRow = upsert( @@ -340,6 +345,7 @@ default Dataset upsertDatasetMeta( now, namespaceName.getValue(), datasetName.getValue(), + null, datasetMeta); return findWithTags(namespaceName.getValue(), datasetName.getValue()).get(); diff --git a/api/src/main/java/marquez/db/DatasetVersionDao.java b/api/src/main/java/marquez/db/DatasetVersionDao.java index ee99d4e53a..521143ff7f 100644 --- a/api/src/main/java/marquez/db/DatasetVersionDao.java +++ b/api/src/main/java/marquez/db/DatasetVersionDao.java @@ -48,6 +48,7 @@ default DatasetVersionRow upsertDatasetVersion( Instant now, String namespaceName, String datasetName, + String lifecycleState, DatasetMeta datasetMeta) { TagDao tagDao = createTagDao(); DatasetFieldDao datasetFieldDao = createDatasetFieldDao(); @@ -63,7 +64,8 @@ default DatasetVersionRow upsertDatasetVersion( datasetMeta.getRunId().map(RunId::getValue).orElse(null), toPgObjectFields(datasetMeta.getFields()), namespaceName, - datasetName); + datasetName, + 
lifecycleState); updateDatasetVersionMetric( namespaceName, datasetMeta.getType().toString(), @@ -167,7 +169,7 @@ default void updateDatasetVersionMetric( + " FROM selected_dataset_version_runs dv\n" + " LEFT JOIN lineage_events le ON le.run_uuid = dv.run_uuid\n" + ")\n" - + "SELECT d.type, d.name, d.physical_name, d.namespace_name, d.source_name, d.description,\n" + + "SELECT d.type, d.name, d.physical_name, d.namespace_name, d.source_name, d.description, dv.lifecycle_state, \n" + " dv.created_at, dv.version, dv.fields, dv.run_uuid AS createdByRunUuid, sv.schema_location,\n" + " t.tags, f.facets\n" + "FROM selected_dataset_versions dv\n" @@ -209,7 +211,7 @@ default void updateDatasetVersionMetric( + " FROM selected_dataset_version_runs dv\n" + " LEFT JOIN lineage_events le ON le.run_uuid = dv.run_uuid\n" + ")\n" - + "SELECT d.type, d.name, d.physical_name, d.namespace_name, d.source_name, d.description,\n" + + "SELECT d.type, d.name, d.physical_name, d.namespace_name, d.source_name, d.description, dv.lifecycle_state, \n" + " dv.created_at, dv.version, dv.fields, dv.run_uuid AS createdByRunUuid, sv.schema_location,\n" + " t.tags, f.facets\n" + "FROM selected_dataset_versions dv\n" @@ -280,7 +282,7 @@ default Optional findByWithRun(UUID version) { + " FROM selected_dataset_version_runs dv\n" + " LEFT JOIN lineage_events le ON le.run_uuid = dv.run_uuid\n" + ")\n" - + "SELECT d.type, d.name, d.physical_name, d.namespace_name, d.source_name, d.description,\n" + + "SELECT d.type, d.name, d.physical_name, d.namespace_name, d.source_name, d.description, dv.lifecycle_state,\n" + " dv.created_at, dv.version, dv.fields, dv.run_uuid AS createdByRunUuid, sv.schema_location,\n" + " t.tags, f.facets\n" + "FROM selected_dataset_versions dv\n" @@ -324,9 +326,9 @@ default List findAllWithRun( @SqlQuery( "INSERT INTO dataset_versions " - + "(uuid, created_at, dataset_uuid, version, run_uuid, fields, namespace_name, dataset_name) " + + "(uuid, created_at, dataset_uuid, version, run_uuid, fields, namespace_name, dataset_name, lifecycle_state) " + "VALUES " - + "(:uuid, :now, :datasetUuid, :version, :runUuid, :fields, :namespaceName, :datasetName) " + + "(:uuid, :now, :datasetUuid, :version, :runUuid, :fields, :namespaceName, :datasetName, :lifecycleState) " + "ON CONFLICT(version) " + "DO UPDATE SET " + "run_uuid = EXCLUDED.run_uuid " @@ -339,7 +341,8 @@ DatasetVersionRow upsert( UUID runUuid, PGobject fields, String namespaceName, - String datasetName); + String datasetName, + String lifecycleState); @SqlUpdate("UPDATE dataset_versions SET fields = :fields WHERE uuid = :uuid") void updateFields(UUID uuid, PGobject fields); diff --git a/api/src/main/java/marquez/db/DbMigration.java b/api/src/main/java/marquez/db/DbMigration.java index 800eb3de45..a0d47d34f6 100644 --- a/api/src/main/java/marquez/db/DbMigration.java +++ b/api/src/main/java/marquez/db/DbMigration.java @@ -7,6 +7,7 @@ import lombok.extern.slf4j.Slf4j; import org.flywaydb.core.Flyway; import org.flywaydb.core.api.FlywayException; +import org.flywaydb.core.api.output.MigrateResult; @Slf4j public final class DbMigration { @@ -31,8 +32,9 @@ public static void migrateDbOrError( // issues before app termination. 
try { log.info("Migrating database..."); - final int migrations = flyway.migrate(); - log.info("Successfully applied '{}' migrations to database.", migrations); + final MigrateResult migrateResult = flyway.migrate(); + log.info( + "Successfully applied '{}' migrations to database.", migrateResult.migrationsExecuted); } catch (FlywayException errorOnDbMigrate) { log.error("Failed to apply migration to database.", errorOnDbMigrate); try { diff --git a/api/src/main/java/marquez/db/JobDao.java b/api/src/main/java/marquez/db/JobDao.java index 9e49098894..21a3272377 100644 --- a/api/src/main/java/marquez/db/JobDao.java +++ b/api/src/main/java/marquez/db/JobDao.java @@ -48,23 +48,34 @@ public interface JobDao extends BaseDao { void updateVersionFor(UUID rowUuid, Instant updatedAt, UUID currentVersionUuid); @SqlQuery( - "SELECT j.*, jc.context, f.facets\n" - + " FROM jobs AS j\n" - + " LEFT OUTER JOIN job_versions AS jv ON jv.uuid = j.current_version_uuid\n" - + " LEFT OUTER JOIN job_contexts jc ON jc.uuid = j.current_job_context_uuid\n" - + " LEFT OUTER JOIN (\n" - + " SELECT run_uuid, JSON_AGG(e.facets) AS facets\n" - + " FROM (\n" - + " SELECT run_uuid, event->'job'->'facets' AS facets\n" - + " FROM lineage_events AS le\n" - + " INNER JOIN job_versions jv2 ON jv2.latest_run_uuid=le.run_uuid\n" - + " INNER JOIN jobs j2 ON j2.current_version_uuid=jv2.uuid\n" - + " WHERE j2.name=:jobName AND j2.namespace_name=:namespaceName\n" - + " ORDER BY event_time ASC\n" - + " ) e\n" - + " GROUP BY e.run_uuid\n" - + " ) f ON f.run_uuid=jv.latest_run_uuid\n" - + "WHERE j.namespace_name = :namespaceName AND j.name = :jobName") + """ + WITH RECURSIVE job_ids AS ( + SELECT uuid, symlink_target_uuid + FROM jobs j + WHERE j.namespace_name=:namespaceName AND j.name=:jobName + UNION + SELECT j.uuid, j.symlink_target_uuid + FROM jobs j + INNER JOIN job_ids jn ON j.uuid=jn.symlink_target_uuid + ) + SELECT j.*, jc.context, f.facets + FROM jobs j + INNER JOIN job_ids jn ON jn.uuid=j.uuid AND jn.symlink_target_uuid IS NULL + LEFT OUTER JOIN job_versions AS jv ON jv.uuid = j.current_version_uuid + LEFT OUTER JOIN job_contexts jc ON jc.uuid = j.current_job_context_uuid + LEFT OUTER JOIN ( + SELECT run_uuid, JSON_AGG(e.facets) AS facets + FROM ( + SELECT run_uuid, event->'job'->'facets' AS facets + FROM lineage_events AS le + INNER JOIN job_versions jv2 ON jv2.latest_run_uuid=le.run_uuid + INNER JOIN jobs j2 ON j2.current_version_uuid=jv2.uuid + WHERE j2.name=:jobName AND j2.namespace_name=:namespaceName + ORDER BY event_time ASC + ) e + GROUP BY e.run_uuid + ) f ON f.run_uuid=jv.latest_run_uuid + """) Optional<Job> findJobByName(String namespaceName, String jobName); default Optional<Job> findWithRun(String namespaceName, String jobName) { @@ -78,11 +89,21 @@ default Optional<Job> findWithRun(String namespaceName, String jobName) { + """ + WITH RECURSIVE job_ids AS ( + SELECT uuid, symlink_target_uuid + FROM jobs j + WHERE j.namespace_name=:namespaceName AND j.name=:jobName + UNION + SELECT j.uuid, j.symlink_target_uuid + FROM jobs j + INNER JOIN job_ids jn ON j.uuid=jn.symlink_target_uuid + ) + SELECT j.*, n.name AS namespace_name + FROM jobs AS j + INNER JOIN job_ids jn ON jn.uuid=j.uuid AND jn.symlink_target_uuid IS NULL + INNER JOIN namespaces AS n ON j.namespace_uuid = n.uuid + """) Optional<JobRow> findJobByNameAsRow(String 
namespaceName, String jobName); @SqlQuery( @@ -103,14 +124,17 @@ default Optional<Job> findWithRun(String namespaceName, String jobName) { + " GROUP BY e.run_uuid\n" + " ) f ON f.run_uuid=jv.latest_run_uuid\n" + "WHERE j.namespace_name = :namespaceName\n" + + "AND j.symlink_target_uuid IS NULL\n" + "ORDER BY j.name " + "LIMIT :limit OFFSET :offset") List<Job> findAll(String namespaceName, int limit, int offset); - @SqlQuery("SELECT count(*) FROM jobs AS j") - int count(String namespaceName); + @SqlQuery("SELECT count(*) FROM jobs AS j WHERE symlink_target_uuid IS NULL") + int count(); - @SqlQuery("SELECT count(*) FROM jobs AS j WHERE j.namespace_name = :namespaceName") + @SqlQuery( + "SELECT count(*) FROM jobs AS j WHERE j.namespace_name = :namespaceName\n" + + "AND symlink_target_uuid IS NULL") int countFor(String namespaceName); default List<Job> findAllWithRun(String namespaceName, int limit, int offset) { @@ -147,6 +171,15 @@ default void setJobData(Run run, Job j) { default JobRow upsertJobMeta( NamespaceName namespaceName, JobName jobName, JobMeta jobMeta, ObjectMapper mapper) { + return upsertJobMeta(namespaceName, jobName, null, jobMeta, mapper); + } + + default JobRow upsertJobMeta( + NamespaceName namespaceName, + JobName jobName, + UUID symlinkTargetUuid, + JobMeta jobMeta, + ObjectMapper mapper) { Instant createdAt = Instant.now(); NamespaceRow namespace = createNamespaceDao() @@ -170,6 +203,7 @@ default JobRow upsertJobMeta( jobMeta.getDescription().orElse(null), contextRow.getUuid(), toUrlString(jobMeta.getLocation().orElse(null)), + symlinkTargetUuid, toJson(jobMeta.getInputs(), mapper)); } @@ -192,39 +226,45 @@ default PGobject toJson(Set<DatasetId> dataset, ObjectMapper mapper) { } @SqlQuery( - "INSERT INTO jobs (" - + "uuid, " - + "type, " - + "created_at, " - + "updated_at, " - + "namespace_uuid, " - + "namespace_name, " - + "name, " - + "description," - + "current_job_context_uuid," - + "current_location," - + "current_inputs" - + ") VALUES ( " - + ":uuid, " - + ":type, " - + ":now, " - + ":now, " - + ":namespaceUuid, " - + ":namespaceName, " - + ":name, " - + ":description, " - + ":jobContextUuid, " - + ":location, " - + ":inputs " - + ") ON CONFLICT (name, namespace_uuid) DO " - + "UPDATE SET " - + "updated_at = EXCLUDED.updated_at, " - + "type = EXCLUDED.type, " - + "description = EXCLUDED.description, " - + "current_job_context_uuid = EXCLUDED.current_job_context_uuid, " - + "current_location = EXCLUDED.current_location, " - + "current_inputs = EXCLUDED.current_inputs " - + "RETURNING *") + """ + INSERT INTO jobs AS j ( + uuid, + type, + created_at, + updated_at, + namespace_uuid, + namespace_name, + name, + description, + current_job_context_uuid, + current_location, + current_inputs, + symlink_target_uuid + ) VALUES ( + :uuid, + :type, + :now, + :now, + :namespaceUuid, + :namespaceName, + :name, + :description, + :jobContextUuid, + :location, + :inputs, + :symlinkTargetId + ) ON CONFLICT (name, namespace_uuid) DO + UPDATE SET + updated_at = EXCLUDED.updated_at, + type = EXCLUDED.type, + description = EXCLUDED.description, + current_job_context_uuid = EXCLUDED.current_job_context_uuid, + current_location = EXCLUDED.current_location, + current_inputs = EXCLUDED.current_inputs, + -- update the symlink target if not null; 
otherwise, keep the old value + symlink_target_uuid = COALESCE(EXCLUDED.symlink_target_uuid, j.symlink_target_uuid) + RETURNING * + """) JobRow upsertJob( UUID uuid, JobType type, @@ -235,5 +275,6 @@ JobRow upsertJob( String description, UUID jobContextUuid, String location, + UUID symlinkTargetId, PGobject inputs); } diff --git a/api/src/main/java/marquez/db/LineageDao.java b/api/src/main/java/marquez/db/LineageDao.java index 3ad077778a..0fd25c2643 100644 --- a/api/src/main/java/marquez/db/LineageDao.java +++ b/api/src/main/java/marquez/db/LineageDao.java @@ -67,7 +67,7 @@ public interface LineageDao { Optional<UUID> getJobUuid(String jobName, String namespace); @SqlQuery( - "SELECT ds.*, dv.fields\n" + "SELECT ds.*, dv.fields, dv.lifecycle_state\n" + "FROM datasets ds\n" + "LEFT JOIN dataset_versions dv on dv.uuid = ds.current_version_uuid\n" + "WHERE ds.uuid IN (<dsUuids>);") diff --git a/api/src/main/java/marquez/db/OpenLineageDao.java b/api/src/main/java/marquez/db/OpenLineageDao.java index 3d28226faa..e83d0b9ff1 100644 --- a/api/src/main/java/marquez/db/OpenLineageDao.java +++ b/api/src/main/java/marquez/db/OpenLineageDao.java @@ -41,6 +41,7 @@ import marquez.service.models.LineageEvent.Dataset; import marquez.service.models.LineageEvent.DatasetFacets; import marquez.service.models.LineageEvent.Job; +import marquez.service.models.LineageEvent.LifecycleStateChangeFacet; import marquez.service.models.LineageEvent.SchemaDatasetFacet; import marquez.service.models.LineageEvent.SchemaField; import org.jdbi.v3.sqlobject.statement.SqlUpdate; @@ -132,6 +133,7 @@ default UpdateLineageRow updateBaseMarquezModel(LineageEvent event, ObjectMapper description, jobContext.getUuid(), location, + null, jobDao.toJson(toDatasetId(event.getInputs()), mapper)); bag.setJob(job); @@ -350,6 +352,12 @@ default DatasetRecord upsertLineageDataset( formatNamespaceName(ds.getNamespace()), DEFAULT_NAMESPACE_OWNER); + String dslifecycleState = + Optional.ofNullable(ds.getFacets()) + .map(DatasetFacets::getLifecycleStateChange) + .map(LifecycleStateChangeFacet::getLifecycleStateChange) + .orElse(""); + DatasetRow datasetRow = datasetDao.upsert( UUID.randomUUID(), @@ -361,7 +369,8 @@ default DatasetRecord upsertLineageDataset( source.getName(), formatDatasetName(ds.getName()), ds.getName(), - dsDescription); + dsDescription, + dslifecycleState.equalsIgnoreCase("DROP")); List<SchemaField> fields = Optional.ofNullable(ds.getFacets()) @@ -385,6 +394,7 @@ default DatasetRecord upsertLineageDataset( source.getName(), dsRow.getPhysicalName(), dsRow.getName(), + dslifecycleState, fields, runUuid) .getValue(); @@ -397,8 +407,8 @@ default DatasetRecord upsertLineageDataset( isInput ? 
null : runUuid, datasetVersionDao.toPgObjectSchemaFields(fields), dsNamespace.getName(), - ds.getName()); - + ds.getName(), + dslifecycleState); return row; }); List datasetFieldMappings = new ArrayList<>(); diff --git a/api/src/main/java/marquez/db/RunDao.java b/api/src/main/java/marquez/db/RunDao.java index 6dc2e9c548..7cfb24f1e0 100644 --- a/api/src/main/java/marquez/db/RunDao.java +++ b/api/src/main/java/marquez/db/RunDao.java @@ -104,39 +104,50 @@ public interface RunDao extends BaseDao { Optional findRunByUuidAsRow(UUID runUuid); @SqlQuery( - "SELECT r.*, ra.args, ctx.context, f.facets,\n" - + "jv.namespace_name, jv.job_name, jv.version AS job_version,\n" - + "ri.input_versions, ro.output_versions\n" - + "FROM runs AS r\n" - + "LEFT OUTER JOIN\n" - + "(\n" - + " SELECT le.run_uuid, JSON_AGG(event->'run'->'facets') AS facets\n" - + " FROM lineage_events le\n" - + " INNER JOIN runs ON runs.uuid=le.run_uuid\n" - + " WHERE runs.job_name=:jobName AND runs.namespace_name=:namespace\n" - + " GROUP BY le.run_uuid\n" - + ") AS f ON r.uuid=f.run_uuid\n" - + "LEFT OUTER JOIN run_args AS ra ON ra.uuid = r.run_args_uuid\n" - + "LEFT OUTER JOIN job_contexts AS ctx ON r.job_context_uuid = ctx.uuid\n" - + "LEFT OUTER JOIN job_versions jv ON jv.uuid=r.job_version_uuid\n" - + "LEFT OUTER JOIN (\n" - + " SELECT im.run_uuid, JSON_AGG(json_build_object('namespace', dv.namespace_name,\n" - + " 'name', dv.dataset_name,\n" - + " 'version', dv.version)) AS input_versions\n" - + " FROM runs_input_mapping im\n" - + " INNER JOIN dataset_versions dv on im.dataset_version_uuid = dv.uuid\n" - + " GROUP BY im.run_uuid\n" - + ") ri ON ri.run_uuid=r.uuid\n" - + "LEFT OUTER JOIN (\n" - + " SELECT run_uuid, JSON_AGG(json_build_object('namespace', namespace_name,\n" - + " 'name', dataset_name,\n" - + " 'version', version)) AS output_versions\n" - + " FROM dataset_versions\n" - + " GROUP BY run_uuid\n" - + ") ro ON ro.run_uuid=r.uuid\n" - + "WHERE r.namespace_name = :namespace and r.job_name = :jobName\n" - + "ORDER BY STARTED_AT DESC NULLS LAST\n" - + "LIMIT :limit OFFSET :offset") + """ + WITH RECURSIVE job_names AS ( + SELECT uuid, namespace_name, name, symlink_target_uuid + FROM jobs j + WHERE j.namespace_name=:namespace AND j.name=:jobName + UNION + SELECT j.uuid, j.namespace_name, j.name, j.symlink_target_uuid + FROM jobs j + INNER JOIN job_names jn ON j.uuid=jn.symlink_target_uuid OR j.symlink_target_uuid=jn.uuid + ) + SELECT r.*, ra.args, ctx.context, f.facets, + jv.namespace_name, jv.job_name, jv.version AS job_version, + ri.input_versions, ro.output_versions + FROM runs AS r + INNER JOIN job_names j ON r.namespace_name=j.namespace_name AND r.job_name=j.name + LEFT OUTER JOIN + ( + SELECT le.run_uuid, JSON_AGG(event->'run'->'facets') AS facets + FROM lineage_events le + INNER JOIN runs ON runs.uuid=le.run_uuid + WHERE runs.job_name=:jobName AND runs.namespace_name=:namespace + GROUP BY le.run_uuid + ) AS f ON r.uuid=f.run_uuid + LEFT OUTER JOIN run_args AS ra ON ra.uuid = r.run_args_uuid + LEFT OUTER JOIN job_contexts AS ctx ON r.job_context_uuid = ctx.uuid + LEFT OUTER JOIN job_versions jv ON jv.uuid=r.job_version_uuid + LEFT OUTER JOIN ( + SELECT im.run_uuid, JSON_AGG(json_build_object('namespace', dv.namespace_name, + 'name', dv.dataset_name, + 'version', dv.version)) AS input_versions + FROM runs_input_mapping im + INNER JOIN dataset_versions dv on im.dataset_version_uuid = dv.uuid + GROUP BY im.run_uuid + ) ri ON ri.run_uuid=r.uuid + LEFT OUTER JOIN ( + SELECT run_uuid, 
JSON_AGG(json_build_object('namespace', namespace_name, + 'name', dataset_name, + 'version', version)) AS output_versions + FROM dataset_versions + GROUP BY run_uuid + ) ro ON ro.run_uuid=r.uuid + ORDER BY STARTED_AT DESC NULLS LAST + LIMIT :limit OFFSET :offset + """) List<Run> findAll(String namespace, String jobName, int limit, int offset); @SqlQuery( @@ -277,6 +288,7 @@ default void upsertOutputDatasetsFor(UUID runUuid, ImmutableSet runOu d.getSourceName().getValue(), d.getPhysicalName().getValue(), d.getName().getValue(), + null, toSchemaFields(d.getFields()), runUuid) .getValue(); @@ -288,7 +300,8 @@ default void upsertOutputDatasetsFor(UUID runUuid, ImmutableSet runOu runUuid, datasetVersionDao.toPgObjectFields(d.getFields()), d.getNamespace().getValue(), - d.getName().getValue()); + d.getName().getValue(), + null); }); } } @@ -382,11 +395,25 @@ default RunRow upsertRunMeta( void updateJobVersion(UUID runUuid, UUID jobVersionUuid); @SqlQuery( - BASE_FIND_RUN_SQL - + "WHERE r.uuid=(\n" - + " SELECT uuid FROM runs WHERE namespace_name = :namespace and job_name = :jobName\n" - + " ORDER BY transitioned_at DESC\n" - + " LIMIT 1\n" - + ")") + """ + WITH RECURSIVE job_names AS ( + SELECT uuid, namespace_name, name, symlink_target_uuid + FROM jobs j + WHERE j.namespace_name=:namespace AND j.name=:jobName + UNION + SELECT j.uuid, j.namespace_name, j.name, j.symlink_target_uuid + FROM jobs j + INNER JOIN job_names jn ON j.uuid=jn.symlink_target_uuid OR j.symlink_target_uuid=jn.uuid + ) + """ + + BASE_FIND_RUN_SQL + + """ + WHERE r.uuid=( + SELECT r.uuid FROM runs r + INNER JOIN job_names j ON j.namespace_name=r.namespace_name AND j.name=r.job_name + ORDER BY transitioned_at DESC + LIMIT 1 + ) + """) Optional<Run> findByLatestJob(String namespace, String jobName); } diff --git a/api/src/main/java/marquez/db/mappers/DatasetDataMapper.java b/api/src/main/java/marquez/db/mappers/DatasetDataMapper.java index 2a21de7802..b7334805ac 100644 --- a/api/src/main/java/marquez/db/mappers/DatasetDataMapper.java +++ b/api/src/main/java/marquez/db/mappers/DatasetDataMapper.java @@ -52,7 +52,8 @@ public DatasetData map(@NonNull ResultSet results, @NonNull StatementContext con toFields(results, "fields"), ImmutableSet.of(), timestampOrNull(results, Columns.LAST_MODIFIED_AT), - stringOrNull(results, Columns.DESCRIPTION)); + stringOrNull(results, Columns.DESCRIPTION), + stringOrNull(results, Columns.LIFECYCLE_STATE)); } public static ImmutableList<Field> toFields(ResultSet results, String column) diff --git a/api/src/main/java/marquez/db/mappers/DatasetMapper.java b/api/src/main/java/marquez/db/mappers/DatasetMapper.java index 2fef22e829..d812015eb0 100644 --- a/api/src/main/java/marquez/db/mappers/DatasetMapper.java +++ b/api/src/main/java/marquez/db/mappers/DatasetMapper.java @@ -2,6 +2,7 @@ package marquez.db.mappers; +import static marquez.db.Columns.booleanOrDefault; import static marquez.db.Columns.stringArrayOrThrow; import static marquez.db.Columns.stringOrNull; import static marquez.db.Columns.stringOrThrow; @@ -62,9 +63,11 @@ public Dataset map(@NonNull ResultSet results, @NonNull StatementContext context toFields(results, "fields"), toTags(results, "tags"), timestampOrNull(results, Columns.LAST_MODIFIED_AT), + stringOrNull(results, Columns.LIFECYCLE_STATE), stringOrNull(results, Columns.DESCRIPTION), uuidOrNull(results, Columns.CURRENT_VERSION_UUID), - toFacetsOrNull(results, Columns.FACETS)); + toFacetsOrNull(results, Columns.FACETS), + booleanOrDefault(results, Columns.IS_DELETED, false)); } else { return new 
Stream( new DatasetId( @@ -79,9 +82,11 @@ public Dataset map(@NonNull ResultSet results, @NonNull StatementContext context toFields(results, "fields"), toTags(results, "tags"), timestampOrNull(results, Columns.LAST_MODIFIED_AT), + stringOrNull(results, Columns.LIFECYCLE_STATE), stringOrNull(results, Columns.DESCRIPTION), uuidOrNull(results, Columns.CURRENT_VERSION_UUID), - toFacetsOrNull(results, Columns.FACETS)); + toFacetsOrNull(results, Columns.FACETS), + booleanOrDefault(results, Columns.IS_DELETED, false)); } } diff --git a/api/src/main/java/marquez/db/mappers/DatasetRowMapper.java b/api/src/main/java/marquez/db/mappers/DatasetRowMapper.java index 9a6a8461f5..71969f2775 100644 --- a/api/src/main/java/marquez/db/mappers/DatasetRowMapper.java +++ b/api/src/main/java/marquez/db/mappers/DatasetRowMapper.java @@ -2,6 +2,7 @@ package marquez.db.mappers; +import static marquez.db.Columns.booleanOrDefault; import static marquez.db.Columns.stringOrNull; import static marquez.db.Columns.stringOrThrow; import static marquez.db.Columns.timestampOrNull; @@ -32,6 +33,7 @@ public DatasetRow map(@NonNull ResultSet results, @NonNull StatementContext cont stringOrThrow(results, Columns.PHYSICAL_NAME), timestampOrNull(results, Columns.LAST_MODIFIED_AT), stringOrNull(results, Columns.DESCRIPTION), - uuidOrNull(results, Columns.CURRENT_VERSION_UUID)); + uuidOrNull(results, Columns.CURRENT_VERSION_UUID), + booleanOrDefault(results, Columns.IS_DELETED, false)); } } diff --git a/api/src/main/java/marquez/db/mappers/DatasetVersionMapper.java b/api/src/main/java/marquez/db/mappers/DatasetVersionMapper.java index 8042a33596..92dfdee8cf 100644 --- a/api/src/main/java/marquez/db/mappers/DatasetVersionMapper.java +++ b/api/src/main/java/marquez/db/mappers/DatasetVersionMapper.java @@ -54,6 +54,7 @@ public DatasetVersion map(@NonNull ResultSet results, @NonNull StatementContext toFields(results, "fields"), columnNames.contains("tags") ? toTags(results, "tags") : null, stringOrNull(results, Columns.DESCRIPTION), + stringOrNull(results, Columns.LIFECYCLE_STATE), null, toFacetsOrNull(results, Columns.FACETS)); } else { @@ -71,6 +72,7 @@ public DatasetVersion map(@NonNull ResultSet results, @NonNull StatementContext toFields(results, "fields"), columnNames.contains("tags") ? 
toTags(results, "tags") : null, stringOrNull(results, Columns.DESCRIPTION), + stringOrNull(results, Columns.LIFECYCLE_STATE), null, toFacetsOrNull(results, Columns.FACETS)); } diff --git a/api/src/main/java/marquez/db/mappers/DatasetVersionRowMapper.java b/api/src/main/java/marquez/db/mappers/DatasetVersionRowMapper.java index 3854498984..78c5756df8 100644 --- a/api/src/main/java/marquez/db/mappers/DatasetVersionRowMapper.java +++ b/api/src/main/java/marquez/db/mappers/DatasetVersionRowMapper.java @@ -2,6 +2,7 @@ package marquez.db.mappers; +import static marquez.db.Columns.stringOrNull; import static marquez.db.Columns.timestampOrThrow; import static marquez.db.Columns.uuidOrNull; import static marquez.db.Columns.uuidOrThrow; @@ -23,6 +24,7 @@ public DatasetVersionRow map(@NonNull ResultSet results, @NonNull StatementConte timestampOrThrow(results, Columns.CREATED_AT), uuidOrThrow(results, Columns.DATASET_UUID), uuidOrThrow(results, Columns.VERSION), + stringOrNull(results, Columns.LIFECYCLE_STATE), uuidOrNull(results, Columns.RUN_UUID)); } } diff --git a/api/src/main/java/marquez/db/mappers/ExtendedDatasetVersionRowMapper.java b/api/src/main/java/marquez/db/mappers/ExtendedDatasetVersionRowMapper.java index 11143e6c21..5c02a185ff 100644 --- a/api/src/main/java/marquez/db/mappers/ExtendedDatasetVersionRowMapper.java +++ b/api/src/main/java/marquez/db/mappers/ExtendedDatasetVersionRowMapper.java @@ -24,6 +24,7 @@ public ExtendedDatasetVersionRow map( timestampOrThrow(results, Columns.CREATED_AT), uuidOrThrow(results, Columns.DATASET_UUID), uuidOrThrow(results, Columns.VERSION), + stringOrNull(results, Columns.LIFECYCLE_STATE), uuidOrNull(results, Columns.RUN_UUID), stringOrNull(results, Columns.NAMESPACE_NAME), stringOrNull(results, Columns.DATASET_NAME)); diff --git a/api/src/main/java/marquez/db/mappers/JobRowMapper.java b/api/src/main/java/marquez/db/mappers/JobRowMapper.java index 35f07a7244..7c865b69ef 100644 --- a/api/src/main/java/marquez/db/mappers/JobRowMapper.java +++ b/api/src/main/java/marquez/db/mappers/JobRowMapper.java @@ -43,7 +43,8 @@ public JobRow map(@NonNull ResultSet results, @NonNull StatementContext context) uuidOrNull(results, Columns.CURRENT_VERSION_UUID), uuidOrNull(results, "current_job_context_uuid"), stringOrNull(results, "current_location"), - getDatasetFromJsonOrNull(results, "current_inputs")); + getDatasetFromJsonOrNull(results, "current_inputs"), + uuidOrNull(results, Columns.SYMLINK_TARGET_UUID)); } Set<DatasetId> getDatasetFromJsonOrNull(@NonNull ResultSet results, String column) diff --git a/api/src/main/java/marquez/db/models/DatasetData.java b/api/src/main/java/marquez/db/models/DatasetData.java index a435a664d0..ed32e28488 100644 --- a/api/src/main/java/marquez/db/models/DatasetData.java +++ b/api/src/main/java/marquez/db/models/DatasetData.java @@ -36,6 +36,7 @@ public class DatasetData implements NodeData { @NonNull ImmutableSet<TagName> tags; @Nullable Instant lastModifiedAt; @Nullable String description; + @Nullable String lastlifecycleState; public Optional<Instant> getLastModifiedAt() { return Optional.ofNullable(lastModifiedAt); } @@ -45,6 +46,10 @@ public Optional<String> getDescription() { return Optional.ofNullable(description); } + public Optional<String> getLastlifecycleState() { + return Optional.ofNullable(lastlifecycleState); + } + @JsonIgnore public UUID getUuid() { return uuid; } diff --git a/api/src/main/java/marquez/db/models/DatasetRow.java b/api/src/main/java/marquez/db/models/DatasetRow.java index 1f695c16ab..0e57f6bb03 100644 --- 
a/api/src/main/java/marquez/db/models/DatasetRow.java +++ b/api/src/main/java/marquez/db/models/DatasetRow.java @@ -28,6 +28,7 @@ public class DatasetRow { @Nullable private final Instant lastModifiedAt; @Nullable private final String description; @With @Nullable private final UUID currentVersionUuid; + @Getter private final boolean isDeleted; public Optional<Instant> getLastModifiedAt() { return Optional.ofNullable(lastModifiedAt); diff --git a/api/src/main/java/marquez/db/models/DatasetVersionRow.java b/api/src/main/java/marquez/db/models/DatasetVersionRow.java index fafd17470c..b2f491c379 100644 --- a/api/src/main/java/marquez/db/models/DatasetVersionRow.java +++ b/api/src/main/java/marquez/db/models/DatasetVersionRow.java @@ -20,6 +20,7 @@ public class DatasetVersionRow { @Getter @NonNull private final Instant createdAt; @Getter @NonNull private final UUID datasetUuid; @Getter @NonNull private final UUID version; + @Getter @Nullable private final String lifecycleState; @Nullable private final UUID runUuid; public Optional<UUID> getRunUuid() { diff --git a/api/src/main/java/marquez/db/models/ExtendedDatasetVersionRow.java b/api/src/main/java/marquez/db/models/ExtendedDatasetVersionRow.java index b1084737d3..80b876f6b2 100644 --- a/api/src/main/java/marquez/db/models/ExtendedDatasetVersionRow.java +++ b/api/src/main/java/marquez/db/models/ExtendedDatasetVersionRow.java @@ -4,6 +4,7 @@ import java.time.Instant; import java.util.UUID; +import javax.annotation.Nullable; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.NonNull; @@ -20,10 +21,11 @@ public ExtendedDatasetVersionRow( @NonNull Instant createdAt, @NonNull UUID datasetUuid, @NonNull UUID version, + @Nullable String lifecycleState, UUID runUuid, @NonNull final String namespaceName, @NonNull final String datasetName) { - super(uuid, createdAt, datasetUuid, version, runUuid); + super(uuid, createdAt, datasetUuid, version, lifecycleState, runUuid); this.namespaceName = namespaceName; this.datasetName = datasetName; } diff --git a/api/src/main/java/marquez/db/models/JobRow.java b/api/src/main/java/marquez/db/models/JobRow.java index e2aed8accd..3cc45ab2ed 100644 --- a/api/src/main/java/marquez/db/models/JobRow.java +++ b/api/src/main/java/marquez/db/models/JobRow.java @@ -24,6 +24,7 @@ public class JobRow { @Nullable UUID jobContextUuid; @Nullable String location; @Nullable Set<DatasetId> inputs; + @Nullable UUID symlinkTargetId; public Optional<String> getDescription() { return Optional.ofNullable(description); diff --git a/api/src/main/java/marquez/service/models/Dataset.java b/api/src/main/java/marquez/service/models/Dataset.java index c72128835f..26599d87b1 100644 --- a/api/src/main/java/marquez/service/models/Dataset.java +++ b/api/src/main/java/marquez/service/models/Dataset.java @@ -47,9 +47,11 @@ public abstract class Dataset { @Getter @Setter private List<Field> fields; @Getter private final ImmutableSet<TagName> tags; @Nullable private final Instant lastModifiedAt; + @Nullable private final String lastLifecycleState; @Nullable private final String description; @Nullable private final UUID currentVersion; @Getter ImmutableMap<String, Object> facets; + @Getter private final boolean isDeleted; public Dataset( @NonNull final DatasetId id, @@ -62,9 +64,11 @@ public Dataset( @Nullable final ImmutableList<Field> fields, @Nullable final ImmutableSet<TagName> tags, @Nullable final Instant lastModifiedAt, + @Nullable final String lastLifecycleState, @Nullable final String description, @Nullable final UUID currentVersion, - @Nullable final ImmutableMap<String, Object> facets) { + @Nullable final ImmutableMap<String, Object> 
facets, + boolean isDeleted) { this.id = id; this.type = type; this.name = name; @@ -76,9 +80,11 @@ public Dataset( this.fields = (fields == null) ? ImmutableList.of() : fields; this.tags = (tags == null) ? ImmutableSet.of() : tags; this.lastModifiedAt = lastModifiedAt; + this.lastLifecycleState = lastLifecycleState; this.description = description; this.currentVersion = currentVersion; this.facets = (facets == null) ? ImmutableMap.of() : facets; + this.isDeleted = isDeleted; } public Optional<Instant> getLastModifiedAt() { @@ -89,6 +95,10 @@ public Optional<String> getDescription() { return Optional.ofNullable(description); } + public Optional<String> getLastLifecycleState() { + return Optional.ofNullable(lastLifecycleState); + } + public Optional<UUID> getCurrentVersion() { return Optional.ofNullable(currentVersion); } diff --git a/api/src/main/java/marquez/service/models/DatasetVersion.java b/api/src/main/java/marquez/service/models/DatasetVersion.java index 5f616eed6d..fb3eecfb39 100644 --- a/api/src/main/java/marquez/service/models/DatasetVersion.java +++ b/api/src/main/java/marquez/service/models/DatasetVersion.java @@ -47,6 +47,7 @@ public abstract class DatasetVersion { @Getter private final SourceName sourceName; @Getter @Setter private ImmutableList<Field> fields; @Getter @Setter private ImmutableSet<TagName> tags; + @Nullable private final String lifecycleState; @Nullable private final String description; @Nullable @Setter private Run createdByRun; @Nullable @Setter private UUID createdByRunUuid; @@ -62,6 +63,7 @@ public DatasetVersion( @NonNull final SourceName sourceName, @Nullable final ImmutableList<Field> fields, @Nullable final ImmutableSet<TagName> tags, + @Nullable final String lifecycleState, @Nullable final String description, @Nullable final Run createdByRun, @Nullable final ImmutableMap<String, Object> facets) { @@ -75,6 +77,7 @@ public DatasetVersion( this.sourceName = sourceName; this.fields = (fields == null) ? ImmutableList.of() : fields; this.tags = (tags == null) ? ImmutableSet.of() : tags; + this.lifecycleState = lifecycleState; this.description = description; this.createdByRun = createdByRun; this.facets = (facets == null) ? 
ImmutableMap.of() : facets; @@ -88,6 +91,10 @@ public Optional<Run> getCreatedByRun() { return Optional.ofNullable(createdByRun); } + public Optional<String> getLifecycleState() { + return Optional.ofNullable(lifecycleState); + } + @JsonIgnore public UUID getCreatedByRunUuid() { return createdByRunUuid; } diff --git a/api/src/main/java/marquez/service/models/DbTable.java b/api/src/main/java/marquez/service/models/DbTable.java index 283985583c..bde85ddcfb 100644 --- a/api/src/main/java/marquez/service/models/DbTable.java +++ b/api/src/main/java/marquez/service/models/DbTable.java @@ -31,9 +31,11 @@ public DbTable( @Nullable final ImmutableList<Field> fields, @Nullable final ImmutableSet<TagName> tags, @Nullable final Instant lastModifiedAt, + @Nullable final String lastLifecycleState, @Nullable final String description, @Nullable final UUID currentVersion, - @Nullable final ImmutableMap<String, Object> facets) { + @Nullable final ImmutableMap<String, Object> facets, + final boolean isDeleted) { super( id, DB_TABLE, @@ -45,8 +47,10 @@ public DbTable( fields, tags, lastModifiedAt, + lastLifecycleState, description, currentVersion, - facets); + facets, + isDeleted); } } diff --git a/api/src/main/java/marquez/service/models/DbTableVersion.java b/api/src/main/java/marquez/service/models/DbTableVersion.java index 81d6bdbb24..acaab509ca 100644 --- a/api/src/main/java/marquez/service/models/DbTableVersion.java +++ b/api/src/main/java/marquez/service/models/DbTableVersion.java @@ -31,6 +31,7 @@ public DbTableVersion( @Nullable final ImmutableList<Field> fields, @Nullable final ImmutableSet<TagName> tags, @Nullable final String description, + @Nullable final String lifecycleState, @Nullable final Run createdByRun, @Nullable final ImmutableMap<String, Object> facets) { super( @@ -43,6 +44,7 @@ public DbTableVersion( sourceName, fields, tags, + lifecycleState, description, createdByRun, facets); diff --git a/api/src/main/java/marquez/service/models/LineageEvent.java b/api/src/main/java/marquez/service/models/LineageEvent.java index f8d29ebb32..8c1a3b21bb 100644 --- a/api/src/main/java/marquez/service/models/LineageEvent.java +++ b/api/src/main/java/marquez/service/models/LineageEvent.java @@ -309,11 +309,18 @@ public static class Dataset extends BaseJsonModel { @Setter @Valid @ToString - @JsonPropertyOrder({"documentation", "schema", "dataSource", "description"}) + @JsonPropertyOrder({ + "documentation", + "schema", + "dataSource", + "description", + "lifecycleStateChange" + }) public static class DatasetFacets { @Valid private DocumentationDatasetFacet documentation; @Valid private SchemaDatasetFacet schema; + @Valid private LifecycleStateChangeFacet lifecycleStateChange; @Valid private DatasourceDatasetFacet dataSource; private String description; @Builder.Default @JsonIgnore private Map<String, Object> additional = new LinkedHashMap<>(); @@ -336,6 +343,10 @@ public SchemaDatasetFacet getSchema() { return schema; } + public LifecycleStateChangeFacet getLifecycleStateChange() { + return lifecycleStateChange; + } + public DatasourceDatasetFacet getDataSource() { return dataSource; } @@ -411,4 +422,21 @@ public DatasourceDatasetFacet( this.uri = uri; } } + + @NoArgsConstructor + @Getter + @Setter + @Valid + @ToString + public static class LifecycleStateChangeFacet extends BaseFacet { + + private String lifecycleStateChange; + + @Builder + public LifecycleStateChangeFacet( + @NotNull URI _producer, @NotNull URI _schemaURL, String lifecycleStateChange) { + super(_producer, _schemaURL); + this.lifecycleStateChange = lifecycleStateChange; + } + } } diff --git a/api/src/main/java/marquez/service/models/Stream.java 
b/api/src/main/java/marquez/service/models/Stream.java index 29dc4135ec..ddd6d63e26 100644 --- a/api/src/main/java/marquez/service/models/Stream.java +++ b/api/src/main/java/marquez/service/models/Stream.java @@ -37,9 +37,11 @@ public Stream( @Nullable final ImmutableList fields, @Nullable final ImmutableSet tags, @Nullable final Instant lastModifiedAt, + @Nullable final String lastLifecycleState, @Nullable final String description, @Nullable final UUID currentVersion, - @Nullable final ImmutableMap facets) { + @Nullable final ImmutableMap facets, + final boolean isDeleted) { super( id, STREAM, @@ -51,9 +53,11 @@ public Stream( fields, tags, lastModifiedAt, + lastLifecycleState, description, currentVersion, - facets); + facets, + isDeleted); this.schemaLocation = schemaLocation; } } diff --git a/api/src/main/java/marquez/service/models/StreamVersion.java b/api/src/main/java/marquez/service/models/StreamVersion.java index 3bace63c22..ed6e8dd529 100644 --- a/api/src/main/java/marquez/service/models/StreamVersion.java +++ b/api/src/main/java/marquez/service/models/StreamVersion.java @@ -37,6 +37,7 @@ public StreamVersion( @Nullable final ImmutableList fields, @Nullable final ImmutableSet tags, @Nullable final String description, + @Nullable final String lifecycleState, @Nullable final Run createdByRun, @Nullable final ImmutableMap facets) { super( @@ -49,6 +50,7 @@ public StreamVersion( sourceName, fields, tags, + lifecycleState, description, createdByRun, facets); diff --git a/api/src/main/resources/marquez/db/migration/V40__alter_tables_resize_namespace_and_connection_url.sql b/api/src/main/resources/marquez/db/migration/V40__alter_tables_resize_namespace_and_connection_url.sql new file mode 100644 index 0000000000..f9c274f244 --- /dev/null +++ b/api/src/main/resources/marquez/db/migration/V40__alter_tables_resize_namespace_and_connection_url.sql @@ -0,0 +1,10 @@ +ALTER TABLE dataset_versions ALTER COLUMN namespace_name TYPE VARCHAR; +ALTER TABLE dataset_versions ALTER COLUMN dataset_name TYPE VARCHAR; +ALTER TABLE datasets ALTER COLUMN name TYPE VARCHAR; +ALTER TABLE datasets ALTER COLUMN physical_name TYPE VARCHAR; +ALTER TABLE datasets ALTER COLUMN source_name TYPE VARCHAR; +ALTER TABLE namespaces ALTER COLUMN name TYPE VARCHAR; +ALTER TABLE runs ALTER COLUMN external_id TYPE VARCHAR; +ALTER TABLE sources ALTER COLUMN name TYPE VARCHAR; +ALTER TABLE sources ALTER COLUMN connection_url TYPE VARCHAR; +ALTER TABLE tags ALTER COLUMN name TYPE VARCHAR; \ No newline at end of file diff --git a/api/src/main/resources/marquez/db/migration/V41__add_operation_to_dataset_versions.sql b/api/src/main/resources/marquez/db/migration/V41__add_operation_to_dataset_versions.sql new file mode 100644 index 0000000000..6219f40082 --- /dev/null +++ b/api/src/main/resources/marquez/db/migration/V41__add_operation_to_dataset_versions.sql @@ -0,0 +1,2 @@ +alter table dataset_versions add column lifecycle_state VARCHAR(63); +alter table datasets add column is_deleted BOOLEAN DEFAULT FALSE; \ No newline at end of file diff --git a/api/src/main/resources/marquez/db/migration/V42__add_job_symlink_target.sql b/api/src/main/resources/marquez/db/migration/V42__add_job_symlink_target.sql new file mode 100644 index 0000000000..07040a07a7 --- /dev/null +++ b/api/src/main/resources/marquez/db/migration/V42__add_job_symlink_target.sql @@ -0,0 +1,4 @@ +ALTER TABLE jobs ADD COLUMN symlink_target_uuid uuid REFERENCES jobs (uuid); +CREATE INDEX jobs_symlinks ON jobs (symlink_target_uuid) + INCLUDE (uuid, namespace_name, name) + 
WHERE symlink_target_uuid IS NOT NULL; \ No newline at end of file diff --git a/api/src/test/java/marquez/MarquezAppIntegrationTest.java b/api/src/test/java/marquez/MarquezAppIntegrationTest.java index eb695cc3d9..9679aa96bb 100644 --- a/api/src/test/java/marquez/MarquezAppIntegrationTest.java +++ b/api/src/test/java/marquez/MarquezAppIntegrationTest.java @@ -17,6 +17,7 @@ import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import java.net.URL; +import java.sql.SQLException; import java.time.Instant; import java.time.temporal.ChronoUnit; import java.util.List; @@ -42,11 +43,20 @@ import marquez.client.models.StreamMeta; import marquez.client.models.Tag; import marquez.common.models.DatasetName; +import marquez.common.models.JobType; +import marquez.db.JobDao; +import marquez.db.NamespaceDao; +import marquez.db.models.JobRow; +import marquez.db.models.NamespaceRow; import marquez.jdbi.MarquezJdbiExternalPostgresExtension; +import org.jdbi.v3.core.Jdbi; +import org.jdbi.v3.postgres.PostgresPlugin; +import org.jdbi.v3.sqlobject.SqlObjectPlugin; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; +import org.postgresql.util.PGobject; @org.junit.jupiter.api.Tag("IntegrationTests") @ExtendWith(MarquezJdbiExternalPostgresExtension.class) @@ -626,4 +636,68 @@ public void testApp_search() { assertThat(result.getType()).isEqualTo(SearchResult.ResultType.DATASET); assertThat(result.getName()).isEqualTo(datasetName); } + + @Test + public void testApp_getJob() throws SQLException { + Jdbi jdbi = + Jdbi.create(POSTGRES.getJdbcUrl(), POSTGRES.getUsername(), POSTGRES.getPassword()) + .installPlugin(new SqlObjectPlugin()) + .installPlugin(new PostgresPlugin()); + createNamespace(NAMESPACE_NAME); + + // Create job + String jobName = newJobName().getValue(); + final JobMeta jobMeta = + JobMeta.builder() + .type(JOB_TYPE) + .inputs(ImmutableSet.of()) + .outputs(ImmutableSet.of()) + .location(JOB_LOCATION) + .context(JOB_CONTEXT) + .description(JOB_DESCRIPTION) + .build(); + final Job originalJob = client.createJob(NAMESPACE_NAME, jobName, jobMeta); + + String targetJobName = newJobName().getValue(); + final JobMeta targetJobMeta = + JobMeta.builder() + .type(JOB_TYPE) + .inputs(ImmutableSet.of()) + .outputs(ImmutableSet.of()) + .location(JOB_LOCATION) + .context(JOB_CONTEXT) + .description(JOB_DESCRIPTION) + .build(); + final Job targetJob = client.createJob(NAMESPACE_NAME, targetJobName, targetJobMeta); + + JobDao jobDao = jdbi.onDemand(JobDao.class); + NamespaceDao namespaceDao = jdbi.onDemand(NamespaceDao.class); + Optional namespaceRow = namespaceDao.findNamespaceByName(NAMESPACE_NAME); + Optional originalJobRow = jobDao.findJobByNameAsRow(NAMESPACE_NAME, jobName); + Optional targetJobRow = jobDao.findJobByNameAsRow(NAMESPACE_NAME, targetJobName); + PGobject inputs = new PGobject(); + inputs.setType("json"); + inputs.setValue("[]"); + originalJobRow.ifPresent( + j -> { + jobDao.upsertJob( + j.getUuid(), + JobType.valueOf(JOB_TYPE.name()), + Instant.now(), + namespaceRow.get().getUuid(), + NAMESPACE_NAME, + jobName, + JOB_DESCRIPTION, + j.getJobContextUuid().orElse(null), + JOB_LOCATION.toString(), + targetJobRow.get().getUuid(), + inputs); + }); + + Job job = client.getJob(NAMESPACE_NAME, jobName); + assertThat(job) + .isNotNull() + .hasFieldOrPropertyWithValue("namespace", NAMESPACE_NAME) + .hasFieldOrPropertyWithValue("name", 
targetJobName); + } } diff --git a/api/src/test/java/marquez/OpenLineageIntegrationTest.java b/api/src/test/java/marquez/OpenLineageIntegrationTest.java index aa8eb22640..e5df95b9b3 100644 --- a/api/src/test/java/marquez/OpenLineageIntegrationTest.java +++ b/api/src/test/java/marquez/OpenLineageIntegrationTest.java @@ -53,9 +53,9 @@ public static List data() { @Test public void testSendOpenLineageBadArgument() throws IOException { - // Namespaces can't have semi-colons, so this will get rejected + // Namespaces can't have emojis, so this will get rejected String badNamespace = - "sqlserver://myhost:3342;user=auser;password=XXXXXXXXXX;database=TheDatabase"; + "sqlserver://myhost:3342;user=auser;password=\uD83D\uDE02\uD83D\uDE02\uD83D\uDE02;database=TheDatabase"; LineageEvent event = new LineageEvent( "COMPLETE", diff --git a/api/src/test/java/marquez/common/UtilsTest.java b/api/src/test/java/marquez/common/UtilsTest.java index f7d53ad097..f3ddfe3a54 100644 --- a/api/src/test/java/marquez/common/UtilsTest.java +++ b/api/src/test/java/marquez/common/UtilsTest.java @@ -5,6 +5,7 @@ import static com.google.common.collect.ImmutableSet.toImmutableSet; import static marquez.common.models.CommonModelGenerator.newDatasetName; import static marquez.common.models.CommonModelGenerator.newJobName; +import static marquez.common.models.CommonModelGenerator.newLifecycleState; import static marquez.common.models.CommonModelGenerator.newNamespaceName; import static marquez.common.models.CommonModelGenerator.newRunId; import static marquez.common.models.CommonModelGenerator.newSchemaFields; @@ -259,6 +260,7 @@ public void testDatasetVersionEqualOnSameData() { DatasetName datasetName = newDatasetName(); DatasetName physicalName = newDatasetName(); SourceName sourceName = newSourceName(); + String lifecycleState = newLifecycleState(); List schemaFields = newSchemaFields(2); RunId runId = newRunId(); @@ -268,6 +270,7 @@ public void testDatasetVersionEqualOnSameData() { sourceName.getValue(), physicalName.getValue(), datasetName.getValue(), + lifecycleState, schemaFields, runId.getValue()); Version second = @@ -276,6 +279,7 @@ public void testDatasetVersionEqualOnSameData() { sourceName.getValue(), physicalName.getValue(), datasetName.getValue(), + lifecycleState, schemaFields, runId.getValue()); @@ -320,6 +324,7 @@ public void testDatasetVersionIsNotEqualOnDifferentData() { newSourceName().getValue(), newDatasetName().getValue(), newDatasetName().getValue(), + newLifecycleState(), schemaFields, newRunId().getValue()); @@ -329,6 +334,7 @@ public void testDatasetVersionIsNotEqualOnDifferentData() { newSourceName().getValue(), newDatasetName().getValue(), newDatasetName().getValue(), + newLifecycleState(), schemaFields, newRunId().getValue()); @@ -337,7 +343,7 @@ public void testDatasetVersionIsNotEqualOnDifferentData() { @Test public void testDatasetVersionWithNullFields() { - Version version = Utils.newDatasetVersionFor(null, null, null, null, null, null); + Version version = Utils.newDatasetVersionFor(null, null, null, null, null, null, null); assertThat(version.getValue()).isNotNull(); } @@ -355,6 +361,7 @@ public void testNewDatasetVersionFor_equalOnUnsortedSchemaFields() { DatasetName datasetName = newDatasetName(); DatasetName physicalName = newDatasetName(); SourceName sourceName = newSourceName(); + String lifecycleState = newLifecycleState(); List schemaFields = newSchemaFields(2); RunId runId = newRunId(); @@ -364,6 +371,7 @@ public void testNewDatasetVersionFor_equalOnUnsortedSchemaFields() { 
sourceName.getValue(), physicalName.getValue(), datasetName.getValue(), + lifecycleState, schemaFields, runId.getValue()); @@ -375,6 +383,7 @@ public void testNewDatasetVersionFor_equalOnUnsortedSchemaFields() { sourceName.getValue(), physicalName.getValue(), datasetName.getValue(), + lifecycleState, shuffleSchemaFields, runId.getValue()); diff --git a/api/src/test/java/marquez/common/models/CommonModelGenerator.java b/api/src/test/java/marquez/common/models/CommonModelGenerator.java index 0d67367ae0..76c96e2fb9 100644 --- a/api/src/test/java/marquez/common/models/CommonModelGenerator.java +++ b/api/src/test/java/marquez/common/models/CommonModelGenerator.java @@ -157,6 +157,10 @@ public static RunId newRunId() { return RunId.of(UUID.randomUUID()); } + public static String newLifecycleState() { + return "TRUNCATE"; + } + public static Version newVersion() { return Version.of(UUID.randomUUID()); } diff --git a/api/src/test/java/marquez/common/models/NamespaceNameTest.java b/api/src/test/java/marquez/common/models/NamespaceNameTest.java new file mode 100644 index 0000000000..4e9ea3176a --- /dev/null +++ b/api/src/test/java/marquez/common/models/NamespaceNameTest.java @@ -0,0 +1,33 @@ +package marquez.common.models; + +import static org.assertj.core.api.Assertions.assertThat; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +@org.junit.jupiter.api.Tag("UnitTests") +public class NamespaceNameTest { + + @ParameterizedTest + @ValueSource( + strings = { + "DEFAULT", + "database://localhost:1234", + "s3://bucket", + "bigquery:", + "sqlserver://synapse-test-test001.sql.azuresynapse.net;databaseName=TESTPOOL1;", + "\u003D", + "@", + "abfss://something@.something-else.core.windows.net" + }) + void testValidNamespaceName(String name) { + assertThat(NamespaceName.of(name).getValue()).isEqualTo(name); + } + + @ParameterizedTest + @ValueSource(strings = {"\uD83D\uDE02", "!", ""}) + void testInvalidNamespaceName(String name) { + Assertions.assertThrows(IllegalArgumentException.class, () -> NamespaceName.of(name)); + } +} diff --git a/api/src/test/java/marquez/db/ColumnsTest.java b/api/src/test/java/marquez/db/ColumnsTest.java index 80f9c68522..f5645e2c64 100644 --- a/api/src/test/java/marquez/db/ColumnsTest.java +++ b/api/src/test/java/marquez/db/ColumnsTest.java @@ -285,4 +285,23 @@ public void testBlankUri() throws SQLException { final URI actual = Columns.uriOrNull(results, column); assertThat(actual).isNull(); } + + @Test + public void testBooleanOrDefault() throws SQLException { + final String column = "is_deleted"; + when(results.getObject(column)).thenReturn(true); + when(results.getBoolean(column)).thenReturn(true); + + final boolean actual = Columns.booleanOrDefault(results, column, false); + assertThat(actual).isTrue(); + } + + @Test + public void testBooleanOrDefaultWhenNoValue() throws SQLException { + final String column = "is_deleted"; + when(results.getObject(column)).thenReturn(null); + + final boolean actual = Columns.booleanOrDefault(results, column, true); + assertThat(actual).isTrue(); + } } diff --git a/api/src/test/java/marquez/db/DatasetDaoTest.java b/api/src/test/java/marquez/db/DatasetDaoTest.java index ae92c38bab..d1886648ea 100644 --- a/api/src/test/java/marquez/db/DatasetDaoTest.java +++ b/api/src/test/java/marquez/db/DatasetDaoTest.java @@ -19,6 +19,7 @@ import java.util.Optional; import lombok.Getter; import marquez.jdbi.MarquezJdbiExternalPostgresExtension; +import 
marquez.service.models.LineageEvent; import marquez.service.models.LineageEvent.Dataset; import marquez.service.models.LineageEvent.JobFacet; import marquez.service.models.LineageEvent.SchemaField; @@ -131,6 +132,64 @@ public void testGetDataset() { "anotherInputFacet"); } + @Test + public void testGetDatasetWithlifecycleStatePresent() { + Dataset dataset = + new Dataset( + NAMESPACE, + DATASET, + LineageEvent.DatasetFacets.builder() + .lifecycleStateChange( + new LineageEvent.LifecycleStateChangeFacet(PRODUCER_URL, SCHEMA_URL, "CREATE")) + .build()); + + createLineageRow( + openLineageDao, + "aWriteJob", + "COMPLETE", + jobFacet, + Collections.emptyList(), + Collections.singletonList(dataset)); + + Optional datasetByName = + datasetDao.findDatasetByName(NAMESPACE, DATASET); + assertThat(datasetByName.get().getLastLifecycleState().get()).isEqualTo("CREATE"); + } + + @Test + public void testGetDatasetWithDatasetMarkedDeleted() { + // create dataset + createLineageRow( + openLineageDao, + "aWriteJob", + "COMPLETE", + jobFacet, + Collections.emptyList(), + Collections.singletonList( + new Dataset(NAMESPACE, DATASET, LineageEvent.DatasetFacets.builder().build()))); + + // mark it deleted + createLineageRow( + openLineageDao, + "aWriteJob", + "COMPLETE", + jobFacet, + Collections.emptyList(), + Collections.singletonList( + new Dataset( + NAMESPACE, + DATASET, + LineageEvent.DatasetFacets.builder() + .lifecycleStateChange( + new LineageEvent.LifecycleStateChangeFacet( + PRODUCER_URL, SCHEMA_URL, "DROP")) + .build()))); + + // make sure it's returned by DAO and marked as deleted + assertThat(datasetDao.findDatasetByName(NAMESPACE, DATASET).get().isDeleted()).isTrue(); + assertThat(datasetDao.findWithTags(NAMESPACE, DATASET).get().isDeleted()).isTrue(); + } + @Test public void testGetDatasetWithMultipleVersions() { createLineageRow( diff --git a/api/src/test/java/marquez/db/DbTestUtils.java b/api/src/test/java/marquez/db/DbTestUtils.java index cfd706170d..8f8bf152e3 100644 --- a/api/src/test/java/marquez/db/DbTestUtils.java +++ b/api/src/test/java/marquez/db/DbTestUtils.java @@ -33,6 +33,7 @@ import marquez.common.models.DatasetId; import marquez.common.models.DatasetName; import marquez.common.models.JobName; +import marquez.common.models.JobType; import marquez.common.models.NamespaceName; import marquez.common.models.RunState; import marquez.db.models.DatasetRow; @@ -128,11 +129,56 @@ static ImmutableSet newJobs(final Jdbi jdbi, final int limit) { return Stream.generate(() -> newJob(jdbi)).limit(limit).collect(toImmutableSet()); } + public static JobRow createJobWithoutSymlinkTarget( + Jdbi jdbi, NamespaceRow namespace, String jobName, String description) { + return newJobWith( + jdbi, + namespace.getName(), + jobName, + new JobMeta( + JobType.BATCH, + ImmutableSet.of(), + ImmutableSet.of(), + null, + ImmutableMap.of(), + description, + null)); + } + + public static JobRow createJobWithSymlinkTarget( + Jdbi jdbi, NamespaceRow namespace, String jobName, UUID jobSymlinkId, String description) { + return newJobWith( + jdbi, + namespace.getName(), + jobName, + jobSymlinkId, + new JobMeta( + JobType.BATCH, + ImmutableSet.of(), + ImmutableSet.of(), + null, + ImmutableMap.of(), + description, + null)); + } + /** * Adds a new {@link JobRow} object to the {@code jobs} table with the provided {@link JobMeta}. 
*/ static JobRow newJobWith( final Jdbi jdbi, final String namespaceName, final String jobName, final JobMeta jobMeta) { + return newJobWith(jdbi, namespaceName, jobName, null, jobMeta); + } + + /** + * Adds a new {@link JobRow} object to the {@code jobs} table with the provided {@link JobMeta}. + */ + static JobRow newJobWith( + final Jdbi jdbi, + final String namespaceName, + final String jobName, + UUID symlinkTargetUuid, + final JobMeta jobMeta) { final DatasetDao datasetDao = jdbi.onDemand(DatasetDao.class); final JobDao jobDao = jdbi.onDemand(JobDao.class); @@ -151,7 +197,11 @@ static JobRow newJobWith( } return jobDao.upsertJobMeta( - namespaceForDatasetAndJob, JobName.of(jobName), jobMeta, Utils.getMapper()); + namespaceForDatasetAndJob, + JobName.of(jobName), + symlinkTargetUuid, + jobMeta, + Utils.getMapper()); } /** Adds a new {@link JobContextRow} object to the {@code job_contexts} table. */ @@ -268,35 +318,36 @@ public static Stream> query(Jdbi jdbi, String sql) { .scanResultSet( (rs, ctx) -> { ResultSet resultSet = rs.get(); - return Stream.generate( - () -> { - try { - if (resultSet.next()) { - ResultSetMetaData metaData = resultSet.getMetaData(); - int keys = metaData.getColumnCount(); - return IntStream.range(1, keys + 1) - .mapToObj( - i -> { - try { - return Map.entry( - metaData.getColumnName(i), - Optional.ofNullable(resultSet.getObject(i)) - .orElse("NULL")); - } catch (SQLException e) { - throw new RuntimeException(e); - } - }) - .collect( - Collectors.toMap( - Map.Entry::getKey, Map.Entry::getValue)); - } else { - return null; - } - } catch (SQLException e) { - throw new RuntimeException(e); - } - }) - .takeWhile(Predicates.notNull()); + return streamResults(resultSet); })); } + + public static Stream> streamResults(ResultSet resultSet) { + return Stream.generate( + () -> { + try { + if (resultSet.next()) { + ResultSetMetaData metaData = resultSet.getMetaData(); + int keys = metaData.getColumnCount(); + return IntStream.range(1, keys + 1) + .mapToObj( + i -> { + try { + return Map.entry( + metaData.getColumnName(i), + Optional.ofNullable(resultSet.getObject(i)).orElse("NULL")); + } catch (SQLException e) { + throw new RuntimeException(e); + } + }) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + } else { + return null; + } + } catch (SQLException e) { + throw new RuntimeException(e); + } + }) + .takeWhile(Predicates.notNull()); + } } diff --git a/api/src/test/java/marquez/db/JobDaoTest.java b/api/src/test/java/marquez/db/JobDaoTest.java index 8a9732fd5d..81a0b0c3bc 100644 --- a/api/src/test/java/marquez/db/JobDaoTest.java +++ b/api/src/test/java/marquez/db/JobDaoTest.java @@ -2,6 +2,9 @@ package marquez.db; +import static marquez.db.DbTestUtils.createJobWithSymlinkTarget; +import static marquez.db.DbTestUtils.createJobWithoutSymlinkTarget; +import static org.assertj.core.api.Assertions.assertThat; import static org.junit.Assert.assertNull; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.mock; @@ -9,8 +12,18 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; +import java.time.Instant; +import java.util.List; +import java.util.Optional; +import java.util.UUID; +import marquez.db.models.DbModelGenerator; +import marquez.db.models.JobRow; +import marquez.db.models.NamespaceRow; import marquez.jdbi.MarquezJdbiExternalPostgresExtension; +import marquez.service.models.Job; +import org.assertj.core.api.AbstractObjectAssert; import org.jdbi.v3.core.Jdbi; 
+import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; @@ -19,10 +32,26 @@ public class JobDaoTest { private static JobDao jobDao; + private static NamespaceDao namespaceDao; + private static NamespaceRow namespace; + private static Jdbi jdbi; @BeforeAll public static void setUpOnce(Jdbi jdbi) { + JobDaoTest.jdbi = jdbi; jobDao = jdbi.onDemand(JobDao.class); + namespaceDao = jdbi.onDemand(NamespaceDao.class); + namespace = + namespaceDao.upsertNamespaceRow( + UUID.randomUUID(), + Instant.now(), + JobDaoTest.class.getSimpleName(), + JobDaoTest.class.getName()); + } + + @AfterEach + public void cleanUp(Jdbi jdbi) { + jdbi.inTransaction(h -> h.execute("DELETE FROM jobs")); } @Test @@ -30,6 +59,120 @@ public void emptyUrl() { assertNull(jobDao.toUrlString(null)); } + @Test + public void testFindSymlinkedJobByName() { + JobRow targetJob = + createJobWithoutSymlinkTarget(jdbi, namespace, "targetJob", "the target of the symlink"); + JobRow symlinkJob = + createJobWithSymlinkTarget( + jdbi, namespace, "symlinkJob", targetJob.getUuid(), "the symlink job"); + + Optional jobByName = + jobDao.findJobByName(symlinkJob.getNamespaceName(), symlinkJob.getName()); + + assertJobEquals(jobByName, targetJob.getNamespaceName(), targetJob.getName()); + } + + @Test + public void testFindSymlinkedJobRowByName() { + JobRow targetJob = + createJobWithoutSymlinkTarget(jdbi, namespace, "targetJob", "the target of the symlink"); + JobRow symlinkJob = + createJobWithSymlinkTarget( + jdbi, namespace, "symlinkJob", targetJob.getUuid(), "the symlink job"); + + Optional jobByName = + jobDao.findJobByNameAsRow(symlinkJob.getNamespaceName(), symlinkJob.getName()); + assertThat(jobByName) + .isPresent() + .get() + .hasFieldOrPropertyWithValue("name", targetJob.getName()) + .hasFieldOrPropertyWithValue("namespaceName", targetJob.getNamespaceName()); + } + + @Test + public void testFindAll() { + JobRow targetJob = + createJobWithoutSymlinkTarget(jdbi, namespace, "targetJob", "the target of the symlink"); + JobRow symlinkJob = + createJobWithSymlinkTarget( + jdbi, namespace, "symlinkJob", targetJob.getUuid(), "the symlink job"); + JobRow anotherJobSameNamespace = + createJobWithoutSymlinkTarget(jdbi, namespace, "anotherJob", "a random other job"); + + List jobs = jobDao.findAll(namespace.getName(), 10, 0); + + // the symlinked job isn't present in the response - only the symlink target and the job with + // no symlink + assertThat(jobs) + .hasSize(2) + .map(Job::getId) + .containsExactlyInAnyOrder( + DbModelGenerator.jobIdFor(namespace.getName(), targetJob.getName()), + DbModelGenerator.jobIdFor(namespace.getName(), anotherJobSameNamespace.getName())); + } + + @Test + public void testCountFor() { + JobRow targetJob = + createJobWithoutSymlinkTarget(jdbi, namespace, "targetJob", "the target of the symlink"); + createJobWithSymlinkTarget( + jdbi, namespace, "symlinkJob", targetJob.getUuid(), "the symlink job"); + createJobWithoutSymlinkTarget(jdbi, namespace, "anotherJob", "a random other job"); + createJobWithoutSymlinkTarget(jdbi, namespace, "aThirdJob", "a random third job"); + + NamespaceRow anotherNamespace = + namespaceDao.upsertNamespaceRow( + UUID.randomUUID(), Instant.now(), "anotherNamespace", getClass().getName()); + createJobWithSymlinkTarget( + jdbi, anotherNamespace, "othernamespacejob", null, "job in another namespace"); + + assertThat(jobDao.count()).isEqualTo(4); + + 
assertThat(jobDao.countFor(namespace.getName())).isEqualTo(3); + } + + @Test + public void testUpsertJobWithNewSymlink() { + JobRow targetJob = + createJobWithoutSymlinkTarget(jdbi, namespace, "targetJob", "the target of the symlink"); + + String symlinkJobName = "symlinkJob"; + JobRow symlinkJob = + createJobWithoutSymlinkTarget(jdbi, namespace, symlinkJobName, "the symlink job"); + + // the job queried is returned, since there is no symlink + Optional jobByName = + jobDao.findJobByName(symlinkJob.getNamespaceName(), symlinkJob.getName()); + assertJobEquals(jobByName, symlinkJob.getNamespaceName(), symlinkJob.getName()); + + createJobWithSymlinkTarget( + jdbi, namespace, symlinkJobName, targetJob.getUuid(), "the symlink job"); + + // now the symlink target should be returned + assertJobEquals( + jobDao.findJobByName(symlinkJob.getNamespaceName(), symlinkJob.getName()), + targetJob.getNamespaceName(), + targetJob.getName()); + + // upsert without the symlink target - the previous value should be respected + createJobWithoutSymlinkTarget(jdbi, namespace, symlinkJobName, "the symlink job"); + + // the symlink target should still be returned + assertJobEquals( + jobDao.findJobByName(symlinkJob.getNamespaceName(), symlinkJob.getName()), + targetJob.getNamespaceName(), + targetJob.getName()); + } + + private AbstractObjectAssert assertJobEquals( + Optional jobByName, String namespaceName, String jobName) { + return assertThat(jobByName) + .isPresent() + .get() + .hasFieldOrPropertyWithValue("id", DbModelGenerator.jobIdFor(namespaceName, jobName)); + } + @Test public void pgObjectException() throws JsonProcessingException { ObjectMapper objectMapper = mock(ObjectMapper.class); diff --git a/api/src/test/java/marquez/db/LineageDaoTest.java b/api/src/test/java/marquez/db/LineageDaoTest.java index 97ab8cb6a7..3b0aa14f63 100644 --- a/api/src/test/java/marquez/db/LineageDaoTest.java +++ b/api/src/test/java/marquez/db/LineageDaoTest.java @@ -2,7 +2,10 @@ package marquez.db; +import static marquez.db.DatasetDaoTest.DATASET; import static marquez.db.LineageTestUtils.NAMESPACE; +import static marquez.db.LineageTestUtils.PRODUCER_URL; +import static marquez.db.LineageTestUtils.SCHEMA_URL; import static marquez.db.LineageTestUtils.newDatasetFacet; import static marquez.db.LineageTestUtils.writeDownstreamLineage; import static org.assertj.core.api.Assertions.assertThat; @@ -25,6 +28,7 @@ import marquez.db.models.JobData; import marquez.db.models.UpdateLineageRow; import marquez.jdbi.MarquezJdbiExternalPostgresExtension; +import marquez.service.models.LineageEvent; import marquez.service.models.LineageEvent.Dataset; import marquez.service.models.LineageEvent.JobFacet; import marquez.service.models.LineageEvent.SchemaField; @@ -534,6 +538,35 @@ public void testGetDatasetData() { .allMatch(str -> str.contains("outputData2")); } + @Test + public void testGetDatasetDatalifecycleStateReturned() { + Dataset dataset = + new Dataset( + NAMESPACE, + DATASET, + LineageEvent.DatasetFacets.builder() + .lifecycleStateChange( + new LineageEvent.LifecycleStateChangeFacet(PRODUCER_URL, SCHEMA_URL, "CREATE")) + .build()); + + UpdateLineageRow row = + LineageTestUtils.createLineageRow( + openLineageDao, + "writeJob", + "COMPLETE", + jobFacet, + Arrays.asList(), + Arrays.asList(dataset)); + + Set datasetData = + lineageDao.getDatasetData( + Collections.singleton(row.getOutputs().get().get(0).getDatasetRow().getUuid())); + + assertThat(datasetData) + .extracting(ds -> ds.getLastlifecycleState().orElse("")) + .anyMatch(str -> 
str.contains("CREATE")); + } + @Test public void testGetCurrentRuns() { diff --git a/api/src/test/java/marquez/db/OpenLineageDaoTest.java b/api/src/test/java/marquez/db/OpenLineageDaoTest.java index 375f8f26eb..bffba3aac9 100644 --- a/api/src/test/java/marquez/db/OpenLineageDaoTest.java +++ b/api/src/test/java/marquez/db/OpenLineageDaoTest.java @@ -2,12 +2,15 @@ package marquez.db; +import static marquez.db.LineageTestUtils.PRODUCER_URL; +import static marquez.db.LineageTestUtils.SCHEMA_URL; import static org.assertj.core.api.Assertions.assertThat; import java.util.Arrays; import marquez.db.models.UpdateLineageRow; import marquez.db.models.UpdateLineageRow.DatasetRecord; import marquez.jdbi.MarquezJdbiExternalPostgresExtension; +import marquez.service.models.LineageEvent; import marquez.service.models.LineageEvent.Dataset; import marquez.service.models.LineageEvent.DatasetFacets; import marquez.service.models.LineageEvent.JobFacet; @@ -65,6 +68,28 @@ void testUpdateMarquezModel() { .isEqualTo(writeJob.getOutputs().get().get(0).getDatasetVersionRow()); } + @Test + void testUpdateMarquezModelLifecycleStateChangeFacet() { + Dataset dataset = + new Dataset( + LineageTestUtils.NAMESPACE, + DATASET_NAME, + LineageEvent.DatasetFacets.builder() + .lifecycleStateChange( + new LineageEvent.LifecycleStateChangeFacet( + PRODUCER_URL, SCHEMA_URL, "TRUNCATE")) + .build()); + + JobFacet jobFacet = new JobFacet(null, null, null, LineageTestUtils.EMPTY_MAP); + UpdateLineageRow writeJob = + LineageTestUtils.createLineageRow( + dao, WRITE_JOB_NAME, "COMPLETE", jobFacet, Arrays.asList(), Arrays.asList(dataset)); + + assertThat(writeJob.getOutputs()).isPresent().get().asList().size().isEqualTo(1); + assertThat(writeJob.getOutputs().get().get(0).getDatasetVersionRow().getLifecycleState()) + .isEqualTo("TRUNCATE"); + } + /** * When reading a new dataset, a version is created and the dataset's current version is updated * immediately. 
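The `testUpdateMarquezModelLifecycleStateChangeFacet` test above exercises the new facet end to end: a `lifecycleStateChange` facet on an output dataset is persisted as `lifecycle_state` on the resulting dataset version (the column added by migration V41). A minimal standalone sketch of attaching the facet, mirroring the test code in this diff (the producer/schema URLs below are illustrative placeholders, not values from this changeset):

```java
import java.net.URI;

import marquez.service.models.LineageEvent;

class LifecycleFacetSketch {
  // Placeholder URLs for illustration only; the tests use the PRODUCER_URL and
  // SCHEMA_URL constants from LineageTestUtils.
  private static final URI PRODUCER = URI.create("https://example.com/producer");
  private static final URI SCHEMA = URI.create("https://example.com/schema");

  // Builds an output dataset carrying a lifecycle state change, exactly as the
  // DAO tests above construct it.
  static LineageEvent.Dataset dropped(String namespace, String name) {
    return new LineageEvent.Dataset(
        namespace,
        name,
        LineageEvent.DatasetFacets.builder()
            .lifecycleStateChange(
                new LineageEvent.LifecycleStateChangeFacet(PRODUCER, SCHEMA, "DROP"))
            .build());
  }
}
```

A `"DROP"` state also flips the dataset's `is_deleted` flag, which is what `DatasetDaoTest.testGetDatasetWithDatasetMarkedDeleted` verifies earlier in this diff.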
@@ -115,6 +140,7 @@ void testUpdateMarquezModelWithNonMatchingReadSchema() { new SchemaField("name", "STRING", "my name"), new SchemaField("age", "INT", "my age"), new SchemaField("eyeColor", "STRING", "my eye color"))), + this.datasetFacets.getLifecycleStateChange(), this.datasetFacets.getDataSource(), this.datasetFacets.getDescription(), this.datasetFacets.getAdditionalFacets()); diff --git a/api/src/test/java/marquez/db/RunDaoTest.java b/api/src/test/java/marquez/db/RunDaoTest.java index 9a29b61738..e8e05fe63e 100644 --- a/api/src/test/java/marquez/db/RunDaoTest.java +++ b/api/src/test/java/marquez/db/RunDaoTest.java @@ -3,15 +3,22 @@ package marquez.db; import static marquez.common.models.CommonModelGenerator.newJobName; +import static marquez.db.DbTestUtils.createJobWithSymlinkTarget; +import static marquez.db.DbTestUtils.createJobWithoutSymlinkTarget; +import static marquez.db.DbTestUtils.newJobWith; import static marquez.service.models.ServiceModelGenerator.newJobMetaWith; import static org.assertj.core.api.Assertions.assertThat; +import com.google.common.collect.ImmutableSet; import java.time.Instant; +import java.util.Comparator; import java.util.List; import java.util.Optional; import java.util.Set; +import java.util.TreeSet; import java.util.stream.Collectors; import java.util.stream.IntStream; +import java.util.stream.Stream; import marquez.common.models.DatasetId; import marquez.common.models.DatasetVersionId; import marquez.common.models.NamespaceName; @@ -25,6 +32,7 @@ import marquez.service.models.Run; import org.assertj.core.api.InstanceOfAssertFactories; import org.jdbi.v3.core.Jdbi; +import org.jetbrains.annotations.NotNull; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -79,7 +87,7 @@ public void getRun() { final JobMeta jobMeta = newJobMetaWith(NamespaceName.of(namespaceRow.getName())); final JobRow jobRow = - DbTestUtils.newJobWith(jdbi, namespaceRow.getName(), newJobName().getValue(), jobMeta); + newJobWith(jdbi, namespaceRow.getName(), newJobName().getValue(), jobMeta); final RunRow runRow = DbTestUtils.newRun(jdbi, jobRow.getNamespaceName(), jobRow.getName()); DbTestUtils.transitionRunWithOutputs( @@ -116,26 +124,35 @@ public void getFindAll() { final JobMeta jobMeta = newJobMetaWith(NamespaceName.of(namespaceRow.getName())); final JobRow jobRow = - DbTestUtils.newJobWith(jdbi, namespaceRow.getName(), newJobName().getValue(), jobMeta); + newJobWith(jdbi, namespaceRow.getName(), newJobName().getValue(), jobMeta); Set expectedRuns = - IntStream.range(0, 5) - .mapToObj( - i -> { - final RunRow runRow = - DbTestUtils.newRun(jdbi, jobRow.getNamespaceName(), jobRow.getName()); - DbTestUtils.transitionRunWithOutputs( - jdbi, runRow.getUuid(), RunState.COMPLETED, jobMeta.getOutputs()); - - jobVersionDao.upsertJobVersionOnRunTransition( - jobRow.getNamespaceName(), - jobRow.getName(), - runRow.getUuid(), - RunState.COMPLETED, - Instant.now()); - return runRow; - }) + createRunsForJob(jobRow, 5, jobMeta.getOutputs()).collect(Collectors.toSet()); + List runs = runDao.findAll(jobRow.getNamespaceName(), jobRow.getName(), 10, 0); + assertThat(runs) + .hasSize(expectedRuns.size()) + .map(Run::getId) + .map(RunId::getValue) + .containsAll(expectedRuns.stream().map(RunRow::getUuid).collect(Collectors.toSet())); + } + + @Test + public void getFindAllForSymlinkedJob() { + final JobMeta jobMeta = newJobMetaWith(NamespaceName.of(namespaceRow.getName())); + final JobRow jobRow = + newJobWith(jdbi, 
namespaceRow.getName(), newJobName().getValue(), jobMeta); + + final JobRow symlinkJob = + createJobWithSymlinkTarget( + jdbi, namespaceRow, newJobName().getValue(), jobRow.getUuid(), "symlink job"); + + Set expectedRuns = + Stream.concat( + createRunsForJob(symlinkJob, 3, jobMeta.getOutputs()), + createRunsForJob(jobRow, 2, jobMeta.getOutputs())) .collect(Collectors.toSet()); + + // all runs should be present List runs = runDao.findAll(jobRow.getNamespaceName(), jobRow.getName(), 10, 0); assertThat(runs) .hasSize(expectedRuns.size()) @@ -144,13 +161,70 @@ public void getFindAll() { .containsAll(expectedRuns.stream().map(RunRow::getUuid).collect(Collectors.toSet())); } + @Test + public void testFindByLatestJob() { + final JobMeta jobMeta = newJobMetaWith(NamespaceName.of(namespaceRow.getName())); + final JobRow jobRow = + newJobWith(jdbi, namespaceRow.getName(), newJobName().getValue(), jobMeta); + Set runs = + createRunsForJob(jobRow, 5, jobMeta.getOutputs()).collect(Collectors.toSet()); + + TreeSet sortedRuns = + new TreeSet<>(Comparator.comparing(RunRow::getUpdatedAt).reversed()); + sortedRuns.addAll(runs); + Optional byLatestJob = runDao.findByLatestJob(jobRow.getNamespaceName(), jobRow.getName()); + assertThat(byLatestJob) + .isPresent() + .get() + .hasFieldOrPropertyWithValue("id", new RunId(sortedRuns.first().getUuid())); + + JobRow newTargetJob = + createJobWithoutSymlinkTarget(jdbi, namespaceRow, "newTargetJob", "a symlink target"); + + // update the old job to point to the new targets + createJobWithSymlinkTarget( + jdbi, + namespaceRow, + jobRow.getName(), + newTargetJob.getUuid(), + jobMeta.getDescription().orElse(null)); + + // get the latest run for the *newTargetJob*. It should be the same as the old job's latest run + byLatestJob = runDao.findByLatestJob(newTargetJob.getNamespaceName(), newTargetJob.getName()); + assertThat(byLatestJob) + .isPresent() + .get() + .hasFieldOrPropertyWithValue("id", new RunId(sortedRuns.first().getUuid())); + } + + @NotNull + private Stream createRunsForJob( + JobRow jobRow, int count, ImmutableSet outputs) { + return IntStream.range(0, count) + .mapToObj( + i -> { + final RunRow runRow = + DbTestUtils.newRun(jdbi, jobRow.getNamespaceName(), jobRow.getName()); + DbTestUtils.transitionRunWithOutputs( + jdbi, runRow.getUuid(), RunState.COMPLETED, outputs); + + jobVersionDao.upsertJobVersionOnRunTransition( + jobRow.getNamespaceName(), + jobRow.getName(), + runRow.getUuid(), + RunState.COMPLETED, + Instant.now()); + return runRow; + }); + } + @Test public void updateRowWithNullNominalTimeDoesNotUpdateNominalTime() { final RunDao runDao = jdbi.onDemand(RunDao.class); final JobMeta jobMeta = newJobMetaWith(NamespaceName.of(namespaceRow.getName())); final JobRow jobRow = - DbTestUtils.newJobWith(jdbi, namespaceRow.getName(), newJobName().getValue(), jobMeta); + newJobWith(jdbi, namespaceRow.getName(), newJobName().getValue(), jobMeta); RunRow row = DbTestUtils.newRun(jdbi, namespaceRow.getName(), jobRow.getName()); diff --git a/api/src/test/java/marquez/db/mappers/DatasetMapperTest.java b/api/src/test/java/marquez/db/mappers/DatasetMapperTest.java index 28eb07d048..270e3891d9 100644 --- a/api/src/test/java/marquez/db/mappers/DatasetMapperTest.java +++ b/api/src/test/java/marquez/db/mappers/DatasetMapperTest.java @@ -42,6 +42,8 @@ public static void setUp() throws SQLException, MalformedURLException { when(resultSet.getObject(Columns.PHYSICAL_NAME)).thenReturn("PHYSICAL_NAME"); when(resultSet.getString(Columns.TYPE)).thenReturn("DB_TABLE"); 
when(resultSet.getObject(Columns.TYPE)).thenReturn("DB_TABLE"); + when(resultSet.getString(Columns.LIFECYCLE_STATE)).thenReturn("TRUNCATE"); + when(resultSet.getObject(Columns.LIFECYCLE_STATE)).thenReturn("TRUNCATE"); when(resultSet.getString(Columns.DESCRIPTION)).thenReturn("DESCRIPTION"); when(resultSet.getObject(Columns.DESCRIPTION)).thenReturn("DESCRIPTION"); when(resultSet.getString(Columns.SOURCE_NAME)).thenReturn("POSTGRES"); diff --git a/api/src/test/java/marquez/db/models/DbModelGenerator.java b/api/src/test/java/marquez/db/models/DbModelGenerator.java index d3a555493d..1857c371f3 100644 --- a/api/src/test/java/marquez/db/models/DbModelGenerator.java +++ b/api/src/test/java/marquez/db/models/DbModelGenerator.java @@ -12,6 +12,10 @@ import java.util.UUID; import java.util.stream.Stream; import marquez.Generator; +import marquez.common.models.JobId; +import marquez.common.models.JobName; +import marquez.common.models.NamespaceName; +import org.jetbrains.annotations.NotNull; /** Generates new instances for {@code marquez.db.models} with random values used for testing. */ public final class DbModelGenerator extends Generator { @@ -38,4 +42,9 @@ public static NamespaceRow newNamespaceRow() { public static UUID newRowUuid() { return UUID.randomUUID(); } + + @NotNull + public static JobId jobIdFor(String namespaceName, String jobName) { + return new JobId(new NamespaceName(namespaceName), new JobName(jobName)); + } } diff --git a/api/src/test/java/marquez/service/models/ServiceModelGenerator.java b/api/src/test/java/marquez/service/models/ServiceModelGenerator.java index 6ab61d6281..c66da12695 100644 --- a/api/src/test/java/marquez/service/models/ServiceModelGenerator.java +++ b/api/src/test/java/marquez/service/models/ServiceModelGenerator.java @@ -11,6 +11,7 @@ import static marquez.common.models.CommonModelGenerator.newDescription; import static marquez.common.models.CommonModelGenerator.newFields; import static marquez.common.models.CommonModelGenerator.newJobType; +import static marquez.common.models.CommonModelGenerator.newLifecycleState; import static marquez.common.models.CommonModelGenerator.newLocation; import static marquez.common.models.CommonModelGenerator.newNamespaceName; import static marquez.common.models.CommonModelGenerator.newOwnerName; @@ -59,9 +60,11 @@ public static DbTable newDbTableWith(final DatasetId dbTableId) { newFields(4), newTagNames(2), null, + newLifecycleState(), newDescription(), null, - null); + null, + false); } /** Returns a new {@link DbTableMeta} object. 
*/ diff --git a/api/src/test/resources/mappers/full_dataset_mapper.json b/api/src/test/resources/mappers/full_dataset_mapper.json index c6dba5a7cc..277a2725d5 100644 --- a/api/src/test/resources/mappers/full_dataset_mapper.json +++ b/api/src/test/resources/mappers/full_dataset_mapper.json @@ -4,6 +4,7 @@ "name": "NAME" }, "type": "DB_TABLE", + "lastLifecycleState": "TRUNCATE", "description": "DESCRIPTION", "name": "NAME", "physicalName": "PHYSICAL_NAME", diff --git a/build.gradle b/build.gradle index 61055ff273..81d5afdffd 100644 --- a/build.gradle +++ b/build.gradle @@ -19,9 +19,9 @@ buildscript { } } dependencies { - classpath 'com.adarshr:gradle-test-logger-plugin:3.1.0' + classpath 'com.adarshr:gradle-test-logger-plugin:3.2.0' classpath 'gradle.plugin.com.github.johnrengelman:shadow:7.1.2' - classpath 'com.diffplug.spotless:spotless-plugin-gradle:6.3.0' + classpath 'com.diffplug.spotless:spotless-plugin-gradle:6.5.1' } } @@ -54,14 +54,14 @@ subprojects { ext { assertjVersion = '3.22.0' - dropwizardVersion = '2.0.28' + dropwizardVersion = '2.0.29' jacocoVersion = '0.8.7' junit5Version = '5.8.2' - lombokVersion = '1.18.22' - mockitoVersion = '4.3.1' - openlineageVersion = '0.5.2' + lombokVersion = '1.18.24' + mockitoVersion = '4.5.1' + openlineageVersion = '0.8.1' slf4jVersion = '1.7.36' - postgresqlVersion = '42.3.3' + postgresqlVersion = '42.3.4' isReleaseVersion = !version.endsWith('SNAPSHOT') } diff --git a/chart/Chart.lock b/chart/Chart.lock index 7ea9d369c0..2d021410f9 100644 --- a/chart/Chart.lock +++ b/chart/Chart.lock @@ -1,9 +1,9 @@ dependencies: - name: common repository: https://charts.bitnami.com/bitnami - version: 1.11.2 + version: 1.13.1 - name: postgresql repository: https://charts.bitnami.com/bitnami - version: 10.16.2 -digest: sha256:6c0ef7ef5fdac977d03aee299c7bcab3521255a4fea415d51cd1b229668afa58 -generated: "2022-03-02T19:54:32.624226165Z" + version: 11.1.26 +digest: sha256:9f64607528832d10773a573c90ebc7a008350b2527a7de275458510a3e57fdec +generated: "2022-05-03T18:38:42.955188002Z" diff --git a/chart/Chart.yaml b/chart/Chart.yaml index 6f1e074ba9..6da907f384 100644 --- a/chart/Chart.yaml +++ b/chart/Chart.yaml @@ -5,11 +5,11 @@ dependencies: repository: https://charts.bitnami.com/bitnami tags: - bitnami-common - version: 1.11.2 + version: 1.13.1 - condition: postgresql.enabled name: postgresql repository: https://charts.bitnami.com/bitnami - version: 10.16.2 + version: 11.1.26 description: Marquez is an open source metadata service for the collection, aggregation, and visualization of a data ecosystem's metadata. 
home: https://github.com/MarquezProject/marquez/tree/main/chart icon: https://raw.githubusercontent.com/MarquezProject/marquez/main/web/src/img/marquez-logo.png @@ -29,4 +29,4 @@ name: marquez sources: - https://github.com/MarquezProject/marquez - https://marquezproject.github.io/marquez/ -version: 0.21.0 +version: 0.22.0 diff --git a/chart/README.md b/chart/README.md index bf8ac890eb..afe461d155 100644 --- a/chart/README.md +++ b/chart/README.md @@ -45,25 +45,26 @@ helm delete marquez ### [Marquez](https://github.com/MarquezProject/marquez) **parameters** -| Parameter | Description | Default | -|------------------------------|----------------------------------|--------------------------| -| `marquez.replicaCount` | Number of desired replicas | `1` | -| `marquez.image.registry` | Marquez image registry | `docker.io` | -| `marquez.image.repository` | Marquez image repository | `marquezproject/marquez` | -| `marquez.image.tag` | Marquez image tag | `0.15.0` | -| `marquez.image.pullPolicy` | Image pull policy | `IfNotPresent` | +| Parameter | Description | Default | +|------------------------------|----------------------------------------|--------------------------| +| `marquez.replicaCount` | Number of desired replicas | `1` | +| `marquez.image.registry` | Marquez image registry | `docker.io` | +| `marquez.image.repository` | Marquez image repository | `marquezproject/marquez` | +| `marquez.image.tag` | Marquez image tag | `0.15.0` | +| `marquez.image.pullPolicy` | Image pull policy | `IfNotPresent` | | `marquez.existingSecretName` | Name of an existing secret containing db password ('marquez-db-password' key) | `nil` | -| `marquez.db.host` | PostgreSQL host | `localhost` | -| `marquez.db.port` | PostgreSQL port | `5432` | -| `marquez.db.name` | PostgreSQL database | `marquez` | -| `marquez.db.user` | PostgreSQL user | `buendia` | -| `marquez.db.password` | PostgreSQL password | `macondo` | -| `marquez.migrateOnStartup` | Execute Flyway migration | `true` | -| `marquez.hostname` | Marquez hostname | `localhost` | -| `marquez.port` | API host port | `5000` | -| `marquez.adminPort` | Heath/Liveness host port | `5001` | -| `marquez.resources.limits` | K8s resource limit overrides | `nil` | -| `marquez.resources.requests` | K8s resource requests overrides | `nil` | +| `marquez.db.host` | PostgreSQL host | `localhost` | +| `marquez.db.port` | PostgreSQL port | `5432` | +| `marquez.db.name` | PostgreSQL database | `marquez` | +| `marquez.db.user` | PostgreSQL user | `buendia` | +| `marquez.db.password` | PostgreSQL password | `macondo` | +| `marquez.migrateOnStartup` | Execute Flyway migration | `true` | +| `marquez.hostname` | Marquez hostname | `localhost` | +| `marquez.port` | API host port | `5000` | +| `marquez.adminPort` | Heath/Liveness host port | `5001` | +| `marquez.resources.limits` | K8s resource limit overrides | `nil` | +| `marquez.resources.requests` | K8s resource requests overrides | `nil` | +| `marquez.podAnnotations` | Additional pod annotations for Marquez | `{}` | ### [Marquez Web UI](https://github.com/MarquezProject/marquez-web) **parameters** @@ -81,13 +82,14 @@ helm delete marquez ### [Postgres](https://github.com/bitnami/charts/blob/master/bitnami/postgresql/values.yaml) (sub-chart) **parameters** -| Parameter | Description | Default | -|---------------------------------|---------------------------------|-----------| -| `postgresql.enabled` | Deploy PostgreSQL container(s) | `false` | -| `postgresql.postgresqlUsername` | PostgreSQL username | `buendia` | -| 
`postgresql.postgresqlPassword` | PostgreSQL password | `macondo` | -| `postgresql.postgresqlDatabase` | PostgreSQL database | `marquez` | -| `postgresql.existingSecret` | Name of existing secret object | `nil` | +| Parameter | Description | Default | +|----------------------------------|---------------------------------|-----------| +| `postgresql.enabled` | Deploy PostgreSQL container(s) | `false` | +| `postgresql.image.tag` | PostgreSQL image version | `12.1.0` | +| `postgresql.auth.username` | PostgreSQL username | `buendia` | +| `postgresql.auth.password` | PostgreSQL password | `macondo` | +| `postgresql.auth.database` | PostgreSQL database | `marquez` | +| `postgresql.auth.existingSecret` | Name of existing secret object | `nil` | ### Common **parameters** diff --git a/chart/templates/_helpers.tpl b/chart/templates/_helpers.tpl index 36e71aabd4..fd72a34b9e 100644 --- a/chart/templates/_helpers.tpl +++ b/chart/templates/_helpers.tpl @@ -91,7 +91,7 @@ Flexible Postgres database name, using an existing or newly created instance. */}} {{- define "marquez.database.name" -}} {{- if eq .Values.postgresql.enabled true -}} - {{- .Values.postgresql.postgresqlDatabase -}} + {{- .Values.postgresql.auth.database -}} {{- else -}} {{- .Values.marquez.db.name -}} {{- end -}} @@ -102,7 +102,7 @@ Flexible Postgres database user, using an existing or newly created instance. */}} {{- define "marquez.database.user" -}} {{- if eq .Values.postgresql.enabled true -}} - {{- .Values.postgresql.postgresqlUsername -}} + {{- .Values.postgresql.auth.username -}} {{- else -}} {{- .Values.marquez.db.user -}} {{- end -}} @@ -112,10 +112,10 @@ Flexible Postgres database user, using an existing or newly created instance. Postgres helm chart expects a specific secret name, when an override is not provided. */}} {{- define "marquez.postgresql.secretName" -}} -{{- if and (.Values.postgresql.enabled) (not .Values.postgresql.existingSecret) -}} +{{- if and (.Values.postgresql.enabled) (not .Values.postgresql.auth.existingSecret) -}} {{- printf "%s" (include "marquez.postgresql.fullname" .) -}} -{{- else if and (.Values.postgresql.enabled) (.Values.postgresql.existingSecret) -}} - {{- printf "%s" .Values.postgresql.existingSecret -}} +{{- else if and (.Values.postgresql.enabled) (.Values.postgresql.auth.existingSecret) -}} + {{- printf "%s" .Values.postgresql.auth.existingSecret -}} {{- else -}} {{- include "marquez.secretName" . -}} {{- end -}} @@ -126,7 +126,7 @@ Postgres helm chart expects the password to exist within a specific key. 
*/}} {{- define "marquez.database.existingsecret.key" -}} {{- if .Values.postgresql.enabled -}} - {{- printf "%s" "postgresql-password" -}} + {{- printf "%s" "password" -}} {{- else -}} {{- printf "%s" "marquez-db-password" -}} {{- end -}} diff --git a/chart/templates/marquez/deployment.yaml b/chart/templates/marquez/deployment.yaml index 9da6b73f16..3c9aa45a54 100644 --- a/chart/templates/marquez/deployment.yaml +++ b/chart/templates/marquez/deployment.yaml @@ -7,9 +7,13 @@ metadata: {{- include "common.tplvalues.render" (dict "value" .Values.commonLabels "context" $) | nindent 4 }} {{- end }} app.kubernetes.io/component: marquez - {{- if .Values.commonAnnotations }} - annotations: {{- include "common.tplvalues.render" (dict "value" .Values.commonAnnotations "context" $) | nindent 4 }} - {{- end }} + annotations: + {{- if .Values.commonAnnotations }} + {{- include "common.tplvalues.render" (dict "value" .Values.commonAnnotations "context" $) | nindent 4 }} + {{- end }} + {{- range $key, $value := .Values.marquez.podAnnotations }} + {{ $key }}: {{ include "common.tplvalues.render" (dict "value" $value "context" $) | quote }} + {{- end }} spec: selector: matchLabels: {{- include "common.labels.matchLabels" . | nindent 6 }} diff --git a/chart/templates/web/deployment.yaml b/chart/templates/web/deployment.yaml index 31e4063f51..39e91dabfb 100644 --- a/chart/templates/web/deployment.yaml +++ b/chart/templates/web/deployment.yaml @@ -39,7 +39,7 @@ spec: port: http env: - name: MARQUEZ_HOST - value: {{ .Values.marquez.hostname | quote }} + value: {{ include "common.names.fullname" . }} - name: MARQUEZ_PORT value: {{ .Values.marquez.port | quote }} {{- if .Values.web.resources }} diff --git a/chart/values.yaml b/chart/values.yaml index edf9a4f458..d8419f9ee7 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -17,7 +17,7 @@ marquez: image: registry: docker.io repository: marquezproject/marquez - tag: 0.21.0 + tag: 0.22.0 pullPolicy: IfNotPresent ## Name of the existing secret containing credentials for the Marquez installation. ## When this is specified, it will take precedence over the values configured in the 'db' section. 
@@ -55,6 +55,10 @@ marquez: requests: {} # memory: 256Mi # cpu: 250m + podAnnotations: {} + ## - name: + ## value: + ## ## Properties related to Marquez frontend functionality ## @@ -71,7 +75,7 @@ web: image: registry: docker.io repository: marquezproject/marquez-web - tag: 0.21.0 + tag: 0.22.0 pullPolicy: IfNotPresent ## Marquez website will run on this port ## @@ -103,28 +107,32 @@ postgresql: ## @param image.tag PostgreSQL image tag (immutable tags are recommended) ## image: - registry: docker.io - repository: bitnami/postgresql - tag: 0.21.0 - ## @param postgresql.postgresqlUsername PostgreSQL username - ## ref: https://hub.docker.com/_/postgres/ - ## - postgresqlUsername: buendia - ## @param postgresql.postgresqlPassword PostgreSQL password - ## ref: https://hub.docker.com/_/postgres/ - ## - postgresqlPassword: macondo - ## @param postgresql.postgresqlDatabase PostgreSQL database + tag: 0.22.0 + ## Authentication parameters + ## ref: https://github.com/bitnami/bitnami-docker-postgresql/blob/master/README.md#setting-the-root-password-on-first-run ## ref: https://github.com/bitnami/bitnami-docker-postgresql/blob/master/README.md#creating-a-database-on-first-run - ## - postgresqlDatabase: marquez - ## @param postgresql.existingSecret Name of existing secret object - ## The secret should contain the following keys: - ## postgresql-postgres-password (for root user) - ## postgresql-password (for the unprivileged user) - ## - # existingSecret: my-secret - existingSecret: "" + ## ref: https://github.com/bitnami/bitnami-docker-postgresql/blob/master/README.md#creating-a-database-user-on-first-run + ## + auth: + ## @param auth.username Name for a custom user to create + ## + username: buendia + ## @param auth.password Password for the custom user to create + ## + password: macondo + ## @param auth.database Name for a custom database to create + ## + database: marquez + ## @param auth.existingSecret Name of existing secret to use for PostgreSQL credentials + ## `auth.postgresPassword`, `auth.password`, and `auth.replicationPassword` will be ignored and picked up from this secret + ## The secret must contain the keys `postgres-password` (which is the password for "postgres" admin user), + ## `password` (which is the password for the custom user to create when `auth.username` is set), + ## and `replication-password` (which is the password for replication user). + ## The secret might also contains the key `ldap-password` if LDAP is enabled. `ldap.bind_password` will be ignored and + ## picked from this secret in this case. + ## The value is evaluated as a template. + ## + existingSecret: "" ## Additional labels to all the deployed resources; note that ## the following standard labels will automatically be applied. 
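Ahead of the Java client README changes below (which bump the published coordinates to 0.22.0), a hedged usage sketch: it assumes a Marquez API at `http://localhost:5000`, and `getJob` is the same client call `MarquezAppIntegrationTest` uses above to verify symlink resolution.

```java
import marquez.client.MarquezClient;
import marquez.client.models.Job;

class ClientSketch {
  public static void main(String[] args) {
    // Assumes a running Marquez API; the URL is illustrative.
    MarquezClient client =
        MarquezClient.builder().baseUrl("http://localhost:5000").build();

    // With the job symlink support added in this release, fetching a job by a
    // symlinked name returns the symlink target, as testApp_getJob asserts above.
    Job job = client.getJob("my-namespace", "symlinkJob");
    System.out.println(job.getName());
  }
}
```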
diff --git a/clients/java/README.md b/clients/java/README.md index 3ad1ba2beb..fea3631074 100644 --- a/clients/java/README.md +++ b/clients/java/README.md @@ -10,14 +10,14 @@ Maven: io.github.marquezproject marquez-java - 0.21.0 + 0.22.0 ``` or Gradle: ```groovy -implementation 'io.github.marquezproject:marquez-java:0.21.0 +implementation 'io.github.marquezproject:marquez-java:0.22.0 ``` ## Usage diff --git a/clients/python/marquez_client/__init__.py b/clients/python/marquez_client/__init__.py index 8f1591ffaa..c3998111ce 100644 --- a/clients/python/marquez_client/__init__.py +++ b/clients/python/marquez_client/__init__.py @@ -13,7 +13,7 @@ # -*- coding: utf-8 -*- __author__ = """Marquez Project""" -__version__ = "0.21.0" +__version__ = "0.22.0" from marquez_client.client import MarquezClient # noqa: F401 from marquez_client.clients import Clients # noqa: F401 diff --git a/clients/python/setup.cfg b/clients/python/setup.cfg index b7f645a146..6e3fb83168 100644 --- a/clients/python/setup.cfg +++ b/clients/python/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.21.0 +current_version = 0.22.0 commit = False tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(?P.*) diff --git a/clients/python/setup.py b/clients/python/setup.py index 6ee4bec9ec..ae6e61cb9a 100644 --- a/clients/python/setup.py +++ b/clients/python/setup.py @@ -23,7 +23,7 @@ setup( name="marquez-python", - version="0.21.0", + version="0.22.0", description="Marquez Python Client", long_description=readme, long_description_content_type="text/markdown", diff --git a/docker-compose.yml b/docker-compose.yml index 9afa4bcfc0..acb6a169fc 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -10,12 +10,12 @@ services: - "${API_PORT}:${API_PORT}" - "${API_ADMIN_PORT}:${API_ADMIN_PORT}" volumes: - - ./docker/wait-for-it.sh:/usr/src/app/wait-for-it.sh + - utils:/opt/marquez links: - "db:postgres" depends_on: - db - entrypoint: ["./wait-for-it.sh", "db:5432", "--", "./entrypoint.sh"] + entrypoint: ["/opt/marquez/wait-for-it.sh", "db:5432", "--", "./entrypoint.sh"] web: image: "marquezproject/marquez-web:${TAG}" @@ -42,6 +42,10 @@ services: - MARQUEZ_USER=marquez - MARQUEZ_PASSWORD=marquez volumes: - - ./docker/init-db.sh:/docker-entrypoint-initdb.d/init-db.sh + - db-init:/docker-entrypoint-initdb.d # Enables SQL statement logging (see: https://www.postgresql.org/docs/12/runtime-config-logging.html#GUC-LOG-STATEMENT) # command: ["postgres", "-c", "log_statement=all"] + +volumes: + utils: + db-init: \ No newline at end of file diff --git a/docker/up.sh b/docker/up.sh index 8d1ba22a7f..6bb62bfd13 100755 --- a/docker/up.sh +++ b/docker/up.sh @@ -3,6 +3,9 @@ # SPDX-License-Identifier: Apache-2.0 set -e +set -x + +SCRIPTDIR=$(dirname $0) title() { echo -e "\033[1m${1}\033[0m" @@ -104,4 +107,6 @@ if [[ "${SEED}" = "true" ]]; then compose_files+=" -f docker-compose.seed.yml" fi +$SCRIPTDIR/volumes.sh marquez + API_PORT=${API_PORT} API_ADMIN_PORT=${API_ADMIN_PORT} WEB_PORT=${WEB_PORT} TAG="${TAG}" docker-compose $compose_files up $args diff --git a/docker/volumes.sh b/docker/volumes.sh new file mode 100755 index 0000000000..5a30fe7c62 --- /dev/null +++ b/docker/volumes.sh @@ -0,0 +1,14 @@ +#!/bin/sh + +VOLUME_PREFIX=$1 + +UTILS_VOLUME="${VOLUME_PREFIX}_utils" +DB_INIT_VOLUME="${VOLUME_PREFIX}_db-init" + +docker volume create $UTILS_VOLUME +docker volume create $DB_INIT_VOLUME +docker create --name marquez-volume-helper -v $UTILS_VOLUME:/opt/marquez-utils -v $DB_INIT_VOLUME:/opt/marquez-db-init busybox +docker cp ./docker/wait-for-it.sh 
diff --git a/docs/openapi.html b/docs/openapi.html
index 78f680c191..dd177119f5 100644
--- a/docs/openapi.html
+++ b/docs/openapi.html
@@ -2155,7 +2155,7 @@


Marquez (0.22.0)

Download OpenAPI specification:Download

License: Apache 2.0

Marquez is an open source metadata service for the collection, aggregation, and visualization of a data ecosystem's metadata.


Namespaces

Create a namespace

Creates a new namespace object. A namespace enables the contextual grouping of related jobs and datasets. Namespaces must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), dashes (-), colons (:), slashes (/), or dots (.). A namespace is case-insensitive with a maximum length of 1024 characters. Note that jobs and datasets will be unique within a namespace, but not across namespaces.

path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

Request Body schema: application/json
ownerName
required
string

The owner of the namespace.

description
string

The description of the namespace.

Responses

http://localhost:5000/api/v1/namespaces/{namespace}

Request samples

Content type
application/json
{
  • "ownerName": "me",
  • "description": "My first namespace!"
}

Response samples

Content type
application/json
{
  • "name": "my-namespace",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "ownerName": "me",
  • "description": "My first namespace!"
}
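As a usage sketch for this endpoint (assuming a local Marquez at `http://localhost:5000` as in the samples, and the `PUT` method that Marquez conventionally uses for namespace creation, which is not visible in this extract):

```sh
# Create (or update) my-namespace with the request body shown above.
curl -s -X PUT "http://localhost:5000/api/v1/namespaces/my-namespace" \
  -H "Content-Type: application/json" \
  -d '{"ownerName": "me", "description": "My first namespace!"}'
```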

Retrieve a namespace

Returns a namespace.

path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

Responses

http://localhost:5000/api/v1/namespaces/{namespace}

Response samples

Content type
application/json
{
  • "name": "my-namespace",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "ownerName": "me",
  • "description": "My first namespace!"
}

List all namespaces

Returns a list of namespaces.

query Parameters
limit
integer
Default: 100
Example: limit=25

The number of results to return from offset

offset
integer
Default: 0

The initial position from which to return results

Responses

http://localhost:5000/api/v1/namespaces

Response samples

Content type
application/json
{
  • "namespaces": [
    ]
}
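A quick sketch of paging through namespaces with the `limit` and `offset` query parameters documented above (local server assumed, as in the samples):

```sh
# Fetch the second page of 25 namespaces.
curl -s "http://localhost:5000/api/v1/namespaces?limit=25&offset=25"
```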

Sources

Create a source Deprecated

Creates a new source object. A source is the physical location of a dataset, such as a table in PostgreSQL or a topic in Kafka. A source enables the grouping of physical datasets to their physical source.

path Parameters
source
required
string <= 1024 characters
Example: my-source

The name of the source.

Request Body schema: application/json
type
required
string

The type of the source.

connectionUrl
required
string <URL>

The URL to the location of the source.

description
string

The description of the source.

Responses

http://localhost:5000/api/v1/sources/{source}

Request samples

Content type
application/json
{
  • "type": "POSTGRESQL",
  • "connectionUrl": "jdbc:postgresql://db.example.com/mydb",
  • "description": "My first source!"
}

Response samples

Content type
application/json
{
  • "type": "POSTGRESQL",
  • "name": "my-source",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "connectionUrl": "jdbc:postgresql://db.example.com/mydb",
  • "description": "My first source!"
}
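A sketch of registering the PostgreSQL source from the samples (the HTTP method does not survive this extract; `PUT` is assumed, per Marquez convention for this deprecated endpoint):

```sh
curl -s -X PUT "http://localhost:5000/api/v1/sources/my-source" \
  -H "Content-Type: application/json" \
  -d '{"type": "POSTGRESQL", "connectionUrl": "jdbc:postgresql://db.example.com/mydb", "description": "My first source!"}'
```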

Retrieve a source

Returns a source.

path Parameters
source
required
string <= 1024 characters
Example: my-source

The name of the source.

Responses

http://localhost:5000/api/v1/sources/{source}

Response samples

Content type
application/json
{
  • "type": "POSTGRESQL",
  • "name": "my-source",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "connectionUrl": "jdbc:postgresql://db.example.com/mydb",
  • "description": "My first source!"
}

List all sources

Returns a list of sources.

query Parameters
limit
integer
Default: 100
Example: limit=25

The number of results to return from offset

offset
integer
Default: 0

The initial position from which to return results

Responses

http://localhost:5000/api/v1/sources

Response samples

Content type
application/json
{
  • "sources": [
    ]
}

Datasets

Create a dataset Deprecated

Creates a new dataset.

path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

dataset
required
string <= 1024 characters
Example: my-dataset

The name of the dataset.

Request Body schema: application/json
One of
type
required
string
Value: "DB_TABLE"

The type of the dataset.

@@ -2217,43 +2217,43 @@
runId
string

The ID associated with the run modifying the table.

Responses

http://localhost:5000/api/v1/namespaces/{namespace}/datasets/{dataset}

Request samples

Content type
application/json
Example
{
  • "type": "DB_TABLE",
  • "physicalName": "public.mytable",
  • "sourceName": "my-source",
  • "fields": [
    ],
  • "description": "My first dataset!"
}

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "DB_TABLE",
  • "name": "my-dataset",
  • "physicalName": "public.mytable",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "sourceName": "my-source",
  • "fields": [
    ],
  • "tags": [ ],
  • "lastModifiedAt": null,
  • "description": "My first dataset!",
  • "facets": { },
  • "currentVersion": "b1d626a2-6d3a-475e-9ecf-943176d4a8c6"
}
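A sketch of creating the `DB_TABLE` dataset from the request sample (again assuming a local server and the `PUT` method, which is not shown in this extract):

```sh
curl -s -X PUT "http://localhost:5000/api/v1/namespaces/my-namespace/datasets/my-dataset" \
  -H "Content-Type: application/json" \
  -d '{"type": "DB_TABLE", "physicalName": "public.mytable", "sourceName": "my-source", "fields": [], "description": "My first dataset!"}'
```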

Retrieve a dataset

Returns a dataset.

path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

dataset
required
string <= 1024 characters
Example: my-dataset

The name of the dataset.

Responses

http://localhost:5000/api/v1/namespaces/{namespace}/datasets/{dataset}

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "DB_TABLE",
  • "name": "my-dataset",
  • "physicalName": "public.mytable",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "sourceName": "my-source",
  • "fields": [
    ],
  • "tags": [ ],
  • "lastModifiedAt": null,
  • "description": "My first dataset!",
  • "facets": { },
  • "currentVersion": "b1d626a2-6d3a-475e-9ecf-943176d4a8c6"
}

Retrieve a version for a dataset

Returns a version for a dataset.

path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

dataset
required
string <= 1024 characters
Example: my-dataset

The name of the dataset.

version
required
string <uuid>
Example: ea9badc5-7cb2-49af-9a9f-155771d3a797

The ID of the job or dataset version.

Responses

http://localhost:5000/api/v1/namespaces/{namespace}/datasets/{dataset}/versions/{version}

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "DB_TABLE",
  • "name": "my-dataset",
  • "physicalName": "public.mytable",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "version": "d224dac0-35d7-4d9b-bbbe-6fff1a8485ad",
  • "namespace": "my-namespace",
  • "sourceName": "my-source",
  • "fields": [
    ],
  • "tags": [ ],
  • "description": "My first dataset!",
  • "createdByRun": {
    }
}

List all versions for a dataset

Returns a list of versions for a dataset.

path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

dataset
required
string <= 1024 characters
Example: my-dataset

The name of the dataset.

Responses

http://localhost:5000/api/v1/namespaces/{namespace}/datasets/{dataset}/versions

Response samples

Content type
application/json
{
  • "versions": [
    ]
}

List all datasets

Returns a list of datasets.

path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

dataset
required
string <= 1024 characters
Example: my-dataset

The name of the dataset.

query Parameters
limit
integer
Default: 100
Example: limit=25

The number of results to return from offset

offset
integer
Default: 0

The initial position from which to return results

Responses

http://localhost:5000/api/v1/namespaces/{namespace}/datasets

Response samples

Content type
application/json
{
  • "datasets": [
    ],
  • "totalCount": 0
}

Tag a dataset

Tag an existing dataset.

path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

dataset
required
string <= 1024 characters
Example: my-dataset

The name of the dataset.

tag
required
string
Example: SENSITIVE

The name of the tag.

Responses

http://localhost:5000/api/v1/namespaces/{namespace}/datasets/{dataset}/tags/{tag}

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "DB_TABLE",
  • "name": "my-dataset",
  • "physicalName": "public.mytable",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "sourceName": "my-source",
  • "fields": [
    ],
  • "tags": [ ],
  • "lastModifiedAt": null,
  • "description": "My first dataset!",
  • "facets": { },
  • "currentVersion": "b1d626a2-6d3a-475e-9ecf-943176d4a8c6"
}
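Tagging takes no request body; the tag name rides in the path. A sketch (method assumed to be `POST`, per Marquez convention):

```sh
# Apply the SENSITIVE tag to my-dataset.
curl -s -X POST "http://localhost:5000/api/v1/namespaces/my-namespace/datasets/my-dataset/tags/SENSITIVE"
```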

Tag a field

Tag an existing field of a dataset.

path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

dataset
required
string <= 1024 characters
Example: my-dataset

The name of the dataset.

field
required
string
Example: my_field

The name of the field.

tag
required
string
Example: SENSITIVE

The name of the tag.

Responses

http://localhost:5000/api/v1/namespaces/{namespace}/datasets/{dataset}/fields/{field}/tags/{tag}

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "DB_TABLE",
  • "name": "my-dataset",
  • "physicalName": "public.mytable",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "sourceName": "my-source",
  • "fields": [
    ],
  • "tags": [ ],
  • "lastModifiedAt": null,
  • "description": "My first dataset!",
  • "facets": { },
  • "currentVersion": "b1d626a2-6d3a-475e-9ecf-943176d4a8c6"
}

Jobs

Create a job Deprecated

Creates a new job object. All job objects are immutable and are uniquely identified by a generated ID. Marquez will create a version of a job each time the contents of the object are modified. For example, the location of a job may change over time, resulting in new versions. The accumulated versions can be listed, used to rerun a specific job version, or used to help debug a failed job run.

path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

job
required
string <= 1024 characters
Example: my-job

The name of the job.

Request Body schema: application/json
object

The ID of the job.

@@ -2266,29 +2266,29 @@
runId
string

An optional run ID used to associate a job version to an existing job run.

Responses

http://localhost:5000/api/v1/namespaces/{namespace}/jobs/{job}

Request samples

Content type
application/json
{}

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "BATCH",
  • "name": "my-job",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "inputs": [
    ],
  • "outputs": [ ],
  • "context": {
    },
  • "description": "My first job!",
  • "latestRun": null,
  • "facets": { },
  • "currentVersion": "b1d626a2-6d3a-475e-9ecf-943176d4a8c6"
}
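The request sample above is empty; a hypothetical minimal job payload gives the flavor (field names follow the Marquez job model, values are placeholders, and the `PUT` method is assumed per Marquez convention):

```sh
# Create (or update) my-job with one input dataset.
curl -s -X PUT "http://localhost:5000/api/v1/namespaces/my-namespace/jobs/my-job" \
  -H "Content-Type: application/json" \
  -d '{
        "type": "BATCH",
        "inputs": [{"namespace": "my-namespace", "name": "my-dataset"}],
        "outputs": [],
        "description": "My first job!"
      }'
```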

Retrieve a job

Retrieve a job.

path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

job
required
string <= 1024 characters
Example: my-job

The name of the job.

Responses

http://localhost:5000/api/v1/namespaces/{namespace}/jobs/{job}

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "BATCH",
  • "name": "my-job",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "inputs": [
    ],
  • "outputs": [ ],
  • "context": {
    },
  • "description": "My first job!",
  • "latestRun": null,
  • "facets": { },
  • "currentVersion": "b1d626a2-6d3a-475e-9ecf-943176d4a8c6"
}

List all jobs

Returns a list of jobs.

path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

query Parameters
limit
integer
Default: 100
Example: limit=25

The number of results to return from offset

offset
integer
Default: 0

The initial position from which to return results

Responses

http://localhost:5000/api/v1/namespaces/{namespace}/jobs

Response samples

Content type
application/json
{
  • "jobs": [
    ],
  • "totalCount": 0
}

Retrieve a version for a job

Returns a version for a job.

path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

job
required
string <= 1024 characters
Example: my-job

The name of the job.

version
required
string <uuid>
Example: ea9badc5-7cb2-49af-9a9f-155771d3a797

The ID of the job or dataset version.

Responses

http://localhost:5000/api/v1/namespaces/{namespace}/jobs/{job}/versions/{version}

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "BATCH",
  • "name": "my-job",
  • "version": "56472c57-a2ef-4218-b7b7-d2af02a343fd",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "inputs": [
    ],
  • "outputs": [ ],
  • "context": {
    },
  • "description": "My first job!",
  • "facets": { }
}

List all versions for a job

Returns a list of versions for a job.

path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

job
required
string <= 1024 characters
Example: my-job

The name of the job.

Responses

http://localhost:5000/api/v1/namespaces/{namespace}/jobs/{job}/versions

Response samples

Content type
application/json
{
  • "versions": [
    ]
}

Create a run Deprecated

Creates a new run object for a job.

path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

job
required
string <= 1024 characters
Example: my-job

The name of the job.

Request Body schema: application/json
id
string <uuid>

An optional user-provided unique ID of the run. A run ID must be a UUID. If an ID for the run is not provided, a random UUID will be generated for the given run.

@@ -2297,55 +2297,55 @@
args
object

The arguments of the run.

Responses

http://localhost:5000/api/v1/namespaces/{namespace}/jobs/{job}/runs

Request samples

Content type
application/json
{
  • "args": {
    }
}

Response samples

Content type
application/json
{
  • "id": "870492da-ecfb-4be0-91b9-9a89ddd3db90",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "nominalStartTime": null,
  • "nominalEndTime": null,
  • "state": "COMPLETED",
  • "startedAt": "2019-05-09T15:17:32.690346",
  • "endedAt": "2019-05-09T20:05:46.815920Z",
  • "durationMs": 4250894125,
  • "args": {
    },
  • "facets": { }
}
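A sketch of creating a run for `my-job` with the optional `args` body shown above (assuming `POST`, per Marquez convention for this deprecated endpoint):

```sh
curl -s -X POST "http://localhost:5000/api/v1/namespaces/my-namespace/jobs/my-job/runs" \
  -H "Content-Type: application/json" \
  -d '{"args": {}}'
```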

List all runs

Returns a list of runs for a job.

path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

job
required
string <= 1024 characters
Example: my-job

The name of the job.

query Parameters
limit
integer
Default: 100
Example: limit=25

The number of results to return from offset

offset
integer
Default: 0

The initial position from which to return results

Responses

http://localhost:5000/api/v1/namespaces/{namespace}/jobs/{job}/runs

Response samples

Content type
application/json
{
  • "runs": [
    ]
}

Retrieve a run

Retrieve a run.

path Parameters
id
required
string <uuid>
Example: ea9badc5-7cb2-49af-9a9f-155771d3a797

The ID of the run.

Responses

http://localhost:5000/api/v1/jobs/runs/{id}

Response samples

Content type
application/json
{
  • "id": "870492da-ecfb-4be0-91b9-9a89ddd3db90",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "nominalStartTime": null,
  • "nominalEndTime": null,
  • "state": "RUNNING",
  • "startedAt": "2019-05-09T15:17:32.690346",
  • "endedAt": null,
  • "durationMs": null,
  • "args": {
    },
  • "facets": { }
}

Start a run Deprecated

Marks the run as RUNNING.

path Parameters
id
required
string <uuid>
Example: ea9badc5-7cb2-49af-9a9f-155771d3a797

The ID of the run.

query Parameters
at
string <date-time>

An ISO-8601 timestamp representing the time when the run transitioned.

Responses

http://localhost:5000/api/v1/jobs/runs/{id}/start

Response samples

Content type
application/json
{
  • "id": "870492da-ecfb-4be0-91b9-9a89ddd3db90",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "nominalStartTime": null,
  • "nominalEndTime": null,
  • "state": "RUNNING",
  • "startedAt": "2019-05-09T15:17:32.690346",
  • "endedAt": null,
  • "durationMs": null,
  • "args": {
    },
  • "facets": { }
}
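The run-state endpoints below all follow the same shape. A sketch of driving a run through its lifecycle (assuming `POST`, and reusing the placeholder run ID from the samples):

```sh
RUN_ID="870492da-ecfb-4be0-91b9-9a89ddd3db90"  # placeholder from the samples above
curl -s -X POST "http://localhost:5000/api/v1/jobs/runs/${RUN_ID}/start"
curl -s -X POST "http://localhost:5000/api/v1/jobs/runs/${RUN_ID}/complete"
```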

Complete a run Deprecated

Marks the run as COMPLETED.

path Parameters
id
required
string <uuid>
Example: ea9badc5-7cb2-49af-9a9f-155771d3a797

The ID of the run.

query Parameters
at
string <date-time>

An ISO-8601 timestamp representing the time when the run transitioned.

Responses

http://localhost:5000/api/v1/jobs/runs/{id}/complete

Response samples

Content type
application/json
{
  • "id": "870492da-ecfb-4be0-91b9-9a89ddd3db90",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "nominalStartTime": null,
  • "nominalEndTime": null,
  • "state": "COMPLETED",
  • "startedAt": "2019-05-09T15:17:32.690346",
  • "endedAt": "2019-05-09T20:05:46.815920Z",
  • "durationMs": 4250894125,
  • "args": {
    },
  • "facets": { }
}

Fail a run Deprecated

Marks the run as FAILED.

path Parameters
id
required
string <uuid>
Example: ea9badc5-7cb2-49af-9a9f-155771d3a797

The ID of the run.

query Parameters
at
string <date-time>

An ISO-8601 timestamp representing the time when the run transitioned.

Responses

http://localhost:5000/api/v1/jobs/runs/{id}/fail

Response samples

Content type
application/json
{
  • "id": "870492da-ecfb-4be0-91b9-9a89ddd3db90",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "nominalStartTime": null,
  • "nominalEndTime": null,
  • "state": "RUNNING",
  • "startedAt": "2019-05-09T15:17:32.690346",
  • "endedAt": null,
  • "durationMs": null,
  • "args": {
    },
  • "facets": { }
}

Abort a run Deprecated

Marks the run as ABORTED.

path Parameters
id
required
string <uuid>
Example: ea9badc5-7cb2-49af-9a9f-155771d3a797

The ID of the run.

query Parameters
at
string <date-time>

An ISO-8601 timestamp representing the time when the run transitioned.

Responses

http://localhost:5000/api/v1/jobs/runs/{id}/abort

Response samples

Content type
application/json
{
  • "id": "870492da-ecfb-4be0-91b9-9a89ddd3db90",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "nominalStartTime": null,
  • "nominalEndTime": null,
  • "state": "RUNNING",
  • "startedAt": "2019-05-09T15:17:32.690346",
  • "endedAt": null,
  • "durationMs": null,
  • "args": {
    },
  • "facets": { }
}

Lineage

Record a single lineage event

Receive, process, and store lineage metadata using the OpenLineage standard.

Request Body schema: application/json
any (LineageEvent)

Responses

http://localhost:5000/api/v1/lineage

Request samples

Content type
application/json
{}
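The request sample above is empty; a minimal, hypothetical OpenLineage `START` event gives the flavor (field names follow the OpenLineage spec; all values are placeholders):

```sh
curl -s -X POST "http://localhost:5000/api/v1/lineage" \
  -H "Content-Type: application/json" \
  -d '{
        "eventType": "START",
        "eventTime": "2022-05-16T09:00:00.000Z",
        "run":  {"runId": "d46e465b-d358-4d32-83d4-df660ff614dd"},
        "job":  {"namespace": "my-namespace", "name": "my-job"},
        "inputs":  [],
        "outputs": [],
        "producer": "https://example.com/my-producer"
      }'
```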

Get a lineage graph

query Parameters
nodeId
required
string
Example: nodeId=dataset:food_delivery:public.delivery_7_days

The ID of the node.

depth
integer
Default: 20

Depth of lineage graph to create.

Responses

http://localhost:5000/api/v1/lineage

Response samples

Content type
application/json
{
  • "graph": [
    ]
}
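A sketch of fetching a graph around the dataset node from the example above, with an explicit `depth` (local server assumed):

```sh
curl -s "http://localhost:5000/api/v1/lineage?nodeId=dataset:food_delivery:public.delivery_7_days&depth=5"
```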

Tags

Create a tag

Creates a new tag object.

path Parameters
tag
required
string
Example: SENSITIVE

The name of the tag.

Request Body schema: application/json
description
string

The description of the tag.

Responses

http://localhost:5000/api/v1/tags/{tag}

Request samples

Content type
application/json
{
  • "description": "My first tag!"
}

Response samples

Content type
application/json
{
  • "tags": [
    ]
}
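A sketch of creating the `SENSITIVE` tag with a description (assuming `PUT`, which Marquez conventionally uses for tag upserts):

```sh
curl -s -X PUT "http://localhost:5000/api/v1/tags/SENSITIVE" \
  -H "Content-Type: application/json" \
  -d '{"description": "My first tag!"}'
```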

List all tags

Returns a list of tags.

query Parameters
limit
integer
Default: 100
Example: limit=25

The number of results to return from offset

offset
integer
Default: 0

The initial position from which to return results

Responses

http://localhost:5000/api/v1/tags

Response samples

Content type
application/json
{
  • "tags": [
    ]
}

Search

Query all datasets and jobs

Returns one or more datasets and jobs matching your query.

query Parameters
q
required
string
Example: q=my-dataset

Query containing a pattern to match; dataset and job pattern matching is string-based and case-insensitive. Use a percent sign (%) to match any string of zero or more characters (my-job%), or an underscore (_) to match a single character (_job_).

filter
string
Example: filter=dataset

Filters the results of your query by dataset or job.

sort
string
Example: sort=name

Sorts the results of your query by name or updated_at.

@@ -2354,7 +2354,7 @@

Response samples

Content type
application/json
{
  • "totalCount": 1,
  • "results": [
    ]
}
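A sketch of the search call these parameters describe (assuming the standard `/api/v1/search` path, which is not visible in this fragment; note the percent wildcard must be URL-encoded as `%25`):

```sh
# Case-insensitive match on dataset names starting with "my-".
curl -s "http://localhost:5000/api/v1/search?q=my-%25&filter=dataset&sort=name&limit=10"
```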