From 0b2f343a7b02b65f5ca301a7ee7fcf8c41458aee Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Tue, 27 Jul 2021 10:33:30 -0700 Subject: [PATCH 01/33] Begin reorg --- metadata-ingestion/README.md | 751 ------------------- metadata-ingestion/source_docs/athena.md | 23 + metadata-ingestion/source_docs/bigquery.md | 63 ++ metadata-ingestion/source_docs/datahub.md | 0 metadata-ingestion/source_docs/dbt.md | 39 + metadata-ingestion/source_docs/druid.md | 22 + metadata-ingestion/source_docs/feast.md | 22 + metadata-ingestion/source_docs/file.md | 12 + metadata-ingestion/source_docs/glue.md | 31 + metadata-ingestion/source_docs/hive.md | 47 ++ metadata-ingestion/source_docs/kafka.md | 47 ++ metadata-ingestion/source_docs/ldap.md | 23 + metadata-ingestion/source_docs/looker.md | 22 + metadata-ingestion/source_docs/lookml.md | 27 + metadata-ingestion/source_docs/mongodb.md | 31 + metadata-ingestion/source_docs/mssql.md | 66 ++ metadata-ingestion/source_docs/mysql.md | 31 + metadata-ingestion/source_docs/oracle.md | 25 + metadata-ingestion/source_docs/postgres.md | 23 + metadata-ingestion/source_docs/redshift.md | 41 + metadata-ingestion/source_docs/sagemaker.md | 34 + metadata-ingestion/source_docs/snowflake.md | 68 ++ metadata-ingestion/source_docs/sqlalchemy.md | 22 + metadata-ingestion/source_docs/superset.md | 19 + 24 files changed, 738 insertions(+), 751 deletions(-) create mode 100644 metadata-ingestion/source_docs/athena.md create mode 100644 metadata-ingestion/source_docs/bigquery.md create mode 100644 metadata-ingestion/source_docs/datahub.md create mode 100644 metadata-ingestion/source_docs/dbt.md create mode 100644 metadata-ingestion/source_docs/druid.md create mode 100644 metadata-ingestion/source_docs/feast.md create mode 100644 metadata-ingestion/source_docs/file.md create mode 100644 metadata-ingestion/source_docs/glue.md create mode 100644 metadata-ingestion/source_docs/hive.md create mode 100644 metadata-ingestion/source_docs/kafka.md create mode 100644 metadata-ingestion/source_docs/ldap.md create mode 100644 metadata-ingestion/source_docs/looker.md create mode 100644 metadata-ingestion/source_docs/lookml.md create mode 100644 metadata-ingestion/source_docs/mongodb.md create mode 100644 metadata-ingestion/source_docs/mssql.md create mode 100644 metadata-ingestion/source_docs/mysql.md create mode 100644 metadata-ingestion/source_docs/oracle.md create mode 100644 metadata-ingestion/source_docs/postgres.md create mode 100644 metadata-ingestion/source_docs/redshift.md create mode 100644 metadata-ingestion/source_docs/sagemaker.md create mode 100644 metadata-ingestion/source_docs/snowflake.md create mode 100644 metadata-ingestion/source_docs/sqlalchemy.md create mode 100644 metadata-ingestion/source_docs/superset.md diff --git a/metadata-ingestion/README.md b/metadata-ingestion/README.md index 92eaca1b702c5..7d72a70bd0fdd 100644 --- a/metadata-ingestion/README.md +++ b/metadata-ingestion/README.md @@ -138,757 +138,6 @@ datahub ingest -c ./examples/recipes/mssql_to_datahub.yml A number of recipes are included in the examples/recipes directory. 
-## Sources - -### Kafka Metadata `kafka` - -Extracts: - -- List of topics - from the Kafka broker -- Schemas associated with each topic - from the schema registry - -```yml -source: - type: "kafka" - config: - connection: - bootstrap: "broker:9092" - consumer_config: {} # passed to https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#confluent_kafka.DeserializingConsumer - schema_registry_url: http://localhost:8081 - schema_registry_config: {} # passed to https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#confluent_kafka.schema_registry.SchemaRegistryClient -``` - -The options in the consumer config and schema registry config are passed to the Kafka DeserializingConsumer and SchemaRegistryClient respectively. - -For a full example with a number of security options, see this [example recipe](./examples/recipes/secured_kafka.yml). - -### MySQL Metadata `mysql` - -Extracts: - -- List of databases and tables -- Column types and schema associated with each table - -```yml -source: - type: mysql - config: - username: root - password: example - database: dbname - host_port: localhost:3306 - table_pattern: - deny: - # Note that the deny patterns take precedence over the allow patterns. - - "performance_schema" - allow: - - "schema1.table2" - # Although the 'table_pattern' enables you to skip everything from certain schemas, - # having another option to allow/deny on schema level is an optimization for the case when there is a large number - # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter - # them out afterwards via the table_pattern. - schema_pattern: - deny: - - "garbage_schema" - allow: - - "schema1" -``` - -### Microsoft SQL Server Metadata `mssql` - -We have two options for the underlying library used to connect to SQL Server: (1) [python-tds](https://github.com/denisenkom/pytds) and (2) [pyodbc](https://github.com/mkleehammer/pyodbc). The TDS library is pure Python and hence easier to install, but only PyODBC supports encrypted connections. - -Extracts: - -- List of databases, schema, tables and views -- Column types associated with each table/view - -```yml -source: - type: mssql - config: - username: user - password: pass - host_port: localhost:1433 - database: DemoDatabase - include_views: True # whether to include views, defaults to True - table_pattern: - deny: - - "^.*\\.sys_.*" # deny all tables that start with sys_ - allow: - - "schema1.table1" - - "schema1.table2" - options: - # Any options specified here will be passed to SQLAlchemy's create_engine as kwargs. - # See https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine for details. - # Many of these options are specific to the underlying database driver, so that library's - # documentation will be a good reference for what is supported. To find which dialect is likely - # in use, consult this table: https://docs.sqlalchemy.org/en/14/dialects/index.html. - charset: "utf8" - # If set to true, we'll use the pyodbc library. This requires you to have - # already installed the Microsoft ODBC Driver for SQL Server. - # See https://docs.microsoft.com/en-us/sql/connect/python/pyodbc/step-1-configure-development-environment-for-pyodbc-python-development?view=sql-server-ver15 - use_odbc: False - uri_args: {} -``` - -
- Example: using ingestion with ODBC and encryption - -This requires you to have already installed the Microsoft ODBC Driver for SQL Server. -See https://docs.microsoft.com/en-us/sql/connect/python/pyodbc/step-1-configure-development-environment-for-pyodbc-python-development?view=sql-server-ver15 - -```yml -source: - type: mssql - config: - # See https://docs.sqlalchemy.org/en/14/dialects/mssql.html#module-sqlalchemy.dialects.mssql.pyodbc - use_odbc: True - username: user - password: pass - host_port: localhost:1433 - database: DemoDatabase - include_views: True # whether to include views, defaults to True - uri_args: - # See https://docs.microsoft.com/en-us/sql/connect/odbc/dsn-connection-string-attribute?view=sql-server-ver15 - driver: "ODBC Driver 17 for SQL Server" - Encrypt: "yes" - TrustServerCertificate: "Yes" - ssl: "True" - # Trusted_Connection: "yes" -``` - -
- -### Hive `hive` - -Extracts: - -- List of databases, schema, and tables -- Column types associated with each table -- Detailed table and storage information - -```yml -source: - type: hive - config: - # For more details on authentication, see the PyHive docs: - # https://github.com/dropbox/PyHive#passing-session-configuration. - # LDAP, Kerberos, etc. are supported using connect_args, which can be - # added under the `options` config parameter. - #scheme: 'hive+http' # set this if Thrift should use the HTTP transport - #scheme: 'hive+https' # set this if Thrift should use the HTTP with SSL transport - username: user # optional - password: pass # optional - host_port: localhost:10000 - database: DemoDatabase # optional, defaults to 'default' - # table_pattern/schema_pattern is same as above - # options is same as above -``` - -
- Example: using ingestion with Azure HDInsight - -```yml -# Connecting to Microsoft Azure HDInsight using TLS. -source: - type: hive - config: - scheme: "hive+https" - host_port: .azurehdinsight.net:443 - username: admin - password: "" - options: - connect_args: - http_path: "/hive2" - auth: BASIC - # table_pattern/schema_pattern is same as above -``` - -
- -### PostgreSQL `postgres` - -Extracts: - -- List of databases, schema, and tables -- Column types associated with each table -- Also supports PostGIS extensions -- database_alias (optional) can be used to change the name of database to be ingested - -```yml -source: - type: postgres - config: - username: user - password: pass - host_port: localhost:5432 - database: DemoDatabase - database_alias: DatabaseNameToBeIngested - include_views: True # whether to include views, defaults to True - # table_pattern/schema_pattern is same as above - # options is same as above -``` - -### Redshift `redshift` - -Extracts: - -- List of databases, schema, and tables -- Column types associated with each table -- Also supports PostGIS extensions - -```yml -source: - type: redshift - config: - username: user - password: pass - host_port: example.something.us-west-2.redshift.amazonaws.com:5439 - database: DemoDatabase - include_views: True # whether to include views, defaults to True - # table_pattern/schema_pattern is same as above - # options is same as above -``` - -
- Extra options when running Redshift behind a proxy - -This requires you to have already installed the Microsoft ODBC Driver for SQL Server. -See https://docs.microsoft.com/en-us/sql/connect/python/pyodbc/step-1-configure-development-environment-for-pyodbc-python-development?view=sql-server-ver15 - -```yml -source: - type: redshift - config: - # username, password, database, etc are all the same as above - host_port: my-proxy-hostname:5439 - options: - connect_args: - sslmode: "prefer" # or "require" or "verify-ca" - sslrootcert: ~ # needed to unpin the AWS Redshift certificate -``` - -
- -### AWS SageMaker `sagemaker` - -Extracts: - -- Feature groups -- Models, jobs, and lineage between the two (e.g. when jobs output a model or a model is used by a job) - -```yml -source: - type: sagemaker - config: - aws_region: # aws_region_name, i.e. "eu-west-1" - env: # environment for the DatasetSnapshot URN, one of "DEV", "EI", "PROD" or "CORP". Defaults to "PROD". - - # Credentials. If not specified here, these are picked up according to boto3 rules. - # (see https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html) - aws_access_key_id: # Optional. - aws_secret_access_key: # Optional. - aws_session_token: # Optional. - aws_role: # Optional (Role chaining supported by using a sorted list). - - extract_feature_groups: True # if feature groups should be ingested, default True - extract_models: True # if models should be ingested, default True - extract_jobs: # if jobs should be ingested, default True for all - auto_ml: True - compilation: True - edge_packaging: True - hyper_parameter_tuning: True - labeling: True - processing: True - training: True - transform: True -``` - -### Snowflake `snowflake` - -Extracts: - -- List of databases, schema, and tables -- Column types associated with each table - -```yml -source: - type: snowflake - config: - username: user - password: pass - host_port: account_name - database_pattern: - # The escaping of the $ symbol helps us skip the environment variable substitution. - allow: - - ^MY_DEMO_DATA.* - - ^ANOTHER_DB_REGEX - deny: - - ^SNOWFLAKE\$ - - ^SNOWFLAKE_SAMPLE_DATA\$ - warehouse: "COMPUTE_WH" # optional - role: "sysadmin" # optional - include_views: True # whether to include views, defaults to True - # table_pattern/schema_pattern is same as above - # options is same as above -``` - -:::tip - -You can also get fine-grained usage statistics for Snowflake using the `snowflake-usage` source. - -::: - -### Superset `superset` - -Extracts: - -- List of charts and dashboards - -```yml -source: - type: superset - config: - username: user - password: pass - provider: db | ldap - connect_uri: http://localhost:8088 - env: "PROD" # Optional, default is "PROD" -``` - -See documentation for superset's `/security/login` at https://superset.apache.org/docs/rest-api for more details on superset's login api. - -### Oracle `oracle` - -Extracts: - -- List of databases, schema, and tables -- Column types associated with each table - -Using the Oracle source requires that you've also installed the correct drivers; see the [cx_Oracle docs](https://cx-oracle.readthedocs.io/en/latest/user_guide/installation.html). The easiest one is the [Oracle Instant Client](https://www.oracle.com/database/technologies/instant-client.html). - -```yml -source: - type: oracle - config: - # For more details on authentication, see the documentation: - # https://docs.sqlalchemy.org/en/14/dialects/oracle.html#dialect-oracle-cx_oracle-connect and - # https://cx-oracle.readthedocs.io/en/latest/user_guide/connection_handling.html#connection-strings. 
- username: user - password: pass - host_port: localhost:5432 - database: dbname - service_name: svc # omit database if using this option - include_views: True # whether to include views, defaults to True - # table_pattern/schema_pattern is same as above - # options is same as above -``` - -### Feast `feast` - -**Note: Feast ingestion requires Docker to be installed.** - -Extracts: - -- List of feature tables (modeled as [`MLFeatureTable`](https://github.com/linkedin/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLFeatureTableProperties.pdl)s), - features ([`MLFeature`](https://github.com/linkedin/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLFeatureProperties.pdl)s), - and entities ([`MLPrimaryKey`](https://github.com/linkedin/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLPrimaryKeyProperties.pdl)s) -- Column types associated with each feature and entity - -Note: this uses a separate Docker container to extract Feast's metadata into a JSON file, which is then -parsed to DataHub's native objects. This was done because of a dependency conflict in the `feast` module. - -```yml -source: - type: feast - config: - core_url: localhost:6565 # default - env: "PROD" # Optional, default is "PROD" - use_local_build: False # Whether to build Feast ingestion image locally, default is False -``` - -### Google BigQuery `bigquery` - -Extracts: - -- List of databases, schema, and tables -- Column types associated with each table - -```yml -source: - type: bigquery - config: - project_id: project # optional - can autodetect from environment - options: # options is same as above - # See https://github.com/mxmzdlv/pybigquery#authentication for details. - credentials_path: "/path/to/keyfile.json" # optional - include_views: True # whether to include views, defaults to True - # table_pattern/schema_pattern is same as above -``` - -:::tip - -You can also get fine-grained usage statistics for BigQuery using the `bigquery-usage` source. - -::: - -### AWS Athena `athena` - -Extracts: - -- List of databases and tables -- Column types associated with each table - -```yml -source: - type: athena - config: - username: aws_access_key_id # Optional. If not specified, credentials are picked up according to boto3 rules. - # See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html - password: aws_secret_access_key # Optional. - database: database # Optional, defaults to "default" - aws_region: aws_region_name # i.e. "eu-west-1" - s3_staging_dir: s3_location # "s3:///prefix/" - # The s3_staging_dir parameter is needed because Athena always writes query results to S3. - # See https://docs.aws.amazon.com/athena/latest/ug/querying.html - # However, the athena driver will transparently fetch these results as you would expect from any other sql client. - work_group: athena_workgroup # "primary" - # table_pattern/schema_pattern is same as above -``` - -### AWS Glue `glue` - -Note: if you also have files in S3 that you'd like to ingest, we recommend you use Glue's built-in data catalog. See [here](./s3-ingestion.md) for a quick guide on how to set up a crawler on Glue and ingest the outputs with DataHub. - -Extracts: - -- List of tables -- Column types associated with each table -- Table metadata, such as owner, description and parameters -- Jobs and their component transformations, data sources, and data sinks - -```yml -source: - type: glue - config: - aws_region: # aws_region_name, i.e. 
"eu-west-1" - extract_transforms: True # whether to ingest Glue jobs, defaults to True - env: # environment for the DatasetSnapshot URN, one of "DEV", "EI", "PROD" or "CORP". Defaults to "PROD". - - # Filtering patterns for databases and tables to scan - database_pattern: # Optional, to filter databases scanned, same as schema_pattern above. - table_pattern: # Optional, to filter tables scanned, same as table_pattern above. - - # Credentials. If not specified here, these are picked up according to boto3 rules. - # (see https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html) - aws_access_key_id: # Optional. - aws_secret_access_key: # Optional. - aws_session_token: # Optional. - aws_role: # Optional (Role chaining supported by using a sorted list). -``` - -### Druid `druid` - -Extracts: - -- List of databases, schema, and tables -- Column types associated with each table - -**Note** It is important to define a explicitly define deny schema pattern for internal druid databases (lookup & sys) -if adding a schema pattern otherwise the crawler may crash before processing relevant databases. -This deny pattern is defined by default but is overriden by user-submitted configurations - -```yml -source: - type: druid - config: - # Point to broker address - host_port: localhost:8082 - schema_pattern: - deny: - - "^(lookup|sys).*" - # options is same as above -``` - -### Other databases using SQLAlchemy `sqlalchemy` - -The `sqlalchemy` source is useful if we don't have a pre-built source for your chosen -database system, but there is an [SQLAlchemy dialect](https://docs.sqlalchemy.org/en/14/dialects/) -defined elsewhere. In order to use this, you must `pip install` the required dialect packages yourself. - -Extracts: - -- List of schemas and tables -- Column types associated with each table - -```yml -source: - type: sqlalchemy - config: - # See https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls - connect_uri: "dialect+driver://username:password@host:port/database" - options: {} # same as above - schema_pattern: {} # same as above - table_pattern: {} # same as above - include_views: True # whether to include views, defaults to True -``` - -### MongoDB `mongodb` - -Extracts: - -- List of databases -- List of collections in each database and infers schemas for each collection - -By default, schema inference samples 1,000 documents from each collection. Setting `schemaSamplingSize: null` will scan the entire collection. -Moreover, setting `useRandomSampling: False` will sample the first documents found without random selection, which may be faster for large collections. - -Note that `schemaSamplingSize` has no effect if `enableSchemaInference: False` is set. - -```yml -source: - type: "mongodb" - config: - # For advanced configurations, see the MongoDB docs. 
- # https://pymongo.readthedocs.io/en/stable/examples/authentication.html - connect_uri: "mongodb://localhost" - username: admin - password: password - env: "PROD" # Optional, default is "PROD" - authMechanism: "DEFAULT" - options: {} - database_pattern: {} - collection_pattern: {} - enableSchemaInference: True - schemaSamplingSize: 1000 - useRandomSampling: True # whether to randomly sample docs for schema or just use the first ones, True by default - # database_pattern/collection_pattern are similar to schema_pattern/table_pattern from above -``` - -### LDAP `ldap` - -Extracts: - -- List of people -- Names, emails, titles, and manager information for each person -- List of groups - -```yml -source: - type: "ldap" - config: - ldap_server: ldap://localhost - ldap_user: "cn=admin,dc=example,dc=org" - ldap_password: "admin" - base_dn: "dc=example,dc=org" - filter: "(objectClass=*)" # optional field - drop_missing_first_last_name: False # optional -``` - -The `drop_missing_first_last_name` should be set to true if you've got many "headless" user LDAP accounts -for devices or services should be excluded when they do not contain a first and last name. This will only -impact the ingestion of LDAP users, while LDAP groups will be unaffected by this config option. - -### LookML `lookml` - -Note! This plugin uses a package that requires Python 3.7+! - -Extracts: - -- LookML views from model files -- Name, upstream table names, dimensions, measures, and dimension groups - -```yml -source: - type: "lookml" - config: - base_folder: /path/to/model/files # where the *.model.lkml and *.view.lkml files are stored - connection_to_platform_map: # mappings between connection names in the model files to platform names - connection_name: platform_name (or platform_name.database_name) # for ex. my_snowflake_conn: snowflake.my_database - model_pattern: {} - view_pattern: {} - env: "PROD" # optional, default is "PROD" - parse_table_names_from_sql: False # see note below - platform_name: "looker" # optional, default is "looker" -``` - -Note! The integration can use [`sql-metadata`](https://pypi.org/project/sql-metadata/) to try to parse the tables the -views depends on. As these SQL's can be complicated, and the package doesn't official support all the SQL dialects that -Looker supports, the result might not be correct. This parsing is disabled by default, but can be enabled by setting -`parse_table_names_from_sql: True`. - -### Looker dashboards `looker` - -Extracts: - -- Looker dashboards and dashboard elements (charts) -- Names, descriptions, URLs, chart types, input view for the charts - -See the [Looker authentication docs](https://docs.looker.com/reference/api-and-integration/api-auth#authentication_with_an_sdk) for the steps to create a client ID and secret. - -```yml -source: - type: "looker" - config: - client_id: # Your Looker API3 client ID - client_secret: # Your Looker API3 client secret - base_url: # The url to your Looker instance: https://company.looker.com:19999 or https://looker.company.com, or similar. - dashboard_pattern: # supports allow/deny regexes - chart_pattern: # supports allow/deny regexes - actor: urn:li:corpuser:etl # Optional, defaults to urn:li:corpuser:etl - env: "PROD" # Optional, default is "PROD" - platform_name: "looker" # Optional, default is "looker" -``` - -### File `file` - -Pulls metadata from a previously generated file. Note that the file sink -can produce such files, and a number of samples are included in the -[examples/mce_files](examples/mce_files) directory. 
- -```yml -source: - type: file - config: - filename: ./path/to/mce/file.json -``` - -### dbt `dbt` - -Pull metadata from dbt artifacts files: - -- [dbt manifest file](https://docs.getdbt.com/reference/artifacts/manifest-json) - - This file contains model, source and lineage data. -- [dbt catalog file](https://docs.getdbt.com/reference/artifacts/catalog-json) - - This file contains schema data. - - dbt does not record schema data for Ephemeral models, as such datahub will show Ephemeral models in the lineage, however there will be no associated schema for Ephemeral models -- [dbt sources file](https://docs.getdbt.com/reference/artifacts/sources-json) - - This file contains metadata for sources with freshness checks. - - We transfer dbt's freshness checks to DataHub's last-modified fields. - - Note that this file is optional – if not specified, we'll use time of ingestion instead as a proxy for time last-modified. -- target_platform: - - The data platform you are enriching with dbt metadata. - - [data platforms](https://github.com/linkedin/datahub/blob/master/gms/impl/src/main/resources/DataPlatformInfo.json) -- load_schemas: - - Load schemas from dbt catalog file, not necessary when the underlying data platform already has this data. -- node_type_pattern: - - Use this filter to exclude and include node types using allow or deny method - -```yml -source: - type: "dbt" - config: - manifest_path: "./path/dbt/manifest_file.json" - catalog_path: "./path/dbt/catalog_file.json" - sources_path: "./path/dbt/sources_file.json" # (optional, used for freshness checks) - target_platform: "postgres" # optional, eg "postgres", "snowflake", etc. - load_schemas: True or False - node_type_pattern: # optional - deny: - - ^test.* - allow: - - ^.* -``` - -Note: when `load_schemas` is False, models that use [identifiers](https://docs.getdbt.com/reference/resource-properties/identifier) to reference their source tables are ingested using the model identifier as the model name to preserve the lineage. - -### Google BigQuery Usage Stats `bigquery-usage` - -- Fetch a list of queries issued -- Fetch a list of tables and columns accessed -- Aggregate these statistics into buckets, by day or hour granularity - -Note: the client must have one of the following OAuth scopes, and should be authorized on all projects you'd like to ingest usage stats from. - -- https://www.googleapis.com/auth/logging.read -- https://www.googleapis.com/auth/logging.admin -- https://www.googleapis.com/auth/cloud-platform.read-only -- https://www.googleapis.com/auth/cloud-platform - -```yml -source: - type: bigquery-usage - config: - projects: # optional - can autodetect a single project from the environment - - project_id_1 - - project_id_2 - options: - # See https://googleapis.dev/python/logging/latest/client.html for details. - credentials: ~ # optional - see docs - env: PROD - - bucket_duration: "DAY" - start_time: ~ # defaults to the last full day in UTC (or hour) - end_time: ~ # defaults to the last full day in UTC (or hour) - - top_n_queries: 10 # number of queries to save for each table -``` - -:::note - -This source only does usage statistics. To get the tables, views, and schemas in your BigQuery project, use the `bigquery` source. - -::: - -### Snowflake Usage Stats `snowflake-usage` - -- Fetch a list of queries issued -- Fetch a list of tables and columns accessed (excludes views) -- Aggregate these statistics into buckets, by day or hour granularity - -Note: the user/role must have access to the account usage table. 
The "accountadmin" role has this by default, and other roles can be [granted this permission](https://docs.snowflake.com/en/sql-reference/account-usage.html#enabling-account-usage-for-other-roles). - -Note: the underlying access history views that we use are only available in Snowflake's enterprise edition or higher. - -```yml -source: - type: snowflake-usage - config: - username: user - password: pass - host_port: account_name - role: ACCOUNTADMIN - env: PROD - - bucket_duration: "DAY" - start_time: ~ # defaults to the last full day in UTC (or hour) - end_time: ~ # defaults to the last full day in UTC (or hour) - - top_n_queries: 10 # number of queries to save for each table -``` - -:::note - -This source only does usage statistics. To get the tables, views, and schemas in your Snowflake warehouse, ingest using the `snowflake` source. - -::: - -### Kafka Connect `kafka-connect` - -Extracts: - -- Kafka Connect connector as individual `DataFlowSnapshotClass` entity -- Creating individual `DataJobSnapshotClass` entity using `{connector_name}:{source_dataset}` naming -- Lineage information between source database to Kafka topic - -```yml -source: - type: "kafka-connect" - config: - connect_uri: "http://localhost:8083" - cluster_name: "connect-cluster" - connector_patterns: - deny: - - ^denied-connector.* - allow: - - ^allowed-connector.* -``` - -Current limitations: - -- Currently works only for Debezium source connectors. - ## Sinks ### DataHub Rest `datahub-rest` diff --git a/metadata-ingestion/source_docs/athena.md b/metadata-ingestion/source_docs/athena.md new file mode 100644 index 0000000000000..726c2521fa49e --- /dev/null +++ b/metadata-ingestion/source_docs/athena.md @@ -0,0 +1,23 @@ +# AWS Athena `athena` + +Extracts: + +- List of databases and tables +- Column types associated with each table + +```yml +source: + type: athena + config: + username: aws_access_key_id # Optional. If not specified, credentials are picked up according to boto3 rules. + # See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html + password: aws_secret_access_key # Optional. + database: database # Optional, defaults to "default" + aws_region: aws_region_name # i.e. "eu-west-1" + s3_staging_dir: s3_location # "s3:///prefix/" + # The s3_staging_dir parameter is needed because Athena always writes query results to S3. + # See https://docs.aws.amazon.com/athena/latest/ug/querying.html + # However, the athena driver will transparently fetch these results as you would expect from any other sql client. + work_group: athena_workgroup # "primary" + # table_pattern/schema_pattern is same as above +``` diff --git a/metadata-ingestion/source_docs/bigquery.md b/metadata-ingestion/source_docs/bigquery.md new file mode 100644 index 0000000000000..5d3eb9f636109 --- /dev/null +++ b/metadata-ingestion/source_docs/bigquery.md @@ -0,0 +1,63 @@ +# Google BigQuery `bigquery` + +Extracts: + +- List of databases, schema, and tables +- Column types associated with each table + +```yml +source: + type: bigquery + config: + project_id: project # optional - can autodetect from environment + options: # options is same as above + # See https://github.com/mxmzdlv/pybigquery#authentication for details. + credentials_path: "/path/to/keyfile.json" # optional + include_views: True # whether to include views, defaults to True + # table_pattern/schema_pattern is same as above +``` + +:::tip + +You can also get fine-grained usage statistics for BigQuery using the `bigquery-usage` source. 
+ +::: + + +# Google BigQuery Usage Stats `bigquery-usage` + +- Fetch a list of queries issued +- Fetch a list of tables and columns accessed +- Aggregate these statistics into buckets, by day or hour granularity + +Note: the client must have one of the following OAuth scopes, and should be authorized on all projects you'd like to ingest usage stats from. + +- https://www.googleapis.com/auth/logging.read +- https://www.googleapis.com/auth/logging.admin +- https://www.googleapis.com/auth/cloud-platform.read-only +- https://www.googleapis.com/auth/cloud-platform + +```yml +source: + type: bigquery-usage + config: + projects: # optional - can autodetect a single project from the environment + - project_id_1 + - project_id_2 + options: + # See https://googleapis.dev/python/logging/latest/client.html for details. + credentials: ~ # optional - see docs + env: PROD + + bucket_duration: "DAY" + start_time: ~ # defaults to the last full day in UTC (or hour) + end_time: ~ # defaults to the last full day in UTC (or hour) + + top_n_queries: 10 # number of queries to save for each table +``` + +:::note + +This source only does usage statistics. To get the tables, views, and schemas in your BigQuery project, use the `bigquery` source. + +::: diff --git a/metadata-ingestion/source_docs/datahub.md b/metadata-ingestion/source_docs/datahub.md new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/metadata-ingestion/source_docs/dbt.md b/metadata-ingestion/source_docs/dbt.md new file mode 100644 index 0000000000000..7e52e30a7daa6 --- /dev/null +++ b/metadata-ingestion/source_docs/dbt.md @@ -0,0 +1,39 @@ +# dbt `dbt` + +Pull metadata from dbt artifacts files: + +- [dbt manifest file](https://docs.getdbt.com/reference/artifacts/manifest-json) + - This file contains model, source and lineage data. +- [dbt catalog file](https://docs.getdbt.com/reference/artifacts/catalog-json) + - This file contains schema data. + - dbt does not record schema data for Ephemeral models, as such datahub will show Ephemeral models in the lineage, however there will be no associated schema for Ephemeral models +- [dbt sources file](https://docs.getdbt.com/reference/artifacts/sources-json) + - This file contains metadata for sources with freshness checks. + - We transfer dbt's freshness checks to DataHub's last-modified fields. + - Note that this file is optional – if not specified, we'll use time of ingestion instead as a proxy for time last-modified. +- target_platform: + - The data platform you are enriching with dbt metadata. + - [data platforms](https://github.com/linkedin/datahub/blob/master/gms/impl/src/main/resources/DataPlatformInfo.json) +- load_schemas: + - Load schemas from dbt catalog file, not necessary when the underlying data platform already has this data. +- node_type_pattern: + - Use this filter to exclude and include node types using allow or deny method + +```yml +source: + type: "dbt" + config: + manifest_path: "./path/dbt/manifest_file.json" + catalog_path: "./path/dbt/catalog_file.json" + sources_path: "./path/dbt/sources_file.json" # (optional, used for freshness checks) + target_platform: "postgres" # optional, eg "postgres", "snowflake", etc. + load_schemas: True or False + node_type_pattern: # optional + deny: + - ^test.* + allow: + - ^.* +``` + +Note: when `load_schemas` is False, models that use [identifiers](https://docs.getdbt.com/reference/resource-properties/identifier) to reference their source tables are ingested using the model identifier as the model name to preserve the lineage. 
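If you are unsure where these artifact files come from, they are produced by standard dbt commands. A rough sketch, assuming default `target/` output paths (the freshness command name varies by dbt version):

```shell
# writes target/manifest.json and target/catalog.json
dbt docs generate

# writes target/sources.json (optional, only needed for freshness metadata;
# older dbt releases call this `dbt source snapshot-freshness`)
dbt source freshness
```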
+ diff --git a/metadata-ingestion/source_docs/druid.md b/metadata-ingestion/source_docs/druid.md new file mode 100644 index 0000000000000..bd7dae7f2ac73 --- /dev/null +++ b/metadata-ingestion/source_docs/druid.md @@ -0,0 +1,22 @@ +# Druid `druid` + +Extracts: + +- List of databases, schema, and tables +- Column types associated with each table + +**Note** It is important to define a explicitly define deny schema pattern for internal druid databases (lookup & sys) +if adding a schema pattern otherwise the crawler may crash before processing relevant databases. +This deny pattern is defined by default but is overriden by user-submitted configurations + +```yml +source: + type: druid + config: + # Point to broker address + host_port: localhost:8082 + schema_pattern: + deny: + - "^(lookup|sys).*" + # options is same as above +``` diff --git a/metadata-ingestion/source_docs/feast.md b/metadata-ingestion/source_docs/feast.md new file mode 100644 index 0000000000000..24a2c1c72d788 --- /dev/null +++ b/metadata-ingestion/source_docs/feast.md @@ -0,0 +1,22 @@ +# Feast `feast` + +**Note: Feast ingestion requires Docker to be installed.** + +Extracts: + +- List of feature tables (modeled as [`MLFeatureTable`](https://github.com/linkedin/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLFeatureTableProperties.pdl)s), + features ([`MLFeature`](https://github.com/linkedin/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLFeatureProperties.pdl)s), + and entities ([`MLPrimaryKey`](https://github.com/linkedin/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLPrimaryKeyProperties.pdl)s) +- Column types associated with each feature and entity + +Note: this uses a separate Docker container to extract Feast's metadata into a JSON file, which is then +parsed to DataHub's native objects. This was done because of a dependency conflict in the `feast` module. + +```yml +source: + type: feast + config: + core_url: localhost:6565 # default + env: "PROD" # Optional, default is "PROD" + use_local_build: False # Whether to build Feast ingestion image locally, default is False +``` diff --git a/metadata-ingestion/source_docs/file.md b/metadata-ingestion/source_docs/file.md new file mode 100644 index 0000000000000..8b0389a753dd7 --- /dev/null +++ b/metadata-ingestion/source_docs/file.md @@ -0,0 +1,12 @@ +### File `file` + +Pulls metadata from a previously generated file. Note that the file sink +can produce such files, and a number of samples are included in the +[examples/mce_files](examples/mce_files) directory. + +```yml +source: + type: file + config: + filename: ./path/to/mce/file.json +``` \ No newline at end of file diff --git a/metadata-ingestion/source_docs/glue.md b/metadata-ingestion/source_docs/glue.md new file mode 100644 index 0000000000000..ab723ea45af1c --- /dev/null +++ b/metadata-ingestion/source_docs/glue.md @@ -0,0 +1,31 @@ + +# AWS Glue `glue` + +Note: if you also have files in S3 that you'd like to ingest, we recommend you use Glue's built-in data catalog. See [here](./s3-ingestion.md) for a quick guide on how to set up a crawler on Glue and ingest the outputs with DataHub. + +Extracts: + +- List of tables +- Column types associated with each table +- Table metadata, such as owner, description and parameters +- Jobs and their component transformations, data sources, and data sinks + +```yml +source: + type: glue + config: + aws_region: # aws_region_name, i.e. 
"eu-west-1" + extract_transforms: True # whether to ingest Glue jobs, defaults to True + env: # environment for the DatasetSnapshot URN, one of "DEV", "EI", "PROD" or "CORP". Defaults to "PROD". + + # Filtering patterns for databases and tables to scan + database_pattern: # Optional, to filter databases scanned, same as schema_pattern above. + table_pattern: # Optional, to filter tables scanned, same as table_pattern above. + + # Credentials. If not specified here, these are picked up according to boto3 rules. + # (see https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html) + aws_access_key_id: # Optional. + aws_secret_access_key: # Optional. + aws_session_token: # Optional. + aws_role: # Optional (Role chaining supported by using a sorted list). +``` diff --git a/metadata-ingestion/source_docs/hive.md b/metadata-ingestion/source_docs/hive.md new file mode 100644 index 0000000000000..3eb36bf620bae --- /dev/null +++ b/metadata-ingestion/source_docs/hive.md @@ -0,0 +1,47 @@ +# Hive `hive` + +Extracts: + +- List of databases, schema, and tables +- Column types associated with each table +- Detailed table and storage information + +```yml +source: + type: hive + config: + # For more details on authentication, see the PyHive docs: + # https://github.com/dropbox/PyHive#passing-session-configuration. + # LDAP, Kerberos, etc. are supported using connect_args, which can be + # added under the `options` config parameter. + #scheme: 'hive+http' # set this if Thrift should use the HTTP transport + #scheme: 'hive+https' # set this if Thrift should use the HTTP with SSL transport + username: user # optional + password: pass # optional + host_port: localhost:10000 + database: DemoDatabase # optional, defaults to 'default' + # table_pattern/schema_pattern is same as above + # options is same as above +``` + +
+ Example: using ingestion with Azure HDInsight + +```yml +# Connecting to Microsoft Azure HDInsight using TLS. +source: + type: hive + config: + scheme: "hive+https" + host_port: .azurehdinsight.net:443 + username: admin + password: "" + options: + connect_args: + http_path: "/hive2" + auth: BASIC + # table_pattern/schema_pattern is same as above +``` + +
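Kerberos and LDAP logins follow the same pattern as the HDInsight example above: extra driver arguments go under `options.connect_args`. A rough, untested sketch using PyHive's parameter names (adjust them for your cluster):

```yml
source:
  type: hive
  config:
    host_port: hive-server:10000
    options:
      connect_args:
        auth: KERBEROS # for LDAP, use `auth: LDAP` together with the username/password fields instead
        kerberos_service_name: hive # assumed service principal name; check your cluster's configuration
```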
+ diff --git a/metadata-ingestion/source_docs/kafka.md b/metadata-ingestion/source_docs/kafka.md new file mode 100644 index 0000000000000..20b49c417f124 --- /dev/null +++ b/metadata-ingestion/source_docs/kafka.md @@ -0,0 +1,47 @@ +# Kafka Metadata `kafka` + +Extracts: + +- List of topics - from the Kafka broker +- Schemas associated with each topic - from the schema registry + +```yml +source: + type: "kafka" + config: + connection: + bootstrap: "broker:9092" + consumer_config: {} # passed to https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#confluent_kafka.DeserializingConsumer + schema_registry_url: http://localhost:8081 + schema_registry_config: {} # passed to https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#confluent_kafka.schema_registry.SchemaRegistryClient +``` + +The options in the consumer config and schema registry config are passed to the Kafka DeserializingConsumer and SchemaRegistryClient respectively. + +For a full example with a number of security options, see this [example recipe](./examples/recipes/secured_kafka.yml). + + +# Kafka Connect `kafka-connect` + +Extracts: + +- Kafka Connect connector as individual `DataFlowSnapshotClass` entity +- Creating individual `DataJobSnapshotClass` entity using `{connector_name}:{source_dataset}` naming +- Lineage information between source database to Kafka topic + +```yml +source: + type: "kafka-connect" + config: + connect_uri: "http://localhost:8083" + cluster_name: "connect-cluster" + connector_patterns: + deny: + - ^denied-connector.* + allow: + - ^allowed-connector.* +``` + +Current limitations: + +- Currently works only for Debezium source connectors. diff --git a/metadata-ingestion/source_docs/ldap.md b/metadata-ingestion/source_docs/ldap.md new file mode 100644 index 0000000000000..243075da4a83a --- /dev/null +++ b/metadata-ingestion/source_docs/ldap.md @@ -0,0 +1,23 @@ +# LDAP `ldap` + +Extracts: + +- List of people +- Names, emails, titles, and manager information for each person +- List of groups + +```yml +source: + type: "ldap" + config: + ldap_server: ldap://localhost + ldap_user: "cn=admin,dc=example,dc=org" + ldap_password: "admin" + base_dn: "dc=example,dc=org" + filter: "(objectClass=*)" # optional field + drop_missing_first_last_name: False # optional +``` + +The `drop_missing_first_last_name` should be set to true if you've got many "headless" user LDAP accounts +for devices or services should be excluded when they do not contain a first and last name. This will only +impact the ingestion of LDAP users, while LDAP groups will be unaffected by this config option. diff --git a/metadata-ingestion/source_docs/looker.md b/metadata-ingestion/source_docs/looker.md new file mode 100644 index 0000000000000..a195a6ef3b2da --- /dev/null +++ b/metadata-ingestion/source_docs/looker.md @@ -0,0 +1,22 @@ +# Looker dashboards `looker` + +Extracts: + +- Looker dashboards and dashboard elements (charts) +- Names, descriptions, URLs, chart types, input view for the charts + +See the [Looker authentication docs](https://docs.looker.com/reference/api-and-integration/api-auth#authentication_with_an_sdk) for the steps to create a client ID and secret. + +```yml +source: + type: "looker" + config: + client_id: # Your Looker API3 client ID + client_secret: # Your Looker API3 client secret + base_url: # The url to your Looker instance: https://company.looker.com:19999 or https://looker.company.com, or similar. 
+ dashboard_pattern: # supports allow/deny regexes + chart_pattern: # supports allow/deny regexes + actor: urn:li:corpuser:etl # Optional, defaults to urn:li:corpuser:etl + env: "PROD" # Optional, default is "PROD" + platform_name: "looker" # Optional, default is "looker" +``` \ No newline at end of file diff --git a/metadata-ingestion/source_docs/lookml.md b/metadata-ingestion/source_docs/lookml.md new file mode 100644 index 0000000000000..3843ba39bee6d --- /dev/null +++ b/metadata-ingestion/source_docs/lookml.md @@ -0,0 +1,27 @@ +# LookML `lookml` + +Note! This plugin uses a package that requires Python 3.7+! + +Extracts: + +- LookML views from model files +- Name, upstream table names, dimensions, measures, and dimension groups + +```yml +source: + type: "lookml" + config: + base_folder: /path/to/model/files # where the *.model.lkml and *.view.lkml files are stored + connection_to_platform_map: # mappings between connection names in the model files to platform names + connection_name: platform_name (or platform_name.database_name) # for ex. my_snowflake_conn: snowflake.my_database + model_pattern: {} + view_pattern: {} + env: "PROD" # optional, default is "PROD" + parse_table_names_from_sql: False # see note below + platform_name: "looker" # optional, default is "looker" +``` + +Note! The integration can use [`sql-metadata`](https://pypi.org/project/sql-metadata/) to try to parse the tables the +views depends on. As these SQL's can be complicated, and the package doesn't official support all the SQL dialects that +Looker supports, the result might not be correct. This parsing is disabled by default, but can be enabled by setting +`parse_table_names_from_sql: True`. diff --git a/metadata-ingestion/source_docs/mongodb.md b/metadata-ingestion/source_docs/mongodb.md new file mode 100644 index 0000000000000..a951c992a2d61 --- /dev/null +++ b/metadata-ingestion/source_docs/mongodb.md @@ -0,0 +1,31 @@ +# MongoDB `mongodb` + +Extracts: + +- List of databases +- List of collections in each database and infers schemas for each collection + +By default, schema inference samples 1,000 documents from each collection. Setting `schemaSamplingSize: null` will scan the entire collection. +Moreover, setting `useRandomSampling: False` will sample the first documents found without random selection, which may be faster for large collections. + +Note that `schemaSamplingSize` has no effect if `enableSchemaInference: False` is set. + +```yml +source: + type: "mongodb" + config: + # For advanced configurations, see the MongoDB docs. 
+ # https://pymongo.readthedocs.io/en/stable/examples/authentication.html + connect_uri: "mongodb://localhost" + username: admin + password: password + env: "PROD" # Optional, default is "PROD" + authMechanism: "DEFAULT" + options: {} + database_pattern: {} + collection_pattern: {} + enableSchemaInference: True + schemaSamplingSize: 1000 + useRandomSampling: True # whether to randomly sample docs for schema or just use the first ones, True by default + # database_pattern/collection_pattern are similar to schema_pattern/table_pattern from above +``` diff --git a/metadata-ingestion/source_docs/mssql.md b/metadata-ingestion/source_docs/mssql.md new file mode 100644 index 0000000000000..8317ef7a7deb4 --- /dev/null +++ b/metadata-ingestion/source_docs/mssql.md @@ -0,0 +1,66 @@ +# Microsoft SQL Server Metadata `mssql` + +We have two options for the underlying library used to connect to SQL Server: (1) [python-tds](https://github.com/denisenkom/pytds) and (2) [pyodbc](https://github.com/mkleehammer/pyodbc). The TDS library is pure Python and hence easier to install, but only PyODBC supports encrypted connections. + +Extracts: + +- List of databases, schema, tables and views +- Column types associated with each table/view + +```yml +source: + type: mssql + config: + username: user + password: pass + host_port: localhost:1433 + database: DemoDatabase + include_views: True # whether to include views, defaults to True + table_pattern: + deny: + - "^.*\\.sys_.*" # deny all tables that start with sys_ + allow: + - "schema1.table1" + - "schema1.table2" + options: + # Any options specified here will be passed to SQLAlchemy's create_engine as kwargs. + # See https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine for details. + # Many of these options are specific to the underlying database driver, so that library's + # documentation will be a good reference for what is supported. To find which dialect is likely + # in use, consult this table: https://docs.sqlalchemy.org/en/14/dialects/index.html. + charset: "utf8" + # If set to true, we'll use the pyodbc library. This requires you to have + # already installed the Microsoft ODBC Driver for SQL Server. + # See https://docs.microsoft.com/en-us/sql/connect/python/pyodbc/step-1-configure-development-environment-for-pyodbc-python-development?view=sql-server-ver15 + use_odbc: False + uri_args: {} +``` + +
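To make the `options` passthrough described in the comments above concrete, here is a rough sketch that mixes plain SQLAlchemy engine arguments with driver-level ones (values are illustrative, not recommendations):

```yml
source:
  type: mssql
  config:
    username: user
    password: pass
    host_port: localhost:1433
    database: DemoDatabase
    options:
      pool_size: 5 # standard SQLAlchemy create_engine kwargs
      max_overflow: 10
      connect_args:
        login_timeout: 30 # assumed python-tds connection parameter; consult your driver's docs
```

The ODBC example below covers encryption settings, which go through `uri_args` rather than `options`.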
+ Example: using ingestion with ODBC and encryption + +This requires you to have already installed the Microsoft ODBC Driver for SQL Server. +See https://docs.microsoft.com/en-us/sql/connect/python/pyodbc/step-1-configure-development-environment-for-pyodbc-python-development?view=sql-server-ver15 + +```yml +source: + type: mssql + config: + # See https://docs.sqlalchemy.org/en/14/dialects/mssql.html#module-sqlalchemy.dialects.mssql.pyodbc + use_odbc: True + username: user + password: pass + host_port: localhost:1433 + database: DemoDatabase + include_views: True # whether to include views, defaults to True + uri_args: + # See https://docs.microsoft.com/en-us/sql/connect/odbc/dsn-connection-string-attribute?view=sql-server-ver15 + driver: "ODBC Driver 17 for SQL Server" + Encrypt: "yes" + TrustServerCertificate: "Yes" + ssl: "True" + # Trusted_Connection: "yes" +``` + +
+ diff --git a/metadata-ingestion/source_docs/mysql.md b/metadata-ingestion/source_docs/mysql.md new file mode 100644 index 0000000000000..ecf564395d41c --- /dev/null +++ b/metadata-ingestion/source_docs/mysql.md @@ -0,0 +1,31 @@ +### MySQL `mysql` + +Extracts: + +- List of databases and tables +- Column types and schema associated with each table + +```yml +source: + type: mysql + config: + username: root + password: example + database: dbname + host_port: localhost:3306 + table_pattern: + deny: + # Note that the deny patterns take precedence over the allow patterns. + - "performance_schema" + allow: + - "schema1.table2" + # Although the 'table_pattern' enables you to skip everything from certain schemas, + # having another option to allow/deny on schema level is an optimization for the case when there is a large number + # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter + # them out afterwards via the table_pattern. + schema_pattern: + deny: + - "garbage_schema" + allow: + - "schema1" +``` diff --git a/metadata-ingestion/source_docs/oracle.md b/metadata-ingestion/source_docs/oracle.md new file mode 100644 index 0000000000000..264c5e18804c6 --- /dev/null +++ b/metadata-ingestion/source_docs/oracle.md @@ -0,0 +1,25 @@ +# Oracle `oracle` + +Extracts: + +- List of databases, schema, and tables +- Column types associated with each table + +Using the Oracle source requires that you've also installed the correct drivers; see the [cx_Oracle docs](https://cx-oracle.readthedocs.io/en/latest/user_guide/installation.html). The easiest one is the [Oracle Instant Client](https://www.oracle.com/database/technologies/instant-client.html). + +```yml +source: + type: oracle + config: + # For more details on authentication, see the documentation: + # https://docs.sqlalchemy.org/en/14/dialects/oracle.html#dialect-oracle-cx_oracle-connect and + # https://cx-oracle.readthedocs.io/en/latest/user_guide/connection_handling.html#connection-strings. 
+ username: user + password: pass + host_port: localhost:5432 + database: dbname + service_name: svc # omit database if using this option + include_views: True # whether to include views, defaults to True + # table_pattern/schema_pattern is same as above + # options is same as above +``` diff --git a/metadata-ingestion/source_docs/postgres.md b/metadata-ingestion/source_docs/postgres.md new file mode 100644 index 0000000000000..92ffef44a5718 --- /dev/null +++ b/metadata-ingestion/source_docs/postgres.md @@ -0,0 +1,23 @@ +# PostgreSQL `postgres` + +Extracts: + +- List of databases, schema, and tables +- Column types associated with each table +- Also supports PostGIS extensions +- database_alias (optional) can be used to change the name of database to be ingested + +```yml +source: + type: postgres + config: + username: user + password: pass + host_port: localhost:5432 + database: DemoDatabase + database_alias: DatabaseNameToBeIngested + include_views: True # whether to include views, defaults to True + # table_pattern/schema_pattern is same as above + # options is same as above +``` + diff --git a/metadata-ingestion/source_docs/redshift.md b/metadata-ingestion/source_docs/redshift.md new file mode 100644 index 0000000000000..22595a055df5a --- /dev/null +++ b/metadata-ingestion/source_docs/redshift.md @@ -0,0 +1,41 @@ +# Redshift `redshift` + +Extracts: + +- List of databases, schema, and tables +- Column types associated with each table +- Also supports PostGIS extensions + +```yml +source: + type: redshift + config: + username: user + password: pass + host_port: example.something.us-west-2.redshift.amazonaws.com:5439 + database: DemoDatabase + include_views: True # whether to include views, defaults to True + # table_pattern/schema_pattern is same as above + # options is same as above +``` + +
+  Extra options when running Redshift behind a proxy
+
+When Redshift is reached through a proxy or SSH tunnel, you may need to pass additional SSL settings to the
+underlying Postgres driver via `connect_args`:
+
+```yml
+source:
+  type: redshift
+  config:
+    # username, password, database, etc are all the same as above
+    host_port: my-proxy-hostname:5439
+    options:
+      connect_args:
+        sslmode: "prefer" # or "require" or "verify-ca"
+        sslrootcert: ~ # needed to unpin the AWS Redshift certificate
+```
+
+ diff --git a/metadata-ingestion/source_docs/sagemaker.md b/metadata-ingestion/source_docs/sagemaker.md new file mode 100644 index 0000000000000..295e4b7f1a21e --- /dev/null +++ b/metadata-ingestion/source_docs/sagemaker.md @@ -0,0 +1,34 @@ +# AWS SageMaker `sagemaker` + +Extracts: + +- Feature groups +- Models, jobs, and lineage between the two (e.g. when jobs output a model or a model is used by a job) + +```yml +source: + type: sagemaker + config: + aws_region: # aws_region_name, i.e. "eu-west-1" + env: # environment for the DatasetSnapshot URN, one of "DEV", "EI", "PROD" or "CORP". Defaults to "PROD". + + # Credentials. If not specified here, these are picked up according to boto3 rules. + # (see https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html) + aws_access_key_id: # Optional. + aws_secret_access_key: # Optional. + aws_session_token: # Optional. + aws_role: # Optional (Role chaining supported by using a sorted list). + + extract_feature_groups: True # if feature groups should be ingested, default True + extract_models: True # if models should be ingested, default True + extract_jobs: # if jobs should be ingested, default True for all + auto_ml: True + compilation: True + edge_packaging: True + hyper_parameter_tuning: True + labeling: True + processing: True + training: True + transform: True +``` + diff --git a/metadata-ingestion/source_docs/snowflake.md b/metadata-ingestion/source_docs/snowflake.md new file mode 100644 index 0000000000000..c42d55e7b5f3a --- /dev/null +++ b/metadata-ingestion/source_docs/snowflake.md @@ -0,0 +1,68 @@ +# Snowflake `snowflake` + +Extracts: + +- List of databases, schema, and tables +- Column types associated with each table + +```yml +source: + type: snowflake + config: + username: user + password: pass + host_port: account_name + database_pattern: + # The escaping of the $ symbol helps us skip the environment variable substitution. + allow: + - ^MY_DEMO_DATA.* + - ^ANOTHER_DB_REGEX + deny: + - ^SNOWFLAKE\$ + - ^SNOWFLAKE_SAMPLE_DATA\$ + warehouse: "COMPUTE_WH" # optional + role: "sysadmin" # optional + include_views: True # whether to include views, defaults to True + # table_pattern/schema_pattern is same as above + # options is same as above +``` + +:::tip + +You can also get fine-grained usage statistics for Snowflake using the `snowflake-usage` source. + +::: + + +# Snowflake Usage Stats `snowflake-usage` + +- Fetch a list of queries issued +- Fetch a list of tables and columns accessed (excludes views) +- Aggregate these statistics into buckets, by day or hour granularity + +Note: the user/role must have access to the account usage table. The "accountadmin" role has this by default, and other roles can be [granted this permission](https://docs.snowflake.com/en/sql-reference/account-usage.html#enabling-account-usage-for-other-roles). + +Note: the underlying access history views that we use are only available in Snowflake's enterprise edition or higher. + +```yml +source: + type: snowflake-usage + config: + username: user + password: pass + host_port: account_name + role: ACCOUNTADMIN + env: PROD + + bucket_duration: "DAY" + start_time: ~ # defaults to the last full day in UTC (or hour) + end_time: ~ # defaults to the last full day in UTC (or hour) + + top_n_queries: 10 # number of queries to save for each table +``` + +:::note + +This source only does usage statistics. To get the tables, views, and schemas in your Snowflake warehouse, ingest using the `snowflake` source. 
+ +::: diff --git a/metadata-ingestion/source_docs/sqlalchemy.md b/metadata-ingestion/source_docs/sqlalchemy.md new file mode 100644 index 0000000000000..977f40b61f0fa --- /dev/null +++ b/metadata-ingestion/source_docs/sqlalchemy.md @@ -0,0 +1,22 @@ +# Other databases using SQLAlchemy `sqlalchemy` + +The `sqlalchemy` source is useful if we don't have a pre-built source for your chosen +database system, but there is an [SQLAlchemy dialect](https://docs.sqlalchemy.org/en/14/dialects/) +defined elsewhere. In order to use this, you must `pip install` the required dialect packages yourself. + +Extracts: + +- List of schemas and tables +- Column types associated with each table + +```yml +source: + type: sqlalchemy + config: + # See https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls + connect_uri: "dialect+driver://username:password@host:port/database" + options: {} # same as above + schema_pattern: {} # same as above + table_pattern: {} # same as above + include_views: True # whether to include views, defaults to True +``` diff --git a/metadata-ingestion/source_docs/superset.md b/metadata-ingestion/source_docs/superset.md new file mode 100644 index 0000000000000..3c6dba608aefc --- /dev/null +++ b/metadata-ingestion/source_docs/superset.md @@ -0,0 +1,19 @@ +# Superset `superset` + +Extracts: + +- List of charts and dashboards + +```yml +source: + type: superset + config: + username: user + password: pass + provider: db | ldap + connect_uri: http://localhost:8088 + env: "PROD" # Optional, default is "PROD" +``` + +See documentation for superset's `/security/login` at https://superset.apache.org/docs/rest-api for more details on superset's login api. + From 0916b7532fc5de9fb0d544fc176ea8acff26b4c1 Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Tue, 27 Jul 2021 12:41:49 -0700 Subject: [PATCH 02/33] Add links --- docs-website/sidebars.js | 8 ++ metadata-ingestion/README.md | 127 +++++++--------------- metadata-ingestion/sink_docs/console.md | 8 ++ metadata-ingestion/sink_docs/datahub.md | 32 ++++++ metadata-ingestion/sink_docs/file.md | 12 ++ metadata-ingestion/source_docs/datahub.md | 0 metadata-ingestion/source_docs/file.md | 2 +- 7 files changed, 100 insertions(+), 89 deletions(-) create mode 100644 metadata-ingestion/sink_docs/console.md create mode 100644 metadata-ingestion/sink_docs/datahub.md create mode 100644 metadata-ingestion/sink_docs/file.md delete mode 100644 metadata-ingestion/source_docs/datahub.md diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index 42881b1f65460..e8900d80a6357 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -54,6 +54,14 @@ module.exports = { //"docs/what/gms", "datahub-web-react/README", ], + "Metadata Ingestion": [ + // { + // Sources: list_ids_in_directory("metadata-ingestion/source_docs"), + // }, + { + Sinks: list_ids_in_directory("metadata-ingestion/sink_docs"), + }, + ], "Metadata Modeling": [ "docs/modeling/metadata-model", "docs/modeling/extending-the-metadata-model", diff --git a/metadata-ingestion/README.md b/metadata-ingestion/README.md index 7d72a70bd0fdd..5ffa0c67383c1 100644 --- a/metadata-ingestion/README.md +++ b/metadata-ingestion/README.md @@ -28,37 +28,45 @@ If you run into an error, try checking the [_common setup issues_](./developing. #### Installing Plugins -We use a plugin architecture so that you can install only the dependencies you actually need. 
- -| Plugin Name | Install Command | Provides | -| --------------- | ---------------------------------------------------------- | ----------------------------------- | -| file | _included by default_ | File source and sink | -| console | _included by default_ | Console sink | -| athena | `pip install 'acryl-datahub[athena]'` | AWS Athena source | -| bigquery | `pip install 'acryl-datahub[bigquery]'` | BigQuery source | -| bigquery-usage | `pip install 'acryl-datahub[bigquery-usage]'` | BigQuery usage statistics source | -| feast | `pip install 'acryl-datahub[feast]'` | Feast source | -| glue | `pip install 'acryl-datahub[glue]'` | AWS Glue source | -| hive | `pip install 'acryl-datahub[hive]'` | Hive source | -| mssql | `pip install 'acryl-datahub[mssql]'` | SQL Server source | -| mysql | `pip install 'acryl-datahub[mysql]'` | MySQL source | -| oracle | `pip install 'acryl-datahub[oracle]'` | Oracle source | -| postgres | `pip install 'acryl-datahub[postgres]'` | Postgres source | -| redshift | `pip install 'acryl-datahub[redshift]'` | Redshift source | -| sagemaker | `pip install 'acryl-datahub[sagemaker]'` | AWS SageMaker source | -| sqlalchemy | `pip install 'acryl-datahub[sqlalchemy]'` | Generic SQLAlchemy source | -| snowflake | `pip install 'acryl-datahub[snowflake]'` | Snowflake source | -| snowflake-usage | `pip install 'acryl-datahub[snowflake-usage]'` | Snowflake usage statistics source | -| superset | `pip install 'acryl-datahub[superset]'` | Superset source | -| mongodb | `pip install 'acryl-datahub[mongodb]'` | MongoDB source | -| ldap | `pip install 'acryl-datahub[ldap]'` ([extra requirements]) | LDAP source | -| looker | `pip install 'acryl-datahub[looker]'` | Looker source | -| lookml | `pip install 'acryl-datahub[lookml]'` | LookML source, requires Python 3.7+ | -| kafka | `pip install 'acryl-datahub[kafka]'` | Kafka source | -| druid | `pip install 'acryl-datahub[druid]'` | Druid Source | -| dbt | _no additional dependencies_ | dbt source | -| datahub-rest | `pip install 'acryl-datahub[datahub-rest]'` | DataHub sink over REST API | -| datahub-kafka | `pip install 'acryl-datahub[datahub-kafka]'` | DataHub sink over Kafka | +We use a plugin architecture so that you can install only the dependencies you actually need. Click the plugin name to learn more about the specific source recipe and any FAQs! 
+ +Sources: + +| Plugin Name | Install Command | Provides | +| --------------------------------------------- | ---------------------------------------------------------- | ----------------------------------- | --- | +| [file](./source_docs/file.md) | _included by default_ | File source and sink | +| [athena](./source_docs/athena.md) | `pip install 'acryl-datahub[athena]'` | AWS Athena source | +| [bigquery](./source_docs/bigquery.md) | `pip install 'acryl-datahub[bigquery]'` | BigQuery source | +| [bigquery-usage](./source_docs/bigquery.md) | `pip install 'acryl-datahub[bigquery-usage]'` | BigQuery usage statistics source | +| [dbt](./source_docs/dbt.md) | _no additional dependencies_ | dbt source | +| [druid](./source_docs/druid.md) | `pip install 'acryl-datahub[druid]'` | Druid Source | +| [feast](./source_docs/feast.md) | `pip install 'acryl-datahub[feast]'` | Feast source | +| [glue](./source_docs/glue.md) | `pip install 'acryl-datahub[glue]'` | AWS Glue source | +| [hive](./source_docs/hive.md) | `pip install 'acryl-datahub[hive]'` | Hive source | +| [kafka](./source_docs/kafka.md) | `pip install 'acryl-datahub[kafka]'` | Kafka source | | +| [ldap](./source_docs/ldap.md) | `pip install 'acryl-datahub[ldap]'` ([extra requirements]) | LDAP source | +| [looker](./source_docs/looker.md) | `pip install 'acryl-datahub[looker]'` | Looker source | +| [lookml](./source_docs/lookml.md) | `pip install 'acryl-datahub[lookml]'` | LookML source, requires Python 3.7+ | +| [mongodb](./source_docs/mongodb.md) | `pip install 'acryl-datahub[mongodb]'` | MongoDB source | +| [mssql](./source_docs/mssql.md) | `pip install 'acryl-datahub[mssql]'` | SQL Server source | +| [mysql](./source_docs/mysql.md) | `pip install 'acryl-datahub[mysql]'` | MySQL source | +| [oracle](./source_docs/oracle.md) | `pip install 'acryl-datahub[oracle]'` | Oracle source | +| [postgres](./source_docs/postgres.md) | `pip install 'acryl-datahub[postgres]'` | Postgres source | +| [redshift](./source_docs/redshift.md) | `pip install 'acryl-datahub[redshift]'` | Redshift source | +| [sagemaker](./source_docs/sagemaker.md) | `pip install 'acryl-datahub[sagemaker]'` | AWS SageMaker source | +| [sqlalchemy](./source_docs/sqlalchemy.md) | `pip install 'acryl-datahub[sqlalchemy]'` | Generic SQLAlchemy source | +| [snowflake](./source_docs/snowflake.md) | `pip install 'acryl-datahub[snowflake]'` | Snowflake source | +| [snowflake-usage](./source_docs/snowflake.md) | `pip install 'acryl-datahub[snowflake-usage]'` | Snowflake usage statistics source | +| [superset](./source_docs/superset.md) | `pip install 'acryl-datahub[superset]'` | Superset source | + +Sinks + +| Plugin Name | Install Command | Provides | +| --------------------------------------- | -------------------------------------------- | -------------------------- | +| [file](./sink_docs/file.md) | _included by default_ | File source and sink | +| [console](./sink_docs/console.md) | _included by default_ | Console sink | +| [datahub-rest](./sink_docs/datahub.md) | `pip install 'acryl-datahub[datahub-rest]'` | DataHub sink over REST API | +| [datahub-kafka](./sink_docs/datahub.md) | `pip install 'acryl-datahub[datahub-kafka]'` | DataHub sink over Kafka | These plugins can be mixed and matched as desired. For example: @@ -138,63 +146,6 @@ datahub ingest -c ./examples/recipes/mssql_to_datahub.yml A number of recipes are included in the examples/recipes directory. -## Sinks - -### DataHub Rest `datahub-rest` - -Pushes metadata to DataHub using the GMA rest API. 
The advantage of the rest-based interface -is that any errors can immediately be reported. - -```yml -sink: - type: "datahub-rest" - config: - server: "http://localhost:8080" -``` - -### DataHub Kafka `datahub-kafka` - -Pushes metadata to DataHub by publishing messages to Kafka. The advantage of the Kafka-based -interface is that it's asynchronous and can handle higher throughput. This requires the -Datahub mce-consumer container to be running. - -```yml -sink: - type: "datahub-kafka" - config: - connection: - bootstrap: "localhost:9092" - producer_config: {} # passed to https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#confluent_kafka.SerializingProducer - schema_registry_url: "http://localhost:8081" - schema_registry_config: {} # passed to https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#confluent_kafka.schema_registry.SchemaRegistryClient -``` - -The options in the producer config and schema registry config are passed to the Kafka SerializingProducer and SchemaRegistryClient respectively. - -For a full example with a number of security options, see this [example recipe](./examples/recipes/secured_kafka.yml). - -### Console `console` - -Simply prints each metadata event to stdout. Useful for experimentation and debugging purposes. - -```yml -sink: - type: "console" -``` - -### File `file` - -Outputs metadata to a file. This can be used to decouple metadata sourcing from the -process of pushing it into DataHub, and is particularly useful for debugging purposes. -Note that the file source can read files generated by this sink. - -```yml -sink: - type: file - config: - filename: ./path/to/mce/file.json -``` - ## Transformations Beyond basic ingestion, sometimes there might exist a need to modify the source data before passing it on to the sink. diff --git a/metadata-ingestion/sink_docs/console.md b/metadata-ingestion/sink_docs/console.md new file mode 100644 index 0000000000000..eb3c00a68574d --- /dev/null +++ b/metadata-ingestion/sink_docs/console.md @@ -0,0 +1,8 @@ +# Console `console` + +Simply prints each metadata event to stdout. Useful for experimentation and debugging purposes. + +```yml +sink: + type: "console" +``` diff --git a/metadata-ingestion/sink_docs/datahub.md b/metadata-ingestion/sink_docs/datahub.md new file mode 100644 index 0000000000000..d08850a1749e4 --- /dev/null +++ b/metadata-ingestion/sink_docs/datahub.md @@ -0,0 +1,32 @@ +# DataHub Rest `datahub-rest` + +Pushes metadata to DataHub using the GMA rest API. The advantage of the rest-based interface +is that any errors can immediately be reported. + +```yml +sink: + type: "datahub-rest" + config: + server: "http://localhost:8080" +``` + +# DataHub Kafka `datahub-kafka` + +Pushes metadata to DataHub by publishing messages to Kafka. The advantage of the Kafka-based +interface is that it's asynchronous and can handle higher throughput. This requires the +Datahub mce-consumer container to be running. 
+ +```yml +sink: + type: "datahub-kafka" + config: + connection: + bootstrap: "localhost:9092" + producer_config: {} # passed to https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#confluent_kafka.SerializingProducer + schema_registry_url: "http://localhost:8081" + schema_registry_config: {} # passed to https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#confluent_kafka.schema_registry.SchemaRegistryClient +``` + +The options in the producer config and schema registry config are passed to the Kafka SerializingProducer and SchemaRegistryClient respectively. + +For a full example with a number of security options, see this [example recipe](./examples/recipes/secured_kafka.yml). diff --git a/metadata-ingestion/sink_docs/file.md b/metadata-ingestion/sink_docs/file.md new file mode 100644 index 0000000000000..cc8282cd609c5 --- /dev/null +++ b/metadata-ingestion/sink_docs/file.md @@ -0,0 +1,12 @@ +# File `file` + +Outputs metadata to a file. This can be used to decouple metadata sourcing from the +process of pushing it into DataHub, and is particularly useful for debugging purposes. +Note that the file source can read files generated by this sink. + +```yml +sink: + type: file + config: + filename: ./path/to/mce/file.json +``` diff --git a/metadata-ingestion/source_docs/datahub.md b/metadata-ingestion/source_docs/datahub.md deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/metadata-ingestion/source_docs/file.md b/metadata-ingestion/source_docs/file.md index 8b0389a753dd7..a86cf87baf713 100644 --- a/metadata-ingestion/source_docs/file.md +++ b/metadata-ingestion/source_docs/file.md @@ -9,4 +9,4 @@ source: type: file config: filename: ./path/to/mce/file.json -``` \ No newline at end of file +``` From 2bb1d79d6dc56f89872d73c09af2a8562c43950d Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Tue, 27 Jul 2021 12:42:35 -0700 Subject: [PATCH 03/33] Fix link --- metadata-ingestion/sink_docs/datahub.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/sink_docs/datahub.md b/metadata-ingestion/sink_docs/datahub.md index d08850a1749e4..bc59af5e7092d 100644 --- a/metadata-ingestion/sink_docs/datahub.md +++ b/metadata-ingestion/sink_docs/datahub.md @@ -29,4 +29,4 @@ sink: The options in the producer config and schema registry config are passed to the Kafka SerializingProducer and SchemaRegistryClient respectively. -For a full example with a number of security options, see this [example recipe](./examples/recipes/secured_kafka.yml). +For a full example with a number of security options, see this [example recipe](../examples/recipes/secured_kafka.yml). From 487a2b6e7da3e57ddbdc08de8e74a708f0dfeeb1 Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Tue, 27 Jul 2021 12:58:07 -0700 Subject: [PATCH 04/33] Fix glue link --- docs-website/generateDocsDir.ts | 5 +++++ docs-website/sidebars.js | 6 +++--- metadata-ingestion/source_docs/file.md | 4 ++-- metadata-ingestion/source_docs/glue.md | 3 +-- metadata-ingestion/source_docs/kafka.md | 3 +-- metadata-ingestion/source_docs/mysql.md | 2 +- 6 files changed, 13 insertions(+), 10 deletions(-) diff --git a/docs-website/generateDocsDir.ts b/docs-website/generateDocsDir.ts index 1b142f6b625cc..180d3c97c5548 100644 --- a/docs-website/generateDocsDir.ts +++ b/docs-website/generateDocsDir.ts @@ -158,6 +158,11 @@ function markdown_guess_title( } else { // Find first h1 header and use it as the title. 
const headers = contents.content.match(/^# (.+)$/gm); + + if (!headers) { + throw new Error(`${filepath} must have at least one h1 header`); + } + if (headers.length > 1 && contents.content.indexOf("```") < 0) { throw new Error(`too many h1 headers in ${filepath}`); } diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index e8900d80a6357..714b6199602cd 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -55,9 +55,9 @@ module.exports = { "datahub-web-react/README", ], "Metadata Ingestion": [ - // { - // Sources: list_ids_in_directory("metadata-ingestion/source_docs"), - // }, + { + Sources: list_ids_in_directory("metadata-ingestion/source_docs"), + }, { Sinks: list_ids_in_directory("metadata-ingestion/sink_docs"), }, diff --git a/metadata-ingestion/source_docs/file.md b/metadata-ingestion/source_docs/file.md index a86cf87baf713..905525c871df2 100644 --- a/metadata-ingestion/source_docs/file.md +++ b/metadata-ingestion/source_docs/file.md @@ -1,8 +1,8 @@ -### File `file` +# File `file` Pulls metadata from a previously generated file. Note that the file sink can produce such files, and a number of samples are included in the -[examples/mce_files](examples/mce_files) directory. +[examples/mce_files](../examples/mce_files) directory. ```yml source: diff --git a/metadata-ingestion/source_docs/glue.md b/metadata-ingestion/source_docs/glue.md index ab723ea45af1c..c86bc62d476ea 100644 --- a/metadata-ingestion/source_docs/glue.md +++ b/metadata-ingestion/source_docs/glue.md @@ -1,7 +1,6 @@ - # AWS Glue `glue` -Note: if you also have files in S3 that you'd like to ingest, we recommend you use Glue's built-in data catalog. See [here](./s3-ingestion.md) for a quick guide on how to set up a crawler on Glue and ingest the outputs with DataHub. +Note: if you also have files in S3 that you'd like to ingest, we recommend you use Glue's built-in data catalog. See [here](../s3-ingestion.md) for a quick guide on how to set up a crawler on Glue and ingest the outputs with DataHub. Extracts: diff --git a/metadata-ingestion/source_docs/kafka.md b/metadata-ingestion/source_docs/kafka.md index 20b49c417f124..c447556d6834e 100644 --- a/metadata-ingestion/source_docs/kafka.md +++ b/metadata-ingestion/source_docs/kafka.md @@ -18,8 +18,7 @@ source: The options in the consumer config and schema registry config are passed to the Kafka DeserializingConsumer and SchemaRegistryClient respectively. -For a full example with a number of security options, see this [example recipe](./examples/recipes/secured_kafka.yml). - +For a full example with a number of security options, see this [example recipe](../examples/recipes/secured_kafka.yml). 
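To make that concrete, here is a minimal sketch of a secured recipe, assuming a SASL_SSL-protected broker and a schema registry behind basic auth; the endpoints and credential placeholders are illustrative, not defaults:

```yml
source:
  type: "kafka"
  config:
    connection:
      bootstrap: "broker:9093"
      consumer_config:
        # librdkafka settings are passed straight through to the consumer
        security.protocol: "SASL_SSL"
        sasl.mechanism: "PLAIN"
        sasl.username: "${CLUSTER_API_KEY}"
        sasl.password: "${CLUSTER_API_SECRET}"
      schema_registry_url: "https://schema-registry:8081"
      schema_registry_config:
        # passed straight through to the SchemaRegistryClient
        basic.auth.user.info: "${REGISTRY_API_KEY}:${REGISTRY_API_SECRET}"
```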
# Kafka Connect `kafka-connect` diff --git a/metadata-ingestion/source_docs/mysql.md b/metadata-ingestion/source_docs/mysql.md index ecf564395d41c..a68a9dcebb09c 100644 --- a/metadata-ingestion/source_docs/mysql.md +++ b/metadata-ingestion/source_docs/mysql.md @@ -1,4 +1,4 @@ -### MySQL `mysql` +# MySQL `mysql` Extracts: From a24dc59176cd8fcc4ec4e54e9819b540edf6c5c9 Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Tue, 27 Jul 2021 13:16:47 -0700 Subject: [PATCH 05/33] Add module installs to each page --- metadata-ingestion/README.md | 4 ++-- metadata-ingestion/sink_docs/console.md | 2 +- metadata-ingestion/sink_docs/datahub.md | 8 ++++++-- metadata-ingestion/sink_docs/file.md | 2 +- metadata-ingestion/source_docs/athena.md | 4 +++- metadata-ingestion/source_docs/bigquery.md | 7 +++++-- metadata-ingestion/source_docs/dbt.md | 3 +-- metadata-ingestion/source_docs/druid.md | 4 +++- metadata-ingestion/source_docs/feast.md | 4 +++- metadata-ingestion/source_docs/file.md | 2 +- metadata-ingestion/source_docs/glue.md | 4 +++- metadata-ingestion/source_docs/hive.md | 5 +++-- metadata-ingestion/source_docs/kafka.md | 4 +++- metadata-ingestion/source_docs/ldap.md | 4 +++- metadata-ingestion/source_docs/looker.md | 6 ++++-- metadata-ingestion/source_docs/lookml.md | 4 +++- metadata-ingestion/source_docs/mongodb.md | 4 +++- metadata-ingestion/source_docs/mssql.md | 5 +++-- metadata-ingestion/source_docs/mysql.md | 4 +++- metadata-ingestion/source_docs/oracle.md | 4 +++- metadata-ingestion/source_docs/postgres.md | 5 +++-- metadata-ingestion/source_docs/redshift.md | 5 +++-- metadata-ingestion/source_docs/sagemaker.md | 5 +++-- metadata-ingestion/source_docs/snowflake.md | 7 +++++-- metadata-ingestion/source_docs/sqlalchemy.md | 4 +++- metadata-ingestion/source_docs/superset.md | 5 +++-- 26 files changed, 77 insertions(+), 38 deletions(-) diff --git a/metadata-ingestion/README.md b/metadata-ingestion/README.md index 5ffa0c67383c1..9365ff1954f80 100644 --- a/metadata-ingestion/README.md +++ b/metadata-ingestion/README.md @@ -33,7 +33,7 @@ We use a plugin architecture so that you can install only the dependencies you a Sources: | Plugin Name | Install Command | Provides | -| --------------------------------------------- | ---------------------------------------------------------- | ----------------------------------- | --- | +| --------------------------------------------- | ---------------------------------------------------------- | ----------------------------------- | | [file](./source_docs/file.md) | _included by default_ | File source and sink | | [athena](./source_docs/athena.md) | `pip install 'acryl-datahub[athena]'` | AWS Athena source | | [bigquery](./source_docs/bigquery.md) | `pip install 'acryl-datahub[bigquery]'` | BigQuery source | @@ -43,7 +43,7 @@ Sources: | [feast](./source_docs/feast.md) | `pip install 'acryl-datahub[feast]'` | Feast source | | [glue](./source_docs/glue.md) | `pip install 'acryl-datahub[glue]'` | AWS Glue source | | [hive](./source_docs/hive.md) | `pip install 'acryl-datahub[hive]'` | Hive source | -| [kafka](./source_docs/kafka.md) | `pip install 'acryl-datahub[kafka]'` | Kafka source | | +| [kafka](./source_docs/kafka.md) | `pip install 'acryl-datahub[kafka]'` | Kafka source | | [ldap](./source_docs/ldap.md) | `pip install 'acryl-datahub[ldap]'` ([extra requirements]) | LDAP source | | [looker](./source_docs/looker.md) | `pip install 'acryl-datahub[looker]'` | Looker source | | [lookml](./source_docs/lookml.md) | `pip install 'acryl-datahub[lookml]'` | LookML source, 
requires Python 3.7+ |
diff --git a/metadata-ingestion/sink_docs/console.md b/metadata-ingestion/sink_docs/console.md
index eb3c00a68574d..d8bbf7c44c9bd 100644
--- a/metadata-ingestion/sink_docs/console.md
+++ b/metadata-ingestion/sink_docs/console.md
@@ -1,4 +1,4 @@
-# Console `console`
+# Console
 
 Simply prints each metadata event to stdout. Useful for experimentation and debugging purposes.
 
diff --git a/metadata-ingestion/sink_docs/datahub.md b/metadata-ingestion/sink_docs/datahub.md
index bc59af5e7092d..f77062d7866ae 100644
--- a/metadata-ingestion/sink_docs/datahub.md
+++ b/metadata-ingestion/sink_docs/datahub.md
@@ -1,4 +1,6 @@
-# DataHub Rest `datahub-rest`
+# DataHub Rest
+
+To install this plugin, run `pip install 'acryl-datahub[datahub-rest]'`.
 
 Pushes metadata to DataHub using the GMA rest API. The advantage of the rest-based interface
 is that any errors can immediately be reported.
@@ -10,7 +12,9 @@ sink:
     server: "http://localhost:8080"
 ```
 
-# DataHub Kafka `datahub-kafka`
+# DataHub Kafka
+
+To install this plugin, run `pip install 'acryl-datahub[datahub-kafka]'`.
 
 Pushes metadata to DataHub by publishing messages to Kafka. The advantage of the Kafka-based
 interface is that it's asynchronous and can handle higher throughput. This requires the
diff --git a/metadata-ingestion/sink_docs/file.md b/metadata-ingestion/sink_docs/file.md
index cc8282cd609c5..c7cbcc47d43b2 100644
--- a/metadata-ingestion/sink_docs/file.md
+++ b/metadata-ingestion/sink_docs/file.md
@@ -1,4 +1,4 @@
-# File `file`
+# File
 
 Outputs metadata to a file. This can be used to decouple metadata sourcing from the
 process of pushing it into DataHub, and is particularly useful for debugging purposes.
diff --git a/metadata-ingestion/source_docs/athena.md b/metadata-ingestion/source_docs/athena.md
index 726c2521fa49e..33e3ad4e08739 100644
--- a/metadata-ingestion/source_docs/athena.md
+++ b/metadata-ingestion/source_docs/athena.md
@@ -1,4 +1,6 @@
-# AWS Athena `athena`
+# AWS Athena
+
+To install this plugin, run `pip install 'acryl-datahub[athena]'`.
 
 Extracts:
 
diff --git a/metadata-ingestion/source_docs/bigquery.md b/metadata-ingestion/source_docs/bigquery.md
index 5d3eb9f636109..16b4c67560c03 100644
--- a/metadata-ingestion/source_docs/bigquery.md
+++ b/metadata-ingestion/source_docs/bigquery.md
@@ -1,4 +1,6 @@
-# Google BigQuery `bigquery`
+# Google BigQuery
+
+To install this plugin, run `pip install 'acryl-datahub[bigquery]'`.
 
 Extracts:
 
@@ -23,8 +25,9 @@ You can also get fine-grained usage statistics for BigQuery using the `bigquery-
 
 :::
 
+# Google BigQuery Usage Stats
 
-# Google BigQuery Usage Stats `bigquery-usage`
+To install this plugin, run `pip install 'acryl-datahub[bigquery-usage]'`.
 
 - Fetch a list of queries issued
 - Fetch a list of tables and columns accessed
diff --git a/metadata-ingestion/source_docs/dbt.md b/metadata-ingestion/source_docs/dbt.md
index 7e52e30a7daa6..4699ef8ca13e8 100644
--- a/metadata-ingestion/source_docs/dbt.md
+++ b/metadata-ingestion/source_docs/dbt.md
@@ -1,4 +1,4 @@
-# dbt `dbt`
+# dbt
 
 Pull metadata from dbt artifacts files:
 
@@ -36,4 +36,3 @@ source:
 ```
 
 Note: when `load_schemas` is False, models that use [identifiers](https://docs.getdbt.com/reference/resource-properties/identifier) to reference their source tables are ingested using the model identifier as the model name to preserve the lineage.
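For readers less familiar with dbt, the snippet below is a hypothetical `schema.yml` fragment showing what such an identifier looks like; the source and table names are invented purely for illustration:

```yml
# Hypothetical dbt schema.yml fragment (names invented for illustration)
version: 2

sources:
  - name: warehouse
    tables:
      - name: customers # referenced as {{ source('warehouse', 'customers') }} in models
        identifier: raw_customers_v2 # the physical table name, i.e. the "identifier"
```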
- diff --git a/metadata-ingestion/source_docs/druid.md b/metadata-ingestion/source_docs/druid.md index bd7dae7f2ac73..62efb0d2a6c74 100644 --- a/metadata-ingestion/source_docs/druid.md +++ b/metadata-ingestion/source_docs/druid.md @@ -1,4 +1,6 @@ -# Druid `druid` +# Druid + +To install this plugin, run `pip install 'acryl-datahub[druid]'`. Extracts: diff --git a/metadata-ingestion/source_docs/feast.md b/metadata-ingestion/source_docs/feast.md index 24a2c1c72d788..8cd5bea80d31c 100644 --- a/metadata-ingestion/source_docs/feast.md +++ b/metadata-ingestion/source_docs/feast.md @@ -1,7 +1,9 @@ -# Feast `feast` +# Feast **Note: Feast ingestion requires Docker to be installed.** +To install this plugin, run `pip install 'acryl-datahub[feast]'`. + Extracts: - List of feature tables (modeled as [`MLFeatureTable`](https://github.com/linkedin/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLFeatureTableProperties.pdl)s), diff --git a/metadata-ingestion/source_docs/file.md b/metadata-ingestion/source_docs/file.md index 905525c871df2..efb5315175344 100644 --- a/metadata-ingestion/source_docs/file.md +++ b/metadata-ingestion/source_docs/file.md @@ -1,4 +1,4 @@ -# File `file` +# File Pulls metadata from a previously generated file. Note that the file sink can produce such files, and a number of samples are included in the diff --git a/metadata-ingestion/source_docs/glue.md b/metadata-ingestion/source_docs/glue.md index c86bc62d476ea..0d17afab678b5 100644 --- a/metadata-ingestion/source_docs/glue.md +++ b/metadata-ingestion/source_docs/glue.md @@ -1,7 +1,9 @@ -# AWS Glue `glue` +# AWS Glue Note: if you also have files in S3 that you'd like to ingest, we recommend you use Glue's built-in data catalog. See [here](../s3-ingestion.md) for a quick guide on how to set up a crawler on Glue and ingest the outputs with DataHub. +To install this plugin, run `pip install 'acryl-datahub[glue]'`. + Extracts: - List of tables diff --git a/metadata-ingestion/source_docs/hive.md b/metadata-ingestion/source_docs/hive.md index 3eb36bf620bae..ec9be6daa0db8 100644 --- a/metadata-ingestion/source_docs/hive.md +++ b/metadata-ingestion/source_docs/hive.md @@ -1,4 +1,6 @@ -# Hive `hive` +# Hive + +To install this plugin, run `pip install 'acryl-datahub[hive]'`. Extracts: @@ -44,4 +46,3 @@ source: ``` - diff --git a/metadata-ingestion/source_docs/kafka.md b/metadata-ingestion/source_docs/kafka.md index c447556d6834e..6191d4952d87c 100644 --- a/metadata-ingestion/source_docs/kafka.md +++ b/metadata-ingestion/source_docs/kafka.md @@ -1,4 +1,6 @@ -# Kafka Metadata `kafka` +# Kafka Metadata + +To install this plugin, run `pip install 'acryl-datahub[kafka]'`. Extracts: diff --git a/metadata-ingestion/source_docs/ldap.md b/metadata-ingestion/source_docs/ldap.md index 243075da4a83a..a2682189f181e 100644 --- a/metadata-ingestion/source_docs/ldap.md +++ b/metadata-ingestion/source_docs/ldap.md @@ -1,4 +1,6 @@ -# LDAP `ldap` +# LDAP + +To install this plugin, run `pip install 'acryl-datahub[ldap]'`. Extracts: diff --git a/metadata-ingestion/source_docs/looker.md b/metadata-ingestion/source_docs/looker.md index a195a6ef3b2da..c19c8f125fa22 100644 --- a/metadata-ingestion/source_docs/looker.md +++ b/metadata-ingestion/source_docs/looker.md @@ -1,4 +1,6 @@ -# Looker dashboards `looker` +# Looker dashboards + +To install this plugin, run `pip install 'acryl-datahub[looker]'`. 
Extracts: @@ -19,4 +21,4 @@ source: actor: urn:li:corpuser:etl # Optional, defaults to urn:li:corpuser:etl env: "PROD" # Optional, default is "PROD" platform_name: "looker" # Optional, default is "looker" -``` \ No newline at end of file +``` diff --git a/metadata-ingestion/source_docs/lookml.md b/metadata-ingestion/source_docs/lookml.md index 3843ba39bee6d..d50384f5f75cc 100644 --- a/metadata-ingestion/source_docs/lookml.md +++ b/metadata-ingestion/source_docs/lookml.md @@ -1,4 +1,6 @@ -# LookML `lookml` +# LookML + +To install this plugin, run `pip install 'acryl-datahub[lookml]'`. Note! This plugin uses a package that requires Python 3.7+! diff --git a/metadata-ingestion/source_docs/mongodb.md b/metadata-ingestion/source_docs/mongodb.md index a951c992a2d61..eb05300ef02c1 100644 --- a/metadata-ingestion/source_docs/mongodb.md +++ b/metadata-ingestion/source_docs/mongodb.md @@ -1,4 +1,6 @@ -# MongoDB `mongodb` +# MongoDB + +To install this plugin, run `pip install 'acryl-datahub[mongodb]'`. Extracts: diff --git a/metadata-ingestion/source_docs/mssql.md b/metadata-ingestion/source_docs/mssql.md index 8317ef7a7deb4..99624542dba7b 100644 --- a/metadata-ingestion/source_docs/mssql.md +++ b/metadata-ingestion/source_docs/mssql.md @@ -1,4 +1,6 @@ -# Microsoft SQL Server Metadata `mssql` +# Microsoft SQL Server Metadata + +To install this plugin, run `pip install 'acryl-datahub[mssql]'`. We have two options for the underlying library used to connect to SQL Server: (1) [python-tds](https://github.com/denisenkom/pytds) and (2) [pyodbc](https://github.com/mkleehammer/pyodbc). The TDS library is pure Python and hence easier to install, but only PyODBC supports encrypted connections. @@ -63,4 +65,3 @@ source: ``` - diff --git a/metadata-ingestion/source_docs/mysql.md b/metadata-ingestion/source_docs/mysql.md index a68a9dcebb09c..03780d106a843 100644 --- a/metadata-ingestion/source_docs/mysql.md +++ b/metadata-ingestion/source_docs/mysql.md @@ -1,4 +1,6 @@ -# MySQL `mysql` +# MySQL + +To install this plugin, run `pip install 'acryl-datahub[mysql]'`. Extracts: diff --git a/metadata-ingestion/source_docs/oracle.md b/metadata-ingestion/source_docs/oracle.md index 264c5e18804c6..d74aa9eb8aa19 100644 --- a/metadata-ingestion/source_docs/oracle.md +++ b/metadata-ingestion/source_docs/oracle.md @@ -1,4 +1,6 @@ -# Oracle `oracle` +# Oracle + +To install this plugin, run `pip install 'acryl-datahub[oracle]'`. Extracts: diff --git a/metadata-ingestion/source_docs/postgres.md b/metadata-ingestion/source_docs/postgres.md index 92ffef44a5718..9991f690d6dbe 100644 --- a/metadata-ingestion/source_docs/postgres.md +++ b/metadata-ingestion/source_docs/postgres.md @@ -1,4 +1,6 @@ -# PostgreSQL `postgres` +# PostgreSQL + +To install this plugin, run `pip install 'acryl-datahub[postgres]'`. Extracts: @@ -20,4 +22,3 @@ source: # table_pattern/schema_pattern is same as above # options is same as above ``` - diff --git a/metadata-ingestion/source_docs/redshift.md b/metadata-ingestion/source_docs/redshift.md index 22595a055df5a..311647383cf11 100644 --- a/metadata-ingestion/source_docs/redshift.md +++ b/metadata-ingestion/source_docs/redshift.md @@ -1,4 +1,6 @@ -# Redshift `redshift` +# Redshift + +To install this plugin, run `pip install 'acryl-datahub[redshift]'`. 
Extracts: @@ -38,4 +40,3 @@ source: ``` - diff --git a/metadata-ingestion/source_docs/sagemaker.md b/metadata-ingestion/source_docs/sagemaker.md index 295e4b7f1a21e..587df3f02970b 100644 --- a/metadata-ingestion/source_docs/sagemaker.md +++ b/metadata-ingestion/source_docs/sagemaker.md @@ -1,4 +1,6 @@ -# AWS SageMaker `sagemaker` +# AWS SageMaker + +To install this plugin, run `pip install 'acryl-datahub[sagemaker]'`. Extracts: @@ -31,4 +33,3 @@ source: training: True transform: True ``` - diff --git a/metadata-ingestion/source_docs/snowflake.md b/metadata-ingestion/source_docs/snowflake.md index c42d55e7b5f3a..73b5563728705 100644 --- a/metadata-ingestion/source_docs/snowflake.md +++ b/metadata-ingestion/source_docs/snowflake.md @@ -1,4 +1,6 @@ -# Snowflake `snowflake` +# Snowflake + +To install this plugin, run `pip install 'acryl-datahub[snowflake]'`. Extracts: @@ -33,9 +35,10 @@ You can also get fine-grained usage statistics for Snowflake using the `snowflak ::: - # Snowflake Usage Stats `snowflake-usage` +To install this plugin, run `pip install 'acryl-datahub[snowflake-usage]'`. + - Fetch a list of queries issued - Fetch a list of tables and columns accessed (excludes views) - Aggregate these statistics into buckets, by day or hour granularity diff --git a/metadata-ingestion/source_docs/sqlalchemy.md b/metadata-ingestion/source_docs/sqlalchemy.md index 977f40b61f0fa..57959aaa31411 100644 --- a/metadata-ingestion/source_docs/sqlalchemy.md +++ b/metadata-ingestion/source_docs/sqlalchemy.md @@ -1,4 +1,6 @@ -# Other databases using SQLAlchemy `sqlalchemy` +# Other databases using SQLAlchemy + +To install this plugin, run `pip install 'acryl-datahub[sqlalchemy]'`. The `sqlalchemy` source is useful if we don't have a pre-built source for your chosen database system, but there is an [SQLAlchemy dialect](https://docs.sqlalchemy.org/en/14/dialects/) diff --git a/metadata-ingestion/source_docs/superset.md b/metadata-ingestion/source_docs/superset.md index 3c6dba608aefc..55749c91686e4 100644 --- a/metadata-ingestion/source_docs/superset.md +++ b/metadata-ingestion/source_docs/superset.md @@ -1,4 +1,6 @@ -# Superset `superset` +# Superset + +To install this plugin, run `pip install 'acryl-datahub[superset]'`. Extracts: @@ -16,4 +18,3 @@ source: ``` See documentation for superset's `/security/login` at https://superset.apache.org/docs/rest-api for more details on superset's login api. 
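A complete recipe simply pairs this source with one of the sinks under `sink_docs`. For example, a minimal sketch assuming a DataHub GMS reachable at localhost:8080:

```yml
source:
  type: superset
  config:
    username: user
    password: pass
    provider: db
    connect_uri: http://localhost:8088

sink:
  type: "datahub-rest"
  config:
    server: "http://localhost:8080"
```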
- From 5c6a19a52cf6c9ab29985aff354336dd83e2c7ce Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Tue, 27 Jul 2021 15:07:26 -0700 Subject: [PATCH 06/33] Consistency --- metadata-ingestion/README.md | 4 ++-- metadata-ingestion/source_docs/athena.md | 2 +- metadata-ingestion/source_docs/bigquery.md | 4 ++-- metadata-ingestion/source_docs/dbt.md | 2 +- metadata-ingestion/source_docs/druid.md | 2 +- metadata-ingestion/source_docs/feast.md | 2 +- metadata-ingestion/source_docs/file.md | 2 +- metadata-ingestion/source_docs/glue.md | 6 +++--- metadata-ingestion/source_docs/hive.md | 2 +- metadata-ingestion/source_docs/kafka.md | 4 ++-- metadata-ingestion/source_docs/ldap.md | 2 +- metadata-ingestion/source_docs/looker.md | 2 +- metadata-ingestion/source_docs/lookml.md | 2 +- metadata-ingestion/source_docs/mongodb.md | 2 +- metadata-ingestion/source_docs/mssql.md | 2 +- metadata-ingestion/source_docs/mysql.md | 2 +- metadata-ingestion/source_docs/oracle.md | 2 +- metadata-ingestion/source_docs/postgres.md | 2 +- metadata-ingestion/source_docs/redshift.md | 2 +- metadata-ingestion/source_docs/sagemaker.md | 2 +- metadata-ingestion/source_docs/snowflake.md | 2 +- metadata-ingestion/source_docs/sqlalchemy.md | 2 +- metadata-ingestion/source_docs/superset.md | 2 +- 23 files changed, 28 insertions(+), 28 deletions(-) diff --git a/metadata-ingestion/README.md b/metadata-ingestion/README.md index 9365ff1954f80..41826a668a220 100644 --- a/metadata-ingestion/README.md +++ b/metadata-ingestion/README.md @@ -54,9 +54,9 @@ Sources: | [postgres](./source_docs/postgres.md) | `pip install 'acryl-datahub[postgres]'` | Postgres source | | [redshift](./source_docs/redshift.md) | `pip install 'acryl-datahub[redshift]'` | Redshift source | | [sagemaker](./source_docs/sagemaker.md) | `pip install 'acryl-datahub[sagemaker]'` | AWS SageMaker source | -| [sqlalchemy](./source_docs/sqlalchemy.md) | `pip install 'acryl-datahub[sqlalchemy]'` | Generic SQLAlchemy source | | [snowflake](./source_docs/snowflake.md) | `pip install 'acryl-datahub[snowflake]'` | Snowflake source | | [snowflake-usage](./source_docs/snowflake.md) | `pip install 'acryl-datahub[snowflake-usage]'` | Snowflake usage statistics source | +| [sqlalchemy](./source_docs/sqlalchemy.md) | `pip install 'acryl-datahub[sqlalchemy]'` | Generic SQLAlchemy source | | [superset](./source_docs/superset.md) | `pip install 'acryl-datahub[superset]'` | Superset source | Sinks @@ -144,7 +144,7 @@ Running a recipe is quite easy. datahub ingest -c ./examples/recipes/mssql_to_datahub.yml ``` -A number of recipes are included in the examples/recipes directory. +A number of recipes are included in the [examples/recipes](./examples/recipes) directory. See also pages described in the [table of plugins](#installing-plugins) for more context on recipe options for each source and sink. ## Transformations diff --git a/metadata-ingestion/source_docs/athena.md b/metadata-ingestion/source_docs/athena.md index 33e3ad4e08739..5e0dcda9c4b23 100644 --- a/metadata-ingestion/source_docs/athena.md +++ b/metadata-ingestion/source_docs/athena.md @@ -2,7 +2,7 @@ To install this plugin, run `pip install 'acryl-datahub[athena]'`. 
-Extracts: +This plugin extracts the following: - List of databases and tables - Column types associated with each table diff --git a/metadata-ingestion/source_docs/bigquery.md b/metadata-ingestion/source_docs/bigquery.md index 16b4c67560c03..f705bbe5ddd3e 100644 --- a/metadata-ingestion/source_docs/bigquery.md +++ b/metadata-ingestion/source_docs/bigquery.md @@ -2,7 +2,7 @@ To install this plugin, run `pip install 'acryl-datahub[bigquery]'`. -Extracts: +This plugin extracts the following: - List of databases, schema, and tables - Column types associated with each table @@ -21,7 +21,7 @@ source: :::tip -You can also get fine-grained usage statistics for BigQuery using the `bigquery-usage` source. +You can also get fine-grained usage statistics for BigQuery using the `bigquery-usage` source described below. ::: diff --git a/metadata-ingestion/source_docs/dbt.md b/metadata-ingestion/source_docs/dbt.md index 4699ef8ca13e8..8a4f72794e7f8 100644 --- a/metadata-ingestion/source_docs/dbt.md +++ b/metadata-ingestion/source_docs/dbt.md @@ -1,6 +1,6 @@ # dbt -Pull metadata from dbt artifacts files: +This plugin pulls metadata from dbt's artifact files: - [dbt manifest file](https://docs.getdbt.com/reference/artifacts/manifest-json) - This file contains model, source and lineage data. diff --git a/metadata-ingestion/source_docs/druid.md b/metadata-ingestion/source_docs/druid.md index 62efb0d2a6c74..e719b3c1ea5f5 100644 --- a/metadata-ingestion/source_docs/druid.md +++ b/metadata-ingestion/source_docs/druid.md @@ -2,7 +2,7 @@ To install this plugin, run `pip install 'acryl-datahub[druid]'`. -Extracts: +This plugin extracts the following: - List of databases, schema, and tables - Column types associated with each table diff --git a/metadata-ingestion/source_docs/feast.md b/metadata-ingestion/source_docs/feast.md index 8cd5bea80d31c..46b16b41be223 100644 --- a/metadata-ingestion/source_docs/feast.md +++ b/metadata-ingestion/source_docs/feast.md @@ -4,7 +4,7 @@ To install this plugin, run `pip install 'acryl-datahub[feast]'`. -Extracts: +This plugin extracts the following: - List of feature tables (modeled as [`MLFeatureTable`](https://github.com/linkedin/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLFeatureTableProperties.pdl)s), features ([`MLFeature`](https://github.com/linkedin/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLFeatureProperties.pdl)s), diff --git a/metadata-ingestion/source_docs/file.md b/metadata-ingestion/source_docs/file.md index efb5315175344..0b1ba7c504dad 100644 --- a/metadata-ingestion/source_docs/file.md +++ b/metadata-ingestion/source_docs/file.md @@ -1,6 +1,6 @@ # File -Pulls metadata from a previously generated file. Note that the file sink +This plugin pulls metadata from a previously generated file. The file sink can produce such files, and a number of samples are included in the [examples/mce_files](../examples/mce_files) directory. diff --git a/metadata-ingestion/source_docs/glue.md b/metadata-ingestion/source_docs/glue.md index 0d17afab678b5..be65d0e5c0567 100644 --- a/metadata-ingestion/source_docs/glue.md +++ b/metadata-ingestion/source_docs/glue.md @@ -1,10 +1,10 @@ # AWS Glue -Note: if you also have files in S3 that you'd like to ingest, we recommend you use Glue's built-in data catalog. See [here](../s3-ingestion.md) for a quick guide on how to set up a crawler on Glue and ingest the outputs with DataHub. - To install this plugin, run `pip install 'acryl-datahub[glue]'`. 
-Extracts: +Note: if you also have files in S3 that you'd like to ingest, we recommend you use Glue's built-in data catalog. See [here](../s3-ingestion.md) for a quick guide on how to set up a crawler on Glue and ingest the outputs with DataHub. + +This plugin extracts the following: - List of tables - Column types associated with each table diff --git a/metadata-ingestion/source_docs/hive.md b/metadata-ingestion/source_docs/hive.md index ec9be6daa0db8..c3125e8ec9299 100644 --- a/metadata-ingestion/source_docs/hive.md +++ b/metadata-ingestion/source_docs/hive.md @@ -2,7 +2,7 @@ To install this plugin, run `pip install 'acryl-datahub[hive]'`. -Extracts: +This plugin extracts the following: - List of databases, schema, and tables - Column types associated with each table diff --git a/metadata-ingestion/source_docs/kafka.md b/metadata-ingestion/source_docs/kafka.md index 6191d4952d87c..d87f0f4236b17 100644 --- a/metadata-ingestion/source_docs/kafka.md +++ b/metadata-ingestion/source_docs/kafka.md @@ -2,7 +2,7 @@ To install this plugin, run `pip install 'acryl-datahub[kafka]'`. -Extracts: +This plugin extracts the following: - List of topics - from the Kafka broker - Schemas associated with each topic - from the schema registry @@ -24,7 +24,7 @@ For a full example with a number of security options, see this [example recipe]( # Kafka Connect `kafka-connect` -Extracts: +This plugin extracts the following: - Kafka Connect connector as individual `DataFlowSnapshotClass` entity - Creating individual `DataJobSnapshotClass` entity using `{connector_name}:{source_dataset}` naming diff --git a/metadata-ingestion/source_docs/ldap.md b/metadata-ingestion/source_docs/ldap.md index a2682189f181e..b1df1f385a99f 100644 --- a/metadata-ingestion/source_docs/ldap.md +++ b/metadata-ingestion/source_docs/ldap.md @@ -2,7 +2,7 @@ To install this plugin, run `pip install 'acryl-datahub[ldap]'`. -Extracts: +This plugin extracts the following: - List of people - Names, emails, titles, and manager information for each person diff --git a/metadata-ingestion/source_docs/looker.md b/metadata-ingestion/source_docs/looker.md index c19c8f125fa22..c395781b7a2d9 100644 --- a/metadata-ingestion/source_docs/looker.md +++ b/metadata-ingestion/source_docs/looker.md @@ -2,7 +2,7 @@ To install this plugin, run `pip install 'acryl-datahub[looker]'`. -Extracts: +This plugin extracts the following: - Looker dashboards and dashboard elements (charts) - Names, descriptions, URLs, chart types, input view for the charts diff --git a/metadata-ingestion/source_docs/lookml.md b/metadata-ingestion/source_docs/lookml.md index d50384f5f75cc..407656c583850 100644 --- a/metadata-ingestion/source_docs/lookml.md +++ b/metadata-ingestion/source_docs/lookml.md @@ -4,7 +4,7 @@ To install this plugin, run `pip install 'acryl-datahub[lookml]'`. Note! This plugin uses a package that requires Python 3.7+! -Extracts: +This plugin extracts the following: - LookML views from model files - Name, upstream table names, dimensions, measures, and dimension groups diff --git a/metadata-ingestion/source_docs/mongodb.md b/metadata-ingestion/source_docs/mongodb.md index eb05300ef02c1..9cb1140e7bced 100644 --- a/metadata-ingestion/source_docs/mongodb.md +++ b/metadata-ingestion/source_docs/mongodb.md @@ -2,7 +2,7 @@ To install this plugin, run `pip install 'acryl-datahub[mongodb]'`. 
-Extracts: +This plugin extracts the following: - List of databases - List of collections in each database and infers schemas for each collection diff --git a/metadata-ingestion/source_docs/mssql.md b/metadata-ingestion/source_docs/mssql.md index 99624542dba7b..85c6be77c4939 100644 --- a/metadata-ingestion/source_docs/mssql.md +++ b/metadata-ingestion/source_docs/mssql.md @@ -4,7 +4,7 @@ To install this plugin, run `pip install 'acryl-datahub[mssql]'`. We have two options for the underlying library used to connect to SQL Server: (1) [python-tds](https://github.com/denisenkom/pytds) and (2) [pyodbc](https://github.com/mkleehammer/pyodbc). The TDS library is pure Python and hence easier to install, but only PyODBC supports encrypted connections. -Extracts: +This plugin extracts the following: - List of databases, schema, tables and views - Column types associated with each table/view diff --git a/metadata-ingestion/source_docs/mysql.md b/metadata-ingestion/source_docs/mysql.md index 03780d106a843..ae4f1b1823614 100644 --- a/metadata-ingestion/source_docs/mysql.md +++ b/metadata-ingestion/source_docs/mysql.md @@ -2,7 +2,7 @@ To install this plugin, run `pip install 'acryl-datahub[mysql]'`. -Extracts: +This plugin extracts the following: - List of databases and tables - Column types and schema associated with each table diff --git a/metadata-ingestion/source_docs/oracle.md b/metadata-ingestion/source_docs/oracle.md index d74aa9eb8aa19..b516cc2dac716 100644 --- a/metadata-ingestion/source_docs/oracle.md +++ b/metadata-ingestion/source_docs/oracle.md @@ -2,7 +2,7 @@ To install this plugin, run `pip install 'acryl-datahub[oracle]'`. -Extracts: +This plugin extracts the following: - List of databases, schema, and tables - Column types associated with each table diff --git a/metadata-ingestion/source_docs/postgres.md b/metadata-ingestion/source_docs/postgres.md index 9991f690d6dbe..5f41cd0fd95cc 100644 --- a/metadata-ingestion/source_docs/postgres.md +++ b/metadata-ingestion/source_docs/postgres.md @@ -2,7 +2,7 @@ To install this plugin, run `pip install 'acryl-datahub[postgres]'`. -Extracts: +This plugin extracts the following: - List of databases, schema, and tables - Column types associated with each table diff --git a/metadata-ingestion/source_docs/redshift.md b/metadata-ingestion/source_docs/redshift.md index 311647383cf11..c8ad1aa4259f7 100644 --- a/metadata-ingestion/source_docs/redshift.md +++ b/metadata-ingestion/source_docs/redshift.md @@ -2,7 +2,7 @@ To install this plugin, run `pip install 'acryl-datahub[redshift]'`. -Extracts: +This plugin extracts the following: - List of databases, schema, and tables - Column types associated with each table diff --git a/metadata-ingestion/source_docs/sagemaker.md b/metadata-ingestion/source_docs/sagemaker.md index 587df3f02970b..f6ea7009b2448 100644 --- a/metadata-ingestion/source_docs/sagemaker.md +++ b/metadata-ingestion/source_docs/sagemaker.md @@ -2,7 +2,7 @@ To install this plugin, run `pip install 'acryl-datahub[sagemaker]'`. -Extracts: +This plugin extracts the following: - Feature groups - Models, jobs, and lineage between the two (e.g. when jobs output a model or a model is used by a job) diff --git a/metadata-ingestion/source_docs/snowflake.md b/metadata-ingestion/source_docs/snowflake.md index 73b5563728705..2c90ffb9c9ff9 100644 --- a/metadata-ingestion/source_docs/snowflake.md +++ b/metadata-ingestion/source_docs/snowflake.md @@ -2,7 +2,7 @@ To install this plugin, run `pip install 'acryl-datahub[snowflake]'`. 
-Extracts: +This plugin extracts the following: - List of databases, schema, and tables - Column types associated with each table diff --git a/metadata-ingestion/source_docs/sqlalchemy.md b/metadata-ingestion/source_docs/sqlalchemy.md index 57959aaa31411..272db30599da5 100644 --- a/metadata-ingestion/source_docs/sqlalchemy.md +++ b/metadata-ingestion/source_docs/sqlalchemy.md @@ -6,7 +6,7 @@ The `sqlalchemy` source is useful if we don't have a pre-built source for your c database system, but there is an [SQLAlchemy dialect](https://docs.sqlalchemy.org/en/14/dialects/) defined elsewhere. In order to use this, you must `pip install` the required dialect packages yourself. -Extracts: +This plugin extracts the following: - List of schemas and tables - Column types associated with each table diff --git a/metadata-ingestion/source_docs/superset.md b/metadata-ingestion/source_docs/superset.md index 55749c91686e4..5b83566edc960 100644 --- a/metadata-ingestion/source_docs/superset.md +++ b/metadata-ingestion/source_docs/superset.md @@ -2,7 +2,7 @@ To install this plugin, run `pip install 'acryl-datahub[superset]'`. -Extracts: +This plugin extracts the following: - List of charts and dashboards From 2382c3037b6c01c663681a7008f020c7d82d3cec Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Tue, 27 Jul 2021 15:36:06 -0700 Subject: [PATCH 07/33] Standardize sqlalchemy pattern --- metadata-ingestion/source_docs/athena.md | 3 ++ metadata-ingestion/source_docs/bigquery.md | 32 ++++++++++++++- metadata-ingestion/source_docs/druid.md | 34 +++++++++++++++- metadata-ingestion/source_docs/glue.md | 11 ++--- metadata-ingestion/source_docs/hive.md | 36 ++++++++++++++++- metadata-ingestion/source_docs/mssql.md | 42 +++++++++++++++----- metadata-ingestion/source_docs/mysql.md | 27 +++++++++++-- metadata-ingestion/source_docs/oracle.md | 35 +++++++++++++++- metadata-ingestion/source_docs/postgres.md | 35 +++++++++++++++- metadata-ingestion/source_docs/redshift.md | 35 +++++++++++++++- metadata-ingestion/source_docs/snowflake.md | 35 +++++++++++++++- metadata-ingestion/source_docs/sqlalchemy.md | 36 +++++++++++++++-- 12 files changed, 326 insertions(+), 35 deletions(-) diff --git a/metadata-ingestion/source_docs/athena.md b/metadata-ingestion/source_docs/athena.md index 5e0dcda9c4b23..7792511729487 100644 --- a/metadata-ingestion/source_docs/athena.md +++ b/metadata-ingestion/source_docs/athena.md @@ -15,11 +15,14 @@ source: # See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html password: aws_secret_access_key # Optional. database: database # Optional, defaults to "default" + aws_region: aws_region_name # i.e. "eu-west-1" + s3_staging_dir: s3_location # "s3:///prefix/" # The s3_staging_dir parameter is needed because Athena always writes query results to S3. # See https://docs.aws.amazon.com/athena/latest/ug/querying.html # However, the athena driver will transparently fetch these results as you would expect from any other sql client. + work_group: athena_workgroup # "primary" # table_pattern/schema_pattern is same as above ``` diff --git a/metadata-ingestion/source_docs/bigquery.md b/metadata-ingestion/source_docs/bigquery.md index f705bbe5ddd3e..d5c1d15b95aa6 100644 --- a/metadata-ingestion/source_docs/bigquery.md +++ b/metadata-ingestion/source_docs/bigquery.md @@ -12,11 +12,41 @@ source: type: bigquery config: project_id: project # optional - can autodetect from environment + + # Any options specified here will be passed to SQLAlchemy's create_engine as kwargs. 
+ # See https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine for details. + # Many of these options are specific to the underlying database driver, so that library's + # documentation will be a good reference for what is supported. To find which dialect is likely + # in use, consult this table: https://docs.sqlalchemy.org/en/14/dialects/index.html. options: # options is same as above # See https://github.com/mxmzdlv/pybigquery#authentication for details. credentials_path: "/path/to/keyfile.json" # optional + + # Tables to allow/deny + table_pattern: + deny: + # Note that the deny patterns take precedence over the allow patterns. + - "bad_table" + - "junk_table" + # Can also be a regular expression + - "(old|used|deprecated)_table" + allow: + - "good_table" + - "excellent_table" + + # Although the 'table_pattern' enables you to skip everything from certain schemas, + # having another option to allow/deny on schema level is an optimization for the case when there is a large number + # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter + # them out afterwards via the table_pattern. + schema_pattern: + deny: + - "bad_schema" + - "junk_table" + allow: + - "good_schema" + - "excellent_schema" + include_views: True # whether to include views, defaults to True - # table_pattern/schema_pattern is same as above ``` :::tip diff --git a/metadata-ingestion/source_docs/druid.md b/metadata-ingestion/source_docs/druid.md index e719b3c1ea5f5..a2e17e429dd4e 100644 --- a/metadata-ingestion/source_docs/druid.md +++ b/metadata-ingestion/source_docs/druid.md @@ -17,8 +17,38 @@ source: config: # Point to broker address host_port: localhost:8082 + + # Any options specified here will be passed to SQLAlchemy's create_engine as kwargs. + # See https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine for details. + # Many of these options are specific to the underlying database driver, so that library's + # documentation will be a good reference for what is supported. To find which dialect is likely + # in use, consult this table: https://docs.sqlalchemy.org/en/14/dialects/index.html. + options: + # driver_option: some-option + + # Tables to allow/deny + table_pattern: + deny: + # Note that the deny patterns take precedence over the allow patterns. + - "bad_table" + - "junk_table" + # Can also be a regular expression + - "(old|used|deprecated)_table" + allow: + - "good_table" + - "excellent_table" + + # Although the 'table_pattern' enables you to skip everything from certain schemas, + # having another option to allow/deny on schema level is an optimization for the case when there is a large number + # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter + # them out afterwards via the table_pattern. schema_pattern: deny: - - "^(lookup|sys).*" - # options is same as above + - "bad_schema" + - "junk_table" + allow: + - "good_schema" + - "excellent_schema" + + include_views: True # whether to include views, defaults to True ``` diff --git a/metadata-ingestion/source_docs/glue.md b/metadata-ingestion/source_docs/glue.md index be65d0e5c0567..661bab8f6a759 100644 --- a/metadata-ingestion/source_docs/glue.md +++ b/metadata-ingestion/source_docs/glue.md @@ -16,17 +16,18 @@ source: type: glue config: aws_region: # aws_region_name, i.e. 
"eu-west-1" - extract_transforms: True # whether to ingest Glue jobs, defaults to True env: # environment for the DatasetSnapshot URN, one of "DEV", "EI", "PROD" or "CORP". Defaults to "PROD". - # Filtering patterns for databases and tables to scan - database_pattern: # Optional, to filter databases scanned, same as schema_pattern above. - table_pattern: # Optional, to filter tables scanned, same as table_pattern above. - # Credentials. If not specified here, these are picked up according to boto3 rules. # (see https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html) aws_access_key_id: # Optional. aws_secret_access_key: # Optional. aws_session_token: # Optional. aws_role: # Optional (Role chaining supported by using a sorted list). + + extract_transforms: True # whether to ingest Glue jobs, defaults to True + + # Filtering patterns for databases and tables to scan + database_pattern: # Optional, to filter databases scanned, same as schema_pattern above. + table_pattern: # Optional, to filter tables scanned, same as table_pattern above. ``` diff --git a/metadata-ingestion/source_docs/hive.md b/metadata-ingestion/source_docs/hive.md index c3125e8ec9299..a0448728adebc 100644 --- a/metadata-ingestion/source_docs/hive.md +++ b/metadata-ingestion/source_docs/hive.md @@ -22,8 +22,40 @@ source: password: pass # optional host_port: localhost:10000 database: DemoDatabase # optional, defaults to 'default' - # table_pattern/schema_pattern is same as above - # options is same as above + + # Any options specified here will be passed to SQLAlchemy's create_engine as kwargs. + # See https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine for details. + # Many of these options are specific to the underlying database driver, so that library's + # documentation will be a good reference for what is supported. To find which dialect is likely + # in use, consult this table: https://docs.sqlalchemy.org/en/14/dialects/index.html. + options: + # driver_option: some-option + + # Tables to allow/deny + table_pattern: + deny: + # Note that the deny patterns take precedence over the allow patterns. + - "bad_table" + - "junk_table" + # Can also be a regular expression + - "(old|used|deprecated)_table" + allow: + - "good_table" + - "excellent_table" + + # Although the 'table_pattern' enables you to skip everything from certain schemas, + # having another option to allow/deny on schema level is an optimization for the case when there is a large number + # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter + # them out afterwards via the table_pattern. + schema_pattern: + deny: + - "bad_schema" + - "junk_table" + allow: + - "good_schema" + - "excellent_schema" + + include_views: True # whether to include views, defaults to True ```
diff --git a/metadata-ingestion/source_docs/mssql.md b/metadata-ingestion/source_docs/mssql.md index 85c6be77c4939..2104cdd7445f7 100644 --- a/metadata-ingestion/source_docs/mssql.md +++ b/metadata-ingestion/source_docs/mssql.md @@ -17,20 +17,42 @@ source: password: pass host_port: localhost:1433 database: DemoDatabase - include_views: True # whether to include views, defaults to True + + # Any options specified here will be passed to SQLAlchemy's create_engine as kwargs. + # See https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine for details. + # Many of these options are specific to the underlying database driver, so that library's + # documentation will be a good reference for what is supported. To find which dialect is likely + # in use, consult this table: https://docs.sqlalchemy.org/en/14/dialects/index.html. + options: + charset: "utf8" + + # Tables to allow/deny table_pattern: deny: + # Note that the deny patterns take precedence over the allow patterns. - "^.*\\.sys_.*" # deny all tables that start with sys_ + - "bad_table" + - "junk_table" + # Can also be a regular expression + - "(old|used|deprecated)_table" allow: - - "schema1.table1" - - "schema1.table2" - options: - # Any options specified here will be passed to SQLAlchemy's create_engine as kwargs. - # See https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine for details. - # Many of these options are specific to the underlying database driver, so that library's - # documentation will be a good reference for what is supported. To find which dialect is likely - # in use, consult this table: https://docs.sqlalchemy.org/en/14/dialects/index.html. - charset: "utf8" + - "good_table" + - "excellent_table" + + # Although the 'table_pattern' enables you to skip everything from certain schemas, + # having another option to allow/deny on schema level is an optimization for the case when there is a large number + # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter + # them out afterwards via the table_pattern. + schema_pattern: + deny: + - "bad_schema" + - "junk_table" + allow: + - "good_schema" + - "excellent_schema" + + include_views: True # whether to include views, defaults to True + # If set to true, we'll use the pyodbc library. This requires you to have # already installed the Microsoft ODBC Driver for SQL Server. # See https://docs.microsoft.com/en-us/sql/connect/python/pyodbc/step-1-configure-development-environment-for-pyodbc-python-development?view=sql-server-ver15 diff --git a/metadata-ingestion/source_docs/mysql.md b/metadata-ingestion/source_docs/mysql.md index ae4f1b1823614..a6d159760f0ac 100644 --- a/metadata-ingestion/source_docs/mysql.md +++ b/metadata-ingestion/source_docs/mysql.md @@ -15,19 +15,38 @@ source: password: example database: dbname host_port: localhost:3306 + + # Any options specified here will be passed to SQLAlchemy's create_engine as kwargs. + # See https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine for details. + # Many of these options are specific to the underlying database driver, so that library's + # documentation will be a good reference for what is supported. To find which dialect is likely + # in use, consult this table: https://docs.sqlalchemy.org/en/14/dialects/index.html. + options: + # driver_option: some-option + + # Tables to allow/deny table_pattern: deny: # Note that the deny patterns take precedence over the allow patterns. 
- - "performance_schema" + - "bad_table" + - "junk_table" + # Can also be a regular expression + - "(old|used|deprecated)_table" allow: - - "schema1.table2" + - "good_table" + - "excellent_table" + # Although the 'table_pattern' enables you to skip everything from certain schemas, # having another option to allow/deny on schema level is an optimization for the case when there is a large number # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter # them out afterwards via the table_pattern. schema_pattern: deny: - - "garbage_schema" + - "bad_schema" + - "junk_table" allow: - - "schema1" + - "good_schema" + - "excellent_schema" + + include_views: True # whether to include views, defaults to True ``` diff --git a/metadata-ingestion/source_docs/oracle.md b/metadata-ingestion/source_docs/oracle.md index b516cc2dac716..fc0bd1e8f63ef 100644 --- a/metadata-ingestion/source_docs/oracle.md +++ b/metadata-ingestion/source_docs/oracle.md @@ -21,7 +21,38 @@ source: host_port: localhost:5432 database: dbname service_name: svc # omit database if using this option + + # Any options specified here will be passed to SQLAlchemy's create_engine as kwargs. + # See https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine for details. + # Many of these options are specific to the underlying database driver, so that library's + # documentation will be a good reference for what is supported. To find which dialect is likely + # in use, consult this table: https://docs.sqlalchemy.org/en/14/dialects/index.html. + options: + # driver_option: some-option + + # Tables to allow/deny + table_pattern: + deny: + # Note that the deny patterns take precedence over the allow patterns. + - "bad_table" + - "junk_table" + # Can also be a regular expression + - "(old|used|deprecated)_table" + allow: + - "good_table" + - "excellent_table" + + # Although the 'table_pattern' enables you to skip everything from certain schemas, + # having another option to allow/deny on schema level is an optimization for the case when there is a large number + # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter + # them out afterwards via the table_pattern. + schema_pattern: + deny: + - "bad_schema" + - "junk_table" + allow: + - "good_schema" + - "excellent_schema" + include_views: True # whether to include views, defaults to True - # table_pattern/schema_pattern is same as above - # options is same as above ``` diff --git a/metadata-ingestion/source_docs/postgres.md b/metadata-ingestion/source_docs/postgres.md index 5f41cd0fd95cc..b8042c69d2e53 100644 --- a/metadata-ingestion/source_docs/postgres.md +++ b/metadata-ingestion/source_docs/postgres.md @@ -18,7 +18,38 @@ source: host_port: localhost:5432 database: DemoDatabase database_alias: DatabaseNameToBeIngested + + # Any options specified here will be passed to SQLAlchemy's create_engine as kwargs. + # See https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine for details. + # Many of these options are specific to the underlying database driver, so that library's + # documentation will be a good reference for what is supported. To find which dialect is likely + # in use, consult this table: https://docs.sqlalchemy.org/en/14/dialects/index.html. + options: + # driver_option: some-option + + # Tables to allow/deny + table_pattern: + deny: + # Note that the deny patterns take precedence over the allow patterns. 
+ - "bad_table" + - "junk_table" + # Can also be a regular expression + - "(old|used|deprecated)_table" + allow: + - "good_table" + - "excellent_table" + + # Although the 'table_pattern' enables you to skip everything from certain schemas, + # having another option to allow/deny on schema level is an optimization for the case when there is a large number + # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter + # them out afterwards via the table_pattern. + schema_pattern: + deny: + - "bad_schema" + - "junk_table" + allow: + - "good_schema" + - "excellent_schema" + include_views: True # whether to include views, defaults to True - # table_pattern/schema_pattern is same as above - # options is same as above ``` diff --git a/metadata-ingestion/source_docs/redshift.md b/metadata-ingestion/source_docs/redshift.md index c8ad1aa4259f7..bf74400a0103c 100644 --- a/metadata-ingestion/source_docs/redshift.md +++ b/metadata-ingestion/source_docs/redshift.md @@ -16,9 +16,40 @@ source: password: pass host_port: example.something.us-west-2.redshift.amazonaws.com:5439 database: DemoDatabase + + # Any options specified here will be passed to SQLAlchemy's create_engine as kwargs. + # See https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine for details. + # Many of these options are specific to the underlying database driver, so that library's + # documentation will be a good reference for what is supported. To find which dialect is likely + # in use, consult this table: https://docs.sqlalchemy.org/en/14/dialects/index.html. + options: + # driver_option: some-option + + # Tables to allow/deny + table_pattern: + deny: + # Note that the deny patterns take precedence over the allow patterns. + - "bad_table" + - "junk_table" + # Can also be a regular expression + - "(old|used|deprecated)_table" + allow: + - "good_table" + - "excellent_table" + + # Although the 'table_pattern' enables you to skip everything from certain schemas, + # having another option to allow/deny on schema level is an optimization for the case when there is a large number + # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter + # them out afterwards via the table_pattern. + schema_pattern: + deny: + - "bad_schema" + - "junk_table" + allow: + - "good_schema" + - "excellent_schema" + include_views: True # whether to include views, defaults to True - # table_pattern/schema_pattern is same as above - # options is same as above ```
diff --git a/metadata-ingestion/source_docs/snowflake.md b/metadata-ingestion/source_docs/snowflake.md index 2c90ffb9c9ff9..57ede6a5d248a 100644 --- a/metadata-ingestion/source_docs/snowflake.md +++ b/metadata-ingestion/source_docs/snowflake.md @@ -24,9 +24,40 @@ source: - ^SNOWFLAKE_SAMPLE_DATA\$ warehouse: "COMPUTE_WH" # optional role: "sysadmin" # optional + + # Any options specified here will be passed to SQLAlchemy's create_engine as kwargs. + # See https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine for details. + # Many of these options are specific to the underlying database driver, so that library's + # documentation will be a good reference for what is supported. To find which dialect is likely + # in use, consult this table: https://docs.sqlalchemy.org/en/14/dialects/index.html. + options: + # driver_option: some-option + + # Tables to allow/deny + table_pattern: + deny: + # Note that the deny patterns take precedence over the allow patterns. + - "bad_table" + - "junk_table" + # Can also be a regular expression + - "(old|used|deprecated)_table" + allow: + - "good_table" + - "excellent_table" + + # Although the 'table_pattern' enables you to skip everything from certain schemas, + # having another option to allow/deny on schema level is an optimization for the case when there is a large number + # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter + # them out afterwards via the table_pattern. + schema_pattern: + deny: + - "bad_schema" + - "junk_table" + allow: + - "good_schema" + - "excellent_schema" + include_views: True # whether to include views, defaults to True - # table_pattern/schema_pattern is same as above - # options is same as above ``` :::tip diff --git a/metadata-ingestion/source_docs/sqlalchemy.md b/metadata-ingestion/source_docs/sqlalchemy.md index 272db30599da5..13e4c7e6b02f0 100644 --- a/metadata-ingestion/source_docs/sqlalchemy.md +++ b/metadata-ingestion/source_docs/sqlalchemy.md @@ -17,8 +17,38 @@ source: config: # See https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls connect_uri: "dialect+driver://username:password@host:port/database" - options: {} # same as above - schema_pattern: {} # same as above - table_pattern: {} # same as above + + # Any options specified here will be passed to SQLAlchemy's create_engine as kwargs. + # See https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine for details. + # Many of these options are specific to the underlying database driver, so that library's + # documentation will be a good reference for what is supported. To find which dialect is likely + # in use, consult this table: https://docs.sqlalchemy.org/en/14/dialects/index.html. + options: + # driver_option: some-option + + # Tables to allow/deny + table_pattern: + deny: + # Note that the deny patterns take precedence over the allow patterns. + - "bad_table" + - "junk_table" + # Can also be a regular expression + - "(old|used|deprecated)_table" + allow: + - "good_table" + - "excellent_table" + + # Although the 'table_pattern' enables you to skip everything from certain schemas, + # having another option to allow/deny on schema level is an optimization for the case when there is a large number + # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter + # them out afterwards via the table_pattern. 
+ schema_pattern: + deny: + - "bad_schema" + - "junk_table" + allow: + - "good_schema" + - "excellent_schema" + include_views: True # whether to include views, defaults to True ``` From 34fbccf5ff7f8daa14b416292824cd07ab2691a7 Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Tue, 27 Jul 2021 15:39:36 -0700 Subject: [PATCH 08/33] Add missing sql options --- metadata-ingestion/source_docs/bigquery.md | 22 +++++++++++++------- metadata-ingestion/source_docs/druid.md | 22 +++++++++++++------- metadata-ingestion/source_docs/hive.md | 22 +++++++++++++------- metadata-ingestion/source_docs/mssql.md | 22 +++++++++++++------- metadata-ingestion/source_docs/mysql.md | 22 +++++++++++++------- metadata-ingestion/source_docs/oracle.md | 22 +++++++++++++------- metadata-ingestion/source_docs/postgres.md | 22 +++++++++++++------- metadata-ingestion/source_docs/redshift.md | 22 +++++++++++++------- metadata-ingestion/source_docs/snowflake.md | 22 +++++++++++++------- metadata-ingestion/source_docs/sqlalchemy.md | 22 +++++++++++++------- 10 files changed, 140 insertions(+), 80 deletions(-) diff --git a/metadata-ingestion/source_docs/bigquery.md b/metadata-ingestion/source_docs/bigquery.md index d5c1d15b95aa6..93f5b4949518e 100644 --- a/metadata-ingestion/source_docs/bigquery.md +++ b/metadata-ingestion/source_docs/bigquery.md @@ -34,19 +34,25 @@ source: - "good_table" - "excellent_table" - # Although the 'table_pattern' enables you to skip everything from certain schemas, - # having another option to allow/deny on schema level is an optimization for the case when there is a large number - # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter - # them out afterwards via the table_pattern. + # Although the 'table_pattern' enables you to skip everything from certain schemas, + # having another option to allow/deny on schema level is an optimization for the case when there is a large number + # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter + # them out afterwards via the table_pattern. schema_pattern: deny: - - "bad_schema" - - "junk_table" + # ... + allow: + # ... + + # Same format as table_pattern, used for filtering views + view_pattern: + deny: + # ... allow: - - "good_schema" - - "excellent_schema" + # ... include_views: True # whether to include views, defaults to True + include_tables: True # whether to include views, defaults to True ``` :::tip diff --git a/metadata-ingestion/source_docs/druid.md b/metadata-ingestion/source_docs/druid.md index a2e17e429dd4e..df43204e2eb40 100644 --- a/metadata-ingestion/source_docs/druid.md +++ b/metadata-ingestion/source_docs/druid.md @@ -38,17 +38,23 @@ source: - "good_table" - "excellent_table" - # Although the 'table_pattern' enables you to skip everything from certain schemas, - # having another option to allow/deny on schema level is an optimization for the case when there is a large number - # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter - # them out afterwards via the table_pattern. + # Although the 'table_pattern' enables you to skip everything from certain schemas, + # having another option to allow/deny on schema level is an optimization for the case when there is a large number + # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter + # them out afterwards via the table_pattern. 
   schema_pattern:
     deny:
-      - "bad_schema"
-      - "junk_table"
+      # ...
+    allow:
+      # ...
+
+  # Same format as table_pattern, used for filtering views
+  view_pattern:
+    deny:
+      # ...
     allow:
-      - "good_schema"
-      - "excellent_schema"
+      # ...
 
   include_views: True # whether to include views, defaults to True
+  include_tables: True # whether to include tables, defaults to True
 ```
diff --git a/metadata-ingestion/source_docs/hive.md b/metadata-ingestion/source_docs/hive.md
index a0448728adebc..9b96457f84eb1 100644
--- a/metadata-ingestion/source_docs/hive.md
+++ b/metadata-ingestion/source_docs/hive.md
@@ -43,19 +43,25 @@ source:
       - "good_table"
       - "excellent_table"
 
-  # Although the 'table_pattern' enables you to skip everything from certain schemas,
-  # having another option to allow/deny on schema level is an optimization for the case when there is a large number
-  # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter
-  # them out afterwards via the table_pattern.
+  # Although the 'table_pattern' enables you to skip everything from certain schemas,
+  # having another option to allow/deny on schema level is an optimization for the case when there is a large number
+  # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter
+  # them out afterwards via the table_pattern.
   schema_pattern:
     deny:
-      - "bad_schema"
-      - "junk_table"
+      # ...
+    allow:
+      # ...
+
+  # Same format as table_pattern, used for filtering views
+  view_pattern:
+    deny:
+      # ...
     allow:
-      - "good_schema"
-      - "excellent_schema"
+      # ...
 
   include_views: True # whether to include views, defaults to True
+  include_tables: True # whether to include tables, defaults to True
 ```
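The recipes in these docs keep describing `table_pattern` / `schema_pattern` blocks of allow/deny regexes, with the note that deny patterns take precedence over allow patterns. As a rough illustration of that behaviour only, here is a minimal Python sketch; the helper name `is_allowed`, the use of `re.match`, and the "empty allow list allows everything" rule are assumptions made for this example, not DataHub's actual filtering code.

```python
import re
from typing import List


def is_allowed(name: str, allow: List[str], deny: List[str]) -> bool:
    """Illustrative allow/deny check: deny patterns take precedence over allow patterns."""
    # If any deny regex matches, the table/schema is skipped outright.
    if any(re.match(pattern, name) for pattern in deny):
        return False
    # Otherwise the name must match at least one allow regex;
    # an empty allow list is treated here as "allow everything".
    return not allow or any(re.match(pattern, name) for pattern in allow)


# Values mirror the table_pattern blocks in the recipes above.
deny_patterns = ["bad_table", "junk_table", "(old|used|deprecated)_table"]
allow_patterns = ["good_table", "excellent_table"]

print(is_allowed("good_table", allow_patterns, deny_patterns))        # True
print(is_allowed("deprecated_table", allow_patterns, deny_patterns))  # False: deny wins
```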
diff --git a/metadata-ingestion/source_docs/mssql.md b/metadata-ingestion/source_docs/mssql.md
index 2104cdd7445f7..12133cf439398 100644
--- a/metadata-ingestion/source_docs/mssql.md
+++ b/metadata-ingestion/source_docs/mssql.md
@@ -39,19 +39,25 @@ source:
       - "good_table"
       - "excellent_table"
 
-  # Although the 'table_pattern' enables you to skip everything from certain schemas,
-  # having another option to allow/deny on schema level is an optimization for the case when there is a large number
-  # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter
-  # them out afterwards via the table_pattern.
+  # Although the 'table_pattern' enables you to skip everything from certain schemas,
+  # having another option to allow/deny on schema level is an optimization for the case when there is a large number
+  # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter
+  # them out afterwards via the table_pattern.
   schema_pattern:
     deny:
-      - "bad_schema"
-      - "junk_table"
+      # ...
+    allow:
+      # ...
+
+  # Same format as table_pattern, used for filtering views
+  view_pattern:
+    deny:
+      # ...
     allow:
-      - "good_schema"
-      - "excellent_schema"
+      # ...
 
   include_views: True # whether to include views, defaults to True
+  include_tables: True # whether to include tables, defaults to True
 
   # If set to true, we'll use the pyodbc library. This requires you to have
   # already installed the Microsoft ODBC Driver for SQL Server.
   # See https://docs.microsoft.com/en-us/sql/connect/python/pyodbc/step-1-configure-development-environment-for-pyodbc-python-development?view=sql-server-ver15
diff --git a/metadata-ingestion/source_docs/mysql.md b/metadata-ingestion/source_docs/mysql.md
index a6d159760f0ac..2ede14967c1d0 100644
--- a/metadata-ingestion/source_docs/mysql.md
+++ b/metadata-ingestion/source_docs/mysql.md
@@ -36,17 +36,23 @@ source:
       - "good_table"
       - "excellent_table"
 
-  # Although the 'table_pattern' enables you to skip everything from certain schemas,
-  # having another option to allow/deny on schema level is an optimization for the case when there is a large number
-  # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter
-  # them out afterwards via the table_pattern.
+  # Although the 'table_pattern' enables you to skip everything from certain schemas,
+  # having another option to allow/deny on schema level is an optimization for the case when there is a large number
+  # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter
+  # them out afterwards via the table_pattern.
   schema_pattern:
     deny:
-      - "bad_schema"
-      - "junk_table"
+      # ...
+    allow:
+      # ...
+
+  # Same format as table_pattern, used for filtering views
+  view_pattern:
+    deny:
+      # ...
     allow:
-      - "good_schema"
-      - "excellent_schema"
+      # ...
 
   include_views: True # whether to include views, defaults to True
+  include_tables: True # whether to include tables, defaults to True
 ```
diff --git a/metadata-ingestion/source_docs/oracle.md b/metadata-ingestion/source_docs/oracle.md
index fc0bd1e8f63ef..6550d74c4ea7f 100644
--- a/metadata-ingestion/source_docs/oracle.md
+++ b/metadata-ingestion/source_docs/oracle.md
@@ -42,17 +42,23 @@ source:
       - "good_table"
       - "excellent_table"
 
-  # Although the 'table_pattern' enables you to skip everything from certain schemas,
-  # having another option to allow/deny on schema level is an optimization for the case when there is a large number
-  # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter
-  # them out afterwards via the table_pattern.
+  # Although the 'table_pattern' enables you to skip everything from certain schemas,
+  # having another option to allow/deny on schema level is an optimization for the case when there is a large number
+  # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter
+  # them out afterwards via the table_pattern.
   schema_pattern:
     deny:
-      - "bad_schema"
-      - "junk_table"
+      # ...
+    allow:
+      # ...
+
+  # Same format as table_pattern, used for filtering views
+  view_pattern:
+    deny:
+      # ...
     allow:
-      - "good_schema"
-      - "excellent_schema"
+      # ...
 
   include_views: True # whether to include views, defaults to True
+  include_tables: True # whether to include tables, defaults to True
 ```
diff --git a/metadata-ingestion/source_docs/postgres.md b/metadata-ingestion/source_docs/postgres.md
index b8042c69d2e53..94bcacaa49775 100644
--- a/metadata-ingestion/source_docs/postgres.md
+++ b/metadata-ingestion/source_docs/postgres.md
@@ -39,17 +39,23 @@ source:
       - "good_table"
       - "excellent_table"
 
-  # Although the 'table_pattern' enables you to skip everything from certain schemas,
-  # having another option to allow/deny on schema level is an optimization for the case when there is a large number
-  # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter
-  # them out afterwards via the table_pattern.
+  # Although the 'table_pattern' enables you to skip everything from certain schemas,
+  # having another option to allow/deny on schema level is an optimization for the case when there is a large number
+  # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter
+  # them out afterwards via the table_pattern.
   schema_pattern:
     deny:
-      - "bad_schema"
-      - "junk_table"
+      # ...
+    allow:
+      # ...
+
+  # Same format as table_pattern, used for filtering views
+  view_pattern:
+    deny:
+      # ...
     allow:
-      - "good_schema"
-      - "excellent_schema"
+      # ...
 
   include_views: True # whether to include views, defaults to True
+  include_tables: True # whether to include tables, defaults to True
 ```
diff --git a/metadata-ingestion/source_docs/redshift.md b/metadata-ingestion/source_docs/redshift.md
index bf74400a0103c..609dfd8d87715 100644
--- a/metadata-ingestion/source_docs/redshift.md
+++ b/metadata-ingestion/source_docs/redshift.md
@@ -37,19 +37,25 @@ source:
       - "good_table"
       - "excellent_table"
 
-  # Although the 'table_pattern' enables you to skip everything from certain schemas,
-  # having another option to allow/deny on schema level is an optimization for the case when there is a large number
-  # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter
-  # them out afterwards via the table_pattern.
+  # Although the 'table_pattern' enables you to skip everything from certain schemas,
+  # having another option to allow/deny on schema level is an optimization for the case when there is a large number
+  # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter
+  # them out afterwards via the table_pattern.
   schema_pattern:
     deny:
-      - "bad_schema"
-      - "junk_table"
+      # ...
+    allow:
+      # ...
+
+  # Same format as table_pattern, used for filtering views
+  view_pattern:
+    deny:
+      # ...
     allow:
-      - "good_schema"
-      - "excellent_schema"
+      # ...
 
   include_views: True # whether to include views, defaults to True
+  include_tables: True # whether to include tables, defaults to True
 ```
diff --git a/metadata-ingestion/source_docs/snowflake.md b/metadata-ingestion/source_docs/snowflake.md index 57ede6a5d248a..a1badd26f0aa6 100644 --- a/metadata-ingestion/source_docs/snowflake.md +++ b/metadata-ingestion/source_docs/snowflake.md @@ -45,19 +45,25 @@ source: - "good_table" - "excellent_table" - # Although the 'table_pattern' enables you to skip everything from certain schemas, - # having another option to allow/deny on schema level is an optimization for the case when there is a large number - # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter - # them out afterwards via the table_pattern. + # Although the 'table_pattern' enables you to skip everything from certain schemas, + # having another option to allow/deny on schema level is an optimization for the case when there is a large number + # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter + # them out afterwards via the table_pattern. schema_pattern: deny: - - "bad_schema" - - "junk_table" + # ... + allow: + # ... + + # Same format as table_pattern, used for filtering views + view_pattern: + deny: + # ... allow: - - "good_schema" - - "excellent_schema" + # ... include_views: True # whether to include views, defaults to True + include_tables: True # whether to include views, defaults to True ``` :::tip diff --git a/metadata-ingestion/source_docs/sqlalchemy.md b/metadata-ingestion/source_docs/sqlalchemy.md index 13e4c7e6b02f0..ad20ed77bfc4c 100644 --- a/metadata-ingestion/source_docs/sqlalchemy.md +++ b/metadata-ingestion/source_docs/sqlalchemy.md @@ -38,17 +38,23 @@ source: - "good_table" - "excellent_table" - # Although the 'table_pattern' enables you to skip everything from certain schemas, - # having another option to allow/deny on schema level is an optimization for the case when there is a large number - # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter - # them out afterwards via the table_pattern. + # Although the 'table_pattern' enables you to skip everything from certain schemas, + # having another option to allow/deny on schema level is an optimization for the case when there is a large number + # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter + # them out afterwards via the table_pattern. schema_pattern: deny: - - "bad_schema" - - "junk_table" + # ... + allow: + # ... + + # Same format as table_pattern, used for filtering views + view_pattern: + deny: + # ... allow: - - "good_schema" - - "excellent_schema" + # ... 
include_views: True # whether to include views, defaults to True + include_tables: True # whether to include views, defaults to True ``` From 9808735a4d31827edc1d606d0c9403b0f2ab153b Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Tue, 27 Jul 2021 15:58:54 -0700 Subject: [PATCH 09/33] More consistent recipes --- metadata-ingestion/source_docs/bigquery.md | 4 +++- metadata-ingestion/source_docs/dbt.md | 10 ++++++++++ metadata-ingestion/source_docs/glue.md | 17 +++++++++++++++-- metadata-ingestion/source_docs/hive.md | 1 - metadata-ingestion/source_docs/kafka.md | 19 +++++++++++++++++-- 5 files changed, 45 insertions(+), 6 deletions(-) diff --git a/metadata-ingestion/source_docs/bigquery.md b/metadata-ingestion/source_docs/bigquery.md index 93f5b4949518e..2aec46deade94 100644 --- a/metadata-ingestion/source_docs/bigquery.md +++ b/metadata-ingestion/source_docs/bigquery.md @@ -86,13 +86,15 @@ source: options: # See https://googleapis.dev/python/logging/latest/client.html for details. credentials: ~ # optional - see docs - env: PROD + # Common usage stats options bucket_duration: "DAY" start_time: ~ # defaults to the last full day in UTC (or hour) end_time: ~ # defaults to the last full day in UTC (or hour) top_n_queries: 10 # number of queries to save for each table + + env: PROD ``` :::note diff --git a/metadata-ingestion/source_docs/dbt.md b/metadata-ingestion/source_docs/dbt.md index 8a4f72794e7f8..aee7c4736cf08 100644 --- a/metadata-ingestion/source_docs/dbt.md +++ b/metadata-ingestion/source_docs/dbt.md @@ -23,11 +23,21 @@ This plugin pulls metadata from dbt's artifact files: source: type: "dbt" config: + # https://docs.getdbt.com/reference/artifacts/manifest-json manifest_path: "./path/dbt/manifest_file.json" + # https://docs.getdbt.com/reference/artifacts/catalog-json catalog_path: "./path/dbt/catalog_file.json" + # https://docs.getdbt.com/reference/artifacts/sources-json sources_path: "./path/dbt/sources_file.json" # (optional, used for freshness checks) + + # the platform that dbt is loading onto target_platform: "postgres" # optional, eg "postgres", "snowflake", etc. + + # whether to load schemas of datasets from dbt + # (otherwise, only includes a simple list of tables) load_schemas: True or False + + # regex pattern to allow/deny nodes node_type_pattern: # optional deny: - ^test.* diff --git a/metadata-ingestion/source_docs/glue.md b/metadata-ingestion/source_docs/glue.md index 661bab8f6a759..a51add12c54be 100644 --- a/metadata-ingestion/source_docs/glue.md +++ b/metadata-ingestion/source_docs/glue.md @@ -27,7 +27,20 @@ source: extract_transforms: True # whether to ingest Glue jobs, defaults to True - # Filtering patterns for databases and tables to scan - database_pattern: # Optional, to filter databases scanned, same as schema_pattern above. + # Regex filters for databases to scan + database_pattern: + deny: + # Note that the deny patterns take precedence over the allow patterns. + - "bad_database" + - "junk_database" + # Can also be a regular expression + - "(old|used|deprecated)_database" + allow: + - "good_database" + - "excellent_database" table_pattern: # Optional, to filter tables scanned, same as table_pattern above. + deny: + # ... + allow: + # ... ``` diff --git a/metadata-ingestion/source_docs/hive.md b/metadata-ingestion/source_docs/hive.md index 9b96457f84eb1..e448ebba63fce 100644 --- a/metadata-ingestion/source_docs/hive.md +++ b/metadata-ingestion/source_docs/hive.md @@ -60,7 +60,6 @@ source: allow: # ... 
- include_views: True # whether to include views, defaults to True include_tables: True # whether to include views, defaults to True ``` diff --git a/metadata-ingestion/source_docs/kafka.md b/metadata-ingestion/source_docs/kafka.md index d87f0f4236b17..8fb57eda3b8f0 100644 --- a/metadata-ingestion/source_docs/kafka.md +++ b/metadata-ingestion/source_docs/kafka.md @@ -13,9 +13,24 @@ source: config: connection: bootstrap: "broker:9092" - consumer_config: {} # passed to https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#confluent_kafka.DeserializingConsumer schema_registry_url: http://localhost:8081 - schema_registry_config: {} # passed to https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#confluent_kafka.schema_registry.SchemaRegistryClient + + # Extra schema registry config. + # These options will be passed into Kafka's SchemaRegistryClient. + # See https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html?#schemaregistryclient + schema_registry_config: {} + + # Extra consumer config. + # These options will be passed into Kafka's DeserializingConsumer. + # See https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#deserializingconsumer + # and https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md. + consumer_config: {} + + # Extra producer config. + # These options will be passed into Kafka's SerializingProducer. + # See https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#serializingproducer + # and https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md. + producer_config: {} ``` The options in the consumer config and schema registry config are passed to the Kafka DeserializingConsumer and SchemaRegistryClient respectively. 
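The kafka recipe above notes that the `consumer_config` and `schema_registry_config` blocks are passed straight through to Kafka's `DeserializingConsumer` and `SchemaRegistryClient`. Below is a minimal sketch of what that pass-through could look like with placeholder values; the merge order, the `group.id` used, and the example consumer option are illustrative assumptions, not the source's actual wiring.

```python
from confluent_kafka import DeserializingConsumer
from confluent_kafka.schema_registry import SchemaRegistryClient

# Values as they might appear in the recipe above.
bootstrap = "broker:9092"
schema_registry_url = "http://localhost:8081"
schema_registry_config = {}                       # recipe's schema_registry_config
consumer_config = {"session.timeout.ms": 10000}   # recipe's consumer_config

# The schema registry options are merged with the registry URL.
registry_client = SchemaRegistryClient({"url": schema_registry_url, **schema_registry_config})

# The consumer options are merged with the bootstrap servers and a consumer group
# (the group id here is a placeholder, not the one the source actually uses).
consumer = DeserializingConsumer(
    {
        "bootstrap.servers": bootstrap,
        "group.id": "datahub-kafka-ingestion",
        **consumer_config,
    }
)

print(registry_client, consumer)
```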
From 9af3cab61a66fbee0b27de8df43ab7ada31f5fed Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Tue, 27 Jul 2021 16:41:25 -0700 Subject: [PATCH 10/33] Finish consistency checks for recipes --- metadata-ingestion/README.md | 53 ++++++++++--------- metadata-ingestion/source_docs/bigquery.md | 15 ++++++ .../source_docs/kafka-connect.md | 24 +++++++++ metadata-ingestion/source_docs/kafka.md | 25 --------- metadata-ingestion/source_docs/ldap.md | 7 +++ metadata-ingestion/source_docs/looker.md | 20 +++++-- metadata-ingestion/source_docs/lookml.md | 20 +++++-- metadata-ingestion/source_docs/mongodb.md | 25 ++++++--- metadata-ingestion/source_docs/mssql.md | 1 + metadata-ingestion/source_docs/snowflake.md | 19 ++++--- metadata-ingestion/source_docs/superset.md | 4 +- 11 files changed, 141 insertions(+), 72 deletions(-) create mode 100644 metadata-ingestion/source_docs/kafka-connect.md diff --git a/metadata-ingestion/README.md b/metadata-ingestion/README.md index 41826a668a220..d13f846479950 100644 --- a/metadata-ingestion/README.md +++ b/metadata-ingestion/README.md @@ -32,32 +32,33 @@ We use a plugin architecture so that you can install only the dependencies you a Sources: -| Plugin Name | Install Command | Provides | -| --------------------------------------------- | ---------------------------------------------------------- | ----------------------------------- | -| [file](./source_docs/file.md) | _included by default_ | File source and sink | -| [athena](./source_docs/athena.md) | `pip install 'acryl-datahub[athena]'` | AWS Athena source | -| [bigquery](./source_docs/bigquery.md) | `pip install 'acryl-datahub[bigquery]'` | BigQuery source | -| [bigquery-usage](./source_docs/bigquery.md) | `pip install 'acryl-datahub[bigquery-usage]'` | BigQuery usage statistics source | -| [dbt](./source_docs/dbt.md) | _no additional dependencies_ | dbt source | -| [druid](./source_docs/druid.md) | `pip install 'acryl-datahub[druid]'` | Druid Source | -| [feast](./source_docs/feast.md) | `pip install 'acryl-datahub[feast]'` | Feast source | -| [glue](./source_docs/glue.md) | `pip install 'acryl-datahub[glue]'` | AWS Glue source | -| [hive](./source_docs/hive.md) | `pip install 'acryl-datahub[hive]'` | Hive source | -| [kafka](./source_docs/kafka.md) | `pip install 'acryl-datahub[kafka]'` | Kafka source | -| [ldap](./source_docs/ldap.md) | `pip install 'acryl-datahub[ldap]'` ([extra requirements]) | LDAP source | -| [looker](./source_docs/looker.md) | `pip install 'acryl-datahub[looker]'` | Looker source | -| [lookml](./source_docs/lookml.md) | `pip install 'acryl-datahub[lookml]'` | LookML source, requires Python 3.7+ | -| [mongodb](./source_docs/mongodb.md) | `pip install 'acryl-datahub[mongodb]'` | MongoDB source | -| [mssql](./source_docs/mssql.md) | `pip install 'acryl-datahub[mssql]'` | SQL Server source | -| [mysql](./source_docs/mysql.md) | `pip install 'acryl-datahub[mysql]'` | MySQL source | -| [oracle](./source_docs/oracle.md) | `pip install 'acryl-datahub[oracle]'` | Oracle source | -| [postgres](./source_docs/postgres.md) | `pip install 'acryl-datahub[postgres]'` | Postgres source | -| [redshift](./source_docs/redshift.md) | `pip install 'acryl-datahub[redshift]'` | Redshift source | -| [sagemaker](./source_docs/sagemaker.md) | `pip install 'acryl-datahub[sagemaker]'` | AWS SageMaker source | -| [snowflake](./source_docs/snowflake.md) | `pip install 'acryl-datahub[snowflake]'` | Snowflake source | -| [snowflake-usage](./source_docs/snowflake.md) | `pip install 'acryl-datahub[snowflake-usage]'` | 
Snowflake usage statistics source | -| [sqlalchemy](./source_docs/sqlalchemy.md) | `pip install 'acryl-datahub[sqlalchemy]'` | Generic SQLAlchemy source | -| [superset](./source_docs/superset.md) | `pip install 'acryl-datahub[superset]'` | Superset source | +| Plugin Name | Install Command | Provides | +| ----------------------------------------------- | ---------------------------------------------------------- | ----------------------------------- | +| [file](./source_docs/file.md) | _included by default_ | File source and sink | +| [athena](./source_docs/athena.md) | `pip install 'acryl-datahub[athena]'` | AWS Athena source | +| [bigquery](./source_docs/bigquery.md) | `pip install 'acryl-datahub[bigquery]'` | BigQuery source | +| [bigquery-usage](./source_docs/bigquery.md) | `pip install 'acryl-datahub[bigquery-usage]'` | BigQuery usage statistics source | +| [dbt](./source_docs/dbt.md) | _no additional dependencies_ | dbt source | +| [druid](./source_docs/druid.md) | `pip install 'acryl-datahub[druid]'` | Druid Source | +| [feast](./source_docs/feast.md) | `pip install 'acryl-datahub[feast]'` | Feast source | +| [glue](./source_docs/glue.md) | `pip install 'acryl-datahub[glue]'` | AWS Glue source | +| [hive](./source_docs/hive.md) | `pip install 'acryl-datahub[hive]'` | Hive source | +| [kafka](./source_docs/kafka.md) | `pip install 'acryl-datahub[kafka]'` | Kafka source | +| [kafka-connect](./source_docs/kafka-connect.md) | `pip install 'acryl-datahub[kafka-connect]'` | Kafka connect source | +| [ldap](./source_docs/ldap.md) | `pip install 'acryl-datahub[ldap]'` ([extra requirements]) | LDAP source | +| [looker](./source_docs/looker.md) | `pip install 'acryl-datahub[looker]'` | Looker source | +| [lookml](./source_docs/lookml.md) | `pip install 'acryl-datahub[lookml]'` | LookML source, requires Python 3.7+ | +| [mongodb](./source_docs/mongodb.md) | `pip install 'acryl-datahub[mongodb]'` | MongoDB source | +| [mssql](./source_docs/mssql.md) | `pip install 'acryl-datahub[mssql]'` | SQL Server source | +| [mysql](./source_docs/mysql.md) | `pip install 'acryl-datahub[mysql]'` | MySQL source | +| [oracle](./source_docs/oracle.md) | `pip install 'acryl-datahub[oracle]'` | Oracle source | +| [postgres](./source_docs/postgres.md) | `pip install 'acryl-datahub[postgres]'` | Postgres source | +| [redshift](./source_docs/redshift.md) | `pip install 'acryl-datahub[redshift]'` | Redshift source | +| [sagemaker](./source_docs/sagemaker.md) | `pip install 'acryl-datahub[sagemaker]'` | AWS SageMaker source | +| [snowflake](./source_docs/snowflake.md) | `pip install 'acryl-datahub[snowflake]'` | Snowflake source | +| [snowflake-usage](./source_docs/snowflake.md) | `pip install 'acryl-datahub[snowflake-usage]'` | Snowflake usage statistics source | +| [sqlalchemy](./source_docs/sqlalchemy.md) | `pip install 'acryl-datahub[sqlalchemy]'` | Generic SQLAlchemy source | +| [superset](./source_docs/superset.md) | `pip install 'acryl-datahub[superset]'` | Superset source | Sinks diff --git a/metadata-ingestion/source_docs/bigquery.md b/metadata-ingestion/source_docs/bigquery.md index 2aec46deade94..6ef504bd3b9f9 100644 --- a/metadata-ingestion/source_docs/bigquery.md +++ b/metadata-ingestion/source_docs/bigquery.md @@ -95,6 +95,21 @@ source: top_n_queries: 10 # number of queries to save for each table env: PROD + + # Additional options to pass to google.cloud.logging_v2.client.Client + extra_client_options: + + # To account for the possibility that the query event arrives after + # the read event in the audit 
logs, we wait for at least `query_log_delay` + # additional events to be processed before attempting to resolve BigQuery + # job information from the logs. If `query_log_delay` is None, it gets treated + # as an unlimited delay, which prioritizes correctness at the expense of memory usage. + query_log_delay: + + # Correction to pad start_time and end_time with. + # For handling the case where the read happens within our time range but the query + # completion event is delayed and happens after the configured end time. + max_query_duration: ``` :::note diff --git a/metadata-ingestion/source_docs/kafka-connect.md b/metadata-ingestion/source_docs/kafka-connect.md new file mode 100644 index 0000000000000..1b15a4b20269f --- /dev/null +++ b/metadata-ingestion/source_docs/kafka-connect.md @@ -0,0 +1,24 @@ +# Kafka Connect `kafka-connect` + +This plugin extracts the following: + +- Kafka Connect connector as individual `DataFlowSnapshotClass` entity +- Creating individual `DataJobSnapshotClass` entity using `{connector_name}:{source_dataset}` naming +- Lineage information between source database to Kafka topic + +```yml +source: + type: "kafka-connect" + config: + connect_uri: "http://localhost:8083" + cluster_name: "connect-cluster" + connector_patterns: + deny: + - ^denied-connector.* + allow: + - ^allowed-connector.* +``` + +Current limitations: + +- Currently works only for Debezium source connectors. diff --git a/metadata-ingestion/source_docs/kafka.md b/metadata-ingestion/source_docs/kafka.md index 8fb57eda3b8f0..4dfdd901cffc6 100644 --- a/metadata-ingestion/source_docs/kafka.md +++ b/metadata-ingestion/source_docs/kafka.md @@ -36,28 +36,3 @@ source: The options in the consumer config and schema registry config are passed to the Kafka DeserializingConsumer and SchemaRegistryClient respectively. For a full example with a number of security options, see this [example recipe](../examples/recipes/secured_kafka.yml). - -# Kafka Connect `kafka-connect` - -This plugin extracts the following: - -- Kafka Connect connector as individual `DataFlowSnapshotClass` entity -- Creating individual `DataJobSnapshotClass` entity using `{connector_name}:{source_dataset}` naming -- Lineage information between source database to Kafka topic - -```yml -source: - type: "kafka-connect" - config: - connect_uri: "http://localhost:8083" - cluster_name: "connect-cluster" - connector_patterns: - deny: - - ^denied-connector.* - allow: - - ^allowed-connector.* -``` - -Current limitations: - -- Currently works only for Debezium source connectors. diff --git a/metadata-ingestion/source_docs/ldap.md b/metadata-ingestion/source_docs/ldap.md index b1df1f385a99f..696ab8277f6af 100644 --- a/metadata-ingestion/source_docs/ldap.md +++ b/metadata-ingestion/source_docs/ldap.md @@ -15,9 +15,16 @@ source: ldap_server: ldap://localhost ldap_user: "cn=admin,dc=example,dc=org" ldap_password: "admin" + + # Extraction configuration. base_dn: "dc=example,dc=org" filter: "(objectClass=*)" # optional field + + # If set to true, any users without first and last names will be dropped. 
drop_missing_first_last_name: False # optional + + # For creating LDAP controls + page_size: # default is 20 ``` The `drop_missing_first_last_name` should be set to true if you've got many "headless" user LDAP accounts diff --git a/metadata-ingestion/source_docs/looker.md b/metadata-ingestion/source_docs/looker.md index c395781b7a2d9..20af147c0a4ef 100644 --- a/metadata-ingestion/source_docs/looker.md +++ b/metadata-ingestion/source_docs/looker.md @@ -16,9 +16,23 @@ source: client_id: # Your Looker API3 client ID client_secret: # Your Looker API3 client secret base_url: # The url to your Looker instance: https://company.looker.com:19999 or https://looker.company.com, or similar. - dashboard_pattern: # supports allow/deny regexes - chart_pattern: # supports allow/deny regexes + + platform_name: "looker" # Optional, default is "looker" actor: urn:li:corpuser:etl # Optional, defaults to urn:li:corpuser:etl + + # regex pattern to allow/deny dashboards + dashboard_pattern: + deny: + # ... + allow: + # ... + + # regex pattern to allow/deny charts + chart_pattern: + deny: + # ... + allow: + # ... + env: "PROD" # Optional, default is "PROD" - platform_name: "looker" # Optional, default is "looker" ``` diff --git a/metadata-ingestion/source_docs/lookml.md b/metadata-ingestion/source_docs/lookml.md index 407656c583850..5591f32cd74c1 100644 --- a/metadata-ingestion/source_docs/lookml.md +++ b/metadata-ingestion/source_docs/lookml.md @@ -16,11 +16,25 @@ source: base_folder: /path/to/model/files # where the *.model.lkml and *.view.lkml files are stored connection_to_platform_map: # mappings between connection names in the model files to platform names connection_name: platform_name (or platform_name.database_name) # for ex. my_snowflake_conn: snowflake.my_database - model_pattern: {} - view_pattern: {} + + platform_name: "looker" # optional, default is "looker" + + # regex pattern to allow/deny models + model_pattern: + deny: + # ... + allow: + # ... + + # regex pattern to allow/deny views + view_pattern: + deny: + # ... + allow: + # ... + env: "PROD" # optional, default is "PROD" parse_table_names_from_sql: False # see note below - platform_name: "looker" # optional, default is "looker" ``` Note! The integration can use [`sql-metadata`](https://pypi.org/project/sql-metadata/) to try to parse the tables the diff --git a/metadata-ingestion/source_docs/mongodb.md b/metadata-ingestion/source_docs/mongodb.md index 9cb1140e7bced..142fb3cb5f88c 100644 --- a/metadata-ingestion/source_docs/mongodb.md +++ b/metadata-ingestion/source_docs/mongodb.md @@ -21,13 +21,26 @@ source: connect_uri: "mongodb://localhost" username: admin password: password - env: "PROD" # Optional, default is "PROD" + # used for PyMongo authMechanism: "DEFAULT" - options: {} - database_pattern: {} - collection_pattern: {} + + options: {} # kwargs to pass to pymongo.MongoClient enableSchemaInference: True - schemaSamplingSize: 1000 + schemaSamplingSize: 1000 # number of samples for determining schema useRandomSampling: True # whether to randomly sample docs for schema or just use the first ones, True by default - # database_pattern/collection_pattern are similar to schema_pattern/table_pattern from above + + env: "PROD" # Optional, default is "PROD" + + # regex pattern to allow/deny databases + database_pattern: + deny: + # ... + allow: + # ... + # regex pattern to allow/deny collections + collection_pattern: + deny: + # ... + allow: + # ... 
``` diff --git a/metadata-ingestion/source_docs/mssql.md b/metadata-ingestion/source_docs/mssql.md index 12133cf439398..a388231854c26 100644 --- a/metadata-ingestion/source_docs/mssql.md +++ b/metadata-ingestion/source_docs/mssql.md @@ -63,6 +63,7 @@ source: # already installed the Microsoft ODBC Driver for SQL Server. # See https://docs.microsoft.com/en-us/sql/connect/python/pyodbc/step-1-configure-development-environment-for-pyodbc-python-development?view=sql-server-ver15 use_odbc: False + # args URL-encode and append to the mssql connection URL uri_args: {} ``` diff --git a/metadata-ingestion/source_docs/snowflake.md b/metadata-ingestion/source_docs/snowflake.md index a1badd26f0aa6..dc1e7dda50bef 100644 --- a/metadata-ingestion/source_docs/snowflake.md +++ b/metadata-ingestion/source_docs/snowflake.md @@ -14,14 +14,7 @@ source: username: user password: pass host_port: account_name - database_pattern: - # The escaping of the $ symbol helps us skip the environment variable substitution. - allow: - - ^MY_DEMO_DATA.* - - ^ANOTHER_DB_REGEX - deny: - - ^SNOWFLAKE\$ - - ^SNOWFLAKE_SAMPLE_DATA\$ + warehouse: "COMPUTE_WH" # optional role: "sysadmin" # optional @@ -33,6 +26,16 @@ source: options: # driver_option: some-option + # Regexe filters for databases to allow/deny + database_pattern: + # The escaping of the $ symbol helps us skip the environment variable substitution. + allow: + - ^MY_DEMO_DATA.* + - ^ANOTHER_DB_REGEX + deny: + - ^SNOWFLAKE\$ + - ^SNOWFLAKE_SAMPLE_DATA\$ + # Tables to allow/deny table_pattern: deny: diff --git a/metadata-ingestion/source_docs/superset.md b/metadata-ingestion/source_docs/superset.md index 5b83566edc960..d0910528d0cba 100644 --- a/metadata-ingestion/source_docs/superset.md +++ b/metadata-ingestion/source_docs/superset.md @@ -10,10 +10,12 @@ This plugin extracts the following: source: type: superset config: + connect_uri: http://localhost:8088 + username: user password: pass provider: db | ldap - connect_uri: http://localhost:8088 + env: "PROD" # Optional, default is "PROD" ``` From 9dc365fa7c9742b856af5f28199db79ebb337293 Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Tue, 27 Jul 2021 17:39:50 -0700 Subject: [PATCH 11/33] As above --- docs/features.md | 2 +- .../examples/recipes/mongodb_to_datahub.yml | 1 - metadata-ingestion/source_docs/athena.md | 30 ++++++++++++++++++- metadata-ingestion/source_docs/bigquery.md | 2 +- metadata-ingestion/source_docs/hive.md | 2 +- metadata-ingestion/source_docs/redshift.md | 2 +- 6 files changed, 33 insertions(+), 6 deletions(-) diff --git a/docs/features.md b/docs/features.md index e02c8dee47a36..01168a1109577 100644 --- a/docs/features.md +++ b/docs/features.md @@ -40,7 +40,7 @@ Our open sourcing [blog post](https://engineering.linkedin.com/blog/2020/open-so - **Schema history**: view and diff historic versions of schemas - **GraphQL**: visualization of GraphQL schemas -### Jos/flows [*coming soon*] +### Jobs/flows [*coming soon*] - **Search**: full-text & advanced search, search ranking - **Browse**: browsing through a configurable hierarchy - **Basic information**: diff --git a/metadata-ingestion/examples/recipes/mongodb_to_datahub.yml b/metadata-ingestion/examples/recipes/mongodb_to_datahub.yml index 931524093284a..6f1c3cae832a2 100644 --- a/metadata-ingestion/examples/recipes/mongodb_to_datahub.yml +++ b/metadata-ingestion/examples/recipes/mongodb_to_datahub.yml @@ -13,7 +13,6 @@ source: collection_pattern: {} enableSchemaInference: True schemaSamplingSize: 1000 - # database_pattern/collection_pattern are similar to 
schema_pattern/table_pattern from above sink: type: "datahub-rest" config: diff --git a/metadata-ingestion/source_docs/athena.md b/metadata-ingestion/source_docs/athena.md index 7792511729487..665bf8a3905c2 100644 --- a/metadata-ingestion/source_docs/athena.md +++ b/metadata-ingestion/source_docs/athena.md @@ -24,5 +24,33 @@ source: # However, the athena driver will transparently fetch these results as you would expect from any other sql client. work_group: athena_workgroup # "primary" - # table_pattern/schema_pattern is same as above + + # Tables to allow/deny + table_pattern: + deny: + # Note that the deny patterns take precedence over the allow patterns. + - "bad_table" + - "junk_table" + # Can also be a regular expression + - "(old|used|deprecated)_table" + allow: + - "good_table" + - "excellent_table" + + # Although the 'table_pattern' enables you to skip everything from certain schemas, + # having another option to allow/deny on schema level is an optimization for the case when there is a large number + # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter + # them out afterwards via the table_pattern. + schema_pattern: + deny: + # ... + allow: + # ... + + # Same format as table_pattern, used for filtering views + view_pattern: + deny: + # ... + allow: + # ... ``` diff --git a/metadata-ingestion/source_docs/bigquery.md b/metadata-ingestion/source_docs/bigquery.md index 6ef504bd3b9f9..a92014cd3bebe 100644 --- a/metadata-ingestion/source_docs/bigquery.md +++ b/metadata-ingestion/source_docs/bigquery.md @@ -18,7 +18,7 @@ source: # Many of these options are specific to the underlying database driver, so that library's # documentation will be a good reference for what is supported. To find which dialect is likely # in use, consult this table: https://docs.sqlalchemy.org/en/14/dialects/index.html. - options: # options is same as above + options: # See https://github.com/mxmzdlv/pybigquery#authentication for details. credentials_path: "/path/to/keyfile.json" # optional diff --git a/metadata-ingestion/source_docs/hive.md b/metadata-ingestion/source_docs/hive.md index e448ebba63fce..87e0e137bab28 100644 --- a/metadata-ingestion/source_docs/hive.md +++ b/metadata-ingestion/source_docs/hive.md @@ -79,7 +79,7 @@ source: connect_args: http_path: "/hive2" auth: BASIC - # table_pattern/schema_pattern is same as above + # ... table_pattern/schema_pattern ```
diff --git a/metadata-ingestion/source_docs/redshift.md b/metadata-ingestion/source_docs/redshift.md index 609dfd8d87715..a905d6771209b 100644 --- a/metadata-ingestion/source_docs/redshift.md +++ b/metadata-ingestion/source_docs/redshift.md @@ -68,7 +68,7 @@ See https://docs.microsoft.com/en-us/sql/connect/python/pyodbc/step-1-configure- source: type: redshift config: - # username, password, database, etc are all the same as above + # username, password, database, etc... host_port: my-proxy-hostname:5439 options: connect_args: From 9afa393bf75a3387a40fe0ab23ec2f0258d70efd Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Tue, 27 Jul 2021 17:48:03 -0700 Subject: [PATCH 12/33] Typo fixes --- metadata-ingestion/source_docs/bigquery.md | 2 +- metadata-ingestion/source_docs/snowflake.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/metadata-ingestion/source_docs/bigquery.md b/metadata-ingestion/source_docs/bigquery.md index a92014cd3bebe..5c5839e48260f 100644 --- a/metadata-ingestion/source_docs/bigquery.md +++ b/metadata-ingestion/source_docs/bigquery.md @@ -114,6 +114,6 @@ source: :::note -This source only does usage statistics. To get the tables, views, and schemas in your BigQuery project, use the `bigquery` source. +This source only does usage statistics. To get the tables, views, and schemas in your BigQuery project, use the `bigquery` source described above. ::: diff --git a/metadata-ingestion/source_docs/snowflake.md b/metadata-ingestion/source_docs/snowflake.md index dc1e7dda50bef..25a709bdbc03f 100644 --- a/metadata-ingestion/source_docs/snowflake.md +++ b/metadata-ingestion/source_docs/snowflake.md @@ -71,7 +71,7 @@ source: :::tip -You can also get fine-grained usage statistics for Snowflake using the `snowflake-usage` source. +You can also get fine-grained usage statistics for Snowflake using the `snowflake-usage` source described below. ::: @@ -106,6 +106,6 @@ source: :::note -This source only does usage statistics. To get the tables, views, and schemas in your Snowflake warehouse, ingest using the `snowflake` source. +This source only does usage statistics. To get the tables, views, and schemas in your Snowflake warehouse, ingest using the `snowflake` source described above. ::: From c6388cba34d4ff5adc31579bed0d22406272b504 Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Tue, 27 Jul 2021 17:58:53 -0700 Subject: [PATCH 13/33] More typo fixes --- docs-website/generateDocsDir.ts | 4 +++- metadata-ingestion/README.md | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/docs-website/generateDocsDir.ts b/docs-website/generateDocsDir.ts index 180d3c97c5548..82c6568247ea3 100644 --- a/docs-website/generateDocsDir.ts +++ b/docs-website/generateDocsDir.ts @@ -160,7 +160,9 @@ function markdown_guess_title( const headers = contents.content.match(/^# (.+)$/gm); if (!headers) { - throw new Error(`${filepath} must have at least one h1 header`); + throw new Error( + `${filepath} must have at least one h1 header for setting the title` + ); } if (headers.length > 1 && contents.content.indexOf("```") < 0) { diff --git a/metadata-ingestion/README.md b/metadata-ingestion/README.md index d13f846479950..5d408ef45a400 100644 --- a/metadata-ingestion/README.md +++ b/metadata-ingestion/README.md @@ -145,7 +145,7 @@ Running a recipe is quite easy. datahub ingest -c ./examples/recipes/mssql_to_datahub.yml ``` -A number of recipes are included in the [examples/recipes](./examples/recipes) directory. 
See also pages described in the [table of plugins](#installing-plugins) for more context on recipe options for each source and sink. +A number of recipes are included in the [examples/recipes](./examples/recipes) directory. For full info and context on each source and sink, see the pages described in the [table of plugins](#installing-plugins). ## Transformations From 8588cb97729460251d680f620cf9ad0a66722824 Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Tue, 27 Jul 2021 18:06:41 -0700 Subject: [PATCH 14/33] More consistency fixes --- metadata-ingestion/sink_docs/datahub.md | 2 +- metadata-ingestion/sink_docs/file.md | 2 +- metadata-ingestion/source_docs/bigquery.md | 2 ++ metadata-ingestion/source_docs/feast.md | 2 +- metadata-ingestion/source_docs/file.md | 2 +- metadata-ingestion/source_docs/snowflake.md | 4 +++- 6 files changed, 9 insertions(+), 5 deletions(-) diff --git a/metadata-ingestion/sink_docs/datahub.md b/metadata-ingestion/sink_docs/datahub.md index f77062d7866ae..1341ed8700c65 100644 --- a/metadata-ingestion/sink_docs/datahub.md +++ b/metadata-ingestion/sink_docs/datahub.md @@ -18,7 +18,7 @@ To install this plugin, run `pip install 'acryl-datahub[datahub-kafka]'`. Pushes metadata to DataHub by publishing messages to Kafka. The advantage of the Kafka-based interface is that it's asynchronous and can handle higher throughput. This requires the -Datahub mce-consumer container to be running. +DataHub mce-consumer container to be running. ```yml sink: diff --git a/metadata-ingestion/sink_docs/file.md b/metadata-ingestion/sink_docs/file.md index c7cbcc47d43b2..7c906f991dc5d 100644 --- a/metadata-ingestion/sink_docs/file.md +++ b/metadata-ingestion/sink_docs/file.md @@ -2,7 +2,7 @@ Outputs metadata to a file. This can be used to decouple metadata sourcing from the process of pushing it into DataHub, and is particularly useful for debugging purposes. -Note that the file source can read files generated by this sink. +Note that the [file source]("../source_docs/file") can read files generated by this sink. ```yml sink: diff --git a/metadata-ingestion/source_docs/bigquery.md b/metadata-ingestion/source_docs/bigquery.md index 5c5839e48260f..3c110938ff458 100644 --- a/metadata-ingestion/source_docs/bigquery.md +++ b/metadata-ingestion/source_docs/bigquery.md @@ -65,6 +65,8 @@ You can also get fine-grained usage statistics for BigQuery using the `bigquery- To install this plugin, run `pip install 'acryl-datahub[bigquery-usage]'`. +This plugin extracts the following: + - Fetch a list of queries issued - Fetch a list of tables and columns accessed - Aggregate these statistics into buckets, by day or hour granularity diff --git a/metadata-ingestion/source_docs/feast.md b/metadata-ingestion/source_docs/feast.md index 46b16b41be223..a2a199fc71dc1 100644 --- a/metadata-ingestion/source_docs/feast.md +++ b/metadata-ingestion/source_docs/feast.md @@ -12,7 +12,7 @@ This plugin extracts the following: - Column types associated with each feature and entity Note: this uses a separate Docker container to extract Feast's metadata into a JSON file, which is then -parsed to DataHub's native objects. This was done because of a dependency conflict in the `feast` module. +parsed to DataHub's native objects. This separation was performed because of a dependency conflict in the `feast` module. 
```yml source: diff --git a/metadata-ingestion/source_docs/file.md b/metadata-ingestion/source_docs/file.md index 0b1ba7c504dad..268cc5084ff6a 100644 --- a/metadata-ingestion/source_docs/file.md +++ b/metadata-ingestion/source_docs/file.md @@ -1,6 +1,6 @@ # File -This plugin pulls metadata from a previously generated file. The file sink +This plugin pulls metadata from a previously generated file. The [file sink](../sink_docs/file) can produce such files, and a number of samples are included in the [examples/mce_files](../examples/mce_files) directory. diff --git a/metadata-ingestion/source_docs/snowflake.md b/metadata-ingestion/source_docs/snowflake.md index 25a709bdbc03f..a8286803dfaef 100644 --- a/metadata-ingestion/source_docs/snowflake.md +++ b/metadata-ingestion/source_docs/snowflake.md @@ -75,10 +75,12 @@ You can also get fine-grained usage statistics for Snowflake using the `snowflak ::: -# Snowflake Usage Stats `snowflake-usage` +# Snowflake Usage Stats To install this plugin, run `pip install 'acryl-datahub[snowflake-usage]'`. +This plugin extracts the following: + - Fetch a list of queries issued - Fetch a list of tables and columns accessed (excludes views) - Aggregate these statistics into buckets, by day or hour granularity From 63691dd15245486ef180435bf2afa97021783aa5 Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Tue, 27 Jul 2021 18:15:30 -0700 Subject: [PATCH 15/33] Fix broken links --- metadata-ingestion/sink_docs/datahub.md | 2 +- metadata-ingestion/sink_docs/file.md | 2 +- metadata-ingestion/source_docs/file.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/metadata-ingestion/sink_docs/datahub.md b/metadata-ingestion/sink_docs/datahub.md index 1341ed8700c65..a051422815909 100644 --- a/metadata-ingestion/sink_docs/datahub.md +++ b/metadata-ingestion/sink_docs/datahub.md @@ -1,6 +1,6 @@ # DataHub Rest -To install this plugin, run `pip install 'acryl-datahub[datahub-reset]'`. +To install this plugin, run `pip install 'acryl-datahub[datahub-rest]'`. Pushes metadata to DataHub using the GMA rest API. The advantage of the rest-based interface is that any errors can immediately be reported. diff --git a/metadata-ingestion/sink_docs/file.md b/metadata-ingestion/sink_docs/file.md index 7c906f991dc5d..d1fbab953c6a5 100644 --- a/metadata-ingestion/sink_docs/file.md +++ b/metadata-ingestion/sink_docs/file.md @@ -2,7 +2,7 @@ Outputs metadata to a file. This can be used to decouple metadata sourcing from the process of pushing it into DataHub, and is particularly useful for debugging purposes. -Note that the [file source]("../source_docs/file") can read files generated by this sink. +Note that the [file source](../source_docs/file.md) can read files generated by this sink. ```yml sink: diff --git a/metadata-ingestion/source_docs/file.md b/metadata-ingestion/source_docs/file.md index 268cc5084ff6a..56e969865eee2 100644 --- a/metadata-ingestion/source_docs/file.md +++ b/metadata-ingestion/source_docs/file.md @@ -1,6 +1,6 @@ # File -This plugin pulls metadata from a previously generated file. The [file sink](../sink_docs/file) +This plugin pulls metadata from a previously generated file. The [file sink](../sink_docs/file.md) can produce such files, and a number of samples are included in the [examples/mce_files](../examples/mce_files) directory. 
From eef2a62874d21afce1c2b709549fc484e66e661d Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Mon, 2 Aug 2021 17:50:49 -0400 Subject: [PATCH 16/33] Note on allow/deny --- metadata-ingestion/source_docs/athena.md | 8 +++++++- metadata-ingestion/source_docs/bigquery.md | 4 +++- metadata-ingestion/source_docs/dbt.md | 2 +- metadata-ingestion/source_docs/druid.md | 4 +++- metadata-ingestion/source_docs/hive.md | 6 ++++-- metadata-ingestion/source_docs/looker.md | 4 ++-- metadata-ingestion/source_docs/lookml.md | 4 ++-- metadata-ingestion/source_docs/mongodb.md | 4 ++-- metadata-ingestion/source_docs/mssql.md | 6 ++++-- metadata-ingestion/source_docs/mysql.md | 6 ++++-- metadata-ingestion/source_docs/oracle.md | 6 ++++-- metadata-ingestion/source_docs/postgres.md | 6 ++++-- metadata-ingestion/source_docs/redshift.md | 6 ++++-- metadata-ingestion/source_docs/snowflake.md | 8 +++++--- metadata-ingestion/source_docs/sqlalchemy.md | 6 ++++-- 15 files changed, 53 insertions(+), 27 deletions(-) diff --git a/metadata-ingestion/source_docs/athena.md b/metadata-ingestion/source_docs/athena.md index 665bf8a3905c2..b192359077368 100644 --- a/metadata-ingestion/source_docs/athena.md +++ b/metadata-ingestion/source_docs/athena.md @@ -25,7 +25,7 @@ source: work_group: athena_workgroup # "primary" - # Tables to allow/deny + # Tables to allow/deny. If left blank, will ingest all. table_pattern: deny: # Note that the deny patterns take precedence over the allow patterns. @@ -41,6 +41,8 @@ source: # having another option to allow/deny on schema level is an optimization for the case when there is a large number # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter # them out afterwards via the table_pattern. + + # If left blank, will ingest all. schema_pattern: deny: # ... @@ -54,3 +56,7 @@ source: allow: # ... ``` + +## Questions + +If you've got any questions on configuring this source diff --git a/metadata-ingestion/source_docs/bigquery.md b/metadata-ingestion/source_docs/bigquery.md index 3c110938ff458..d00fd8d8f37aa 100644 --- a/metadata-ingestion/source_docs/bigquery.md +++ b/metadata-ingestion/source_docs/bigquery.md @@ -22,7 +22,7 @@ source: # See https://github.com/mxmzdlv/pybigquery#authentication for details. credentials_path: "/path/to/keyfile.json" # optional - # Tables to allow/deny + # Tables to allow/deny. If left blank, will ingest all. table_pattern: deny: # Note that the deny patterns take precedence over the allow patterns. @@ -38,6 +38,8 @@ source: # having another option to allow/deny on schema level is an optimization for the case when there is a large number # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter # them out afterwards via the table_pattern. + + # If left blank, will ingest all. schema_pattern: deny: # ... diff --git a/metadata-ingestion/source_docs/dbt.md b/metadata-ingestion/source_docs/dbt.md index aee7c4736cf08..1232c50f80a74 100644 --- a/metadata-ingestion/source_docs/dbt.md +++ b/metadata-ingestion/source_docs/dbt.md @@ -37,7 +37,7 @@ source: # (otherwise, only includes a simple list of tables) load_schemas: True or False - # regex pattern to allow/deny nodes + # Regex pattern to allow/deny nodes. If left blank, will ingest all. 
node_type_pattern: # optional deny: - ^test.* diff --git a/metadata-ingestion/source_docs/druid.md b/metadata-ingestion/source_docs/druid.md index df43204e2eb40..cce79550062b1 100644 --- a/metadata-ingestion/source_docs/druid.md +++ b/metadata-ingestion/source_docs/druid.md @@ -26,7 +26,7 @@ source: options: # driver_option: some-option - # Tables to allow/deny + # Tables to allow/deny. If left blank, will ingest all. table_pattern: deny: # Note that the deny patterns take precedence over the allow patterns. @@ -42,6 +42,8 @@ source: # having another option to allow/deny on schema level is an optimization for the case when there is a large number # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter # them out afterwards via the table_pattern. + + # If left blank, will ingest all. schema_pattern: deny: # ... diff --git a/metadata-ingestion/source_docs/hive.md b/metadata-ingestion/source_docs/hive.md index 4e1a9895269d3..1387d79f3dd2d 100644 --- a/metadata-ingestion/source_docs/hive.md +++ b/metadata-ingestion/source_docs/hive.md @@ -31,7 +31,7 @@ source: options: # driver_option: some-option - # Tables to allow/deny + # Tables to allow/deny. If left blank, will ingest all. table_pattern: deny: # Note that the deny patterns take precedence over the allow patterns. @@ -47,13 +47,15 @@ source: # having another option to allow/deny on schema level is an optimization for the case when there is a large number # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter # them out afterwards via the table_pattern. + + # If left blank, will ingest all. schema_pattern: deny: # ... allow: # ... - # Same format as table_pattern, used for filtering views + # Same format as table_pattern, used for filtering views. If left blank, will ingest all. view_pattern: deny: # ... diff --git a/metadata-ingestion/source_docs/looker.md b/metadata-ingestion/source_docs/looker.md index 20af147c0a4ef..668e784d33d41 100644 --- a/metadata-ingestion/source_docs/looker.md +++ b/metadata-ingestion/source_docs/looker.md @@ -20,14 +20,14 @@ source: platform_name: "looker" # Optional, default is "looker" actor: urn:li:corpuser:etl # Optional, defaults to urn:li:corpuser:etl - # regex pattern to allow/deny dashboards + # Regex pattern to allow/deny dashboards. If left blank, will ingest all. dashboard_pattern: deny: # ... allow: # ... - # regex pattern to allow/deny charts + # Regex pattern to allow/deny charts. If left blank, will ingest all. chart_pattern: deny: # ... diff --git a/metadata-ingestion/source_docs/lookml.md b/metadata-ingestion/source_docs/lookml.md index 5591f32cd74c1..51e818a604cc6 100644 --- a/metadata-ingestion/source_docs/lookml.md +++ b/metadata-ingestion/source_docs/lookml.md @@ -19,14 +19,14 @@ source: platform_name: "looker" # optional, default is "looker" - # regex pattern to allow/deny models + # Regex pattern to allow/deny models. If left blank, will ingest all. model_pattern: deny: # ... allow: # ... - # regex pattern to allow/deny views + # Regex pattern to allow/deny views. If left blank, will ingest all. view_pattern: deny: # ... 
diff --git a/metadata-ingestion/source_docs/mongodb.md b/metadata-ingestion/source_docs/mongodb.md index 142fb3cb5f88c..298a1a8b81f6a 100644 --- a/metadata-ingestion/source_docs/mongodb.md +++ b/metadata-ingestion/source_docs/mongodb.md @@ -31,13 +31,13 @@ source: env: "PROD" # Optional, default is "PROD" - # regex pattern to allow/deny databases + # Regex pattern to allow/deny databases. If left blank, will ingest all. database_pattern: deny: # ... allow: # ... - # regex pattern to allow/deny collections + # Regex pattern to allow/deny collections. If left blank, will ingest all. collection_pattern: deny: # ... diff --git a/metadata-ingestion/source_docs/mssql.md b/metadata-ingestion/source_docs/mssql.md index a388231854c26..8ec73af926275 100644 --- a/metadata-ingestion/source_docs/mssql.md +++ b/metadata-ingestion/source_docs/mssql.md @@ -26,7 +26,7 @@ source: options: charset: "utf8" - # Tables to allow/deny + # Tables to allow/deny. If left blank, will ingest all. table_pattern: deny: # Note that the deny patterns take precedence over the allow patterns. @@ -43,13 +43,15 @@ source: # having another option to allow/deny on schema level is an optimization for the case when there is a large number # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter # them out afterwards via the table_pattern. + + # If left blank, will ingest all. schema_pattern: deny: # ... allow: # ... - # Same format as table_pattern, used for filtering views + # Same format as table_pattern, used for filtering views. If left blank, will ingest all. view_pattern: deny: # ... diff --git a/metadata-ingestion/source_docs/mysql.md b/metadata-ingestion/source_docs/mysql.md index 2ede14967c1d0..b6a05731bfaf3 100644 --- a/metadata-ingestion/source_docs/mysql.md +++ b/metadata-ingestion/source_docs/mysql.md @@ -24,7 +24,7 @@ source: options: # driver_option: some-option - # Tables to allow/deny + # Tables to allow/deny. If left blank, will ingest all. table_pattern: deny: # Note that the deny patterns take precedence over the allow patterns. @@ -40,13 +40,15 @@ source: # having another option to allow/deny on schema level is an optimization for the case when there is a large number # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter # them out afterwards via the table_pattern. + + # If left blank, will ingest all. schema_pattern: deny: # ... allow: # ... - # Same format as table_pattern, used for filtering views + # Same format as table_pattern, used for filtering views. If left blank, will ingest all. view_pattern: deny: # ... diff --git a/metadata-ingestion/source_docs/oracle.md b/metadata-ingestion/source_docs/oracle.md index 6550d74c4ea7f..284f3f1a2ba1f 100644 --- a/metadata-ingestion/source_docs/oracle.md +++ b/metadata-ingestion/source_docs/oracle.md @@ -30,7 +30,7 @@ source: options: # driver_option: some-option - # Tables to allow/deny + # Tables to allow/deny. If left blank, will ingest all. table_pattern: deny: # Note that the deny patterns take precedence over the allow patterns. @@ -46,13 +46,15 @@ source: # having another option to allow/deny on schema level is an optimization for the case when there is a large number # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter # them out afterwards via the table_pattern. + + # If left blank, will ingest all. schema_pattern: deny: # ... allow: # ... 
- # Same format as table_pattern, used for filtering views + # Same format as table_pattern, used for filtering views. If left blank, will ingest all. view_pattern: deny: # ... diff --git a/metadata-ingestion/source_docs/postgres.md b/metadata-ingestion/source_docs/postgres.md index 94bcacaa49775..94aae26fb304d 100644 --- a/metadata-ingestion/source_docs/postgres.md +++ b/metadata-ingestion/source_docs/postgres.md @@ -27,7 +27,7 @@ source: options: # driver_option: some-option - # Tables to allow/deny + # Tables to allow/deny. If left blank, will ingest all. table_pattern: deny: # Note that the deny patterns take precedence over the allow patterns. @@ -43,13 +43,15 @@ source: # having another option to allow/deny on schema level is an optimization for the case when there is a large number # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter # them out afterwards via the table_pattern. + + # If left blank, will ingest all. schema_pattern: deny: # ... allow: # ... - # Same format as table_pattern, used for filtering views + # Same format as table_pattern, used for filtering views. If left blank, will ingest all. view_pattern: deny: # ... diff --git a/metadata-ingestion/source_docs/redshift.md b/metadata-ingestion/source_docs/redshift.md index a905d6771209b..0344536adfc25 100644 --- a/metadata-ingestion/source_docs/redshift.md +++ b/metadata-ingestion/source_docs/redshift.md @@ -25,7 +25,7 @@ source: options: # driver_option: some-option - # Tables to allow/deny + # Tables to allow/deny. If left blank, will ingest all. table_pattern: deny: # Note that the deny patterns take precedence over the allow patterns. @@ -41,13 +41,15 @@ source: # having another option to allow/deny on schema level is an optimization for the case when there is a large number # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter # them out afterwards via the table_pattern. + + # If left blank, will ingest all. schema_pattern: deny: # ... allow: # ... - # Same format as table_pattern, used for filtering views + # Same format as table_pattern, used for filtering views. If left blank, will ingest all. view_pattern: deny: # ... diff --git a/metadata-ingestion/source_docs/snowflake.md b/metadata-ingestion/source_docs/snowflake.md index a8286803dfaef..242623d95c565 100644 --- a/metadata-ingestion/source_docs/snowflake.md +++ b/metadata-ingestion/source_docs/snowflake.md @@ -26,7 +26,7 @@ source: options: # driver_option: some-option - # Regexe filters for databases to allow/deny + # Regex filters for databases to allow/deny. If left blank, will ingest all. database_pattern: # The escaping of the $ symbol helps us skip the environment variable substitution. allow: @@ -36,7 +36,7 @@ source: - ^SNOWFLAKE\$ - ^SNOWFLAKE_SAMPLE_DATA\$ - # Tables to allow/deny + # Tables to allow/deny. If left blank, will ingest all. table_pattern: deny: # Note that the deny patterns take precedence over the allow patterns. @@ -52,13 +52,15 @@ source: # having another option to allow/deny on schema level is an optimization for the case when there is a large number # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter # them out afterwards via the table_pattern. + + # If left blank, will ingest all. schema_pattern: deny: # ... allow: # ... - # Same format as table_pattern, used for filtering views + # Same format as table_pattern, used for filtering views. 
If left blank, will ingest all. view_pattern: deny: # ... diff --git a/metadata-ingestion/source_docs/sqlalchemy.md b/metadata-ingestion/source_docs/sqlalchemy.md index ad20ed77bfc4c..1b3d94b8af418 100644 --- a/metadata-ingestion/source_docs/sqlalchemy.md +++ b/metadata-ingestion/source_docs/sqlalchemy.md @@ -26,7 +26,7 @@ source: options: # driver_option: some-option - # Tables to allow/deny + # Tables to allow/deny. If left blank, will ingest all. table_pattern: deny: # Note that the deny patterns take precedence over the allow patterns. @@ -42,13 +42,15 @@ source: # having another option to allow/deny on schema level is an optimization for the case when there is a large number # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter # them out afterwards via the table_pattern. + + # If left blank, will ingest all. schema_pattern: deny: # ... allow: # ... - # Same format as table_pattern, used for filtering views + # Same format as table_pattern, used for filtering views. If left blank, will ingest all. view_pattern: deny: # ... From bee872f3a032b921714e132fd616f975a7fb171e Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Mon, 2 Aug 2021 17:53:13 -0400 Subject: [PATCH 17/33] Add questions section --- metadata-ingestion/sink_docs/console.md | 4 ++++ metadata-ingestion/sink_docs/datahub.md | 4 ++++ metadata-ingestion/sink_docs/file.md | 4 ++++ metadata-ingestion/source_docs/athena.md | 2 +- metadata-ingestion/source_docs/bigquery.md | 4 ++++ metadata-ingestion/source_docs/dbt.md | 4 ++++ metadata-ingestion/source_docs/druid.md | 4 ++++ metadata-ingestion/source_docs/feast.md | 4 ++++ metadata-ingestion/source_docs/file.md | 4 ++++ metadata-ingestion/source_docs/glue.md | 4 ++++ metadata-ingestion/source_docs/hive.md | 4 ++++ metadata-ingestion/source_docs/kafka-connect.md | 4 ++++ metadata-ingestion/source_docs/kafka.md | 4 ++++ metadata-ingestion/source_docs/ldap.md | 4 ++++ metadata-ingestion/source_docs/looker.md | 4 ++++ metadata-ingestion/source_docs/lookml.md | 4 ++++ metadata-ingestion/source_docs/mongodb.md | 4 ++++ metadata-ingestion/source_docs/mssql.md | 4 ++++ metadata-ingestion/source_docs/mysql.md | 4 ++++ metadata-ingestion/source_docs/oracle.md | 4 ++++ metadata-ingestion/source_docs/postgres.md | 4 ++++ metadata-ingestion/source_docs/redshift.md | 4 ++++ metadata-ingestion/source_docs/sagemaker.md | 4 ++++ metadata-ingestion/source_docs/snowflake.md | 4 ++++ metadata-ingestion/source_docs/sql_profiles.md | 6 +++++- metadata-ingestion/source_docs/sqlalchemy.md | 4 ++++ metadata-ingestion/source_docs/superset.md | 4 ++++ 27 files changed, 106 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/sink_docs/console.md b/metadata-ingestion/sink_docs/console.md index d8bbf7c44c9bd..cc4cb5f126662 100644 --- a/metadata-ingestion/sink_docs/console.md +++ b/metadata-ingestion/sink_docs/console.md @@ -6,3 +6,7 @@ Simply prints each metadata event to stdout. Useful for experimentation and debu sink: type: "console" ``` + +## Questions + +If you've got any questions on configuring this sink, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! 
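Because the console sink takes no options, it pairs naturally with the file source for quick experiments. A minimal sketch, using only the file-source and console-sink fields shown in these docs, that prints every event from a previously generated MCE file to stdout:

```yml
source:
  type: file
  config:
    filename: ./path/to/mce/file.json   # any file produced by the file sink
sink:
  type: "console"
```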
diff --git a/metadata-ingestion/sink_docs/datahub.md b/metadata-ingestion/sink_docs/datahub.md index a051422815909..b32b4c9566647 100644 --- a/metadata-ingestion/sink_docs/datahub.md +++ b/metadata-ingestion/sink_docs/datahub.md @@ -34,3 +34,7 @@ sink: The options in the producer config and schema registry config are passed to the Kafka SerializingProducer and SchemaRegistryClient respectively. For a full example with a number of security options, see this [example recipe](../examples/recipes/secured_kafka.yml). + +## Questions + +If you've got any questions on configuring this sink, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/sink_docs/file.md b/metadata-ingestion/sink_docs/file.md index d1fbab953c6a5..dc8a43b8049f2 100644 --- a/metadata-ingestion/sink_docs/file.md +++ b/metadata-ingestion/sink_docs/file.md @@ -10,3 +10,7 @@ sink: config: filename: ./path/to/mce/file.json ``` + +## Questions + +If you've got any questions on configuring this sink, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/athena.md b/metadata-ingestion/source_docs/athena.md index b192359077368..9ebd094c11ad9 100644 --- a/metadata-ingestion/source_docs/athena.md +++ b/metadata-ingestion/source_docs/athena.md @@ -59,4 +59,4 @@ source: ## Questions -If you've got any questions on configuring this source +If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/bigquery.md b/metadata-ingestion/source_docs/bigquery.md index d00fd8d8f37aa..3d6cc01e5cfd0 100644 --- a/metadata-ingestion/source_docs/bigquery.md +++ b/metadata-ingestion/source_docs/bigquery.md @@ -121,3 +121,7 @@ source: This source only does usage statistics. To get the tables, views, and schemas in your BigQuery project, use the `bigquery` source described above. ::: + +## Questions + +If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/dbt.md b/metadata-ingestion/source_docs/dbt.md index 1232c50f80a74..49bcfa3594b29 100644 --- a/metadata-ingestion/source_docs/dbt.md +++ b/metadata-ingestion/source_docs/dbt.md @@ -46,3 +46,7 @@ source: ``` Note: when `load_schemas` is False, models that use [identifiers](https://docs.getdbt.com/reference/resource-properties/identifier) to reference their source tables are ingested using the model identifier as the model name to preserve the lineage. + +## Questions + +If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/druid.md b/metadata-ingestion/source_docs/druid.md index cce79550062b1..89ccd12b84afd 100644 --- a/metadata-ingestion/source_docs/druid.md +++ b/metadata-ingestion/source_docs/druid.md @@ -60,3 +60,7 @@ source: include_views: True # whether to include views, defaults to True include_tables: True # whether to include views, defaults to True ``` + +## Questions + +If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! 
diff --git a/metadata-ingestion/source_docs/feast.md b/metadata-ingestion/source_docs/feast.md index a2a199fc71dc1..48efed0443ddb 100644 --- a/metadata-ingestion/source_docs/feast.md +++ b/metadata-ingestion/source_docs/feast.md @@ -22,3 +22,7 @@ source: env: "PROD" # Optional, default is "PROD" use_local_build: False # Whether to build Feast ingestion image locally, default is False ``` + +## Questions + +If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/file.md b/metadata-ingestion/source_docs/file.md index 56e969865eee2..826d6cf55abf2 100644 --- a/metadata-ingestion/source_docs/file.md +++ b/metadata-ingestion/source_docs/file.md @@ -10,3 +10,7 @@ source: config: filename: ./path/to/mce/file.json ``` + +## Questions + +If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/glue.md b/metadata-ingestion/source_docs/glue.md index a51add12c54be..3f7fadb63ae40 100644 --- a/metadata-ingestion/source_docs/glue.md +++ b/metadata-ingestion/source_docs/glue.md @@ -44,3 +44,7 @@ source: allow: # ... ``` + +## Questions + +If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/hive.md b/metadata-ingestion/source_docs/hive.md index 1387d79f3dd2d..95df66613de52 100644 --- a/metadata-ingestion/source_docs/hive.md +++ b/metadata-ingestion/source_docs/hive.md @@ -85,3 +85,7 @@ source: ```
+ +## Questions + +If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/kafka-connect.md b/metadata-ingestion/source_docs/kafka-connect.md index 1b15a4b20269f..00f887aaa14d1 100644 --- a/metadata-ingestion/source_docs/kafka-connect.md +++ b/metadata-ingestion/source_docs/kafka-connect.md @@ -22,3 +22,7 @@ source: Current limitations: - Currently works only for Debezium source connectors. + +## Questions + +If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/kafka.md b/metadata-ingestion/source_docs/kafka.md index 4dfdd901cffc6..1a46799664173 100644 --- a/metadata-ingestion/source_docs/kafka.md +++ b/metadata-ingestion/source_docs/kafka.md @@ -36,3 +36,7 @@ source: The options in the consumer config and schema registry config are passed to the Kafka DeserializingConsumer and SchemaRegistryClient respectively. For a full example with a number of security options, see this [example recipe](../examples/recipes/secured_kafka.yml). + +## Questions + +If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/ldap.md b/metadata-ingestion/source_docs/ldap.md index 696ab8277f6af..aee334a67ac1b 100644 --- a/metadata-ingestion/source_docs/ldap.md +++ b/metadata-ingestion/source_docs/ldap.md @@ -30,3 +30,7 @@ source: The `drop_missing_first_last_name` should be set to true if you've got many "headless" user LDAP accounts for devices or services should be excluded when they do not contain a first and last name. This will only impact the ingestion of LDAP users, while LDAP groups will be unaffected by this config option. + +## Questions + +If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/looker.md b/metadata-ingestion/source_docs/looker.md index 668e784d33d41..34e7c15410b2e 100644 --- a/metadata-ingestion/source_docs/looker.md +++ b/metadata-ingestion/source_docs/looker.md @@ -36,3 +36,7 @@ source: env: "PROD" # Optional, default is "PROD" ``` + +## Questions + +If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/lookml.md b/metadata-ingestion/source_docs/lookml.md index 51e818a604cc6..7af19441ad4a5 100644 --- a/metadata-ingestion/source_docs/lookml.md +++ b/metadata-ingestion/source_docs/lookml.md @@ -41,3 +41,7 @@ Note! The integration can use [`sql-metadata`](https://pypi.org/project/sql-meta views depends on. As these SQL's can be complicated, and the package doesn't official support all the SQL dialects that Looker supports, the result might not be correct. This parsing is disabled by default, but can be enabled by setting `parse_table_names_from_sql: True`. + +## Questions + +If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/mongodb.md b/metadata-ingestion/source_docs/mongodb.md index 298a1a8b81f6a..13c901b509de6 100644 --- a/metadata-ingestion/source_docs/mongodb.md +++ b/metadata-ingestion/source_docs/mongodb.md @@ -44,3 +44,7 @@ source: allow: # ... 
``` + +## Questions + +If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/mssql.md b/metadata-ingestion/source_docs/mssql.md index 8ec73af926275..b3480df136ef4 100644 --- a/metadata-ingestion/source_docs/mssql.md +++ b/metadata-ingestion/source_docs/mssql.md @@ -96,3 +96,7 @@ source: ```
+ +## Questions + +If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/mysql.md b/metadata-ingestion/source_docs/mysql.md index b6a05731bfaf3..31aeee8c7da45 100644 --- a/metadata-ingestion/source_docs/mysql.md +++ b/metadata-ingestion/source_docs/mysql.md @@ -58,3 +58,7 @@ source: include_views: True # whether to include views, defaults to True include_tables: True # whether to include views, defaults to True ``` + +## Questions + +If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/oracle.md b/metadata-ingestion/source_docs/oracle.md index 284f3f1a2ba1f..2792ba477c106 100644 --- a/metadata-ingestion/source_docs/oracle.md +++ b/metadata-ingestion/source_docs/oracle.md @@ -64,3 +64,7 @@ source: include_views: True # whether to include views, defaults to True include_tables: True # whether to include views, defaults to True ``` + +## Questions + +If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/postgres.md b/metadata-ingestion/source_docs/postgres.md index 94aae26fb304d..605d08501f669 100644 --- a/metadata-ingestion/source_docs/postgres.md +++ b/metadata-ingestion/source_docs/postgres.md @@ -61,3 +61,7 @@ source: include_views: True # whether to include views, defaults to True include_tables: True # whether to include views, defaults to True ``` + +## Questions + +If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/redshift.md b/metadata-ingestion/source_docs/redshift.md index 0344536adfc25..614818c78e641 100644 --- a/metadata-ingestion/source_docs/redshift.md +++ b/metadata-ingestion/source_docs/redshift.md @@ -79,3 +79,7 @@ source: ```
+ +## Questions + +If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/sagemaker.md b/metadata-ingestion/source_docs/sagemaker.md index f6ea7009b2448..3e1ec47419c05 100644 --- a/metadata-ingestion/source_docs/sagemaker.md +++ b/metadata-ingestion/source_docs/sagemaker.md @@ -33,3 +33,7 @@ source: training: True transform: True ``` + +## Questions + +If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/snowflake.md b/metadata-ingestion/source_docs/snowflake.md index 242623d95c565..1f309a05b1dea 100644 --- a/metadata-ingestion/source_docs/snowflake.md +++ b/metadata-ingestion/source_docs/snowflake.md @@ -113,3 +113,7 @@ source: This source only does usage statistics. To get the tables, views, and schemas in your Snowflake warehouse, ingest using the `snowflake` source described above. ::: + +## Questions + +If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/sql_profiles.md b/metadata-ingestion/source_docs/sql_profiles.md index dde978dd781f2..08f7dbd49160e 100644 --- a/metadata-ingestion/source_docs/sql_profiles.md +++ b/metadata-ingestion/source_docs/sql_profiles.md @@ -57,4 +57,8 @@ While we've done our best to limit the expensiveness of the queries the profiler should be prudent about the set of tables profiling is enabled on or the frequency of the profiling runs. -::: \ No newline at end of file +::: + +## Questions + +If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/sqlalchemy.md b/metadata-ingestion/source_docs/sqlalchemy.md index 1b3d94b8af418..f7bdf1523fc67 100644 --- a/metadata-ingestion/source_docs/sqlalchemy.md +++ b/metadata-ingestion/source_docs/sqlalchemy.md @@ -60,3 +60,7 @@ source: include_views: True # whether to include views, defaults to True include_tables: True # whether to include views, defaults to True ``` + +## Questions + +If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/superset.md b/metadata-ingestion/source_docs/superset.md index d0910528d0cba..bc67ae5e67234 100644 --- a/metadata-ingestion/source_docs/superset.md +++ b/metadata-ingestion/source_docs/superset.md @@ -20,3 +20,7 @@ source: ``` See documentation for superset's `/security/login` at https://superset.apache.org/docs/rest-api for more details on superset's login api. + +## Questions + +If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! 
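The allow/deny comments standardized above all follow the same rule: deny patterns take precedence over allow patterns, and a filter left blank ingests everything. As a hedged illustration (the schema and table names are invented for the example), the following MySQL filter ingests everything under schemas starting with `analytics` except the staging copy, and only the `daily_` tables within them:

```yml
source:
  type: mysql                  # the same pattern options exist on the other SQL-based sources
  config:
    # ... connection details omitted ...
    schema_pattern:
      allow:
        - "^analytics"           # matches analytics, analytics_staging, ...
      deny:
        - "^analytics_staging$"  # deny wins, so the staging schema is skipped
    table_pattern:
      allow:
        - "analytics\\.daily_.*" # within allowed schemas, keep only daily_ tables
```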
From 6ffd8a1c9269960e16ff43665be8097b82e67410 Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Tue, 3 Aug 2021 13:05:02 -0400 Subject: [PATCH 18/33] Fix inconsistencies --- metadata-ingestion/sink_docs/datahub.md | 6 ++++-- metadata-ingestion/source_docs/athena.md | 2 +- metadata-ingestion/source_docs/bigquery.md | 6 +++--- metadata-ingestion/source_docs/druid.md | 8 +++----- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/metadata-ingestion/sink_docs/datahub.md b/metadata-ingestion/sink_docs/datahub.md index b32b4c9566647..d286d5fc7ea73 100644 --- a/metadata-ingestion/sink_docs/datahub.md +++ b/metadata-ingestion/sink_docs/datahub.md @@ -1,4 +1,6 @@ -# DataHub Rest +# DataHub + +## DataHub Rest To install this plugin, run `pip install 'acryl-datahub[datahub-rest]'`. @@ -12,7 +14,7 @@ sink: server: "http://localhost:8080" ``` -# DataHub Kafka +## DataHub Kafka To install this plugin, run `pip install 'acryl-datahub[datahub-kafka]'`. diff --git a/metadata-ingestion/source_docs/athena.md b/metadata-ingestion/source_docs/athena.md index 9ebd094c11ad9..39b1b21b94d00 100644 --- a/metadata-ingestion/source_docs/athena.md +++ b/metadata-ingestion/source_docs/athena.md @@ -49,7 +49,7 @@ source: allow: # ... - # Same format as table_pattern, used for filtering views + # Same format as table_pattern, used for filtering views. If left blank, will ingest all. view_pattern: deny: # ... diff --git a/metadata-ingestion/source_docs/bigquery.md b/metadata-ingestion/source_docs/bigquery.md index 3d6cc01e5cfd0..ac0a56e700320 100644 --- a/metadata-ingestion/source_docs/bigquery.md +++ b/metadata-ingestion/source_docs/bigquery.md @@ -1,4 +1,4 @@ -# Google BigQuery +# BigQuery To install this plugin, run `pip install 'acryl-datahub[bigquery]'`. @@ -46,7 +46,7 @@ source: allow: # ... - # Same format as table_pattern, used for filtering views + # Same format as table_pattern, used for filtering views. If left blank, will ingest all. view_pattern: deny: # ... @@ -63,7 +63,7 @@ You can also get fine-grained usage statistics for BigQuery using the `bigquery- ::: -# Google BigQuery Usage Stats +# BigQuery Usage Stats To install this plugin, run `pip install 'acryl-datahub[bigquery-usage]'`. diff --git a/metadata-ingestion/source_docs/druid.md b/metadata-ingestion/source_docs/druid.md index 89ccd12b84afd..7031e22fcf379 100644 --- a/metadata-ingestion/source_docs/druid.md +++ b/metadata-ingestion/source_docs/druid.md @@ -7,9 +7,7 @@ This plugin extracts the following: - List of databases, schema, and tables - Column types associated with each table -**Note** It is important to define a explicitly define deny schema pattern for internal druid databases (lookup & sys) -if adding a schema pattern otherwise the crawler may crash before processing relevant databases. -This deny pattern is defined by default but is overriden by user-submitted configurations +**Note**: It is important to explicitly define the deny schema pattern for internal Druid databases (lookup & sys) if adding a schema pattern. Otherwise, the crawler may crash before processing relevant databases. This deny pattern is defined by default but is overriden by user-submitted configurations. ```yml source: @@ -46,11 +44,11 @@ source: # If left blank, will ingest all. schema_pattern: deny: - # ... + - "^(lookup|sys).*" # default, ignores internal Druid databases (see note below) allow: # ... - # Same format as table_pattern, used for filtering views + # Same format as table_pattern, used for filtering views. If left blank, will ingest all. 
view_pattern: deny: # ... From 8a4de6d4ebdf0439585dee865447c62478755ab6 Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Tue, 3 Aug 2021 16:29:03 -0400 Subject: [PATCH 19/33] Begin separation of quickstart and config details --- metadata-ingestion/sink_docs/console.md | 12 ++++++ metadata-ingestion/sink_docs/datahub.md | 20 ++++++++++ metadata-ingestion/sink_docs/file.md | 12 ++++++ metadata-ingestion/source_docs/athena.md | 12 +++++- metadata-ingestion/source_docs/bigquery.md | 40 ++++++++++++++----- metadata-ingestion/source_docs/dbt.md | 12 ++++++ metadata-ingestion/source_docs/druid.md | 10 +++++ metadata-ingestion/source_docs/feast.md | 10 +++++ metadata-ingestion/source_docs/file.md | 12 ++++++ metadata-ingestion/source_docs/glue.md | 10 +++++ metadata-ingestion/source_docs/hive.md | 10 +++++ .../source_docs/kafka-connect.md | 14 ++++++- metadata-ingestion/source_docs/kafka.md | 10 +++++ metadata-ingestion/source_docs/ldap.md | 10 +++++ metadata-ingestion/source_docs/looker.md | 10 +++++ metadata-ingestion/source_docs/lookml.md | 10 +++++ metadata-ingestion/source_docs/mongodb.md | 10 +++++ metadata-ingestion/source_docs/mssql.md | 12 +++++- metadata-ingestion/source_docs/mysql.md | 10 +++++ metadata-ingestion/source_docs/oracle.md | 10 +++++ metadata-ingestion/source_docs/postgres.md | 10 +++++ metadata-ingestion/source_docs/redshift.md | 10 +++++ metadata-ingestion/source_docs/sagemaker.md | 12 +++++- metadata-ingestion/source_docs/snowflake.md | 40 ++++++++++++++----- .../source_docs/sql_profiles.md | 26 ++++++++---- metadata-ingestion/source_docs/sqlalchemy.md | 12 +++++- metadata-ingestion/source_docs/superset.md | 12 +++++- 27 files changed, 343 insertions(+), 35 deletions(-) diff --git a/metadata-ingestion/sink_docs/console.md b/metadata-ingestion/sink_docs/console.md index cc4cb5f126662..edad962582533 100644 --- a/metadata-ingestion/sink_docs/console.md +++ b/metadata-ingestion/sink_docs/console.md @@ -1,12 +1,24 @@ # Console +## Setup + +Works with `acryl-datahub` out of the box. + +## Capabilities + Simply prints each metadata event to stdout. Useful for experimentation and debugging purposes. +## Quickstart recipe + +Use the below recipe to get started with ingestion. See [below](#config-details) for full configuration options. + ```yml sink: type: "console" ``` +## Config details + ## Questions If you've got any questions on configuring this sink, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/sink_docs/datahub.md b/metadata-ingestion/sink_docs/datahub.md index d286d5fc7ea73..a19488d43eae4 100644 --- a/metadata-ingestion/sink_docs/datahub.md +++ b/metadata-ingestion/sink_docs/datahub.md @@ -2,11 +2,19 @@ ## DataHub Rest +### Setup + To install this plugin, run `pip install 'acryl-datahub[datahub-rest]'`. +### Capabilities + Pushes metadata to DataHub using the GMA rest API. The advantage of the rest-based interface is that any errors can immediately be reported. +### Quickstart recipe + +Use the below recipe to get started with ingestion. See [below](#config-details) for full configuration options. + ```yml sink: type: "datahub-rest" @@ -14,14 +22,24 @@ sink: server: "http://localhost:8080" ``` +### Config details + ## DataHub Kafka +### Setup + To install this plugin, run `pip install 'acryl-datahub[datahub-kafka]'`. +### Capabilities + Pushes metadata to DataHub by publishing messages to Kafka. The advantage of the Kafka-based interface is that it's asynchronous and can handle higher throughput. 
This requires the DataHub mce-consumer container to be running. +### Quickstart recipe + +Use the below recipe to get started with ingestion. See [below](#config-details) for full configuration options. + ```yml sink: type: "datahub-kafka" @@ -33,6 +51,8 @@ sink: schema_registry_config: {} # passed to https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#confluent_kafka.schema_registry.SchemaRegistryClient ``` +### Config details + The options in the producer config and schema registry config are passed to the Kafka SerializingProducer and SchemaRegistryClient respectively. For a full example with a number of security options, see this [example recipe](../examples/recipes/secured_kafka.yml). diff --git a/metadata-ingestion/sink_docs/file.md b/metadata-ingestion/sink_docs/file.md index dc8a43b8049f2..2e2f95ef37f9f 100644 --- a/metadata-ingestion/sink_docs/file.md +++ b/metadata-ingestion/sink_docs/file.md @@ -1,9 +1,19 @@ # File +## Setup + +Works with `acryl-datahub` out of the box. + +## Capabilities + Outputs metadata to a file. This can be used to decouple metadata sourcing from the process of pushing it into DataHub, and is particularly useful for debugging purposes. Note that the [file source](../source_docs/file.md) can read files generated by this sink. +## Quickstart recipe + +Use the below recipe to get started with ingestion. See [below](#config-details) for full configuration options. + ```yml sink: type: file @@ -11,6 +21,8 @@ sink: filename: ./path/to/mce/file.json ``` +## Config details + ## Questions If you've got any questions on configuring this sink, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/athena.md b/metadata-ingestion/source_docs/athena.md index 39b1b21b94d00..d41be109d8bbd 100644 --- a/metadata-ingestion/source_docs/athena.md +++ b/metadata-ingestion/source_docs/athena.md @@ -1,12 +1,20 @@ -# AWS Athena +# Athena + +## Setup To install this plugin, run `pip install 'acryl-datahub[athena]'`. +## Capabilities + This plugin extracts the following: - List of databases and tables - Column types associated with each table +## Quickstart recipe + +Use the below recipe to get started with ingestion. See [below](#config-details) for full configuration options. + ```yml source: type: athena @@ -57,6 +65,8 @@ source: # ... ``` +## Config details + ## Questions If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/bigquery.md b/metadata-ingestion/source_docs/bigquery.md index ac0a56e700320..270de3b16613d 100644 --- a/metadata-ingestion/source_docs/bigquery.md +++ b/metadata-ingestion/source_docs/bigquery.md @@ -1,12 +1,26 @@ # BigQuery +## Setup + To install this plugin, run `pip install 'acryl-datahub[bigquery]'`. +## Capabilities + This plugin extracts the following: - List of databases, schema, and tables - Column types associated with each table +:::tip + +You can also get fine-grained usage statistics for BigQuery using the `bigquery-usage` source described below. + +::: + +## Quickstart recipe + +Use the below recipe to get started with ingestion. See [below](#config-details) for full configuration options. 
+ ```yml source: type: bigquery @@ -57,16 +71,16 @@ source: include_tables: True # whether to include views, defaults to True ``` -:::tip - -You can also get fine-grained usage statistics for BigQuery using the `bigquery-usage` source described below. - -::: +## Config details # BigQuery Usage Stats +## Setup + To install this plugin, run `pip install 'acryl-datahub[bigquery-usage]'`. +## Capabilities + This plugin extracts the following: - Fetch a list of queries issued @@ -80,6 +94,16 @@ Note: the client must have one of the following OAuth scopes, and should be auth - https://www.googleapis.com/auth/cloud-platform.read-only - https://www.googleapis.com/auth/cloud-platform +:::note + +This source only does usage statistics. To get the tables, views, and schemas in your BigQuery project, use the `bigquery` source described above. + +::: + +## Quickstart recipe + +Use the below recipe to get started with ingestion. See [below](#config-details) for full configuration options. + ```yml source: type: bigquery-usage @@ -116,11 +140,7 @@ source: max_query_duration: ``` -:::note - -This source only does usage statistics. To get the tables, views, and schemas in your BigQuery project, use the `bigquery` source described above. - -::: +## Config details ## Questions diff --git a/metadata-ingestion/source_docs/dbt.md b/metadata-ingestion/source_docs/dbt.md index 49bcfa3594b29..155f0caa1d670 100644 --- a/metadata-ingestion/source_docs/dbt.md +++ b/metadata-ingestion/source_docs/dbt.md @@ -1,5 +1,11 @@ # dbt +## Setup + +Works with `acryl-datahub` out of the box. + +## Capabilities + This plugin pulls metadata from dbt's artifact files: - [dbt manifest file](https://docs.getdbt.com/reference/artifacts/manifest-json) @@ -19,6 +25,10 @@ This plugin pulls metadata from dbt's artifact files: - node_type_pattern: - Use this filter to exclude and include node types using allow or deny method +## Quickstart recipe + +Use the below recipe to get started with ingestion. See [below](#config-details) for full configuration options. + ```yml source: type: "dbt" @@ -45,6 +55,8 @@ source: - ^.* ``` +## Config details + Note: when `load_schemas` is False, models that use [identifiers](https://docs.getdbt.com/reference/resource-properties/identifier) to reference their source tables are ingested using the model identifier as the model name to preserve the lineage. ## Questions diff --git a/metadata-ingestion/source_docs/druid.md b/metadata-ingestion/source_docs/druid.md index 7031e22fcf379..0643d84535054 100644 --- a/metadata-ingestion/source_docs/druid.md +++ b/metadata-ingestion/source_docs/druid.md @@ -1,7 +1,11 @@ # Druid +## Setup + To install this plugin, run `pip install 'acryl-datahub[druid]'`. +## Capabilities + This plugin extracts the following: - List of databases, schema, and tables @@ -9,6 +13,10 @@ This plugin extracts the following: **Note**: It is important to explicitly define the deny schema pattern for internal Druid databases (lookup & sys) if adding a schema pattern. Otherwise, the crawler may crash before processing relevant databases. This deny pattern is defined by default but is overriden by user-submitted configurations. +## Quickstart recipe + +Use the below recipe to get started with ingestion. See [below](#config-details) for full configuration options. 
+ ```yml source: type: druid @@ -59,6 +67,8 @@ source: include_tables: True # whether to include views, defaults to True ``` +## Config details + ## Questions If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/feast.md b/metadata-ingestion/source_docs/feast.md index 48efed0443ddb..78ae0bedad32a 100644 --- a/metadata-ingestion/source_docs/feast.md +++ b/metadata-ingestion/source_docs/feast.md @@ -1,9 +1,13 @@ # Feast +## Setup + **Note: Feast ingestion requires Docker to be installed.** To install this plugin, run `pip install 'acryl-datahub[feast]'`. +## Capabilities + This plugin extracts the following: - List of feature tables (modeled as [`MLFeatureTable`](https://github.com/linkedin/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLFeatureTableProperties.pdl)s), @@ -14,6 +18,10 @@ This plugin extracts the following: Note: this uses a separate Docker container to extract Feast's metadata into a JSON file, which is then parsed to DataHub's native objects. This separation was performed because of a dependency conflict in the `feast` module. +## Quickstart recipe + +Use the below recipe to get started with ingestion. See [below](#config-details) for full configuration options. + ```yml source: type: feast @@ -23,6 +31,8 @@ source: use_local_build: False # Whether to build Feast ingestion image locally, default is False ``` +## Config details + ## Questions If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/file.md b/metadata-ingestion/source_docs/file.md index 826d6cf55abf2..f7347d840d142 100644 --- a/metadata-ingestion/source_docs/file.md +++ b/metadata-ingestion/source_docs/file.md @@ -1,9 +1,19 @@ # File +## Setup + +Works with `acryl-datahub` out of the box. + +## Capabilities + This plugin pulls metadata from a previously generated file. The [file sink](../sink_docs/file.md) can produce such files, and a number of samples are included in the [examples/mce_files](../examples/mce_files) directory. +## Quickstart recipe + +Use the below recipe to get started with ingestion. See [below](#config-details) for full configuration options. + ```yml source: type: file @@ -11,6 +21,8 @@ source: filename: ./path/to/mce/file.json ``` +## Config details + ## Questions If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/glue.md b/metadata-ingestion/source_docs/glue.md index 3f7fadb63ae40..1112266db9470 100644 --- a/metadata-ingestion/source_docs/glue.md +++ b/metadata-ingestion/source_docs/glue.md @@ -1,9 +1,13 @@ # AWS Glue +## Setup + To install this plugin, run `pip install 'acryl-datahub[glue]'`. Note: if you also have files in S3 that you'd like to ingest, we recommend you use Glue's built-in data catalog. See [here](../s3-ingestion.md) for a quick guide on how to set up a crawler on Glue and ingest the outputs with DataHub. +## Capabilities + This plugin extracts the following: - List of tables @@ -11,6 +15,10 @@ This plugin extracts the following: - Table metadata, such as owner, description and parameters - Jobs and their component transformations, data sources, and data sinks +## Quickstart recipe + +Use the below recipe to get started with ingestion. 
See [below](#config-details) for full configuration options. + ```yml source: type: glue @@ -45,6 +53,8 @@ source: # ... ``` +## Config details + ## Questions If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/hive.md b/metadata-ingestion/source_docs/hive.md index 95df66613de52..f19df2743a225 100644 --- a/metadata-ingestion/source_docs/hive.md +++ b/metadata-ingestion/source_docs/hive.md @@ -1,13 +1,21 @@ # Hive +## Setup + To install this plugin, run `pip install 'acryl-datahub[hive]'`. +## Capabilities + This plugin extracts the following: - List of databases, schema, and tables - Column types associated with each table - Detailed table and storage information +## Quickstart recipe + +Use the below recipe to get started with ingestion. See [below](#config-details) for full configuration options. + ```yml source: type: hive @@ -86,6 +94,8 @@ source: +## Config details + ## Questions If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/kafka-connect.md b/metadata-ingestion/source_docs/kafka-connect.md index 00f887aaa14d1..f82bf2a1747c9 100644 --- a/metadata-ingestion/source_docs/kafka-connect.md +++ b/metadata-ingestion/source_docs/kafka-connect.md @@ -1,4 +1,10 @@ -# Kafka Connect `kafka-connect` +# Kafka Connect + +## Setup + +To install this plugin, run `pip install 'acryl-datahub[kafka-connect]'`. + +## Capabilities This plugin extracts the following: @@ -6,6 +12,10 @@ This plugin extracts the following: - Creating individual `DataJobSnapshotClass` entity using `{connector_name}:{source_dataset}` naming - Lineage information between source database to Kafka topic +## Quickstart recipe + +Use the below recipe to get started with ingestion. See [below](#config-details) for full configuration options. + ```yml source: type: "kafka-connect" @@ -23,6 +33,8 @@ Current limitations: - Currently works only for Debezium source connectors. +## Config details + ## Questions If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/kafka.md b/metadata-ingestion/source_docs/kafka.md index 1a46799664173..88efcc99d9d7a 100644 --- a/metadata-ingestion/source_docs/kafka.md +++ b/metadata-ingestion/source_docs/kafka.md @@ -1,12 +1,20 @@ # Kafka Metadata +## Setup + To install this plugin, run `pip install 'acryl-datahub[kafka]'`. +## Capabilities + This plugin extracts the following: - List of topics - from the Kafka broker - Schemas associated with each topic - from the schema registry +## Quickstart recipe + +Use the below recipe to get started with ingestion. See [below](#config-details) for full configuration options. + ```yml source: type: "kafka" @@ -33,6 +41,8 @@ source: producer_config: {} ``` +## Config details + The options in the consumer config and schema registry config are passed to the Kafka DeserializingConsumer and SchemaRegistryClient respectively. For a full example with a number of security options, see this [example recipe](../examples/recipes/secured_kafka.yml). 
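Since `consumer_config` and `schema_registry_config` are forwarded verbatim, security settings are supplied as plain confluent-kafka / librdkafka properties rather than DataHub-specific options. A rough sketch for a SASL-protected cluster follows; the property names come from librdkafka and the Schema Registry client, not from these docs, so treat them as an assumption and use the linked secured_kafka.yml recipe as the authoritative reference:

```yml
source:
  type: "kafka"
  config:
    connection:
      bootstrap: "broker:9092"
      schema_registry_url: "http://localhost:8081"
      # Passed through to confluent_kafka.DeserializingConsumer (librdkafka properties).
      consumer_config:
        security.protocol: "SASL_SSL"
        sasl.mechanism: "PLAIN"
        sasl.username: "<kafka-api-key>"      # placeholder credentials
        sasl.password: "<kafka-api-secret>"
      # Passed through to confluent_kafka.schema_registry.SchemaRegistryClient.
      schema_registry_config:
        basic.auth.user.info: "<sr-key>:<sr-secret>"
```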
diff --git a/metadata-ingestion/source_docs/ldap.md b/metadata-ingestion/source_docs/ldap.md index aee334a67ac1b..0a4dfefe16a76 100644 --- a/metadata-ingestion/source_docs/ldap.md +++ b/metadata-ingestion/source_docs/ldap.md @@ -1,13 +1,21 @@ # LDAP +## Setup + To install this plugin, run `pip install 'acryl-datahub[ldap]'`. +## Capabilities + This plugin extracts the following: - List of people - Names, emails, titles, and manager information for each person - List of groups +## Quickstart recipe + +Use the below recipe to get started with ingestion. See [below](#config-details) for full configuration options. + ```yml source: type: "ldap" @@ -27,6 +35,8 @@ source: page_size: # default is 20 ``` +## Config details + The `drop_missing_first_last_name` should be set to true if you've got many "headless" user LDAP accounts for devices or services should be excluded when they do not contain a first and last name. This will only impact the ingestion of LDAP users, while LDAP groups will be unaffected by this config option. diff --git a/metadata-ingestion/source_docs/looker.md b/metadata-ingestion/source_docs/looker.md index 34e7c15410b2e..97d5e4184067b 100644 --- a/metadata-ingestion/source_docs/looker.md +++ b/metadata-ingestion/source_docs/looker.md @@ -1,7 +1,11 @@ # Looker dashboards +## Setup + To install this plugin, run `pip install 'acryl-datahub[looker]'`. +## Capabilities + This plugin extracts the following: - Looker dashboards and dashboard elements (charts) @@ -9,6 +13,10 @@ This plugin extracts the following: See the [Looker authentication docs](https://docs.looker.com/reference/api-and-integration/api-auth#authentication_with_an_sdk) for the steps to create a client ID and secret. +## Quickstart recipe + +Use the below recipe to get started with ingestion. See [below](#config-details) for full configuration options. + ```yml source: type: "looker" @@ -37,6 +45,8 @@ source: env: "PROD" # Optional, default is "PROD" ``` +## Config details + ## Questions If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/lookml.md b/metadata-ingestion/source_docs/lookml.md index 7af19441ad4a5..4b1aff7dfa34c 100644 --- a/metadata-ingestion/source_docs/lookml.md +++ b/metadata-ingestion/source_docs/lookml.md @@ -1,14 +1,22 @@ # LookML +## Setup + To install this plugin, run `pip install 'acryl-datahub[lookml]'`. Note! This plugin uses a package that requires Python 3.7+! +## Capabilities + This plugin extracts the following: - LookML views from model files - Name, upstream table names, dimensions, measures, and dimension groups +## Quickstart recipe + +Use the below recipe to get started with ingestion. See [below](#config-details) for full configuration options. + ```yml source: type: "lookml" @@ -37,6 +45,8 @@ source: parse_table_names_from_sql: False # see note below ``` +## Config details + Note! The integration can use [`sql-metadata`](https://pypi.org/project/sql-metadata/) to try to parse the tables the views depends on. As these SQL's can be complicated, and the package doesn't official support all the SQL dialects that Looker supports, the result might not be correct. 
This parsing is disabled by default, but can be enabled by setting diff --git a/metadata-ingestion/source_docs/mongodb.md b/metadata-ingestion/source_docs/mongodb.md index 13c901b509de6..d54257402eb41 100644 --- a/metadata-ingestion/source_docs/mongodb.md +++ b/metadata-ingestion/source_docs/mongodb.md @@ -1,7 +1,11 @@ # MongoDB +## Setup + To install this plugin, run `pip install 'acryl-datahub[mongodb]'`. +## Capabilities + This plugin extracts the following: - List of databases @@ -12,6 +16,10 @@ Moreover, setting `useRandomSampling: False` will sample the first documents fou Note that `schemaSamplingSize` has no effect if `enableSchemaInference: False` is set. +## Quickstart recipe + +Use the below recipe to get started with ingestion. See [below](#config-details) for full configuration options. + ```yml source: type: "mongodb" @@ -45,6 +53,8 @@ source: # ... ``` +## Config details + ## Questions If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/mssql.md b/metadata-ingestion/source_docs/mssql.md index b3480df136ef4..b9bd958556870 100644 --- a/metadata-ingestion/source_docs/mssql.md +++ b/metadata-ingestion/source_docs/mssql.md @@ -1,14 +1,22 @@ -# Microsoft SQL Server Metadata +# Microsoft SQL Server + +## Setup To install this plugin, run `pip install 'acryl-datahub[mssql]'`. We have two options for the underlying library used to connect to SQL Server: (1) [python-tds](https://github.com/denisenkom/pytds) and (2) [pyodbc](https://github.com/mkleehammer/pyodbc). The TDS library is pure Python and hence easier to install, but only PyODBC supports encrypted connections. +## Capabilities + This plugin extracts the following: - List of databases, schema, tables and views - Column types associated with each table/view +## Quickstart recipe + +Use the below recipe to get started with ingestion. See [below](#config-details) for full configuration options. + ```yml source: type: mssql @@ -97,6 +105,8 @@ source: +## Config details + ## Questions If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/mysql.md b/metadata-ingestion/source_docs/mysql.md index 31aeee8c7da45..28e846b97aba5 100644 --- a/metadata-ingestion/source_docs/mysql.md +++ b/metadata-ingestion/source_docs/mysql.md @@ -1,12 +1,20 @@ # MySQL +## Setup + To install this plugin, run `pip install 'acryl-datahub[mysql]'`. +## Capabilities + This plugin extracts the following: - List of databases and tables - Column types and schema associated with each table +## Quickstart recipe + +Use the below recipe to get started with ingestion. See [below](#config-details) for full configuration options. + ```yml source: type: mysql @@ -59,6 +67,8 @@ source: include_tables: True # whether to include views, defaults to True ``` +## Config details + ## Questions If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/oracle.md b/metadata-ingestion/source_docs/oracle.md index 2792ba477c106..ba081d7a356d5 100644 --- a/metadata-ingestion/source_docs/oracle.md +++ b/metadata-ingestion/source_docs/oracle.md @@ -1,7 +1,11 @@ # Oracle +## Setup + To install this plugin, run `pip install 'acryl-datahub[oracle]'`. 
+## Capabilities + This plugin extracts the following: - List of databases, schema, and tables @@ -9,6 +13,10 @@ This plugin extracts the following: Using the Oracle source requires that you've also installed the correct drivers; see the [cx_Oracle docs](https://cx-oracle.readthedocs.io/en/latest/user_guide/installation.html). The easiest one is the [Oracle Instant Client](https://www.oracle.com/database/technologies/instant-client.html). +## Quickstart recipe + +Use the below recipe to get started with ingestion. See [below](#config-details) for full configuration options. + ```yml source: type: oracle @@ -65,6 +73,8 @@ source: include_tables: True # whether to include views, defaults to True ``` +## Config details + ## Questions If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/postgres.md b/metadata-ingestion/source_docs/postgres.md index 605d08501f669..fa5c9be08a056 100644 --- a/metadata-ingestion/source_docs/postgres.md +++ b/metadata-ingestion/source_docs/postgres.md @@ -1,7 +1,11 @@ # PostgreSQL +## Setup + To install this plugin, run `pip install 'acryl-datahub[postgres]'`. +## Capabilities + This plugin extracts the following: - List of databases, schema, and tables @@ -9,6 +13,10 @@ This plugin extracts the following: - Also supports PostGIS extensions - database_alias (optional) can be used to change the name of database to be ingested +## Quickstart recipe + +Use the below recipe to get started with ingestion. See [below](#config-details) for full configuration options. + ```yml source: type: postgres @@ -62,6 +70,8 @@ source: include_tables: True # whether to include views, defaults to True ``` +## Config details + ## Questions If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/redshift.md b/metadata-ingestion/source_docs/redshift.md index 614818c78e641..75bc6ff990a9e 100644 --- a/metadata-ingestion/source_docs/redshift.md +++ b/metadata-ingestion/source_docs/redshift.md @@ -1,13 +1,21 @@ # Redshift +## Setup + To install this plugin, run `pip install 'acryl-datahub[redshift]'`. +## Capabilities + This plugin extracts the following: - List of databases, schema, and tables - Column types associated with each table - Also supports PostGIS extensions +## Quickstart recipe + +Use the below recipe to get started with ingestion. See [below](#config-details) for full configuration options. + ```yml source: type: redshift @@ -80,6 +88,8 @@ source: +## Config details + ## Questions If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/sagemaker.md b/metadata-ingestion/source_docs/sagemaker.md index 3e1ec47419c05..315654e8044c7 100644 --- a/metadata-ingestion/source_docs/sagemaker.md +++ b/metadata-ingestion/source_docs/sagemaker.md @@ -1,12 +1,20 @@ -# AWS SageMaker +# SageMaker + +## Setup To install this plugin, run `pip install 'acryl-datahub[sagemaker]'`. +## Capabilities + This plugin extracts the following: - Feature groups - Models, jobs, and lineage between the two (e.g. when jobs output a model or a model is used by a job) +## Quickstart recipe + +Use the below recipe to get started with ingestion. See [below](#config-details) for full configuration options. 
+ ```yml source: type: sagemaker @@ -34,6 +42,8 @@ source: transform: True ``` +## Config details + ## Questions If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/snowflake.md b/metadata-ingestion/source_docs/snowflake.md index 1f309a05b1dea..e42c96ba2d574 100644 --- a/metadata-ingestion/source_docs/snowflake.md +++ b/metadata-ingestion/source_docs/snowflake.md @@ -1,12 +1,26 @@ # Snowflake +## Setup + To install this plugin, run `pip install 'acryl-datahub[snowflake]'`. +## Capabilities + This plugin extracts the following: - List of databases, schema, and tables - Column types associated with each table +:::tip + +You can also get fine-grained usage statistics for Snowflake using the `snowflake-usage` source described below. + +::: + +## Quickstart recipe + +Use the below recipe to get started with ingestion. See [below](#config-details) for full configuration options. + ```yml source: type: snowflake @@ -71,16 +85,14 @@ source: include_tables: True # whether to include views, defaults to True ``` -:::tip - -You can also get fine-grained usage statistics for Snowflake using the `snowflake-usage` source described below. - -::: - # Snowflake Usage Stats +## Setup + To install this plugin, run `pip install 'acryl-datahub[snowflake-usage]'`. +## Capabilities + This plugin extracts the following: - Fetch a list of queries issued @@ -91,6 +103,16 @@ Note: the user/role must have access to the account usage table. The "accountadm Note: the underlying access history views that we use are only available in Snowflake's enterprise edition or higher. +:::note + +This source only does usage statistics. To get the tables, views, and schemas in your Snowflake warehouse, ingest using the `snowflake` source described above. + +::: + +## Quickstart recipe + +Use the below recipe to get started with ingestion. See [below](#config-details) for full configuration options. + ```yml source: type: snowflake-usage @@ -108,11 +130,7 @@ source: top_n_queries: 10 # number of queries to save for each table ``` -:::note - -This source only does usage statistics. To get the tables, views, and schemas in your Snowflake warehouse, ingest using the `snowflake` source described above. - -::: +## Config details ## Questions diff --git a/metadata-ingestion/source_docs/sql_profiles.md b/metadata-ingestion/source_docs/sql_profiles.md index 08f7dbd49160e..cce4f8df14d35 100644 --- a/metadata-ingestion/source_docs/sql_profiles.md +++ b/metadata-ingestion/source_docs/sql_profiles.md @@ -1,10 +1,23 @@ # SQL Profiles +## Setup + To install this plugin, run `pip install 'acryl-datahub[sql-profiles]'`. The SQL-based profiler does not run alone, but rather can be enabled for other SQL-based sources. Enabling profiling will slow down ingestion runs. +:::caution + +Running profiling against many tables or over many rows can run up significant costs. +While we've done our best to limit the expensiveness of the queries the profiler runs, you +should be prudent about the set of tables profiling is enabled on or the frequency +of the profiling runs. + +::: + +## Capabilities + Extracts: - row and column counts for each table @@ -28,6 +41,10 @@ Supported SQL sources: - Snowflake - Generic SQLAlchemy source +## Quickstart recipe + +Use the below recipe to get started with ingestion. See [below](#config-details) for full configuration options. 
+ ```yml source: type: # can be bigquery, snowflake, etc - see above for the list @@ -50,14 +67,7 @@ source: include_views: true ``` -:::caution - -Running profiling against many tables or over many rows can run up significant costs. -While we've done our best to limit the expensiveness of the queries the profiler runs, you -should be prudent about the set of tables profiling is enabled on or the frequency -of the profiling runs. - -::: +## Config details ## Questions diff --git a/metadata-ingestion/source_docs/sqlalchemy.md b/metadata-ingestion/source_docs/sqlalchemy.md index f7bdf1523fc67..c47a1907de2fe 100644 --- a/metadata-ingestion/source_docs/sqlalchemy.md +++ b/metadata-ingestion/source_docs/sqlalchemy.md @@ -1,4 +1,6 @@ -# Other databases using SQLAlchemy +# Other SQLAlchemy databases + +## Setup To install this plugin, run `pip install 'acryl-datahub[sqlalchemy]'`. @@ -6,11 +8,17 @@ The `sqlalchemy` source is useful if we don't have a pre-built source for your c database system, but there is an [SQLAlchemy dialect](https://docs.sqlalchemy.org/en/14/dialects/) defined elsewhere. In order to use this, you must `pip install` the required dialect packages yourself. +## Capabilities + This plugin extracts the following: - List of schemas and tables - Column types associated with each table +## Quickstart recipe + +Use the below recipe to get started with ingestion. See [below](#config-details) for full configuration options. + ```yml source: type: sqlalchemy @@ -61,6 +69,8 @@ source: include_tables: True # whether to include views, defaults to True ``` +## Config details + ## Questions If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/superset.md b/metadata-ingestion/source_docs/superset.md index bc67ae5e67234..47c21001d3b91 100644 --- a/metadata-ingestion/source_docs/superset.md +++ b/metadata-ingestion/source_docs/superset.md @@ -1,11 +1,21 @@ # Superset +## Setup + To install this plugin, run `pip install 'acryl-datahub[superset]'`. +See documentation for superset's `/security/login` at https://superset.apache.org/docs/rest-api for more details on superset's login api. + +## Capabilities + This plugin extracts the following: - List of charts and dashboards +## Quickstart recipe + +Use the below recipe to get started with ingestion. See [below](#config-details) for full configuration options. + ```yml source: type: superset @@ -19,7 +29,7 @@ source: env: "PROD" # Optional, default is "PROD" ``` -See documentation for superset's `/security/login` at https://superset.apache.org/docs/rest-api for more details on superset's login api. 
+## Config details ## Questions From 8bf27a5e41da3f987c3612fd650c78e3b0355aa1 Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Tue, 3 Aug 2021 17:38:07 -0400 Subject: [PATCH 20/33] Write generic sqlalchemy options --- metadata-ingestion/sink_docs/console.md | 5 ++ metadata-ingestion/sink_docs/datahub.md | 10 +++ metadata-ingestion/sink_docs/file.md | 5 ++ metadata-ingestion/source_docs/athena.md | 52 ++++++-------- metadata-ingestion/source_docs/bigquery.md | 66 ++++++----------- metadata-ingestion/source_docs/dbt.md | 35 +++++---- metadata-ingestion/source_docs/druid.md | 61 +++++----------- metadata-ingestion/source_docs/feast.md | 14 +++- metadata-ingestion/source_docs/file.md | 6 ++ metadata-ingestion/source_docs/glue.md | 35 +++++---- metadata-ingestion/source_docs/hive.md | 60 +++++----------- .../source_docs/kafka-connect.md | 20 +++--- metadata-ingestion/source_docs/kafka.md | 10 +++ metadata-ingestion/source_docs/ldap.md | 12 ++++ metadata-ingestion/source_docs/looker.md | 15 ++++ metadata-ingestion/source_docs/lookml.md | 14 ++++ metadata-ingestion/source_docs/mongodb.md | 18 +++++ metadata-ingestion/source_docs/mssql.md | 71 ++++++------------- metadata-ingestion/source_docs/mysql.md | 58 +++++---------- metadata-ingestion/source_docs/oracle.md | 62 ++++++---------- metadata-ingestion/source_docs/postgres.md | 62 ++++++---------- metadata-ingestion/source_docs/redshift.md | 50 +++++-------- metadata-ingestion/source_docs/sagemaker.md | 40 ++++++----- metadata-ingestion/source_docs/snowflake.md | 70 ++++++++---------- .../source_docs/sql_profiles.md | 23 +++--- metadata-ingestion/source_docs/sqlalchemy.md | 47 +++++------- metadata-ingestion/source_docs/superset.md | 12 ++++ 27 files changed, 417 insertions(+), 516 deletions(-) diff --git a/metadata-ingestion/sink_docs/console.md b/metadata-ingestion/sink_docs/console.md index edad962582533..f803ad94f764a 100644 --- a/metadata-ingestion/sink_docs/console.md +++ b/metadata-ingestion/sink_docs/console.md @@ -19,6 +19,11 @@ sink: ## Config details +Note that a `.` is used to denote nested fields in the YAML recipe. + +| Field | Required | Default | Description | +| ----- | -------- | ------- | ----------- | + ## Questions If you've got any questions on configuring this sink, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/sink_docs/datahub.md b/metadata-ingestion/sink_docs/datahub.md index a19488d43eae4..6ff40c97a37a9 100644 --- a/metadata-ingestion/sink_docs/datahub.md +++ b/metadata-ingestion/sink_docs/datahub.md @@ -24,6 +24,11 @@ sink: ### Config details +Note that a `.` is used to denote nested fields in the YAML recipe. + +| Field | Required | Default | Description | +| ----- | -------- | ------- | ----------- | + ## DataHub Kafka ### Setup @@ -53,6 +58,11 @@ sink: ### Config details +Note that a `.` is used to denote nested fields in the YAML recipe. + +| Field | Required | Default | Description | +| ----- | -------- | ------- | ----------- | + The options in the producer config and schema registry config are passed to the Kafka SerializingProducer and SchemaRegistryClient respectively. For a full example with a number of security options, see this [example recipe](../examples/recipes/secured_kafka.yml). 
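The pass-through blocks above are easiest to see in a concrete recipe. Below is a minimal sketch of a secured `datahub-kafka` sink; the broker address, registry URL, and credential values are placeholders, and the individual keys inside `producer_config` and `schema_registry_config` are forwarded verbatim to confluent-kafka's `SerializingProducer` and `SchemaRegistryClient`, so that library's documentation (and the linked example recipe) is the authoritative list of accepted properties.

```yml
sink:
  type: "datahub-kafka"
  config:
    connection:
      bootstrap: "broker.example.com:9092" # placeholder broker address
      schema_registry_url: "https://registry.example.com:8081" # placeholder registry URL
      # Passed as-is to confluent_kafka.SerializingProducer
      producer_config:
        security.protocol: "SASL_SSL"
        sasl.mechanism: "PLAIN"
        sasl.username: "KAFKA_KEY_ID" # placeholder credential
        sasl.password: "KAFKA_KEY_SECRET" # placeholder credential
      # Passed as-is to confluent_kafka.schema_registry.SchemaRegistryClient
      schema_registry_config:
        basic.auth.user.info: "REGISTRY_KEY_ID:REGISTRY_KEY_SECRET" # placeholder credential
```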
diff --git a/metadata-ingestion/sink_docs/file.md b/metadata-ingestion/sink_docs/file.md index 2e2f95ef37f9f..a678a60efe09a 100644 --- a/metadata-ingestion/sink_docs/file.md +++ b/metadata-ingestion/sink_docs/file.md @@ -23,6 +23,11 @@ sink: ## Config details +Note that a `.` is used to denote nested fields in the YAML recipe. + +| Field | Required | Default | Description | +| ----- | -------- | ------- | ----------- | + ## Questions If you've got any questions on configuring this sink, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/athena.md b/metadata-ingestion/source_docs/athena.md index d41be109d8bbd..e77d0e15c8f1b 100644 --- a/metadata-ingestion/source_docs/athena.md +++ b/metadata-ingestion/source_docs/athena.md @@ -32,41 +32,31 @@ source: # However, the athena driver will transparently fetch these results as you would expect from any other sql client. work_group: athena_workgroup # "primary" - - # Tables to allow/deny. If left blank, will ingest all. - table_pattern: - deny: - # Note that the deny patterns take precedence over the allow patterns. - - "bad_table" - - "junk_table" - # Can also be a regular expression - - "(old|used|deprecated)_table" - allow: - - "good_table" - - "excellent_table" - - # Although the 'table_pattern' enables you to skip everything from certain schemas, - # having another option to allow/deny on schema level is an optimization for the case when there is a large number - # of schemas that one wants to skip and you want to avoid the time to needlessly fetch those tables only to filter - # them out afterwards via the table_pattern. - - # If left blank, will ingest all. - schema_pattern: - deny: - # ... - allow: - # ... - - # Same format as table_pattern, used for filtering views. If left blank, will ingest all. - view_pattern: - deny: - # ... - allow: - # ... ``` ## Config details +Note that a `.` is used to denote nested fields in the YAML recipe. + +| Field | Required | Default | Description | +| ---------------------- | -------- | ------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `username` | ❌ | Autodetected | Username credential. If not specified, detected with boto3 rules. See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html | +| `password` | ❌ | Autodetected | Same detection scheme as `username` | +| `database` | ❌ | Autodetected | | +| `aws_region` | ✅ | | | +| `s3_staging_dir` | ✅ | | Of format `"s3:///prefix/"`. The `s3_staging_dir` parameter is needed because Athena always writes query results to S3.
See https://docs.aws.amazon.com/athena/latest/ug/querying.html. |
| `work_group` | ✅ | | Name of Athena workgroup. See https://docs.aws.amazon.com/athena/latest/ug/manage-queries-control-costs-with-workgroups.html. |
| `env` | ❌ | `"PROD"` | Environment to use in namespace when constructing URNs. |
| `options.