added run and edited
ruchika817 committed Jul 23, 2021
1 parent 9ff584f commit cad70c9
Showing 9 changed files with 365 additions and 298 deletions.
4 changes: 4 additions & 0 deletions .gitignore
@@ -181,3 +181,7 @@ $RECYCLE.BIN/
**/pre-processing-code.zip

response.json

### PyCharm
.idea/
venv/
180 changes: 85 additions & 95 deletions init.sh
100644 → 100755
@@ -19,16 +19,47 @@ while [[ $# -gt 0 ]]; do
shift;
current_arg="$1"
case ${opt} in
"-s"|"--s3-bucket") export S3_BUCKET="$1"; shift;;
"-d"|"--dataset-name") export DATASET_NAME="$1"; shift;;
"-p"|"--product-name") export PRODUCT_NAME="$1"; shift;;
"-i"|"--product-id") export PRODUCT_ID="$1"; shift;;
"-r"|"--region") export REGION="$1"; shift;;
"-f"|"--profile") PROFILE=" --profile $1"; shift;;
"--rdp-role-arn") export REARC_DATA_PLATFORM_ROLE_ARN="$1"; shift;;
"--rdp-external-id") export REARC_DATA_PLATFORM_EXTERNAL_ID="$1"; shift;;
"--customer-id") export CUSTOMER_ID="$1"; shift;;
"--schedule-cron") export SCHEDULE_CRON="$1"; shift;;
"--asset-bucket") export ASSET_BUCKET="$1"; shift;;
"--manifest-bucket") export MANIFEST_BUCKET="$1"; shift;;
"--dataset-name") export DATASET_NAME="$1"; shift;;
"--product-name") export PRODUCT_NAME="$1"; shift;;
"--product-id") export PRODUCT_ID="$1"; shift;;
"--dataset-arn") export DATASET_ARN="$1"; shift;;
"--region") export REGION="$1"; shift;;
"--first-revision") export FIRST_REVISION="$1"; shift;;
"--products-info-file") export PRODUCTS_INFO_FILE="$1"; shift;;
"--source-url") export SOURCE_URL="$1"; shift;;
"--product-code") export PRODUCT_CODE="$1"; shift;;
"--product-url") export PRODUCT_URL="$1"; shift;;
"--profile") PROFILE=" --profile $1"; shift;;
*) echo "ERROR: Invalid option: \"$opt\"" >&2; exit 1;;
esac
done
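
# Example invocation with the new long-form flags (every value below is an
# illustrative placeholder, not a real ARN, bucket, or ID):
#   ./init.sh --rdp-role-arn arn:aws:iam::111122223333:role/rdp-role \
#     --rdp-external-id example-external-id --customer-id example-customer-id \
#     --asset-bucket example-asset-bucket --manifest-bucket example-manifest-bucket \
#     --dataset-name example-dataset --product-name "Example Product" \
#     --schedule-cron "cron(0 12 * * ? *)" --region us-east-1 \
#     --first-revision true --profile default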

echo "------------------------------------------------------------------------------"
echo "REARC_DATA_PLATFORM_ROLE_ARN: $REARC_DATA_PLATFORM_ROLE_ARN"
echo "REARC_DATA_PLATFORM_EXTERNAL_ID: $REARC_DATA_PLATFORM_EXTERNAL_ID"
echo "CUSTOMER_ID: $CUSTOMER_ID"
echo "ASSET_BUCKET: $ASSET_BUCKET"
echo "MANIFEST_BUCKET: $MANIFEST_BUCKET"
echo "DATASET_NAME: $DATASET_NAME"
echo "DATASET_ARN: $DATASET_ARN"
echo "PRODUCT_NAME: $PRODUCT_NAME"
echo "PRODUCT_ID: $PRODUCT_ID"
echo "SCHEDULE_CRON: $SCHEDULE_CRON"
echo "REGION: $REGION"
echo "PROFILE: $PROFILE"
echo "PRODUCTS_INFO_FILE: $PRODUCTS_INFO_FILE"
echo "SOURCE_URL: $SOURCE_URL"
echo "PRODUCT_CODE: $PRODUCT_CODE"
echo "PRODUCT_URL: $PRODUCT_URL"
echo "FIRST_REVISION: $FIRST_REVISION"
echo "------------------------------------------------------------------------------"

while [[ ${#DATASET_NAME} -gt 53 ]]; do
echo "dataset-name must be under 53 characters in length, enter a shorter name:"
read -p "New dataset-name: " DATASET_NAME
@@ -47,109 +78,68 @@ while [[ ${#PRODUCT_NAME} -gt 72 ]]; do
esac
done

#creating a pre-processing zip package, these commands may need to be adjusted depending on folder structure and dependencies
echo "creating a pre-processing zip package, these commands may need to be adjusted depending on folder structure and dependencies"
(cd pre-processing/pre-processing-code && zip -r pre-processing-code.zip . -x "*.dist-info/*" -x "bin/*" -x "**/__pycache__/*")
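
# Optional sanity check (standard Info-ZIP tooling assumed): list the first
# few entries of the package before uploading, e.g.
#   unzip -l pre-processing/pre-processing-code/pre-processing-code.zip | head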

#upload pre-processing-code.zip to s3
echo "uploading pre-processing-code.zip to s3"
aws s3 cp pre-processing/pre-processing-code/pre-processing-code.zip s3://$S3_BUCKET/$DATASET_NAME/automation/pre-processing-code.zip --region $REGION$PROFILE

#creating dataset on ADX
echo "creating dataset on ADX"
DATASET_COMMAND="aws dataexchange create-data-set --asset-type "S3_SNAPSHOT" --description file://dataset-description.md --name \"${PRODUCT_NAME}\" --region $REGION --output json$PROFILE"
DATASET_OUTPUT=$(eval $DATASET_COMMAND)
DATASET_ARN=$(echo $DATASET_OUTPUT | tr '\r\n' ' ' | jq -r '.Arn')
DATASET_ID=$(echo $DATASET_OUTPUT | tr '\r\n' ' ' | jq -r '.Id')

#creating pre-processing cloudformation stack
echo "creating pre-processing cloudformation stack"
CFN_STACK_NAME="producer-${DATASET_NAME}-preprocessing"
aws cloudformation create-stack --stack-name $CFN_STACK_NAME --template-body file://pre-processing/pre-processing-cfn.yaml --parameters ParameterKey=S3Bucket,ParameterValue=$S3_BUCKET ParameterKey=DataSetName,ParameterValue=$DATASET_NAME ParameterKey=DataSetArn,ParameterValue=$DATASET_ARN ParameterKey=ProductId,ParameterValue=$PRODUCT_ID ParameterKey=Region,ParameterValue=$REGION --region $REGION --capabilities "CAPABILITY_AUTO_EXPAND" "CAPABILITY_NAMED_IAM" "CAPABILITY_IAM"$PROFILE

echo "waiting for cloudformation stack to complete"
aws cloudformation wait stack-create-complete --stack-name $CFN_STACK_NAME --region $REGION$PROFILE

if [[ $? -ne 0 ]]
then
# CloudFormation stack creation failed
echo "Cloudformation stack creation failed"
exit 1
fi

#invoking the pre-processing lambda function to create first dataset revision
echo "invoking the pre-processing lambda function to create first dataset revision"
LAMBDA_FUNCTION_NAME="source-for-${DATASET_NAME}"
# AWS CLI version 2 changes require explicitly declaring `--cli-binary-format raw-in-base64-out` for the format of the `--payload`
LAMBDA_FUNCTION_STATUS_CODE=$(aws lambda invoke --function-name $LAMBDA_FUNCTION_NAME --invocation-type "RequestResponse" --payload '{ "test": "event" }' response.json --cli-binary-format raw-in-base64-out --region $REGION --query 'StatusCode' --output text$PROFILE)
aws s3 cp pre-processing/pre-processing-code/pre-processing-code.zip s3://$ASSET_BUCKET/$DATASET_NAME/automation/pre-processing-code.zip --region "$REGION" $PROFILE

#grabbing dataset revision status
echo "grabbing dataset revision status"
DATASET_REVISION_STATUS=$(aws dataexchange list-data-set-revisions --data-set-id $DATASET_ID --region $REGION --query "sort_by(Revisions, &CreatedAt)[-1].Finalized"$PROFILE)
if [[ "$FIRST_REVISION" == "true" ]]; then
echo "creating dataset on ADX"
DATASET_COMMAND="aws dataexchange create-data-set --asset-type "S3_SNAPSHOT" --description file://dataset-description.md --name \"${PRODUCT_NAME}\" --region $REGION --output json $PROFILE"
DATASET_OUTPUT=$(eval $DATASET_COMMAND)
DATASET_ARN=$(echo $DATASET_OUTPUT | tr '\r\n' ' ' | jq -r '.Arn')
DATASET_ID=$(echo $DATASET_OUTPUT | tr '\r\n' ' ' | jq -r '.Id')
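
# For reference, create-data-set returns a JSON document that includes the
# Arn and Id fields extracted above; a sketch of its shape (placeholder
# values, other fields omitted):
#   {
#     "Arn": "arn:aws:dataexchange:us-east-1:111122223333:data-sets/abc123",
#     "Id": "abc123",
#     ...
#   }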

update () {
echo ""
echo "Manually create the ADX product and enter in the Product ID below:"
read -p "Product ID: " NEW_PRODUCT_ID

# Cloudformation stack update
echo "updating pre-processing cloudformation stack"
aws cloudformation update-stack --stack-name $CFN_STACK_NAME --use-previous-template --parameters ParameterKey=S3Bucket,ParameterValue=$S3_BUCKET ParameterKey=DataSetName,ParameterValue=$DATASET_NAME ParameterKey=DataSetArn,ParameterValue=$DATASET_ARN ParameterKey=ProductId,ParameterValue=$NEW_PRODUCT_ID ParameterKey=Region,ParameterValue=$REGION --region $REGION --capabilities "CAPABILITY_AUTO_EXPAND" "CAPABILITY_NAMED_IAM" "CAPABILITY_IAM"$PROFILE

echo "waiting for cloudformation stack update to complete"
aws cloudformation wait stack-update-complete --stack-name $CFN_STACK_NAME --region $REGION$PROFILE

if [[ $? -ne 0 ]]
then
echo "Cloudformation stack update failed"
break
if [[ -n "$PRODUCTS_INFO_FILE" ]]; then
echo "{\"PRODUCT_CODE\":\"${PRODUCT_CODE}\",\"PRODUCT_URL\":\"${PRODUCT_URL}\",\"SOURCE_URL\": \"${SOURCE_URL}\",\"DATASET_NAME\":\"${DATASET_NAME}\",\"DATASET_ARN\":\"${DATASET_ARN}\",\"DATASET_ID\":\"${DATASET_ID}\",\"PRODUCT_NAME\":\"${PRODUCT_NAME}\",\"PRODUCT_ID\":\"${PRODUCT_ID}\",\"SCHEDULE_CRON\":\"${SCHEDULE_CRON}\"}" >> "$PRODUCTS_INFO_FILE"
fi
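
# When --products-info-file is set, each run appends one JSON object per
# product, along these lines (placeholder values):
#   {"PRODUCT_CODE":"example-code","PRODUCT_URL":"https://example.com/product",
#    "SOURCE_URL":"https://example.com/source","DATASET_NAME":"example-dataset",
#    "DATASET_ARN":"arn:aws:dataexchange:...","DATASET_ID":"abc123",
#    "PRODUCT_NAME":"Example Product","PRODUCT_ID":"prod-123",
#    "SCHEDULE_CRON":"cron(0 12 * * ? *)"}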
echo "cloudformation stack update completed"
}

delete () {
echo "Destroying the CloudFormation stack"
aws cloudformation delete-stack --stack-name $CFN_STACK_NAME --region $REGION$PROFILE

#check status of cloudformation stack delete action
aws cloudformation wait stack-delete-complete --stack-name $CFN_STACK_NAME --region $REGION$PROFILE
if [[ $? -eq 0 ]]
then
# Cloudformation stack deleted
echo "CloudFormation stack successfully deleted"
break
else
# Cloudformation stack deletion failed
echo "Cloudformation stack deletion failed"
exit 1
fi
}

if [[ $DATASET_REVISION_STATUS == "true" ]]
then
echo "Dataset revision completed successfully"
echo ""
echo "Uploading intial assets to asset_bucket for the first revision"
aws s3 cp product-description.md "s3:https://$ASSET_BUCKET/$DATASET_NAME/dataset/product-description.md"
aws s3 cp dataset-description.md "s3:https://$ASSET_BUCKET/$DATASET_NAME/dataset/dataset-description.md"

while true; do
echo "Do you want use this script to update the CloudFormation stack? If you enter 'n' your CloudFormation stack will be destroyed:"
read -p "('y' to update / 'n' to destroy): " Y_N
case $Y_N in
[Yy]* ) update; exit;;
[Nn]* ) delete; break;;
* ) echo "Enter 'y' or 'n'.";;
esac
done

echo "Manually create the ADX product and manually re-run the pre-processing CloudFormation template using the following params:"
REVISION_COMMAND="aws dataexchange create-data-set --asset-type "S3_SNAPSHOT" --description file://dataset-description.md --name \"${PRODUCT_NAME}\" --region $REGION --output json $PROFILE"
REVISION_OUTPUT=$(eval $REVISION_COMMAND)

echo "Manually, from ADX console, create the first revision of the dataset using
product-description.md and dataset-description.md files and
then create the ADX product.
Then manually re-run the pre-processing CloudFormation template using the following params:"
echo ""
echo "S3Bucket: $S3_BUCKET"
echo "AssetBucket: $ASSET_BUCKET"
echo "ManifestBucket: $MANIFEST_BUCKET"
echo "CustomerId: $CUSTOMER_ID"
echo "DataSetName: $DATASET_NAME"
echo "DataSetArn: $DATASET_ARN"
echo "Region: $REGION"
echo "S3Bucket: $S3_BUCKET"
echo "FIRST_REVISION: false"
echo ""
echo "For the ProductId param use the Product ID of the ADX product"

else
echo "Dataset revision failed"
cat response.json
DATASET_ID=$(echo $DATASET_ARN | awk -F/ '{print $NF}')
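
# For a (hypothetical) dataset ARN such as
#   arn:aws:dataexchange:us-east-1:111122223333:data-sets/abc123
# the awk -F/ '{print $NF}' above extracts the trailing ID, "abc123".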

echo "creating pre-processing cloudformation stack"
CFN_STACK_NAME="producer-${DATASET_NAME}-preprocessing"
aws cloudformation create-stack --stack-name "$CFN_STACK_NAME" --template-body file://pre-processing/pre-processing-cfn.yaml --parameters ParameterKey=RearcDataPlatformRoleArn,ParameterValue="$REARC_DATA_PLATFORM_ROLE_ARN" ParameterKey=RearcDataPlatformExternalId,ParameterValue="$REARC_DATA_PLATFORM_EXTERNAL_ID" ParameterKey=AssetBucket,ParameterValue="$ASSET_BUCKET" ParameterKey=ManifestBucket,ParameterValue="$MANIFEST_BUCKET" ParameterKey=CustomerId,ParameterValue="$CUSTOMER_ID" ParameterKey=DataSetName,ParameterValue="$DATASET_NAME" ParameterKey=DataSetArn,ParameterValue="$DATASET_ARN" ParameterKey=ProductId,ParameterValue="$PRODUCT_ID" ParameterKey=Region,ParameterValue="$REGION" ParameterKey=ScheduleCron,ParameterValue="'$SCHEDULE_CRON'" --region "$REGION" --capabilities "CAPABILITY_AUTO_EXPAND" "CAPABILITY_NAMED_IAM" "CAPABILITY_IAM" $PROFILE

echo "waiting for cloudformation stack creation to complete"
aws cloudformation wait stack-create-complete --stack-name "$CFN_STACK_NAME" --region "$REGION" $PROFILE

if [[ $? -ne 0 ]]; then
echo "Cloudformation stack creation failed"
exit 1
fi
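
# If creation fails, a useful first check (standard CloudFormation CLI call,
# offered here as a suggestion) is the stack's failed events:
#   aws cloudformation describe-stack-events --stack-name "$CFN_STACK_NAME" \
#     --region "$REGION" $PROFILE \
#     --query 'StackEvents[?ResourceStatus==`CREATE_FAILED`]'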

echo "invoking the pre-processing lambda function to upload manifest file to manifest bucket"
LAMBDA_FUNCTION_NAME="source-for-${DATASET_NAME}"
# AWS CLI version 2 changes require explicitly declaring `--cli-binary-format raw-in-base64-out` for the format of the `--payload`
aws lambda invoke --function-name "$LAMBDA_FUNCTION_NAME" --invocation-type "RequestResponse" --payload '{ "test": "event" }' response.json --cli-binary-format raw-in-base64-out --region "$REGION" --query 'StatusCode' --output text $PROFILE

if [[ $? -ne 0 ]]; then
echo "Lambda invocation failed"
exit 1
fi
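
# response.json holds the Lambda's response payload from the invocation above;
# it can be inspected with jq (already a dependency of this script), e.g.
#   jq . response.json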

fi
38 changes: 38 additions & 0 deletions migrate.sh
@@ -0,0 +1,38 @@

#### You only need to run this part ONCE

# clone the template
git clone https://github.com/rearc-data/adx-product-rearc-data-platform-template.git

# remove extra files / folders
cd adx-product-rearc-data-platform-template
rm *.md
rm -rf .git
rm pre-processing/pre-processing-code/source_data.py

cd ..


##### Now run the following commands FOR EACH PRODUCT
# Step 1:
# Go to the cloudformation console, find the stack for the product you want to migrate, copy the parameters section, delete the stack
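# (If you prefer the CLI, the parameters can also be captured before deleting
# the stack; <old-stack-name> below is whatever the existing product's stack
# is called:
#   aws cloudformation describe-stacks --stack-name <old-stack-name> \
#     --query 'Stacks[0].Parameters' --output json )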

# Step 2: once the stack is deleted:
git clone https://github.com/rearc-data/fred-privately-owned-housing.git
cd fred-privately-owned-housing

git checkout -b rdp
cp -a ../adx-product-rearc-data-platform-template/. ./

# Step 3: Using the parameters you copied from the CloudFormation stack
# and your AWS profile name, update the variables in run.sh

# Step 4: in source.py
# replace: os.getenv('S3_BUCKET') => os.getenv('ASSET_BUCKET')
# replace: os.environ['DATA_SET_NAME'] => os.environ['DATASET_NAME']
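
# The two replacements can be scripted; a sketch assuming GNU sed and the
# usual pre-processing/pre-processing-code/source.py layout:
#   sed -i "s/os.getenv('S3_BUCKET')/os.getenv('ASSET_BUCKET')/g; \
#     s/os.environ\['DATA_SET_NAME'\]/os.environ['DATASET_NAME']/g" \
#     pre-processing/pre-processing-code/source.py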


# Step 5: make sure the variable names are correct in run.sh, then run it
chmod a+x run.sh
./run.sh
