Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
Melon Rusk committed Dec 17, 2022
0 parents commit dabdf06
Show file tree
Hide file tree
Showing 26 changed files with 1,251 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
*.pyc
video_editing/create_terminal_big_text.py
32 changes: 32 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Fetch Trending Topic(s) from Twitter and create video around that and upload in Youtube
_________
## Fetch Trending Topics
- Fetch Trending Topics for the given location
- Library used: *Tweepy*, *geocoder*
## Create a summary for each trending topic
- Fetch Tweets related to text (from last 2hr)
- Filter tweets of specified language
- Clean tweets: remove emojis, URLs, punctuation, and stopwords
- Generate Sentiment
- Create an extractive summary from the text
- Clean the text: remove stray ASCII characters and URLs
- Create an abstractive summary from the extractive summary
- Library module/used: *Tweepy*,*nltk*,*transformers*
## Generate Video
- Generate Audio from text using google text to speech
- Generate Image from text using open cv
- Generate Video using audio and image
- Library/modules used: *gtts*,*cv2 (opencv)*, *moviepy*
## Concatenate Videos
- Merge the videos for each trending topic into one
- Library/modules used: *moviepy*
## Upload video on YT
- Using Twitter trending topics as Tags
- And summary as description
- Using YouTube's client library, upload the video to YouTube
- Library/Modules used: *googleapiclient*
----
# Update Credentials in config to test it
----
References — code adapted from:
-[text_processing.clean_text.py](https://gist.github.com/MrEliptik/b3f16179aa2f530781ef8ca9a16499af?permalink_comment_id=3970601) ((used almost as it is))
Empty file added api_modules/__init__.py
Empty file.
179 changes: 179 additions & 0 deletions api_modules/refresh_token.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
#!/usr/bin/env python
# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This example will create an OAuth2 refresh token for the Google Ads API.
This example works with both web and desktop app OAuth client ID types.
https://console.cloud.google.com
IMPORTANT: For web app client types, you must add "http://127.0.0.1" to the
"Authorized redirect URIs" list in your Google Cloud Console project before
running this example. Desktop app client types do not require the local
redirect to be explicitly configured in the console.
Once complete, download the credentials and save the file path so it can be
passed into this example.
This example is a very simple implementation, for a more detailed example see:
https://developers.google.com/identity/protocols/oauth2/web-server#python
"""

import argparse
import hashlib
import os
import re
import socket
import sys
from urllib.parse import unquote

# If using Web flow, the redirect URL must match exactly what’s configured in GCP for
# the OAuth client. If using Desktop flow, the redirect must be a localhost URL and
# is not explicitly set in GCP.
from google_auth_oauthlib.flow import Flow

# OAuth scope requested and the local endpoint used to capture the auth code.
_SCOPE = "https://www.googleapis.com/auth/youtube"
_SERVER = "127.0.0.1"
_PORT = 8080
# BUG FIX: the scheme was garbled ("http:https://..."); the redirect URI must
# be a plain http URL and must exactly match the URI registered as an
# "Authorized redirect URI" in the Google Cloud Console.
_REDIRECT_URI = f"http://{_SERVER}:{_PORT}"


def main(client_secrets_path, scopes):
    """Start a basic local server and run an OAuth2 authorization request.

    Args:
        client_secrets_path: path to the client secrets JSON file on the
            machine running this example.
        scopes: a list of API scopes to include in the auth request, see:
            https://developers.google.com/identity/protocols/oauth2/scopes
    """
    flow = Flow.from_client_secrets_file(client_secrets_path, scopes=scopes)
    flow.redirect_uri = _REDIRECT_URI

    # Anti-forgery state token, as described here:
    # https://developers.google.com/identity/protocols/OpenIDConnect#createxsrftoken
    state_token = hashlib.sha256(os.urandom(1024)).hexdigest()

    auth_url, _ = flow.authorization_url(
        access_type="offline",
        state=state_token,
        prompt="consent",
        include_granted_scopes="true",
    )

    # In a real web application the user would be redirected to this URL and
    # then sent back to our redirect_uri after granting permission; here we
    # just print it for manual pasting into a browser.
    print("Paste this URL into your browser: ")
    print(auth_url)
    print(f"\nWaiting for authorization and callback to: {_REDIRECT_URI}")

    # Block on a raw socket until the browser hits the redirect URI, then
    # pull the (URL-encoded) authorization code out of the query string.
    auth_code = unquote(get_authorization_code(state_token))

    # Exchange the authorization code for access + refresh tokens.
    flow.fetch_token(code=auth_code)

    print(f"\nYour refresh token is: {flow.credentials.refresh_token}\n")
    print(
        "Add your refresh token to your client library configuration as "
        "described here: "
        "https://developers.google.com/google-ads/api/docs/client-libs/python/configuration"
    )


def get_authorization_code(passthrough_val):
    """Opens a socket to handle a single HTTP request containing auth tokens.

    Args:
        passthrough_val: an anti-forgery token used to verify the request
            received by the socket.
    Returns:
        a str authorization code from the Google Auth service.
    """
    # Open a socket at _SERVER:_PORT and listen for a single request.
    sock = socket.socket()
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    sock.bind((_SERVER, _PORT))
    sock.listen(1)
    connection, address = sock.accept()
    # BUG FIX: the listening socket was never closed, leaking the port until
    # interpreter exit; close it as soon as the one connection is accepted.
    sock.close()
    data = connection.recv(1024)
    # Parse the raw request to retrieve the URL query parameters.
    params = parse_raw_query_params(data)

    try:
        if not params.get("code"):
            # If no code is present in the query params then there will be an
            # error message with more details.
            error = params.get("error")
            message = f"Failed to retrieve authorization code. Error: {error}"
            raise ValueError(message)
        elif params.get("state") != passthrough_val:
            message = "State token does not match the expected state."
            raise ValueError(message)
        else:
            message = "Authorization code was successfully retrieved."
    except ValueError as error:
        print(error)
        sys.exit(1)
    finally:
        # Always answer the browser — even on failure (sys.exit raises
        # SystemExit, so this finally still runs) — before tearing down.
        response = (
            "HTTP/1.1 200 OK\n"
            "Content-Type: text/html\n\n"
            f"<b>{message}</b>"
            "<p>Please check the console output.</p>\n"
        )

        connection.sendall(response.encode())
        connection.close()

    return params.get("code")


def parse_raw_query_params(data):
    """Parses a raw HTTP request to extract its query params as a dict.

    Note that this logic is likely irrelevant if you're building OAuth logic
    into a complete web application, where response parsing is handled by a
    framework.

    Args:
        data: raw request data as bytes.
    Returns:
        a dict of query parameter key/value pairs (values still URL-encoded).
    """
    # Decode the request into a utf-8 encoded string.
    decoded = data.decode("utf-8")
    # Extract the query string from the request line ("GET /?<params> HTTP/1.1").
    # BUG FIX: the pattern is now a raw string — the old "GET\s\/\?..." form
    # relied on invalid escape sequences in a normal string literal, which is
    # a DeprecationWarning today and a SyntaxError in future Python versions.
    match = re.search(r"GET\s/\?(.*) ", decoded)
    params = match.group(1)
    # Split the parameters to isolate the key/value pairs.
    # BUG FIX: split on the FIRST "=" only, so values that themselves contain
    # "=" (e.g. base64 padding in tokens) don't break the unpacking below.
    pairs = [pair.split("=", 1) for pair in params.split("&")]
    # Convert pairs to a dict to make it easy to access the values.
    return {key: val for key, val in pairs}


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description=(
            "Generates OAuth2 refresh token using the Web application flow. "
            "To retrieve the necessary client_secrets JSON file, first "
            "generate OAuth 2.0 credentials of type Web application in the "
            "Google Cloud Console (https://console.cloud.google.com). "
            # BUG FIX: the help text contained a garbled placeholder URL
            # ("http:https://_SERVER:_PORT"); show the actual redirect URI.
            f"Make sure '{_REDIRECT_URI}' is included in the list of "
            "'Authorized redirect URIs' for this client ID."
        ),
    )

    # Parser is kept for --help output; no CLI args are consumed yet.
    # args = parser.parse_args()

    configured_scopes = [_SCOPE]
    # if args.additional_scopes:
    #     configured_scopes.extend(args.additional_scopes)
    # BUG FIX: build the path portably instead of hard-coding a Windows
    # backslash separator, so the script also runs on POSIX systems.
    file_path = os.path.join('config', 'client_secret_yt.json')
    main(file_path, configured_scopes)
144 changes: 144 additions & 0 deletions api_modules/twitter_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
import requests
import pickle
from pathlib import Path
import sys
import re
import os
import geocoder
import tweepy
from tweepy import OAuthHandler #type: ignore
import json
# from datetime import datetime
import datetime
sys.path.append(str(Path(__file__).resolve().parent.parent))
from config import config


# sys.path.append(os.path.dirname(__file__))

def refresh_access_token():
    """Request a new app-only bearer token from Twitter's OAuth2 endpoint.

    Prints the raw response body and returns the new access token string.
    """
    token_url = "https://api.twitter.com/oauth2/token"

    body = 'grant_type=client_credentials'
    request_headers = {
        'Authorization': f'Basic {config.TWITTER_REFRESH_TOKEN}',
        'Content-Type': 'application/x-www-form-urlencoded'
    }

    resp = requests.request("POST", token_url, headers=request_headers, data=body)
    print(resp.text)
    return resp.json()['access_token']


def load_access_token():
    """Load a previously cached access token from the pickle file.

    Returns:
        the unpickled access token.
    """
    # BUG FIX: the file was opened with mode 'wb' (write), which both
    # truncates the cached token and makes pickle.load fail immediately;
    # it must be opened for reading ('rb').
    with open('store_data/access_token.pickle', 'rb') as f1:
        access_token = pickle.load(f1)
    return access_token


def get_tweets_from_id(id: str) -> str:
    """Fetch tweet(s) by id and return the text of the first one.

    Args:
        id: a tweet id (the endpoint accepts comma-separated ids, but only
            the first returned tweet's text is used here).
    Returns:
        the tweet text.
    """
    url = f"https://api.twitter.com/2/tweets?ids={id}"
    payload = {}
    headers = {'Authorization': f'Bearer {config.BEARER_TOKEN}'}

    response = requests.request("GET", url, headers=headers, data=payload)

    print(response.text)
    # BUG FIX: the return annotation said -> dict, but this returns the
    # tweet's text, which is a str.
    return response.json()['data'][0]['text']

def get_auth():
    """Build a tweepy OAuth1 handler from the credentials in config."""
    handler = OAuthHandler(config.API_KEY, config.API_SECRET_KEY)
    handler.set_access_token(config.ACCESS_TOKEN, config.ACCESS_TOKEN_SECRET)
    return handler

def save_trends(trends, loc):
    """Persist a trends payload as JSON under the repo's store_data/ folder.

    The file name encodes the current timestamp and the location, both
    sanitized down to alphanumerics and underscores.

    Args:
        trends: JSON-serializable trends data.
        loc: the location (name string or (lat, lng) pair) used in the name.
    """
    # BUG FIX: build paths with pathlib instead of hard-coded "\\" separators
    # so this also works on POSIX systems (Path is already imported).
    folder = Path(__file__).resolve().parent.parent / "store_data"
    loc_tag = re.sub('[^0-9a-zA-Z]+', '_', str(loc))
    time_tag = re.sub('[^0-9a-zA-Z]+', '_', str(datetime.datetime.now()))
    file_name = folder / f"trends_time_{time_tag}_loc_{loc_tag}.json"
    with open(file_name, "w") as f1:
        json.dump(trends, f1)


def get_trends(loc):
    """Fetch the Twitter trends closest to a location and save them to disk.

    Args:
        loc: either a place-name string (geocoded via OSM) or a
            (latitude, longitude) pair.
    Returns:
        the list of trend dicts for the closest trends location.
    """
    auth = get_auth()
    api = tweepy.API(auth)  # type: ignore
    # Resolve the location to coordinates.
    # IDIOM FIX: isinstance() instead of `type(loc) == str`.
    if isinstance(loc, str):
        g = geocoder.osm(loc)
        lat = g.lat
        long = g.lng
    else:
        lat = loc[0]
        long = loc[1]
    closest_loc = api.closest_trends(lat, long)
    trends = api.get_place_trends(closest_loc[0]["woeid"])
    trending = trends[0]["trends"]
    save_trends(trending, loc)

    return trending


def get_tweet_from_text(text, start_time, end_time, next_token='', max_tweets=100, counter=1):
    """Search recent tweets matching text, paginating through up to ~6 pages.

    Args:
        text: search phrase; chars other than alphanumerics and $ are stripped.
        start_time: ISO-8601 start of the search window.
        end_time: ISO-8601 end of the search window.
        next_token: pagination token from a previous page (empty = first page).
        max_tweets: page size requested from the recent-search endpoint.
        counter: recursion depth, used to stop after 6 pages.
    Returns:
        a (possibly empty) list of tweet objects.
    """
    data = []
    search_url = "https://api.twitter.com/2/tweets/search/recent"
    text = re.sub('[^0-9a-zA-Z$]+', '', text)
    url = f"{search_url}?query={text}&start_time={start_time}&end_time={end_time}&max_results={max_tweets}&tweet.fields=lang"
    if len(next_token) > 5:
        url = f"{url}&next_token={next_token}"

    payload = {}
    headers = {
        'Authorization': f'Bearer {config.BEARER_TOKEN}'}

    response = requests.request("GET", url, headers=headers, data=payload)

    if response.status_code == 200:
        # BUG FIX: 'data' is absent from the payload when a page has no
        # results, so .get('data') could return None and crash the list
        # concatenation below; fall back to an empty list.
        data = response.json().get('data') or []
        next_token = response.json()['meta'].get('next_token', "End")
        if counter < 6 and next_token != 'End':
            counter = counter + 1
            print(f"Attempt in get_tweet_from_text {counter}")
            data_temp = get_tweet_from_text(
                text, start_time, end_time,
                next_token=next_token, max_tweets=100, counter=counter)
            data = data + data_temp
    return data





def main():
    """Ad-hoc driver exercising the Twitter API helpers."""
    get_tweets_from_id('1601569532782735361')
    text = 'IshanKishan'

    # Search the last ~2 hours, ending 30s ago (the recent-search endpoint
    # rejects end_time values too close to "now").
    # BUG FIX: removed the stale hard-coded end_time that was immediately
    # overwritten, and the dead `if True/else` input() branch.
    end_time = (datetime.datetime.now(datetime.timezone.utc)
                - datetime.timedelta(seconds=30)).isoformat().replace("+00:00", "Z")
    start_time = (datetime.datetime.now(datetime.timezone.utc)
                  - datetime.timedelta(hours=2)).isoformat().replace("+00:00", "Z")
    response = get_tweet_from_text(text, start_time, end_time)

    # Hard-coded test coordinates for the trends lookup below.
    loc = (25.24, 85.6)
    # trends = get_trends(loc)
    # print(json.dumps(trends, indent=4))






# Script entry point: run the ad-hoc driver when executed directly.
if __name__ == '__main__':
    # refresh_token()
    main()
Loading

0 comments on commit dabdf06

Please sign in to comment.