Skip to content

Commit

Permalink
Merge pull request #34 from ananis25/main
Browse files Browse the repository at this point in the history
add text2sql example using neon as vector-store
  • Loading branch information
sophiamyang authored May 13, 2024
2 parents e200507 + 2da371b commit 493cd0c
Show file tree
Hide file tree
Showing 4 changed files with 4,606 additions and 0 deletions.
160 changes: 160 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
41 changes: 41 additions & 0 deletions data/northwind-queries.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
{"question": "Select all category names with their descriptions from the Categories table.", "query": "SELECT category_name, description FROM categories"}
{"question": "Select the contact name, customer id, and company name of all Customers in London", "query": "SELECT contact_name, customer_id, company_name FROM customers WHERE city = 'London'"}
{"question": "Marketing managers and sales representatives have asked you to select all available columns in the Suppliers tables that have a FAX number.", "query": "SELECT * FROM suppliers WHERE NOT fax IS NULL"}
{"question": "Select a list of customer IDs from the Orders table with required dates between Jan 1, 1997 and Jan 1, 1998 and with freight under 100 units.", "query": "SELECT customer_id FROM orders WHERE required_date BETWEEN '1997-01-01' AND 'Jan 1, 1998' AND freight < 100"}
{"question": "Select a list of company names and contact names of all the Owners from the Customer table from Mexico, Sweden and Germany.", "query": "SELECT company_name, contact_name FROM customers WHERE country IN ('Mexico', 'Sweden', 'Germany') AND contact_title = 'Owner'"}
{"question": "Count the number of discontinued products in the Products table.", "query": "SELECT COUNT(*) FROM products WHERE discontinued = 1"}
{"question": "Select a list of category names and descriptions of all categories beginning with 'Co' from the Categories table.", "query": "SELECT \"category_name\", \"description\" FROM \"categories\" WHERE \"category_name\" LIKE 'Co%'"}
{"question": "Select all the company names, city, country and postal code from the Suppliers table with the word 'rue' in their address. The list should be ordered alphabetically by company name.", "query": "SELECT company_name, city, country, postal_code FROM suppliers WHERE address LIKE '%rue%' ORDER BY company_name NULLS FIRST"}
{"question": "Select the product id and the total quantities ordered for each product id in the Order Details table.", "query": "SELECT product_id AS \"product_id\", SUM(quantity) AS \"total_quantity\" FROM \"order_details\" GROUP BY product_id ORDER BY \"total_quantity\" NULLS FIRST"}
{"question": "Select the customer name and customer address of all customers with orders that shipped using Speedy Express.", "query": "SELECT DISTINCT customers.contact_name, customers.address FROM customers INNER JOIN orders ON customers.customer_id = orders.customer_id INNER JOIN shippers ON orders.ship_via = shippers.shipper_id WHERE shippers.company_name = 'Speedy Express'"}
{"question": "Select a list of Suppliers containing company name, contact name, contact title and region description.", "query": "SELECT company_name, contact_name, contact_title, region FROM suppliers WHERE NOT contact_name IS NULL AND NOT contact_title IS NULL AND NOT region IS NULL"}
{"question": "Select all product names from the Products table that are condiments.", "query": "SELECT products.product_name FROM products INNER JOIN categories ON products.category_id = categories.category_id WHERE categories.category_name = 'Condiments'"}
{"question": "Select a list of customer names who have no orders in the Orders table.", "query": "SELECT contact_name FROM customers WHERE NOT customer_id IN (SELECT DISTINCT customer_id FROM orders)"}
{"question": "Select a complete list of company names from the Shippers table. Include freight totals rounded to the nearest whole number for each shipper from the Orders table for those shippers with orders.", "query": "SELECT shippers.company_name, ROUND(CAST(SUM(orders.freight) AS INT), 0) AS \"total_freights\" FROM shippers LEFT OUTER JOIN orders ON orders.ship_via = shippers.shipper_id GROUP BY shippers.company_name"}
{"question": "Select all employee first and last names from the Employees table by combining the 2 columns aliased as 'DisplayName'. The combined format should be 'LastName, FirstName'.", "query": "SELECT CONCAT(last_name, ', ', first_name) AS \"display_name\" FROM employees"}
{"question": "Select a list of products from the Products table along with the total units in stock for each product. Give the computed column a name using the alias, 'TotalUnits'. Include only products with TotalUnits greater than 100.", "query": "SELECT product_name, units_in_stock AS total_units FROM products WHERE units_in_stock > 100"}
{"question": "Select the name, address, city, and region of employees.", "query": "SELECT first_name, address, city, COALESCE(region, '-') AS \"region\" FROM employees"}
{"question": "Select the name, address, city, and region of employees living in USA", "query": "SELECT first_name, \"address\", city, COALESCE(region, ' - ') AS \"region\" FROM employees WHERE country = 'USA'"}
{"question": "Select the name, address, city, and region of employees older than 50 years old.", "query": "SELECT last_name, first_name, \"address\", city, COALESCE(region, ' - ') AS \"region\" FROM employees WHERE CAST(EXTRACT(year FROM AGE(CAST(CAST(CURRENT_TIMESTAMP AS TIMESTAMP) AS TIMESTAMP), CAST(CAST(birth_date AS TIMESTAMP) AS TIMESTAMP))) AS BIGINT) > 50"}
{"question": "Select the name, address, city, and region of employees that have placed orders to be delivered in Belgium. Write two versions of the query, with and without join.", "query": "SELECT DISTINCT e.last_name, e.first_name, e.\"address\", e.city, COALESCE(e.region, ' - ') AS \"region\" FROM employees AS e INNER JOIN orders AS o ON e.employee_id = o.employee_id WHERE o.ship_country = 'Belgium'"}
{"question": "Select the employee name and the customer name for orders that are sent by the company \u2018Speedy Express\u2019 to customers who live in Brussels.", "query": "SELECT DISTINCT e.last_name, e.first_name, c.contact_name FROM employees AS e JOIN orders AS o ON o.employee_id = e.employee_id JOIN customers AS c ON o.customer_id = c.customer_id JOIN shippers AS s ON o.ship_via = s.shipper_id WHERE s.company_name = 'Speedy Express' AND c.city = 'Bruxelles'"}
{"question": "Select the title and name of employees who have sold at least one of the products \u2018Gravad Lax\u2019 or \u2018Mishi Kobe Niku\u2019.", "query": "SELECT DISTINCT employees.last_name, employees.first_name, employees.title FROM employees INNER JOIN orders ON orders.employee_id = employees.employee_id INNER JOIN \"order_details\" ON \"order_details\".order_id = orders.order_id INNER JOIN products ON \"order_details\".product_id = products.product_id WHERE products.product_name IN ('Gravad Lax', 'Mishi Kobe Niku')"}
{"question": "Select the name and title of employees and the name and title of the person to whom they report (or null for the latter values if they don\u2019t report to another employee).", "query": "SELECT e.first_name AS \"employee_name\", e.last_name AS \"employee_lastname\", b.last_name AS \"reports_to\" FROM employees AS e LEFT OUTER JOIN employees AS b ON e.reports_to = b.employee_id"}
{"question": "Select the customer name, the product name and the supplier name for customers who live in London and suppliers whose name is \u2018Pavlova, Ltd.\u2019 or \u2018Karkki Oy\u2019.", "query": "SELECT DISTINCT c.contact_name AS \"customer\", p.product_name AS \"product\", s.contact_name AS \"supplier\" FROM customers AS c JOIN orders AS o ON o.customer_id = c.customer_id JOIN \"order_details\" AS od ON od.order_id = o.order_id JOIN products AS p ON p.product_id = od.product_id JOIN suppliers AS s ON s.supplier_id = p.supplier_id WHERE c.city = 'London' AND s.company_name IN ('Pavlova, Ltd.', 'Karkki Oy')"}
{"question": "Select the name of products that were bought or sold by people who live in London.", "query": "SELECT DISTINCT p.product_name FROM products AS p JOIN \"order_details\" AS od ON od.product_id = p.product_id JOIN orders AS o ON o.order_id = od.order_id JOIN employees AS e ON e.employee_id = o.employee_id JOIN customers AS c ON c.customer_id = o.customer_id WHERE c.city = 'London' OR e.city = 'London'"}
{"question": "Select the names of employees who are strictly older than: (a) any employee who lives in London. (b) all employees who live in London.", "query": "SELECT last_name, first_name FROM employees WHERE birth_date < ALL (SELECT birth_date FROM employees WHERE city = 'London')"}
{"question": "Select the name of employees who have worked longer than every employee based in London.", "query": "SELECT last_name, first_name FROM employees WHERE hire_date < ALL (SELECT hire_date FROM employees WHERE city = 'London')"}
{"question": "Select the name of employees and the city where they live for employees who have sold to customers in the same city.", "query": "SELECT DISTINCT e.last_name, e.first_name, e.city FROM employees AS e JOIN orders AS o ON o.employee_id = e.employee_id JOIN customers AS c ON c.customer_id = o.customer_id WHERE e.city = c.city"}
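{"question": "Select the name of employees and the city where they live for employees who live in the same city as at least one customer.", "query": "SELECT DISTINCT CONCAT(e.last_name, ' ', e.first_name) AS employee, e.city FROM employees AS e JOIN customers AS c ON e.city = c.city"}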
{"question": "Select the name of customers who have not purchased any product.", "query": "SELECT DISTINCT contact_name FROM customers WHERE NOT customer_id IN (SELECT DISTINCT customer_id FROM orders)"}
{"question": "Select the name of customers who bought only products with price less than 50.", "query": "SELECT contact_name FROM customers WHERE NOT customer_id IN (SELECT DISTINCT customers.customer_id FROM customers INNER JOIN orders ON orders.customer_id = customers.customer_id INNER JOIN \"order_details\" ON \"order_details\".order_id = orders.order_id INNER JOIN products ON products.product_id = \"order_details\".product_id WHERE products.unit_price >= 50) AND customer_id IN (SELECT DISTINCT customer_id FROM orders)"}
{"question": "Select the name of the products sold by all employees.", "query": "SELECT p.product_name FROM products AS p JOIN \"order_details\" AS od ON p.product_id = od.product_id JOIN orders AS o ON od.order_id = o.order_id GROUP BY p.product_name HAVING COUNT(DISTINCT o.employee_id) = (SELECT COUNT(*) FROM employees)"}
{"question": "Select the name of customers who bought all products purchased by the customer whose identifier is \u2018LAZYK\u2019", "query": "WITH products_of_lazyk AS (SELECT DISTINCT \"order_details\".\"product_id\" FROM \"customers\" JOIN \"orders\" ON \"orders\".\"customer_id\" = \"customers\".\"customer_id\" JOIN \"order_details\" ON \"order_details\".\"order_id\" = \"orders\".\"order_id\" WHERE \"customers\".\"customer_id\" = 'LAZYK'), customers_of_all_products_of_lazyk AS (SELECT DISTINCT \"customers\".\"contact_name\" FROM \"customers\" JOIN \"orders\" ON \"orders\".\"customer_id\" = \"customers\".\"customer_id\" JOIN \"order_details\" ON \"order_details\".\"order_id\" = \"orders\".\"order_id\" WHERE \"order_details\".\"product_id\" IN (SELECT * FROM products_of_lazyk) AND \"customers\".\"customer_id\" <> 'LAZYK' GROUP BY \"customers\".\"contact_name\" HAVING COUNT(DISTINCT \"order_details\".\"product_id\") = (SELECT COUNT(*) FROM products_of_lazyk)) SELECT * FROM customers_of_all_products_of_lazyk"}
{"question": "Select the average price of products by category.", "query": "SELECT category_id, AVG(unit_price) AS \"average_price\" FROM products GROUP BY category_id"}
{"question": "Give the name of the categories and the average price of products in each category.", "query": "SELECT c.category_name, AVG(p.unit_price) AS \"average_price\" FROM categories AS c JOIN products AS p ON p.category_id = c.category_id GROUP BY c.category_name ORDER BY \"average_price\" NULLS FIRST"}
{"question": "Select the identifier and the name of the companies that provide more than 3 products.", "query": "SELECT s.supplier_id, s.company_name FROM suppliers AS s JOIN products AS p ON p.supplier_id = s.supplier_id GROUP BY s.supplier_id, s.company_name HAVING COUNT(p.product_id) > 3"}
{"question": "Select the identifier, name, and number of orders of employees, ordered by the employee identifier.", "query": "SELECT e.employee_id, e.last_name, e.first_name, COUNT(o.order_id) AS \"orders\" FROM employees AS e LEFT JOIN orders AS o ON o.employee_id = e.employee_id GROUP BY e.employee_id, e.last_name, e.first_name ORDER BY e.employee_id NULLS FIRST"}
{"question": "For each employee give the identifier, name, and the number of distinct products sold, ordered by the employee identifier.", "query": "SELECT e.employee_id, e.last_name, e.first_name, COUNT(DISTINCT od.product_id) AS \"products_sold\" FROM employees AS e LEFT JOIN orders AS o ON o.employee_id = e.employee_id LEFT JOIN \"order_details\" AS od ON od.order_id = o.order_id GROUP BY e.employee_id, e.last_name, e.first_name ORDER BY e.employee_id NULLS FIRST"}
{"question": "Select the identifier, name, and total sales of employees, ordered by the employee identifier.", "query": "SELECT e.employee_id, e.last_name, e.first_name, COUNT(od.quantity) AS \"total_sales\" FROM employees AS e LEFT JOIN orders AS o ON o.employee_id = e.employee_id LEFT JOIN \"order_details\" AS od ON od.order_id = o.order_id GROUP BY e.employee_id, e.last_name, e.first_name ORDER BY e.employee_id NULLS FIRST"}
{"question": "Select the identifier, name, and total sales of employees, ordered by the employee identifier, for employees who have sold more than 70 different products.", "query": "SELECT e.employee_id, e.last_name, e.first_name, COUNT(od.quantity) AS \"total_sales\" FROM employees AS e LEFT JOIN orders AS o ON o.employee_id = e.employee_id LEFT JOIN \"order_details\" AS od ON od.order_id = o.order_id GROUP BY e.employee_id, e.last_name, e.first_name HAVING COUNT(DISTINCT od.product_id) > 70 ORDER BY e.employee_id NULLS FIRST"}
{"question": "Select the names of employees who sell the products of more than 7 suppliers.", "query": "SELECT e.last_name, e.first_name FROM employees AS e LEFT JOIN orders AS o ON o.employee_id = e.employee_id LEFT JOIN \"order_details\" AS od ON od.order_id = o.order_id LEFT JOIN products AS p ON p.product_id = od.product_id GROUP BY e.last_name, e.first_name HAVING COUNT(DISTINCT p.supplier_id) > 7"}
Loading

0 comments on commit 493cd0c

Please sign in to comment.