Why Do We Need SQLalchemy ?
As a business intelligence or data science developers, we often need to work with traditional relational databases. This could be a database migration scenario, or it could be ETL or just getting desired data for building a data science model.
SQLAlchemy provides a nice “Pythonic” way of interacting with databases. So rather than dealing with the differences between specific dialects of traditional SQL such as MySQL or PostgreSQL or Oracle, you can leverage the Pythonic framework of SQLAlchemy to streamline your workflow and more efficiently query your data.
Installing The SQLalchemy Package
pip install sqlalchemy
Connecting to a Database
To start interacting with the database we first we need to establish a connection.
import sqlalchemy as db
engine = db.create_engine(‘dialect+driver://user:pass@host:port/db’)
Imports
import psycopg2
from psycopg2 import Error
Connection with Greenplum Database
# Import create_engine function
from sqlalchemy import create_engine
# PGGSSENCMODE=”disable”
# Create an engine to the tutorial database in Greenplum
engine = create_engine(‘postgresql+psycopg2://gpadmin:pivotal@localhost:5432/tutorial?gssencmode=disable’)
# Use the .table_names() method on the engine to print the table names
print(engine.table_names())
Output
[‘spatial_ref_sys’, ‘houses’, ‘houses_linregr’, ‘houses_linregr_summary’, ‘houses_linregr_bedroom’, ‘houses_linregr_bedroom_summary’, ‘patients’, ‘patients_logregr’, ‘patients_logregr_summary’, ‘dt_golf’, ‘prediction_results’, ‘prediction_prob’, ‘test12’, ‘german_credit’, ‘houses_svm’, ‘houses_svm_summary’, ‘houses_svm_gaussian_random’, ‘houses_svm_gaussian’, ‘houses_svm_gaussian_summary’, ‘houses_pred’, ‘houses_pred_gaussian’, ‘german_credit_test’, ‘german_credit_training’, ‘gercredit_svm_gaussian_random’, ‘gercredit_svm_gaussian’, ‘gercredit_svm_gaussian_summary’, ‘credit_prediction’, ‘mobile’, ‘data’]
Building a Select Query
# Import select
from sqlalchemy import select
# Import create_engine, MetaData, and Table
from sqlalchemy import MetaData, Table
# Create a metadata object: metadata
metadata = MetaData()
# Reflect Houses table via engine: census
houses = Table(‘houses’, metadata, autoload=True, autoload_with=engine)
# Create a select query: stmt
stmt = select([houses])
# Add a where clause to filter the results to only those for New York
stmt = stmt.where(houses.columns.bedroom == 2)
# Create a connection on engine
connection = engine.connect()
# Execute the statement and fetch the results: results
results = connection.execute(stmt).fetchall()
# Print results
print(results)
Iterating over Resultset
# Loop over the results and print the age, sex, and pop2008
for result in results:
print(result.tax, result.bath, result.price)
Adding Order By Clause to SQL Statement
# Order stmt by the state column
stmt = stmt.order_by(houses.columns.price)
# Execute the query and store the results: results
results = connection.execute(stmt).fetchall()
# Print the first 10 results
print(results[:10])
Adding Order By DESC Clause to SQL Statement
# Import desc
from sqlalchemy import desc
# Order stmt by the state column
rev_stmt = stmt.order_by(desc(houses.columns.price))
# Execute the query and store the results: results
rev_results = connection.execute(rev_stmt).fetchall()
# Print the first 10 results
print(rev_results[:10])
Putting Query Results in a Pandas Dataframe
## Putting query results in a dataframe
import pandas as pd
# Create a DataFrame from the results: df
df = pd.DataFrame(rev_results)
# Set column names
df.columns = rev_results[0].keys()
# Print the Dataframe
print(df)
Basic Data Analysis and Visualization on Data Retrieved from Database
# Import Pyplot as plt from matplotlib
import matplotlib.pyplot as plt
# Plot the DataFrame
df[[‘bedroom’,’price’]].plot.bar()
plt.show()
Creating Tables with SQLAlchemy
# Creating Tables with SQLAlchemy
# Import Table, Column, String, Integer, Float, Boolean from sqlalchemy
from sqlalchemy import Table, Column, String, Integer, Float, Boolean
#data.drop(engine)
# Define a new table with a name, count, amount, and valid column: data
data = Table(‘data’, metadata,
Column(‘name’, String(255)),
Column(‘count’, Integer()),
Column(‘amount’, Float()),
Column(‘valid’, Boolean())
)
# Use the metadata to create the table
metadata.create_all(engine)
# Print table details
print(repr(data))
Creating Tables with Constraints using SQLAlchemy
# Constraints and Data Defaults
# Import Table, Column, String, Integer, Float, Boolean from sqlalchemy
from sqlalchemy import Table, Column, String, Integer, Float, Boolean
# Define a new table with a name, count, amount, and valid column: data
data = Table(‘data2’, metadata,
Column(‘name’, String(255), unique=True),
Column(‘count’, Integer(), default=1),
Column(‘amount’, Float()),
Column(‘valid’, Boolean(), default=False)
)
# Use the metadata to create the table
metadata.create_all(engine)
# Print the table details
print(repr(metadata.tables[‘data2’]))
Inserting a single row with an insert() statement
# Inserting a single row with an insert() statement
# Import insert and select from sqlalchemy
from sqlalchemy import insert, select
# Build an insert statement to insert a record into the data table: stmt
stmt = insert(data).values(name=’Anna’, count=1, amount=1000.00, valid=True)
# Execute the statement via the connection: results
results = connection.execute(stmt)
# Print result rowcount
print(results.rowcount)
# Build a select statement to validate the insert
stmt = select([data]).where(data.columns.name == ‘Anna’)
# Print the result of executing the query.
print(connection.execute(stmt).first())
Inserting Multiple Records
# Inserting Multiple Records at Once
# Build a list of dictionaries: values_list
values_list = [
{‘name’: ‘Maya’, ‘count’: 1, ‘amount’: 1000.00, ‘valid’: True},
{‘name’: ‘Taylor’, ‘count’: 1, ‘amount’: 750.00, ‘valid’: False}
]
# Build an insert statement for the data table: stmt
stmt = insert(data)
# Execute stmt with the values_list: results
results = connection.execute(stmt, values_list)
# Print rowcount
print(results.rowcount)