SQL

 SQL

Structured Query Language

CRUD
  • CREATE
  • READ
  • UPDATE
  • DELETE

Different relational database management systems:
  • MySQL
  • PostgreSQL
  • SQLite
Data/variable/columns Types:








Connect to Oracle Data Base

 Python 


R




PostgreSQL

psycopg2
ipython-sql

pip install psycopg2

conect to a virtual enviromet
virtualenv env && source env/bin/activate
pip install psycopg2-binary
connect()
cursor()
cursor = conn.cursor()
cursor.execute("SELECT * FROM db_table WHERE id = 1")


Best Practice to Work with Databases

R

  • dplyr as a database interface

Generales

  • Crating Visualization: create the data transformation, save it or move to the database, and the use a graphic function. -rendering process/
  • Enterprise-ready dashboards: Push as much of the calculations of the dashboard back to the database.
  • Make code portable
  • Run queries safely
  • Data needed for an analysis: select the right shema.
  • Protect your credentials, avoid to use as a plain text.
    • Method: DNS, encrypt credentials (keyring package), configuration file (config package), environment variables.
  • Secure access in deployed contect, shiny applications and Rmarkdown are two good options.
  • Select a database interface and connection that fit, support the DBI provides package.
  • Select and set up the right ODBC Drivers

Export Data

Sql Developer 


CODE AND PRACTICE - CLAUSES

  • CREATE AND INSERT



#----------------------------------
CREATE TABLE TNAME ( ID NUMBER GENERATED BY DEFAULT AS IDENTITY, #NOT NULL AUTO_INCREMENT, NAME CHAR(30) NOT NULL, VAR1 VARCHAR2(2) NOT NULL, VAR2 BINANRY_FLOAT(6) NULL, VAR3 NUMBER(9,8) NULL, S_DATE DATE NOT NULL, PRIMARI KEY(ID) ); CREATE TABLE TNAME2 ( ID NUMBER GENERATED BY DEFAULT AS IDENTITY, PHONE VARCHAR(20), EMAIL VARCHAR(60), PRIMARI KEY(ID) ); INSERT INTO TNAME (NAME, VAR1, VAR2, VAR3, S_DATE) VALUES ("NAME1", "RS", 4, 5.643264, '2023-03-06'), ("NAME2", "TS", 82, 94.38646158, '2023-03-06'); UPDATE TNAME SET VAR1 = 'SR', DATE = '2023-01-06', WHERE NAME = 'NAME 1'; UPDATE TNAME SET VAR1 = VAR1 * 0.8;
#----------------------------------

  • ALTER



#----------------------------------
ALTER TABLE TNAME2 ADD ( #MODIFY # S_DATE DATE NOT NULL, PHONE VARCHAR(10) ); ALTER TABLE TNAME DROP COLUMN VAR2;
#----------------------------------

  • FOREING KEY
  • WHERE AND HAVING

Both are like the other, but WHERE applies to individuals rows, HAVING applies to groups.

SELECT titles.pub_id, AVG(titles.price) FROM titles INNER JOIN publishers ON titles.pub_id = publishers.pub_id WHERE publishers.state = 'CA' GROUP BY titles.pub_id HAVING AVG(price) > 10;


  • JOINING DATA & SUBQUERY
Joins are preferable for large tables or when you need to return a large number of columns. Subqueries can be useful when you need to filter or aggregate data before joining it with another table, or when you need to perform a calculation using a subset of data before joining it with another table.




#----------------------------------

SELECT c.customer_name, COUNT(*) AS num_purchases FROM customers c JOIN orders o ON c.customer_id = o.customer_id WHERE o.order_date >= TRUNC(SYSDATE) - 30 GROUP BY c.customer_name ORDER BY num_purchases DESC FETCH FIRST 5 ROWS ONLY;
#----------------------------------
SELECT stock_name, date, price, AVG(price) OVER (PARTITION BY stock_name ORDER BY date ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS moving_average FROM stock_prices WHERE date BETWEEN '2022-01-01' AND '2022-01-31'; #----------------------------------

SELECT e.employee_name, TRUNC(AVG(s.amount)) AS avg_sales_per_month FROM employees e JOIN sales s ON e.employee_id = s.employee_id WHERE s.sale_date >= ADD_MONTHS(TRUNC(SYSDATE, 'YEAR'), -12) GROUP BY e.employee_name ORDER BY avg_sales_per_month DESC FETCH FIRST 10 ROWS ONLY; #----------------------------------

SELECT p1.product_name AS product1, p2.product_name AS product2, COUNT(*) AS frequency FROM orders o JOIN order_details od1 ON o.order_id = od1.order_id JOIN order_details od2 ON o.order_id = od2.order_id AND od1.product_id < od2.product_id JOIN products p1 ON od1.product_id = p1.product_id JOIN products p2 ON od2.product_id = p2.product_id GROUP BY p1.product_name, p2.product_name ORDER BY frequency DESC FETCH FIRST 5 ROWS ONLY; #----------------------------------

SELECT c.customer_name, pc.category_name, SUM(op.unit_price * op.quantity) AS total_spent FROM customers c JOIN orders o ON c.customer_id = o.customer_id JOIN order_products op ON o.order_id = op.order_id JOIN products p ON op.product_id = p.product_id JOIN product_categories pc ON p.category_id = pc.category_id WHERE op.unit_price IS NOT NULL GROUP BY c.customer_name, pc.category_name HAVING COUNT(DISTINCT p.product_id) >= 3 ORDER BY pc.category_name, total_spent DESC FETCH FIRST 3 ROWS WITH TIES; #----------------------------------

SELECT customer_name FROM customers WHERE customer_id IN ( SELECT customer_id FROM orders WHERE order_total > ( SELECT AVG(order_total) FROM orders ) ); #----------------------------------

SELECT employee_name FROM employees WHERE employee_id IN ( SELECT DISTINCT employee_id FROM orders WHERE customer_id IN ( SELECT customer_id FROM customers WHERE customer_city = employees.employee_city ) ); #----------------------------------

SELECT customer_name FROM customers WHERE customer_id IN ( SELECT DISTINCT customer_id FROM orders WHERE product_id NOT IN ( SELECT product_id FROM products WHERE product_stock > 0 ) ); #----------------------------------
SELECT customer_name FROM customers WHERE ( SELECT COUNT(DISTINCT product_category) FROM products ) = ( SELECT COUNT(DISTINCT product_category) FROM orders o JOIN products p ON o.product_id = p.product_id WHERE o.customer_id = customers.customer_id ); #----------------------------------
SELECT c.customer_id, c.first_name, c.last_name FROM customers c INNER JOIN orders o ON c.customer_id = o.customer_id LEFT JOIN sales_reps s ON c.sales_rep_id = s.sales_rep_id WHERE o.order_value > 1000 AND s.sales_rep_id IS NULL #----------------------------------
SELECT d.department_name, AVG(e.salary) as avg_salary FROM employees e INNER JOIN departments d ON e.department_id = d.department_id GROUP BY d.department_name HAVING AVG(e.salary) > 75000 #----------------------------------
SELECT c.customer_id, COUNT(*) as num_orders FROM customers c INNER JOIN orders o ON c.customer_id = o.customer_id WHERE o.order_value > 100 GROUP BY c.customer_id #----------------------------------

SELECT d1.name, d1.position,         salario, cumplimientoMeta FROM tabla1 as d1 INNER JOIN tabla2 as d2 ON d1.name = d2.name

SELECT left_table.c1name AS L_id,         left_table.c2name AS L_val,         right_table.c2name AS R_val FROM left_table INNER JOIN right_table ON left_table.c1name = right_table.c1name

  • USING IN INNER JOIN #avoid the condition
SELECT left_table.c1name AS L_id,         left_table.c2name AS L_val,         right_table.c2name AS R_val FROM left_table INNER JOIN right_table USING (id);


SELECT d1.name, d1.postion, salario, cumplimientoMeta FROM left_table as d1 INNER JOIN right_table as d2 ON d1.name = d2.name #OR
SELECT d1.name, d1.postion, salario, cumplimientoMeta FROM left_table as d1 INNER JOIN right_table as d2 USING (name)

  •  VIEW


CREATE OR REPLACE VIEW customer_sales AS
SELECT name AS customer, SUM( quantity * unit_price ) sales_amount, EXTRACT( YEAR FROM order_date ) YEAR FROM orders INNER JOIN order_items USING(order_id) INNER JOIN customers USING(customer_id) WHERE status = 'Shipped' GROUP BY name, EXTRACT( YEAR FROM order_date ); #----------------------------------

Complex Query

Good Practices:
  • See where is the data
  • Write the code by parts and then 

SELECT country.country_name_eng, SUM(CASE WHEN call.id IS NOT NULL THEN 1 ELSE 0 END) AS calls, AVG(ISNULL(DATEDIFF(SECOND, call.start_time, call.end_time),0)) AS avg_difference FROM country LEFT JOIN city ON city.country_id = country.id LEFT JOIN customer ON city.id = customer.city_id LEFT JOIN call ON call.customer_id = customer.id GROUP BY country.id, country.country_name_eng HAVING AVG(ISNULL(DATEDIFF(SECOND, call.start_time, call.end_time),0)) > (SELECT AVG(DATEDIFF(SECOND, call.start_time, call.end_time)) FROM call) ORDER BY calls DESC, country.id ASC; SELECT C.CUSTOMER_NAME, COUNT(*) AS NUM_PURCHASES FROM CUSTOMERS C JOINT ORDERS O ON C.CUSTOMER_ID = O.CUSTOMER_ID WHERE O.ORDER_DATA >= TRUNC(SYSDATA) - 30 GROUP BY C.CUSTOMER_NAME ORDER BY NUM_PURCHASES DESC FETCH FIRST 5 ROWS ONLI; SELECT STOCK_NAME, DATE, PRICE, AVG(PRICE) OVER (PARTITION BY STOCK_NAME ORDER BY DATE ROWS BETWEEN 4 PRECEDIN AND CURRENT ROW) AS MOVIN_AVG FROM STOCK_PRICES WHERE DATE BETWEEN '2022-02-01' AND '2022-03-31'; ----------------------------------

WITH t_table as (

    select *

    from t_datos

)

select *

from t_table

  • TRUNC


TRUNC (ARG1, INTEGER)
TRUNC (DATE, 'FORMAT')


PRACTICE


#----------------------------------
SELECT STOCK_NAME, DATE, PRICE,
AVG(PRICE) OVER (PARTITION BY STOCK_NAME ORDER BY DATE ROWS BETWEEN 4 PRECEDIN AND CURRENT ROW) AS MOVIN_AVG FROM STOCK_PRICES WHERE DATE BETWEEN '2022-02-01' AND '2022-03-31'; #----------------------------------
SELECT SELECT C.CUSTOMER_NAME, COUNT(*) AS NUM_PURCHASES FROM CUSTOMERS C JOINT ORDERS O ON C.CUSTOMER_ID = O.CUSTOMER_ID WHERE O.ORDER_DATA >= TRUNC(SYSDATA) - 30 GROUP BY C.CUSTOMER_NAME ORDER BY NUM_PURCHASES DESC FETCH FIRST 5 ROWS ONLI; SELECT STOCK_NAME, DATE, PRICE, AVG(PRICE) OVER (PARTITION BY STOCK_NAME ORDER BY DATE ROWS BETWEEN 4 PRECEDIN AND CURRENT ROW) AS MOVIN_AVG FROM STOCK_PRICES WHERE DATE BETWEEN '2022-02-01' AND '2022-03-31'; #----------------------------------





#---------------------------------- #----------------------------------


Resource
$pwd\Documents\Arch_Usuario\Infor\SQL


Aditional Resource:



SQL in Python Reference Resource:

Entradas populares