ESIEE Paris — Data Engineering I — Assignment 2¶

Author : Badr TAJINI

Academic year: 2025–2026
Program: Data & Applications - Engineering - (FD)
Course: Data Engineering I


Check retail.events table access with the new user¶

In [4]:
# After restore: definitive checks and a sample SELECT
# Improved: single-block printing (avoids duplicate output) and a Q1 price lookup
import os
from sqlalchemy import create_engine, text
import pandas as pd
from urllib.parse import quote_plus

def _build_db_url():
    db_url = os.environ.get('DATABASE_URL')
    if db_url:
        return db_url
    user = os.environ.get('PGUSER', 'esiee_reader')
    pwd = os.environ.get('PGPASSWORD', 'azerty123')  # replace with the actual password
    host = os.environ.get('PGHOST', '127.0.0.1')
    port = os.environ.get('PGPORT', '5433')
    db = os.environ.get('PGDATABASE', 'esiee_full')
    if pwd:
        return f"postgresql+psycopg2://{user}:{quote_plus(pwd)}@{host}:{port}/{db}"
    return f"postgresql+psycopg2://{user}@{host}:{port}/{db}"

db_url = _build_db_url()
summary_lines = []
summary_lines.append('Using DB URL (password hidden).')
session_q1 = '789d3699-028e-4367-b515-b82e2cb5225f'
try:
    engine = create_engine(db_url)
    with engine.connect() as conn:
        # small, safe queries
        cnt = conn.execute(text('SELECT count(*) FROM retail.events')).scalar()
        df = pd.read_sql_query('SELECT * FROM retail.events LIMIT 10;', conn)
        q_price = text("SELECT price FROM retail.events WHERE session_id = :sid AND event_type = 'purchase' ORDER BY event_time DESC LIMIT 1")
        price = conn.execute(q_price, {'sid': session_q1}).scalar()
    summary_lines.append(f'retail.events row count = {cnt}')
    if cnt == 0:
        summary_lines.append('Table is empty')
    else:
        summary_lines.append('Table is NOT empty — sample rows below:')
    # print the summary once, then show the dataframe (if any)
    print('\n'.join(summary_lines))
    if cnt != 0:
        display(df)
    print()
    if price is None:
        print(f'Q1 — session {session_q1}: no purchase row found')
    else:
        print(f'Q1 — session {session_q1}: purchase price = {price}')
except Exception as e:
    print('Failed to run sample query or connect to DB:')
    print(e)
    print('\nMake sure PostgreSQL is installed and running, the `pg_restore` step succeeded, and environment variables are set:')
    print('  - export DATABASE_URL=postgresql+psycopg2://user:pass@host:port/db')
    print("  - or set PGHOST, PGPORT, PGDATABASE, PGUSER, PGPASSWORD")
Using DB URL (password hidden).
retail.events row count = 42418541
Table is NOT empty — sample rows below:
event_time event_type session_id product_id price
0 2019-10-01 03:51:51+00:00 view 30b02f47-f6b1-4462-8342-26a8eb7ada9d 19300086 26.74
1 2019-10-01 03:51:51+00:00 view 54a2e1f8-c882-4510-b5d7-e49c219f0c60 1005115 975.57
2 2019-10-01 03:51:51+00:00 view 2b47123f-f3cb-42d9-bc05-bf87733e61e5 32701284 48.21
3 2019-10-01 03:51:51+00:00 view d94ab37d-c956-4ecc-9162-b9229fdd8afd 1004523 221.31
4 2019-10-01 03:51:51+00:00 view ea815ccc-0d0d-4ad2-b56f-8b207e5bc5b9 16400027 43.73
5 2019-10-01 03:51:51+00:00 view cbdc9ae9-163d-4148-9609-d1a6dbb1ff98 1307397 881.59
6 2019-10-01 03:51:51+00:00 view 37a65c91-4146-4642-93c2-842a5c483f62 16400025 128.68
7 2019-10-01 03:51:51+00:00 view e73441b4-c2bf-4294-8e84-fa9e1497e6f8 1306724 875.16
8 2019-10-01 03:51:51+00:00 view ce25c84e-11ad-4f47-a34a-9a4e3ca4b414 14400024 21.85
9 2019-10-01 03:51:51+00:00 view 92d3a4f2-5198-4328-abdc-6eb367a5c2f7 5100594 1181.50
Q1 — session 789d3699-028e-4367-b515-b82e2cb5225f: purchase price = 100.39
In [ ]: