SQL Query
Tables
CREATE TABLE YELP_USER (
Yelp_ID VARCHAR(10) PRIMARY KEY,
Email VARCHAR(30) NOT NULL,
FN VARCHAR(10) NOT NULL,
LN VARCHAR(10) NOT NULL,
DOB DATE NOT NULL,
BirthPlace VARCHAR(30) NOT NULL,
Gender CHAR NOT NULL
);
CREATE TABLE BUNCATEGORY (
Category_ID VARCHAR(10) PRIMARY KEY,
CategoryNamne VARCHAR(30) NOT NULL
);
CREATE TABLE BUSINESS (
Business_ID VARCHAR(10) PRIMARY KEY,
BusinessName VARCHAR(50) NOT NULL,
City VARCHAR(20) NOT NULL,
StateName VARCHAR(20) NOT NULL,
Phone VARCHAR(14) NOT NULL,
Category_ID VARCHAR(10) NOT NULL,
OPENED CHAR NOT NULL,
FOREIGN KEY (Category_ID) REFERENCES BUNCATEGORY(Category_ID) ON DELETE SET NULL
);
CREATE TABLE REVIEWS (
Review_ID VARCHAR(10) PRIMARY KEY,
Rating Number(1) NOT NULL,
Author VARCHAR(10) NOT NULL,
Publish_Date DATE NOT NULL,
Rec_NotREC CHAR NOT NULL,
Business_ID VARCHAR(10) NOT NULL,
Votes_Funny NUMBER NOT NULL,
Votes_Cool NUMBER NOT NULL,
Votes_Useful NUMBER NOT NULL,
FOREIGN KEY (Author) REFERENCES YELP_USER(Yelp_ID) ON DELETE SET NULL ,
FOREIGN KEY (Business_ID) REFERENCES BUSINESS(Business_ID) ON DELETE SET NULL
);
Queries
- Get the businesses in San Jose, CA that are still “in business”. Results should be sorted by
review counts in descending order. Return for the top 7 businesses, the business id, name,
review count, and average rating of the business.
SELECT*
FROM (
SELECT B.Business_ID, B.BusinessName, COUNT(B.Business_ID) as count, AVG(R.Rating)
FROM BUSINESS B, REVIEWS R
WHERE B.City = 'San Jose' AND B.StateName = 'CA' AND B.OPENED = 'T' AND R.Business_ID = B.Business_ID
GROUP BY B.Business_ID, B.BusinessName
ORDER BY count DESC
)
WHERE ROWNUM <= 7;
- Get the review counts for businesses in each business category. Results should be sorted by
the review counts in descending order.
SELECT B.Category_ID, B.Business_ID,COUNT(B.Business_ID) as CN
FROM BUSINESS B, REVIEWS R
WHERE B.Business_ID = R.Business_ID
GROUP BY B.Business_ID, B.Category_ID
ORDER BY CN DESC;
- Find the average rating across all reviews written by a particular user.
SELECT Y.Yelp_ID, nvl(AVG(R.Rating),0)
FROM REVIEWS R, YELP_USER Y
WHERE R.Author(+) = Y.Yelp_ID
GROUP BY Y.Yelp_ID;
- Get the businesses in San Jose, CA that have been reviewed by more than 5
elite' users. Users who have written more than 10 reviews are called
elite’ users. Results should be
ordered by theelite' user count in descending order. Return for the top 5 businesses, the business id, business name, business review count, average rating, and the count of the
elite’
users for the particular business.
SELECT *
FROM (
SELECT T.Business_ID, TT.BusinessName, T.Review_Count, T.Avg_Rating, TT.elite
FROM
(
SELECT R.Business_ID as Business_ID, COUNT(R.Review_ID) as Review_Count, AVG(R.Rating) as Avg_Rating
FROM Reviews R
WHERE R.Business_ID in (
SELECT B.Business_ID
FROM BUSINESS B, REVIEWS R
WHERE B.City = 'San Jose'
AND B.StateName = 'CA'
AND R.Business_ID = B.Business_ID
AND R.Author in (
SELECT T1.Author
FROM (
SELECT R.Author Author, COUNT(R.Review_ID) as CN
FROM Reviews R
Group By R.Author
) T1
WHERE CN > 10
)
GROUP BY B.Business_ID
HAVING COUNT(R.Author) > 5
)
GROUP BY (R.Business_ID)
) T,
(
SELECT B.Business_ID as Business_ID, B.BusinessName as BusinessName,COUNT(R.Author) as elite
FROM BUSINESS B, REVIEWS R
WHERE B.City = 'San Jose'
AND B.StateName = 'CA'
AND R.Business_ID = B.Business_ID
AND R.Author in (
SELECT T2.Author
FROM (
SELECT R.Author Author, COUNT(R.Review_ID) as CN
FROM Reviews R
Group By R.Author
) T2
WHERE CN > 10
)
GROUP BY B.Business_ID,B.BusinessName
HAVING COUNT(R.Author) > 5
) TT
WHERE TT.Business_ID = T.Business_ID
ORDER BY TT.elite
)
WHERE ROWNUM <= 5;
- Get the businesses in San Jose, CA that have the highest percentage of reviews with rating of
5, and have been reviewed at least 5 times. Results should be ordered by the percentage in
descending order. Return for the top 5 businesses, the business id, business name, review
count, and percentage of reviews with rating of 5.
SELECT *
FROM (
SELECT T.Business_ID, COUNT(T.BUSINESS_ID), ((T.five/COUNT(T.BUSINESS_ID))*100) percent
FROM (
SELECT R.Business_ID, COUNT(R.Business_ID) as five
FROM REVIEWS R
WHERE R.Rating = 5
GROUP BY R.Business_ID
) T, Reviews R
WHERE T.Business_ID = R.Business_ID
AND T.Business_ID in (
SELECT B.Business_ID
FROM BUSINESS B, REVIEWS R
WHERE B.City = 'San Jose'
AND B.StateName = 'CA'
AND B.Business_ID = R.Business_ID
GROUP BY B.Business_ID
HAVING COUNT(R.Review_ID) >= 5
)
GROUP BY T.Business_ID, T.five
ORDER BY percent DESC
) WHERE ROWNUM <= 5;
- Find top two businesses, which have highest average ratings in each business category. For
each business, list business id, business name and business category name.
SELECT TT.business_ID, TT.BusinessName, TT.categoryname
FROM (SELECT T.Business_ID as business_ID, T.BusinessName as businessname, T.rate, C.CategoryNamne as categoryname,
ROW_NUMBER() OVER (PARTITION BY T.Category_ID Order by T.rate DESC) AS ranking
FROM (
SELECT B.Business_ID Business_ID, B.BusinessName BusinessName, B.Category_ID Category_ID, AVG(R.Rating) as rate
FROM BUSINESS B, REVIEWS R
WHERE B.Business_ID = R.Business_ID
GROUP BY B.Business_ID, B.BusinessName, B.Category_ID
) T, BUNCATEGORY C
WHERE T.Category_ID = C.Category_ID
) TT
WHERE ranking <= 2;
- Find the oldest user who never reviewed any businesses.
SELECT *
FROM (
SELECT Y.Yelp_ID, Y.DOB as birthday
FROM YELP_USER Y
WHERE Y.Yelp_ID not in (
SELECT R.Author
FROM Reviews R
GROUP BY R.Author
)
ORDER BY birthday
)
WHERE ROWNUM <= 1;
- Find every business in CA that has the word “Coffee” in its name.
SELECT B.BUSINESS_ID, B.BusinessName
FROM BUSINESS B
WHERE StateName = 'CA'
AND B.BusinessName LIKE '%Coffee%';
- List all 5-star businesses that have been reviewed by any users between the ages of 15 and
A 5-star business is a business which has an average review rating of 5.
SELECT R.Business_ID, R.Author
FROM Reviews R
WHERE R.Author in (
SELECT Y.Yelp_ID
FROM YELP_USER Y
WHERE (months_between(current_date, Y.DOB)/12) > 15
AND (months_between(current_date, Y.DOB)/12) < 25
)
GROUP BY R.Business_ID, R.Author
HAVING AVG(R.Rating) = 5;
- List users that have been to at least 3 distinct states. Order by number of states traveled to
(decreasing), break ties with user id (increasing). For each user, list his/her user id, name and
list of states traveled to.
SELECT Y.Yelp_ID, Y.FN, Y.LN, T.StateName
FROM (
SELECT R.Author as Author, B.StateName as StateName
FROM REVIEWS R, BUSINESS B
WHERE R.Business_ID = B.Business_ID
AND R.Author in (
SELECT T.Yelp_User Yelp_User
FROM
(SELECT R.Author as Yelp_User, B.StateName as StateName
FROM REVIEWS R, BUSINESS B
WHERE R.Business_ID = B.Business_ID
GROUP BY R.Author, B.StateName
) T
GROUP BY T.Yelp_User
HAVING COUNT(T.Yelp_User) >= 3
)
GROUP BY R.Author, B.StateName
) T, YELP_USER Y
WHERE T.Author = Y.Yelp_ID
ORDER BY (Y.Yelp_ID);