library(DBI)
library(RSQLite)
library(ggplot2)
BUSINES ANALYTICS FOUNDATINS INCLUDING THE R AND SQL
R & SQL
Loading libraries
Connect to the Chinook SQLite database
<- dbConnect(RSQLite::SQLite(), dbname = "Chinook_Sqlite.sqlite") con
List all tables in the database
<- dbListTables(con)
tables print(tables)
[1] "Album" "Artist" "Customer" "Employee"
[5] "Genre" "Invoice" "InvoiceLine" "MediaType"
[9] "Playlist" "PlaylistTrack" "Track"
Choose a Album table from the database
<- "SELECT * FROM Album LIMIT 10"
query_album <- dbGetQuery(con, query_album)
data_album print(data_album)
AlbumId Title ArtistId
1 1 For Those About To Rock We Salute You 1
2 2 Balls to the Wall 2
3 3 Restless and Wild 2
4 4 Let There Be Rock 1
5 5 Big Ones 3
6 6 Jagged Little Pill 4
7 7 Facelift 5
8 8 Warner 25 Anos 6
9 9 Plays Metallica By Four Cellos 7
10 10 Audioslave 8
Query to get album details along with artist names
<- "
query_album_artist SELECT Album.AlbumId, Album.Title AS AlbumTitle, Artist.Name AS ArtistName
FROM Album
JOIN Artist ON Album.ArtistId = Artist.ArtistId
LIMIT 10"
<- dbGetQuery(con, query_album_artist)
data_album_artist print(data_album_artist)
AlbumId AlbumTitle ArtistName
1 1 For Those About To Rock We Salute You AC/DC
2 2 Balls to the Wall Accept
3 3 Restless and Wild Accept
4 4 Let There Be Rock AC/DC
5 5 Big Ones Aerosmith
6 6 Jagged Little Pill Alanis Morissette
7 7 Facelift Alice In Chains
8 8 Warner 25 Anos Antônio Carlos Jobim
9 9 Plays Metallica By Four Cellos Apocalyptica
10 10 Audioslave Audioslave
Query to get the top-selling albums along with artist names
<- "
query_top_selling_albums SELECT
Album.Title AS AlbumTitle,
Artist.Name AS ArtistName,
SUM(InvoiceLine.Quantity) AS TotalQuantitySold
FROM
InvoiceLine
JOIN
Track ON InvoiceLine.TrackId = Track.TrackId
JOIN
Album ON Track.AlbumId = Album.AlbumId
JOIN
Artist ON Album.ArtistId = Artist.ArtistId
GROUP BY
Album.AlbumId, Album.Title, Artist.Name
ORDER BY
TotalQuantitySold DESC
LIMIT 10"
# Execute the query
<- dbGetQuery(con, query_top_selling_albums)
top_selling_albums ::kable(top_selling_albums) knitr
AlbumTitle | ArtistName | TotalQuantitySold |
---|---|---|
Minha Historia | Chico Buarque | 27 |
Greatest Hits | Lenny Kravitz | 26 |
Unplugged | Eric Clapton | 25 |
Acústico | Titãs | 22 |
Greatest Kiss | Kiss | 20 |
Prenda Minha | Caetano Veloso | 19 |
Chronicle, Vol. 2 | Creedence Clearwater Revival | 19 |
My Generation - The Very Best Of The Who | The Who | 19 |
International Superhits | Green Day | 18 |
Chronicle, Vol. 1 | Creedence Clearwater Revival | 18 |
Query to get the top-selling albums with the number of units sold, artist names, and total earnings
<- "
query_top_selling_albums SELECT
Album.Title AS AlbumTitle,
Artist.Name AS ArtistName,
SUM(InvoiceLine.Quantity) AS TotalQuantitySold,
SUM(InvoiceLine.Quantity * InvoiceLine.UnitPrice) AS TotalEarnings
FROM
InvoiceLine
JOIN
Track ON InvoiceLine.TrackId = Track.TrackId
JOIN
Album ON Track.AlbumId = Album.AlbumId
JOIN
Artist ON Album.ArtistId = Artist.ArtistId
GROUP BY
Album.AlbumId, Album.Title, Artist.Name
ORDER BY
TotalEarnings DESC
LIMIT 10"
# Execute the query
<- dbGetQuery(con, query_top_selling_albums)
top_selling_albums print(top_selling_albums)
AlbumTitle ArtistName
1 Battlestar Galactica (Classic), Season 1 Battlestar Galactica (Classic)
2 The Office, Season 3 The Office
3 Minha Historia Chico Buarque
4 Heroes, Season 1 Heroes
5 Lost, Season 2 Lost
6 Greatest Hits Lenny Kravitz
7 Unplugged Eric Clapton
8 Battlestar Galactica, Season 3 Battlestar Galactica
9 Lost, Season 3 Lost
10 Acústico Titãs
TotalQuantitySold TotalEarnings
1 18 35.82
2 16 31.84
3 27 26.73
4 13 25.87
5 13 25.87
6 26 25.74
7 25 24.75
8 12 23.88
9 11 21.89
10 22 21.78
Sales Trends Over Time: Analyze how sales figures have changed over time to identify seasonal trends or growth patterns
# Query monthly sales trends
<- "
query_monthly_sales SELECT
strftime('%Y-%m', Invoice.InvoiceDate) AS Month,
SUM(InvoiceLine.Quantity) AS TotalQuantitySold
FROM
InvoiceLine
JOIN
Invoice ON InvoiceLine.InvoiceId = Invoice.InvoiceId
GROUP BY
strftime('%Y-%m', Invoice.InvoiceDate)
ORDER BY
Month;"
# Execute the query and fetch the data
<- dbGetQuery(con, query_monthly_sales)
monthly_sales
# Plot the data
ggplot(monthly_sales, aes(x = Month, y = TotalQuantitySold)) +
geom_line() +
geom_point() +
labs(title = "Monthly Quantity Sold",
x = "Month",
y = "Total Quantity Sold") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
`geom_line()`: Each group consists of only one observation.
ℹ Do you need to adjust the group aesthetic?