In [None]:
import time
import re
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt

In [None]:
# Path to the uBlock Origin .crx file in the same directory as the notebook
ublock_origin_path = './ublockorigin.crx'

# Define the username
username = 'StayyDench-EUW'

# URL of the website to scrape, incorporating the username variable
url = f'https://www.op.gg/summoners/euw/{username}?queue_type=ARENA'

# Set to False if you want to watch the browser while it works
run_headless = True

# Set up Selenium WebDriver with Chrome
options = Options()
options.add_extension(ublock_origin_path)
if run_headless:
    # The `Options.headless` attribute was deprecated in Selenium 4.8 and dropped
    # in later 4.x releases, where assigning it no longer has any effect.
    # Chrome's legacy headless mode also does not load extensions, so the
    # "new" headless mode is requested explicitly to keep uBlock Origin active.
    options.add_argument('--headless=new')
driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)

# Open the URL
driver.get(url)

# Function to click the "Show More" button until it disappears
def click_show_more(xpath='/html/body/div[1]/div[10]/div[2]/button', pause=2, max_clicks=None):
    """Click the "Show More" button repeatedly until it can no longer be clicked.

    Uses the module-level ``driver`` created by the setup code.

    Args:
        xpath: XPath used to locate the button. Defaults to the absolute path
            the notebook was written against; pass a different one if the
            page layout changes.
        pause: Seconds to sleep after each successful click so the newly
            requested content has time to load.
        max_clicks: Optional upper bound on the number of clicks. ``None``
            (the default) keeps clicking until locating or clicking fails.

    Returns:
        The number of successful clicks performed.
    """
    clicks = 0
    while max_clicks is None or clicks < max_clicks:
        try:
            # Only the lookup and the click are expected to fail here; any
            # failure (typically the button being gone) ends the loop.
            driver.find_element(By.XPATH, xpath).click()
        except Exception as e:
            print(f"No more 'Show More' button found or an error occurred: {e}")
            break
        clicks += 1
        time.sleep(pause)  # Wait for new content to load
    return clicks

# Click the "Show More" button until it disappears, then snapshot the page.
# The browser is closed in `finally` so that a failure while expanding the
# match list does not leave a Chrome process running in the background.
try:
    click_show_more()
    page_source = driver.page_source
finally:
    # Close the browser
    driver.quit()

# Extract the <ul class="games-standing__list"> element
soup = BeautifulSoup(page_source, 'html.parser')
games_standing_list = soup.find('ul', class_='games-standing__list')

# `find` returns None when the element is absent; fail loudly instead of
# writing the literal text "None" to disk, where it would later be read as
# an empty result.
if games_standing_list is None:
    raise RuntimeError(
        "Could not find <ul class=\"games-standing__list\"> in the page source; "
        "rankings.html was not written."
    )

# Save the extracted element to a file
with open('rankings.html', 'w', encoding='utf-8') as file:
    file.write(str(games_standing_list))

print("Data extracted and saved to rankings.html")

In [None]:
# Load the file content
file_path = 'rankings.html'  # Make sure this path is correct

# The scraping cell writes this file as UTF-8, so read it back with the same
# encoding rather than the platform default.
with open(file_path, 'r', encoding='utf-8') as file:
    content = file.read()

# Use regex to find all occurrences of the ranks
rankings = re.findall(r'games-standing__item--(\d)', content)

# Count the occurrences of each rank (keys are '1'..'8'; other digits are ignored)
rank_counts = {str(i): rankings.count(str(i)) for i in range(1, 9)}

# Calculate total games played
total_games_played = sum(rank_counts.values())

# Number of finishes in places 1-4
top_4_finishes = sum(rank_counts[str(place)] for place in range(1, 5))

print(f"Total games played: {total_games_played}")

if total_games_played:
    # Calculate top 4 percentage rate
    top_4_percentage = (top_4_finishes / total_games_played) * 100

    # Calculate 1st place win percentage
    first_place_percentage = (rank_counts['1'] / total_games_played) * 100

    # Calculate average placement (placement weighted by how often it occurred)
    rank_values = [int(rank) * count for rank, count in rank_counts.items()]
    average_placement = sum(rank_values) / total_games_played

    print(f"Top 4 percentage rate: {top_4_percentage:.2f}%")
    print(f"1st place win percentage: {first_place_percentage:.2f}%")
    print(f"Average placement: {average_placement:.2f}")
else:
    # With no matches the rates are undefined; avoid dividing by zero and keep
    # the names defined so later cells can still reference them.
    rank_values = [0] * len(rank_counts)
    top_4_percentage = first_place_percentage = average_placement = float('nan')
    print(f"No placements found in {file_path}; statistics are undefined.")

In [None]:
# Assuming `rank_counts` from previous code
places = list(rank_counts.keys())
frequencies = list(rank_counts.values())

# One colour per finishing place, 1st through 8th
bar_colors = ['#4daf4a', '#377eb8', '#ff7f00', '#984ea3',
              '#e41a1c', '#f781bf', '#a65628', '#999999']

# Create the bar chart with numbers on the bars
fig, ax = plt.subplots(figsize=(10, 6))
bars = ax.bar(places, frequencies, color=bar_colors)

# Label each bar with its count. The label is offset in points rather than in
# data units, so it sits just above the bar whether the counts are small or large.
for bar, freq in zip(bars, frequencies):
    ax.annotate(
        str(freq),
        xy=(bar.get_x() + bar.get_width() / 2, bar.get_height()),
        xytext=(0, 3),
        textcoords='offset points',
        ha='center',
        va='bottom',
    )

ax.set_xlabel('Finishing Place')
ax.set_ylabel('Frequency')
ax.set_title('Frequency of Finishes from 1st to 8th Place')
plt.show()