"""Quick little project to scrape a long word list from a paginated website."""
import random
import time
import requests
from bs4 import BeautifulSoup
# Pages to scrape: 165 through 174 inclusive (range() excludes the stop value).
page_numbers = range(165, 175)

for n in page_numbers:
    # Pause for a random interval before each request (presumably to avoid
    # hitting the site with rapid back-to-back requests).
    delay = random.uniform(0.3, 2.5)
    print(f"Starting page {n} in {delay} seconds.")
    time.sleep(delay)

    # URL of the web page to scrape
    url = f"https://www.[wordlistdomain].com/words?page={n}"  # Replace with the actual URL

    # Fetch the page. Without a timeout, requests may wait forever on a
    # stalled connection; raise_for_status() stops us from parsing an HTTP
    # error page as if it were a word page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    # Parse the HTML content using BeautifulSoup
    soup = BeautifulSoup(response.content, "html.parser")

    # Find the unordered list with ID "words-list". find() returns None when
    # the element is absent, so fail with a clear message instead of an
    # AttributeError further down.
    word_list = soup.find("ul", id="words-list")
    if word_list is None:
        raise RuntimeError(f'No <ul id="words-list"> found on page {n} ({url})')

    # Collect the link text of every list item, skipping items that have no
    # <a> tag or whose link text is empty.
    words = []
    for item in word_list.find_all("li"):
        link = item.find("a")
        if link is None:
            continue
        # Strip surrounding whitespace so each word occupies exactly one line.
        word = link.get_text().strip()
        if word:
            words.append(word)

    # Append this page's words to the text file (created if missing). An
    # explicit UTF-8 encoding avoids platform-dependent encode errors on
    # non-ASCII words.
    with open("word_list.txt", "a", encoding="utf-8") as file:
        file.writelines(f"{word}\n" for word in words)

    # Reported once per page, after that page's words have been written.
    print("Words appended to word_list.txt")