Code: Select all
https://disc.icai.org/reports-orders-of-the-disciplinary-committee-for-the-council-year-2023-2024/
Please generate a Python script to download all the PDF case laws from the above webpage.
The generated code is as follows (note: it targets the 2024-2025 page, not the 2023-2024 page requested above):
Code: Select all
# Prerequisite — run once in a shell, NOT inside Python (the original line
# "pip intall requests" was both misspelled and a SyntaxError in a .py file):
#   pip install requests beautifulsoup4
import os

import requests
from bs4 import BeautifulSoup

# URL of the webpage listing the disciplinary-committee reports and orders.
BASE_URL = "https://disc.icai.org/reports-orders-of-the-disciplinary-committee-for-the-council-year-2024-2025/"

# Directory where the downloaded orders are saved; created up front,
# exist_ok so re-runs do not fail.
DOWNLOAD_DIR = "ICAI_Orders"
os.makedirs(DOWNLOAD_DIR, exist_ok=True)
def download_file(url, filename):
    """Download a single file from *url* and save it to *filename*.

    Streams the response in 1 KiB chunks so large PDFs are never held
    entirely in memory. Prints one status line per file, for success
    or failure; never raises on an HTTP error status.
    """
    # timeout keeps the script from hanging forever on a dead connection
    response = requests.get(url, stream=True, timeout=30)
    if response.status_code == 200:
        with open(filename, 'wb') as f:
            for chunk in response.iter_content(chunk_size=1024):
                f.write(chunk)
        # BUG FIX: the original printed the literal text "(unknown)"
        # instead of interpolating the saved file's name.
        print(f"Downloaded: {filename}")
    else:
        print(f"Failed to download: {url} (Status code: {response.status_code})")
def scrape_and_download(base_url):
    """Scrape *base_url* and download every linked PDF into DOWNLOAD_DIR.

    Fetches the page, extracts all anchor tags with an href, and hands
    each ``.pdf`` link to download_file(). Errors are reported to stdout;
    the function never raises.
    """
    # Function-scope stdlib import so this block is self-contained.
    from urllib.parse import urljoin

    try:
        response = requests.get(base_url, timeout=30)
        if response.status_code != 200:
            print(f"Failed to access the webpage: {base_url} (Status code: {response.status_code})")
            return
        soup = BeautifulSoup(response.content, 'html.parser')
        # Every anchor that carries an href attribute.
        links = soup.find_all('a', href=True)
        for link in links:
            href = link['href']
            if href.endswith('.pdf'):  # orders are published as PDFs
                # BUG FIX: the original concatenated the global BASE_URL
                # (ignoring the base_url parameter) onto the raw href,
                # which produces broken URLs for root-relative links like
                # "/docs/x.pdf". urljoin resolves relative and absolute
                # hrefs correctly against the page actually scraped.
                file_url = urljoin(base_url, href)
                filename = os.path.join(DOWNLOAD_DIR, os.path.basename(file_url))
                download_file(file_url, filename)
    except Exception as e:
        # Broad catch is acceptable at this top-level script boundary:
        # the error is reported, not silently swallowed.
        print(f"An error occurred: {e}")


# Run the scraper only when executed as a script (not on import).
if __name__ == "__main__":
    scrape_and_download(BASE_URL)