WEB SCRAPING

Januari 29, 2025
import requests  
import pandas as pd  
from bs4 import BeautifulSoup  

# Fetch the page that holds the population table.
url = "https://en.wikipedia.org/wiki/List_of_countries_and_dependencies_by_population"

# Wikimedia's User-Agent policy asks automated clients to identify
# themselves; generic library defaults may be rejected.
# Replace the value with one that includes your own contact information.
headers = {"User-Agent": "countries-population-scraper/1.0 (educational script)"}

# The timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url, headers=headers, timeout=30)

# Check whether the request succeeded. On any non-200 answer we stop here:
# parsing an error page would only fail later with a confusing message.
if response.status_code != 200:
    print(f"Gagal! Kode status: {response.status_code}")
    raise SystemExit(1)
print("Berhasil mengambil halaman!")

# Parse the HTML with BeautifulSoup
soup = BeautifulSoup(response.text, "html.parser")


# Locate the first table carrying the "wikitable" class.
table = soup.find("table", class_="wikitable")

# find() returns None when nothing matches (e.g. the page layout changed or
# an error page was downloaded); fail with a clear message instead of an
# AttributeError on the next line.
if table is None:
    raise SystemExit("Tabel dengan class 'wikitable' tidak ditemukan di halaman.")

# Collect every <tr> row of that table.
rows = table.find_all("tr")

# Gather the cleaned cell text of each table row; the result is later turned
# into a pandas DataFrame.
data = []

# The first four <tr> elements (indexes 0-3) are skipped.
# NOTE(review): the original comment said data starts at the 3rd row of the
# table, which does not match this slice -- confirm the intended offset
# against the live page.
for tr in rows[4:]:
    cells = tr.find_all(["td", "th"])
    if len(cells) < 5:
        # Rows with fewer than five cells cannot fill all five fields.
        continue

    # Number, country name, population, percentage, date -- in that order.
    no, country, population, persentation, year = (
        cell.text.strip() for cell in cells[:5]
    )
    population = population.replace(",", "")  # drop thousands separators

    data.append([no, country, population, persentation, year])

# Build the DataFrame from the collected rows.
column_names = ["No", "Country", "Population", "Persentation", "Year"]
df = pd.DataFrame(data, columns=column_names)

# Show the first 5 records.
print(df.head())


# Save the data to CSV, without the DataFrame index column.
output_file = "countries_population.csv"
df.to_csv(output_file, index=False)
print("Data berhasil disimpan ke countries_population.csv")
Cari Blog Ini

Blog Pembelajaran Informatika

WEB SCRAPING

Komentar

Posting Komentar

Postingan populer dari blog ini

Analisa Data

WEB SCRAPING