这是我第一次抓取网站数据。问题是两个不同的表具有相同的类名。到目前为止，我已经了解到，要找到数据，我必须通过 HTML 标记的类名来找到它。
import bs4 as bs
from urllib.request import Request, urlopen
import pandas as pd
from pyparsing import col
# Download the Albania population page, sending a 'Mozilla/5.0' User-Agent
# header with the request.
req = Request(
    'https://www.worldometers.info/world-population/albania-population/',
    headers={'User-Agent': 'Mozilla/5.0'},
)
# Read the body inside a context manager so the HTTP response is closed as
# soon as the bytes have been read, instead of being left open.
with urlopen(req) as response:
    webpage = response.read()
# Parse the downloaded bytes with the html5lib tree builder.
soup = bs.BeautifulSoup(webpage, 'html5lib')
# Albania population: find the element carrying the description classes and
# pick the second <strong> tag inside it.
pupulation = soup.find(class_='col-md-8 country-pop-description')
second_strong = pupulation.find_all('strong')[1]
for i in second_strong:
    # One blank line is printed per child node of that tag; the child's own
    # text is not printed.
    print()
# City populations: first element whose class attribute matches the string
# below. The result is only stored here, not printed.
city_table_class = 'table table-hover table-condensed table-list'
city_population = soup.find(class_=city_table_class)
# First table: historical population of Albania.
#
# Column labels in cell order: the N-th <td> of every data row is stored
# under the N-th label. Every label is a separate list item so that two
# adjacent string literals cannot silently merge into one column name.
historic_columns = [
    'Year',
    'Population',
    'Yearly Change %',
    'Yearly Change',
    'Migrants (net)',
    'Median Age',
    'Fertility Rate',
    'Density(P/Km2)',
    'Urban Pop %',
    'Urban Population',
    'Countrys Share of Population',
    'World Population',
    'Albania Global Rank',
]

# Spelling of this name is kept as-is so any other code using it keeps working.
hisoric_population = soup.find(
    'table',
    class_='table table-striped table-bordered table-hover table-condensed table-list',
)

historic_rows = []
for row in hisoric_population.tbody.find_all('tr'):
    columns = row.find_all('td')
    if not columns:
        # Rows without <td> cells carry no data.
        continue
    # Plain strip() trims surrounding whitespace only. Passing a character
    # set such as '&0' would instead remove '&' and '0' characters from both
    # ends of the value (turning "20" into "2") and leave whitespace in place.
    values = [cell.text.strip() for cell in columns]
    # zip() pairs labels with cells positionally, so every extracted cell,
    # including 'Urban Population', ends up in the row. A row with fewer
    # cells than labels yields missing values rather than an IndexError.
    historic_rows.append(dict(zip(historic_columns, values)))

# Build the frame once from the collected rows: DataFrame.append copied the
# whole frame on every call and no longer exists in pandas 2.x.
df = pd.DataFrame(historic_rows, columns=historic_columns)
# The returned preview is not used here; it is only visible when this
# expression is evaluated interactively.
df.head()
# Second table: Albania population forecast.
#
# The historical and the forecast table share the same class string, so
# find() with that class always returns the first (historical) table.
# Collect every match and take the second one instead.
# NOTE(review): assumes the forecast table is the second match in document
# order - confirm against the live page.
matching_tables = soup.find_all(
    'table',
    class_='table table-striped table-bordered table-hover table-condensed table-list',
)
if len(matching_tables) < 2:
    # Fail loudly rather than silently printing the historical table again.
    raise LookupError(
        'forecast table not found: expected at least two tables with the '
        'shared class, got %d' % len(matching_tables)
    )
forecast_population = matching_tables[1]

# Iterate the forecast table itself and print the list of <td> cells of
# each row.
for row in forecast_population.tbody.find_all('tr'):
    columns = row.find_all('td')
    print(columns)