可以直接调用网站提供的 API 来完成同样的事情。
接口地址：https://www.globusmedical.com/wp-json/em-locator/v1/locations/?page=1
import requests
from lxml import etree
import json
def get_response_using_headers(page, timeout=30):
    """Fetch one page of the Globus Medical locator API.

    Sends a GET request to the ``em-locator/v1/locations`` endpoint with a
    set of Chrome-style request headers and the requested page number as the
    ``page`` query parameter.

    Args:
        page: Page number to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The ``requests.Response`` for the call. The HTTP status is not
        checked here; callers decide how to handle error responses.
    """
    # Headers copied from a desktop Chrome session.
    # NOTE(review): presumably needed so the site serves the request like a
    # normal browser visit -- confirm whether all of them are required.
    headers = {
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'accept-language': 'en-GB,en;q=0.9',
        'sec-ch-ua': '"Chromium";v="123", "Not:A-Brand";v="8"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Linux"',
        'sec-fetch-dest': 'document',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-site': 'none',
        'sec-fetch-user': '?1',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
    }
    params = {
        'page': page,
    }
    response = requests.get(
        'https://www.globusmedical.com/wp-json/em-locator/v1/locations/',
        params=params,
        headers=headers,
        timeout=timeout,
    )
    return response
# Collect one dict per locator entry, with the keys:
# name, location, address, phone, email, specialty.
# Equivalent direct URL for a single page:
# https://www.globusmedical.com/wp-json/em-locator/v1/locations/?page=1
doctors_data = []
# Adjust the pagination range here; range(1, 2) fetches page 1 only.
for page in range(1, 2):
    response = get_response_using_headers(page)
    # Stop on HTTP errors instead of trying to parse an error page as JSON.
    response.raise_for_status()
    for item in response.json():
        # Prefer the formatted address; fall back to the raw one when the
        # formatted value is empty or missing.
        address = item["formatted_address"] or item["address"]

        # The specialty list only appears inside the rendered HTML snippet,
        # in a div whose class contains "procedures".
        html_str = item["list_item_html"]
        # Skip parsing when there is no markup to build a tree from.
        dom = etree.HTML(html_str) if html_str else None
        procedures = (
            dom.xpath("//div[contains(@class,'procedures')]/text()")
            if dom is not None
            else []
        )
        # Entries without a procedures div get an empty specialty instead of
        # aborting the whole run with an IndexError.
        specialty = procedures[0].strip() if procedures else ""

        doctors_data.append(
            {
                "name": item["post"]["post_title"],
                "location": item["name"],
                "address": address,
                "phone": item["phone"],
                "email": item["email"],
                "specialty": specialty,
            }
        )
print(doctors_data)
OUTPUT:
[{'name': 'Aaron Dumont', 'location': 'Tulane University Medical Center', 'address': '1415 Tulane Avenue, Fifth Floor, Neuroscience Center, New Orleans, LA, 70112', 'phone': '(504) 988-5565', 'email': 'adumont2@tulane.edu', 'specialty': 'Adult Deformity, Degenerative Spine, Minimally Invasive Surgery, Robotic Spine Surgery'},
{'name': 'Aaron Greenberg', 'location': 'Hackensack UMC Pascack Valley', 'address': '784 Franklin Avenue', 'phone': '(844) 777-0910', 'email': 'Ajberg214@gmail.com', 'specialty': 'Adult Deformity, Degenerative Spine, Minimally Invasive Surgery'},
.
.
.
{'name': 'Albert Wong', 'location': 'Cedars Sinai and DOCS Health', 'address': '8436 W 3rd St suite 800, Los Angeles, CA, USA', 'phone': '(310) 746-5918', 'email': 'AW@docshealth.com', 'specialty': 'Adult Deformity, Cervical Artificial Disc, Degenerative Spine, Minimally Invasive Surgery, Robotic Spine Surgery, Sacroiliac Joint Fusion, Spinal Deformity, Surgery of the Neck & Back'}]