我try 从EMCSG网站https://www.nems.emcsg.com/nems-prices下载一张桌子 它的工作方式是在Web浏览器https://www.nems.emcsg.com/api/sitecore/DataSync/DataDownload?value=10&fromDate=2023-07-21&toDate=2023-07-21&tpcValue=1中输入URL下面的内容
然而,当我使用下面的python代码下载该文件时,它返回"下载失败:禁止".
它可能会被网站屏蔽,以便进行网络抓取.有没有办法使用Python来解决这个问题?
import pandas as pd
import numpy as np
import datetime
import pytz
import urllib.request
def download_file(file_name):
url = f'https://www.nems.emcsg.com/api/sitecore/DataSync/DataDownload?value=10&fromDate={file_name}&toDate={file_name}&tpcValue=1'
file_path = f'raw data/omie_spain/{file_name}'
try:
# download file from url
urllib.request.urlretrieve(url, file_path)
print(f'Downloaded {file_name} successfully!')
return file_path
except urllib.error.URLError as e:
print("Download failed:", e.reason)
if __name__ == '__main__':
# file_name_date = datetime.datetime.today().strftime("%Y%m%d")
# get tomorrow's forecast
# file_name_date = datetime.datetime.today() + datetime.timedelta(days=1)
file_name_date = datetime.date(year=2023, month=7, day=20)
file_name_date = file_name_date.strftime("%Y-%m-%d")
file_name_time = datetime.datetime.now().strftime("%H%M")
print(file_name_time)
print(file_name_date)
raw_file_name, formatted_file_name = f'{file_name_date}', f'{file_name_date}{file_name_time}.csv'
raw_file_path = download_file(raw_file_name)
以下是结果:
1653
2023-07-20
Download failed: Forbidden
如果还try 使用urlOpen方法,则也会返回以下错误:
1651
2023-07-20
Traceback (most recent call last):
urllib.request.urlopen(url,context=context)
File "C:\Users\XX\AppData\Local\Programs\Python\Python310\lib\urllib\request.py", line 216, in urlopen
return opener.open(url, data, timeout)
File "C:\Users\XX\AppData\Local\Programs\Python\Python310\lib\urllib\request.py", line 525, in open
response = meth(req, response)
File "C:\Users\XX\AppData\Local\Programs\Python\Python310\lib\urllib\request.py", line 634, in http_response
response = self.parent.error(
File "C:\Users\XX\AppData\Local\Programs\Python\Python310\lib\urllib\request.py", line 563, in error
return self._call_chain(*args)
File "C:\Users\XX\AppData\Local\Programs\Python\Python310\lib\urllib\request.py", line 496, in _call_chain
result = func(*args)
File "C:\Users\XX\AppData\Local\Programs\Python\Python310\lib\urllib\request.py", line 643, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 403: Forbidden
1645
2023-07-20
Traceback (most recent call last):
File "C:\Users\XX\hello\.venv\lib\site-packages\urllib3\connectionpool.py", line 467, in _make_request
self._validate_conn(conn)
File "C:\Users\XX\hello\.venv\lib\site-packages\urllib3\connectionpool.py", line 1092, in _validate_conn
conn.connect()
File "C:\Users\XX\hello\.venv\lib\site-packages\urllib3\connection.py", line 642, in connect
sock_and_verified = _ssl_wrap_socket_and_match_hostname(
File "C:\Users\XX\hello\.venv\lib\site-packages\urllib3\connection.py", line 783, in _ssl_wrap_socket_and_match_hostname
ssl_sock = ssl_wrap_socket(
File "C:\Users\XX\hello\.venv\lib\site-packages\urllib3\util\ssl_.py", line 469, in ssl_wrap_socket
ssl_sock = _ssl_wrap_socket_impl(sock, context, tls_in_tls, server_hostname)
File "C:\Users\XX\hello\.venv\lib\site-packages\urllib3\util\ssl_.py", line 513, in _ssl_wrap_socket_impl
return ssl_context.wrap_socket(sock, server_hostname=server_hostname)
File "C:\Users\XX\AppData\Local\Programs\Python\Python310\lib\ssl.py", line 512, in wrap_socket
return self.sslsocket_class._create(
File "C:\Users\XX\AppData\Local\Programs\Python\Python310\lib\ssl.py", line 1070, in _create
self.do_handshake()
File "C:\Users\XX\AppData\Local\Programs\Python\Python310\lib\ssl.py", line 1341, in do_handshake
self._sslobj.do_handshake()
ssl.SSLCertVerificationError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:997)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\XX\hello\.venv\lib\site-packages\urllib3\connectionpool.py", line 790, in urlopen
response = self._make_request(
File "C:\Users\XX\hello\.venv\lib\site-packages\urllib3\connectionpool.py", line 491, in _make_request
raise new_e
urllib3.exceptions.SSLError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:997)
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\Users\XX\hello\.venv\lib\site-packages\requests\adapters.py", line 486, in send
resp = conn.urlopen(
retries = retries.increment(
raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='www.nems.emcsg.com', port=443): Max retries exceeded with url: /api/sitecore/DataSync/DataDownload?value=10&fromDate=2023-07-21&toDate=2023-07-21&tpcValue=1 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:997)')))
During handling of the above exception, another exception occurred:
File "c:\Users\XX\hello\Dynamic_Tariff\emcsg.py", line 92, in <module>
r = requests.get(url)
File "C:\Users\XX\hello\.venv\lib\site-packages\requests\api.py", line 73, in get
return request("get", url, params=params, **kwargs)
return session.request(method=method, url=url, **kwargs)
File "C:\Users\XX\hello\.venv\lib\site-packages\requests\sessions.py", line 589, in request
resp = self.send(prep, **send_kwargs)
File "C:\Users\XX\hello\.venv\lib\site-packages\requests\sessions.py", line 703, in send
File "C:\Users\XX\hello\.venv\lib\site-packages\requests\adapters.py", line 517, in send
raise SSLError(e, request=request)
requests.exceptions.SSLError: HTTPSConnectionPool(host='www.nems.emcsg.com', port=443): Max retries exceeded with url: /api/sitecore/DataSync/DataDownload?value=10&fromDate=2023-07-21&toDate=2023-07-21&tpcValue=1 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:997)')))