您可以深入 bs4 的内部实现,修改 html.parser 处理 HTML 的方式
(以下代码在我使用的 bs4==4.12.2 版本上可以正常工作):
from bs4 import BeautifulSoup
from bs4.builder import builder_registry
from bs4.formatter import HTMLFormatter
class UnsortedAttributes(HTMLFormatter):
    """Formatter that emits attributes in stored order and drops the void-tag slash.

    The base formatter is configured with an empty ``void_element_close_prefix``
    so that void elements are written as ``<tag>`` rather than ``<tag/>``.
    """

    def __init__(self):
        """Initialise the base formatter with an empty void-element close prefix."""
        # The empty prefix is the important part: it removes the "/" that
        # would otherwise precede ">" on void elements.
        super().__init__(void_element_close_prefix="")

    def attributes(self, tag):
        """Yield the ``(name, value)`` pairs of ``tag.attrs`` in their existing order."""
        for name_and_value in tag.attrs.items():
            yield name_and_value
# Sample markup: the two custom tags are never explicitly closed.
html_text = """\
<closed_tag>
<my_tag id="xxx">
<my_other_tag id="zzz">
</closed_tag>"""

# Register the custom tags as void (empty) elements for the "html.parser"
# tree builder. The custom names are ADDED to the builder's existing set
# instead of replacing it; overwriting the set would make standard void tags
# such as <br> or <img> stop being treated as void.
# NOTE(review): this modifies the builder object returned by the registry, so
# it presumably affects every later html.parser parse in this process - confirm
# that is acceptable for your application.
parser_builder = builder_registry.lookup("html.parser")
parser_builder.empty_element_tags = set(parser_builder.empty_element_tags or ()) | {
    "my_tag",
    "my_other_tag",
}

soup = BeautifulSoup(html_text, "html.parser")

# Serialise with the custom formatter; the custom tags come out as
# <my_tag id="xxx"> without a closing tag or a trailing "/".
print(soup.encode(formatter=UnsortedAttributes()).decode())
打印:
<closed_tag>
<my_tag id="xxx">
<my_other_tag id="zzz">
</closed_tag>