from scrapy.linkextractors import LinkExtractor

# Link extractor restricted to <a href="..."> links located inside
# <div class="article"> elements.
#
# NOTE: ``process_links`` is not a LinkExtractor argument (passing it raises
# TypeError); it is a parameter of ``scrapy.spiders.Rule``. To post-process
# the extracted links, pass ``process_links='process_links_function'`` to the
# Rule that uses this extractor.
link_extractor = LinkExtractor(
    # Only search for links inside <div class="article"> elements.
    restrict_xpaths=['//div[@class="article"]'],
    tags=('a',),  # only consider <a> elements
    attrs=('href',),  # only read the 'href' attribute
    unique=True,  # drop duplicate links
    # allow/deny are regular expressions tested against each link's absolute
    # URL (not its anchor text): keep URLs matching 'page', drop 'logout'.
    allow=('page',),
    deny=('logout',),
    # Replaces Scrapy's default ignored-extension list, so only links ending
    # in these extensions are skipped.
    deny_extensions=['pdf', 'zip'],
)