我有这样的脚本:

    import scrapy
    from scrapy.crawler import CrawlerProcess
    from datetime import datetime
    import os
    
    
    # Remove any stale export from a previous run so the feed starts fresh.
    if not os.path.exists('jfs_hombre.csv'):
        print("The file does not exist!")
    else:
        os.remove('jfs_hombre.csv')
        print("The file has been deleted successfully")
    
    
    class JfsSpider_hombre(scrapy.Spider):
        """Spider that queries the VTEX GraphQL search API of justforsport.com.ar
        for 'hombre' products and yields one dict per product.

        NOTE(review): no feed settings are declared on the spider itself, so CSV
        export depends entirely on the settings of the CrawlerProcess that runs it.
        """
        name = 'jfs_hombre'
        #start_urls = ["https://www.justforsport.com.ar/hombre?page=1"]

        def start_requests(self):

            # Single GET against the persisted GraphQL query endpoint; the
            # base64 'variables' blob appears to encode the search parameters
            # (query=hombre, from=64, to=95) -- confirm by decoding it.
            yield scrapy.Request(
                url='https://www.justforsport.com.ar/_v/segment/graphql/v1?workspace=master&maxAge=short&appsEtag=remove&domain=store&locale=es-AR&__bindingId=e841e6ce-1216-4569-a2ad-0188ba5a92fc&operationName=productSearchV3&variables=%7B%7D&extensions=%7B%22persistedQuery%22%3A%7B%22version%22%3A1%2C%22sha256Hash%22%3A%226869499be99f20964918e2fe0d1166fdf6c006b1766085db9e5a6bc7c4b957e5%22%2C%22sender%22%3A%22vtex.store-resources%400.x%22%2C%22provider%22%3A%22vtex.search-graphql%400.x%22%7D%2C%22variables%22%3A%22eyJoaWRlVW5hdmFpbGFibGVJdGVtcyI6ZmFsc2UsInNrdXNGaWx0ZXIiOiJGSVJTVF9BVkFJTEFCTEUiLCJzaW11bGF0aW9uQmVoYXZpb3IiOiJkZWZhdWx0IiwiaW5zdGFsbG1lbnRDcml0ZXJpYSI6Ik1BWF9XSVRIT1VUX0lOVEVSRVNUIiwicHJvZHVjdE9yaWdpblZ0ZXgiOmZhbHNlLCJtYXAiOiJjIiwicXVlcnkiOiJob21icmUiLCJvcmRlckJ5IjoiT3JkZXJCeVJlbGVhc2VEYXRlREVTQyIsImZyb20iOjY0LCJ0byI6OTUsInNlbGVjdGVkRmFjZXRzIjpbeyJrZXkiOiJjIiwidmFsdWUiOiJob21icmUifV0sIm9wZXJhdG9yIjoiYW5kIiwiZnV6enkiOiIwIiwic2VhcmNoU3RhdGUiOm51bGwsImZhY2V0c0JlaGF2aW9yIjoiU3RhdGljIiwiY2F0ZWdvcnlUcmVlQmVoYXZpb3IiOiJkZWZhdWx0Iiwid2l0aEZhY2V0cyI6ZmFsc2V9%22%7D',
                callback=self.parse,
                method="GET"
            )

        def parse(self, response):
            # Parse the JSON body of the single GraphQL response.
            resp = response.json()
            #print(resp)
            # NOTE(review): this outer loop issues no new requests -- it only
            # rewrites resp['recordsFiltered'] locally, so the same products
            # from the one response are yielded 18 times (duplicate rows).
            for item in range(0,576,32):
                resp['recordsFiltered']=item

                for result  in resp['data']['productSearch']['products']:
                    yield {
                        'Casa':'Just_For_Sports',
                        'Sku' :result['productReference'],
                        'Name': result['productName'],
                        'precio': result['priceRange']['sellingPrice']['highPrice'],
                        'Link': result['link'],
                        'Date':datetime.today().strftime('%Y-%m-%d')

                    }
    
    
    # Process configured to export scraped items to jfs_hombre.csv.
    # NOTE(review): the __main__ block below creates a *new* CrawlerProcess()
    # without these settings, shadowing this one -- which is why no CSV file
    # is ever produced. (FEED_URI/FEED_FORMAT are also superseded by the
    # single FEEDS setting in recent Scrapy versions -- see the answer code.)
    process= CrawlerProcess(
        settings = { 
            'FEED_URI':'jfs_hombre.csv' ,
            'FEED_FORMAT': 'csv',
            'FEED_EXPORT_ENCODING':'utf-8',
            } )   

if __name__ == "__main__":
    # Bug fix: do NOT create a fresh CrawlerProcess() here -- that discarded
    # the FEED_URI/FEED_FORMAT settings attached to the module-level `process`
    # above, which is why no CSV file was ever written. Reuse the configured
    # process instead.
    process.crawl(JfsSpider_hombre)
    process.start()

这是我第一次调用 API、处理 JSON...所以我对此了解不多,不过我一直在用 scrapy。问题是我得不到 .csv 文件——它根本不会创建任何文件...爬虫本身工作正常,我拿到了所有数据,但无法导出...我做错了什么??

提前谢谢!

推荐答案

现在,它起作用了

import scrapy
from scrapy.crawler import CrawlerProcess
from datetime import datetime
import os
    
    
# Clear out any CSV left over from a previous run before crawling.
if not os.path.exists('jfs_hombre.csv'):
    print("The file does not exist!")
else:
    os.remove('jfs_hombre.csv')
    print("The file has been deleted successfully")
    
    
class JfsSpider_hombre(scrapy.Spider):
    """Spider for justforsport.com.ar 'hombre' products via the VTEX GraphQL API.

    Yields one dict per product. CSV export is configured through
    ``custom_settings`` (the FEEDS setting), so it works no matter how the
    CrawlerProcess that runs it was constructed.
    """
    name = 'jfs_hombre'

    #start_urls = ["https://www.justforsport.com.ar/hombre?page=1"]

    # Declaring FEEDS on the spider itself is what makes the export work even
    # with a bare CrawlerProcess() in __main__.
    custom_settings = {"FEEDS": {'jfs_hombre.csv': {'format': 'csv'}}} 

    def start_requests(self):
        # Single GET against the persisted GraphQL query endpoint; the base64
        # 'variables' blob encodes the search parameters (query=hombre,
        # from=64, to=95).
        yield scrapy.Request(
            url='https://www.justforsport.com.ar/_v/segment/graphql/v1?workspace=master&maxAge=short&appsEtag=remove&domain=store&locale=es-AR&__bindingId=e841e6ce-1216-4569-a2ad-0188ba5a92fc&operationName=productSearchV3&variables=%7B%7D&extensions=%7B%22persistedQuery%22%3A%7B%22version%22%3A1%2C%22sha256Hash%22%3A%226869499be99f20964918e2fe0d1166fdf6c006b1766085db9e5a6bc7c4b957e5%22%2C%22sender%22%3A%22vtex.store-resources%400.x%22%2C%22provider%22%3A%22vtex.search-graphql%400.x%22%7D%2C%22variables%22%3A%22eyJoaWRlVW5hdmFpbGFibGVJdGVtcyI6ZmFsc2UsInNrdXNGaWx0ZXIiOiJGSVJTVF9BVkFJTEFCTEUiLCJzaW11bGF0aW9uQmVoYXZpb3IiOiJkZWZhdWx0IiwiaW5zdGFsbG1lbnRDcml0ZXJpYSI6Ik1BWF9XSVRIT1VUX0lOVEVSRVNUIiwicHJvZHVjdE9yaWdpblZ0ZXgiOmZhbHNlLCJtYXAiOiJjIiwicXVlcnkiOiJob21icmUiLCJvcmRlckJ5IjoiT3JkZXJCeVJlbGVhc2VEYXRlREVTQyIsImZyb20iOjY0LCJ0byI6OTUsInNlbGVjdGVkRmFjZXRzIjpbeyJrZXkiOiJjIiwidmFsdWUiOiJob21icmUifV0sIm9wZXJhdG9yIjoiYW5kIiwiZnV6enkiOiIwIiwic2VhcmNoU3RhdGUiOm51bGwsImZhY2V0c0JlaGF2aW9yIjoiU3RhdGljIiwiY2F0ZWdvcnlUcmVlQmVoYXZpb3IiOiJkZWZhdWx0Iiwid2l0aEZhY2V0cyI6ZmFsc2V9%22%7D',
            callback=self.parse,
            method="GET"
        )

    def parse(self, response):
        """Yield one item per product found in the GraphQL JSON response.

        Bug fix: the original wrapped this in ``for item in range(0, 576, 32)``
        which issued no new requests and only rewrote
        ``resp['recordsFiltered']`` locally -- the same products were therefore
        yielded 18 times, producing duplicate CSV rows. Real pagination would
        require new Requests with updated from/to values in the encoded
        'variables' parameter.
        """
        resp = response.json()
        # Hoist the date out of the loop: one crawl, one date stamp.
        today = datetime.today().strftime('%Y-%m-%d')
        for result in resp['data']['productSearch']['products']:
            yield {
                'Casa': 'Just_For_Sports',
                'Sku': result['productReference'],
                'Name': result['productName'],
                'precio': result['priceRange']['sellingPrice']['highPrice'],
                'Link': 'https://www.justforsport.com.ar' + result['link'],
                'Date': today,
            }
    
    
  

if __name__ == "__main__":
    # A bare CrawlerProcess suffices: the spider's custom_settings carry the
    # FEEDS export configuration.
    crawler = CrawlerProcess()
    crawler.crawl(JfsSpider_hombre)
    crawler.start()

Python相关问答推荐

连接两个具有不同标题的收件箱

抓取rotowire MLB球员新闻并使用Python形成表格

Python—从np.array中 Select 复杂的列子集

在Python中,从给定范围内的数组中提取索引组列表的更有效方法

如何使用Pandas DataFrame按日期和项目汇总计数作为列标题

如何排除prefecture_related中查询集为空的实例?

为什么常规操作不以其就地对应操作为基础?

Cython无法识别Numpy类型

如何在Python Pandas中填充外部连接后的列中填充DDL值

在电影中向西北方向对齐""

获取git修订版中每个文件的最后修改时间的最有效方法是什么?

read_csv分隔符正在创建无关的空列

对于数组中的所有元素,Pandas SELECT行都具有值

按列表分组到新列中

try 在单个WITH_COLUMNS_SEQ操作中链接表达式时,使用Polars数据帧时出现ComputeError

在使用 rolling() 获取最大值时,是否可以排除每个窗口中的前 n 个值?

用LAKEF划分实木地板AWS Wrangler

如何从具有完整层次数据的Pandas框架生成图形?

保存由PYTHON在EXCEL中所做更改的问题

IpyWidget Select 框未打开