所以我试图在Twitter上抓取一个页面来获取推文:
我想获取元素;单独获取文本、图像、视频,但我不断获取空数组
//Scraper.js
const puppeteer = require('puppeteer');
const fs = require('fs');
async function scrapeTwitter() {
try {
const browser = await puppeteer.launch();
const page = await browser.newPage();
await page.goto('https://twitter.com/coindesk');
await page.waitForLoadState('networkidle2');
const html = await page.content();
const $ = cheerio.load(html);
const tweets = $('[data-testid="tweet"]');
const posts = [];
tweets.each(function () {
const text = $(this).find('.tweet-text').text().trim();
const image = $(this).find('.tweet-image').attr('src');
const video = $(this).find('.tweet-video').attr('src');
posts.push({ text, image, video });
});
await browser.close();
return posts;
} catch (error) {
console.error('Error scraping Twitter:', error);
return [];
}
}
module.exports = scrapeTwitter;