How to scrape the web with JavaScript
// You will need to install and run Node.js prior to setting up
const puppeteer = require('puppeteer');

/**
 * Reads one DOM property from the first element matched by an XPath query.
 *
 * NOTE(review): `page.$x` appears to be deprecated/removed in recent Puppeteer
 * releases — confirm against the installed version before upgrading.
 *
 * @param {object} page - Puppeteer page that has already navigated.
 * @param {string} xpath - XPath expression selecting the target element.
 * @param {string} property - Name of the DOM property to read (e.g. 'src').
 * @returns {Promise<*>} The JSON-serialised value of the property.
 * @throws {Error} When the XPath matches no element on the page.
 */
async function readXPathProperty(page, xpath, property) {
  const [element] = await page.$x(xpath);
  if (!element) {
    // Fail with the offending XPath instead of an opaque
    // "Cannot read properties of undefined" TypeError.
    throw new Error(`No element matched XPath: ${xpath}`);
  }
  const handle = await element.getProperty(property);
  return handle.jsonValue();
}

/**
 * Opens `url` in a headless browser, reads the product image URL, title and
 * price from three fixed absolute XPaths, logs them and returns them.
 *
 * The XPaths are absolute paths into one specific page layout, so they stop
 * matching as soon as that layout changes.
 *
 * @param {string} url - Address of the product page to scrape.
 * @returns {Promise<{imgURL: *, title: *, price: *}>} The scraped values.
 * @throws {Error} When navigation fails or one of the XPaths matches nothing.
 */
async function scrapeProduct(url) {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto(url);

    const imgURL = await readXPathProperty(
      page,
      '/html/body/div[1]/div[2]/div[9]/div[4]/div[3]/div[1]/div[1]/div/div/div[2]/div[1]/div[1]/ul/li[1]/span/span/div/img',
      'src',
    );
    const title = await readXPathProperty(
      page,
      '/html/body/div[1]/div[2]/div[9]/div[4]/div[4]/div[1]/div/h1/span',
      'textContent',
    );
    const price = await readXPathProperty(
      page,
      '/html/body/div[1]/div[2]/div[9]/div[4]/div[4]/div[10]/div[1]/div/table/tbody/tr[2]/td[2]/span[1]',
      'textContent',
    );

    const product = { imgURL, title, price };
    console.log(product);
    return product;
  } finally {
    // Always release the browser process, even when scraping throws.
    await browser.close();
  }
}

scrapeProduct('https://www.amazon.com/Business-Microphone-Upgraded-NexiGo-Computer/dp/B08BHX7GYY/?_encoding=UTF8&smid=A1HNC035CZ2MR5&pd_rd_w=GsaOJ&pf_rd_p=45f0d3b0-8ddc-4840-9ac2-c26f2608345f&pf_rd_r=A1TQ15FXBKJH1JWYXXAD&pd_rd_r=82f7f31d-db1c-4831-96a3-bb110b1133f9&pd_rd_wg=urW4C&ref_=pd_gw_unk').catch((err) => {
  // Report the failure instead of leaving a floating rejected promise.
  console.error(err);
  process.exitCode = 1;
});
Web scraping example in JavaScript
// This example uses axios, cheerio & Express
const PORT = 8000;
const axios = require('axios');
const cheerio = require('cheerio');
const express = require('express');

const app = express();
const url = 'https://www.theguardian.com/uk';

// Download the page, pull every sub-link headline out of the markup and log
// the resulting list of { title, url } records. Any failure is logged too.
axios(url)
  .then((response) => {
    const html = response.data;
    const $ = cheerio.load(html);

    // One record per matched headline node, in document order.
    const articles = $('.fc-sublink__title', html)
      .toArray()
      .map((node) => {
        const headline = $(node);
        return {
          title: headline.text(),
          url: headline.find('a').attr('href'),
        };
      });

    console.log(articles);
  })
  .catch((err) => console.log(err));

// The Express app registers no routes here; it only starts listening.
app.listen(PORT, () => console.log(`server running on PORT ${PORT}`));
JavaScript scrape page
const http = require('http');

// Port the demo server listens on.
const PORT = 3000;

// Minimal HTTP server: every request gets a 200 plain-text "Hello World",
// whatever its method or path.
const server = http.createServer((req, res) => {
  res.statusCode = 200;
  res.setHeader('Content-Type', 'text/plain');
  res.end('Hello World');
});

// Use the PORT constant declared above; the previous lowercase `port` was
// never declared, so the script threw a ReferenceError before listening.
server.listen(PORT, () => {
  console.log(`Server running at PORT:${PORT}/`);
});
Source: www.scrapingbee.com