Page Scraper

Web page scraper with a jQuery-like syntax for Node. Powered by got and cheerio.

Installation

$ npm install page-scraper

Usage

const scrape = require('page-scraper');
 
(async () => {
  const $ = await scrape('https://example.com');
 
  // Extract data from the page with jQuery-like syntax.
  console.log({
    title: $('title').text(),
    heading: $('h1').text(),
    paragraphs: $('p').map((index, el) => $(el).text()).get(),
    link: $('p > a').attr('href')
  });
})();

Check the cheerio documentation for a complete guide on how to scrape the page using jQuery-like syntax.

Recipes

Handling Error

const scrape = require('page-scraper');
 
(async () => {
  try {
    const $ = await scrape('https://httpbin.org/status/400');
  } catch(error) {
    // The error message.
    console.error(error.message);
 
    if (error.hasOwnProperty('response')) {
      // The HTTP status code.
      console.error(error.response.statusCode);
    }
 
    if (error.hasOwnProperty('$')) {
      // The HTML document.
      console.error(error.$.html());
    }
  }
})();

Note that an error is also thrown if the response is not an HTML document.

const scrape = require('page-scraper');
 
(async () => {
  try {
    const $ = await scrape('https://httpbin.org/json');
  } catch(error) {
    console.error(error.message);
 
    if (error.hasOwnProperty('response')) {
      // The response body.
      console.error(error.response.body);
    }
  }
})();

Scraping Multiple Pages

const scrape = require('page-scraper');
 
(async () => {
  const $ = await Promise.all([
    scrape('https://example.com'),
    scrape('https://httpbin.org/html')
  ]);
 
  console.log({
    heading_1: $[0]('h1').text(),
    heading_2: $[1]('h1').text()
  });
})();

page-scraper

Page Scraper

Installation

Usage

Recipes

Handling Error

Scraping Multiple Pages

License

Readme

Keywords

Package Sidebar

Install

Repository

Homepage

Weekly Downloads

Version

License

Unpacked Size

Total Files

Last publish

Collaborators

page-scraper

Page Scraper

Installation

Usage

Recipes

Handling Error

Scraping Multiple Pages

License

Readme

Keywords

Package Sidebar

Install

Repository

Homepage

Weekly Downloads

Version

License

Unpacked Size

Total Files

Last publish

Collaborators

Weekly Downloads