Uploading CSV files faster in Node js using Streams

Amir Mustafa
6 min readSep 6, 2022

--

→ There are lots of good JavaScript packages, such as exceljs and filedownloader, that make uploading and downloading Excel files possible, and they work like a charm.

→ I would personally prefer exceljs when it comes to downloading excel. Lots of colour formatting options are available.

→ We first implemented uploading data using exceljs for health-care domain applications, mainly with a large amount of data — medicine names, customer names, etc.

→ We were uploading more than 20K to 30K rows of data, and the server took lots of time. Sometimes it took more than 20 minutes 😥

→ Our prime goal was to improve the uploading time. This was achievable with the help of csv-parser.

What makes csv-parser different?

→ csv-parser uses Node.js streams and can parse roughly 90,000 rows per second.

→ We have implemented it and we were shocked with such positive results ⚡

→ Let us see how to implement it in Node.js

STEP 1: Installing csv-parser package

Using npm:

$ npm install csv-parser // For parsing CSV files
$ npm install multer // For uploading file

Using yarn:

$ yarn add csv-parser
$ yarn add multer

STEP 2: Create Routes:

We can upload a single file as well as multiple files.

var express = require("express");
var router = express.Router();
// common.js exports the multer middleware as `upload` (see common.js below);
// the controller exposes `validateFile`, which parses and validates the CSV.
const common = require("../common");
const User = require("../controllers/User");

// POST /users — multer stores the uploaded file, then the controller validates it.
router.post("/users", common.upload, User.validateFile);

module.exports = router;

STEP 3: Controller for Uploading File:

→ csv-parser exposes four main events through Node streams:

a. headers event — the header row of the CSV file can be validated here

// Pipe the uploaded file through csv-parser; `readable` is the parsed stream.
const readable = fs
  .createReadStream(importedFilepath)
  .pipe(csv(csvOption));

readable.on('headers', async (headers) => {
  // FIND CODE BELOW
});

b. data event — emitted once for every parsed row of the CSV file

// The 'data' event receives one parsed row per call — the original named the
// parameter `headers`, which is misleading; it is a row object keyed by column.
readable.on('data', async (row) => {
  // FIND CODE BELOW
});

c. end event — runs after parsing and database uploading are complete.

// The 'end' event fires once when the stream is exhausted and carries no
// argument — the original declared an unused `headers` parameter.
readable.on('end', async () => {
  // FIND CODE BELOW
});

d. error event — if an error occurs during parsing, the code below runs:

.on('error', err => {// This executes when some error happens in csv-parser executionconsole.log("NODE STREAM ERROR ===>", err);
req.file.path ? fs.unlinkSync(req.file.path) : "";
return res.status(500).send({
status: "error",
message: err,
});
});

Controller file: app/src/server/controllers/User.js

const csv = require("csv-parser");
const fs = require("fs");
// `moment` was used below but never required in the original.
const moment = require("moment");

// Import controller for the Node.js route.
// Streams the uploaded CSV through csv-parser, validates the header row and
// every data row, then performs the database writes and removes the temp file.
const validateFile = async (req, res) => {
  // Row buckets filled while streaming (were undeclared in the original).
  const invalidData = []; // rows that failed validation
  const validData = [];   // new rows to insert
  const updateData = [];  // existing rows to update
  try {
    const importFilename = req.file.filename;
    const importedFilepath = req.file.path;
    const loggedinUser = parseInt(req.params.loggedinUser, 10);

    // Reject anything that is not a CSV before parsing starts.
    // (The original guard `importFilename && !importedFilepath` could never
    // reach the extension check for a successfully stored upload.)
    if (!importFilename || !req.file.originalname.endsWith("csv")) {
      invalidData.push({
        row: "NA",
        column: "General",
        notes: "File should be in CSV Format",
      });
      return res.send({ error: invalidData });
    }

    // CSV PARSER
    const results = [];
    const csvOption = {
      // Trim stray whitespace around each header cell.
      mapHeaders: ({ header }) => header.trim(),
    };
    // NOTE(review): these look like they should be set during row validation
    // (highest start/end dates seen in the file) — confirm against requirements.
    let start_date_highest = null;
    let end_date_highest = null;
    let fromYYYY = null;
    let toYYYY = null;
    let fromMM = null;
    let toMM = null;

    const dataRows = await new Promise((resolve, reject) => {
      const readable = fs
        .createReadStream(importedFilepath)
        .pipe(csv(csvOption));

      readable
        .on("headers", (headers) => {
          // Validate the header row before accepting any data.
          // (The original called `validImportColumn`, a typo for the
          // `validateImportColumn` helper defined below.)
          if (!validateImportColumn("MyFile", headers)) {
            invalidData.push({
              row: "NA",
              column: "General",
              notes: "Invalid excel header name.",
            });
            readable.destroy();
            reject(new Error("Invalid excel header name."));
          }
        })
        .on("data", (eachData) => {
          // One parsed row per call; normalize missing cells to "".
          const iterate = {
            country: eachData.country || "",
            id: eachData.id || "",
            customername1: eachData.customername1 || "",
            customername2: eachData.customername2 || "",
            phone: eachData.phone || "",
            address1: eachData.address1 || "",
            address2: eachData.address2 || "",
            status: eachData.status || "",
          };
          // Do all row-level validation checks here, based on requirements:
          //   push into validData when the row is correct,
          //   push into invalidData when some error is found,
          //   push into updateData when the row already exists and must be updated,
          // e.g.:
          //   invalidData.push({ row: "2", column: "Empty Excel", notes: "Excel has no data!" });
          results.push(iterate);
        })
        .on("end", () => {
          // This executes when the whole file has been parsed.
          console.log("Parsing done ", results.length);
          fromYYYY = moment(start_date_highest).format("YYYY");
          fromMM = moment(start_date_highest).format("MM");
          // The original reused start_date_highest for both "to" values.
          toYYYY = moment(end_date_highest).format("YYYY");
          toMM = moment(end_date_highest).format("MM");
          resolve(results);
        })
        .on("error", (streamErr) => {
          // This executes when some error happens in csv-parser execution.
          console.log("NODE STREAM ERROR ===>", streamErr);
          reject(streamErr);
        });
    });

    // Database writes happen only after the whole file parsed cleanly —
    // the original sent the success response from inside the per-row
    // 'data' handler, which would fire once per row.
    if (invalidData.length > 0) {
      return res.send({ error: invalidData });
    }
    if (validData.length > 0) {
      // WRITE ADD TO DATABASE LOGIC
    }
    if (updateData.length > 0) {
      // WRITE UPDATE TO DATABASE LOGIC
    }

    // Remove the temp file once the data is captured in the database.
    if (req.file.path) fs.unlinkSync(req.file.path);

    return res.send({
      success: validData.length + updateData.length,
    });
  } catch (err) {
    // Best-effort cleanup of the temp upload, then report the failure.
    if (req.file && req.file.path && fs.existsSync(req.file.path)) {
      fs.unlinkSync(req.file.path);
    }
    return res.status(500).send({
      status: "error",
      message: err,
    });
  }
};

// Returns true when the uploaded file's header row matches the expected
// column layout. The original used `=` (assignment) and `;` inside the `if`
// condition, which is invalid and would never perform a comparison.
const validateImportColumn = (importType, columns) => {
  if (importType !== "MyFile") return false;
  const expected = [
    "Country",
    "ID",
    "Customer Name 1",
    "Customer Name 2",
    "Phone Number",
    "Address 1",
    "Address 2",
    "Status",
  ];
  return expected.every((name, i) => columns[i] === name);
};

module.exports = { validateFile };

→ To know more about multer for uploading files read here

common.js

const multer = require("multer");
const fs = require("fs");

// Directory where uploaded files are stored.
const dir = "./public/images/nov";

// Create the upload directory on first run if it is missing.
if (!fs.existsSync(dir)) {
  fs.mkdirSync(dir, { recursive: true });
}

// Disk storage: keep the original filename, prefixed with a timestamp
// so repeated uploads of the same file never collide.
const fileStorageEngine = multer.diskStorage({
  destination: (req, file, cb) => cb(null, dir),
  filename: (req, file, cb) => cb(null, Date.now() + "-" + file.originalname),
});

// "file" is the form-data field name the client must use.
const upload = multer({ storage: fileStorageEngine }).single("file");
const uploadMultiple = multer({ storage: fileStorageEngine }).array("file");

module.exports = { upload, uploadMultiple };

→ We will use Postman for importing files. Please check here for the postman and multer.

Route: http://localhost:5000/users

Type: POST

Body: form-data, set the key to "file" (matching the multer field name) with type File

Closing Thoughts:

The performance of the application drastically improved due to Node Stream concepts. This becomes very useful when we import very large rows of data (say 20K to 30K data). 20 minutes reduced to 2 to 3 mins maximum 😊.

Thank you for reading till the end 🙌 . If you enjoyed this article or learned something new, support me by clicking the share button below to reach more people and/or give me a follow on Twitter and subscribe Happy Learnings !! to see some other tips, articles, and things I learn about and share there.

--

--

Amir Mustafa
Amir Mustafa

Written by Amir Mustafa

JavaScript Specialist | Consultant | YouTuber 🎬. | AWS ☁️ | Docker 🐳 | Digital Nomad | Human. Connect with me on https://www.linkedin.com/in/amirmustafa1/

No responses yet