I am completely new to Node.js. Recently I was given a task to scrape a website with Python and to connect the Python script to Node.js using the python-shell package. I'm done with the scraping part, but I have no prior knowledge of Node.js. Can you please guide me on how to approach this problem?
import sys
# Echo the first two command-line arguments back on stdout:
# argv[1] is the first name and argv[2] is the last name.
print("Output from Python")
print(f"First name: {sys.argv[1]}")
print(f"Last name: {sys.argv[2]}")
# save the script as hello.py
// Load the Express module and build the application object.
const express = require('express');
const app = express();

// Start the HTTP server on port 3000 (reachable at localhost:3000)
// and log a line once it is listening.
app.listen(3000, () => {
  console.log('server running on port 3000');
});

// Requests of the form localhost:3000/name are handled by callName().
app.get('/name', callName);
function callName(req, res) {
// Use child_process.spawn method from
// child_process module and assign it
// to variable spawn
var spawn = require("child_process").spawn;
// Parameters passed in spawn -
// 1. type_of_script
// 2. list containing Path of the script
// and arguments for the script
// E.g : http://localhost:3000/name?firstname=Mike&lastname=Will
// so, first name = Mike and last name = Will
var process = spawn('python',["./hello.py",
req.query.firstname,
req.query.lastname] );
// Takes stdout data from script which executed
// with arguments and send this data to res object
process.stdout.on('data', function(data) {
res.send(data.toString());
} )
}
// save code as start.js
After saving the Python script and the server script, run the server from its source folder with the following command:
node start.js
Access the application through link :
localhost:3000/name?firstname="Enter first name"&lastname="Enter last name"
For e g. : localhost:3000/name?firstname=Ram&lastname=Sharma
Since you must use the python-shell package, and assuming that your source file is my_script.py, you can simply read the documentation of the npm package, in particular this example:
import {PythonShell} from 'python-shell';
// Start my_script.py through python-shell (no arguments are passed).
let pyshell = new PythonShell('my_script.py'); // 1
pyshell.on('message', function (message) {
  // received a message sent from the Python script (a simple "print" statement)
  console.log(message); // the Python output is logged here
}); // 2
// end the input stream and allow the process to exit
pyshell.end(function (err, code, signal) {
  // Surface a failed run instead of reporting a normal exit.
  if (err) throw err;
  console.log('The exit code was: ' + code);
  console.log('The exit signal was: ' + signal);
  // Logged exactly once per run.
  console.log('finished');
}); // 3
What this script does:
1. launches the Python script without arguments;
2. receives every line the Python script writes to its standard output and logs it to the Node.js stdout;
3. closes the Python script, reporting some optional info (error, exit code, exit signal).
If your script prints the scraped data to stdout, this should work fine.
Related
I'm new to using child process module in nodejs, and got stuck while using python's virtualenv libs
I have a post route in an expressjs api, which is intended to activate a python virtualenv, run python on exec() child process and deactivate the virtualenv.
I have it setup like this in one of the modules in the project folder
// importing node modules
const { exec } = require("child_process");
const kill = require("tree-kill");
Within the controller function, I'm trying to create child processes this way
// Start the virtualenv activation command as its own child process.
// NOTE(review): child_process.exec() runs each command in a separate shell, so this
// activation presumably does not carry over to the commands below — confirm.
let activate_proc = exec("source pyenv/bin/activate");
// Run the Python script; the callback receives (error, stdout, stderr) for this command.
let command_proc = exec("python insert.py arg1 arg2", (error, stdout, stderr) => {
// handle status/messages with http codes
});
// NOTE(review): exec() does not wait for the previous command, so this is started
// right away rather than after insert.py has finished — confirm the intended ordering.
let deactivate_proc = exec("deactivate");
I was able to capture the pids for these processes and tried to kill using the tree-kill by pausing for 10 seconds
// Ten seconds from now, kill the three child processes by pid
// (command first, then activate, then deactivate) and log a confirmation.
setTimeout(function killAllProcesses() {
  for (const proc of [command_proc, activate_proc, deactivate_proc]) {
    kill(proc.pid);
  }
  console.log("Killed all processes");
}, 10 * 1000);
This seems to be working but somehow, if the command_proc takes more time, it might conflict.
Is there a more efficient way to kill them asynchronously?
You can run them synchronously instead, with execSync.
Try this:
// importing node modules
const {execSync } = require('child_process');
// Every execSync() call runs in its own shell, so a separate
// `source pyenv/bin/activate` call cannot influence the next command, and
// `deactivate` does not exist in a fresh shell (execSync would throw).
// Calling the virtualenv's own interpreter uses the environment's packages
// without any activate/deactivate step.
let command_proc;
// use try/catch: execSync throws when the command fails
try {
  command_proc = execSync('pyenv/bin/python insert.py arg1 arg2');
  // stdout buffer, turn it to string
  const result = command_proc.toString();
  console.log('command_proc result: ', result);
  // handle status/messages with http codes
  // ... process command_proc
} catch (err) {
  // handle command_proc err
  console.log('output', err);
  // err.stderr may be missing (e.g. the command could not be started at all),
  // so guard before converting it.
  console.log('stderr', err.stderr ? err.stderr.toString() : '');
}
Error, could not create TXT output file: No such file or directory
tesseract ./downloads/en.jpg outputs/1585336201.287781output -l eng
This is the error I'm having trouble with. The command works fine when the Python script is run directly, but not when the script is started through a child_process spawn. The downloads folder and the .py script are in the same folder, and both of them are in a folder next to the Node.js script.
This is the method that I call from a POST handler to give it the image file that I need to transcribe. The Python script then can't run the tesseract command, even though it can when it is run manually.
const verif = async (fileName, filePath) => {
var uint8arrayToString = function(data){
return String.fromCharCode.apply(null, data);
};
const spawn = require('child_process').spawn;
const scriptExecution = spawn('python',['-u', './diploma/app.py']);
var data = JSON.stringify([fileName]);
scriptExecution.stdin.write(data);
scriptExecution.stdin.end();
scriptExecution.stdout.on('data', (data) => {
console.log(uint8arrayToString(data));
});
scriptExecution.stderr.on('data', (data) => {
// As said before, convert the Uint8Array to a readable string.
console.log(uint8arrayToString(data));
});
scriptExecution.on('exit', (code) => {
console.log("Process quit with code : " + code);
});
return true;
}
The problem was that tesseract needed windows permissions when executed from the js file
I am using the Naked library for Python mentioned here to execute a Node.js script, which is also available here. Whenever I run my Python script (taken exactly from the above link) from the terminal, I am able to see the output of the Node.js script. I included this Python script in my Django server and run it whenever an API call is made.
I am not able to catch the errors properly when the validation of the page fails. It always returns 'success'. I suspect this is because Node.js is asynchronous. If so, how can I make the function 'amphtmlValidator.getInstance().then(function(validator)' mentioned on the npm website synchronous? I am really very new to Node.js.
Actually I just need to validate AMP pages from python script in Django and the only way I found is calling the node script through python. Everything is ok, but I couldn't catch the errors properly.
Please help.
I solved this by moving the "getInstance()" code into an async function and then adding a sleep of 2 seconds, which ensures that all the data is written to the console. I manually print the keyword "ERROR" if any errors are raised in the Node script; otherwise I print "PASS".
Whenever I execute this script from Python, I check for these keywords. I know that this answer is not ideal, but it was enough to get things working.
Here is what I have done:
NODE Script:
/**
 * Returns a promise that is fulfilled (with no value) once `ms`
 * milliseconds have elapsed.
 */
function sleep(ms) {
  const startTimer = (done) => {
    setTimeout(done, ms);
  };
  return new Promise(startTimer);
}
/**
 * Validates the content of an HTML file with the AMP validator and prints
 * the outcome: "PASS" when the document is valid, otherwise one
 * "ERROR : line L, col C: message" line per validation error.
 *
 * The validator promise is awaited directly, so the returned promise settles
 * only after all output has been printed; no fixed delay is needed, and a
 * failure to obtain the validator rejects the returned promise instead of
 * being lost.
 *
 * @param {string} file - HTML content to validate.
 * @returns {Promise<void>} settles once every result line has been logged.
 */
async function validation(file) {
  const validator = await amphtmlValidator.getInstance();
  const result = validator.validateString(file);

  if (result.status === 'PASS') {
    console.log("PASS");
    return;
  }

  for (const error of result.errors) {
    console.log(`ERROR : line ${error.line}, col ${error.col}: ${error.message}`);
  }
}
Python:
# Run the Node validator script and report the outcome based on the
# keywords it prints ("ERROR" for failures, "PASS" for success).
response = muterun_js("validate.js", args)
if response:
    # Decode the captured stdout once and reuse it for both checks.
    output = response.stdout.decode()
    if "ERROR" in output:
        print("validation failed")
    elif "PASS" in output:
        print("success")
I am running a simple nodejs web application that executes a Python 2.7 script using the module python-shell. However, since this script takes too long to execute, around 3 minutes, the function fails with an ERR_EMPTY_RESPONSE error.
The function executes at the end, since Python still runs it in the background but the web application crashes.
generatePPage: (req, res) => {
const ps = require('python-shell');
let nombre = req.params.id;
var PythonShell = require('python-shell');
var options = {
pythonPath: '/usr/bin/python2.7',
args: [nombre],
};
var users = ps.PythonShell.run('./generateDetail.py', options, function (err, results) {
if (err){throw err;}
console.log('The results: %j', results);
res.render('success.ejs', {
title: 'Edit Project'
,project: ''
,message: 'Success'
});
});
},
How could I force it to wait longer?
Yes, you have to use setTimeout.
Check this link for more information: http://www.java2s.com/Tutorials/Javascript/Node.js_Tutorial/0270__Node.js_setTimeout_setInterval.htm
And also, check out this link: https://nodejs.org/ar/docs/guides/timers-in-node/
Solved it. The way to do it is to assign the return value of calling the listen function to a const and then assign a new timeout value in milliseconds.
// Keep the value returned by listen() so its timeout can be changed afterwards.
const server = app.listen(port, function onListening() {
  console.log(`Server running on port: ${port}`);
});
// New timeout in milliseconds: 5 minutes.
server.timeout = 5 * 60 * 1000;
Hello I'm trying to interact with a python script inside a nodejs application at runtime.
The python script is more a command center for doing whatsapp operations called yowsup.
https://github.com/tgalal/yowsup/tree/master
I'm able to run the 'Yowsup Cli client' in a shell and work with it. But I want to run it in a nodejs application because it is written in python and I'm not good in python.
So what I did was to spawn the command I normally use in the shell like this:
var spawn = require('child_process').spawn,
ls = spawn('./yowsup/yowsup-cli', ['demos','--login', '49XXXXXXXXXXX:8bF0hUewVcX1hf6adpuasonFdEP=', '--yowsup', '-E', 's40']);
ls.stdout.on('data', function (data) {
console.log('stdout: ' + data.toString());
});
ls.stderr.on('data', function (data) {
console.log('stderr: ' + data.toString());
});
ls.on('exit', function (code) {
console.log('child process exited with code ' + code.toString());
});
The problem is, that I don't get any data from the process. The python script normally prints some output as start but I can't get anything inside node while the process is running.
I looked inside the python script and saw that the output is generated like this:
print("%s send '%s'" % (messageProtocolEntity.getFrom(False), messageProtocolEntity.getBody()))
How can I get some data from the python script on runtime?
This is slightly different than your approach and uses an npm lib, but does work (results is the stdout of the random_geo.py script):
var py = require('python-shell');
// Options object handed to py.run() in getCoords().
var pyOptions = {
// NOTE(review): presumably makes python-shell exchange plain text lines with the script — confirm against the python-shell docs
mode: 'text',
// Path of the Python interpreter (per the option name)
pythonPath: '/opt/local/bin/python',
// NOTE(review): presumably the directory in which script names such as random_geo.py are looked up — confirm
scriptPath: '.'
};
/**
 * Runs random_geo.py through python-shell with pyOptions and logs the
 * outcome: the error when the run failed, otherwise the collected results.
 * req, res and next are accepted but not used here.
 */
function getCoords(req, res, next) {
  const logOutcome = (err, results) => {
    const outcome = err ? err : results;
    console.log(outcome);
  };

  py.run('random_geo.py', pyOptions, logOutcome);
}