Python pexpect sendline contents being buffered? - python

I have the following code in Python using the pexpect module to run a perl script which asks a series of questions. I have used this for quite some time and works well until now.
pexpect python
index = child.expect(['Question 1:'])
os.system('sleep 2')
print 'Question 1:' + 'answer1'
child.sendline('answer1')
index = child.expect(['Question 2:'])
os.system('sleep 2')
print 'Question 1:' + 'answer2'
child.sendline('answer2')
index = child.expect(['Question 3:'])
os.system('sleep 2')
print 'Question 1:' + 'answer2'
child.sendline('answer2')
At this point, i have some code that checks if error will output when Question 2 & Question 3 does not match. I checked the pexpect log and the statements being sent are exactly what i want (number of bytes and string).
However when I check what gets accepted by the perl code its getting the following:
Question 1: 'answer 1' <-- CORRECT
Question 2: 'answer 1' <-- INCORRECT
Question 3: 'answer 2answer 2' <-- its being combined into one for some reason
Any ideas out there? I'm not using anything special when spawning my pexpect object: child=pexpect.spawn(cmd,timeout=140)
Edit: Added the perl code function that asks for Question 2 & 3
sub getpw
sub getpw
{ my ($name, $var, $encoding) = #_;
my $pw = $$var;
PWD: while(1) {
system("/bin/stty -echo");
getvar($name, \$pw, 1);
print "\n";
system("/bin/stty echo");
return if $pw eq $$var && length($pw) == 80;
if (length($pw) > 32) {
print STDERR "ERROR: Password cannot exceed 32 characters, please reenter.\n";
next PWD;
}
return if $pw eq $$var;
my $pw2;
system("/bin/stty -echo");
getvar("Repeat password", \$pw2, 1);
print "\n";
system("/bin/stty echo");
print "#1: ";
print $pw;
print "\n";
print "#2: ";
print $pw2;
if ($pw2 ne $pw) {
print STDERR "ERROR: Passwords do not match, please reenter.\n";
next PWD;
}
last;
}
# Escape dangerous shell characters
$pw =~ s/([ ;\*\|`&\$!#\(\)\[\]\{\}:'"])/\\$1/g;
my $correctlength=80;
my $encoded=`$AVTAR --quiet --encodepass=$pw`;
chomp($encoded);
if($? == 0 && length($encoded) == $correctlength) {
$$var = $encoded;
} else {
print "Warning: Password could not be encoded.\n";
$$var = $pw;
}
}
sub getvar
sub getvar
{ my ($name, $var, $hide) = #_;
my $default = $$var;
while(1) {
if($default) {
$default = "*****" if $hide;
print "$name [$default]: ";
} else {
print "$name: ";
}
my $val = <STDIN>;
chomp $val;
### $val =~ s/ //g; # do not mess with the password
$$var = $val if $val;
last if $$var;
print "ERROR: You must enter a value\n";
}
}

There is nothing wrong with your code on the python side. Check your perl side of the code. To demonstrate I have created below a python questions and answers scripts that call each other. When you execute answers.py it will produce the expected output as below. Also you don't need to do the sleep statements child.expect will block until the expected output has been received.
questions.py
answers = {}
for i in range(1, 4):
answers[i] = raw_input('Question %s:' % i)
print 'resuls:'
print answers
answers.py
import pexpect
child=pexpect.spawn('./questions.py', timeout=140)
for i in range(1, 4):
index = child.expect('Question %s:' % i)
child.sendline('answer%s' % i)
child.expect('resuls:')
print child.read()
output
{1: 'answer1', 2: 'answer2', 3: 'answer3'}

Related

How can I use the concatenate command to automate the merging of several files? [closed]

Closed. This question needs to be more focused. It is not currently accepting answers.
Want to improve this question? Update the question so it focuses on one problem only by editing this post.
Closed 1 year ago.
Improve this question
I need to merge two sets of files together. I don't mind what language is used as long as it works.
For example:
I have a folder with both sets of files
N-4A-2A_S135_L001_R1_001.fastq.gz
N-4A-2A_S135_L001_R2_001.fastq.gz
N-4A-2A_S135_L001_R1_001-2.fastq.gz
N-4A-2A_S135_L001_R2_001-2.fastq.gz
N-4-Bmp-1_S20_L001_R1_001.fastq.gz
N-4-Bmp-1_S20_L001_R2_001.fastq.gz
N-4-Bmp-1_S20_L001_R1_001-2.fastq.gz
N-4-Bmp-1_S20_L001_R2_001-2.fastq.gz
I need them merged like this:
N-4A-2A-S135-L001_R1_001.fastq.gz + N-4A-2A-S135-L001_R1_001-2.fastq.gz > N-4A-2A-S135-L001_R1_001-cat.fastq.gz
N-4A-2A-S135-L001_R2_001.fastq.gz + N-4A-2A-S135-L001_R2_001-2.fastq.gz > N-4A-2A-S135-L001_R2_001-cat.fastq.gz
N-4-Bmp-1_S20_L001_R1_001.fastq.gz + N-4-Bmp-1_S20_L001_R1_001-2.fastq.gz > N-4-Bmp-1_S20_L001_R1_001-cat.fastq.gz
N-4-Bmp-1_S20_L001_R2_001.fastq.gz + N-4-Bmp-1_S20_L001_R2_001-2.fastq.gz > N-4-Bmp-1_S20_L001_R2_001-cat.fastq.gz
I have a total of ~180 files so any way to automate this would be amazing.
They all follow the same format of:
(sample name) + "R1_001" or "R2_001" + "" or "-2" + .fastq.gz
According to example you provided, your file names look sortable. If this is not the case, this will not work and this answer will be invalid.
Idea of the following snippet is really simple - sort all the filenames and then join adjacent tuples.
This should work in bash:
#!/bin/bash
files=( $( find . -type f | sort -r) )
files_num=${#files[#]}
for (( i=0 ; i < ${files_num} ; i=i+2 )); do
f1="${files[i]}"
f2="${files[i+1]}"
fname=$(basename -s '.gz' "$f1")".cat.gz"
echo "$f1 and $f2 -> $fname"
cat "$f1" "$f2" > "$fname"
done
or simpler version in ZShell (zsh)
#!/bin/zsh
files=( $( find . -type f | sort -r) )
for f1 f2 in $files; do
fname=$(basename -s '.gz' "$f1")".cat.gz"
echo "$f1 and $f2 -> $fname"
cat "$f1" "$f2" > "$fname"
done
You did say any language, so ... I prefer perl
#!/usr/bin/perl
# concatenate files
master(#ARGV);
exit(0);
# master -- master control
sub master
{
my(#argv) = #_;
my($tail,#tails);
local(%tails);
# scan argv for -opt
while (1) {
my($opt) = $argv[0];
last unless (defined($opt));
last unless ($opt =~ s/^-/opt_/);
$$opt = 1;
shift(#argv);
}
# load up the current directory
#tails = dirload(".");
foreach $tail (#tails) {
$tails{$tail} = 0;
}
# look for the base name of a file
foreach $tail (#tails) {
docat($tail)
if ($tail =~ /R[12]_001\.fastq\.gz$/);
}
# default mode is "dry run"
unless ($opt_go) {
printf("\n");
printf("rerun with -go to actually do it\n");
}
}
# docat -- process a pairing
sub docat
{
my($base) = #_;
my($tail);
my($out);
my($cmd);
my($code);
# all commands are joining just two files
$tail = $base;
$tail =~ s/\.fastq/-2.fastq/;
# to an output file
$out = $base;
$out =~ s/\.fastq/-cat.fastq/;
$cmd = "cat $base $tail > $out";
if ($opt_v) {
printf("\n");
printf("IN1: %s\n",$base);
printf("IN2: %s\n",$tail);
printf("OUT: %s\n",$out);
}
else {
printf("%s\n",$cmd);
}
die("docat: duplicate IN1\n")
if ($tails{$base});
$tails{$base} = 1;
die("docat: duplicate IN2\n")
if ($tails{$tail});
$tails{$tail} = 1;
die("docat: duplicate OUT\n")
if ($tails{$out});
$tails{$out} = 1;
{
last unless ($opt_go);
# execute the command and get error code
system($cmd);
$code = $? >> 8;
exit($code) if ($code);
}
}
# dirload -- get list of files in a directory
sub dirload
{
my($dir) = #_;
my($xf);
my($tail);
my(#tails);
# open the directory
opendir($xf,$dir) or
die("dirload: unable to open '$dir' -- $!\n");
# get list of files in the directory excluding "." and ".."
while ($tail = readdir($xf)) {
next if ($tail eq ".");
next if ($tail eq "..");
push(#tails,$tail);
}
closedir($xf);
#tails = sort(#tails);
#tails;
}
Here's the program output with the -v option:
IN1: N-4-Bmp-1_S20_L001_R1_001.fastq.gz
IN2: N-4-Bmp-1_S20_L001_R1_001-2.fastq.gz
OUT: N-4-Bmp-1_S20_L001_R1_001-cat.fastq.gz
IN1: N-4-Bmp-1_S20_L001_R2_001.fastq.gz
IN2: N-4-Bmp-1_S20_L001_R2_001-2.fastq.gz
OUT: N-4-Bmp-1_S20_L001_R2_001-cat.fastq.gz
IN1: N-4A-2A_S135_L001_R1_001.fastq.gz
IN2: N-4A-2A_S135_L001_R1_001-2.fastq.gz
OUT: N-4A-2A_S135_L001_R1_001-cat.fastq.gz
IN1: N-4A-2A_S135_L001_R2_001.fastq.gz
IN2: N-4A-2A_S135_L001_R2_001-2.fastq.gz
OUT: N-4A-2A_S135_L001_R2_001-cat.fastq.gz
rerun with -go to actually do it

Avoid Selenium to be detected by javascript duck typing in python?

Is there any way to change Selenium properties in order to avoid its own detection by duck typing method in Javascript?
Last time I was hardly looking to make Selenium to be fully undetected and identically to Chrome. Almost succeed but just found this new detection method which I cannot bypass: duck typing in Javascript.
Here is the Javascript detection code:
// Opera 8.0+
var isOpera = (!!window.opr && !!opr.addons) || !!window.opera || navigator.userAgent.indexOf(' OPR/') >= 0;
// Firefox 1.0+
var isFirefox = typeof InstallTrigger !== 'undefined';
// Safari 3.0+ "[object HTMLElementConstructor]"
var isSafari = /constructor/i.test(window.HTMLElement) || (function (p) { return p.toString() === "[object SafariRemoteNotification]"; })(!window['safari'] || (typeof safari !== 'undefined' && safari.pushNotification));
// Internet Explorer 6-11
var isIE = /*#cc_on!#*/false || !!document.documentMode;
// Edge 20+
var isEdge = !isIE && !!window.StyleMedia;
// Chrome 1 - 71
var isChrome = !!window.chrome && (!!window.chrome.webstore || !!window.chrome.runtime);
// Blink engine detection
var isBlink = (isChrome || isOpera) && !!window.CSS;
var output = 'Detecting browsers by ducktyping:<hr>';
output += 'isFirefox: ' + isFirefox + '<br>';
output += 'isChrome: ' + isChrome + '<br>';
output += 'isSafari: ' + isSafari + '<br>';
output += 'isOpera: ' + isOpera + '<br>';
output += 'isIE: ' + isIE + '<br>';
output += 'isEdge: ' + isEdge + '<br>';
output += 'isBlink: ' + isBlink + '<br>';
document.body.innerHTML = output;
Source: https://jsfiddle.net/6spj1059/
Look what are the results for this js code in real Chrome browser:
results for real Chrome browser
While all of them are FALSE when open the same jsfiddle link in Selenium.
Here is what I did into Python. I tried to change window title in python by asserting it as Chrome since just by setting it directly isn't possible. Also tried to first navigate to the chrome://newtab url and after that to the url I want but also fails.
browser.get("chrome://newtab")
try:
assert u'New Tab' in browser.title
print "Assertion succeed!"
print browser.title
except AssertionError:
_, _, tb = sys.exc_info()
traceback.print_tb(tb) # Fixed format
tb_info = traceback.extract_tb(tb)
filename, line, func, text = tb_info[-1]
print('An error occurred on line {} in statement {}'.format(line, text))
exit(1)
print("Page Title is :", browser.title)
clear_cache_and_cookies(browser)
browser.get(url16)
print browser.title
print type(browser.title)
print u'New Tab'
print type(u'New Tab')
Thank you for your help!
Should be:
driver.execute_script("""
window.chrome.webstore = true
""")

libvirt: From state integer to string?

I have this simple script to check the memory usage of virtual machines managed by libvirt.
How can I convert the integer for state from dom.info() to a human readable string?
import libvirt
import re
import sys
def mem_total_kb():
meminfo = open('/proc/meminfo').read()
matched = re.search(r'^MemTotal:\s+(\d+)', meminfo)
return int(matched.groups()[0])
def main():
conn = libvirt.openReadOnly(None)
if conn == None:
print 'Failed to open connection to the hypervisor'
sys.exit(1)
used_mem_sum = 0
for domain_id in conn.listDomainsID():
dom = conn.lookupByID(domain_id)
state, max_mem, used_mem, vcpus, cpu_time_used = dom.info()
print(
'name=%s state=%s max_mem=%s used_mem=%s vcpus=%s cpu_time_used=%s' % (dom.name(), state, max_mem, used_mem, vcpus, cpu_time_used))
used_mem_sum += used_mem
print('Sum of used mem: %s KiB' % used_mem_sum)
mem_total = mem_total_kb()
print('Sum of physical mem: %s KiB' % mem_total)
if used_mem_sum > mem_total:
print('########## VMs use more RAM than available!')
return
mem_left=mem_total - used_mem_sum
print('Memory left: %s KiB' % (mem_left))
mem_left_should=4000000
if mem_left<mem_left_should:
print('less than mem_left_should=%sKiB left!' % mem_left_should)
if __name__ == '__main__':
main()
Docs: https://libvirt.org/html/libvirt-libvirt-domain.html#virDomainInfo
state: the running state, one of virDomainState
enum virDomainState {
VIR_DOMAIN_NOSTATE = 0
no state
VIR_DOMAIN_RUNNING = 1
the domain is running
VIR_DOMAIN_BLOCKED = 2
the domain is blocked on resource
VIR_DOMAIN_PAUSED = 3
the domain is paused by user
VIR_DOMAIN_SHUTDOWN = 4
the domain is being shut down
VIR_DOMAIN_SHUTOFF = 5
the domain is shut off
VIR_DOMAIN_CRASHED = 6
the domain is crashed
VIR_DOMAIN_PMSUSPENDED = 7
the domain is suspended by guest power management
VIR_DOMAIN_LAST = 8
NB: this enum value will increase over time as new events are added to the libvirt API. It reflects the last state supported by this version of the libvirt API.
}
Looks like at least the enum constant names are exposed in the libvirt module. The following works for me...
import libvirt
import pprint
import re
d = {}
for k, v in libvirt.__dict__.iteritems():
if re.match('VIR_DOMAIN_[A-Z]+$', k):
d[v] = k
pprint.pprint(d)
...and prints...
{0: 'VIR_DOMAIN_NOSTATE',
1: 'VIR_DOMAIN_RUNNING',
2: 'VIR_DOMAIN_BLOCKED',
3: 'VIR_DOMAIN_PAUSED',
4: 'VIR_DOMAIN_SHUTDOWN',
5: 'VIR_DOMAIN_SHUTOFF',
6: 'VIR_DOMAIN_CRASHED',
7: 'VIR_DOMAIN_PMSUSPENDED'}
The descriptions, which are likely just comments in the original source code, are probably not exposed. One of the examples defines its own dictionary on line 106...
state_names = { libvirt.VIR_DOMAIN_RUNNING : "running",
libvirt.VIR_DOMAIN_BLOCKED : "idle",
libvirt.VIR_DOMAIN_PAUSED : "paused",
libvirt.VIR_DOMAIN_SHUTDOWN : "in shutdown",
libvirt.VIR_DOMAIN_SHUTOFF : "shut off",
libvirt.VIR_DOMAIN_CRASHED : "crashed",
libvirt.VIR_DOMAIN_NOSTATE : "no state" }
...so you could just take it from there, although it seems to be incomplete.
This bash script will fetch the list of virDomainState values (excluding VIR_DOMAIN_LAST) and their descriptions in JSON format:
#!/bin/bash
HEADER=include/libvirt/libvirt-domain.h
get_header_file()
{
curl -s https://raw.githubusercontent.com/libvirt/libvirt/master/"$HEADER"
}
select_virDomainState_block()
{
sed -n '/virDomainState:/,/^\} virDomainState;/ p'
}
join_multiline_comments()
{
sed -n '\%/\*[^*]*$% N; \%/\*.*\*/$% { s/\s*\n\s*/ /; p; }'
}
enum_values_to_json_map()
{
echo '{'
sed "s/\s*VIR_DOMAIN\S\+\s*=\s*//; s^,\s*/\*\s*^ : '^; s^\s*\*/^',^;"
echo '}'
}
get_header_file \
| select_virDomainState_block \
| join_multiline_comments \
| grep 'VIR_DOMAIN\S\+\s*=' \
| enum_values_to_json_map
Example usage:
$ ./get_libvirt_domain_states
{
0 : 'no state',
1 : 'the domain is running',
2 : 'the domain is blocked on resource',
3 : 'the domain is paused by user',
4 : 'the domain is being shut down',
5 : 'the domain is shut off',
6 : 'the domain is crashed',
7 : 'the domain is suspended by guest power management',
}
Note that the script downloads a ~150KB file from the libvirt mirror repository at GitHub. It is intended for facilitating staying up-to-date with the libvirt code. Of course you can call it from your python code, but personally I wouldn't do that.

splitting a diff file using regex in Python

I'm trying to split a diff (unified format) into each section using the re module in python. The format of a diff is like this...
diff --git a/src/core.js b/src/core.js
index 9c8314c..4242903 100644
--- a/src/core.js
+++ b/src/core.js
## -801,7 +801,7 ## jQuery.extend({
return proxy;
},
- // Mutifunctional method to get and set values to a collection
+ // Multifunctional method to get and set values of a collection
// The value/s can optionally be executed if it's a function
access: function( elems, fn, key, value, chainable, emptyGet, pass ) {
var exec,
diff --git a/src/sizzle b/src/sizzle
index fe2f618..feebbd7 160000
--- a/src/sizzle
+++ b/src/sizzle
## -1 +1 ##
-Subproject commit fe2f618106bb76857b229113d6d11653707d0b22
+Subproject commit feebbd7e053bff426444c7b348c776c99c7490ee
diff --git a/test/unit/manipulation.js b/test/unit/manipulation.js
index 18e1b8d..ff31c4d 100644
--- a/test/unit/manipulation.js
+++ b/test/unit/manipulation.js
## -7,7 +7,7 ## var bareObj = function(value) { return value; };
var functionReturningObj = function(value) { return (function() { return value; }); };
test("text()", function() {
- expect(4);
+ expect(5);
var expected = "This link has class=\"blog\": Simon Willison's Weblog";
equal( jQuery("#sap").text(), expected, "Check for merged text of more then one element." );
## -20,6 +20,10 ## test("text()", function() {
frag.appendChild( document.createTextNode("foo") );
equal( jQuery( frag ).text(), "foo", "Document Fragment Text node was retreived from .text().");
+
+ var $newLineTest = jQuery("<div>test<br/>testy</div>").appendTo("#moretests");
+ $newLineTest.find("br").replaceWith("\n");
+ equal( $newLineTest.text(), "test\ntesty", "text() does not remove new lines (#11153)" );
});
test("text(undefined)", function() {
diff --git a/version.txt b/version.txt
index 0a182f2..0330b0e 100644
--- a/version.txt
+++ b/version.txt
## -1 +1 ##
-1.7.2
\ No newline at end of file
+1.7.3pre
\ No newline at end of file
I've tried the following combinations of patterns but can't quite get it right. This is the closest I have come so far...
re.compile(r'(diff.*?[^\rdiff])', flags=re.S|re.M)
but this yields
['diff ', 'diff ', 'diff ', 'diff ']
How would I match all sections in this diff?
This does it:
r=re.compile(r'^(diff.*?)(?=^diff|\Z)', re.M | re.S)
for m in re.findall(r, s):
print '===='
print m
You don't need to use regex, just split the file:
diff_file = open('diff.txt', 'r')
diff_str = diff_file.read()
diff_split = ['diff --git%s' % x for x in diff_str.split('diff --git') \
if x.strip()]
print diff_split
Why are you using regex? How about just iterating over the lines and starting a new section when a line starts with diff?
list_of_diffs = []
temp_diff = ''
for line in patch:
if line.startswith('diff'):
list_of_diffs.append(temp_diff)
temp_diff = ''
else: temp_diff.append(line)
Disclaimer, above code should be considered illustrative pseudocode only and is not expected to actually run.
Regex is a hammer but your problem isn't a nail.
Just split on any linefeed that's followed by the word diff:
result = re.split(r"\n(?=diff\b)", subject)
Though for safety's sake, you probably should try to match \r or \r\n as well:
result = re.split(r"(?:\r\n|[\r\n])(?=diff\b)", subject)

decompress name

what is the easiest way to decompress a data name?
For example, change compressed form:
abc[3:0]
into decompressed form:
abc[3]
abc[2]
abc[1]
abc[0]
preferable 1 liner :)
In Perl:
#!perl -w
use strict;
use 5.010;
my #abc = qw/ a b c d /;
say join( " ", reverse #abc[0..3] );
Or if you wanted them into separate variables:
my( $abc3, $abc2, $abc1, $abc0 ) = reverse #abc[0..3];
Edit: Per your clarification:
my $str = "abc[3:0]";
$str =~ /(abc)\[(\d+):(\d+)\]/;
my $base = $1;
my $from = ( $2 < $3 ? $2 : $3 );
my $to = ( $2 > $3 ? $2 : $3 );
my #strs;
foreach my $num ( $from .. $to ) {
push #strs, $base . '[' . $num . ']';
}
This is a little pyparsing exercise that I've done in the past, adapted to your example (also supports multiple ranges and unpaired indexes, all separated by commas - see the last test case):
from pyparsing import (Suppress, Word, alphas, alphanums, nums, delimitedList,
Combine, Optional, Group)
LBRACK,RBRACK,COLON = map(Suppress,"[]:")
ident = Word(alphas+"_", alphanums+"_")
integer = Combine(Optional('-') + Word(nums))
integer.setParseAction(lambda t : int(t[0]))
intrange = Group(integer + COLON + integer)
rangedIdent = ident("name") + LBRACK + delimitedList(intrange|integer)("indexes") + RBRACK
def expandIndexes(t):
ret = []
for ind in t.indexes:
if isinstance(ind,int):
ret.append("%s[%d]" % (t.name, ind))
else:
offset = (-1,1)[ind[0] < ind[1]]
ret.extend(
"%s[%d]" % (t.name, i) for i in range(ind[0],ind[1]+offset,offset)
)
return ret
rangedIdent.setParseAction(expandIndexes)
print rangedIdent.parseString("abc[0:3]")
print rangedIdent.parseString("abc[3:0]")
print rangedIdent.parseString("abc[0:3,7,14:16,24:20]")
Prints:
['abc[0]', 'abc[1]', 'abc[2]', 'abc[3]']
['abc[3]', 'abc[2]', 'abc[1]', 'abc[0]']
['abc[0]', 'abc[1]', 'abc[2]', 'abc[3]', 'abc[7]', 'abc[14]', 'abc[15]', 'abc[16]', 'abc[24]', 'abc[23]', 'abc[22]', 'abc[21]', 'abc[20]']

Categories

Resources