Hey I've got a really poorly labelled element I need to get the text from. It's surrounded by other elements that I don't want.
I can easily target the sibling of the element I want.
How can I target the "next sibling" of the following element?
dlg4.window(title='WindowTitleThing',class_name='Text').draw_outline()
Can I target first child of?
Using UIA backend. Thanks!
Found this, but can't get it to work:
# Monkey patch for pywinauto that adds a first_only parameter to the window function
# for the UIA backend, in order to work around slow FindAll calls (win10 bug?)
# First copy paste this code on your REPL, then do:
# d1 = pywinauto.Desktop("uia")
# d1.window(first_only=True, title="Calculator").window_text()
# Currently only title is supported, but its easy to implement all the others as well,
# most importantly title_re
def first_child(self):
    """Return the first child of this element wrapped in ``UIAElementInfo``.

    Meant to be attached to ``UIAElementInfo`` as a method, so ``self`` must
    expose the underlying UIA element as ``self._element``.  Returns ``None``
    when the raw-view walker reports no child.
    """
    print("Getting first child")
    walker = pywinauto.uia_defines.IUIA().iuia.RawViewWalker
    raw_child = walker.GetFirstChildElement(self._element)
    if not raw_child:
        return None
    return pywinauto.uia_element_info.UIAElementInfo(raw_child)
def next_sibling(self):
    """Return the element following this one wrapped in ``UIAElementInfo``.

    Meant to be attached to ``UIAElementInfo`` as a method, so ``self`` must
    expose the underlying UIA element as ``self._element``.  Returns ``None``
    when the raw-view walker reports no further sibling.
    """
    print("Getting sibling")
    walker = pywinauto.uia_defines.IUIA().iuia.RawViewWalker
    raw_sibling = walker.GetNextSiblingElement(self._element)
    if not raw_sibling:
        return None
    return pywinauto.uia_element_info.UIAElementInfo(raw_sibling)
def find_first_element(first_only=None,
                       title=None,
                       title_re=None,
                       top_level_only=True,
                       backend=None
                       ):
    """Return the first top-level element whose name equals ``title``.

    Starts at the backend's root element and walks its children one sibling
    at a time with the patched ``first_child`` / ``next_sibling`` helpers,
    stopping at the first element whose ``name`` equals ``title``.

    :param first_only: accepted so the keyword can be passed through; unused.
    :param title: exact element name to look for.
    :param title_re: accepted but not implemented yet (see the TODO below).
    :param top_level_only: must be true; any other value is rejected.
    :param backend: backend name; defaults to the currently active backend.
    :returns: the matching element info, or ``None`` when nothing matches.
    :raises NotImplementedError: if ``top_level_only`` is false.
    """
    if backend is None:
        backend = pywinauto.backend.registry.active_backend.name
    backend_obj = pywinauto.backend.registry.backends[backend]
    if not top_level_only:
        raise NotImplementedError  # or can we actually accept this?

    root_element = backend_obj.element_info_class()
    child = root_element.first_child()
    while child is not None:
        # Format rather than concatenate: ``title`` defaults to None, and
        # ``str + None`` would raise TypeError before any comparison is made.
        print("{} ?= {}".format(child.name, title))
        if child.name == title:
            # TODO all the other conditions..
            # class_name(_re)
            # title_re
            # process
            # visible / enabled / handle / predicate_func / active_only / control_id / control_type / auto_id / framework_id
            break
        child = child.next_sibling()
    # ``child`` is None here when the loop ran out of siblings without a match.
    return child
def new_find_element(**kwargs):
    """Dispatch to the first-match search or to the stock ``find_element``.

    When ``first_only=True`` is passed, the sibling-walking
    ``find_first_element`` is used and ``ElementNotFoundError`` is raised if
    it finds nothing.  In every other case the original ``find_element``
    (saved as ``original_find_element``) is called.

    :param kwargs: search criteria, plus the optional ``first_only`` flag.
    :returns: the element found by whichever function was used.
    :raises pywinauto.findwindows.ElementNotFoundError: if the first-match
        search returns ``None``.
    """
    # ``first_only`` is this patch's own keyword.  Strip it before delegating
    # so that ``first_only=False`` is not forwarded to the stock function,
    # which was written without that parameter.
    first_only = kwargs.pop('first_only', None)
    if first_only is True:
        print("Using patched function to get only the first match")
        el = pywinauto.findwindows.find_first_element(**kwargs)
        if el is None:
            raise pywinauto.findwindows.ElementNotFoundError(kwargs)
        return el
    print("Using original function")
    return pywinauto.findwindows.original_find_element(**kwargs)
# Install the patch. pywinauto is imported here, before any of the functions above is called.
import pywinauto
# Attach the walker helpers to UIAElementInfo so element-info objects gain these two methods.
pywinauto.uia_element_info.UIAElementInfo.first_child = first_child
pywinauto.uia_element_info.UIAElementInfo.next_sibling = next_sibling
# Expose the sibling-walking search under pywinauto.findwindows.
pywinauto.findwindows.find_first_element = find_first_element
# Keep a reference to the stock find_element, then replace it with the dispatching wrapper.
pywinauto.findwindows.original_find_element = pywinauto.findwindows.find_element
pywinauto.findwindows.find_element = new_find_element
>>> first_child(dlg4)
Getting first child
Using original function
Using original function
Using original function
Using original function
Using original function
Using original function
Using original function
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "<stdin>", line 3, in first_child
ctypes.ArgumentError: argument 1: <class 'pywinauto.findbestmatch.MatchError'>: Could not find '_element' in 'dict_keys(
Related
Problem
For a Markdown document I want to filter out all sections whose header titles are not in the list to_keep. A section consists of a header and the body until the next section or the end of the document. For simplicity let's assume that the document only has level 1 headers.
When I make a simple case distinction on whether the current element has been preceded by a header in to_keep and either return None or return [], I get an error. That is, for pandoc --filter filter.py -o output.pdf input.md I get TypeError: panflute.dump needs input of type "panflute.Doc" but received one of type "list" (code, example file and complete error message at the end).
I use Python 3.7.4 and panflute 1.12.5 and pandoc 2.2.3.2.
Question
If I make a more fine-grained distinction on when to return [], it works (function action_working). My question is: why is this more fine-grained distinction necessary? My solution seems to work, but it might well be accidental... How can I get this to work properly?
Files
error
Traceback (most recent call last):
File "filter.py", line 42, in <module>
main()
File "filter.py", line 39, in main
return run_filter(action_not_working, doc=doc)
File "C:\Users\ody_he\AppData\Local\Continuum\anaconda3\lib\site-packages\panflute\io.py", line 266, in run_filter
return run_filters([action], *args, **kwargs)
File "C:\Users\ody_he\AppData\Local\Continuum\anaconda3\lib\site-packages\panflute\io.py", line 253, in run_filters
dump(doc, output_stream=output_stream)
File "C:\Users\ody_he\AppData\Local\Continuum\anaconda3\lib\site-packages\panflute\io.py", line 132, in dump
raise TypeError(msg)
TypeError: panflute.dump needs input of type "panflute.Doc" but received one of type "list"
Error running filter filter.py:
Filter returned error status 1
input.md
# English
Some cool english text this is!
# Deutsch
Dies ist die deutsche Übersetzung!
# Sources
Some source.
# Priority
**Medium** *[Low | Medium | High]*
# Status
**Open for Discussion** *\[Draft | Open for Discussion | Final\]*
# Interested Persons (mailing list)
- Franz, Heinz, Karl
filter.py
from panflute import *
to_keep = ['Deutsch', 'Status']
keep_current = False
def action_not_working(elem, doc):
    '''Keep or drop ``elem`` depending on the section it currently belongs to.

    The section state is refreshed first via update_keep.  When the current
    section is one we want, None is returned (leave the element unchanged);
    otherwise [] is returned (remove the element).  No element type is
    exempt from this rule.
    '''
    update_keep(elem)
    return None if keep_current else []
def action_working(elem, doc):
    '''Like action_not_working, but only ever removes a few element types.

    After refreshing the section state, elements in a wanted section are left
    unchanged (None).  In an unwanted section only Header, Para and
    BulletList elements are removed ([]); every other element is left
    unchanged.
    '''
    update_keep(elem)
    if keep_current:
        return None
    if isinstance(elem, (Header, Para, BulletList)):
        return []
    return None
def update_keep(elem):
    '''Refresh keep_current whenever the element is a header.

    Non-header elements leave the state untouched, so they inherit the
    decision made for the most recent header.
    '''
    global keep_current
    if not isinstance(elem, Header):
        return
    # Keep the section if its title is listed in to_keep
    keep_current = stringify(elem) in to_keep
# Entry point: run the filter with action_not_working as the per-element action.
# doc is passed straight through to run_filter, whose result is returned.
def main(doc=None):
return run_filter(action_not_working, doc=doc)
# Run the filter when the file is executed as a script.
if __name__ == '__main__':
main()
I think what happens is that panflute calls the action on all elements, including the Doc root element. If keep_current is False when walking the Doc element, it will be replaced by a list. This leads to the error message you are seeing, as panflute expects the root node to always be there.
The updated filter only acts on Header, Para, and BulletList elements, so the Doc root node will be left untouched. You'll probably want to use something more generic like isinstance(elem, Block) instead.
An alternative approach could be to use panflute's load and dump elements directly: load the document into a Doc element, manually iterate over all blocks in args and remove all that are unwanted, then dump the resulting doc back into the output stream.
from panflute import *
to_keep = ['Deutsch', 'Status']
keep_current = False
# Read the pandoc JSON document from standard input.
doc = load()
# A Doc exposes its top-level blocks as ``content``.  Rebuild that list,
# keeping only the blocks that belong to a wanted section.
kept_blocks = []
for top_level_block in doc.content:
    if isinstance(top_level_block, Header):
        # A header opens a new section: decide once whether to keep it.
        keep_current = stringify(top_level_block) in to_keep
    if keep_current:
        kept_blocks.append(top_level_block)
doc.content = kept_blocks
# Write the filtered document back to standard output.
dump(doc)
I want to scrape <p> elements from pages, and since there will be a couple of thousand of them I want to use multiprocessing. However, it doesn't work when I try to append the result to some variable.
I want to append the result of scraping to the data = []
I made a url_common for a base website since some pages don't start with HTTP etc.
from tqdm import tqdm
import faster_than_requests as requests #20% faster on average in my case than urllib.request
import bs4 as bs
def scrape(link, data):
    """Append the text of every <p> element of every page in ``link`` to ``data``.

    :param link: iterable of URLs; entries not starting with ``htt`` are
        treated as paths relative to the common base URL.
    :param data: list that receives the paragraph texts (mutated in place).
    """
    for i in tqdm(link):
        if i[:3] != 'htt':
            url_common = 'https://www.common_url.com/'
        else:
            url_common = ''
        try:
            ht = requests.get2str(url_common + str(i))
        except Exception:
            # Skip pages that cannot be fetched.  Falling through would either
            # hit an unbound ``ht`` or silently re-parse the previous page.
            continue
        parsed = bs.BeautifulSoup(ht, 'lxml')
        for p in parsed.find_all('p'):
            data.append(p.text)
Above doesn't work, since map() doesn't accept function like above
I tried to use it another way:
def scrape(link):
    """Print the text of every <p> element of every page in ``link``.

    :param link: iterable of URLs; entries not starting with ``htt`` are
        treated as paths relative to the common base URL.
    """
    for i in tqdm(link):
        if i[:3] != 'htt':
            url_common = 'https://www.common_url.com/'
        else:
            url_common = ''
        try:
            ht = requests.get2str(url_common + str(i))
        except Exception:
            # Skip pages that cannot be fetched.  Falling through would either
            # hit an unbound ``ht`` or silently re-parse the previous page.
            continue
        parsed = bs.BeautifulSoup(ht, 'lxml')
        for p in parsed.find_all('p'):
            print(p.text)
from multiprocessing import Pool
# Pool of 10 worker processes.
p = Pool(10)
links = ['link', 'other_link', 'another_link']
# map() calls scrape once per element of links, so each call receives a single string rather than the list.
data = p.map(scrape, links)
I get this error while using above function:
Traceback (most recent call last):
File "C:\ProgramData\Anaconda3\lib\multiprocessing\process.py", line 297, in _bootstrap
self.run()
File "C:\ProgramData\Anaconda3\lib\multiprocessing\process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py", line 110, in worker
task = get()
File "C:\ProgramData\Anaconda3\lib\multiprocessing\queues.py", line 354, in get
return _ForkingPickler.loads(res)
AttributeError: Can't get attribute 'scrape' on <module '__main__' (built-in)>
I have not figured a way to do it so that it uses Pool and at the same time appending the result of scraping to the given variable
EDIT
I change a little bit to see where it stops:
def scrape(link):
    """Print the text of every <p> element of every page in ``link``.

    Debug variant: prints ``works1`` after a successful download and
    ``works2`` after the page has been parsed.

    :param link: iterable of URLs; entries not starting with ``htt`` are
        treated as paths relative to the base URL.
    """
    for i in tqdm(link):
        if i[:3] != 'htt':
            url_common = 'https://www.investing.com/'
        else:
            url_common = ''
        try:  # fetching a URL can fail for many reasons
            ht = requests.get2str(url_common + str(i))
        except Exception:
            # Skip pages that cannot be fetched.  Falling through would either
            # hit an unbound ``ht`` or silently re-parse the previous page.
            continue
        print('works1')
        parsed = bs.BeautifulSoup(ht, 'lxml')
        paragraphs = parsed.find_all('p')
        print('works2')
        for p in paragraphs:
            print(p.text)
links = ['link', 'other_link', 'another_link']
# Direct call: scrape receives the whole list and loops over it itself.
scrape(links)
#WORKS PROPERLY AND PRINTS EVERYTHING
if __name__ == '__main__':
p = Pool(5)
# Pool.map hands the elements of links to scrape one at a time and the collected results are printed.
print(p.map(scrape, links))
#DOESN'T WORK, NOTHING PRINTS. Error like above
You are using the map function incorrectly.
It iterates over each element of the iterable and calls the function on each element.
You can see the map function as doing something like the following:
# Illustration of what mapping a function over to_be_mapped amounts to.
to_be_mapped = [1, 2, 3]
mapped = []
def mapping(x): # <-- note that the mapping accepts a single value
return x**2
# Call the function once per item and collect each result in order.
for item in to_be_mapped:
res = mapping(item)
mapped.append(res)
So to solve your problem remove the outermost for-loop as iterating is handled by the map function
def scrape(link):
    """Fetch one page, print its <p> texts and return them as a list.

    Written to be used with ``Pool.map``: it handles a single link, and the
    returned list is what ``map`` collects for that link.

    :param link: one URL; a value not starting with ``htt`` is treated as a
        path relative to the common base URL.
    :returns: list of paragraph texts, empty if the page could not be fetched.
    """
    if link[:3] != 'htt':
        url_common = 'https://www.common_url.com/'
    else:
        url_common = ''
    try:
        ht = requests.get2str(url_common + str(link))
    except Exception:
        # Without a page there is nothing to parse; ``ht`` would be unbound.
        return []
    parsed = bs.BeautifulSoup(ht, 'lxml')
    texts = [p.text for p in parsed.find_all('p')]
    for text in texts:
        print(text)
    return texts
I want to refer to an element (mem[0]) of a list (mem) with a different name (fetch):
# mem[0] is the fetch counter; the bytes of the file follow from index 1.
mem = [0]
# Read through a context manager so the file handle is closed right away.
with open("File.lx", "rb") as source_file:
    f = source_file.read()
mem.extend(f)
size = len(mem)
while mem[0] < size:  # using mem[0]
    # Each step reads a (source, target) pair of cells and copies
    # mem[source] into mem[target].
    char = (mem[0] * 2) + 1
    source = mem[char]
    target = mem[char + 1]
    mem[0] += 1
    mem[target] = mem[source]
And I tried that with the with statement:
# Same program as before, attempting to use "fetch" as another name for mem[0].
mem = [0]
f = open("File.lx", "rb").read()
for b in f: mem += [b]
size = len(mem)
# The next line is the one reported in the traceback that follows (AttributeError: __enter__).
with mem[0] as fetch: #with statement
while fetch < size: #using mem[0] as fetch
char = (fetch*2)+1
source = mem[char]
target = mem[char + 1]
fetch += 1
mem[target] = mem[source]
But I got an error:
Traceback (most recent call last):
File "C:\documents\test.py", line 6, in <module>
with mem[0] as fetch:
AttributeError: __enter__
I thought this would be the way because that's how it's done with file objects:
with open("File.lx", "rb") as file:
fileBytes = file.read()
I read the docs for the with statement and they say that the __exit__() and __enter__() methods are loaded. According to what I understood after reading that and from the AttributeError, my guess is that sequence elements (mem[0]) do not have an __enter__() method.
As the comments already mentioned, mem[0] is a plain integer, which doesn't have the __enter__ and __exit__ methods that the with statement requires, and it would indeed be simpler if you just used mem[0].
but that would be too easy, what you CAN do (as an exercise don't actually do this)
is extend the int class and add __enter__ and __exit__ like so:
class FancyInt(int):
    """An ``int`` that can also be used as a do-nothing context manager."""

    def __enter__(self):
        # The name bound by ``with ... as name`` is the integer itself.
        return self

    def __exit__(self, *exc_info):
        # Nothing to release; returning None lets any exception propagate.
        return None


mem = [FancyInt(0)]
with mem[0] as fetch:
    print(fetch)
this is neat but fetch is an alias to a LITERAL! if you change fetch, mem[0] will not change!
You seem to want a mutable object which functions as an alias for a specific location in a list. I could see some utility in that (since explicit indices are somewhat ugly in Python). You could create such a class. Here is a proof of concept, implementing the three things that you tried to do with fetch in your code:
class Fetcher:
    """Mutable alias for one slot of a list.

    Supports exactly three operations on the aliased slot: in-place addition
    (``+=``), multiplication with the alias on the left (``*``) and
    less-than comparison (``<``).
    """

    def __init__(self, target_list, index):
        self._list, self._i = target_list, index

    def _current(self):
        # Always read through to the list so outside changes are visible.
        return self._list[self._i]

    def __iadd__(self, v):
        # Update the slot itself, then hand back the alias so the name that
        # was augmented keeps referring to this Fetcher.
        self._list[self._i] += v
        return self

    def __mul__(self, v):
        return self._current() * v

    def __lt__(self, v):
        return self._current() < v
For example,
mem = [0,1,2]
fetch = Fetcher(mem,0)
print(fetch < 2) # True
mem[0] = 1
print(fetch < 2) # still True
fetch += 1
print(fetch < 2) # False!
print(mem[0]) # 2, showing that mem[0] was changed
print(fetch*2) # 4 -- but 2*fetch won't work!
The last line shows that there is a limit to what you could achieve here. To make this really useful, you would want to implement many more magic methods (beyond __iadd__ etc.). Whether or not all this is useful just to avoid [0], you be the judge.
I have a situation whereby the data set feeding into the code is creating a NoneType error. After attempting a couple of variations to work around the error, I am not sure how to revise the code to do so. Here is the traceback.
Traceback (most recent call last):
File "A:\anoth\test.py", line 64, in <module>
pretty_print(master)
File "A:\anoth\\test.py", line 53, in pretty_print
categories = find_elms(soup, 'div', {'id': 'categories'})
File "A:\anoth\\test.py", line 37, in find_elms
for region in regions:
TypeError: 'NoneType' object is not iterable
The following if-then loop has not worked when the NoneType data enters the code.
regions = soup.find(tag, attribute)
print('this ' + str(regions))
for region in regions: #this works for portions of the data set
if [elm.text for elm in regions.find_all('a')] is None:
return []
else:
return [elm.text for elm in regions.find_all('a')]
return []
Any ideas?
The reason your code doesn't work is that regions at some point is None, and you are trying to iterate over None in your for loop, hence the error 'NoneType' object is not iterable
regions = soup.find(tag, attribute) #<------ this is returning None
print('this ' + str(regions))
for region in regions: #<-- you can't take a region in regions, when regions is None
if [elm.text for elm in regions.find_all('a')] is None:
return []
else:
return [elm.text for elm in regions.find_all('a')]
return []
You can add a try except in there:
try:
regions = soup.find(tag, attribute)
print('this ' + str(regions))
for region in regions: #this works for portions of the data set
if [elm.text for elm in regions.find_all('a')] == []:
return []
else:
return [elm.text for elm in regions.find_all('a')]
except:
print ('regions is NoneType object')
return []
First, regions might be None, so you need to check for that:
if regions:
for region in regions:
# do stuff
The next line is problematic as well:
if [elm.text for elm in regions.find_all('a')] is None:
A list comprehension will never return None. On the other hand, if regions.find_all('a') returns None you'll get the same error. So try:
region_a_tags = regions.find_all('a')
if region_a_tags:
return [elm.text for elm in region_a_tags]
else:
return []
if regions:
for region in regions:
if some_thing_is_true_about_the_region:
do_some_thing
else:
do_some_thing_else
else:
do_some_thing_for_none_regions
Error
Traceback (most recent call last):
File "C:/Users/RCS/Desktop/Project/SHM.py", line 435, in <module>
app = SHM()
File "C:/Users/RCS/Desktop/Project/SHM.py", line 34, in __init__
frame = F(container, self)
File "C:/Users/RCS/Desktop/Project/SHM.py", line 384, in __init__
if "3202" in q:
TypeError: argument of type 'method' is not iterable
code
some part of code, initialisation and all
while 1:
q = variable1.get
if "3202" in q:
variable2.set("NI NODE3202")
try:
switch(labelframe2, labelframe1)
except:
switch(labelframe3, labelframe1)
elif "3212" in q:
variable2.set("NI NODE3212")
try:
switch(labelframe1, labelframe2)
except:
switch(labelframe3, labelframe2)
elif "3214" in q:
variable2.set("NI NODE3214")
try:
switch(labelframe1, labelframe3)
except:
switch(labelframe2, labelframe3)
else:
None
some other part of code
def switch(x, y):
    """Hide frame ``x`` and grid frame ``y`` at row 0, column 0.

    Nothing happens unless ``x.isGridded`` is truthy; in that case False is
    returned.  On a successful swap the ``isGridded`` flags of both frames
    are updated and None is returned.
    """
    if not x.isGridded:
        return False
    x.isGridded = False
    x.grid_forget()
    y.isGridded = True
    y.grid(row=0, column=0)
I am trying to create a switch between three labelframes which are inside another labelframe, and outside this labelframe are other labelframes that are not changing.
I have read some similar answers but I don't want to use __iter__() in my code. Can anybody provide any other suggestions?
You forgot to call the Entry.get() method:
q = variable1.get()
# ^^ call the method
Because the method object itself doesn't support containment testing directly, Python is instead trying to iterate over the object to see if there are any elements contained in it that match your string.
If you call the method, you get a string value instead. Strings do support containment testing.
You got that error because you did not add "()" after .get, so q held the method itself rather than the value it returns. To fix this, change q = variable1.get to q = variable1.get()