String and variables won't concatenate on same line - python

I am unable to print the serial number and the definition of a word on the same line using the code below. Any advice would be much appreciated. I've read through previously asked Q&As on this, but had no luck.
from bs4 import BeautifulSoup
import requests
# Interactive loop: ask for a word, fetch its dictionary page and print the
# numbered definitions found on it. `loops` is never reassigned, so the loop
# only ends through the `break` below.
loops = ""
while loops == "":
    url = "http://dictionary.reference.com/browse/"
    c = 1  # serial number; restarts at 1 for every word looked up
    word = input("Enter word (0 to quit): ")
    if word == "0":
        break
    data = requests.get(url + word)
    soup = BeautifulSoup(data.text, "html.parser")
    # Every <div class="def-content"> element on the returned page.
    data1 = soup.find_all("div",{"class":"def-content"})
    print("The meaning/s of " + word + " is/are:")
    for i in data1:
        # Elements for which .string is None are skipped and not numbered.
        if i.string != None:
            # NOTE(review): i.string is printed unmodified. Presumably it
            # carries leading/trailing newline characters from the page
            # markup, which would put the number and the text on separate
            # lines -- confirm against the actual page.
            print(c, i.string) # Trying to print serial number and definition on same line
            c = c + 1

Try this:
print(str(c)+ " " + i.string.strip('\n'))

Related

getting data from sites and making summary of it

Hi, I am working on two different scripts: one gets data from sites through Selenium, and the other produces a summary of that data. Getting the data from the sites works fine, but when I pass that data on to be summarized, it does not make it into my summary. Please let me know where I am making an error and how to fix it. I am new to Python and Selenium.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
import time
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
# Take the search phrase from the user and make it usable in a query string.
search_input = input("Input the keyword you want to search for:")
search_input = search_input.replace(' ', '+')
# Raw string so the backslashes in the Windows path are taken literally.
driver = webdriver.Chrome(executable_path=r"E:\chromedriver\chromedriver.exe")
for i in range(1):
    # driver.get() returns nothing useful, so its result is not kept.
    driver.get("https://www.google.com/search?q=" +
               search_input + "&start=" + str(i))
    print(driver.title)
    driver.maximize_window()
    time.sleep(5)
    # Result anchors: <a href=...> directly inside <div class="yuRUbf">.
    # Uses the same find_elements(By...) form as the paragraph lookup later on.
    links_url = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a[@href]")
    links = [x.get_attribute('href') for x in links_url]
    link_data = []
# Visit every collected link and print the text of each <p> element on it.
for new_url in links:
    # print('\nnew url : ', new_url)
    driver.get(new_url)
    #Getting the data from the site
    try:
        link = driver.find_elements(By.TAG_NAME, "p")
        for p in link:
            # NOTE(review): `datas` is rebound on every iteration, so after
            # the loops finish it holds only the text of the last paragraph
            # of the last page visited.
            datas = p.get_attribute("innerText")
            print(datas)
    # NOTE(review): the bare except hides every error raised for this page
    # and simply moves on to the next link.
    except:
        continue
driver.quit()
# Summarise the text held in `datas`: score every sentence by the frequencies
# of the non-stop-words it contains, then keep the sentences scoring more than
# 1.2 times the average.
print("\nOriginal text:")
print(datas)
textWordCount = len(datas.split())
print("The number of words in Original text are : " + str(textWordCount))

# Frequency of every lower-cased token that is not an English stop word.
stopWords = set(stopwords.words("english"))
words = word_tokenize(datas)
freqTable = dict()
for word in words:
    word = word.lower()
    if word in stopWords:
        continue
    freqTable[word] = freqTable.get(word, 0) + 1

# A sentence's score is the sum of the frequencies of the words found in it
# (substring match on the lower-cased sentence).
sentences = sent_tokenize(datas)
sentenceValue = dict()
for sentence in sentences:
    lowered = sentence.lower()
    for word, freq in freqTable.items():
        if word in lowered:
            sentenceValue[sentence] = sentenceValue.get(sentence, 0) + freq

sumValues = sum(sentenceValue.values())
# No scored sentence (e.g. empty text) used to raise ZeroDivisionError here.
average = int(sumValues / len(sentenceValue)) if sentenceValue else 0

summary = ''
for sentence in sentences:
    if (sentence in sentenceValue) and (sentenceValue[sentence] > (1.2 * average)):
        summary += " " + sentence
print("\nSummary:")
print(summary)
summaryWordCount = len(summary.split())
print("\nThe number of words in summary are : " + str(summaryWordCount))
the problem is with this line:
datas = p.get_attribute("innerText")
this rewrites the value of datas with each iteration of the loop.
I'm guessing that you are really wanting to append to a list, or expand a string with a space between words?

Object of type 'NoneType' has no len() error

What I want is to count the occurrences of a specific word on the first page of Google results, and then count again for another word - if this second word appears more than 2 times, I will set the occurrence count of the first word to 0. But I get this error:
File "D:\HQ_Bot-master\answer_bot.py", line 307, in
get_points_live()
File "D:\HQ_Bot-master\answer_bot.py", line 293, in get_points_live
points,maxo = google_wiki(simq, options, neg)
File "D:\HQ_Bot-master\answer_bot.py", line 242, in google_wiki
count2 = len(words2)
TypeError: object of type 'NoneType' has no len()
Here is my code:
import string
import requests
import json
import urllib.request as urllib2
from bs4 import BeautifulSoup
from google import google
from PIL import Image
import pytesseract
import argparse
import cv2
import os
import pyscreenshot as Imagegrab
import sys
import wx
from halo import Halo
def google_wiki(sim_ques, options, neg):
    """Score the first three entries of *options* against Google results.

    For each option the question (punctuation replaced by spaces), the
    lower-cased option and the word "wiki" are sent to Google as one query.
    The option's score is derived from text found in the returned page.

    Args:
        sim_ques: the question text.
        options: sequence of answer options; only indexes 0-2 are read.
        neg: when truthy, every score is negated.

    Returns:
        (points, maxo): the list of scores in option order, and the
        lower-cased option with the highest score.
    """
    spinner = Halo(text='Googling and searching Wikipedia', spinner='dots2')
    spinner.start()
    num_pages = 1
    points = list()
    content = ""
    maxo=""
    maxp=-sys.maxsize
    i = 0
    temp = 0
    ques = ""
    # Map every punctuation character to a space.
    translator = str.maketrans(string.punctuation, ' '*len(string.punctuation))
    sim_ques22 = sim_ques.translate(translator)
    while i < 3:
        o = options[i]
        # x is a neighbouring option: the next one, or the previous one for
        # the last index.
        if i <= 1:
            x = options[i+1]
        else:
            x = options[i-1]
        o = o.lower()
        x = x.lower()
        ques += sim_ques22 + ' ' + o + ' wiki'
        print(ques)
        page = requests.get("http://www.google.com/search?q="+ques)
        soup = BeautifulSoup(page.text,"lxml")
        # presumably the first text node containing o, or None -- verify
        # against the BeautifulSoup documentation
        words = soup.find(text=lambda text: text and o in text)
        # NOTE(review): type(...) always returns a class, never None, so this
        # condition is always true and len() is reached even when words is None.
        if(type(words)is not None):
            count = len(words)
        words2 = soup.find(text=lambda text: text and x in text)
        # NOTE(review): this tests `words`, not `words2`, and has the same
        # always-true type() comparison as above.
        if(type(words)is not None):
            count2 = len(words2)
        if count2 >= 2:
            temp = 0
        else:
            temp = count
        if neg:
            temp*=-1
        points.append(temp)
        if temp>maxp:
            maxp=temp
            maxo=o
        ques = ""  # start the next query from scratch
        i += 1
    spinner.succeed()
    spinner.stop()
    return points,maxo
You can use a simple ternary statement:
count = len(words) if words else 0
which is the same as this
if words: # This checks if it is truthy (which None is not)
    count = len(words)
else:
    count = 0
If you want, you can swap the conditional for if words is None.
EDIT: I used a ternary expression as you use the variable later on. Otherwise, you'll end up with a NameError.
Just use try and except if you want to continue despite the error, or catch the error and print it if required:
try:
    pass  # your code where you got the error goes here
except Exception as err:
    # Ignore the error and carry on; use print(err) instead of pass if you
    # want to see the error that was caught.
    pass

Error in wikipedia subcategory crawling using python3

Hello Community Members,
I am getting the error NameError: name 'f' is not defined. The code is as follows. Please help; any sort of help is appreciated. I have been stuck on this for 3 days. The code is meant to extract the names of all subcategories of a Wikipedia category in Python 3.
I have tried both the relative and absolute paths.
The code is as follows:
import httplib2
from bs4 import BeautifulSoup
import subprocess
import time, wget
import os, os.path
#declarations
# URL prefix to which a category name is appended.
catRoot = "http://en.wikipedia.org/wiki/Category:"
# printTree stops recursing once its depth counter reaches this value.
MAX_DEPTH = 100
# Names of the non-empty subcategories printTree has already descended into.
done = []
# Not referenced by the code shown here.
ignore = []
# Destination printTree passes to download() for the category page.
path = 'trivial'
#Removes all newline characters and replaces with spaces
def removeNewLines(in_text):
    """Return *in_text* with every newline character replaced by one space."""
    return in_text.replace('\n', ' ')
# Downloads a link into the destination
def download(link, dest):
    """Fetch *link* into the file *dest* with the external ``wget`` program.

    Nothing is done when *dest* already exists and is not empty. wget's own
    output is discarded.
    """
    if os.path.exists(dest) and os.path.getsize(dest) > 0:
        return
    try:
        # Argument list, no shell: characters in link/dest (quotes, ;, $...)
        # cannot be interpreted as shell syntax.
        subprocess.run(["wget", link, "-O", dest],
                       stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    except OSError as err:
        # wget missing or not executable: report it and carry on, as the
        # original best-effort behaviour did not raise either.
        print("Could not run wget: " + str(err))
        return
    print("Downloading")
def ensureDir(f):
    """Create directory *f*, including missing parents, unless the path exists."""
    if not os.path.exists(f):
        # exist_ok covers another process creating it between check and call.
        os.makedirs(f, exist_ok=True)
# Cleans a text by removing tags
def clean(in_text):
    """Return *in_text* without markup tags and without newline characters.

    Everything from a ``<`` up to and including the next ``>`` is dropped.
    The literal ``<br>`` tag is the one exception and is kept unchanged.
    When a ``<`` is never closed, the remainder of the text is dropped
    (previously this raised IndexError).
    """
    kept = []
    pos = 0
    size = len(in_text)
    while pos < size:
        ch = in_text[pos]
        if ch == '<':
            if in_text.startswith('<br>', pos):
                kept.append('<br>')
                pos += 4
                continue
            closing = in_text.find('>', pos)
            if closing == -1:
                # Unterminated tag: nothing after it is plain text.
                break
            pos = closing + 1
        elif ch == '\n':
            pos += 1
        else:
            kept.append(ch)
            pos += 1
    return ''.join(kept)
def getBullets(content):
    """Parse *content* as HTML; unfinished stub that returns nothing."""
    # The parameter is `content`; the original read the undefined name
    # `contents` and raised NameError on every call.
    mainSoup = BeautifulSoup(content, "html.parser")
# Gets empty bullets
def getAllBullets(content):
    """Split the subcategory entries found in *content* into empty and non-empty.

    *content* is converted with ``str()`` and parsed as HTML. For every
    ``<div class="CategoryTreeItem">`` the first link is cleaned with
    ``clean()`` and its spaces are replaced by underscores.

    Returns:
        (empty, full): names from items whose markup contains
        "CategoryTreeEmptyBullet", and names from items whose markup contains
        "CategoryTreeBullet".
    """
    mainSoup = BeautifulSoup(str(content), "html.parser")
    subcategories = mainSoup.findAll('div',attrs={"class" : "CategoryTreeItem"})
    empty = []
    full = []
    for x in subcategories:
        # Search the item directly instead of re-parsing its markup with an
        # unspecified parser.
        anchors = x.findAll('a')
        if not anchors:
            # An item without a link has no name to record.
            continue
        link = str(anchors[0])
        markup = str(x)
        if "CategoryTreeEmptyBullet" in markup:
            empty.append(clean(link).replace(" ","_"))
        elif "CategoryTreeBullet" in markup:
            full.append(clean(link).replace(" ","_"))
    return((empty,full))
def printTree(catName, count):
    """Print the subcategories of *catName*, recursing into non-empty ones.

    Args:
        catName: category name appended to catRoot to build the URL.
        count: current depth; recursion stops when it equals MAX_DEPTH.
    """
    catName = catName.replace("\\'","'")
    if count == MAX_DEPTH : return
    download(catRoot+catName, path)
    filepath = "categories/Category:"+catName+".html"
    print(filepath)
    # NOTE(review): this opens the literal name 'filepath', not the variable
    # built above, and 'w+' truncates the file on opening.
    content = open('filepath', 'w+')
    content.readlines()  # result is discarded
    # NOTE(review): the file object itself is passed on; getAllBullets calls
    # str() on whatever it receives.
    (emptyBullets,fullBullets) = getAllBullets(content)
    # NOTE(review): no name `f` is defined in the code shown; the handle
    # opened above is called `content`.
    f.close()
    for x in emptyBullets:
        for i in range(count):
            # NOTE(review): the trailing comma looks like the Python 2 idiom
            # for suppressing the newline; in Python 3 print() still ends the
            # line -- confirm the intent.
            print (" "),
        download(catRoot+x, "categories/Category:"+x+".html")
        print (x)
    for x in fullBullets:
        for i in range(count):
            print (" "),
        print (x)
        if x in done:
            print ("Done... "+x)
            continue
        done.append(x)
        # Any exception raised while handling a subcategory is reported by
        # name and otherwise ignored.
        try: printTree(x, count + 1)
        except:
            print ("ERROR: " + x)
# Start the crawl at the "Cricket" category with depth 0.
name = "Cricket"
printTree(name, 0)
The error encountered is as follows.
I think f.close() should be content.close().
It's common to use a context manager for such cases, though, like this:
# Open read-only: 'w+' would truncate the downloaded page before it is parsed.
with open(filepath, 'r') as content:
    # Pass the markup text; getAllBullets applies str() to its argument, and
    # str() of a file object is not the file's contents.
    (emptyBullets,fullBullets) = getAllBullets(content.read())
Then Python will close the file for you, even in case of an exception.
(I also changed 'filepath' to filepath, which I assume is the intent here.)

Python dictionary search not finding result given a string

I'm writing code for a project that searches a text file for occurrences of a word on each line. When I use an example text file and search for a word, it always prints out "No results for: " even if the word I searched for is in it. Did I set up the dictionary wrong or something?
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 14 11:31:17 2017
@author: Ben Roux
"""
import re
from collections import Counter
# Name of the text file to index, asked for once at start-up.
stringinput = raw_input("Please enter a filename to open: ")
# Module-level index that readData fills in and that is later handed to
# processText for the lookup.
dictionary = {}
def openFile(stringinput):
    """Open the file named *stringinput* for reading.

    Returns:
        The open file object, which the caller must close. When the file
        cannot be opened, "Cannot Find File!" is printed and None is returned.
    """
    try:
        return open(stringinput, 'r')
    except IOError:
        print("Cannot Find File!")
        return None
def readData(stringinput):
    """Index the file named *stringinput* into the module-level `dictionary`.

    The file is read line by line. Each line is stripped of , ' ! and .
    characters, has "-" replaced by a space, is run through the regex below
    and is lower-cased before being split into words. For each entry the
    1-based line number is recorded: an int on first sight, and a
    comma-separated string once more line numbers are appended.
    """
    # NOTE(review): this handle is never closed in the code shown.
    filevariable = open(stringinput, 'r')
    rawline = filevariable.readline()
    line = 1
    while (rawline !=""):  # readline() returns "" at end of file
        pl1 = rawline.replace(",","")
        pl2 = pl1.replace("'","")
        pl3 = pl2.replace("!","")
        pl4 = pl3.replace("-"," ")
        pl5 = pl4.replace(".","")
        # presumably meant to drop stray single letters together with an
        # adjacent space -- confirm the intent of the pattern
        pl6 = re.sub('(\\b[A-Za-z] \\b|\\b [A-Za-z]\\b)', '', pl5)
        pl7 = pl6.lower()
        # NOTE(review): Counter(...).items() yields (word, count) tuples, so
        # every checkdictionary[i] used as a key below is a tuple, not the
        # bare word string.
        checkdictionary = sorted(Counter(pl7.split()).items())
        for i in range(len(checkdictionary)):
            if checkdictionary[i] in dictionary:
                firstvalue = dictionary.get(checkdictionary[i])
                newvalue = str(firstvalue) + ", " + str(line)
                d1 = {checkdictionary[i]: newvalue}
                dictionary.update(d1)
            else:
                d2 = {checkdictionary[i]: line}
                dictionary.update(d2)
        rawline = filevariable.readline()
        line+=1
def processText(dictionary, searchkey):
    """Print the value stored for *searchkey* in *dictionary*, if any.

    Prints "<searchkey> Appears On Lines: <value>" when the key is present
    and "No results for: <searchkey>" otherwise. Returns nothing.
    """
    if searchkey in dictionary:
        print(str(searchkey) + " Appears On Lines: " + str(dictionary[searchkey]))
    else:
        print("No results for: " + str(searchkey))
# Check the path first; openFile prints its own message when the file is
# missing. The surrounding `while True` was dropped because every path through
# it ended in `break`.
filevariable = openFile(stringinput)
if filevariable is not None:
    # Only opened to validate the path; readData opens the file itself.
    filevariable.close()
    try:
        readData(stringinput)
    except IOError:
        # Same outcome as before: stop quietly if reading fails.
        pass
    else:
        searchkey = raw_input("Enter a keyword to search for: ")
        processText(dictionary, searchkey)
@AK47's answer for changing the if/else statement works, and this also works:
checkdictionary = sorted(Counter(pl7.split()).items())
change to
checkdictionary = pl7.split()
Update this following code;
# checkdictionary[i] is a (word, count) pair; index on the word itself.
word = checkdictionary[i][0]
if word in dictionary:
    # Append this line number to the ones already recorded.
    dictionary[word] = str(dictionary[word]) + ", " + str(line)
else:
    dictionary[word] = line

Python I/O, URL Reading, Strings, Count

I'm having issues with my Python program. It is supposed to read URL addresses from a text file, then read each page and count the occurrences of, for example, div tags.
I got error in line 23, in
di[ffline[k]]-=1
import urllib
# Read one URL per line from top5_BRZ.txt, fetch each page and tally the
# space-separated tokens left after markup punctuation is blanked out.
with open('top5_BRZ.txt') as urlf:
    uf=urlf.readlines()
for i in range(len(uf)):
    # NOTE(review): lines from readlines() keep their trailing newline, and
    # the link is used as read.
    link = uf[i]
    f = urllib.urlopen(link)
    myfile = f.read()
    fline=myfile.split('\n')
    di={}  # token -> tally, rebuilt for every URL
    for j in range(len(fline)):
        line = fline[j]
        # Turn quote, angle-bracket, equals and slash characters into spaces
        # so tag and attribute names become separate tokens.
        line = line.replace('"', " ")
        line = line.replace("'", " ")
        line = line.replace('<', " ")
        line = line.replace('>', " ")
        line = line.replace('=', " ")
        line = line.replace('/', " ")
        line = line.replace("\\", " ")
        ffline=line.split(' ')
        for k in range(len(ffline)):
            # NOTE(review): di starts empty and nothing inserts a key before
            # this in-place decrement, so the first token raises KeyError.
            di[ffline[k]]-=1
    # NOTE(review): `operator` is not imported in the code shown, and `sx` is
    # not used afterwards.
    sx = sorted(di.items(), key=operator.itemgetter(1))
    rr=0
    # NOTE(review): iterating a dict yields its keys only, yet two names are
    # unpacked here; the sorted pairs are in `sx`.
    for key, value in di:
        if(rr==25): break  # stop after 25 entries
        print key,value
        rr+=1
I agree with @brian. You can use the code below (on line 22), which checks whether the key is in the dictionary and then decrements the value.
for token in ffline:
    if token in di:
        di[token] -= 1
    else:
        # First time this token is seen: start its tally at one decrement.
        di[token] = -1
The dict di doesn't have any keys in it when di[ffline[k]]-=1 is run. di is still an empty dict when you try to decrement the value of the ffline[k] key.
You forgot to use html5lib to parse your html:
import html5lib
import urllib
def main():
    """Print, for each listed URL, how many XHTML div elements html5lib finds."""
    for link in ["http://www.google.com/"]:
        f = urllib.urlopen(link)
        try:
            tree = html5lib.parse(f)
        finally:
            # Release the connection even if parsing fails.
            f.close()
        divs = len(tree.findall("*//{http://www.w3.org/1999/xhtml}div"))
        print("{}: {} divs".format(link, divs))


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

Categories

Resources