Python dictionary search not finding result given a string

I'm writing code for a project that searches a text file for occurrences of a word on each line. When I use an example text file and search for a word, it always prints "No results for: " even if the word I searched for is in the file. Did I set up the dictionary wrong or something?
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 14 11:31:17 2017
@author: Ben Roux
"""
import re
from collections import Counter

stringinput = raw_input("Please enter a filename to open: ")
dictionary = {}

def openFile(stringinput):
    try:
        filevariable = open(stringinput, 'r')
        return filevariable
    except IOError:
        print("Cannot Find File!")

def readData(stringinput):
    filevariable = open(stringinput, 'r')
    rawline = filevariable.readline()
    line = 1
    while (rawline != ""):
        pl1 = rawline.replace(",", "")
        pl2 = pl1.replace("'", "")
        pl3 = pl2.replace("!", "")
        pl4 = pl3.replace("-", " ")
        pl5 = pl4.replace(".", "")
        pl6 = re.sub('(\\b[A-Za-z] \\b|\\b [A-Za-z]\\b)', '', pl5)
        pl7 = pl6.lower()
        checkdictionary = sorted(Counter(pl7.split()).items())
        for i in range(len(checkdictionary)):
            if checkdictionary[i] in dictionary:
                firstvalue = dictionary.get(checkdictionary[i])
                newvalue = str(firstvalue) + ", " + str(line)
                d1 = {checkdictionary[i]: newvalue}
                dictionary.update(d1)
            else:
                d2 = {checkdictionary[i]: line}
                dictionary.update(d2)
        rawline = filevariable.readline()
        line += 1

def processText(dictionary, searchkey):
    if searchkey in dictionary:
        print(str(searchkey) + " Appears On Lines: " + str(dictionary[searchkey]))
    else:
        print("No results for: " + str(searchkey))

while (True):
    try:
        openFile(stringinput)
        readData(stringinput)
        searchkey = raw_input("Enter a keyword to search for: ")
        processText(dictionary, searchkey)
        break
    except IOError:
        break

AK47's answer (changing the if/else statement) works, and this also works: change

checkdictionary = sorted(Counter(pl7.split()).items())

to

checkdictionary = pl7.split()

Or update the following code:

if checkdictionary[i][0] in dictionary:
    firstvalue = dictionary.get(checkdictionary[i][0])
    newvalue = str(firstvalue) + ", " + str(line)
    d1 = {checkdictionary[i][0]: newvalue}
    dictionary.update(d1)
else:
    d2 = {checkdictionary[i][0]: line}
    dictionary.update(d2)
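Both fixes address the same root cause: Counter(pl7.split()).items() yields (word, count) tuples, so the keys stored in dictionary were tuples, and the plain-string searchkey could never match them. A minimal demonstration of the shape of that data:

from collections import Counter

words = "the cat and the hat".split()
print(sorted(Counter(words).items()))
# [('and', 1), ('cat', 1), ('hat', 1), ('the', 2)]  <- the keys would be tuples, not words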

Related

Bitcoin address convert to RIPEMD160

Why is the result not saved to RIPEMD160.txt? It gives an error. I can see on the processor that the code is working, but the file is empty, and I always get the same error:

IndentationError: unindent does not match any outer indentation level
#!/usr/bin/python
# -*- coding: utf-8 -*-
import sys
import base58

def count_lines(file):
    return sum(1 for line in open(file, 'r'))

def convert(file_in, file_out, nom):
    print("===========File input -> " + file_in)
    print("===========File output -> " + file_out)
    i = 0
    line_10 = 100000
    ii = 0
    f = open(file_in, 'r')
    fw = open(file_out, 'a')
    while i <= nom:
        if (i + ii) == nom:
            print("\n Finish")
            break
        if line_10 == i:
            print("Error - {} | Total line -> {}".format(ii, line_10), end='\r')
            line_10 += 100000
        try:
            adr58 = f.readline().strip()
            adr160 = base58.b58decode_check(adr58).hex()[2:]
        except:
            ii += 1
        else:
            fw.write(adr160 + '\n')
            i += 1
    f.close()
    fw.close()

if __name__ == "__main__":
    if len(sys.argv) < 3:
        print("error")
        sys.exit(1)
    if len(sys.argv) > 3:
        print("error")
        sys.exit(1)
    file_in = sys.argv[1]
    file_out = sys.argv[2]
    line_count = count_lines(file_in)
    print("all lines -> " + str(line_count))
    convert(file_in, file_out, line_count)
    print('Finish')
The result is not saved because you are not writing anything to the file, and your code is not indented correctly. Try something like this:
import base58
from mpmath import mpf  # assumption: mpf is mpmath's arbitrary-precision float; the original omitted this import

# base58 alphabet, hoisted to module level so base58_to_dec can see it
# (in the original it was local to decode(), which raises a NameError)
b58 = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'

def base58_to_dec(addr):
    dec = 0
    for i in range(len(addr)):
        dec = int(dec * 58 + b58.index(addr[i]))
    return dec

def dec_to_byte(dec):
    out = ''
    while dec != 0:
        remn = mpf(dec % 256)
        dec = mpf((dec - remn) / 256)
        temp = hex(int(remn))
        if len(temp) == 3:
            temp = '0' + temp[-1]
        else:
            temp = temp[2:]
        out = temp + out
    return out

def decode(addr):
    dec = base58_to_dec(addr)
    out = dec_to_byte(dec)
    return out

with open('addresses.txt', 'r') as f, \
     open('RIPEMD160.txt', 'a') as i:
    for addr in f:
        addr = base58.b58decode_check(addr).encode('hex')[2:]  # Python 2 bytes-to-hex
        ads = decode(addr)
        i.write(ads)
# no explicit close needed: the with block closes both files
The process of generating a Bitcoin address is:

public_key => sha256(sha256(public_key)) => RIPEMD160_address => base58_address

so there is no need for the other procedures; simply reverse the base58 back to RIPEMD160, like below:
import base58

i = open('RIPEMD160.txt', 'a')  # open output file in append mode
with open('addresses.txt', 'r') as f:  # open the file with addresses
    for addr in f:
        addr = addr.strip()  # remove trailing spaces and the newline character
        rmd160 = base58.b58decode_check(str(addr)).encode('hex')[2:]  # str() forces addr to be a string
        i.write(rmd160 + "\n")
i.close()
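Note that .encode('hex') only exists on Python 2 strings. A minimal Python 3 sketch of the same idea (assuming the base58 package is installed; on Python 3, bytes objects expose .hex() instead):

import base58

with open('addresses.txt', 'r') as f, open('RIPEMD160.txt', 'a') as out:
    for addr in f:
        rmd160 = base58.b58decode_check(addr.strip()).hex()[2:]  # [2:] drops the one-byte version prefix
        out.write(rmd160 + '\n')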

File operations - filtering data from txt file in python

This code works fine but is too big; I would like to know if there is any other way to write it to make it shorter.
import openpyxl as excel

PATH = "/home/Fathima/workspace/training/"
ACCESS_LIST = []

def READ_CONFIG():
    FROM_ZONE = ""
    TO_ZONE = ""
    POLICY_NAME = ""
    SOURCE_ADDR = ""
    DESTINATION_ADDR = ""
    PORT = ""
    global PATH
    global ACCESS_LIST
    count = 0
    CONFIG_PATH = PATH + "hofwdcn05dcn_20210216(2).txt"
    fh = open(CONFIG_PATH, 'r')
    CONFIG_LINES = fh.readlines()
    config_lines_cnt = len(CONFIG_LINES)
    while count < config_lines_cnt:
        line = CONFIG_LINES[count].strip()
        if len(line) > 0:
            line_to_array = line.split(' ')
            if line.startswith('from-zone '):
                FROM_ZONE = line_to_array[1]
                TO_ZONE = line_to_array[3]
            elif line.startswith('policy '):
                POLICY_NAME = line_to_array[1]
            elif line.startswith('source-address '):
                SOURCE_ADDR = line_to_array[1].replace(";", "")
            elif line.startswith('destination-address '):
                DESTINATION_ADDR = line_to_array[1].replace(";", "")
            elif line.startswith('application '):
                PORT = line_to_array[1].replace(";", "")
            elif line.startswith('then {'):
                count = count + 1
                line = CONFIG_LINES[count].strip()
                if line == "permit;":
                    dummy = {'FROM_ZONE': FROM_ZONE, 'TO_ZONE': TO_ZONE, 'POLICY_NAME': POLICY_NAME,
                             'SOURCE_ADDR': SOURCE_ADDR, 'DESTINATION_ADDR': DESTINATION_ADDR, 'PORT': PORT}
                    ACCESS_LIST.append(dummy)
                    FROM_ZONE = ""
                    TO_ZONE = ""
                    POLICY_NAME = ""
                    SOURCE_ADDR = ""
                    DESTINATION_ADDR = ""
                    PORT = ""
        count += 1

# MAIN PROGRAM STARTS FROM HERE
READ_CONFIG()
print(ACCESS_LIST)
Here I have a huge file and need the output to appear in the format below:
[{
from-zone:
to-zone:
policy:
source-address:
destination-address:
application:
},{
from-zone:
to-zone:
policy:
source-address:
destination-address:
application:
}]
There is a separate Stack Exchange site for reviews of working code, i.e. Code Review.
That said, below is a more Pythonic code flow. I didn't change the conditionals since they are easy to follow.
Main Changes
Eliminate globals (discouraged--only for special needs)
Use file context manager (i.e. use 'with block' on file open)
Iterate through file rather than read the entire file (allows processing arbitrary file size)
Use Python variable and function naming convention i.e. PEP 8
Remove import openpyxl (unused)
Code
def read_config(path):
    from_zone, to_zone, policy_name, source_addr, destination_addr, port = [''] * 6
    access_list = []
    with open(path + "hofwdcn05dcn_20210216(2).txt", 'r') as fh:
        for line in fh:
            line = line.strip()
            if line:
                line_to_array = line.split(' ')
                if line.startswith('from-zone '):
                    from_zone = line_to_array[1]
                    to_zone = line_to_array[3]
                elif line.startswith('policy '):
                    policy_name = line_to_array[1]
                elif line.startswith('source-address '):
                    source_addr = line_to_array[1].replace(";", "")
                elif line.startswith('destination-address '):
                    destination_addr = line_to_array[1].replace(";", "")
                elif line.startswith('application '):
                    port = line_to_array[1].replace(";", "")
                elif line.startswith('then {'):
                    line = next(fh).strip()  # Gets next line in file
                    if line == "permit;":
                        access_list.append({'FROM_ZONE': from_zone,
                                            'TO_ZONE': to_zone,
                                            'POLICY_NAME': policy_name,
                                            'SOURCE_ADDR': source_addr,
                                            'DESTINATION_ADDR': destination_addr,
                                            'PORT': port})
                        from_zone, to_zone, policy_name, source_addr, destination_addr, port = [''] * 6
    return access_list
access_list = read_config("/home/Fathima/workspace/training/")
print(access_list)
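To illustrate, a hypothetical Juniper-style policy snippet of the kind this parser expects (invented for illustration, not taken from the original post):

from-zone trust to-zone untrust {
    policy allow-web {
        match {
            source-address any;
            destination-address web-server;
            application junos-http;
        }
        then {
            permit;
        }
    }
}

Parsing those lines would yield one record:

[{'FROM_ZONE': 'trust', 'TO_ZONE': 'untrust', 'POLICY_NAME': 'allow-web', 'SOURCE_ADDR': 'any', 'DESTINATION_ADDR': 'web-server', 'PORT': 'junos-http'}]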

Updates to text file are not being parsed using python

I'm parsing data from a text file ('placlog.txt') that is continuously being updated. When I run the code everything prints as expected, but if there are any updates to the placlog file while the code is running, they are not printed.
The placlog file is being updated by a third-party program, and I am using the code below to read the file and print any updates.
Once formatted, the text is sent via the Telegram API. This part also works initially.
import urllib.parse
import time
import requests
import os

def post_to_telegram(msg):
    # print(msg)
    base_url = 'https://api.telegram.org/bot&text="{}'.format(msg)
    requests.get(base_url)

def check_url_inMsgList(stringToMatch, msgList):
    for i in msgList:
        if (stringToMatch in i):
            return False
    return True

try:
    f = open("oldFile.txt", "r")
    msgList = f.read().split("\n")
    f.close()
except:
    f = open("oldFile.txt", "w")
    msgList = []
    f.close()

selections = []
urr = ""
name = ""
pie = ""
ourLines = 2400
url_found = 0
name_found = 0
pie_found = 0

while (True):
    file1 = open('placlog.txt', 'r')
    Lines = file1.readlines()
    file1.close()
    while (True):
        # print("-------------------------------")
        if (ourLines == len(Lines)):
            break
        elif (ourLines > len(Lines)):
            ourLines = 0
        else:
            txt = Lines[ourLines].strip()
            tlist = txt.split("&")
            ourLines = ourLines + 1
            for subtxt in tlist:
                if "eventurl=" in subtxt:
                    a = subtxt[9:len(subtxt) - 3]
                    url = "www.awebsite.com/%23" + a.replace("%23", "/")
                    # url = url.replace("%23", "#")
                    for i in range(10):
                        if "F" + str(i) + "/" in url:
                            url = url.split("F" + str(i) + "/")[0] + "F" + str(i) + "/"
                    urr = url
                    url_found = 1
                elif "bit=" in subtxt:
                    name = urllib.parse.unquote(subtxt[4:len(subtxt)])
                    name_found = 1
                elif "pie\":" in subtxt:
                    a = subtxt.split("price")[1]
                    pie = a.split("\"")[2]
                    pie = float(pie)
                    pie = round(pie, 1)
                    pie = str(pie)
                    pie_found = 1
                    selections.append(url + name + pie)
                    msg = (url + " " + name + " " + pie)
                    stringToFind = url + " " + name
                    if (check_url_inMsgList(stringToFind, msgList)):
                        post_to_telegram(msg)
                        msgList.append(msg)
                        print(msg)
                        f = open("oldFile.txt", "a+")
                        f.write(msg + "\n")
                        f.close()
                        time.sleep(0.5)
                elif "minodds=" in subtxt:
                    a = subtxt.split("minodds=")[1]
                    pie = a.split("&")[0]
                    pie = float(pie)
                    rie = round(pie, 1)  # note: likely a typo for pie = round(pie, 1)
                    pie = str(pie)
                    pie_found = 1
                    selections.append(url + name + pie)
                    msg = (url + " " + name + " " + pie)
                    stringToFind = url + " " + name
                    if (check_url_inMsgList(stringToFind, msgList)):
                        post_to_telegram(msg)
                        msgList.append(msg)
                        print(msg)
                        f = open("oldFile.txt", "a+")
                        f.write(msg + "\n")
                        f.close()
                        time.sleep(0.5)
    time.sleep(1)
I would recommend using watchdog, and seeing if that helps your situation. It can monitor for file system changes, so you could define a function which is executed when the placlog.txt file is changed/updated.
A good guide can be found here: http://thepythoncorner.com/dev/how-to-create-a-watchdog-in-python-to-look-for-filesystem-changes/
From that guide, you can simply change the functions defined to suit your needs i.e.
def on_modified(event):
    if event.src_path == "path/to/placlog.txt":
        with open('placlog.txt', 'r') as placlog:
            lines = placlog.readlines()  # was: file1.readlines(), but file1 is not defined here
Could you try this out and see if it helps? I still recommend the with statement for file i/o since you always want your file to close no matter what.
This link might also be useful since they are also monitoring a single .txt file: Python Watchdog - src_path inconsistent
watchdog documentation: https://pythonhosted.org/watchdog/
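Putting the pieces from that guide together, a minimal end-to-end sketch might look like the following (assumes pip install watchdog; the watched path and handler name are placeholders, not from the original question):

import time
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler

class PlaclogHandler(FileSystemEventHandler):
    def on_modified(self, event):
        if event.src_path.endswith("placlog.txt"):
            with open("placlog.txt", "r") as placlog:
                lines = placlog.readlines()
            # hand the fresh lines to the existing parsing/Telegram code here

observer = Observer()
observer.schedule(PlaclogHandler(), path=".", recursive=False)  # watch the current directory
observer.start()
try:
    while True:
        time.sleep(1)
finally:
    observer.stop()
    observer.join()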
Note: Deleted the old answer since you clarified the question.

Error in wikipedia subcategory crawling using python3

Hello Community Members,
I am getting the error NameError: name 'f' is not defined. The code is as follows. Please help; any sort of help is appreciated. I have been stuck on this for 3 days. The code extracts all the subcategory names of a Wikipedia category in Python 3.
I have tried both relative and absolute paths.
The code is as follows:
import httplib2
from bs4 import BeautifulSoup
import subprocess
import time, wget
import os, os.path

# declarations
catRoot = "http://en.wikipedia.org/wiki/Category:"
MAX_DEPTH = 100
done = []
ignore = []
path = 'trivial'

# Removes all newline characters and replaces with spaces
def removeNewLines(in_text):
    return in_text.replace('\n', ' ')

# Downloads a link into the destination
def download(link, dest):
    # print link
    if not os.path.exists(dest) or os.path.getsize(dest) == 0:
        subprocess.getoutput('wget "' + link + '" -O "' + dest + '"')
        print ("Downloading")

def ensureDir(f):
    if not os.path.exists(f):
        os.mkdir(f)

# Cleans a text by removing tags
def clean(in_text):
    s_list = list(in_text)
    i, j = 0, 0
    while i < len(s_list):
        # iterate until a left-angle bracket is found
        if s_list[i] == '<':
            if s_list[i+1] == 'b' and s_list[i+2] == 'r' and s_list[i+3] == '>':
                i = i + 1
                print ("hello")
                continue
            while s_list[i] != '>':
                # pop everything from the left-angle bracket until the right-angle bracket
                s_list.pop(i)
            # pops the right-angle bracket, too
            s_list.pop(i)
        elif s_list[i] == '\n':
            s_list.pop(i)
        else:
            i = i + 1
    # convert the list back into text
    join_char = ''
    return (join_char.join(s_list))  # .replace("<br>","\n")

def getBullets(content):
    mainSoup = BeautifulSoup(contents, "html.parser")

# Gets empty bullets
def getAllBullets(content):
    mainSoup = BeautifulSoup(str(content), "html.parser")
    subcategories = mainSoup.findAll('div', attrs={"class": "CategoryTreeItem"})
    empty = []
    full = []
    for x in subcategories:
        subSoup = BeautifulSoup(str(x))
        link = str(subSoup.findAll('a')[0])
        if (str(x)).count("CategoryTreeEmptyBullet") > 0:
            empty.append(clean(link).replace(" ", "_"))
        elif (str(x)).count("CategoryTreeBullet") > 0:
            full.append(clean(link).replace(" ", "_"))
    return ((empty, full))

def printTree(catName, count):
    catName = catName.replace("\\'", "'")
    if count == MAX_DEPTH: return
    download(catRoot + catName, path)
    filepath = "categories/Category:" + catName + ".html"
    print(filepath)
    content = open('filepath', 'w+')
    content.readlines()
    (emptyBullets, fullBullets) = getAllBullets(content)
    f.close()
    for x in emptyBullets:
        for i in range(count):
            print (" "),
        download(catRoot + x, "categories/Category:" + x + ".html")
        print (x)
    for x in fullBullets:
        for i in range(count):
            print (" "),
        print (x)
        if x in done:
            print ("Done... " + x)
            continue
        done.append(x)
        try: printTree(x, count + 1)
        except:
            print ("ERROR: " + x)

name = "Cricket"
printTree(name, 0)
The error encountered is the NameError quoted above.
I think f.close() should be content.close().
It's common to use a context manager for such cases, though, like this:
with open(filepath, 'w+') as content:
    (emptyBullets, fullBullets) = getAllBullets(content)
Then Python will close the file for you, even in case of an exception.
(I also changed 'filepath' to filepath, which I assume is the intent here.)
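For completeness, a sketch of those lines of printTree with the fixes applied. Two assumptions on my part beyond the answer above: 'r' rather than 'w+', since 'w+' truncates the file that download() just fetched, and passing content.read() rather than the file object, since getAllBullets() calls str() on its argument and str() of a file object is just its repr, not the HTML:

filepath = "categories/Category:" + catName + ".html"
with open(filepath, 'r') as content:  # assumption: read mode, so the downloaded HTML is kept
    (emptyBullets, fullBullets) = getAllBullets(content.read())  # pass the HTML text, not the file object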

Python I/O, URL Reading, Strings, Count

I'm having issues with my Python program. It is supposed to read URL addresses from a text file, then read each page and count occurrences of, for example, div tags.
I get an error on line 23, in:
di[ffline[k]]-=1
import urllib
import operator  # needed for operator.itemgetter below (missing in the original)

with open('top5_BRZ.txt') as urlf:
    uf = urlf.readlines()
for i in range(len(uf)):
    link = uf[i]
    f = urllib.urlopen(link)
    myfile = f.read()
    fline = myfile.split('\n')
    di = {}
    for j in range(len(fline)):
        line = fline[j]
        line = line.replace('"', " ")
        line = line.replace("'", " ")
        line = line.replace('<', " ")
        line = line.replace('>', " ")
        line = line.replace('=', " ")
        line = line.replace('/', " ")
        line = line.replace("\\", " ")
        ffline = line.split(' ')
        for k in range(len(ffline)):
            di[ffline[k]] -= 1
    sx = sorted(di.items(), key=operator.itemgetter(1))
    rr = 0
    for key, value in di:
        if (rr == 25): break
        print key, value
        rr += 1
I agree with #brian. You can use the code below (on line 22), which checks whether the key is in the dictionary before decrementing the value.
for k in range(len(ffline)):
    if ffline[k] in di.keys():
        di[ffline[k]] -= 1
    else:
        di[ffline[k]] = something
The dict di doesn't have any keys in it when di[ffline[k]]-=1 is run. di is still an empty dict when you try to decrement the value of the ffline[k] key.
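A compact way to avoid the KeyError is dict.get with a default, which starts missing keys at 0 before decrementing (a sketch; the negative counts mirror the question's decrement convention, so the usual ascending sort puts the most frequent words first):

for k in range(len(ffline)):
    di[ffline[k]] = di.get(ffline[k], 0) - 1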
You forgot to use html5lib to parse your html:
import html5lib
import urllib

def main():
    for link in ["http://www.google.com/"]:
        f = urllib.urlopen(link)
        tree = html5lib.parse(f)
        divs = len(tree.findall("*//{http://www.w3.org/1999/xhtml}div"))
        print("{}: {} divs".format(link, divs))

main()
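For reference, a Python 3 sketch of the same div count using BeautifulSoup instead (urllib.request.urlopen replaces Python 2's urllib.urlopen; this is an alternative approach, not the answer above):

from urllib.request import urlopen
from bs4 import BeautifulSoup

for link in ["http://www.google.com/"]:
    soup = BeautifulSoup(urlopen(link), "html.parser")
    print("{}: {} divs".format(link, len(soup.find_all("div"))))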
