how to merge same object in json file using python

how to merge same object in json file using python - python

1.json file contain many sniffing WIFI packets, I want get the mac address of receiver and transmitter which can be found in the first "wlan" object called "wlan.ra" and "wlan.sa". data[0] is the first WIFI packet.
Q1:
But when I try to print the elements of wlan after json load, it only show the elements of the second "wlan" object so there is no "wlan.ra" and "wlan.sa" in the data.
with open('1.json','r') as json_data:
data = json.load(json_data)
a=data[0]
print a
Q2:
There are two 'wlan' objects in my json file. How can I merge the elements in these two 'wlan' objects into just one 'wlan' object?
The following is my code, but it doesn't work:
with open('1.json','r') as f:
data=json.load(f)
for i in data:
i['_source']['layers']['wlan'].update()
Screenshot of json file:

'''
Created on 2017/10/3
#author: DD
'''
import os
def modify_jsonfile(jsonfile):
'''
replace wlan to wlan1/wlan2
'''
FILESUFFIX = '_new' # filename suffix
LBRACKET = '{' # json object delimiter
RBRACKET = '}'
INTERSETED = '"wlan"' # string to be replaced
nBrackets = 0 # stack to record object status
nextIndex = 1 # next index of wlan
with open(jsonfile, 'r') as fromJsonFile:
fields = os.path.splitext(jsonfile) # generate new filename
with open(fields[0] + FILESUFFIX + fields[1], 'w') as toJsonFile:
for line in fromJsonFile.readlines():
for ch in line: # record bracket
if ch == LBRACKET:
nBrackets += 1
elif ch == RBRACKET:
nBrackets -= 1
if nBrackets == 0:
nextIndex = 1
if (nextIndex == 1 or nextIndex == 2) and line.strip().find(INTERSETED) == 0: # replace string
line = line.replace(INTERSETED, INTERSETED[:-1] + str(nextIndex) + INTERSETED[-1])
nextIndex += 1
toJsonFile.write(line);
print 'done.'
if __name__ == '__main__':
jsonfile = r'C:\Users\DD\Desktop\1.json';
modify_jsonfile(jsonfile)

Related

Bitcoin address convert to RIPEMD160

Why the result is not saved RIPEMD160.txt gives an error
I can see on the processor that the code is working but the file is empty
I always get the same
IndentationError: unindent does not match any outer indentation level
#!/usr/bin/python
# -*- coding: utf-8 -*-
import sys
import base58
def count_lines(file):
return sum(1 for line in open(file, 'r'))
def convert(file_in,file_out,nom):
print("===========File input -> " + file_in)
print("===========File output -> " + file_out)
i = 0
line_10 = 100000
ii = 0
f = open(file_in,'r')
fw = open(file_out,'a')
while i <= nom:
if (i+ii) == nom:
print("\n Finish")
break
if line_10 == i:
print("Error - {} | Total line -> {}".format(ii,line_10),end='\r')
line_10 += 100000
try:
adr58 = f.readline().strip()
adr160 = base58.b58decode_check(adr58).hex()[2:]
except:
ii +=1
else:
fw.write(adr160+'\n')
i += 1
f.close()
fw.close()
if __name__ == "__main__":
if len (sys.argv) < 3:
print ("error")
sys.exit (1)
if len (sys.argv) > 3:
print ("error")
sys.exit (1)
file_in = sys.argv[1]
file_out = sys.argv[2]
line_count = count_lines(file_in)
print("all lines -> " + str(line_count))
convert(file_in,file_out,line_count)
print('Finish')

because you are not writing anything to the file and your code is not formatted correctly.
import base58
def base58_to_dec(addr):
dec = 0
for i in range(len(addr)):
dec = int(dec * 58 + b58.index(addr[i]))
return(dec)
def dec_to_byte(dec):
out = ''
while dec != 0:
remn = mpf(dec % 256)
dec = mpf((dec - remn) / 256)
temp = hex(int(remn))
if len(temp) == 3:
temp = '0' + temp[-1]
else:
temp = temp[2:]
out = temp + out
return (out)
def decode(addr):
b58 = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
dec = base58_to_dec(addr)
out = dec_to_byte(dec)
return (out)
with open('addresses.txt', 'r') as f, \
open('RIPEMD160.txt', 'a') as i:
for addr in f:
addr = base58.b58decode_check(addr).encode('hex')[2:]
ads = decode(addr)
i.write(ads)
i.close()

The process of generating bitcoin address is like this
public_key=>sha256(sha256(public_key))=>RIPEMD160_address=>base58_address
so there is no need for other procedures but simply reverse base58 to rmd160 like below
import base58
i =open('RIPEMD160.txt', 'a') #open file with append mode on
with open('addresses.txt', 'r') as f:#open files with addresses
for addr in f:
addr = addr.strip()#remove trailing space and newline character
rmd160 = base58.b58decode_check(str(addr)).encode('hex')[2:]#notice str forcing addr to be a string
i.write(rmd160+"\n")
i.close()
f.close()

Extract IP addresses from text file without using REGEX

I am trying to extract IPv4 addresses from a text file and save them as a list to a new file, however, I can not use regex to parse the file, Instead, I have check the characters individually. Not really sure where to start with that, everything I find seems to have import re as the first line.
So far this is what I have,
#Opens and prints wireShark txt file
fileObject = open("wireShark.txt", "r")
data = fileObject.read()
print(data)
#Save IP adresses to new file
with open('wireShark.txt') as fin, open('IPAdressess.txt', 'wt') as fout:
list(fout.write(line) for line in fin if line.rstrip())
#Opens and prints IPAdressess txt file
fileObject = open("IPAdressess.txt", "r")
data = fileObject.read()
print(data)
#Close Files
fin.close()
fout.close()
So I open the file, and I have created the file that I will put the extracted IP's in, I just don't know ow to pull them without using REGEX.
Thanks for the help.

Here is a possible solution.
The function find_first_digit, position the index at the next digit in the text if any and return True. Else return False
The functions get_dot and get_num read a number/dot and, lets the index at the position just after the number/dot and return the number/dot as str. If one of those functions fails to get the number/dot raise an MissMatch exception.
In the main loop, find a digit, save the index and then try to get an ip.
If sucess, write it to output file.
If any of the called functions raises a MissMatch exception, set the current index to the saved index plus one and start over.
class MissMatch(Exception):pass
INPUT_FILE_NAME = 'text'
OUTPUT_FILE_NAME = 'ip_list'
def find_first_digit():
while True:
c = input_file.read(1)
if not c: # EOF found!
return False
elif c.isdigit():
input_file.seek(input_file.tell() - 1)
return True
def get_num():
num = input_file.read(1) # 1st digit
if not num.isdigit():
raise MissMatch
if num != '0':
for i in range(2): # 2nd 3th digits
c = input_file.read(1)
if c.isdigit():
num += c
else:
input_file.seek(input_file.tell() - 1)
break
return num
def get_dot():
if input_file.read(1) == '.':
return '.'
else:
raise MissMatch
with open(INPUT_FILE_NAME) as input_file, open(OUTPUT_FILE_NAME, 'w') as output_file:
while True:
ip = ''
if not find_first_digit():
break
saved_position = input_file.tell()
try:
ip = get_num() + get_dot() \
+ get_num() + get_dot() \
+ get_num() + get_dot() \
+ get_num()
except MissMatch:
input_file.seek(saved_position + 1)
else:
output_file.write(ip + '\n')

Python API call JSON to CSV iteration overwriting data

New to Python...trying to perform an API call and output the data to CSV.
Works fine for a single variable being passed but when I parse through a list the last item in the list is the only output I receive.
Looking to find the best way to approach this. Not sure if it's just an issue of logic or I need to find a way to consistently append to the file and remove results when it's rerun.
list = open(infile, "r")
print("-------------Attempting to Query Data----------------")
for item in list:
try:
if inp == 1:
eval_string = "&value=" + item
elif inp == 2:
eval_string = "&value__regexp=.*." + item + "*"
else:
print("Invalid input!")
result = requests.get(url + "api_key=" + api + "&username=" + user + eval_string + "&limit=" + str(limit) + "&status=" + status)
data = result.json()
if result.status_code == 200:
with open(outfile, 'w', newline='') as data_file:
writer = csv.writer(data_file)
count = 0
for obj in data['objects']:
if count == 0:
header = 'value', 'confidence', 'type', 'source', 'date', 'status'
writer.writerow(header)
count += 1
writer.writerow([obj['value'], obj['confidence'], obj['type'], obj['source'], obj['date'], obj['status']])
else:
print("-------------Failed to connect - check API config info or that site is up----------------")
except OSError:
print("Failed to query.")
print("-------------Results returned in " + outfile + "----------------")

Error in wikipedia subcategory crawling using python3

Hello Community Members,
I am getting the error NameError: name 'f' is not defined. The code is as follows. Please help. Any sort of help is appreciated. I have been strucked onto this since 3 days. The code is all about to extract all the subcategories name of wikipedia category in Python 3.
I have tried both the relative and absolute paths.
The code is as follows:
import httplib2
from bs4 import BeautifulSoup
import subprocess
import time, wget
import os, os.path
#declarations
catRoot = "http://en.wikipedia.org/wiki/Category:"
MAX_DEPTH = 100
done = []
ignore = []
path = 'trivial'
#Removes all newline characters and replaces with spaces
def removeNewLines(in_text):
return in_text.replace('\n', ' ')
# Downloads a link into the destination
def download(link, dest):
# print link
if not os.path.exists(dest) or os.path.getsize(dest) == 0:
subprocess.getoutput('wget "' + link + '" -O "' + dest+ '"')
print ("Downloading")
def ensureDir(f):
if not os.path.exists(f):
os.mkdir(f)
# Cleans a text by removing tags
def clean(in_text):
s_list = list(in_text)
i,j = 0,0
while i < len(s_list):
#iterate until a left-angle bracket is found
if s_list[i] == '<':
if s_list[i+1] == 'b' and s_list[i+2] == 'r' and s_list[i+3] == '>':
i=i+1
print ("hello")
continue
while s_list[i] != '>':
#pop everything from the the left-angle bracket until the right-angle bracket
s_list.pop(i)
#pops the right-angle bracket, too
s_list.pop(i)
elif s_list[i] == '\n':
s_list.pop(i)
else:
i=i+1
#convert the list back into text
join_char=''
return (join_char.join(s_list))#.replace("<br>","\n")
def getBullets(content):
mainSoup = BeautifulSoup(contents, "html.parser")
# Gets empty bullets
def getAllBullets(content):
mainSoup = BeautifulSoup(str(content), "html.parser")
subcategories = mainSoup.findAll('div',attrs={"class" : "CategoryTreeItem"})
empty = []
full = []
for x in subcategories:
subSoup = BeautifulSoup(str(x))
link = str(subSoup.findAll('a')[0])
if (str(x)).count("CategoryTreeEmptyBullet") > 0:
empty.append(clean(link).replace(" ","_"))
elif (str(x)).count("CategoryTreeBullet") > 0:
full.append(clean(link).replace(" ","_"))
return((empty,full))
def printTree(catName, count):
catName = catName.replace("\\'","'")
if count == MAX_DEPTH : return
download(catRoot+catName, path)
filepath = "categories/Category:"+catName+".html"
print(filepath)
content = open('filepath', 'w+')
content.readlines()
(emptyBullets,fullBullets) = getAllBullets(content)
f.close()
for x in emptyBullets:
for i in range(count):
print (" "),
download(catRoot+x, "categories/Category:"+x+".html")
print (x)
for x in fullBullets:
for i in range(count):
print (" "),
print (x)
if x in done:
print ("Done... "+x)
continue
done.append(x)
try: printTree(x, count + 1)
except:
print ("ERROR: " + x)
name = "Cricket"
printTree(name, 0)
The error encountered is as follows.

I think f.close() should be content.close().
It's common to use a context manager for such cases, though, like this:
with open(filepath, 'w+') as content:
(emptyBullets,fullBullets) = getAllBullets(content)
Then Python will close the file for you, even in case of an exception.
(I also changed 'filepath' to filepath, which I assume is the intent here.)

Python string compare and replace

I have a .txt with:
#Date 111111:UhUidsiIds
#Name Sebastian-Forset
#Date 222222:UdfasdUDsa
#Name Sebastian_Forset2
#Date 333333:UDsafduD
#Name Solaris Mage
#Date 444444:Ghdsasra
#Name Marge S
and a file whith:
#Name Sebastian Forset
#Date 191020
#Name Sebastian Forset2
#Date 201020
#Date Homer S
#Date 281902
The names are the same, with some differences of characters (spaces, -, _ etc.)
I would copy the numbers of the second file to the first file in order to have a final file txt with:
#Name Sebastian Forset
#Date 191020:UhUidsiIds
#Name Sebastian Forset2
#Date 201020:UdfasdUDsa
#Name Solaris Mage
#Date 281902:UDsafduD
#Name Marge S
#Date 444444:Ghdsasra
This is my code, but merge the file, copy only same name
def isInFile(l, f):
with open(f, 'r') as f2:
for line in f2:
if l == line:
return True
return False
def similitudes(file1, file2):
same = 0
data = ''
copy = False
with open(file1, 'r') as f1:
for line in f1:
if copy == True:
data += line
if line == '\n' or line[0:6] != '#Name ':
copy = False
if (line[0:6] == '#Name ') or line[0:6] == '#Date ':
print line
if isInFile(line, file2) == True:
copy = True
data += line
print "true"
else:
print "ok"
same += 1
return data
def main(argv=2):
print (sys.argv[1])
print (sys.argv[2])
if argv == 2:
out = open('final.txt', 'w')
data = (
similitudes(sys.argv[1], sys.argv[2]) + '\n'
)
out.write(data)
out.close()
else:
print ("This program need 2 files")
exit (0)
return 0
if __name__ == '__main__':
status = main()
sys.exit(status)

First, list out the characters that will differ. Let's say "-" , "_" and " ".
Now split the two strings using these delimiters. you can use "re" package in python.
>>> a='Mr-Sebastian_Forset '
>>> import re
>>> re.split('- |_ | ',a)
['Mr', 'Sebastian', 'Forset']
If the resultant lists for the two strings are equal, paste the number in second file in first one.
You can use the same delimiter concept to split the number and paste it in other file.

Adding another answer, which will points out the bug in your code
Coming to the following piece of code
if (line[0:6] == '#Name ') or line[0:6] == '#Date ':
print line
if isInFile(line, file2) == True:
copy = True
data += line
Here, you are checking If your line starts with either "#Name " or "#Date ", and calling isInFile() method with line and file2 as arguments.
This is the first issue, there is no use of sending just one line that starts with "#Name " in your case.
If the current line starts with "#Date ", send the previous line and file as arguments to this method.
And second Issue is with the isInFile() definition, which is doing effectively nothing.
if l == line:
return true
You are just checking if two lines in file1 and file2 are same and if yes, you writing this line in sysout.
So, your program will just print the common lines between file1 and file2.
Modified code should like the below one:
def isInFile(l, f):
line_found = false
required_line = null
with open(f, 'r') as f2:
for line in f2:
if line_found:
required_line = line
break
elif l == line:
line_found = true
return (line_found, required_line)
def similitudes(file1, file2):
same = 0
data = ''
copy = False
previous_line = null
with open(file1, 'r') as f1:
for line in f1:
if copy == True:
data += line
if line == '\n' or line[0:6] != '#Name ':
copy = False
if (line[0:6] == '#Name '):
print line
previous_line = line
elif line[0:6] == '#Date ':
print line
file2_line_info = isInFile(previous_line, file2)
if file2_line_info[0] == True:
copy = True
data += file2_line_info[1]
print "true"
return data
def main(argv=2):
print (sys.argv[1])
print (sys.argv[2])
if argv == 2:
out = open('final.txt', 'w')
data = (
similitudes(sys.argv[1], sys.argv[2]) + '\n'
)
out.write(data)
out.close()
else:
print ("This program need 2 files")
exit (0)
return 0
if __name__ == '__main__':
status = main()
sys.exit(status)
Note: This is not the pythonic way of doing things. As I have mentioned in the above answer https://stackoverflow.com/a/34696778/3534696 use "re" module and solve the problem efficiently.

Read the first file into a dictionary, using maketrans/translate to clean up the name.
Using zip(file, file) to read 2 lines of the file at a time makes it much easier to handle.
And using .split(' ', 1)[1] to get rid of the first column.
And .strip() to get rid of any surrounding whitespace (i.e. \n)
Then you can read the second file updating the dictionary.
In Python3 this looks like:
>>> punc = str.maketrans('_-', ' ') # import string & string.maketrans() in Py2
>>> with open(filename1) as file1, open(filename2) as file2:
... data = {name.split(' ', 1)[1].strip().translate(punc):
... date.split(' ', 1)[1].strip().split(':')
... for name, date in zip(file1, file1)}
... for n, d in zip(file2, file2):
... data[n.split(' ', 1)[1].strip()][0] = d.split(' ', 1)[1].strip()
>>> data
{'Marge S': ['444444', 'Ghdsasra'],
'Sebastian Forset': ['191020', 'UhUidsiIds'],
'Sebastian Forset2': ['201020', 'UdfasdUDsa'],
'Solaris Mage': ['281902', 'UDsafduD']}
After that it is just a matter of writing the dictionary out to a new file.
>>> with open(<output>, 'w+') as output:
... for name, date in data.items():
... output.write('#Name {}\n'.format(name))
... output.write('#Date {}:{}\n'.format(*date))
Note: I had to change 'Homer S' to 'Solaris Mage' in the second file to get the stated output.

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

how to merge same object in json file using python - python

Related

Bitcoin address convert to RIPEMD160

Extract IP addresses from text file without using REGEX

Python API call JSON to CSV iteration overwriting data

Error in wikipedia subcategory crawling using python3

Python string compare and replace

Categories

Resources