I am trying to write data to an Excel file. Every link that meets the requirement in the if-test should be written to the Excel file, starting at cell (0,0) and continuing downwards in the same column: (1,0), (2,0), (3,0), etc. The problem is that data is written to the Excel file only for the last iteration in which the if-test passes.
JSON file (try.json):
[
    {
        "beds": "3",
        "bath": "2",
        "link": "https://www.realestate.com/5619-w-michelle-dr-glendale-az-85308--790",
        "price": "382,76"
    },
    {
        "beds": "3",
        "bath": "1",
        "link": "https://www.realestate.com/5619-w-michelle-dr-glendale-az-85308--790",
        "price": "382,76"
    },
    {
        "beds": "2",
        "bath": "3",
        "link": "https://www.realestate.com/5619-w-michelle-dr-glendale-az-85308--790",
        "price": "382,76"
    },
    {
        "beds": "3",
        "bath": "2",
        "link": "https://www.realestate.com/5619-w-michelle-dr-glendale-az-85308--790",
        "price": "382,76"
    }
]
Python code (this is what I tried):
import json
import re
from xlwt import Workbook

class Products:
    def __init__(self):
        self.list_links = []

    def product(self, index):
        for k, v in index.items():
            if k == 'link':
                link = v
            if k == 'bath':
                bath = v
                fl_bath = int(bath)
        wb = Workbook()
        sheet1 = wb.add_sheet('sheet1')
        sheet1.col(0).width = 7000
        if fl_bath >= 2:
            length = len(self.list_links)
            sheet1.write(length, 0, link)
            self.list_links.append(link)
            print(link)
        wb.save("python.xls")

with open('./try.json') as json_file:
    data = json.load(json_file)

i = 0
p = Products()
while i <= 3:
    dicts = data[i]
    p.product(dicts)
    i += 1
It should write the links downwards, one per row, in the Excel file, for every link that meets the requirement:
row1: https://www.realestate.com/5619-w-michelle-dr-glendale-az-85308--790
row2: https://www.realestate.com/5619-w-michelle-dr-glendale-az-85308--790
row3: https://www.realestate.com/5619-w-michelle-dr-glendale-az-85308--790
I get this output (Excel file):
row1:
row2:
row3: https://www.realestate.com/5619-w-michelle-dr-glendale-az-85308--790
Three of the links meet the criterion, but only the last one in the iteration gets written to the Excel file. Are they being overwritten in some way after each iteration? Any good tips on how to fix this?
You can simplify your code since the requirement is a simple greater-than comparison:
import json
from xlwt import Workbook

with open('inputFile.json') as json_file:
    data = json.load(json_file)

wb = Workbook()
firstSheet = wb.add_sheet('sheet1')
firstSheet.col(0).width = 7000

row = -1
for item in data:
    if int(item['bath']) >= 2:
        row = row + 1
        firstSheet.write(row, 0, item['link'])

wb.save("outputFile.xls")
It seems like you are overwriting the Excel file on each iteration. Move the workbook definition code into the __init__ of your Products class, move the save call into a separate class method, and call that method after processing your dicts.
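A minimal sketch of that idea (the save method name is illustrative, not from the original code):

import json
from xlwt import Workbook

class Products:
    def __init__(self):
        self.list_links = []
        # create the workbook and sheet once, when the object is built
        self.wb = Workbook()
        self.sheet1 = self.wb.add_sheet('sheet1')
        self.sheet1.col(0).width = 7000

    def product(self, index):
        if int(index['bath']) >= 2:
            # write each qualifying link on the next free row
            self.sheet1.write(len(self.list_links), 0, index['link'])
            self.list_links.append(index['link'])

    def save(self, filename):
        # save once, after all dicts have been processed
        self.wb.save(filename)

with open('./try.json') as json_file:
    data = json.load(json_file)

p = Products()
for dicts in data:
    p.product(dicts)
p.save("python.xls")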
The problem here is that on every iteration you are creating a new workbook and sheet, writing one link, and saving it as "python.xls". You should create the workbook outside the function, only once, and have the product function write links to it. Something like this:
import json
from xlwt import Workbook

wb = Workbook()
sheet1 = wb.add_sheet('sheet1')
sheet1.col(0).width = 7000

class Products:
    def __init__(self):
        self.list_links = []

    def product(self, index):
        for k, v in index.items():
            if k == 'link':
                link = v
            if k == 'bath':
                bath = v
                fl_bath = int(bath)
        if fl_bath >= 2:
            length = len(self.list_links)
            sheet1.write(length, 0, link)
            self.list_links.append(link)
            print(link)

with open('./try.json') as json_file:
    data = json.load(json_file)

i = 0
p = Products()
while i <= 3:
    dicts = data[i]
    p.product(dicts)
    i += 1

wb.save("python.xls")
helpp.json:
{
    "States": {
        "Illinois": {
            "county": [
                {
                    "population": 100000,
                    "nameofcounty": "Dupage"
                },
                {
                    "population": 200000,
                    "nameofcounty": "Marion"
                }
            ]
        },
        "Indiana": {
            "county": [
                {
                    "population": 100000,
                    "nameofcounty": "Dupage"
                },
                {
                    "population": 200000,
                    "nameofcounty": "Marion"
                }
            ]
        }
    }
}
My code:
import json

with open('helpp.json') as file:
    package_json = json.load(file)

IN = package_json['States']['Illinois']['county']
IL = package_json['States']['Indiana']['county']

for i in IN:
    county = i['nameofcounty']
    population = i['population']

for j in IL:
    population = j['population']
    county = j['nameofcounty']
    total_population = i['population'] + j['population']
    print(county, total_population)
I can't figure out how to add numbers from multiple for loops correctly. My current output is:
Dupage 300000
Marion 400000
but it's supposed to be:
Dupage 200000
Marion 400000
The simple error in your code is in the second-to-last line, where you add i['population'] to j['population']. At that point the first for loop has already exited, so i holds its last value: Marion, with a population of 200000. To prevent this, first of all use different names; there is no reason to assign the population of two different datasets to the same variable. Here are some ways you can go about it:
import json

with open('test.json') as file:
    package_json = json.load(file)

IN = package_json['States']['Illinois']['county']
IL = package_json['States']['Indiana']['county']

for i in IN:
    county = i['nameofcounty']
    population = i['population']
    for j in IL:
        population_1 = j['population']
        county_1 = j['nameofcounty']
        if county_1 == county:  # to check if they are the same county
            total_population = population_1 + population  # adds them
            print(county, total_population)  # print
This method is however not very efficient. I personally suggest trying:
import json

with open('test.json') as file:
    package_json = json.load(file)

IN = package_json['States']['Illinois']['county']
IL = package_json['States']['Indiana']['county']

for i, j in zip(IN, IL):  # zip() pairs the two lists element by element
    print(i['nameofcounty'], i['population'] + j['population'])
Both have the same output, but the second one is more efficient in your case. For more info about zip() you can check the docs.
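As a quick illustration of what zip() does, here is a small self-contained example using data shaped like the question's counties; it pairs up the i-th elements of both lists on each pass:

counties_a = [{'nameofcounty': 'Dupage', 'population': 100000},
              {'nameofcounty': 'Marion', 'population': 200000}]
counties_b = [{'nameofcounty': 'Dupage', 'population': 100000},
              {'nameofcounty': 'Marion', 'population': 200000}]

for a, b in zip(counties_a, counties_b):
    # a comes from the first list, b from the second, always at the same index
    print(a['nameofcounty'], a['population'] + b['population'])
# Dupage 200000
# Marion 400000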
I put data into a CSV file (called "Essential Data_posts"). In my main, I extract a particular column from this file (called 'Post Texts') so that I can analyze the post texts for sentiment entity analysis using Google Cloud NLP. I then put this analysis into another CSV file (called "SentimentAnalysis"). To do this, I put all of the information pertaining to the sentiment entity analysis into arrays (one for each piece of information).
The problem I am having is that when I execute my code, nothing shows up in the SentimentAnalysis file other than the headers, e.g. "Representative Name". When I checked the lengths of all the arrays, I found that each had a length of 0, so no information was being added to them.
I am using Ubuntu 21.04 and Google Cloud Natural Language. I am running this all in the terminal, not the Google Cloud Platform console. I am also using Python 3 and the Emacs text editor.
from google.cloud import language_v1
import pandas as pd
import csv
import os

#lists we are appending to
representativeName = []
entity = []
salienceScore = []
entitySentimentScore = []
entitySentimentMagnitude = []
metadataNames = []
metadataValues = []
mentionText = []
mentionType = []

def sentiment_entity(postTexts):
    client = language_v1.LanguageServiceClient()
    type_ = language_v1.Document.Type.PLAIN_TEXT
    language = "en"
    document = {"content": post_texts, "type": type_, "language": language}
    encodingType = language_v1.EncodingType.UTF8
    response = client.analyze_entity_sentiment(request = {'document': document, 'encoding type': encodingType})

    #loop through entities returned from the API
    for entity in response.entities:
        representativeName.append(entity.name)
        entity.append(language_v1.Entity.Type(entity.type_).name)
        salienceScore.append(entity.salience)
        entitySentimentScore.append(sentiment.score)
        entitySentimentMagnitude.append(sentiment.magnitude)

    #loop over metadata associated with entity
    for metadata_name, metadata_value in entity.metadata.items():
        metadataNames.append(metadata_name)
        metadataValues.append(metadata_value)

    #loop over the mentions of this entity in the input document
    for mention in entity.mentions:
        mentionText.append(mention.text.content)
        mentionType.append(mention.type_)

    #put the lists into the csv file (using pandas)
    data = {
        "Representative Name": representativeName,
        "Entity": entity,
        "Salience Score": salienceScore,
        "Entity Sentiment Score": entitySentimentScore,
        "Entity Sentiment Magnitude": entitySentimentMagnitude,
        "Metadata Name": metadataNames,
        "Metadata Value": metadataValues,
        "Mention Text": mentionText,
        "Mention Type": mentionType
    }
    df = pd.DataFrame(data)
    df
    df.to_csv("SentimentAnalysis.csv", encoding='utf-8', index=False)

def main():
    import argparse
    #read the csv file containing the post text we need to analyze
    filename = open('Essential Data_posts.csv', 'r')
    #create dictreader object
    file = csv.DictReader(filename)
    postTexts = []
    #iterate over each column and append values to list
    for col in file:
        postTexts.append(col['Post Text'])
    parser = arg.parse.ArgumentParser()
    parser.add_argument("--postTexts", type=str, default=postTexts)
    args = parser.parse_args()
    sentiment_entity(args.postTexts)
I tried running your code and I encountered the following errors:
1. You did not use the passed parameter postTexts in sentiment_entity(), so the code errors at document = {"content": post_texts, "type": type_, "language": language}.
2. A list cannot be passed to "content"; it should be a string. See the Document reference.
3. In the request variable, 'encoding type' should be 'encoding_type'.
4. The local variable entity should not have the same name as the list entity = []. Python will try to append values to the local variable entity, which is not a list.
5. It should be entity.sentiment.score and entity.sentiment.magnitude instead of sentiment.score and sentiment.magnitude.
6. The loops for metadata and mentions should be inside the loop for entity in response.entities:.
I edited your code and fixed the errors mentioned above. In your main(), I included a step to convert the list postTexts to a string so it can be used in your sentiment_entity() function. metadataNames and metadataValues are temporarily commented out since I do not have an example that could populate these values.
from google.cloud import language_v1
import pandas as pd
import csv
import os

#lists we are appending to
representativeName = []
entity_arr = []
salienceScore = []
entitySentimentScore = []
entitySentimentMagnitude = []
metadataNames = []
metadataValues = []
mentionText = []
mentionType = []

def listToString(s):
    """Transform list to string"""
    str1 = " "
    return str1.join(s)

def sentiment_entity(postTexts):
    client = language_v1.LanguageServiceClient()
    type_ = language_v1.Document.Type.PLAIN_TEXT
    language = "en"
    document = {"content": postTexts, "type_": type_, "language": language}
    encodingType = language_v1.EncodingType.UTF8
    response = client.analyze_entity_sentiment(request = {'document': document, 'encoding_type': encodingType})

    #loop through entities returned from the API
    for entity in response.entities:
        representativeName.append(entity.name)
        entity_arr.append(language_v1.Entity.Type(entity.type_).name)
        salienceScore.append(entity.salience)
        entitySentimentScore.append(entity.sentiment.score)
        entitySentimentMagnitude.append(entity.sentiment.magnitude)

        #loop over the mentions of this entity in the input document
        for mention in entity.mentions:
            mentionText.append(mention.text.content)
            mentionType.append(mention.type_)

        #loop over metadata associated with entity
        for metadata_name, metadata_value in entity.metadata.items():
            metadataNames.append(metadata_name)
            metadataValues.append(metadata_value)

    data = {
        "Representative Name": representativeName,
        "Entity": entity_arr,
        "Salience Score": salienceScore,
        "Entity Sentiment Score": entitySentimentScore,
        "Entity Sentiment Magnitude": entitySentimentMagnitude,
        #"Metadata Name": metadataNames,
        #"Metadata Value": metadataValues,
        "Mention Text": mentionText,
        "Mention Type": mentionType
    }
    df = pd.DataFrame(data)
    df.to_csv("SentimentAnalysis.csv", encoding='utf-8', index=False)

def main():
    import argparse
    #read the csv file containing the post text we need to analyze
    filename = open('test.csv', 'r')
    #create dictreader object
    file = csv.DictReader(filename)
    postTexts = []
    #iterate over each column and append values to list
    for col in file:
        postTexts.append(col['Post Text'])
    content = listToString(postTexts)  #convert list to string
    print(content)
    sentiment_entity(content)

if __name__ == "__main__":
    main()
test.csv:
col_1,Post Text
dummy,Grapes are good.
dummy,Bananas are bad.
When the code is run, the converted string is printed and SentimentAnalysis.csv is generated:
SentimentAnalysis.csv:
Representative Name,Entity,Salience Score,Entity Sentiment Score,Entity Sentiment Magnitude,Mention Text,Mention Type
Grapes,OTHER,0.8335162997245789,0.800000011920929,0.800000011920929,Grapes,2
Bananas,OTHER,0.16648370027542114,-0.699999988079071,0.699999988079071,Bananas,2
I am trying to convert a CSV file to JSON, but there is a header in my CSV that is empty. Is there a way to name it when outputting it to JSON?
Example data:
""      Calories  Fat  Sodium
Bread   100       10   23
I got this code from GeeksforGeeks:
import csv
import json

# Function to convert a CSV to JSON
# Takes the file paths as arguments
def make_json(csvFilePath, jsonFilePath):
    # create a dictionary
    data = {}

    # Open a csv reader called DictReader
    with open(csvFilePath, encoding='utf-8') as csvf:
        csvReader = csv.DictReader(csvf)

        # Convert each row into a dictionary
        # and add it to data
        for rows in csvReader:
            # Assuming a column named 'No' to
            # be the primary key
            key = rows['']
            data[key] = rows

    # Open a json writer, and use the json.dumps()
    # function to dump data
    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
        jsonf.write(json.dumps(data, indent=4))

# Driver Code
# Decide the two file paths according to your
# computer system
csvFilePath = r'Names.csv'
jsonFilePath = r'Names.json'

# Call the make_json function
make_json(csvFilePath, jsonFilePath)
I did this and it gets the first row, but I'm not sure how to rename it when I output it to JSON.
It appears as "": "Bread" in the JSON file.
key = rows['']
Thanks in advance if anyone can help!
Edit: Expected output:
{
    "Food": "Bread",
    "Calories": "45",
    "Fat (g)": "0",
    "Carb. (g)": "11",
    "Fiber (g)": "0",
    "Protein": "0",
    "Sodium": "10"
}
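For what it's worth, a minimal sketch of one way to do that, assuming the blank column should be named "Food" (the name is taken from the expected output above, not from the CSV itself): rename the empty key on each row before storing it.

import csv
import json

def make_json(csvFilePath, jsonFilePath):
    data = {}
    with open(csvFilePath, encoding='utf-8') as csvf:
        csvReader = csv.DictReader(csvf)
        for rows in csvReader:
            # move the value under the empty header into a key named "Food"
            # ("Food" is an assumed name; use whatever the column should be called)
            renamed = {'Food': rows.pop(''), **rows}
            data[renamed['Food']] = renamed
    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
        jsonf.write(json.dumps(data, indent=4))

make_json(r'Names.csv', r'Names.json')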
I am trying to write the JSON output from my Python request to a file, line by line. I already checked a similar issue on Stack Overflow in the question write to file line by line python, without success.
Here is the code:
myfile = open("data.txt", "a")
for item in pretty_json["geonames"]:
    print(item["geonameId"], item["name"])
    myfile.write("%s\n" % item["geonameId"] + "https://www.geonames.org/" + item["name"])
myfile.close()
Here is the output from my pretty_json["geonames"]:
{
    "adminCode1": "FR",
    "lng": "7.2612",
    "geonameId": 2661847,
    "toponymName": "Aeschlenberg",
    "countryId": "2658434",
    "fcl": "P",
    "population": 0,
    "countryCode": "CH",
    "name": "Aeschlenberg",
    "fclName": "city, village,...",
    "adminCodes1": {
        "ISO3166_2": "FR"
    },
    "countryName": "Switzerland",
    "fcodeName": "populated place",
    "adminName1": "Fribourg",
    "lat": "46.78663",
    "fcode": "PPL"
}
Then, as the output saved in my data.txt, I'm getting:
11048419
https://www.geonames.org/Aïre2661847
https://www.geonames.org/Aeschlenberg2661880
https://www.geonames.org/Aarberg6295535
The expected result should be something like:
Aïre , https://www.geonames.org/11048419
Aeschlenberg , https://www.geonames.org/2661847
Aarberg , https://www.geonames.org/2661880
Could writing the output as CSV be a solution?
Regards.
Using the csv module.
Ex:
import csv

with open("data.txt", "a") as myfile:
    writer = csv.writer(myfile)  # Create writer object
    for item in pretty_json["geonames"]:  # Iterate over the list
        writer.writerow([item["name"], "https://www.geonames.org/{}".format(item["geonameId"])])  # Write row.
If I understand correctly, you want the same screen output in your file. That's easy. If you are on Python 3, just add the file argument to your print call:
print(item["geonameId"], item["name"], file=myfile)
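Put together with the rest of the loop, that would look something like this (a sketch using the names from the question, formatted to match the expected output):

with open("data.txt", "a") as myfile:
    for item in pretty_json["geonames"]:
        # file= redirects print's output from the screen into the file
        print("{} , https://www.geonames.org/{}".format(item["name"], item["geonameId"]), file=myfile)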
Just compose a proper printing format for the needed items:
...
for item in pretty_json["geonames"]:
    print("{}, https://www.geonames.org/{}".format(item["name"], item["geonameId"]))
Sample output:
Aeschlenberg, https://www.geonames.org/2661847
I have an Excel file in which data is saved in CSV format. This data is present in the Excel file as shown below, under column A (the CSV file is generated by LabVIEW code that I have written to generate data). I have also attached an image of the CSV file for reference at the end of my question.
RPM,Load Current,Battery Output,Power Capacity
1200,30,12,37
1600,88,18,55
I want to create a JSON file in the following format:
{
    "power_capacity_data": {
        "rpm": ["1200", "1600"],
        "load_curr": ["30", "88"],
        "batt_output": ["12", "18"],
        "power_cap": ["37", "55"]
    }
}
This is my code:
import csv
import json

def main():
    # created a dictionary so that I can append data to it afterwards
    power_data = {"rpm": [], "load_curr": [], "batt_output": [], "power_cap": []}
    with open('power1.lvm') as f:
        reader = csv.reader(f)
        # trying to append the data of the "RPM" column to the dictionary
        rowcount = 0
        for row in reader:
            if rowcount == 0:
                # trying to skip the first row
                rowcount = rowcount + 1
            else:
                power_data['rpm'].append(row[0])
            print(row)
    json_report = {}
    json_report['pwr_capacity_data'] = power_data
    with open('LVMJSON', "w") as f1:
        f1.write(json.dumps(json_report, sort_keys=False, indent=4, separators=(',', ': '), encoding="utf-8", ensure_ascii=False))
    f1.close()

if __name__ == "__main__":
    main()
The output JSON file that I am getting is this (please ignore the print(row) statement in my code):
{
    "pwr_capacity_data": {
        "load_curr": [],
        "rpm": [
            "1200,30,12.62,37.88",
            "1600,88,18.62,55.88"
        ],
        "batt_output": [],
        "power_cap": []
    }
}
The whole row is getting saved in the list, but I just want the values under the RPM column to be saved. Can someone help me out with what I may be doing wrong? Thanks in advance. I have attached an image of the CSV file just in case it helps.
You could use Python's defaultdict to make it a bit easier, along with a dictionary to map all your header values:
from collections import defaultdict
import csv
import json

power_data = defaultdict(list)

header_mappings = {
    'RPM': 'rpm',
    'Load Current': 'load_curr',
    'Battery Output': 'batt_output',
    'Power Capacity': 'power_cap'}

with open('power1.lvm', newline='') as f_input:
    csv_input = csv.DictReader(f_input)
    for row in csv_input:
        for key, value in row.items():
            power_data[header_mappings[key]].append(value)

with open('LVMJSON.json', 'w') as f_output:
    json.dump({'power_capacity_data': power_data}, f_output, indent=2)
Giving you an output JSON file looking like:
{
  "power_capacity_data": {
    "batt_output": [
      "12",
      "18"
    ],
    "power_cap": [
      "37",
      "55"
    ],
    "load_curr": [
      "30",
      "88"
    ],
    "rpm": [
      "1200",
      "1600"
    ]
  }
}