# write data to a .csv file
def data_save_csv(type, data, id, name, header, since=None):
    # get the date when storing data
    date_storage()
    # create the data storage directory
    csv_parent_directory = os.path.join("dataset", "csv", type, glovar.date)
    # write data to .csv
    if type == "group_members":
        csv_file_prefix = "gm"
    elif type == "group_feed":
        csv_file_prefix = "gf"
    elif type == "public_figure_posts":
        csv_file_prefix = "pfp"
    elif "user_" in type:
        # create the data storage directory
        csv_parent_directory = os.path.join("dataset", "csv", "user", type, glovar.date)
        if type == "user_friends":
            csv_file_prefix = "uf"
        elif type == "user_likes":
            csv_file_prefix = "ul"
        elif type == "user_feed":
            csv_file_prefix = "uf"
    # create (mkdir) the csv_parent_directory
    directory_create(csv_parent_directory)
    if since:
        csv_file_name = csv_file_prefix + "_" + since.strftime("%Y%m%d-%H%M%S") + "_" + time_storage() + id + "_" + name + ".csv"
    else:
        csv_file_name = csv_file_prefix + "_" + time_storage() + "_" + id + "_" + name + ".csv"
    csv_file_directory = os.path.join(csv_parent_directory, csv_file_name)
    if type == "user_feed":
        feed = data
        for item in feed:
            # parse the feed data from group_download.py
            print("id=" + item['id'] + ",")
            print("permalink_url=" + item['permalink_url'] + ",")
            print("created_time=" + item['created_time'] + ",")
            print("updated_time=" + item['updated_time'] + ",")
            print("name=" + item['from']['name'] + ",")
            print("from_id=" + item['from']['id'] + ",")
            print("message=" + item['message'] + ",")
            print("link=" + item['link'] + ",")
            print("likes_total_count=" + str(item['likes']['summary']['total_count']) + ",")
            print("comments_total_count=" + str(item['comments']['summary']['total_count']) + ",")
    with open(csv_file_directory, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        # csv header
        writer.writerow(header)
        # if data is group members (group_manage.py)
        if type == "group_members" or "user_friends" or "user_likes":
            row = []
            for i in range(len(data)):
                for k in data[i].keys():
                    if isinstance(data[i][k], bool):
                        data[i][k] = str(data[i][k])
                    row.append(data[i][k])
                writer.writerow(row)
                row = []
        # if data is group feed (group_download.py)
        elif type == "group_feed" or "public_figure_posts" or "user_feed":
            feed = data
            for item in feed:
                # parse the feed data from group_download.py
                row = [item['id'], item['permalink_url'], item['created_time'], item['updated_time'],
                       item['from']['name'], item['from']['id'], item['message'], item['link'],
                       item['likes']['summary']['total_count'], item['comments']['summary']['total_count']]
                writer.writerow(row)
        csvfile.close()
I wrote a Python program to write data to a .csv file. When the type is "user_feed", I print the items of the data:
id=110286969468305_112459422584393,
permalink_url=https://www.facebook.com/110286969468305/posts/112459422584393,
created_time=2016-12-18T12:44:52+0000,
updated_time=2016-12-18T12:47:10+0000,
name=Dewi Nurfitri Oktaviani,
from_id=10202749157833181,
message=Hi, nice to meet you,
link=,
likes_total_count=0,
comments_total_count=1,
They are all correct, but when the data is written to the .csv file, I found that the order of the data does not match the header order. The header is:
header = ["POST ID", "Permalink", "Create time", "Updated time", "Author", "Author ID", "Message", "Link", "Likes", "Comments"]
and you can see that in this method "data_save_csv":
elif type == "group_feed" or "public_figure_posts" or "user_feed":
    feed = data
    for item in feed:
        # parse the feed data from group_download.py
        row = [item['id'], item['permalink_url'], item['created_time'], item['updated_time'],
               item['from']['name'], item['from']['id'], item['message'], item['link'],
               item['likes']['summary']['total_count'], item['comments']['summary']['total_count']]
        writer.writerow(row)
You can see that the order of the data items matches the order of the header items, but when I open the csv file, the header is in the right order while the data items are not: there is no "id" data, and the other items are out of order. Could you please help me?
Problem 1: This line
if type == "group_members" or "user_friends" or "user_likes":
isn't doing what you want. The expression always evaluates to True. Possible replacements:
if type == "group_members" or type == "user_friends" or type == "user_likes":
if type in ("group_members", "user_friends", "user_likes", ):
if type in {"group_members", "user_friends", "user_likes", }:
and this line
elif type == "group_feed" or "public_figure_posts" or "user_feed":
has the same problem. You should fix both lines and try again.
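To see why, remember that Python evaluates x == "a" or "b" as (x == "a") or ("b"), and a non-empty string is always truthy, so the whole expression is True no matter what x is. A minimal sketch you can run to confirm (the variable name type mirrors the question's parameter):

# The comparison does not distribute over `or`; the non-empty string
# "user_friends" makes the chained expression truthy for any value of type.
type = "group_feed"
print(bool(type == "group_members" or "user_friends" or "user_likes"))  # True

# A membership test checks the value against every candidate.
print(type in ("group_members", "user_friends", "user_likes"))  # False

Because the first branch always wins, your feed data falls into the group_members code path, which writes dict values in whatever order the keys happen to be in, not in your header order.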
Related
I have a text file, 'student.txt'. Some keys have multiple values. I only want data that is tied to the name, and the sibling & hobby values below that name.
'student.txt'
ignore me
name-> Alice
name-> Sam
sibling-> Kate,
unwanted
sibling-> Luke,
hobby_1-> football
hobby_2-> games
name-> Ramsay
hobby_1-> dance
unwanted data
hobby_2-> swimming
hobby_3-> jogging
ignore data
Code I've done:
file = open("student.txt", "r")
with open("student.csv", "w") as writer:
    main_dict = {}
    student_dict = {"Siblings": "N/A", "Hobbies": "N/A"}
    sibling_list = []
    hobby_list = []
    flag = True
    writer.write('name,siblings,hobbies\n')
    header = 'Name,Siblings,Hobbies'.split(',')
    sib_str = ''
    hob_str = ''
    for eachline in file:
        try:
            key, value = eachline.split("-> ")
            value = value.strip(",\n")
            if flag:
                if key == "name":
                    print(key, value)
                    if len(sibling_list) > 0:
                        main_dict[name]["Siblings"] = sib_str
                        #print (main_dict)
                    if len(hobby_list) > 0:
                        main_dict[name]["Hobbies"] = hob_str
                    sibling_list = []
                    hobby_list = []
                    name = value
                    main_dict[name] = student_dict.copy()
                    main_dict[name]["Name"] = name
                elif key == "sibling":
                    sibling_list.append(value)
                    sib_str = ' '.join(sibling_list).replace(' ', '\n')
                elif key.startswith("hobby"):
                    hobby_list.append(value)
                    hob_str = ' '.join(hobby_list)
                if len(sibling_list) > 0:
                    main_dict[name]["Siblings"] = sib_str
                if len(hobby_list) > 0:
                    main_dict[name]["Hobbies"] = hob_str
            if 'name' in eachline:
                flag = True
            else:
                flag = False
        except:
            pass
    for eachname in main_dict.keys():
        for eachkey in header:
            writer.write(str(main_dict[eachname][eachkey]))
            writer.write(',')
            if 'Hobbies' in eachkey:
                writer.write('\n')
CSV Output from Code above:
Expected CSV Output:
P.S.: I can't figure out how to avoid the try/except with a bare pass. Some lines (the ones without '->') are unwanted, so eachline.split("-> ") fails on them. Would appreciate help on this too.
Thanks so much!
The code below produces a csv file which you can import into Excel, and it will be in the exact format you are expecting.
You can use something like

if "->" not in line:
    continue

to skip lines that don't contain "->"; see the code below:
import csv

file = open("student.txt", "r")
students = {}
name = ""
for line in file:
    if "->" not in line:
        continue
    line = line.strip(",\n")
    line = line.replace(" ", "")
    key, value = line.split("->")
    if key == "name":
        name = value
        students[name] = {}
        students[name]["siblings"] = []
        students[name]["hobbies"] = []
    else:
        if "sibling" in key:
            students[name]["siblings"].append(value)
        elif "hobby" in key:
            students[name]["hobbies"].append(value)
#print(students)

csvlines = []
for student in students:
    name = student
    hobbies = students[name]["hobbies"]
    siblings = students[name]["siblings"]
    maxlength = max(len(hobbies), len(siblings))
    if maxlength == 0:
        csvlines.append([name, "N/A", "N/A"])
        continue
    for i in range(maxlength):
        if i < len(siblings):
            siblingvalue = siblings[i]
        elif i == len(siblings):
            siblingvalue = "N/A"
        else:
            siblingvalue = ""
        if i < len(hobbies):
            hobbyvalue = hobbies[i]
        elif i == len(hobbies):
            hobbyvalue = "N/A"
        else:
            hobbyvalue = ""
        if i == 0:
            csvlines.append([name, siblingvalue, hobbyvalue])
        else:
            csvlines.append(["", siblingvalue, hobbyvalue])
print(csvlines)

fields = ["name", "siblings", "hobbies"]
with open("students.csv", "w", newline="") as csvfile:
    # creating a csv writer object
    csvwriter = csv.writer(csvfile)
    # writing the fields
    csvwriter.writerow(fields)
    # writing the data rows
    csvwriter.writerows(csvlines)
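Run against the student.txt above (on Python 3.7+, where dicts preserve insertion order), tracing the code suggests students.csv should contain the following; worth verifying locally:

name,siblings,hobbies
Alice,N/A,N/A
Sam,Kate,football
,Luke,games
Ramsay,N/A,dance
,,swimming
,,jogging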
I'm struggling with the last step of building a crawler. If I crawl a couple of files with the same structure it works perfectly, but if I try to grab ones with an older schema (missing columns) I get, for example:
ParserError: Expected 42 fields in line 273, saw 47
If I change the engine to python-fwf it works, but then I can't use index_col = "Div" any more, and that one is needed to deal with NA rows without producing errors.
def dataImport(df_raw):
    header_appended = False
    concat_i = []
    progress = len(linkGenerator())
    count = 1
    debugging_var = 1
    print("0 of " + str(progress))
    for i in list(linkGenerator()):
        if debbuging_mode == True:
            df_debugger = pd.read_csv(i, sep=",", header=0, engine="python", encoding="ISO-8859-1", index_col="Div")
            df_debugger.to_csv(debbuging_path + str(debugging_var) + "_of_" + str(progress) + ".csv")
            debugging_var = debugging_var + 1
        if header_appended != True:
            print("Appending : " + str(i))
            df_raw = pd.read_csv(i, sep=",", engine="python", encoding="ISO-8859-1", index_col=False)
            header_appended = True
            print("Appended.")
            time.sleep(2)
        else:
            print("Appending : " + str(i))
            df_internal = pd.read_csv(i, sep=",", engine="python", encoding="ISO-8859-1", index_col=False)
            concat_i.append(df_internal)
            print("Appended.")
            time.sleep(2)
        print(str(count) + " of " + str(progress))
        count = count + 1
    df_raw = pd.concat(concat_i, ignore_index=True)
    df_raw.dropna(subset=["Div"], inplace=True)
    return df_raw
I tried using names = range(100) and approaches like this one: import csv with different number of columns per row using Pandas.
In my opinion df_raw = pd.concat(concat_i, ignore_index = True) is the problem.
Glad to receive help.
Cheers! :)
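No answer is recorded for this question, but here is a minimal sketch of one common fix, assuming the files share column names where their schemas overlap (the file names below are hypothetical): read every file with its own header and let pd.concat align columns by name, so columns missing from older files simply become NaN. If a single file also contains data rows longer than its own header (the ParserError above), over-provisioning column names via names=range(...) with header=0 is the usual workaround, at the cost of losing the named columns.

import pandas as pd

# Hypothetical file list: a newer and an older schema of the same data.
links = ["season_2019.csv", "season_2005.csv"]

frames = []
for link in links:
    # header=0 (the default) lets each file define its own columns,
    # so files with fewer columns parse without a field-count mismatch.
    frames.append(pd.read_csv(link, sep=",", engine="python",
                              encoding="ISO-8859-1", index_col=False))

# concat aligns columns by name; columns absent from a file become NaN,
# and the "Div" filter still works afterwards.
df_raw = pd.concat(frames, ignore_index=True, sort=False)
df_raw = df_raw.dropna(subset=["Div"])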
I am trying to automate functions based on what input is received, but I'm getting an error when I try to pass the input as an arg for a function. Here's an example of what I'm trying to do:

var = 'hello world'
def example(data):
    #function code
example(var)

That's a basic version of what I'm doing, and it returns an error like
var is not defined
Here is my actual code:
import AriaAudioConfig as Ariaconfig
import AriaMathModule as AriaMath
import AriaLocationModule as AriaLocation
import AriaNLPModule as AriaNLP
from inspect import getmembers, isfunction
import re
import pandas as pd
import csv
from typing import Awaitable, Callable, TypeVar

location = ['geolocatecity', 'citydiff', 'locate', 'location', 'where is', 'far', 'distance']
math = ['calculate', 'add', 'subtract', 'multiply', 'divide', 'addition', 'subtraction', 'multiplication', 'division', 'square-root', 'power', 'squared', 'minus']
audio = ['volume', 'speak', 'sound']
nlp = ['translate', 'translation', 'language', 'english', 'spanish', 'french']
locdict = {'geolocatecity': 'blabla', 'citydiff': 'blabla'}
state = 0
city2 = 0
file = pd.read_csv('geolocations.csv')

def dataProcess(data):
    global state
    global city2
    datasearch = data.split()
    argsearch = datasearch
    datalength = len(datasearch)
    for i in range(datalength):
        if datasearch[i] in location:
            data = datasearch[i]
            datacom = typeremoval(functiongrep(AriaLocation))
            datacom = str(datacom).split()
            datalen = len(datacom)
            with open('geolocations.csv', 'rt') as f:
                reader = csv.reader(f, delimiter=',')
                for row in reader:
                    for field in row[0]:
                        for i in range(datalength):
                            if argsearch[i] == row[0]:
                                try:
                                    if city in locals():
                                        city2 = argsearch[i]
                                except:
                                    city = argsearch[i]
                            if argsearch[i] == row[1]:
                                state = argsearch[i]
                            if argsearch[i] == row[2]:
                                country = argsearch[i]
                f.close()
            for i in range(datalen):
                if str(data) in str(datacom[i]):
                    activefunction = datacom[i]
                    if state != 0:
                        eval('AriaLocation.' + activefunction + '(' + city + ',' + state + ',' + country + ')')
                    elif city2 != 0:
                        eval('AriaLocation.' + activefunction + '(' + city + ',' + city2 + ')')
                    else:
                        print('uh-oh something went wrong')
        elif datasearch[i] in math:
            data = datasearch[i]
            datacom = typeremoval(functiongrep(AriaMath))
            print(data)
            if data in datacom:
                print('found')
        elif datasearch[i] in audio:
            data = datasearch[i]
            datacom = typeremoval(functiongrep(Ariaconfig))
        elif datasearch[i] in nlp:
            data = datasearch[i]
            datacom = typeremoval(functiongrep(AriaNLP))

#dataProcess('Aria how far am I from Arizona')

def functiongrep(function):
    string = ''
    functions_list = [o for o in getmembers(function) if isfunction(o[1])]
    flen = len(functions_list)
    for i in range(flen):
        head, sep, tail = str(functions_list[i]).partition('<')
        string = string + head
    return string

def typeremoval(function):
    func = str(function)
    func = str(''.join(func))
    func = re.sub("[',()]", '', func)
    return func

dataProcess('locate Scottsdale Arizona USA')
I want dataProcess() to activate different commands based on what is given as the input.
Exception has occurred: NameError
name 'Scottsdale' is not defined
  File "/Users/timyc1/Desktop/DeadIdeas/smartroom/Seavernet/Aria/AriaProcessingModule.py", line 58, in dataProcess
    eval('AriaLocation.' + activefunction + '(' + city + ',' + state + ',' + country + ')')
  File "/Users/timyc1/Desktop/DeadIdeas/smartroom/Seavernet/Aria/AriaProcessingModule.py", line 95, in <module>
    dataProcess('locate Scottsdale Arizona USA')
Don't use eval for this. eval is almost never the solution. Your NameError comes from eval interpolating the raw words as identifiers (the built string is AriaLocation.f(Scottsdale,Arizona,USA), so Python looks for a variable named Scottsdale). Look the function up with getattr and pass the values as ordinary arguments instead:

if state != 0:
    getattr(AriaLocation, activefunction)(city, state, country)
elif city2 != 0:
    getattr(AriaLocation, activefunction)(city, city2)
else:
    print('uh-oh something went wrong')
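For illustration, a self-contained sketch of the same getattr dispatch pattern, using the standard operator module to stand in for AriaLocation (the dispatch helper is made up for this example):

import operator

def dispatch(module, funcname, *args):
    # Look the function up by name on the module object, then call it
    # with the values themselves -- no string building, no eval.
    return getattr(module, funcname)(*args)

print(dispatch(operator, "add", 2, 3))  # 5

The arguments stay ordinary Python objects, so a city name like 'Scottsdale' is passed as a string value rather than being re-parsed as an identifier.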
I have more than 200 columns in a csv file and want to define a class in a Django model, but I really can't find any useful instructions for creating a Django model from a csv file (the first row gives the column names).
Is there any way to define a Django model from a csv file or a pandas dataframe?
I generate the field definitions to feed into the model with the code below:
import numpy as np

for col in speech_data.columns:
    arr = speech_data[col]
    size = 0
    value = ""
    for ar in arr:
        if len(str(ar)) > size:
            size = len(str(ar))
            value = ar
    print col, size, type(speech_data.iloc[0][col]), ar
    if type(speech_data.iloc[0][col]) == np.int64:
        print col + " = " + "models.IntegerField()"
    elif type(speech_data.iloc[0][col]) == str:
        print col + " = " + "models.CharField(max_length=" + str(size+3) + ")"
    elif type(speech_data.iloc[0][col]) == np.float64:
        print col + " = " + "models.FloatField(null=True, blank=True, default=None)"
    elif type(speech_data.iloc[0][col]) == np.bool_:
        print col + " = " + "models.BooleanField()"
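No answer is recorded here either, but a hedged Python 3 sketch of the same generator idea, inferring each field from the whole column's dtype rather than from row 0 (speech_data is the dataframe from the question; the dtype-to-field mapping is an assumption to review, not an official Django facility):

import pandas as pd

def django_fields_from_dataframe(df):
    # Emit one Django field definition per column, based on the column dtype.
    # Sketch only: review the mapping before pasting into models.py.
    for col in df.columns:
        dtype = df[col].dtype
        if pd.api.types.is_bool_dtype(dtype):
            field = "models.BooleanField()"
        elif pd.api.types.is_integer_dtype(dtype):
            field = "models.IntegerField()"
        elif pd.api.types.is_float_dtype(dtype):
            field = "models.FloatField(null=True, blank=True, default=None)"
        else:
            # Size the CharField from the longest string in the column, plus slack.
            size = int(df[col].astype(str).str.len().max())
            field = "models.CharField(max_length={})".format(size + 3)
        print("{} = {}".format(col, field))

# Usage, assuming the question's dataframe:
# speech_data = pd.read_csv("speech.csv")
# django_fields_from_dataframe(speech_data)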
I'm relatively new to python and was wondering if I could get some assistance in parsing data so that it is easier to analyze.
My data is in the following form (each is an entire line):
20160930-07:06:54.481737|I|MTP_4|CL:BF K7-M7-N7 Restrict for maxAggressive: -4.237195
20160930-07:06:54.481738|I|MTP_4|CL:BF K7-M7-N7 BidPrice: -5.0 mktBestBid: -5.0 bidTheo: -4.096774 bidSeedEdge: 0.195028 bidUnseedEdge: CL:BF K7-M7-N7 = 0.14042 Min Bid: -6.0 Max Ticks Offset: 1 Max Aggressive Ticks: 1
This is my code so far:

# Output file
output_filename = os.path.normpath("Mypath/testList.log")
# Overwrites the file
with open(output_filename, "w") as out_file:
    out_file.write("")
# Open output file
with open(output_filename, "a") as out_file:
    # Open input file in 'read' mode
    with open("mypath/tradedata.log", "r") as in_file:
        # Loop over each log line, grabbing lines with the necessary data
        for line in islice(in_file, 177004, 8349710):
            out_file.write(line)
Would it be easiest to just go through and grab the data by keywords like bidUnseedEdge, mktBestBid, etc.?
infilename = "path/data.log"
outfilename = "path/OutputData.csv"

with open(infilename, 'r') as infile,\
     open(outfilename, "w") as outfile:
    lineCounter = 0
    for line in infile:
        lineCounter += 1
        if lineCounter % 1000000 == 0:
            print(lineCounter)
        data = line.split("|")
        if len(data) < 4:
            continue
        bidsplit = data[3].split("bidTheo:")
        namebid = data[3].split("BidPrice:")
        if len(bidsplit) == 2:
            bid = float(bidsplit[1].strip().split()[0])
            bidname = namebid[0].strip().split(",")[0]
            #print("bidTheo," + data[0] + "," + str(bid))
            outfile.write("bidTheo," + data[0] + "," + bidname + "," + str(bid) + "\n")
        offersplit = data[3].split("offerTheo:")
        nameoffer = data[3].split("AskPrice:")
        if len(offersplit) == 2:
            offer = float(offersplit[1].strip().split()[0])
            offername = nameoffer[0].strip().split(",")[0]
            #print("offerTheo," + data[0] + "," + str(offer))
            outfile.write("offerTheo," + data[0] + "," + offername + "," + str(offer) + "\n")
print("Done")