I have a text file, 'student.txt'. Some keys have multiple values. I only want data that is tied to the name, and the sibling & hobby values below that name.
'student.txt'
ignore me
name-> Alice
name-> Sam
sibling-> Kate,
unwanted
sibling-> Luke,
hobby_1-> football
hobby_2-> games
name-> Ramsay
hobby_1-> dance
unwanted data
hobby_2-> swimming
hobby_3-> jogging
ignore data
Code I've done:
file = open("student.txt", "r")
with open("student.csv", "w") as writer:
main_dict = {}
student_dict = {"Siblings": "N/A", "Hobbies": "N/A"}
sibling_list = []
hobby_list = []
flag = True
writer.write ('name,siblings,hobbies\n')
header = 'Name,Siblings,Hobbies'.split(',')
sib_str = ''
hob_str =''
for eachline in file:
try:
key, value = eachline.split("-> ")
value = value.strip(",\n")
if flag:
if key == "name":
print (key,value)
if len(sibling_list) > 0:
main_dict[name]["Siblings"] = sib_str
#print (main_dict)
if len(hobby_list) > 0:
main_dict[name]["Hobbies"] = hob_str
sibling_list = []
hobby_list = []
name = value
main_dict[name] = student_dict.copy()
main_dict[name]["Name"] = name
elif key == "sibling":
sibling_list.append(value)
sib_str= ' '.join(sibling_list).replace(' ', '\n')
elif key.startswith("hobby"):
hobby_list.append(value)
hob_str = ' '.join(hobby_list)
if len(sibling_list) > 0:
main_dict[name]["Siblings"] = sib_str
if len(hobby_list) > 0:
main_dict[name]["Hobbies"] = hob_str
if 'name' in eachline:
if 'name' in eachline:
flag = True
else:
flag = False
except:
pass
for eachname in main_dict.keys():
for eachkey in header:
writer.write(str(main_dict[eachname][eachkey]))
writer.write (',')
if 'Hobbies' in eachkey:
writer.write ('\n')
CSV Output from Code above:
Expected CSV Output:
P.S: I can't seem to figure out how to not forgo the try/pass. As some lines (without '->') are unwanted, and I can't use the eachline.split("-> "). Would appreciate help on this too.
Thanks so much!
The code below gives the csv file which you can import in your Excel and it will be in exact format you are expecting.
You can use something like
if "->" not in line:
continue
To skip lines that don't contain "->" values, see in the code below:
import csv
file = open("student.txt", "r")
students = {}
name = ""
for line in file:
if "->" not in line:
continue
line = line.strip(",\n")
line = line.replace(" ", "")
key, value = line.split("->")
if key == "name":
name = value
students[name] = {}
students[name]["siblings"] = []
students[name]["hobbies"] = []
else:
if "sibling" in key:
students[name]["siblings"].append(value)
elif "hobby" in key:
students[name]["hobbies"].append(value)
#print(students)
csvlines = []
for student in students:
name = student
hobbies = students[name]["hobbies"]
siblings = students[name]["siblings"]
maxlength = 0
if len(hobbies) > len(siblings) :
maxlength = len(hobbies)
else:
maxlength = len(siblings)
if maxlength == 0:
csvlines.append([name, "N/A", "N/A"])
continue
for i in range(maxlength):
if i < len(siblings):
siblingvalue = siblings[i]
elif i == len(siblings):
siblingvalue = "N/A"
else:
siblingvalue = ""
if i < len(hobbies):
hobbyvalue = hobbies[i]
elif i == len(siblings):
hobbyvalue = "N/A"
else:
hobbyvalue = ""
if i == 0:
csvlines.append([name, siblingvalue, hobbyvalue])
else:
csvlines.append(["", siblingvalue, hobbyvalue])
print(csvlines)
fields = ["name", "siblings", "hobbies"]
with open("students.csv", 'w') as csvfile:
# creating a csv writer object
csvwriter = csv.writer(csvfile)
# writing the fields
csvwriter.writerow(fields)
# writing the data rows
csvwriter.writerows(csvlines)
all_filenames = [i for i in glob.glob("/home/greenvulcano/Scrivania/File_csv/*.csv")]
dest_file = "/home/greenvulcano/Scrivania/Unione_files.csv"
tag = ["inclination","deformation","temperature"]
content = []
for file_name in sorted(all_filenames):
with open(file_name, "r") as f:
lines = f.readlines()
for i, l in enumerate(lines):
if len(content) == 0:
content.append(l)
elif i != 0:
content.append(l)
with open(dest_file, "w") as f:
f.writelines(content)
seleziona = input("Inserisci le colonne che vuoi visualizzare: ") with open(dest_file, "r") as f:
titoli = f.readline().split(",")
indici = []
for titolo in titoli:
titolo_split = titolo.split('*')
if seleziona in titolo_split:
indice = titoli.index(titolo)
indici.append(indice)
righe = f.readlines()
lista = []
for riga in righe:
rigasplit = riga.split(",")
for i in indici:
lista.append(rigasplit[i])
I guess you can use:
test.csv
id,name,dob
1,pedro,2000-01-01
2,nuno,1999-01-01
import csv
csv_file = "test.csv"
seleziona = input("Inserisci le colonne che vuoi visualizzare: ").strip()
with open(csv_file) as f:
reader = csv.DictReader(f, delimiter=',')
for row in reader:
print(row[seleziona])
pedro
nuno
Demo
I am starting in Python. In the current code, A1 is "returned" infinitely, as if there was no sum of the variable célula. What can I do to get a "return" from A1 and A2?
import xlwings as xw
wb = xw.Book(r'C:\Users\Guilh\bin\teste\Contabilização Automática\myproject\myproject.xlsx')
sht = wb.sheets[0]
def buscar():
linha = '1'
célula = 'A' + linha
valor_célula = sht.range(célula).value
while valor_célula != None:
print('A1')
linha = int(linha) + 1
célula = 'A' + str(linha)
else:
print('A2')
You have neglected to update valor_célula and thus are stuck in the while loop.
I believe you want to update your code to be:
def buscar():
linha = '1'
célula = 'A' + linha
valor_célula = sht.range(célula).value
while valor_célula != None:
print('A1')
linha = int(linha) + 1
célula = 'A' + str(linha)
valor_célula = sht.range(célula).value # Update Value
else:
print('A2')
Hello all so i've been tasked to count lines and paragraphs. Counting every line is obviously easy but im stuck on counting the paragraphs. If a paragraph has no character it will give back the number zero and for every paragraph is an increment higher. For example an input file is: Input and an Output should come out Output
so my code is:
def insert_line_para_nums(infile, outfile):
f = open(infile, 'r')
out = open(outfile, 'w')
linecount = 0
for i in f:
paragraphcount = 0
if '\n' in i:
linecount += 1
if len(i) < 2: paragraphcount *= 0
elif len(i) > 2: paragraphcount = paragraphcount + 1
out.write('%-4d %4d %s' % (paragraphcount, linecount, i))
f.close()
out.close()
def insert_line_para_nums(infile, outfile):
f = open(infile, 'r')
out = open(outfile, 'w')
linecount = 0
paragraphcount = 0
empty = True
for i in f:
if '\n' in i:
linecount += 1
if len(i) < 2:
empty = True
elif len(i) > 2 and empty is True:
paragraphcount = paragraphcount + 1
empty = False
if empty is True:
paragraphnumber = 0
else:
paragraphnumber = paragraphcount
out.write('%-4d %4d %s' % (paragraphnumber, linecount, i))
f.close()
out.close()
This is one way to do it, and not the prettiest.
import re
f = open('a.txt', 'r')
paragraph = 0
lines = f.readlines()
for idx, line in enumerate(lines):
if not line == '\n':
m = re.search(r'\w', line)
str = m.group(0)
try:
# if the line is a newline, and the previous line has a str in it, then
# count it as a paragraph.
if line == '\n' and str in lines[idx-1]:
paragraph +=1
except:
pass
if lines[-1] != '\n': # if the last line is not a new line, count a paragraph.
paragraph +=1
print paragraph
Very new to this, but so far have had a little help in making this. It keeps giving syntax error on line 131.. and many other errors too. Im not really sure on what it is and the person who helped me create this is gone leaving me by my noob self to try and fix this.There is more to the script in the middle after -------- which decompiles the direct files but i cant post them. But i only need to figure out whats wrong with this "syntax error" so, Whats worng with this script?
#o-----------------------------------------------------------------------------o
#(-----------------------------------------------------------------------------)
#|Compatible w/ Python 3.2 |
#########################
# Configuration section #
#########################
toptablefolder = 'C:/Games/TOP/kop/scripts/table/'
toptableversion = 6
toptableencoding = 'gbk'
####end of config####
import struct
from types import *
#---------------------------------------------
class top_tsv:
name = ''
file_name = ''
list = []
version = ''
frombin_map = [('',{'v':0})]
def __init__(self, file_name=0, version=0):
if file_name == 0:
self.file_name = toptablefolder+self.__class__.__name__
else:
self.file_name = file_name
if (version == 0):
self.version = toptableversion
else:
self.version = version
self.list = []
self.frombin_map = [('',{'v':0})]
def unencrypt(self):
item = []
#Load data
file = open(self.file_name+'.bin', 'rb')
data = bytearray(file.read())
file.close()
i = 0
array_size = int(list(struct.unpack_from('<i',data[i:i+4]))[0])
rdata = data[i:i+4]
data = data[i+4:len(data)]
m = 0
k = 0
l = len(data)//array_size
#2.2 encryption
if ((self.version >= 5) and ((self.__class__.__name__ != 'magicsingleinfo') and (self.__class__.__name__ != 'magicgroupinfo') and (self.__class__.__name__ != 'resourceinfo') and (self.__class__.__name__ != 'terraininfo'))):
newbytes = bytes.fromhex('98 9D 9F 68 E0 66 AB 70 E9 D1 E0 E0 CB DD D1 CB D5 CF')
while(k<l):
item = data[i:i+array_size]
i = i+array_size
if ((self.version >= 5) and ((self.__class__.__name__ != 'magicsingleinfo') and (self.__class__.__name__ != 'magicgroupinfo') and (self.__class__.__name__ != 'resourceinfo') and (self.__class__.__name__ != 'terraininfo'))):
for m,c in enumerate(item):
j = m % len(newbytes)
item[m] = ((item[m] - newbytes[j]) + 256) % 256
rdata = rdata + item
k = k + 1
m = 0
file = open(self.file_name+'-un.bin', 'wb')
file.write(rdata)
file.close()
def load_bin_data(self):
array = []
item = []
addresses = []
#Load structure (calculate size)
struct_type_map={
"char":"c",
"byte":"b",
"ubyte":"B",
"_bool":"?",
"short":"h",
"ushort":"H",
"int":"i",
"uint":"I",
"long":"l",
"ulong":"L",
"quad":"q",
"uquad":"Q",
"float":"f",
"double":"d",
"str":"s",
"color":"B",
"rcolor":"B",
}
struct_vars = {'t':'','s':1,'l':1,'stp':1,'f':-1,'v':-1,'lpad':0,'rpad':0,'sa':-1,'sap':-1,'ea':-1,'eap':-1,'st':'','lm':0,'sm':0,'smb':0,'smea':0,'sv':0,'func':0}
#initialize addresses
struct_init_address = 0
struct_limit_address = 0
struct_marked_addresses = [0,0,0,0,0,0]
struct_func_indexes = []
for i, v in enumerate(list(zip(*self.frombin_map))[1]):
struct_item = struct_vars.copy()
struct_item.update(v)
if struct_item['smb']>=1:
struct_marked_addresses[struct_item['smb']] = struct_init_address
if struct_item['lm']>=1:
struct_init_address = struct_marked_addresses[struct_item['lm']]
if struct_item['sm']>=1:
struct_marked_addresses[struct_item['sm']] = struct_init_address
if (type(struct_item['func']) == FunctionType):
struct_func_indexes.append(i)
elif (struct_item['v'] == -1):
if type(struct_item['t']) == tuple:
struct_item['s'] = len(struct_item['t'])
struct_item['st'] = []
for j,t in enumerate(struct_item['t']):
struct_item['st'].append(struct_type_map[struct_item['t'][j]])
struct_item['st'] = tuple(struct_item['st'])
struct_item['s'] = len(struct_item['st'])
else:
struct_item['st'] = struct_type_map[struct_item['t']]
if ((struct_item['t'] == 'color') or (struct_item['t'] == 'rcolor')):
struct_item['s'] = 3
struct_item['sa'] = struct_init_address
struct_item['sap'] = struct_item['sa'] + struct_item['lpad']
struct_item['ea'] = struct_item['sap']
if type(struct_item['t']) == tuple:
for j,t in enumerate(struct_item['t']):
struct_item['ea'] = struct_item['ea'] + struct.calcsize(struct_item['st'][j]) * ((struct_item['stp'] * struct_item['l'])-(struct_item['stp']-1))
else:
struct_item['ea'] = struct_item['ea'] + struct.calcsize(struct_item['st']) * ((struct_item['s'] * struct_item['stp'] * struct_item['l'])-(struct_item['stp']-1))
if struct_item['smea']>=1:
struct_marked_addresses[struct_item['smea']] = struct_item['ea']
struct_item['eap'] = struct_item['ea'] + struct_item['rpad']
struct_init_address = struct_item['eap']
if (struct_init_address > struct_limit_address):
struct_limit_address = struct_init_address
#print(struct_item)
self.frombin_map[i] = (self.frombin_map[i][0],struct_item)
struct_size = struct_limit_address
#Load data
file = open(self.file_name+'.bin', 'rb')
data = bytearray(file.read())
file.close()
i = 0
k = 0
array_size = int(list(struct.unpack_from('<i',data[i:i+4]))[0])
if array_size != struct_size:
print(self.file_name+'.bin: Actual array size ('+str(array_size)+') doesn''t match structure size ('+str(struct_size)+')')
raise 'Size error.'
data = data[i+4:len(data)]
m = 0
k = 0
l = len(data)//struct_size
#2.2 encryption
if ((self.version >= 5) and ((self.__class__.__name__ != 'magicsingleinfo') and (self.__class__.__name__ != 'magicgroupinfo') and (self.__class__.__name__ != 'resourceinfo') and (self.__class__.__name__ != 'terraininfo'))):
newbytes = bytes.fromhex('98 9D 9F 68 E0 66 AB 70 E9 D1 E0 E0 CB DD D1 CB D5 CF')
while(k<l):
item = data[i:i+struct_size]
i = i+struct_size
if ((self.version >= 5) and ((self.__class__.__name__ != 'magicsingleinfo') and (self.__class__.__name__ != 'magicgroupinfo') and (self.__class__.__name__ != 'resourceinfo') and (self.__class__.__name__ != 'terraininfo'))):
for m,c in enumerate(item):
j = m % len(newbytes)
item[m] = ((item[m] - newbytes[j]) + 256) % 256
array.append(item)
k = k + 1
m = 0
#Associate the data with the structure
self.list = []
self.list.append(list(zip(*self.frombin_map))[0])
for y,rawrow in enumerate(array):
row = []
for x,cell_struct in enumerate(list(zip(*self.frombin_map))[1]):
if type(cell_struct['func']) == FunctionType:
cell = []
cell.append('0')
row.append(self.transformtostr(cell,**cell_struct))
else:
cell = []
if (cell_struct['v'] == -1):
i = cell_struct['sap']
for j in range(cell_struct['l']):
processed_data=list(struct.unpack_from('<'+str((cell_struct['s']//len(cell_struct['st']))*cell_struct['stp'])+"".join(cell_struct['st']),rawrow,i))[::cell_struct['stp']]
#if (x == 4) and (y == 70):
# #print(cell_struct['s'])
cell.append(processed_data)
i=i+cell_struct['s']*struct.calcsize("".join(cell_struct['st']))*cell_struct['stp'] #sizeof here
if (cell_struct['t'] == 'rcolor'):
cell[0].reverse()
else:
cell.append(cell_struct['v'])
#if y == 70:
# print(cell) #'s':0x02,'l':0x08
row.append(self.transformtostr(cell,**cell_struct))
self.list.append(row)
for x,row in enumerate(self.list):
if x>0:
for func_index in struct_func_indexes:
self.list[x][func_index] = list(zip(*self.frombin_map))[1][func_index]['func'](func_index, row)
deletions = 0
for z,struct_item_name in enumerate(list(zip(*self.frombin_map))[0]):
if (struct_item_name == ''):
del self.list[x][z-deletions]
deletions = deletions + 1
return
def i(self,x):
for i,v in enumerate(self.list):
if(x==v):
return i
break
return -1
def j(self,x):
for i,v in enumerate(list(zip(*self.frombin_map))[0]):
if(x==v):
return i
break
return -1
def remap(self,v):
for i,n in enumerate(list(zip(*v))[0]):
if self.j(n) == -1:
self.frombin_map.append((list(zip(*v))[0][i],list(zip(*v))[1][i]))
else:
if (list(zip(*v))[1][i] == {}):
del self.frombin_map[self.j(n)]
else:
self.frombin_map[self.j(n)] = (list(zip(*v))[0][i],list(zip(*v))[1][i])
def bintotsv(self):
file = open(self.file_name+'.txt', 'wt', encoding=toptableencoding)
for i,a in enumerate(self.list):
a=list(a)
if (i==0):
deletions = 0
for z,item in enumerate(a[:]):
if (item == ''):
del a[z-deletions]
deletions = deletions + 1
file.write('//'+'\t'.join(a))
else:
#print(a)
file.write('\n'+'\t'.join(a))
file.close()
return self
def trim_nullbytestr(string, flag=0): #flag=1, return "0" when string is empty
result = string
for i,byte in enumerate(string[:]):
if byte == '\00':
result = string[:i]
break
if (flag & 1) and ((string == '\00') or (string == '')):
result = '0'
return result
def transformtostr(self,result,t,s,l,stp,f,v,ea,eap,lpad,rpad,sa,sap,st,sm,lm,smb,smea,sv,func):
if v != -1:
return str(v)
_type = t
j = 0
for n,v in enumerate(result[:]):
m = 0
if type(t) == tuple:
_type = t[j]
is_integer = not((_type=="float") or (_type=="double") or (_type=="str") or (_type=="char") or (_type=="_bool"))
if is_integer:
if f==-1:
f=0
for k,w in enumerate(v[:]):
if is_integer:
#Result: flag=0x0001 = remove FF's, #flag=0x0002 = cut file defect bytes, #flag=0x0004 = remove 0's
if ((((f & 0x4) and (w == 0))) or
((f & 0x1) and ((w == (2**8)-1) or (w == (2**16)-1) or (w == (2**32)-1) or (w == (2**64)-1) or (w == -1))) or
((f & 0x2) and ((((_type=="byte") or (_type=="ubyte")) and (w == (2**8)-51)) or (((_type=="short") or (_type=="ushort")) and (w == (2**16)-12851)) or (((_type=="long") or (_type=="ulong")) and (w == (2**32)-842150451)) or (((_type=="quad") or (_type=="uquad")) and (w == (2**64)-3617008641903833651))))):
del result[j][m]
m=m-1
else:
if (f & 0x2 and w == 0):
result[j] = result[j][:m]
break
result[j][m]=str(result[j][m])
m=m+1
if (_type=="float"):
if f==-1:
f=3
result[j][k]=str(round(float(w),f))
if (_type=="str"):
if f==-1:
f=0
if (w[0] == 205) and (w[1] == 205):
result[j][m] = ''
else:
result[j][m] = w.decode('gbk','ignore')
for o,x in enumerate(result[j][m]):
if x == '\00':
result[j][m] = result[j][m][:o]
break
#result[j][m] = result[j][m].strip()
if ((f & 1) and (result[j][m] == '')):
result = '0'
if (result[j] != '0'):
result[j] = ','.join(result[j])
if is_integer:
if (result[j] == ''):
result[j] = '0'
j=j+1
if (_type=="str"):
result = ','.join(result)
if ((f & 1) and (result == '')):
result = '0'
else:
result = ';'.join(result)
if is_integer:
if (result == ''):
result = '0'
return result
]
for c in toptablebins:
myc = c()
try:
myc.load_bin_data()
except IOError:
print('"'+myc.file_name+'.bin'+'" doesn''t exist, skipping...')
myc.bintotsv()
these are a few lines below and above line 131 which is the error
for j,t in enumerate(struct_item['t']):
struct_item['st'].append(struct_type_map[struct_item['t'][j]])
struct_item['st'] = tuple(struct_item['st'])
struct_item['s'] = len(struct_item['st'])
else: <--- this is 131
struct_item['st'] = struct_type_map[struct_item['t']]
if ((struct_item['t'] == 'color') or (struct_item['t'] == 'rcolor')):
struct_item['s'] = 3
struct_item['sa'] = struct_init_address
struct_item['sap'] = struct_item['sa'] + struct_item['lpad']
struct_item['ea'] = struct_item['sap']
if type(struct_item['t']) == tuple:
for j,t in enumerate(struct_item['t']):
struct_item['ea'] = struct_item['ea'] + struct.calcsize(struct_item['st'][j]) * ((struct_item['stp'] * struct_item['l'])-(struct_item['stp']-1))
It doesn't look like the you have the proper indention. Remember, in Python, whitespace is very very important.
The problem section of code should probably start looking something like:
elif (struct_item['v'] == -1):
if type(struct_item['t']) == tuple:
struct_item['s'] = len(struct_item['t'])
struct_item['st'] = []
for j,t in enumerate(struct_item['t']):
struct_item['st'].append(struct_type_map[struct_item['t'][j]])
struct_item['st'] = tuple(struct_item['st'])
struct_item['s'] = len(struct_item['st'])
This seems a bit weird:
l = len(data)//struct_size
You really need to post the traceback the the interpreter is giving you - it makes finding the error much easier.