I have several arrays that contain the same customer names in different orders. What I am trying to do is the following:
1 - Take the customer names and match them across the arrays, so that the random ordering no longer matters;
2 - After obtaining this comparison, the output should be as follows:
The solution below works when I define the arrays manually, but when I load them from the database I get an error as output.
It works:
array1 = [['CLIENT1', '2', '3'],['CLIENT2', '3', '4'],['CLIENT3', '4', '5']]
array2 = [['CLIENT3', '2', '3'],['CLIENT2', '3', '4'],['CLIENT1', '4', '5']]
array3 = [['CLIENT2', '2', '3'],['CLIENT1', '3', '4'],['CLIENT3', '4', '5']]
The output should look like this: Customer Name, value contained in array1 for this customer name, value contained in array2 for this customer name and value contained in array3 for this customer name
SCRIPT
#!/usr/bin/python
# -*- coding: utf-8 -*-
import psycopg2
from datetime import datetime, date, time, timedelta
# create script head
print ('----------------------------------------------------------------------------')
print ('Initializing script: '+str(date.today()))
print ('----------------------------------------------------------------------------')
################################################################################
# Set connection to postgres
connpostgres = psycopg2.connect("host='192.168.0.245'"
" dbname='metrics'"
" user='postgres'"
" password=pass123")
cursorpost = connpostgres.cursor()
################################################################################
# Create arrays
################################################################################
cursorpost.execute(rz_collect)
rz_collect = cursorpost.fetchall()
array_rz_collect = []
for row in rz_collect:
array_rz_collect.append(row)
cursorpost.execute(sql_on_off)
sql_on_off = cursorpost.fetchall()
array_sql_on_off = []
for row in sql_on_off:
array_sql_on_off.append(row)
cursorpost.execute(sql_gaps_so)
sql_gaps_so = cursorpost.fetchall()
array_sql_gaps_so = []
for row in sql_gaps_so:
array_sql_gaps_so.append(row)
cursorpost.execute(sql_gaps_db)
sql_gaps_db = cursorpost.fetchall()
array_sql_gaps_db = []
for row in sql_gaps_db:
array_sql_gaps_db.append(row)
cursorpost.execute(sql_gaps_sap)
sql_gaps_sap = cursorpost.fetchall()
array_sql_gaps_sap = []
for row in sql_gaps_sap:
array_sql_gaps_sap.append(row)
################################################################################
# Check and align arrays
# Initialize a dictionary with key = client name, value = list of client entries
result = {}
# Add values from array1
for client_info in array_rz_collect:
# Parse current entry
client_name = client_info[0]
client_values = client_info[1:]
print(client_values)
# Add previous values if exitant
if client_name in result.keys():
client_values.extend(result[client_name])
# Update clients dictionary
result[client_name] = client_values
# Add values from array2
for client_info in array_sql_on_off:
# Parse current entry
client_name = client_info[0]
client_values = client_info[1:]
# Add previous values if exitant
if client_name in result.keys():
client_values.extend(result[client_name])
# Update clients dictionary
result[client_name] = client_values
# Add values from array3
for client_info in array_sql_gaps_so:
# Parse current entry
client_name = client_info[0]
client_values = client_info[1:]
# Add previous values if exitant
if client_name in result.keys():
client_values.extend(result[client_name])
# Update clients dictionary
result[client_name] = client_values
# Print result information
for client_name, client_values in result.items():
print("Result: " + str(client_name) + ", " + str(client_values))
OUTPUT
File "SCRIPT.py", line 166, in <module>
client_values.extend(result[client_name])
AttributeError: 'tuple' object has no attribute 'extend'
DESIRED OUTPUT
Result: CLIENT1, ['3', '4', '4', '5', '2', '3']
Result: CLIENT2, ['2', '3', '3', '4', '3', '4']
Result: CLIENT3, ['4', '5', '2', '3', '4', '5']
The output of cursorpost.fetchall() (rz_collect) is a list of tuples.
In the code,
rz_collect = cursorpost.fetchall()
array_rz_collect = []
for row in rz_collect:
array_rz_collect.append(row)
array_rz_collect is same as rz_collect, therefore it too is a list of tuples.
Side note: That for loop is unnecessary, you can directly operate on rz_collect
In the code,
for client_info in array_rz_collect:
# Parse current entry
client_name = client_info[0]
client_values = client_info[1:]
print(client_values)
# Add previous values if exitant
if client_name in result.keys():
client_values.extend(result[client_name])
client_info is a tuple.
client_values is also a tuple since you are just slicing another tuple. Tuples do not have the extend method since they are not mutable like lists.
A simple fix to your problem is to convert the tuple to list.
client_info = list(client_info) # new line
client_name = client_info[0]
client_values = client_info[1:]
Related
I have a .txt file that contains elements that look like this:
Smith 25 35 NC
Johnson 12 4 OH
Jones 23 14 FL
Lopez 2 7 TX
And I want to read the .txt file line by line, and save each of the elements (Name, number, number, state) in an array matrix or a list 4 x number_of_people , while ignoring any blank spaces. I'm trying to not use split() for it, but could use a "manual" form of split() instead, like shown below with split1.
def split1(line,delim):
    """Split line at every occurrence of delim without using str.split().

    delim is compared against one item of line at a time, so only a
    single-character delimiter can ever match. Consecutive delimiters
    produce empty strings in the result.

    Returns the list of pieces; it always contains at least one element.
    """
    pieces = []
    start = 0
    for pos, char in enumerate(line):
        if char == delim:
            pieces.append(line[start:pos])
            start = pos + 1
    # Whatever follows the last delimiter (possibly nothing) is the final piece.
    pieces.append(line[start:])
    return pieces
f = open("Names.txt")
number_of_people = 0
#This portion is meant to go through the entire .txt file, 1 time and count how many people are listed on the file so I can make an appropiatly sized matrix, in the case of the example is 4
while True:
file_eof = f.readline()
if file_eof != '':
number_of_people = number_of_people + 1
if file_eof == '':
break
#This portion reads through the .txt file agin and saves the names of the list
while True:
file_eof = f.readline()
if file_eof != '':
split1(file_eof, '')
#print(file_eof)
if file_eof == '':
print('No more names on the list')
break
f.close()
I know there could be things missing here, and that's exactly what I would need help with here. If there is any "better" way of dealing with this than what I got please let me know and show me if possible.
Thank you for your time!
I don't understand why you want to create an array of a specific size first. I suppose you have a background in C? How large is the file?
Here are 2 pythonic ways to read and store that information:
filename = r"data.txt"

# Access items by index, e.g. people_as_list[0][0] is "Smith"
with open(filename) as f:  # with statement = context manager = implicit/automatic closing of the file
    # List comprehension; blank lines are skipped so they do not become empty records
    people_as_list = [line.split() for line in f if line.strip()]

# Access items by index, then key, e.g. people_as_dict[0]["name"] is "Smith"
people_as_dict = []
with open(filename) as f:
    for line in f:
        if not line.strip():
            # A blank line splits into zero fields and would break the unpacking below
            continue
        name, number1, number2, state = line.split()  # Iterable unpacking
        person = {
            "name": name,
            "number1": number1,
            "number2": number2,
            "state": state,
        }
        people_as_dict.append(person)

print(people_as_list)
print(people_as_dict)
Output:
[['Smith', '25', '35', 'NC'], ['Johnson', '12', '4', 'OH'], ['Jones', '23', '14', 'FL'], ['Lopez', '2', '7', 'TX']]
[{'name': 'Smith', 'number1': '25', 'number2': '35', 'state': 'NC'}, {'name': 'Johnson', 'number1': '12', 'number2': '4', 'state': 'OH'}, {'name': 'Jones', 'number1': '23', 'number2': '14', 'state': 'FL'}, {'name': 'Lopez', 'number1': '2', 'number2': '7', 'state': 'TX'}]
I have a dictionary named data.
Now I want to append more data to the dictionary. However it seems that I do not append, but overwrite the dictionary. How can I append data?
Code:
# Add something to data
data = {'level_a_title': 'Disk 1', 'level_a_show_on_analysis_report': '1', 'level_a_type': 'text', 'level_a_value': 'Windows'}
# Add another line for Data
data = {**data, **{'level_a_title': 'Disk 2', 'level_a_show_on_analysis_report': '1', 'level_a_type': 'text', 'level_a_value': 'Backup'}}
# Print everything
for key, value in data.items():
print(key + ' = ' + str(value))
Output:
C:\Users\dpa\PycharmProjects\json\venv\Scripts\python.exe C:/Users/dpa/PycharmProjects/json/main.py
level_a_title = Disk 2
level_a_show_on_analysis_report = 1
level_a_type = text
level_a_value = Backup
Process finished with exit code 0
A dictionary maps each key to exactly one value, so level_a_title can only map to Disk 1 or to Disk 2, not both. If you want to keep both sets of values, store the dictionaries in a list, like this:
# Each disk is kept as its own dictionary inside a list, so adding a second
# disk appends a new record instead of overwriting the keys of the first one.
disk_1 = {
    'level_a_title': 'Disk 1',
    'level_a_show_on_analysis_report': '1',
    'level_a_type': 'text',
    'level_a_value': 'Windows',
}
disk_2 = {
    'level_a_title': 'Disk 2',
    'level_a_show_on_analysis_report': '1',
    'level_a_type': 'text',
    'level_a_value': 'Backup',
}
data = [disk_1]
data.append(disk_2)

# Print everything
for idx, line in enumerate(data):
    print("Index: ", idx)
    for key, value in line.items():
        print(f"{key} = {value}")
What I am trying to accomplish is printing 10 lines only instead of the whole list using pprint(dict(str_types))
Here is my code
from collections import defaultdict
str_type_re = re.compile(r'\b\S+\.?$', re.IGNORECASE)
# Street types that are considered correct and are therefore not reported.
expected = [
    "Street", "Avenue", "Boulevard", "Drive", "Court", "Place",
    "Square", "Lane", "Road", "Trail", "Parkway", "Commons",
]


def audit_str_type(str_types, str_name, rex):
    """Record str_name under its street type when that type is not expected.

    rex.search() is applied to str_name; the matched text is treated as the
    street type. Names without a match, or whose type is listed in
    expected, leave str_types untouched.

    str_types must map a street type to a set (e.g. a defaultdict(set)).
    """
    found = rex.search(str_name)
    if not found:
        return
    street_type = found.group()
    if street_type in expected:
        return
    str_types[street_type].add(str_name)
I defined a function that audits tag elements where k="addr:street", and also any tag elements match the is_str_name function.
def audit(osmfile,rex):
    """Collect the unexpected street types found in an OSM XML file.

    osmfile is the path of the file to parse; rex is passed through
    unchanged to audit_str_type for every street-name tag.

    Returns a defaultdict(set) that audit_str_type has filled in.
    """
    str_types = defaultdict(set)
    # The context manager closes the file even if parsing raises.
    with open(osmfile, "r", encoding="utf8") as osm_file:
        # "end" events are used because iterparse only guarantees that an
        # element's children are present once its closing tag has been seen;
        # on a "start" event the <tag> children may still be missing.
        for _, elem in ET.iterparse(osm_file, events=("end",)):
            if elem.tag in ("node", "way"):
                for tag in elem.iter("tag"):
                    if is_str_name(tag):
                        audit_str_type(str_types, tag.attrib['v'], rex)
    return str_types
In the code above , I used "is_str_name" function to filter tag when calling the audit function to audit street names.
def is_str_name(elem):
    """Return True when elem's 'k' attribute is the street-name key "addr:street"."""
    key = elem.attrib['k']
    return key == "addr:street"
str_types = audit(mydata, rex = str_type_re)
pprint.pprint(dict(str_types[:10]))
Use pprint.pformat to get back the string representation of the object instead of printing it directly, then you can split it up by lines and only print out the first few:
whole_repr = pprint.pformat(dict(str_types))
for line in whole_repr.splitlines()[:10]:
print(line)
Note that I couldn't test this since you did not have a MCVE but I did verify it with a more trivial example:
>>> import pprint
>>> thing = pprint.pformat({i:str(i) for i in range(10000)})
>>> type(thing), len(thing)
(<class 'str'>, 147779)
>>> for line in thing.splitlines()[:10]:print(line)
{0: '0',
1: '1',
2: '2',
3: '3',
4: '4',
5: '5',
6: '6',
7: '7',
8: '8',
9: '9',
I am a bit stuck reading a file block-wise, and I am having difficulty extracting some selected data from each block:
Here is my file content :
DATA.txt
#-----FILE-----STARTS-----HERE--#
#--COMMENTS CAN BE ADDED HERE--#
BLOCK IMPULSE DATE 01-JAN-2010 6 DEHDUESO203028DJE \
SEQUENCE=ai=0:at=221:ae=3:lu=100:lo=NNU:ei=1021055:lr=1: \
USERID=ID=291821 NO_USERS=3 GROUP=ONE id_info=1021055 \
CREATION_DATE=27-JUNE-2013 SN=1021055 KEY ="22WS \
DE34 43RE ED54 GT65 HY67 AQ12 ES23 54CD 87BG 98VC \
4325 BG56"
BLOCK PASSION DATE 01-JAN-2010 6 DEHDUESO203028DJE \
SEQUENCE=ai=0:at=221:ae=3:lu=100:lo=NNU:ei=324356:lr=1: \
USERID=ID=291821 NO_USERS=1 GROUP=ONE id_info=324356 \
CREATION_DATE=27-MAY-2012 SN=324356 KEY ="22WS \
DE34 43RE 342E WSEW T54R HY67 TFRT 4ER4 WE23 XS21 \
CD32 12QW"
BLOCK VICTOR DATE 01-JAN-2010 6 DEHDUESO203028DJE \
SEQUENCE=ai=0:at=221:ae=3:lu=100:lo=NNU:ei=324356:lr=1: \
USERID=ID=291821 NO_USERS=5 GROUP=ONE id_info=324356 \
CREATION_DATE=27-MAY-2012 SN=324356 KEY ="22WS \
DE34 43RE 342E WSEW T54R HY67 TFRT 4ER4 WE23 XS21 \
CD32 12QW"
#--BLOCK--ENDS--HERE#
#--NEW--BLOCKS--CAN--BE--APPENDED--HERE--#
I am only interested in Block Name , NO_USERS, and id_info of each block .
these three data to be saved to a data-structure(lets say dict), which is further stored in a list :
[{Name: IMPULSE ,NO_USER=3,id_info=1021055},{Name: PASSION ,NO_USER=1,id_info=324356}. . . ]
any other data structure which can hold the info would also be fine.
So far i have tried getting the block names by reading line by line :
fOpen = open('DATA.txt')
unique =[]
for row in fOpen:
if "BLOCK" in row:
unique.append(row.split()[1])
print unique
i am thinking of regular expression approach, but i have no idea where to start with.
Any help would be appreciate.Meanwhile i am also trying , will update if i get something . Please help .
You could use groupby to find each block, use a regex to extract the info and put the values in dicts:
from itertools import groupby
import re
with open("test.txt") as f:
    data = []
    # find NO_USERS= 1+ digits or id_info= 1+ digits
    # (raw string, so \d reaches the regex engine instead of being an
    # invalid string escape)
    r = re.compile(r"NO_USERS=\d+|id_info=\d+")
    # consecutive lines are grouped by whether they start with BLOCK
    grps = groupby(f, key=lambda x: x.strip().startswith("BLOCK"))
    for k, v in grps:
        # if k is True we have a block line
        if k:
            # get name after BLOCK
            name = next(v).split(None, 2)[1]
            # the next group holds the lines that follow the BLOCK line
            t = next(grps)[1]
            # we want the second line after BLOCK
            _, info_line = next(t), next(t)
            d = dict(s.split("=") for s in r.findall(info_line))
            # add name to dict
            d["Name"] = name
            # add dict to data list
            data.append(d)
print(data)
Output:
[{'NO_USERS': '3', 'id_info': '1021055', 'Name': 'IMPULSE'},
{'NO_USERS': '1', 'id_info': '324356', 'Name': 'PASSION'},
{'NO_USERS': '5', 'id_info': '324356', 'Name': 'VICTOR'}]
Or without groupby as your file follows a format we just need to extract the second line after the BLOCK line:
with open("test.txt") as f:
    data = []
    # raw string, so \d reaches the regex engine instead of being an
    # invalid string escape
    r = re.compile(r"NO_USERS=\d+|id_info=\d+")
    for line in f:
        # if True we have a new block
        if line.startswith("BLOCK"):
            # call next twice to get the second line after BLOCK
            _, info_line = next(f), next(f)
            # get name after BLOCK
            name = line.split(None, 2)[1]
            # find our substrings in the second line after BLOCK
            d = dict(s.split("=") for s in r.findall(info_line))
            d["Name"] = name
            data.append(d)
print(data)
Output:
[{'NO_USERS': '3', 'id_info': '1021055', 'Name': 'IMPULSE'},
{'NO_USERS': '1', 'id_info': '324356', 'Name': 'PASSION'},
{'NO_USERS': '5', 'id_info': '324356', 'Name': 'VICTOR'}]
To extract values you can iterate:
for dct in data:
print(dct["NO_USERS"])
Output:
3
1
5
If you want a dict of dicts and to access each section from 1-n, you can store them as nested dicts using 1-n as the keys:
from itertools import count
import re
with open("test.txt") as f:
    # cn hands out the running block number, starting at 1
    data, cn = {}, count(1)
    # raw string, so \d reaches the regex engine instead of being an
    # invalid string escape
    r = re.compile(r"NO_USERS=\d+|id_info=\d+")
    for line in f:
        if line.startswith("BLOCK"):
            # the second line after BLOCK carries NO_USERS and id_info
            _, info_line = next(f), next(f)
            name = line.split(None, 2)[1]
            d = dict(s.split("=") for s in r.findall(info_line))
            d["Name"] = name
            data[next(cn)] = d
    # the counter is one past the last number handed out
    data["num_blocks"] = next(cn) - 1
Output:
from pprint import pprint as pp
pp(data)
{1: {'NO_USERS': '3', 'Name': 'IMPULSE', 'id_info': '1021055'},
2: {'NO_USERS': '1', 'Name': 'PASSION', 'id_info': '324356'},
3: {'NO_USERS': '5', 'Name': 'VICTOR', 'id_info': '324356'},
'num_blocks': 3}
'num_blocks' will tell you exactly how many blocks you extracted.
These are previously defined.
def get_service_code(service):
    """Return the first element of service, converted to a string."""
    code = service[0]
    return str(code)
service_106_data = filter_routes(bus_stations, "106") #[('106', '1', '1', '43009'), ('106', '1', '2', '43179'), .... ('106', '2', '51', '43009')]
service_106 = make_service(service_106_data, "106") # ('106', [['43009', '43179',...'43009']])
print(get_service_code(service_106)) --> should return 106
bus_stations here is a txt file that contain a list of numbers like this
106,1,1,43009
106,1,2,43179
.
.
.
106,2,1,03239
106,2,2,03211
.
.
.
106,2,50,43171
106,2,51,43009
Then this is also previously defined
def get_route(service, direction):
    """Return str() of the first item stored at index int(direction) of service."""
    index = int(direction)
    route = service[index][0]
    return str(route)
print(get_route(service_106, '1'))
should return this
['43009', '43179', '43189', '43619', '43629', '42319', '28109', '28189', '28019', '20109', '17189', '17179', '17169', '17159', '19049', '19039', '19029', '19019', '11199', '11189', '11401', '11239', '11229', '11219', '11209', '13029', '13019', '09149', '09159', '09169', '09179', '09048', '09038', '08138', '08057', '08069', '04179', '02049', 'E0200', '02151', '02161', '02171', '03509', '03519', '03539', '03129', '03218', '03219']
def make_service(service_data, service_code):
    """Build a service tuple from the entries of one bus service.

    Each entry of service_data is indexed: entry[1] is read as the
    direction and entry[3] as the stop, e.g. ('106', '1', '1', '43009').
    Stops are collected into one route per run of equal directions.

    Returns (service_code, curr_route), where curr_route holds only the
    stops of the last run of entries sharing a direction. The routes list
    and the direction list l are built but are not part of the result.

    Fails on the service_data[0] lookup when service_data is empty
    (IndexError for a list).
    """
    routes = []
    curr_route = []
    first = service_data[0] #('106', '1', '1', '43009')
    curr_dir = first[1] # '1'
    l = list(curr_dir) # ['1'] -- list() splits a string into its characters
    for entry in service_data:
        direction = entry[1] #'1'
        stop = entry[3] #'43009'
        if direction == curr_dir:
            curr_route.append(stop) #[43009]
        else:
            # The direction changed: store the finished route and start a new one.
            routes.append(curr_route) #[[]]
            curr_route = [stop] #['43009']
            curr_dir = direction #not '1'
            # NOTE(review): this appends a nested list, giving e.g. ['1', ['2']]
            l.append(list(direction)) # ['1', __]
    # Store the route that was still being collected when the loop ended.
    routes.append(curr_route) #[['43009']]
    #modify this code below
    return (service_code,curr_route) #("106", [['43009']])
service_106 = make_service(service_106_data, "106")
print(service_106)
print(get_service_code((service_106))) # should return 106
expected output for print(service_106) is
('106',['1','2'],['03239', '03211', 'E0321', 'E0564', '03222', 'E0599', '03531', '03511', '03501', '02051', '02061', '04111', '04121', '08041', '08031', '08111', '08121', '09059', '09022', '09111', '09121', '09131', '09141', '13011', '13021', '11201', '11211', '11221', '11231', '11419', '11409', '11181', '11191', '19011', '19021', '19031', '19041', '17151', '17161', '17171', '17181', '20101', '28011', '28181', '28101', '42311', '43621', '43611', '43181', '43171', '43009'])
Here ['1','2'] is supposed to be the newly added list. Besides ['1','2'], I should also be able to add ['A4'] / ['A2','A4'] or any other non-numeric list.
I am only supposed to add new lines to the code and modify the last line.
I suppose you can use:
return tuple([service_code] + service_data)
I think you just need
return (service_code, list(set(zip(*service_data)[1])), curr_route) #("106", [['43009']])
It's very hard to tell, though (but this does give the expected output)
using
make_service([('106', '1', '1', '43009'), ('106', '1', '2', '43179'), ('106', '2', '51', '43009')],"106")
results in ('106', ['1', '2'], ['43009'])