Iterate over data frame and create a text file - python

I want to create multiple line items for the agreement number. As you can see below, I have used only Agreement_Number[0] to print the first value from dental_df['Agreement_Number'],
but I want all of the items in the agreement number column to be printed.
If I pass the whole column, the __str__ method gives me an error:
AttributeError: 'Series' object has no attribute 'ljust'
Can anybody help me? I am missing something.
class Reporting_Information:
    """ ES0388F4 UCCI 130 Record (Reporting Information) """
    fields = (
        ('Agreement_Number', 13),
        ('Contract_ID', 27),
        ('Record_Identifier', 3),
        ('Sequence_Number', 7),
        ('Report_Count', 2),
        ('Report_Qualifier_Code', 2),
        ('Report_Qualifier', 0),
        ('Effective_Date', 10),
        ('Report_ID', 30),
        ('Report_Qualifier2', 10),
        ('Cancel_Date', 0),
        ('Filler', 396)
    )

    def __init__(self):
        self.Agreement_Number = ''
        self.Contract_ID = ''
        self.Record_Identifier = '130'
        self.Sequence_Number = '0'
        self.Report_Count = '01'
        self.Report_Qualifier_Code = 'LU'
        self.Report_Qualifier = ''
        self.Effective_Date = ''
        self.Report_ID = ''
        self.Report_Qualifier2 = ''
        self.Cancel_Date = ''
        self.Filler = ''

    def __str__(self):
        # join every field, not just the first one
        return ''.join(getattr(self, field_name).ljust(width)
                       for field_name, width in self.fields)
r = Reporting_Information()
r.Agreement_Number = dental_df['Agreement_Number'][0]
r1 = str(r)
print(r1)
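The error means the whole pandas Series was assigned to Agreement_Number, so ljust ended up being called on a Series instead of a string. A minimal sketch of a fix (the output file name 'ucci_130_records.txt' is an assumption): build one record per row and write each formatted line out.
lines = []
for value in dental_df['Agreement_Number']:
    r = Reporting_Information()
    r.Agreement_Number = str(value)  # str() guards against numeric dtypes
    lines.append(str(r))

with open('ucci_130_records.txt', 'w') as f:  # hypothetical output path
    f.write('\n'.join(lines) + '\n')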

Related

SQLAlchemy with_entities with dynamic inputs

How can I provide the with_entities option with dynamic input?
At the moment I have to do this:
columns = [DataModel.col1, DataModel.col2, ...]
data = DataModel.query.order_by(DataModel.id.desc()).with_entities(*columns).first()
But what should I do if I get the column names as query-string parameters and have to build the list dynamically?
EDIT: solved it this way.
In my model I define the classmethod:
@classmethod
def find_by_filter(cls, attr):
    search_columns = [getattr(cls, i) for i in attr]
    return cls.query.order_by(cls.id.desc()).with_entities(*search_columns).first()
and then I can call it from my REST API this way:
liste = ["column1", "column2", "column3", "column4"]
data = DataModel.find_by_filter(liste)
I had this same challenge and this is how I solved it.
columns is just a CSV string, e.g. "field1,field2,field3".
It could be condensed further, but this is enough for readability.
def pick_Columns(modelClass, columns):
    if len(columns) == 0:
        return None
    colArray = columns.split(',')
    modelObjects = [eval(f'{modelClass.__name__}.{col}') for col in colArray]
    return modelObjects

def Get_Data(**kwargs):
    retVal = kwargs['db'].query(m_DataModel)
    if kwargs.get('columns', None) is not None:
        colObj = pick_Columns(m_DataModel, kwargs['columns'])
        retVal = retVal.with_entities(*colObj)
        retVal = [row._asdict() for row in retVal.all()]  # with_entities returns a tuple instead of a dict. Remove this line if that's what you want
    else:
        retVal = retVal.all()
    return retVal
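As an aside, the eval call can be swapped for getattr, which does the same attribute lookup without executing arbitrary strings; a minimal sketch under the same assumptions (columns is a CSV string of column names):
def pick_columns(model_class, columns):
    # 'columns' is a CSV string, e.g. "field1,field2,field3"
    if not columns:
        return None
    return [getattr(model_class, col) for col in columns.split(',')]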

How to write dictionaries to Bigquery in Dataflow using python

I am trying to read a csv from GCP Storage, convert it into dictionaries, and then write to a BigQuery table as follows:
p | ReadFromText("gs://bucket/file.csv")
  | (beam.ParDo(BuildAdsRecordFn()))
  | WriteToBigQuery('ads_table', dataset='dds', project='doubleclick-2', schema=ads_schema)
where 'doubleclick-2' and 'dds' are an existing project and dataset, and ads_schema is defined as follows:
ads_schema='Advertiser_ID:INTEGER,Campaign_ID:INTEGER,Ad_ID:INTEGER,Ad_Name:STRING,Click_through_URL:STRING,Ad_Type:STRING'
BuildAdsRecordFn() is defined as follows:
class AdsRecord:
    dict = {}

    def __init__(self, line):
        record = line.split(",")
        self.dict['Advertiser_ID'] = record[0]
        self.dict['Campaign_ID'] = record[1]
        self.dict['Ad_ID'] = record[2]
        self.dict['Ad_Name'] = record[3]
        self.dict['Click_through_URL'] = record[4]
        self.dict['Ad_Type'] = record[5]

class BuildAdsRecordFn(beam.DoFn):
    def __init__(self):
        super(BuildAdsRecordFn, self).__init__()

    def process(self, element):
        text_line = element.strip()
        ads_record = AdsRecord(text_line).dict
        return ads_record
However, when I run the pipeline, I got the following error:
"dataflow_job_18146703755411620105-B" failed., (6c011965a92e74fa): BigQuery job "dataflow_job_18146703755411620105-B" in project "doubleclick-2" finished with error(s): errorResult: JSON table encountered too many errors, giving up. Rows: 1; errors: 1., error: JSON table encountered too many errors, giving up. Rows: 1; errors: 1., error: JSON parsing error in row starting at position 0: Value encountered without start of object
Here is the sample testing data I used:
100001,1000011,10000111,ut,https://bloomberg.com/aliquam/lacus/morbi.xml,Brand-neutral
100001,1000011,10000112,eu,http://weebly.com/sed/vel/enim/sit.jsp,Dynamic Click
I'm new to both Dataflow and Python, so I could not figure out what could be wrong in the above code. I greatly appreciate any help!
I just implemented your code and it didn't work for me either, but I got a different error message (something like "you can't return a dict as the result of a ParDo").
This code worked normally for me. Notice that I'm no longer using the class attribute dict, and that a list is now returned:
ads_schema = 'Advertiser_ID:INTEGER,Campaign_ID:INTEGER,Ad_ID:INTEGER,Ad_Name:STRING,Click_through_URL:STRING,Ad_Type:STRING'

class BuildAdsRecordFn(beam.DoFn):
    def __init__(self):
        super(BuildAdsRecordFn, self).__init__()

    def process(self, element):
        text_line = element.strip()
        ads_record = self.process_row(text_line)  # pass the stripped line
        return ads_record

    def process_row(self, row):
        dict_ = {}
        record = row.split(",")
        dict_['Advertiser_ID'] = int(record[0]) if record[0] else None
        dict_['Campaign_ID'] = int(record[1]) if record[1] else None
        dict_['Ad_ID'] = int(record[2]) if record[2] else None
        dict_['Ad_Name'] = record[3]
        dict_['Click_through_URL'] = record[4]
        dict_['Ad_Type'] = record[5]
        return [dict_]

with beam.Pipeline() as p:
    (p | ReadFromText("gs://bucket/file.csv")
       | beam.Filter(lambda x: x[0] != 'A')  # drop the header line
       | (beam.ParDo(BuildAdsRecordFn()))
       | WriteToBigQuery('ads_table', dataset='dds',
                         project='doubleclick-2', schema=ads_schema))
    # | WriteToText('test.csv'))
This is the data I simulated:
Advertiser_ID,Campaign_ID,Ad_ID,Ad_Name,Click_through_URL,Ad_Type
1,1,1,name of ad,www.url.com,sales
1,1,2,name of ad2,www.url2.com,sales with sales
I also filtered out the header line that I created in my file (in the Filter operation); if you don't have a header then this is not necessary.
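As a side note, process can also yield each dictionary instead of returning a one-element list; Beam treats every yielded value as one output element. A minimal sketch of the same DoFn written that way (same six-column rows assumed):
import apache_beam as beam

class BuildAdsRecordFn(beam.DoFn):
    def process(self, element):
        record = element.strip().split(",")
        # yield one dict per input line; Beam collects each yielded value
        yield {
            'Advertiser_ID': int(record[0]) if record[0] else None,
            'Campaign_ID': int(record[1]) if record[1] else None,
            'Ad_ID': int(record[2]) if record[2] else None,
            'Ad_Name': record[3],
            'Click_through_URL': record[4],
            'Ad_Type': record[5],
        }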

Python: how to parse and add contents to a text file

Hello, I have a network in a particular format, i.e. .gdf. This is a text file in the following format:
network:
nodedef>name VARCHAR,label VARCHAR
0,' 0 '
1,' 1 '
2,' 2 '
edgedef>node1 VARCHAR,node2 VARCHAR,weight DOUBLE
0,1,0.2
0,2,0.2
0,3,0.2
0,4,0.333333
where the first part refers to nodes and the second part to edges.
I want to read the file, add an attribute to each node, and return the following:
network:
nodedef>name VARCHAR,label VARCHAR, att1 VARCHAR
0,' 0 ', 'Paul'
1,' 1 ', 'Jack'
2,' 2 ', 'John'
edgedef>node1 VARCHAR,node2 VARCHAR,weight DOUBLE
0,1,0.2
0,2,0.2
0,3,0.2
0,4,0.333333
Here is some code that does the first half of what you asked for. It will parse the .GDF file and make the information available to you. Adding attributes and writing them is left as an exercise for the reader.
import ast
import collections
import re


def main():
    parser = GDFParser()
    with open('network.gdf') as file:
        parser.read(file)
    print(*parser.data, sep='\n')


def pivot(iterable):
    columns = []
    for row in iterable:
        columns.extend([] for _ in range(len(row) - len(columns)))
        for column, cell in zip(columns, row):
            column.append(cell)
    return columns


class GDFParser:
    HEADER = re.compile(r'\w+:')
    DEF = re.compile(r'\w+>\w+ (?:DOUBLE|VARCHAR)(?:,\w+ (?:DOUBLE|VARCHAR))*')
    CAST = dict(DOUBLE=float, VARCHAR=str)

    def __init__(self):
        self.__header = None
        self.__type = []
        self.__data = []

    @property
    def header(self):
        return self.__header

    @property
    def data(self):
        return tuple(self.__data)

    def read(self, file):
        for line in file:
            self.__read_line(line.strip())

    def __read_line(self, line):
        if self.HEADER.fullmatch(line):
            self.__process_header(line)
        elif self.DEF.fullmatch(line):
            self.__process_def(line)
        else:
            self.__process_data(line)

    def __process_header(self, line):
        if self.header:
            raise ValueError('header was previously set')
        self.__header = line[:-1]

    def __process_def(self, line):
        name, fields = line.split('>')
        columns, casts = pivot(field.split() for field in fields.split(','))
        self.__type.append((collections.namedtuple(name, columns),
                            tuple(map(self.CAST.__getitem__, casts))))

    def __process_data(self, line):
        if not self.__type:
            raise ValueError('a definition must come before its data')
        kind, casts = self.__type[-1]
        self.__data.append(kind(*(cast(item) for cast, item in
                                  zip(casts, ast.literal_eval(line)))))


if __name__ == '__main__':
    main()
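For the half left as an exercise, here is a minimal line-oriented sketch of adding the attribute and writing the file back out, assuming the new values live in a dict keyed by node id (the names and file paths are made up):
names = {0: 'Paul', 1: 'Jack', 2: 'John'}  # hypothetical attribute values

with open('network.gdf') as src, open('network_out.gdf', 'w') as dst:
    in_nodes = False
    for line in src:
        line = line.rstrip('\n')
        if line.startswith('nodedef>'):
            in_nodes = True
            line += ', att1 VARCHAR'  # extend the node definition
        elif line.startswith('edgedef>'):
            in_nodes = False
        elif in_nodes and line:
            node_id = int(line.split(',', 1)[0])
            line += ", '%s'" % names.get(node_id, '')
        dst.write(line + '\n')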

Python - Getting Attributes From A File of Constants

I have a file of constant variables that I need to query and I am not sure how to go about it.
I have a database query which is returning user names and I need to find the matching user name in the file of constant variables.
The file looks like this:
SALES_MANAGER_01 = {"user_name": "BO01", "password": "password", "attend_password": "BO001",
                    "csm_password": "SM001", "employee_num": "BOSM001"}
There is a whole bunch of users just like the one above.
My function looks like this:
#attr("user_test")
def test_get_user_for_login(self):
application_code = 'BO'
user_from_view = self.select_user_for_login(application_code=application_code)
users = [d['USER'] for d in user_from_view]
user_with_ent = choice(users)
user_wo_ent = user_with_ent[-4:]
password = ""
global_users = dir(gum)
for item in global_users:
if user_wo_ent not in item.__getattr__("user_name"):
user_with_ent = choice(users)
user_wo_ent = user_with_ent[-4:]
else:
password = item.__getattr__("password")
print(user_wo_ent, password)
global_users = dir(gum) is my file of constants. So I know I am doing something wrong, since I am getting an attribute error, AttributeError: 'str' object has no attribute '__getattr__'; I am just not sure how to go about resolving it.
You should reverse your looping as you want to compare each item to your match condition. Also, you have a dictionary, so use it to do some heavy lifting.
You need to add some imports:
import re
from ast import literal_eval
I've changed the dir(gum) bit to be this function.
def get_global_users(filename):
    gusers = {}  # create a global users dict
    p_key = re.compile(ur'\b\w*\b')  # regex to get the first part, e.g. SALES_MANAGER_01
    p_value = re.compile(ur'\{.*\}')  # regex to grab everything in {}
    with open(filename) as f:  # open the file and work through it
        for line in f:  # for each line
            gum_key = p_key.match(line)  # pull out the key
            gum_value = p_value.search(line)  # pull out the value
            # Here is the real action: update the dictionary with the
            # match of gum_key and the match of gum_value
            gusers[gum_key.group()] = literal_eval(gum_value.group())
    return gusers  # return the dictionary
The bottom of your existing code is replaced with this.
global_users = get_global_users(gum)  # assign return to global_users

for key, value in global_users.iteritems():  # walk through all key, value pairs
    if value['user_name'] != user_wo_ent:
        user_with_ent = choice(users)
        user_wo_ent = user_with_ent[-4:]
    else:
        password = value['password']
So a very simple answer was to get the dir of the constants file and then parse over it like so:
global_users = dir(gum)

for item in global_users:
    o = gum.__dict__[item]
    if type(o) is not dict:
        continue
    if gum.__dict__[item].get("user_name") == user_wo_ent:
        print(user_wo_ent, o.get("password"))
    else:
        print("User was not in global_user_mappings")
I was able to find the answer by doing the following:
def get_user_for_login(application_code='BO'):
    user_from_view = BaseServiceTest().select_user_for_login(application_code=application_code)
    users = [d['USER'] for d in user_from_view]
    user_with_ent = choice(users)
    user_wo_ent = user_with_ent[4:]
    global_users = dir(gum)
    user_dict = {'user_name': '', 'password': ''}
    for item in global_users:
        o = gum.__dict__[item]
        if type(o) is not dict:
            continue
        if user_wo_ent == o.get("user_name"):
            user_dict['user_name'] = user_wo_ent
            user_dict['password'] = o.get("password")
    return user_dict
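A compact variant of the same idea (assuming the same gum constants module): build a reverse index from user_name to each constants dict once, then look users up directly instead of looping on every query.
user_index = {
    v['user_name']: v
    for v in vars(gum).values()  # vars(module) is the module's __dict__
    if isinstance(v, dict) and 'user_name' in v
}

entry = user_index.get(user_wo_ent)
if entry:
    print(user_wo_ent, entry['password'])
else:
    print("User was not in global_user_mappings")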

how to loop list to pass string to function name in python

I'm trying to find the most efficient way to create differently named functions, myfunction_a, myfunction_b, myfunction_c, and so on, with slightly different code (the input file name, e.g. 'app/data/mydata_a.csv'). Here is the function I have:
def myfunction_a(request):
    os.getcwd()  # Should get this Django project root (where manage.py is)
    fn = os.path.abspath(os.path.join(os.getcwd(), 'app/data/mydata_a.csv'))
    # TODO: Move to helper module
    response_data = {}
    data_format = 'tsv'
    if data_format == 'json':
        with open(fn, 'rb') as tsvin:
            tsvin = csv.reader(tsvin, delimiter='\t')
            for row in tsvin:
                print 'col1 = %s col2 = %s' % (row[0], row[1])
                response_data[row[0]] = row[1]
        result = HttpResponse(json.dumps(response_data), content_type='application/json')
    else:
        with open(fn, 'rb') as tsvin:
            buff = tsvin.read()
            result = HttpResponse(buff, content_type='text/tsv')
    return result
I want to be able to loop through my list and create multiple function names:
mylist = ['a', 'b', 'c', ..., 'z']

def myfunction_a(request):
    ...  # ('app/data/mydata_a.csv')
    return request
to get a final result of:
def myfunction_a => taking 'app/data/mydata_a.csv'
def myfunction_b => taking 'app/data/mydata_b.csv'
def myfunction_c => taking 'app/data/mydata_c.csv'
Right now I just copy, paste, and change it. Is there a better way to do this? Any recommendation would be appreciated. Thanks.
You can add a variable to a string with
"app/data/mydata_%s.csv" % (character)
so

for character in mylist:
    print "app/data/mydata_%s.csv" % (character)

will print the path with a different character in place of the %s each time.
Since you want each function to use a different string to open a different file, you can do something like this:

def myfunction(label, request):
    return "app/data/mydata_%s.csv" % (label)

so you get the function label at the end of your document path. Since you described that you only want the name to change so that it matches the function label, you only need another parameter, not a new function name.
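Since this is a Django view, the label can also be captured from the URL instead of being baked into the function name, so one view serves every file; a minimal sketch (the URL pattern and view name are made up):

import os
from django.http import HttpResponse

# urls.py (hypothetical): url(r'^mydata/(?P<label>[a-z])/$', mydata_view)
def mydata_view(request, label):
    # one view serves mydata_a.csv ... mydata_z.csv, chosen by the URL
    fn = os.path.join(os.getcwd(), 'app/data/mydata_%s.csv' % label)
    with open(fn, 'rb') as tsvin:
        return HttpResponse(tsvin.read(), content_type='text/tsv')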
If you must have a special function name, you could do this. Though why you'd need to I'm not sure.
import functools, sys

namespace = sys._getframe(0).f_globals

def myfunction(label, request):
    print request
    return "app/data/mydata_%s.csv" % (label)

my_labels = ['a', 'b', 'c']
for label in my_labels:
    namespace['myfunction_%s' % label] = functools.partial(myfunction, label)

print myfunction_a('request1')
print myfunction_b('request2')
Output is this:
request1
app/data/mydata_a.csv
request2
app/data/mydata_b.csv
Or possibly a better implementation would be:
class MyClass(object):
    def __init__(self, labels):
        for label in labels:
            setattr(self, label, functools.partial(self._myfunction, label))

    def _myfunction(self, label, request):
        print request
        return "app/data/mydata_%s.csv" % (label)

myfunction = MyClass(['a', 'b', 'c'])
print myfunction.c('request3')
Output is this:
request3
app/data/mydata_c.csv
