Generating email address using first name and last name in Faker python - python

I am trying to generate a pandas dataset comprising person data. I am employing Python's Faker library. Is there a way to generate a valid email address using the first name and last name?
import pandas as pd
import numpy as np
import os
import random
from faker import Faker
def faker_categorical(num=1, seed=None):
np.random.seed(seed)
fake.seed_instance(seed)
output = []
for x in range(num):
gender = np.random.choice(["M", "F"], p=[0.5, 0.5])
output.append(
{
"First name": fake.first_name_male() if gender=="M" else
fake.first_name_female(),
"Last name": fake.last_name(),
"E-mail": fake.ascii_email(),
})
return output

You can use Faker's domain_name method and string formatting alongside the already generated values:
first_name = fake.first_name_male() if gender =="M" else fake.first_name_female()
last_name = fake.last_name()
output.append(
{
"First name": first_name,
"Last Name": last_name,
"E-mail": f"{first_name}.{last_name}#{fake.domain_name()}"
}
)
On a more complete approach, you could add factoryboy to the mix:
from factory import DictFactory, LazyAttribute
from factory.fuzzy import FuzzyChoice
from factory import Faker
class PersonDataFactory(DictFactory):
first = LazyAttribute(lambda obj: fake.first_name_male() if obj._gender == "M" else fake.first_name_female())
last = Faker("last_name")
email = LazyAttribute(lambda obj: f"{obj.first}.{obj.last}#{fake.domain_name()}")
_gender = FuzzyChoice(("M", "F"))
class Meta:
exclude = ("_gender",)
rename = {"first": "First Name", "last": "Last Name", "email": "E-mail"}
PersonDataFactory()
which will result in something like:
{'First Name': 'Albert',
'Last Name': 'Martinez',
'E-mail': 'Albert.Martinez#wheeler.com'}

I'll show an alternative to the accepted answer if you need to build more complex data. This solution relies on the use of data provider that are part of the Faker library. You define a new Provider and then you add to your instance of Faker(). You can then call this generator where ever you need.
from faker.providers import BaseProvider
class CustomProvider(BaseProvider):
__provider__ = "personalia"
def personalia(self):
gender = self.random_element(["F", "M"])
first_name = self.generator.first_name_male() if gender == "M" else self.generator.first_name_female()
last_name = self.generator.last_name()
email_address = f"{first_name.lower()}.{last_name.lower()}#{self.generator.domain_name()}"
return {
"First name": first_name,
"Last Name": last_name,
"E-mail": email_address
}
fake = Faker()
fake.add_provider(CustomProvider)
personalia = fake.personalia()
print(personalia)
The output should look like this:
{
'First name': 'Olivia',
'Last Name': 'Cook',
'E-mail': 'olivia.cook#sloan.com'
}
Of course this is just an simple example based on the code you have provided. ;)

Related

Inserting data using PyMongo based on a defined data model

I have a dataset consisting of 250 rows that looks like to following:
In MongoDB Compass, I inserted the first row as follows:
db.employees.insertOne([{"employee_id": 412153,
"first_name": "Carrol",
"last_name": "Dhin",
"email": "carrol.dhin#company.com",
"managing": [{"manager_id": 412153, "employee_id": 174543}],
"department": [{"department_name": "Accounting", "department_budget": 500000}],
"laptop": [{"serial_number": "CSS49745",
"manufacturer": "Lenovo",
"model": "X1 Gen 10",
"date_assigned": {$date: 01-15-2022},
"installed_software": ["MS Office", "Adobe Acrobat", "Slack"]}]})
If I wanted to insert all 250 rows into the database using PyMongo in Python, how would I ensure that every row is entered following the format that I used when I inserted it manually in the Mongo shell?
from pymongo import MongoClient
import pandas as pd
client = MongoClient(‘localhost’, 27017)
db = client.MD
collection = db.gammaCorp
df = pd.read_csv(‘ ’) #insert CSV name here
data = {}
for i in df.index:
data['employee_id'] = df['employee_id'][i]
data['first_name'] = df['first_name'][i]
data['last_name'] = df['last_name'][i]
data['email'] = df['email'][i]
data['managing'] = [{'manager_id': df['employee_id'][i]}, {'employee_id': df['managing'][i]}]
data['department'] = [{'department_name': df['department'][i]}, {'department_budget': df['department_budget'][i]}]
data['laptop'] = [{'serial_number': df['serial_number'][i]}, {'manufacturer': df['manufacturer'][i]}, {'model': df['model'][i]}, {'date_assigned': df['date_assigned'][i]}, {'installed_software': df['installed_software'][i]}]
collection.insert_one(data)

Python Append Data from Loop into Data frame

I created this code where I am able to pull the data I want but not able to sort it as it should be. I am guessing it has to do with the way I am appending each item by ignoring index but I can't find my way around it.
This is my code:
import json
import pandas as pd
#load json object
with open("c:\Sample.json","r",encoding='utf-8') as file:
data = file.read()
data2 = json.loads(data)
print("Type:", type(data2))
cls=['Image', 'Email', 'User', 'Members', 'Time']
df = pd.DataFrame(columns = cls )
for d in data2['mydata']:
for k,v in d.items():
#print(k)
if k == 'attachments':
#print(d.get('attachments')[0]['id'])
image = (d.get('attachments')[0]['id'])
df=df.append({'Image':image},ignore_index = True)
#df['Message'] = image
if k == 'author_user_email':
#print(d.get('author_user_email'))
email = (d.get('author_user_email'))
df=df.append({'Email':email}, ignore_index = True)
#df['Email'] = email
if k == 'author_user_name':
#print(d.get('author_user_name'))
user = (d.get('author_user_name'))
df=df.append({'User':user}, ignore_index = True)
#df['User'] = user
if k == 'room_name':
#print(d.get('room_name'))
members = (d.get('room_name'))
df=df.append({'Members':members}, ignore_index = True)
#df['Members'] = members
if k == 'ts_iso':
#print(d.get('ts_iso'))
time = (d.get('ts_iso'))
df=df.append({'Time':time}, ignore_index = True)
#df['Time'] = time
df
print('Finished getting Data')
df1 = (df.head())
print(df)
print(df.head())
df.to_csv(r'c:\sample.csv', encoding='utf-8')
The code gives me this as the result
I am looking to get this
Data of the file is this:
{
"mydata": [
{
"attachments": [
{
"filename": "image.png",
"id": "888888888"
}
],
"author_user_email": "email#email.com",
"author_user_id": "91",
"author_user_name": "Marlone",
"message": "",
"room_id": "999",
"room_members": [
{
"room_member_id": "91",
"room_member_name": "Marlone"
},
{
"room_member_id": "9191",
"room_member_name": " +16309438985"
}
],
"room_name": "SMS [Marlone] [ +7777777777]",
"room_type": "sms",
"ts": 55,
"ts_iso": "2021-06-13T18:17:32.877369+00:00"
},
{
"author_user_email": "email#email.com",
"author_user_id": "21",
"author_user_name": "Chris",
"message": "Hi",
"room_id": "100",
"room_members": [
{
"room_member_id": "21",
"room_member_name": "Joe"
},
{
"room_member_id": "21",
"room_member_name": "Chris"
}
],
"room_name": "Direct [Chris] [Joe]",
"room_type": "direct",
"ts": 12345678910,
"ts_iso": "2021-06-14T14:42:07.572479+00:00"
}]}
Any help would be appreciated. I am new to python and am learning on my own.
Try:
import json
import pandas as pd
with open("your_data.json", "r") as f_in:
data = json.load(f_in)
tmp = []
for d in data["mydata"]:
image = d.get("attachments", [{"id": None}])[0]["id"]
email = d.get("author_user_email")
user = d.get("author_user_name")
members = d.get("room_name")
time = d.get("ts_iso")
tmp.append((image, email, user, members, time))
df = pd.DataFrame(tmp, columns=["Image", "Email", "User", "Members", "Time"])
print(df)
Prints:
Image Email User Members Time
0 888888888 email#email.com Marlone SMS [Marlone] [ +7777777777] 2021-06-13T18:17:32.877369+00:00
1 None email#email.com Chris Direct [Chris] [Joe] 2021-06-14T14:42:07.572479+00:00
Although the other answer does work, pandas has a built in reader for json files pd.read_json: https://pandas.pydata.org/pandas-docs/version/1.1.3/reference/api/pandas.read_json.html
It has the benefit of being able to handle very large datasets via chunking, as well as processing quite a few different formats. The other answer would not be performant for a large dataset.
This would get you started:
import pandas as pd
df = pd.read_json("c:\Sample.json")
The probblem is that append() adds a new row. So, you have to use at[] https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.at.html specifying the index/row. Se below. Some print/debug messages were left and path to input and output files was changed a little because I'm on Linux.
import json
import pandas as pd
import pprint as pp
#load json object
with open("Sample.json","r",encoding='utf-8') as file:
data = file.read()
data2 = json.loads(data)
#pp.pprint(data2)
cls=['Image', 'Email', 'User', 'Members', 'Time']
df = pd.DataFrame(columns = cls )
pp.pprint(df)
index = 0
for d in data2['mydata']:
for k,v in d.items():
#print(k)
if k == 'attachments':
#print(d.get('attachments')[0]['id'])
image = (d.get('attachments')[0]['id'])
df.at[index, 'Image'] = image
#df['Message'] = image
if k == 'author_user_email':
#print(d.get('author_user_email'))
email = (d.get('author_user_email'))
df.at[index, 'Email'] = email
#df['Email'] = email
if k == 'author_user_name':
#print(d.get('author_user_name'))
user = (d.get('author_user_name'))
df.at[index, 'User'] = user
#df['User'] = user
if k == 'room_name':
#print(d.get('room_name'))
members = (d.get('room_name'))
df.at[index, 'Members'] = members
#df['Members'] = members
if k == 'ts_iso':
#print(d.get('ts_iso'))
time = (d.get('ts_iso'))
df.at[index, 'Time'] = time
#df['Time'] = time
index += 1
# start indexing from 0
df.reset_index()
# replace empty str/cells witn None
df.fillna('None', inplace=True)
pp.pprint(df)
print('Finished getting Data')
df1 = (df.head())
print(df)
print(df.head())
df.to_csv(r'sample.csv', encoding='utf-8')

json serialization of a list of objects of a custom class

I have a song class, which holds the attributes to a song, and it is a custom class. I also have a list of songs in a list called track list. When I try to json.dump the list, I get an error that says :
TypeError: Object of type 'Song' is not JSON serializable
How would I go about converting this list of songs to json?
Here is the additional relevant code that returns the error:
class Song:
def __init__(self, sname, sartist, coverart, albname, albartist, spotid):
self.sname = sname
self.sartist = sartist
self.coverart = coverart
self.albname = albname
self.albartist = albartist
self.spotid = spotid
tracklist = createDict(tracks) ##creates the list of songs, works fine
jsontracks = json.dumps(tracklist)
pp.pprint(jsontracks)
Thanks
I've solved this by adding an encode() method to the class:
def encode(self):
return self.__dict__
and adding some arguments to json.dumps:
jsontracks = json.dumps(tracklist, default=lambda o: o.encode(), indent=4)
This will "crawl" down your class tree (if you have any child classes) and encode every object as a json list/object automatically. This should work with just about any class and is fast to type. You may also want to control which class parameters get encoded with something like:
def encode(self):
return {'name': self.name,
'code': self.code,
'amount': self.amount,
'minimum': self.minimum,
'maximum': self.maximum}
or a little bit faster to edit (if you're lazy like me):
def encode(self):
encoded_items = ['name', 'code', 'batch_size', 'cost',
'unit', 'ingredients', 'nutrients']
return {k: v for k, v in self.__dict__.items() if k in encoded_items}
full code:
import json
class Song:
def __init__(self, sname, sartist, coverart, albname, albartist, spotid):
self.sname = sname
self.sartist = sartist
self.coverart = coverart
self.albname = albname
self.albartist = albartist
self.spotid = spotid
def encode(self):
return self.__dict__
tracklist = [
Song('Imagine', 'John Lennon', None, None, None, None),
Song('Hey Jude', 'The Beatles', None, None, None, None),
Song('(I Can\'t Get No) Satisfaction', 'The Rolling Stones', None, None, None, None),
]
jsontracks = json.dumps(tracklist, default=lambda o: o.encode(), indent=4)
print(jsontracks)
output:
[
{
"sname": "Imagine",
"sartist": "John Lennon",
"coverart": null,
"albname": null,
"albartist": null,
"spotid": null
},
{
"sname": "Hey Jude",
"sartist": "The Beatles",
"coverart": null,
"albname": null,
"albartist": null,
"spotid": null
},
{
"sname": "(I Can't Get No) Satisfaction",
"sartist": "The Rolling Stones",
"coverart": null,
"albname": null,
"albartist": null,
"spotid": null
}
]

Saving list data in python

I am wondering how I can save whatever I added to a list when I close a python file. For example, in this "my contact" program that I wrote below, if I add information about 'Jane Doe', what could I do so that next time I open up the same file, Jane Doe still exists.
def main():
myBook = Book([{"name": 'John Doe', "phone": '123-456-7890', "address": '1000 Constitution Ave'}])
class Book:
def __init__(self, peoples):
self.peoples = peoples
self.main_menu()
def main_menu(self):
print('Main Menu')
print('1. Display Contact Names')
print('2. Search For Contacts')
print('3. Edit Contact')
print('4. New Contact')
print('5. Remove Contact')
print('6. Exit')
self.selection = input('Enter a # form the menu: ')
if (self.selection == "1"):
self.display_names()
if (self.selection == "2"):
self.search()
if (self.selection == "3"):
self.edit()
if (self.selection == "4"):
self.new()
if (self.selection == "5"):
self.delete()
if (self.selection == "6"):
self.end()
def display_names(self):
for people in self.peoples:
print("Name: " + people["name"])
self.main_menu()
def search(self):
searchname = input('What is the name of your contact: ')
for index in range(len(self.peoples)):
if (self.peoples[index]["name"] == searchname):
print("Name: " + self.peoples[index]["name"])
print("Address: " + self.peoples[index]["address"])
print("Phone: " + self.peoples[index]["phone"])
self.main_menu()
def edit(self):
searchname = input('What is the name of the contact that you want to edit: ')
for index in range(len(self.peoples)):
if (self.peoples[index]["name"] == searchname):
self.peoples.pop(index)
name = input('What is your name: ')
address = input('What is your address: ')
phone = input('What is your phone number: ')
self.peoples.append({"name": name, "phone": phone, "address": address})
self.main_menu()
def new(self):
name = input('What is your name: ')
address = input('What is your address: ')
phone = input('What is your phone number: ')
self.peoples.append({"name": name, "phone": phone, "address": address})
self.main_menu()
def delete(self):
searchname = input('What is the name of the contact that you want to delete: ')
for index in reversed(range(len(self.peoples))):
if (self.peoples[index]["name"] == searchname):
self.peoples.pop(index)
print(searchname, 'has been removed')
self.main_menu()
def end(self):
print('Thank you for using the contact book, have a nice day')
print('Copyright Carson147 2019©, All Rights Reserved')
main()
Use a module from the Data Persistence section of the standard library, or save it as json, or as a csv file.
You just convert your list to array inside in function .
np.save('path/to/save', np.array(your_list))
to load :
arr=np.load(''path/to/save.npy').tolist()
I hope it will be helpful
There are innumerable kinds of serialization options, but a time-tested favorite is JSON. JavaScript Object Notation looks like:
[
"this",
"is",
"a",
"list",
"of",
"strings",
"with",
"a",
{
"dictionary": "of",
"values": 4,
"an": "example"
},
"can strings be single-quoted?",
false,
"can objects nest?",
{
"I": {
"Think": {
"They": "can"
}
}
}
]
JSON is widely used, and the Python stdlib has a method of converting objects to and from JSON in the json package.
>>> import json
>>> data = ['a', 'list', 'full', 'of', 'entries']
>>> json.dumps(data) # dumps will dump to string
["a", "list", "full", "of", "entries"]
You can then save your Book data to json before the program shuts down, and read from json after it starts up.
# at the top
import json
from pathlib import Path
# at the bottom of your program:
if __name__ == '__main__':
persistence = Path('book.json')
if persistence.exists():
with persistence.open() as f:
data = json.load(f)
else:
data = [{"name": 'John Doe', "phone": '123-456-7890', "address": '1000 Constitution Ave'}]
book = Book(data)
with persistence.open('w') as f:
json.dump(f, indent=4)
There is no way you can do that without any external modules, such as numpy or pickle. Using pickle, you can do this: (I am assuming you want to save the myBook variable)
import pickle
pickle.dump(myBook, open("foo.bar", "wb")) #where foo is name of file and bar is extension
#also wb is saving type, you can find documentation online
To load:
pickle.load(myBook, open("foo.bar", "rb"))
EDIT:
I was wrong in my first statement. There is a way to save without importing a module. Here is how:
myBook.save(foo.bar) #foo is file name and bar is extention
To load:
myBook=open(foo.bar)
As evinced by the many other answers, there are many ways to do this, but I thought it was helpful to have a example.
By changing the top of your file as so, you can use the shelve module.
There are a variety of other things you can fix in your code if you are curious, you could try https://codereview.stackexchange.com/ if you want more feedback.
import shelve
def main():
default = [
{'name': 'John Doe', 'phone': '123-456-7890',
'address': '1000 Constitution Ave'}
]
with Book('foo', default=default) as myBook:
myBook.main_menu()
class Book:
def __init__(self, filename, default=None):
if default is None:
default = []
self._db = shelve.open(filename)
self.people = self._db.setdefault('people', default)
def __enter__(self):
return self
def __exit__(self):
self._db['people'] = self.people
self._db.close()

How do I parse the people's first and last name in Python?

So basically I need to parse a name and find the following info:
First Name
First Initial (if employee has initials for a first name like D.J., use both initials)
Last Name (include if employee has a suffix such as Jr. or III.)
So here's the interface I'm working with:
Input:
names = ["D.J. Richies III", "John Doe", "A.J. Hardie Jr."]
for name in names:
print parse_name(name)
Expected Output:
{'FirstName': 'D.J.', 'FirstInitial': 'D.J.', 'LastName': 'Richies III' }
{'FirstName': 'John', 'FirstInitial': 'J.', 'LastName': 'Doe' }
{'FirstName': 'A.J.', 'FirstInitial': 'A.J.', 'LastName': 'Hardie Jr.' }
Not really good at Regex, and actually that's probably overkill for this. I'm just guessing:
if name[1] == ".": # we have a name like D.J.?
I found this library quite useful for parsing names. https://code.google.com/p/python-nameparser/
It can also deal with names that are formatted Lastname, Firstname.
There is no general solution and solution will depend on the constraints you put. For the specs you have given here is a simple solution which gives exactly what you want
def parse_name(name):
fl = name.split()
first_name = fl[0]
last_name = ' '.join(fl[1:])
if "." in first_name:
first_initial = first_name
else:
first_initial = first_name[0]+"."
return {'FirstName':first_name, 'FirstInitial':first_initial, 'LastName':last_name}
names = ["D.J. Richies III", "John Doe", "A.J. Hardie Jr."]
for name in names:
print parse_name(name)
output:
{'LastName': 'Richies III', 'FirstInitial': 'D.J.', 'FirstName': 'D.J.'}
{'LastName': 'Doe', 'FirstInitial': 'J.', 'FirstName': 'John'}
{'LastName': 'Hardie Jr.', 'FirstInitial': 'A.J.', 'FirstName': 'A.J.'}
Well, for your simple example names, you can do something like this.
# This separates the first and last names
name = name.partition(" ")
firstName = name[0]
# now figure out the first initial
# we're assuming that if it has a dot it's an initialized name,
# but this may not hold in general
if "." in firstName:
firstInitial = firstName
else:
firstInitial = firstName[0] + "."
lastName = name[2]
return {"FirstName":firstName, "FirstInitial":firstInitial, "LastName": lastName}
I haven't tested it, but a function like that should do the job on the input example you provided.
This is basically the same solution as the one Anurag Uniyal provided, only a little more compact:
import re
def parse_name(name):
first_name, last_name = name.split(' ', 1)
first_initial = re.search("^[A-Z.]+", first_name).group()
if not first_initial.endswith("."):
first_initial += "."
return {"FirstName": first_name,
"FirstInitial": first_initial,
"LastName": last_name}

Categories

Resources