How to get h1 text into existing excel table Python - python

I have some code that scrapes several tables from webpages and then puts the data into several excel files.
I want to also be able to add the company name at the bottom of the excel file. I have worked out how to get the name of the company using companyname = soup.find('h1').text as shown in the first code block below.
One of the excel files is generated from the following code lines:
all_data = {}
#for every table found on the page
for table in soup.select('table.BordCollapseYear2'):
table_name = table.find_previous('b').text
all_data[table_name] = []
#scrape for every row
for tr in table.select('tr'):
row = [td.get_text(strip=True, separator=' ') for td in tr.select('td')]
if len(row) == 7:
all_data[table_name].append(row)
companyname = soup.find('h1').text
with open('data2.csv', 'a', newline='') as csvfile:
spamwriter = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
for row in all_data:
spamwriter.writerow(row)
I tried adding the line writerows(companyname) this worked but it separated out each letter. So I think I am nearly there...

Put [] around companyname in writerow().
For example:
with open('data2.csv', 'a', newline='') as csvfile:
spamwriter = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
for row in all_data:
spamwriter.writerow(row)
spamwriter.writerow([companyname]) # <-- notice the `[]` around companyname

Related

How to rewrite only one field (line[n]) in csv but in same time be able to work with data?

I have a csv file with three lines and three columns here.
This is the csv file
At first I want to print all the lines.
Subsequently, for each of them, program check whether it is written in the second field(index 1) USA. If so, program will take the price from the third field and multiply it by two.
Now I need to rewrite this doubled price instead of 2000 (in line with the USA)
import csv
with open('countries.csv', 'r') as source:
reader = csv.reader(source)
writer = csv.writer(source)
for line in reader:
print(*line, sep=';')
with open('countries.csv', 'r') as source:
reader = csv.reader(source)
for line in reader:
if line[2] == "USA":
actual_price = int(line[2])
print(actual_price)
new_price = int(actual_price) * 2
print(new_price)
Someone has already advised me to use the creation of a new file.
But this causes problems when I want to work with the data in the file first.
import csv
import os
with open('countries.csv', mode='r') as oldfile, open(
'countries.tmp', mode='w', newline='') as newfile:
# define a reader and a writer
reader = csv.reader(oldfile, delimiter=';', quotechar='"')
writer = csv.writer(newfile, delimiter=';', quotechar='"',
quoting=csv.QUOTE_MINIMAL)
for line in reader:
print(*line, sep=';')
# copy everything changing the third field
for line in reader:
if line[2] == "USA":
actual_price = int(line[2])
print(actual_price)
new_price = int(actual_price) * 2
print(new_price)
for row in reader:
writer.writerow([row[0], row[1], ,new_price])
# ok, time to rename the file
os.replace('countries.tmp', 'countries.csv')
Thank you for answer
You are changing new_price at every iteration of your for loop. You should therefore be writing the row within the loop where you change the value:
with open('countries.csv', mode='r') as oldfile, open('countries.tmp', mode='w', newline='') as newfile:
reader = csv.reader(oldfile, delimiter=';', quotechar='"')
writer = csv.writer(newfile, delimiter=';', quotechar='"', quoting=csv.QUOTE_MINIMAL)
for row in reader:
price = int(row[2])
if row[1] == "USA":
price = price*2
writer.writerow([row[0], row[1], price])
os.replace('countries.tmp', 'countries.csv')

How to iterate through all rows and save the data

I am working on a Python script to do some steps.
Right now everything is working except the part that it has to go through all table rows and save each line as a JSON object. The problem is: It only saves the last line. So it's not saving the previous lines. I know where is the problem but don't know how to fix.
Here is the code:
url = 'http://website.com/group1.html'
htmlFile= urlopen(urlGroup1)
soup= BeautifulSoup(htmlGroup1, 'html.parser')
table = soup2.find_all("table", {"class": "sortable employeeList result-table"})[0]
rows = table.find_all('tr')
Filegroup1 = open('group1.csv', 'wt+')
Datagroup1 = csv.writer(Filegroup1)
jsonFilePath = r'group1.json'
def make_json(Filegroup1, jsonFilePath):
data2 = {}
with open('group1.csv', encoding='utf-8') as csvf:
csvReader = csv.DictReader(csvf)
for rows in csvReader:
data2 = rows
with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
jsonf.write(simplejson.dumps([data2], indent=4))
try:
for row in rows:
FilteredRow = []
for cell in row.find_all(['td', 'th']):
FilteredRow.append(cell.get_text().strip())
Datagroup1.writerow(FilteredRow)
finally:
Filegroup1.close()
make_json(Filegroup1, jsonFilePath)
The issue is here:
for rows in csvReader:
data2 = rows
If I change it to the following, it will work!! But it will group each object by Employee ID. Which I don't want that.
for rows in csvReader:
key = rows['Employee Name']
data3[key] = rows

CSV created with Python is blank

This question was answered/resolved in the comments section.
I am a noob to Python and I wrote the code below thinking it would copy all the rows with "NY" as the state in the state field/column to a new csv file called "Output.csv".
import csv
f = open(r'C:\Users..\input.csv', 'r')
reader = csv.DictReader(f, delimiter=',')
output = open("C:...\Output.csv",'w')
fieldnames = ['firstScan', 'FinalScan', 'City', 'State', 'cld', 'daydiff']
writer = csv.DictWriter(output, fieldnames=fieldnames, delimiter=',')
for row in reader:
if row['State'] == 'NY':
writer.writerow(row)
Everything runs fine but the output csv is completely blank. The first tab is named "Output" but the sheet is blank. If I have it output to txt, that is blank as well. Any suggestions?
Try this instead:
import csv
with open('C:/Users/felasniper/Desktop/input.csv') as f:
reader = csv.DictReader(f, delimiter=',')
output = open("C:/Users/felasniper/Desktop/Output.csv", 'w')
fieldnames = ['firstScan', 'FinalScan', 'City', 'State', 'cld', 'daydiff']
writer = csv.DictWriter(output, fieldnames=fieldnames, delimiter=',')
for row in reader:
if row['State'] == 'NY':
writer.writerow(row)

Writing and reading to a csv file - I/O operation error

I need to create a file and write its header contents before the for loop. If I do it inside the loop, header content repeats for every iteration of the loop. When I execute the below code, I get the error "I/O operation on a closed file".
Is wondering there a work around for this issue? Any suggestions would be appreciated. TIA !!
csv_filename = "testfile4.csv"
with open(csv_filename, "wt") as csvfile:
writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
# CSV Header
header = ["Post ID", "Permalink", "Create Time"]
writer.writerow(header)
for group in getGroups():
feed = getValues(group["id"], group["name"])
with open(csv_filename, "a") as csvfile:
for item in feed:
row = [item["id"], item["permalink_url"], item["created_time"]]
writer.writerow(row)
You could process all necessary operations in single with statement
csv_filename = "testfile4.csv"
with open(csv_filename, "wt") as csvfile:
writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
header = ["Post ID", "Permalink", "Create Time"]
writer.writerow(header)
for group in getGroups():
feed = getValues(group["id"], group["name"])
for item in feed:
row = [item["id"], item["permalink_url"], item["created_time"]]
writer.writerow(row)

Adding a column in csv using python

I have hundreds of .csv files with 40 rows and 34 columns each. I want to add a column at position 26 and column 26-34 should shift to make space for the new one. First row of the file is empty and second row has the titles and rest have the values. The new column should have a title in row two and rest of the rows can be zero.
Please help me with this code in python.
import csv
infilename = r'C:\Users\Sulabh Kumra\Desktop\input.csv'
outfilename = r'C:\Users\Sulabh Kumra\Desktop\output.csv'
with open(infilename, 'rb') as fp_in, open(outfilename, 'wb') as fp_out:
reader = csv.reader(fp_in, delimiter=",")
headers = next(reader) # read first row
writer = csv.writer(fp_out, delimiter=",")
writer.writerow(headers)
for row in reader:
row.append(row[2])
writer.writerow(row)
Inserting into a python list is pretty easy: some_list[2:2] = ['stuff','to','insert']
So your code would look like the following:
import csv
infilename = r'C:\Users\Sulabh Kumra\Desktop\input.csv'
outfilename = r'C:\Users\Sulabh Kumra\Desktop\output.csv'
with open(infilename, 'rb') as fp_in, open(outfilename, 'wb') as fp_out:
reader = csv.reader(fp_in, delimiter=",")
writer = csv.writer(fp_out, delimiter=",")
blank_line = next(reader)
writer.writerow(blank_line)
headers = next(reader) # read title row
headers[26:26] = ['New Label']
writer.writerow(headers)
for row in reader:
row[26:26] = [0]
writer.writerow(row)

Categories

Resources