How to convert txt to excel file - python

I have a txt file that looks like this:
Cellname;Ncellname;Technology
52822;13621;GSM;
52822;13622;GSM;
52822;13623;GSM;
52822;16322;UMTS;
52822;16323;UMTS;
52822;16324;UMTS;
52822;16361;UMTS;
52822;16362;UMTS;
52822;16363;UMTS;
I tried to convert it using the below code:
import pandas as pd
import os
excel = 'gsmrelation_mnm.txt'
df = pd.read_csv(os.path.join(os.path.dirname(__file__), excel))
df.to_excel('gsmrelation_mnm.xlsx', 'Sheet1')
but I found this error:
Traceback (most recent call last):
File "C:/Users/haroo501/PycharmProjects/MyLiveRobo/convert_txt_csv.py", line 8, in <module>
df.to_excel('gsmrelation_mnm.xlsx', 'Sheet1')
File "C:\Users\haroo501\PycharmProjects\MyLiveRobo\venv\lib\site-packages\pandas\core\generic.py", line 2250, in to_excel
formatter.write(
File "C:\Users\haroo501\PycharmProjects\MyLiveRobo\venv\lib\site-packages\pandas\io\formats\excel.py", line 730, in write
writer = ExcelWriter(_stringify_path(writer), engine=engine)
File "C:\Users\haroo501\PycharmProjects\MyLiveRobo\venv\lib\site-packages\pandas\io\excel\_openpyxl.py", line 19, in __init__
from openpyxl.workbook import Workbook
ModuleNotFoundError: No module named 'openpyxl'
enter image description here
How can I solve this problem?

Try this:
import pandas as pd
excel = 'test.txt'
df = pd.read_csv(excel,sep=';')
column_indexes = list(df.columns)
df.reset_index(inplace=True)
df.drop(columns=df.columns[-1], inplace=True)
column_indexes = dict(zip(list(df.columns),column_indexes))
df.rename(columns=column_indexes, inplace=True)
df
and then
df.to_excel('output.xlsx', 'Sheet1')
and in case you don't want the indexes in the output sheet, use this
df.to_excel('output.xlsx', 'Sheet1', index=False)

import pandas as pd

# read_csv has no `index` keyword (passing it raises TypeError). For input like
# the sample above, where each data row ends with a trailing ';' that the
# header lacks, index_col=False stops pandas from using the first column as
# the index.
file = pd.read_csv('input.txt', sep=';', index_col=False)
file.to_excel('output.xlsx', sheet_name='Sheet1')

I tried the following,
which finally gives what you expected:
import pandas as pd
df = pd.read_csv('gsmrelation_mnm.txt', sep = ';', header=None, names=['Cellname', 'Ncellname', 'Technology'])
df.to_excel('gsmrelation_mnm.xlsx', 'Sheet1', index=False, header=None)

Related

ValueError: cannot insert column, already exists

I have a column called Soru-TR. There are two Turkish entries in the column. What I want to do is translate the data in the Soru-TR column and then save it under the Soru-EN column.
This is the error I get in the output:
Traceback (most recent call last):
File "C:\Users\User1\Desktop\test.py", line 17, in <module>
df.insert(1, "Soru-EN", output2)
File "C:\Users\User1\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\core\frame.py", line 4443, in insert
raise ValueError(f"cannot insert {column}, already exists")
ValueError: cannot insert Soru-EN, already exists
before running the code
after running the code
import os
from typing import List
from openpyxl import load_workbook
import pandas as pd
from google.cloud import translate_v2
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = r"C:\Users\User1\Desktop\translate-598740482087.json"
translate_client = translate_v2.Client()
target = "en"
df = pd.read_excel('file.xlsx')
for i in df.index:
x = df['Soru-TR'][i]
output = translate_client.translate(x, target_language=target)
output2 = output['translatedText']
df.insert(1, "Soru-EN", output2)
with pd.ExcelWriter('file.xlsx', mode='a', engine="openpyxl", if_sheet_exists='overlay') as writer:
df.to_excel(writer, sheet_name='Sayfa1', index=False)
with pd.ExcelWriter('file.xlsx', mode='a', engine="openpyxl", if_sheet_exists='overlay') as writer:
df.to_excel(writer, sheet_name='Sayfa1', index=False)
try this
df = pd.read_excel('file.xlsx')
df["Soru-EN"] = "Pending" # add the column "Soru-EN" with value "Pending" for all rows
for i in df.index:
x = df.loc[i,'Soru-TR'] # tip: using pandas.loc[] to access values is better.
output = translate_client.translate(x, target_language=target)
output2 = output['translatedText']
df.loc[i, "Soru-EN"] = output2 # assign the translated text to its equivalent cell.
Hope this helps.

exporting lists in python as excel rows, why it just export first row?

I am trying to export 2 specific lines from each of multiple (.txt) files in a path as Excel rows, writing them into an Excel file. But the (.xlsx) file contains just the first row (only the exported lines of one text file).
import pandas as pd
import linecache
import xlsxwriter as xlsw
import os
import glob
directory=('C:\Users\john\Desktop')
os.chdir(directory)
files=glob.glob('*.txt')
for filename in files:
name = linecache.getline(filename,5)
id = linecache.getline(filename,13)
info = [name,id]
final_list = []
for i in info:
final_list.append(i.strip())
print (final_list)
df = pd.DataFrame(final_list)
df = df.transpose()
writer = pd.ExcelWriter('test.xlsx', engine='xlsxwriter')
df.to_excel(writer, sheet_name='welcome',startrow=1,startcol=0, header=False, index=False)
writer.save()
Use:
import pandas as pd
import linecache
import xlsxwriter as xlsw
import os
import glob
directory = 'test'
os.chdir(directory)
files = glob.glob('*.txt')
# Collect one [line 5, line 13] pair per text file so that every file becomes
# its own row in the resulting sheet.
final_list = []
for filename in files:
    name = linecache.getline(filename, 5)
    record_id = linecache.getline(filename, 13)  # avoid shadowing builtin id()
    final_list.append([name.strip(), record_id.strip()])
print(final_list)
df = pd.DataFrame(final_list)
# ExcelWriter.save() was removed in pandas 2.0; leaving the with-block saves
# and closes the workbook.
with pd.ExcelWriter('test.xlsx', engine='xlsxwriter') as writer:
    df.to_excel(writer, sheet_name='welcome', startrow=1, startcol=0,
                header=False, index=False)

Read CSV file with Python function

I'm trying to write my own functions to read and write a CSV, but they don't return any value.
I'm reading from a CSV, replacing the ";" in the second column with " ", and then saving the processed CSV.
But for some reason it doesn't save my CSV. Is my function wrong?
I'm starting out in the Python world, and I'm having a bit of trouble.
import pandas as pd
header_col = ['col0','col1','col2','col3','col4','col5','col6','col7','col8','col9']
df = pd.read_csv('myfile_<date>.csv', encoding="ISO-8859-1", sep=';', names=header_col, header=None)
def file_load(df):
df['col1'] = df['col1'].str.replace(';',' ')
df.drop(columns=['col8'], inplace=True)
df.drop(columns=['col9'], inplace=True)
return df
def save_file(dataframe):
df = dataframe
df.to_csv('myfile_<date>_treat.csv' ,sep=';', encoding='utf-8', index=False)
import pandas as pd
def file_load(df):
    """Clean the raw frame and return it.

    Replaces every ';' in ``col1`` with a space and removes the ``col8`` and
    ``col9`` columns. The frame passed in is modified in place and is also
    the return value.
    """
    # Use the element-wise .str accessor: str(df['col1']) would stringify the
    # whole Series and assign that single text to every row.
    df['col1'] = df['col1'].str.replace(';', ' ', regex=False)
    df.drop(columns=['col8', 'col9'], inplace=True)
    return df
def save_file(dataframe):
    """Write *dataframe* to 'myfile_<date>_treat.csv'.

    The file is comma-separated, UTF-8 encoded, and written without the
    index column.
    """
    dataframe.to_csv(
        'myfile_<date>_treat.csv',
        sep=',',
        encoding='utf-8',
        index=False,
    )
def main():
    """Read the raw CSV, clean it with file_load, and write it with save_file."""
    # The list has to start on the same line as `=`; a bare line break right
    # after the assignment operator is a SyntaxError.
    header_col = ['col0', 'col1', 'col2', 'col3', 'col4',
                  'col5', 'col6', 'col7', 'col8', 'col9']
    df = pd.read_csv('myfile_<date>.csv', encoding="ISO-8859-1", sep=';',
                     names=header_col, header=None)
    df1 = file_load(df)
    save_file(df1)
if __name__ == '__main__':
main()

Append row to top of excel sheet python

How can I append a row at the top of an excel sheet? Goal as follows:
The file itself is written by using pandas.df.to_excel as follows:
import pandas
with pandas.ExcelWriter(output_filename) as writer:
for file in files:
df = pandas.read_csv(file)
df.to_excel(writer, sheet_name=file.replace(".csv", "").replace("_", " ").title(), index=False)
Here is one way to do it using XlsxWriter as the Excel engine:
with pandas.ExcelWriter(output_filename, engine='xlsxwriter') as writer:
for file in files:
df = pandas.read_csv(file)
sheet_name = file.replace(".csv", "").replace("_", " ").title()
df.to_excel(writer, sheet_name=sheet_name, index=False, startrow=1)
worksheet = writer.sheets[sheet_name]
worksheet.write('A1', 'Here is some additional text')
You can use openpyxl to edit your Excel file afterwards:
import contextlib
import openpyxl
import pandas as pd
new_row = "THIS ROW IS APPENDED AFTER THE FILE IS WRITTEN BY PANDAS"
with contextlib.closing(openpyxl.open(output_filename)) as wb:
for file in files:
sheet_name = file.replace(".csv", "").replace("_", " ").title()
sheet = wb[sheet_name]
sheet.insert_rows(0)
sheet["A1"] = new_row
wb.save(output_filename)

AttributeError: 'Worksheet' object has no attribute 'set_column'

I am getting an error that seems... wrong. Because of course worksheet object has set_column() as a function, it's in the docs. I've probably done something dumb like drop a parenthesis.
Here's the error:
Traceback (most recent call last):
File "scrubaddresses.py", line 137, in <module>
run()
File "scrubaddresses.py", line 118, in run
format_col_width(worksheet)
File "scrubaddresses.py", line 24, in auto_format_cell_width
ws.set_column('B:C', 20)
AttributeError: 'Worksheet' object has no attribute 'set_column'
Here's my ridiculous import. Config is some constants, controller has some helper functions.
from smartystreets_python_sdk import StaticCredentials, exceptions, Batch, ClientBuilder
from smartystreets_python_sdk.us_street import Lookup as StreetLookup
from pathlib import Path
import pandas as pd
import numpy as np
import config
from controller import getExcel, clean
The func in question:
def format_col_width(ws):
ws.set_column('B:C', 20)
ws.set_column('D', 1)
ws.set_column('E', 20)
Where the ws being passed comes from:
df1 = df.replace(np.nan, '', regex=True)
print(df1)
df1.to_excel(writer, sheet, index = False, engine='xlsxwriter')
worksheet = writer.sheets[sheet]
format_col_width(worksheet)
Did I forget to import something? Xlsxwriter is installed.
The reason it raises AttributeError: 'Worksheet' object has no attribute 'write'
is that xlsxwriter is not installed on your PC.
you can use:
pip install xlsxwriter
and it will work isa.
I had the same problem, the following worked for me:
def format_col_width(ws):
ws.column_dimensions['B'].width = 20
ws.column_dimensions['C'].width = 20
ws.column_dimensions['D'].width = 1
ws.column_dimensions['E'].width = 20
There is an error in the single column ranges. They should be D:D instead of D since the method needs a start and end column even if they are the same.
With that modification the code should work:
import pandas as pd
def format_col_width(ws):
    """Set fixed widths on columns B through E of the worksheet *ws*.

    Each entry is a column range (start and end column, even for a single
    column) paired with the width passed to ``ws.set_column``.
    """
    for col_range, width in (('B:C', 20), ('D:D', 1), ('E:E', 20)):
        ws.set_column(col_range, width)
df = pd.DataFrame({'Data1': [10, 20, 30, 20, 15, 30, 45]})
# Use the writer as a context manager: ExcelWriter.save() was removed in
# pandas 2.0, and leaving the with-block saves and closes the file.
with pd.ExcelWriter('pandas_test.xlsx', engine='xlsxwriter') as writer:
    df.to_excel(writer, sheet_name='Sheet1')
    # With engine='xlsxwriter' the sheet is an XlsxWriter worksheet, which is
    # the object that provides set_column().
    worksheet = writer.sheets['Sheet1']
    format_col_width(worksheet)
Output:
Try the above code and see if it works. If it doesn't then XlsxWriter may not be installed and Pandas is defaulting to OpenPyXL.

Categories

Resources