Install System Fonts in Powershell (or python) - python

At the beginning, I simplly use the COM 0x14 to install. And it works when I test it.
But when I use it in my program, it doesn't works.
links = ['https://p-f-t.com/cdn/CaskaydiaCove.zip', 'https://p-f-t.com/cdn/FiraCode.zip']
print('Downloading Nerd Font'.center(45,'=') + '\n')
def downFile(url):
resp = requests.get(url)
file_name = os.path.basename(url)
file = open(file_name, 'wb')
file.write(resp.content)
file.close()
print('Download', file_name, 'done!')
return file_name
font_zip_list = []
for link in links:
font_zip_list.append(downFile(link))
print('Installing Nerd Font'.center(45,'=') + '\n')
font_inst_script = '''$FONTS = 0x14
$objShell = New-Object -ComObject Shell.Application
$objFolder = $objShell.Namespace($FONTS)'''
def unzip_inst(zipfile_name):
zip_file = zipfile.ZipFile(zipfile_name, 'r')
font_list = []
for i in zip_file.namelist():
zip_file.extract(i, os.getcwd())
if i.rfind(r'/') != len(i) - 1:
print('Extracted', i, 'from', zipfile_name)
font_list.append(i)
return font_list
font_list = []
for z in font_zip_list:
font_list += unzip_inst(z)
print(font_list)
for f in font_list:
font_inst_script += '\n$objFolder.CopyHere("{font_path}")'.format(font_path='./' + f)
script = open('install.ps1', 'w')
script.write(font_inst_script)
script.close()
os.system('powershell.exe ./install.ps1')
print(''.center(45,'=') + '\n')
I need a simple way to install fonts families on python or powershell.

Related

Getting UnicodeDecodeError

I getting this weird UnicodeDecodeError and
I don't know why this error is caused but it would be really nice if someone could help me out with this issue:)
Error message:
UnicodeDecodeError: 'charmap' codec can't decode byte 0x81 in position 6456:character maps to
Full Error message as an screenshot
screenshot of the Error message
My code:
import os
import json
import random
import csv
from pydub import AudioSegment
file_path = '/path/to/file/.tsv '
save_json_path = '/path/where/you/want/the/jsons/saved'
def main(args):
data = []
directory = file_path.rpartition('/')[0]
percent = int(100)
with open(file_path) as f:
lenght = sum(1 for ine in f)
with open(file_path, newline='') as csvfile:
reader = csv.DictReader(csvfile, delimiter='\t')
index = 1
if(args.convert):
print(str(lenght) + "files found")
for row in reader:
file_name = row['path']
filename = file_name.rpartition('.')[0] + ".wav"
text = row['sentence']
if(args.convert):
data.append({
"key": directory + "/clips/" + filename,
"text": text
})
print("converting file " + str(index) + "/" + str(lenght) + " to wav", end="\r")
src = directory + "/clips/" + file_name
dst = directory + "/clips/" + filename
sound = AudioSegment.from_mp3(src)
sound.export(dst, format="wav")
index = index + 1
else:
data.append({
"key": directory + "/clips/" + file_name,
"text": text
})
random.shuffle(data)
print("creating JSON's")
f = open(save_json_path +"/"+ "train.json", "w")
with open(save_json_path +"/"+ 'train.json','w') as f:
d = len(data)
i=0
while(i<int(d-d/percent)):
r=data[i]
line = json.dumps(r)
f.write(line + "\n")
i = i+1
f = open(save_json_path +"/"+ "test.json", "w")
with open(save_json_path +"/"+ 'test.json','w') as f:
d = len(data)
i=int(d-d/percent)
while(i<d):
r=data[i]
line = json.dumps(r)
f.write(line + "\n")
i = i+1
print("Done!")
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(description="""
Utility script to convert commonvoice into wav and create the training and test json files for speechrecognition. """
)
parser.add_argument('--convert', default=True, action='store_true',
help='says that the script should convert mp3 to wav')
args = parser.parse_known_args()
main(args)
It looks like you're getting this error in this block
with open(file_path) as f:
length = sum(1 for line in f)
In another post, though it doesn't have an accepted answer, this is shown to likely be because of the encoding of your file.
Try adding the encoding kwarg to open
with open(file_path, encoding="latin-1") as f:
length = sum(1 for line in f)

Problem opening up text file of the downloadPath with gedit

After downloading the dependencies from nexus, I have a download path for the data to be in, but I wasn't able to open the textile its not responding, why is this so?
for item in data["items"]:
for asset in item["assets"]:
fileurl = asset["downloadUrl"]
print(fileurl)
downloadPath = '/home/centos/'
filename = downloadPath + fileurl.split('/')[-1]# '\' for Windows
outfile = open(filename, "w")
outfile.write(str(urllib.request.urlopen(fileurl).read()))
outfile.close()
if data["continuationToken"] is None:
sys.exit()
else:
#construct pagination url and loop
url = baseurl + 'components?continuationToken=' + data["continuationToken"] + '&repository=' + downloadRepository
return

Can't Move / Delete PDF after processing with pdfrw

I've updated the question to contain the bulk of the code as I feel there may be some of it that is blocking each other...
Can be tested by simply adding a pdf file or two to your c:\temp folder (on windows).
I've just started with Python so may be missing basic stuff...
import glob
from datetime import datetime
from pathlib import Path
import PyPDF4
from pdfrw import PdfReader, PdfWriter
def safe_open_pdf(pdf):
pdf_reader = None
result = True
file = open(pdf, 'rb')
try:
pdf_reader = PyPDF4.PdfFileReader(file)
result = True
except:
# some older PDF files on my disk raise a missing EOF error, which cannot be handled by PyPDF4
print(pdf.split('\\')[-1] + " needs to be fixed")
result = False
if not result:
# if file had EOF error, I "rebuild" it with PdfReader and PdfWriter
x = PdfReader(pdf)
y = PdfWriter()
y.addpages(x.pages)
y.write(pdf)
pdf_reader = PyPDF4.PdfFileReader(file)
return pdf_reader
def move_processed_pdf(source_file):
Path(new_path).mkdir(parents=True, exist_ok=True)
print("Copying to " + new_path + new_file)
f = open(PDFFile, 'rb')
x = PdfReader(f)
y = PdfWriter()
y.addpages(x.pages)
y.write(new_path + new_file)
f.close()
# time.sleep(5)
Path(PDFFile).unlink()
if __name__ == '__main__':
relevant_path = 'C:\\temp\\'
file_count = 0
new_path = 'C:\\temp\\processed\\'
for PDFFile in glob.iglob(relevant_path + '*.pdf', recursive=True):
new_file = datetime.today().strftime('%Y-%m-%d') + PDFFile.split('\\')[-1]
print('Processing File: ' + PDFFile.split('\\')[-1])
pdfReader = safe_open_pdf(PDFFile)
file_count += 1
num_pages = pdfReader.numPages
print(num_pages)
page_count = 0
text = ''
while page_count < num_pages:
pageObj = pdfReader.getPage(page_count)
page_count += 1
text += pageObj.extractText()
# Main processing occurs here
move_processed_pdf(PDFFile)
the issue I get is PermissionError: [WinError 32] The process cannot access the file because it is being used by another process.
folders and files exist.
any ideas?

How to use a CFS_Config textfile to create path directories to auto generate text files in python?

Below is the data in CFS_Config.txt. What this textfile does is to know where the documents have stored and to avoid hardcodes in the program. For instance, if the program is moved to other environment, we only need to change the directory paths in the CFS_Config.txt file.
Folder Path = ../dataprep/source_documents
ED Notes name = ED Notes
ED Notes output = ../dataprep/ED_Notes
This below codes in a python file what it actually does is to read configuration from the CFS_Config.txt mentioned earlier and also to do an auto generated textfile.
The problem encountered is that they tell me the ../dataprep/ED_Notes path was not found. Please do take a look at the codes if need more codes I will try my best to provide, thanks!!! :((
from preprocessing import ednotes_extractor
import os
def read_config():
# open existing file to read configuration
cfs_config_txt = open("..\CFS_Config.txt", "r")
file_list = []
root_dir = ""
ednotes_name = ""
ednotes_output = ""
for line in cfs_config_txt:
file_list.append(line)
if "Folder Path = " in file_list[0]:
root_dir = str(file_list[0])
root_dir = root_dir.replace("Folder Path = ", "")
root_dir = root_dir.replace("\n", "")
if "ED Notes name = " in file_list[1]:
ednotes_name = str(file_list[1])
ednotes_name = ednotes_name.replace("ED Notes name = ", "")
ednotes_name = ednotes_name.replace("\n", "")
if "ED Notes output = " in file_list[2]:
ednotes_output = str(file_list[2])
ednotes_output = ednotes_output.replace("ED Notes output = ", "")
ednotes_output = ednotes_output + ".txt"
ednotes_output = ednotes_output.replace("\n", "")
return root_dir, ednotes_name, ednotes_output
def convert_txt(choices):
root_dir, ednotes_name, ednotes_output = read_config()
if(choices == 1):
# open new file to write string data textfile
text_file = open(ednotes_output, 'w', encoding='utf-8')
text_file.write("cat_id|content\n")
for filename in os.listdir(root_dir):
source_directory = root_dir + '/' + filename
arr = ednotes_extractor.get_ednotes(source_directory)
# open existing file to append the items in the array to the previously written textfile
text_file = open(ednotes_output, 'a', encoding='utf-8')
for item in arr:
text_file.write("%s\n" % item)
elif(choices==2):
print("Hi")

Having trouble into saving something to a csv file

My program does all that I want, but is not saving the final data to the csv file, I used a print before it to see if the data was right and it is, It is just not writing to the csv file, I'm using 'a' because I don't want it to rewrite what's already written, but it is still returning an error.
here's the part of the code:
soup = BeautifulSoup(answer)
for table in soup.findAll('table', {"class":"formTable"}):
for row in table.findAll('tr'):
#heading = row.find('td', {"class":"sectionHeading"})
#if heading is not None:
#print(heading.get_text());
#else:
label = row.find('td', {"class":"fieldLabel"})
data = row.find('td', {"class":"fieldData"})
if data is not None and label is not None:
csvline += label.get_text() + "," + data.get_text() + ","
print(csvline)
#csvline.encode('utf-8')
with open ('output_file_two.csv', 'a', encoding='utf-8') as f:
writer = csv.writer(f)
writer.writerow(csvline)
Here's the error:
Traceback (most recent call last):
File "C:\PROJECT\pdfs\final.py", line 95, in <module>
with open ('output_file_two.csv', 'a', encoding='utf-8') as f:
TypeError: 'encoding' is an invalid keyword argument for this function
Here's the entire program code in case of need
import shlex
import subprocess
import os
import platform
from bs4 import BeautifulSoup
import re
#import unicodecsv as csv
import csv
#import pickle
import requests
from robobrowser import RoboBrowser
import codecs
def rename_files():
file_list = os.listdir(r"C:\\PROJECT\\pdfs")
print(file_list)
saved_path = os.getcwd()
print('Current working directory is '+saved_path)
os.chdir(r'C:\\PROJECT\\pdfs')
for file_name in file_list:
os.rename(file_name, file_name.translate(None, " "))
os.chdir(saved_path)
rename_files()
def run(command):
if platform.system() != 'Windows':
args = shlex.split(command)
else:
args = command
s = subprocess.Popen(args,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
output, errors = s.communicate()
return s.returncode == 0, output, errors
# Change this to your PDF file base directory
base_directory = 'C:\\PROJECT\\pdfs'
if not os.path.isdir(base_directory):
print "%s is not a directory" % base_directory
exit(1)
# Change this to your pdf2htmlEX executable location
bin_path = 'C:\\Python27\\pdfminer-20140328\\tools\\pdf2txt.py'
if not os.path.isfile(bin_path):
print "Could not find %s" % bin_path
exit(1)
for dir_path, dir_name_list, file_name_list in os.walk(base_directory):
for file_name in file_name_list:
# If this is not a PDF file
if not file_name.endswith('.pdf'):
# Skip it
continue
file_path = os.path.join(dir_path, file_name)
# Convert your PDF to HTML here
args = (bin_path, file_name, file_path)
success, output, errors = run("python %s -o %s.html %s " %args)
if not success:
print "Could not convert %s to HTML" % file_path
print "%s" % errors
htmls_path = 'C:\\PROJECT'
with open ('score.csv', 'w') as f:
writer = csv.writer(f)
for dir_path, dir_name_list, file_name_list in os.walk(htmls_path):
for file_name in file_name_list:
if not file_name.endswith('.html'):
continue
with open(file_name) as markup:
soup = BeautifulSoup(markup.read())
text = soup.get_text()
match = re.findall("PA/(\S*)", text)#To remove the names that appear, just remove the last (\S*), to add them is just add the (\S*), before it there was a \s*
print(match)
writer.writerow(match)
for item in match:
data = item.split('/')
case_number = data[0]
case_year = data[1]
csvline = case_number + ","
browser = RoboBrowser()
browser.open('http://www.pa.org.mt/page.aspx?n=63C70E73&CaseType=PA')
form = browser.get_forms()[0] # Get the first form on the page
form['ctl00$PageContent$ContentControl$ctl00$txtCaseNo'].value = case_number
form['ctl00$PageContent$ContentControl$ctl00$txtCaseYear'].value = case_year
browser.submit_form(form, submit=form['ctl00$PageContent$ContentControl$ctl00$btnSubmit'])
# Use BeautifulSoup to parse this data
answer = browser.response.text
#print(answer)
soup = BeautifulSoup(answer)
for table in soup.findAll('table', {"class":"formTable"}):
for row in table.findAll('tr'):
#heading = row.find('td', {"class":"sectionHeading"})
#if heading is not None:
#print(heading.get_text());
#else:
label = row.find('td', {"class":"fieldLabel"})
data = row.find('td', {"class":"fieldData"})
if data is not None and label is not None:
csvline += label.get_text() + "," + data.get_text() + ","
print(csvline)
with open ('output_file_two.csv', 'a') as f:
writer = csv.writer(f)
writer.writerow(csvline)
EDIT
It's working, here's the code working
import shlex
import subprocess
import os
import platform
from bs4 import BeautifulSoup
import re
import unicodecsv as csv
import requests
from robobrowser import RoboBrowser
import codecs
def rename_files():
file_list = os.listdir(r"C:\\PROJECT\\pdfs")
print(file_list)
saved_path = os.getcwd()
print('Current working directory is '+saved_path)
os.chdir(r'C:\\PROJECT\\pdfs')
for file_name in file_list:
os.rename(file_name, file_name.translate(None, " "))
os.chdir(saved_path)
rename_files()
def run(command):
if platform.system() != 'Windows':
args = shlex.split(command)
else:
args = command
s = subprocess.Popen(args,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
output, errors = s.communicate()
return s.returncode == 0, output, errors
base_directory = 'C:\\PROJECT\\pdfs'
if not os.path.isdir(base_directory):
print "%s is not a directory" % base_directory
exit(1)
bin_path = 'C:\\Python27\\pdfminer-20140328\\tools\\pdf2txt.py'
if not os.path.isfile(bin_path):
print "Could not find %s" % bin_path
exit(1)
for dir_path, dir_name_list, file_name_list in os.walk(base_directory):
for file_name in file_name_list:
if not file_name.endswith('.pdf'):
continue
file_path = os.path.join(dir_path, file_name)
args = (bin_path, file_name, file_path)
success, output, errors = run("python %s -o %s.html %s " %args)
if not success:
print "Could not convert %s to HTML" % file_path
print "%s" % errors
htmls_path = 'C:\\PROJECT'
with open ('score.csv', 'w') as f:
writer = csv.writer(f)
for dir_path, dir_name_list, file_name_list in os.walk(htmls_path):
for file_name in file_name_list:
if not file_name.endswith('.html'):
continue
with open(file_name) as markup:
soup = BeautifulSoup(markup.read())
text = soup.get_text()
match = re.findall("PA/(\S*)", text)
print(match)
writer.writerow(match)
for item in match:
data = item.split('/')
case_number = data[0]
case_year = data[1]
csvline = case_number + ","
browser = RoboBrowser()
browser.open('http://www.pa.org.mt/page.aspx?n=63C70E73&CaseType=PA')
form = browser.get_forms()[0]
form['ctl00$PageContent$ContentControl$ctl00$txtCaseNo'].value = case_number
form['ctl00$PageContent$ContentControl$ctl00$txtCaseYear'].value = case_year
browser.submit_form(form, submit=form['ctl00$PageContent$ContentControl$ctl00$btnSubmit'])
answer = browser.response.text
soup = BeautifulSoup(answer)
for table in soup.findAll('table', {"class":"formTable"}):
for row in table.findAll('tr'):
label = row.find('td', {"class":"fieldLabel"})
data = row.find('td', {"class":"fieldData"})
if data is not None and label is not None:
csvline += label.get_text() + "," + data.get_text() + ","
print(csvline)
my_file = codecs.open('final_output.csv', 'a', 'utf-8')
my_file.write(csvline)
At the end there is a problem with your code
writer = csv.writer(f)
csv.writer(csvline) # here is the problem
See you initialize the writer, but then you don't use it.
writer = csv.writer(f)
writer.writerow(csvline)
Here :
with open ('output_file_two.csv', 'a') as f:
writer = csv.writer(f)
csv.writer (csvline)
You are instanciating a csv.writer, but not using it. This should read:
with open ('output_file_two.csv', 'a') as f:
writer = csv.writer(f)
writer.write(csvline)
Now there are quite a few other problems with your code, the first one being to manually create the 'csvline as text then using csv.writer to store it to file. csv.writer.write() expects a list of rows (tuples) and takes care of properly escaping what needs to be escaped, inserting the proper delimiters etc. It also has a writerow() method that takes a single tuple and so avoid building the whole list in memory FWIW.

Categories

Resources