Merge jpg links into pdf python - python

I'm trying to use kissmanga api to add mangas to my website.
This is the code:
from kissmanga import get_search_results, get_manga_details, get_manga_episode, get_manga_chapter
# Search for the manga, then print each match's title and its first chapter.
manga_search = get_search_results(query="Attack on titan")
for k in manga_search:
    # Read both fields from the same result so the title and the ID always
    # belong to the same manga (separate loops kept only the last values).
    titleManga = k.get('title')
    IdManga = k.get('mangaid')
    print(titleManga)
    manga_chapter = get_manga_chapter(mangaid=IdManga, chapNumber=1)
    print(manga_chapter)
However, when I print manga_chapter I get:
{'totalPages': "['https://cdn.mangaclash.com/manga_5f3c9f1374eb8/237848eb4cd4b762b981f4e863a3edf9/1.jpg', 'https://cdn.mangaclash.com/manga_5f3c9f1374eb8/237848eb4cd4b762b981f4e863a3edf9/2.jpg', 'https://cdn.mangaclash.com/manga_5f3c9f1374eb8/237848eb4cd4b762b981f4e863a3edf9/3.jpg', 'https://cdn.mangaclash.com/manga_5f3c9f1374eb8/237848eb4cd4b762b981f4e863a3edf9/4.jpg', ']"}
How would I go about combining those jpgs into a pdf that a user can later download from my site? (so sends pdf to user and then deletes it to save space)?
I tried separating them individually with json but no luck, still kinda newish to python.

Related

How do I download PDF invoices from stripe.com for previous year?

I need to download all invoices from stripe.com for the past year for accounting purposes. I didn't find a button for that, and when I contacted stripe.com support, they said it's not possible and I should use API if I can.
I found this page, but it wasn't working. I didn't want to spend much time on it, as I was sure this is a common use case and could not see why a fintech unicorn would not support something this simple. In the end I wrote a Python script for it. Since it took me some time, I am sharing it here in the hope that it will be useful to somebody else as well.
These are the steps to create a new Stripe API Key:
Log in to your Stripe dashboard.
Go to the API Keys section.
Click on Create secret key or Create restricted key (recommended).
If you chose to use a restricted key, select the Invoices -> Read permission.
Hit Save, copy the Secret key and paste it in the STRIPE_KEY.
import os
import arrow
import requests
STRIPE_KEY = "{digrin.com}"
SAVE_PATH = "./Invoices/"
import stripe
def get_invoices(year):
    """Return the number and PDF URL of every paid invoice created in ``year``.

    Pages through ``stripe.Invoice.list`` 100 invoices at a time, using the ID
    of the last invoice on each page as the ``starting_after`` cursor, until an
    empty page is returned.

    Args:
        year: Calendar year to export (an int, or anything ``int()`` accepts).

    Returns:
        A list of ``{"number": ..., "url": ...}`` dicts, one per invoice.
    """
    year = int(year)
    created_range = {
        'gte': int(arrow.get(f"{year}-01-01").timestamp()),
        # Exclusive bound at the next New Year so invoices created in the last
        # minute of Dec 31 (23:59:01-23:59:59) are not dropped.
        'lt': int(arrow.get(f"{year + 1}-01-01").timestamp()),
    }
    last_item_id = None
    result = []
    while True:
        invoices = stripe.Invoice.list(
            api_key=STRIPE_KEY,
            status='paid',
            created=created_range,
            limit=100,
            starting_after=last_item_id,
        )
        page = invoices['data']
        if not page:
            break
        result.extend(
            {"number": invoice['number'], "url": invoice['invoice_pdf']}
            for invoice in page
        )
        # Cursor for the next request: continue after the last invoice seen.
        last_item_id = page[-1]['id']
    return result
if __name__ == "__main__":
    # Create the target directory up front; open() below fails if it is missing.
    os.makedirs(SAVE_PATH, exist_ok=True)

    invoices = get_invoices(2022)
    print(f"There are {len(invoices)} invoices.")
    for invoice in invoices:
        response = requests.get(invoice['url'])
        # Fail loudly instead of saving an HTTP error page as a .pdf file.
        response.raise_for_status()
        with open(f"{SAVE_PATH}{invoice['number']}.pdf", "wb") as f:
            f.write(response.content)
        print(f"Saved file {invoice['number']}.pdf")

    # check count: compare the number of PDFs on disk with the Stripe response
    file_list = os.listdir(SAVE_PATH)
    pdf_list = [file for file in file_list if file.endswith(".pdf")]
    if len(pdf_list) != len(invoices):
        print(f"WARNING: There are {len(invoices)} invoices but {len(pdf_list)} pdf files in the directory.")
    else:
        print(f"There are {len(pdf_list)} files in the directory, matches stripe response.")

How to combine boto3 python code and get one csv output

I have written two scripts to get some details about EC2 instances. I needed two because I am not able to get the 'ComputerName' from EC2 describe_instance, so I created a separate script that uses the boto3 SSM client to get the 'ComputerName'. Now I am trying to combine both scripts into a single one and get the output in a single CSV with separate columns and rows. Could someone help me combine the code below to produce that single CSV output? Please also find the sample output below.
import boto3
import csv
profiles = ['Dev_Databases','Dev_App','Prod_Database','Prod_App']
########################EC2-Details################################
# Write one CSV row per EC2 instance found in each AWS profile.
# The with-block guarantees the file is closed even if an AWS call raises.
with open("EC2-Inventory.csv", "w", newline='') as csv_ob:
    csv_w = csv.writer(csv_ob)
    csv_w.writerow(["S_NO","profile","Instance_Id",'Instance_Type','Platform','State','LaunchTime','Privat_Ip'])
    cnt = 1  # running serial number across all profiles
    for ec2 in profiles:
        aws_mag_con = boto3.session.Session(profile_name=ec2)
        ec2_con_re = aws_mag_con.resource(service_name="ec2", region_name="ap-southeast-1")
        for each in ec2_con_re.instances.all():
            # Build the row once so the console output and the CSV cannot drift apart.
            row = [
                cnt,
                ec2,
                each.instance_id,
                each.instance_type,
                each.platform,
                each.state,
                each.launch_time.strftime("%Y-%m-%d"),
                each.private_ip_address,
            ]
            print(*row)
            csv_w.writerow(row)
            cnt += 1
#######################HostName-Details###########################
# Write one CSV row per online SSM-managed instance in each AWS profile.
# The with-block guarantees the file is closed even if an AWS call raises.
with open("Hostname-Inventory.csv", "w", newline='') as csv_ob1:
    csv_w1 = csv.writer(csv_ob1)
    csv_w1.writerow(["S_NO",'Profile','InstanceId','ComputerName','PlatformName'])
    cnt1 = 1  # running serial number across all profiles
    for ssm in profiles:
        session = boto3.Session(profile_name=ssm)
        ssm_client = session.client('ssm', region_name='ap-southeast-1')
        paginator = ssm_client.get_paginator('describe_instance_information')
        response_iterator = paginator.paginate(Filters=[{'Key': 'PingStatus','Values': ['Online']}])
        for item in response_iterator:
            for instance in item['InstanceInformationList']:
                # Re-check the status locally in addition to the request filter.
                if instance.get('PingStatus') != 'Online':
                    continue
                InstanceId = instance.get('InstanceId')
                ComputerName = instance.get('ComputerName')#.replace(".WORKGROUP", "")
                PlatformName = instance.get('PlatformName')
                print(InstanceId,ComputerName,PlatformName)
                csv_w1.writerow([cnt1,ssm,InstanceId,ComputerName,PlatformName])
                cnt1 += 1
Sample Output Below:

Python Reddit API converting gifv to readable mp4

I am completely stuck. While dabbling in Reddit's API via PRAW, I wanted to learn to save the number 1 hottest post as an mp4. However, Reddit saves all of their gifs on Imgur, which converts all gifs to gifv. How would I go about converting the gifv to mp4 so I can read them? Btw, simply renaming the file seems to lead to corruption.
This is my code so far: (details have been xxxx'd for confidentiality)
# Download whatever the hottest post links to and save it with an .mp4 extension.
reddit = praw.Reddit(client_id ="xxxx" , client_secret ="xxxx", username = "xxxx", password ="xxxx", user_agent="xxxx")
subreddit = reddit.subreddit("dankmemes")
hot_dm = subreddit.hot(limit=1)
for sub in hot_dm:
    print(sub)
    url = sub.url
    print(url)
    print(sub.permalink)
    meme = requests.get(url)
    # here the file is created but when played is corrupted
    # The with-block closes the file even if the write fails.
    with open("{}.mp4".format(sub), "wb") as newF:
        newF.write(meme.content)
Some posts already have an mp4 conversion inside the preview > variants portion of the json response.
Therefore to download only those posts that have a gif and therefore have an mp4 version you could do something like this:
# Save the mp4 conversion of every hot post that offers one.
subreddit = reddit.subreddit("dankmemes")
hot_dm = subreddit.hot(limit=10)
for sub in hot_dm:
    # Only link posts (image/video/link) are wanted. Posts that carry body text,
    # such as a pinned rules post, are skipped. The check was previously
    # inverted and skipped the link posts instead.
    if sub.selftext != "":
        continue
    try:  # try to access variants and catch the exception thrown
        has_variants = sub.preview['images'][0]['variants']  # variants contain both gif and mp4 versions (if available)
    except AttributeError:
        continue  # no conversion available as variants doesn't exist
    if 'mp4' not in has_variants:  # check that there is an mp4 conversion available
        continue
    mp4_video = has_variants['mp4']['source']['url']
    print(sub, sub.url, sub.permalink)
    meme = requests.get(mp4_video)
    with open(f"{sub}.mp4", "wb") as newF:
        newF.write(meme.content)
Though you are most likely going to want to increase the limit of posts that you look through when searching through hot as the first post may be a pinned post (usually some rules about the subreddit), this is why I initially checked the selftext. In addition, there may be other posts that are only images, therefore with a small limit you might not return any posts that could be converted to mp4s.

unotools insert image into document (libreoffice)

I'm trying to insert an image into a libreoffice document that is handled/controlled by unotools.
Therefore I start LibreOffice with this command:
soffice --accept='socket,host=localhost,port=8100;urp;StarOffice.Service'
Inside my python code I can connect to LibreOffice:
from unotools import Socket, connect
from unotools.component.writer import Writer
# Connect to the office instance listening on localhost:8100 and wrap the
# resulting context in a Writer.
office_socket = Socket('localhost', 8100)
context = connect(office_socket)
writer = Writer(context)
(This code is taken from this documentation: https://pypi.org/project/unotools/)
By using writer.set_string_to_end() I can add some text to the document. But I also want to insert an image into the document. So far I couldn't find any resource where this was done. The image is inside of my clipboard, so ideally I want to insert the image directly from there. Alternatively I can save the image temporarily and insert the saved file.
Is there any known way how to insert images by using unotools? Any alternative solution would also be great.
I've found a way to insert images by using uno instead of unotools:
import uno
from com.sun.star.awt import Size
from pythonscript import ScriptContext
def connect_to_office():
    """Define the global ``XSCRIPTCONTEXT`` unless it already exists.

    Resolves the component context of the office instance listening on
    localhost:8100 and wraps it in a ``ScriptContext``. Does nothing when
    ``XSCRIPTCONTEXT`` is already present in the module globals.
    """
    global XSCRIPTCONTEXT
    if 'XSCRIPTCONTEXT' in globals():
        return
    local_ctx = uno.getComponentContext()
    url_resolver = local_ctx.ServiceManager.createInstanceWithContext(
        'com.sun.star.bridge.UnoUrlResolver', local_ctx)
    remote_ctx = url_resolver.resolve(
        "uno:socket,host=localhost,port=8100;urp;StarOffice.ComponentContext")
    XSCRIPTCONTEXT = ScriptContext(remote_ctx, None, None)
def insert_image(doc):
    """Add the image at '/somepath/image.png' to ``doc`` as a graphic shape.

    Args:
        doc: Document object providing ``DrawPage`` and ``createInstance``.
    """
    size = Size()
    path = uno.systemPathToFileUrl('/somepath/image.png')
    # Use the ``doc`` argument: this is a plain function, so there is no ``self``.
    draw_page = doc.DrawPage
    image = doc.createInstance('com.sun.star.drawing.GraphicObjectShape')
    image.GraphicURL = path
    draw_page.add(image)
    size.Width = 7500
    size.Height = 5000
    image.setSize(size)
    # NOTE(review): AnchorType is passed as a plain string here; confirm whether
    # the API expects a TextContentAnchorType enum value instead.
    image.setPropertyValue('AnchorType', 'AT_FRAME')
# Make sure XSCRIPTCONTEXT is defined before it is used below.
connect_to_office()
# Fetch the document from the script context and insert the image into it.
doc = XSCRIPTCONTEXT.getDocument()
insert_image(doc)
sources:
https://ask.libreoffice.org/en/question/38844/how-do-i-run-python-macro-from-the-command-line/
https://forum.openoffice.org/en/forum/viewtopic.php?f=45&t=80302
I still don't know how to insert an image from my clipboard, I worked around that problem by saving the image first. If someone knows a way to insert the image directly from the clipboard that would still be helpful.

How to fill PDF forms using Python

I have a PDF form created using Adobe LiveCycle Designer ES 10.4. I need to fill it using Python so that we can reduce manual labor. I searched the web and read some articles; most of them were focused on the pdfrw library. I tried using it and extracted some information from the PDF form, as shown below.
Code
from pdfrw import PdfReader
# Load the form and dump its top-level structure for inspection.
pdf = PdfReader('sample.pdf')
for section in (pdf.keys(), pdf.Info, pdf.Root.keys()):
    print(section)
print(f'PDF has {len(pdf.pages)} pages')
Output
['/Root', '/Info', '/ID', '/Size']
{'/CreationDate': "(D:20180822164509+05'30')", '/Creator': '(Adobe LiveCycle Designer ES 10.4)', '/ModDate': "(D:20180822165611+05'30')", '/Producer': '(Adobe XML Form Module Library)'}
['/AcroForm', '/MarkInfo', '/Metadata', '/Names', '/NeedsRendering', '/Pages', '/Perms', '/StructTreeRoot', '/Type']
PDF has 1 pages
I am not sure how I can go further with pdfrw to access the fillable fields of the PDF form and fill them using Python. Is it possible? Any suggestions would be helpful.
You can find the form fields here:
pdf.Root.AcroForm.Fields
or here
pdf.Root.Pages.Kids[page_index].Annots
This is a PdfArray object. Basically a List.
The Name of the field is found here:
pdf.Root.AcroForm.Fields[field_index].T
Other keys include the value .V
There's a bunch of display information, like the font etc under .AP.N.Resources
However, if you update the value for a field and output the PDF file, it might only display the value when the field has focus, i.e. is clicked on.
I haven't figured out how to fix that yet.
I wrote a library built upon:'pdfrw', 'pdf2image', 'Pillow', 'PyPDF2' called fillpdf (pip install fillpdf and poppler dependency conda install -c conda-forge poppler)
Basic usage:
from fillpdf import fillpdfs
# get_form_fields returns a dictionary of the form's fields.
fillpdfs.get_form_fields("blank.pdf")

# Fill in the values you want to write, keyed by field name.
# For radio boxes: 'Off' = not filled, 'Yes' = filled.
data_dict = dict(
    Text2='Name',
    Text4='LastName',
    box='Yes',
)
fillpdfs.write_fillable_pdf('blank.pdf', 'new.pdf', data_dict)

# Optional: produce a flattened copy of the filled form.
fillpdfs.flatten_pdf('new.pdf', 'newflat.pdf')
More info here:
https://github.com/t-houssian/fillpdf
If some fields don't fill, you can use fitz (pip install PyMuPDF) and PyPDF2 (pip install PyPDF2) like the following altering the points as needed:
import fitz
from PyPDF2 import PdfFileReader
# Open the same PDF twice: fitz for editing, PyPDF2 for the page geometry.
file_handle = fitz.open('blank.pdf')
pdf = PdfFileReader(open('blank.pdf', 'rb'))
box = pdf.getPage(0).mediaBox
w, h = box.getWidth(), box.getHeight()
center_x = w / 2  # placements below are offsets from the horizontal centre

# For images
image_rectangle = fitz.Rect(center_x - 200, h - 255, center_x - 100, h - 118)
pages = pdf.getNumPages() - 1  # index of the last page
last_page = file_handle[pages]
last_page._wrapContents()
last_page.insertImage(image_rectangle, filename='image.png')

# For text
text_origin = fitz.Point(center_x - 247, h - 478)
last_page.insertText(text_origin, 'John Smith', fontsize=14, fontname="times-bold")
file_handle.save('newpdf.pdf')
Use this to fill every field if they are indexed.
# Stamp every annotation with "<field index>-<page index>" so each field can be
# identified in the written PDF.
from pdfrw import PdfDict, PdfReader, PdfWriter

template = PdfReader('template.pdf')
for page_c, page in enumerate(template.Root.Pages.Kids):  # LOOP through pages
    # A page without annotations yields None for .Annots; treat it as empty.
    for annot_c, annot in enumerate(page.Annots or []):  # LOOP through fields
        annot.update(PdfDict(V=str(annot_c) + '-' + str(page_c)))
PdfWriter().write('output.pdf', template)
AcroForm based Forms using PDFix SDK
def SetFormFieldValue(email, key, open_path, save_path):
    """Set the form field "Text1" to "New Value" and save the document.

    Args:
        email: Account e-mail passed to ``pdfix.Authorize``.
        key: License key passed to ``pdfix.Authorize``.
        open_path: Path of the PDF to open.
        save_path: Path the modified PDF is saved to.

    Raises:
        Exception: If PDFix cannot be initialized or authorized, or the
            document cannot be opened or saved.
    """
    pdfix = GetPdfix()
    if pdfix is None:
        raise Exception('Pdfix Initialization fail')
    try:
        # Authorize with the credentials passed in by the caller.
        if not pdfix.Authorize(email, key):
            raise Exception('Authorization fail : ' + pdfix.GetError())

        doc = pdfix.OpenDoc(open_path, "")
        if doc is None:
            raise Exception('Unable to open pdf : ' + pdfix.GetError())
        try:
            field = doc.GetFormFieldByName("Text1")
            if field is not None:
                field.SetValue("New Value")

            # The document is saved even when the field does not exist.
            if not doc.Save(save_path, kSaveFull):
                raise Exception(pdfix.GetError())
        finally:
            # Close the document on both the success and the error path.
            doc.Close()
    finally:
        # Always release the PDFix instance.
        pdfix.Destroy()
A full solution was provided here: How to edit editable pdf using the pdfrw library?
The key part is the:
# Set NeedAppearances to true in the document's AcroForm dictionary.
# NOTE(review): presumably this makes viewers render filled values without the
# field needing focus; confirm against the PDF specification.
template_pdf.Root.AcroForm.update(pdfrw.PdfDict(NeedAppearances=pdfrw.PdfObject('true')))

Categories

Resources