How to pass multiple values in GitLab CI/CD variables - Python

I have multiple projects in my GitLab repository, to which I commit whenever required.
I have written Python code that produces a CSV report of all the commits I have made, for all the projects in the repository; the project IDs are hard-coded in the script as a list.
The header of the CSV file is: Date, Submitted, Gitlab_url, Project, Username, Subject.
Now I want to run the pipeline manually, setting up an environment variable named 'Project_Ids'
and passing some of the project IDs as its value (more than one project ID per value), so that the CSV report is generated only for the projects passed in the environment variable.
So my question is: how can I pass multiple project IDs as the value of the 'Project_Ids' key while running the pipeline manually?
import gitlab
import os
import datetime
import csv
import re

Project_id_list = ['9427', '8401', '17937', '26813', '24899', '23729', '34779', '27638', '28600']
headerList = ['Date', 'Submitted', 'Gitlab_url', 'Project', 'Branch', 'Status', 'Username', 'Ticket', 'Subject']
filename = 'mydemo_{}'.format(datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S'))

# private token authentication
gl = gitlab.Gitlab('https://main.gitlab.in.com/', private_token="MLyWwLyEhU2zZjjjhZXog")
gl.auth()

# generate one CSV report per project
for m in Project_id_list:
    print(m)
    projects = gl.projects.get(m)
    commits = projects.commits.list(all=True, query_parameters={'ref_name': 'master'})
    with open(f"{filename}_{m}.csv", 'w', newline="") as file:
        dw = csv.DictWriter(file, delimiter=',', fieldnames=headerList)
        dw.writeheader()
        for commit in commits:
            print(commit)
            msg = commit.message
            if 'master' in msg or 'LCS-' in msg:
                projectName = projects.path_with_namespace
                branch = 'master'
                status = 'merged'
                date = commit.committed_date.split('T')[0]
                submitted1 = commit.created_at.split('T')[1]
                submitted = submitted1.split('.000')[0]
                Gitlab_url = commit.web_url.split('-')[0]
                username = commit.author_name
                subject = commit.title
                subject1 = commit.message.splitlines()
                print(subject1)
                subject2 = subject1[0:3]
                print(subject2)
                subject3 = ' '.join(subject2)
                print(subject3)
                match = re.search(r'S-\d+', subject3)
                if match:
                    ticket = match.group(0)
                    ticket1 = 'https://.in.com/browse/' + str(ticket)
                else:
                    ticket1 = 'Not Found'
                dw.writerow({'Date': date, 'Submitted': submitted, 'Gitlab_url': Gitlab_url, 'Project': projectName,
                             'Branch': branch, 'Status': status, 'Username': username, 'Ticket': ticket1,
                             'Subject': subject3})

Just use a space (or some other delimiter) in the variable value, for example a string like 123 456 789.
Then simply parse the variable in Python, for example with the string .split method, which splits on whitespace.
import os
...
project_ids_variable = os.environ.get('PROJECT_IDS', '')  # '123 456 789'
project_ids = project_ids_variable.split()  # ['123', '456', '789']

for project_id in project_ids:
    project = gl.projects.get(project_id)
    print(project)
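
To tie this back to the script in the question, here is a minimal sketch, assuming the variable is named PROJECT_IDS as above and is entered in the manual "Run pipeline" form (e.g. 9427 8401 17937), that falls back to the hard-coded list when the variable is not set:

import os

# Hard-coded fallback list from the original script
DEFAULT_PROJECT_IDS = ['9427', '8401', '17937', '26813', '24899',
                       '23729', '34779', '27638', '28600']

# Value supplied as a CI/CD variable when running the pipeline manually
env_value = os.environ.get('PROJECT_IDS', '')
Project_id_list = env_value.split() if env_value else DEFAULT_PROJECT_IDS

for m in Project_id_list:
    ...  # generate the per-project CSV exactly as before

Every variable added in the "Run pipeline" form is exposed to the job as an environment variable, so the rest of the script stays unchanged.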

Related

Cannot access custom column values in To-do tasks via MS Graph API using Python

I have created custom columns "VESSEL NAME", "VOYAGE NUMBER", "ETD" and "CUT-OFF" in my Outlook To-do task, as shown in the screenshot below.
[Outlook tasks snapshot]
I need to access the values in those columns via the MS Graph API, but have had no luck so far.
Not sure if I am moving in the right direction, but I have added an openTypeExtension named "ZZZ" to my task as a test. I can retrieve it via the GET method, but cannot locate it anywhere in Outlook, where I had hoped to find it amongst the custom columns or other task fields.
Here is the Python code:
# In[1]:
import json
import requests
# In[2]:
token = json.load(open('ms_graph_state.jsonc'))["access_token"]
header = {'Authorization':'Bearer '+token}
header1 = {'Authorization':'Bearer '+token,'Content-Type':'application/json'}
base_url = 'https://graph.microsoft.com/v1.0/me/'
# In[3]:
task_list_id = requests.get(base_url+'todo/lists/',headers=header).json()['value'][1]['id']
task_list = base_url+'todo/lists/'+task_list_id
task_id = requests.get(task_list+'/tasks/',headers=header).json()['value'][0]['id']
# In[4]:
payload = {"#odata.type" : "microsoft.graph.openTypeExtension","extensionName" : "ZZZ","xxx" : "yyy"}
# In[5]:
create_oe = requests.post(task_list+'/tasks/'+task_id+'/extensions',headers=header1,json=payload).json()
# In[6]:
oe = requests.get(task_list+'/tasks/'+task_id+'/extensions/ZZZ',headers=header1).json()
oe
'''
Output:
{'#odata.context': "https://graph.microsoft.com/v1.0/$metadata#users('to-do-app%40outlook.co.nz')/todo/lists('AQMkADAwATZiZmYAZC0xNDM3LTZlYmMtMDACLTAwCgAuAAADtVcV-o2b90KtdxZu_nQLmgEA2HIj8QQFbES8Q4ESBpmcmgAAAgESAAAA')/tasks('AQMkADAwATZiZmYAZC0xNDM3LTZlYmMtMDACLTAwCgBGAAADtVcV-o2b90KtdxZu_nQLmgcA2HIj8QQFbES8Q4ESBpmcmgAAAgESAAAA2HIj8QQFbES8Q4ESBpmcmgAAAUeYHQAAAA%3D%3D')/extensions/$entity",
'extensionName': 'ZZZ',
'id': 'microsoft.graph.openTypeExtension.ZZZ',
'xxx': 'yyy'}
'''
# In[7]:
task = requests.get(task_list+'/tasks/'+task_id,headers=header).json()
task
'''
Output:
{'#odata.context': "https://graph.microsoft.com/v1.0/$metadata#users('to-do-app%40outlook.co.nz')/todo/lists('AQMkADAwATZiZmYAZC0xNDM3LTZlYmMtMDACLTAwCgAuAAADtVcV-o2b90KtdxZu_nQLmgEA2HIj8QQFbES8Q4ESBpmcmgAAAgESAAAA')/tasks/$entity",
'#odata.etag': 'W/"2HIj8QQFbES8Q4ESBpmcmgAAAa4dUQ=="',
'importance': 'normal',
'isReminderOn': False,
'status': 'notStarted',
'title': 'test-to-do-task',
'createdDateTime': '2021-08-14T20:14:22.5557165Z',
'lastModifiedDateTime': '2021-08-17T06:46:46.260686Z',
'id': 'AQMkADAwATZiZmYAZC0xNDM3LTZlYmMtMDACLTAwCgBGAAADtVcV-o2b90KtdxZu_nQLmgcA2HIj8QQFbES8Q4ESBpmcmgAAAgESAAAA2HIj8QQFbES8Q4ESBpmcmgAAAUeYHQAAAA==',
'body': {'content': '\r\n\r\n', 'contentType': 'text'}}
'''
Appreciate your help on this.
Thank you
AFAIK, this is currently not supported. That being said, consider filing a user voice request for your specific scenario so it can be considered for future implementation.

PRAW Loop With HTTP Exceptions

I am using a Python script to loop through a list of subreddits and pull their posts with praw. The list is quite long, however, and occasionally a subreddit on it will be deleted or otherwise inaccessible, resulting in an HTTP exception (403, 404, etc.). Does anyone know a line or two I can add to skip the subreddits that raise errors? My code is below.
df = pd.read_csv('reddits.csv', sep=',')
df.head()

Submission = namedtuple('Submission', ['time', 'score', 'title', 'text', 'author', 'comments', 'url', 'domain', 'permalink', 'ups', 'downs', 'likes', 'crosspost', 'duplicates', 'views'])
data = []

for i in df.reddits:
    subreddit = reddit.subreddit(i)
    for submission in subreddit.new(limit=10):
        time = datetime.utcfromtimestamp(submission.created_utc)
        score = submission.score
        title = submission.title
        text = submission.selftext
        author = submission.author
        comments = submission.num_comments
        url = submission.url
        domain = submission.domain
        permalink = submission.permalink
        ups = submission.ups
        downs = submission.downs
        likes = submission.likes
        crosspost = submission.num_crossposts
        duplicates = submission.num_duplicates
        views = submission.view_count
        data.append(Submission(time, score, title, text, author, comments, url, domain, permalink, ups, downs, likes, crosspost, duplicates, views))
    df = pd.DataFrame(data)
    os.chdir('wd')
    filename = i + str(datetime.now()) + '.csv'
    df.to_csv(filename, index=False, encoding='utf-8')
You need to catch the exception; then you can continue:
from prawcore.exceptions import PrawcoreException

df = pd.read_csv('reddits.csv', sep=',')
df.head()

Submission = namedtuple('Submission', ['time', 'score', 'title', 'text', 'author', 'comments', 'url', 'domain', 'permalink', 'ups', 'downs', 'likes', 'crosspost', 'duplicates', 'views'])
data = []

for i in df.reddits:
    subreddit = reddit.subreddit(i)
    # PRAW is lazy: the 403/404 is only raised once the listing is actually
    # fetched, so the try must wrap the iteration. PrawcoreException is the
    # base class of PRAW's HTTP errors (Forbidden, NotFound, ...).
    try:
        for submission in subreddit.new(limit=10):
            data.append(Submission(
                datetime.utcfromtimestamp(submission.created_utc),
                submission.score,
                submission.title,
                submission.selftext,
                submission.author,
                submission.num_comments,
                submission.url,
                submission.domain,
                submission.permalink,
                submission.ups,
                submission.downs,
                submission.likes,
                submission.num_crossposts,
                submission.num_duplicates,
                submission.view_count,
            ))
    except PrawcoreException as e:
        print(f"Got {e} retrieving {subreddit}")
        continue  # control passes back to the next iteration of the outer loop

    df = pd.DataFrame(data)
    os.chdir('wd')
    filename = i + str(datetime.now()) + '.csv'
    df.to_csv(filename, index=False, encoding='utf-8')
Also, unrelated: i is not a good name for that value; it traditionally stands for "index", which is not what it contains here. e would be the corresponding generic name, standing for "element", but something like subreddit_name would be the idiomatic choice in Python.

Appending data to previous row of a CSV using Python

I'm working on a Python script that takes Nessus data exported as CSV and removes duplicate data. Due to the way the export works, results for different ports and protocols each get their own row, even though all the other data in the row is the same. I need to remove these duplicates, but I want to keep the Port and Protocol column data and append it to the previous row.
Here is a very small CSV I'm using to test and build the script:
[Screenshot of CSV file]
As you can see, all fields are exactly the same apart from the Port field (and sometimes the Protocol field will differ too), so I need to read both rows of the CSV file and then combine the ports like this: 80, 443, and likewise the protocols: tcp, tcp.
Then save only the one line to remove the duplicate data. I have tried doing this by checking whether there has already been an instance of the Plugin ID, however my output only prints the second row's Port and Protocol.
protocollist = []
portlist = []
pluginid_list = []
multiple = False

with open(csv_file_input, 'rb') as csvfile:
    nessusreader = csv.DictReader(csvfile)
    for row in nessusreader:
        pluginid = row['Plugin ID']
        if pluginid != '':
            pluginid_list.append(row['Plugin ID'])
            print(pluginid_list)
            count = pluginid_list.count(pluginid)
            cve = row['CVE']
            if count > 0:
                protocollist.append(row['Protocol'])
                print(protocollist)
                portlist.append(row['Port'])
                print(portlist)
                print('Counted more than 1')
                multiple = True
            if multiple == True:
                protocol = ', '.join(protocollist)
                port = ', '.join(portlist)
            else:
                protocol = row['Protocol']
                port = row['Port']
            cvss = row['CVSS']
            risk = row['Risk']
            host = row['Host']
            name = row['Name']
            synopsis = row['Synopsis']
            description = row['Description']
            solution = row['Solution']
            seealso = row['See Also']
            pluginoutput = row['Plugin Output']

with open(csv_file_output, 'w') as csvfile:
    fieldnames = ['Plugin ID', 'CVE', 'CVSS', 'Risk', 'Host', 'Protocol', 'Port', 'Name', 'Synopsis', 'Description', 'Solution', 'See Also', 'Plugin Output']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerow({'Plugin ID': pluginid, 'CVE': cve, 'CVSS': cvss, 'Risk': risk, 'Host': host, 'Protocol': protocol, 'Port': port, 'Name': name, 'Synopsis': synopsis, 'Description': description, 'Solution': solution, 'See Also': seealso, 'Plugin Output': pluginoutput})
There are probably a few errors in the code as I've been trying different things, but I wanted to show what I've been working on to give more context to the issue. This code works when the data is only as shown in the CSV, since there are only two items; however, when I introduced a third set of data with a different Plugin ID, it was added to the list as well, probably because the if statement is set to > 0.
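
For what it's worth, here is a minimal sketch of one way to do the merge: group the rows by Plugin ID and Host (the assumption being that those two fields together identify a duplicate finding), concatenate the Port and Protocol values within each group, and write each group out once. csv_file_input and csv_file_output are assumed to be defined as in the question.

import csv
from collections import OrderedDict

fieldnames = ['Plugin ID', 'CVE', 'CVSS', 'Risk', 'Host', 'Protocol', 'Port', 'Name',
              'Synopsis', 'Description', 'Solution', 'See Also', 'Plugin Output']

groups = OrderedDict()  # keeps the first-seen order of findings

with open(csv_file_input, newline='') as infile:
    for row in csv.DictReader(infile):
        key = (row['Plugin ID'], row['Host'])  # assumption: duplicates share Plugin ID and Host
        if key in groups:
            # Duplicate: append this row's port/protocol to the first row seen
            groups[key]['Port'] += ', ' + row['Port']
            groups[key]['Protocol'] += ', ' + row['Protocol']
        else:
            groups[key] = {name: row[name] for name in fieldnames}

with open(csv_file_output, 'w', newline='') as outfile:
    writer = csv.DictWriter(outfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(groups.values())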

Check whether an item with a certain label and description already exists on Wikidata by Pywikibot

I am looking for a way to find out whether an item with a certain label and description already exists on Wikidata. This task should be performed by Pywikibot. I don't want my bot to create a new item if it already exists. So far, my code looks like this:
...
def check_item_existence(self):
    transcript_file = self.transcript_file
    with open(transcript_file) as csvfile:
        transcript_dict = csv.DictReader(csvfile, delimiter="\t")
        for row in transcript_dict:
            site = pywikibot.Site("en", "TillsWiki")
            existing_item = pywikibot.ItemPage(site, row['Name'])
            title = existing_item.title()
You can use the wbsearchentities API module from the Wikibase API. The code to check whether an item with a specific English label exists on Wikidata is:
from pywikibot.data import api
...
def wikiitemexists(label):
    params = {'action': 'wbsearchentities', 'format': 'json',
              'language': 'en', 'type': 'item', 'limit': 1,
              'search': label}
    request = api.Request(site=acta_site, **params)  # acta_site: the pywikibot.Site to query
    result = request.submit()
    return len(result['search']) > 0
Notice that labels on Wikidata are not unique, and that the API searches aliases as well.
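
A possible way to wire this into the bot, sketched under the assumption that acta_site is your pywikibot.Site and that a missing label means the item should be created (creation shown via ItemPage.editEntity, which is one documented way to create a fresh item):

import pywikibot

acta_site = pywikibot.Site("wikidata", "wikidata")  # assumption: adjust to your wiki
repo = acta_site.data_repository()

label = "example label from the transcript"
if wikiitemexists(label):
    print(f"An item labelled {label!r} already exists; skipping creation.")
else:
    new_item = pywikibot.ItemPage(repo)  # fresh, not-yet-created item
    new_item.editEntity({'labels': {'en': label}}, summary='Bot: creating new item')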

Reading metadata with Python

For the past two days I have been scanning the Internet trying to find a solution to my problem. I have a folder of different files; they run the gamut of file types. I am trying to write a Python script that will read the metadata from each file, if it exists. The intent is to eventually output the data to a file, to compare against another program's metadata extraction.
I have found some examples that worked for a very small number of the files in the directory. All the approaches I have found involve opening a Storage Container object. I am new to Python and am not sure what a Storage Container object is. I just know that most of my files raise an error when trying to use
pythoncom.StgOpenStorage(<File Name>, None, flags)
With the few that actually work, I am able to get the main metadata tags, like Title, Subject, Author, Created, etc.
Does anyone know a way other than Storage Containers to get to the metadata? Also, if there is an easier way to do this with another language, by all means, suggest it.
Thanks
You can use the Shell COM objects to retrieve any metadata visible in Explorer:
import win32com.client

sh = win32com.client.gencache.EnsureDispatch('Shell.Application', 0)
ns = sh.NameSpace(r'm:\music\Aerosmith\Classics Live!')

colnum = 0
columns = []
while True:
    colname = ns.GetDetailsOf(None, colnum)
    if not colname:
        break
    columns.append(colname)
    colnum += 1

for item in ns.Items():
    print(item.Path)
    for colnum in range(len(columns)):
        colval = ns.GetDetailsOf(item, colnum)
        if colval:
            print('\t', columns[colnum], colval)
I decided to write my own answer as an attempt to combine and clarify the answers above (which helped me immensely in solving my problem).
I'd say there are two approaches to this problem.
Situation 1: you know which metadata the file contains (i.e. which metadata you're interested in).
In this case, let's say you have a list of strings containing the metadata you're interested in. I assume here that these tags are valid (i.e. you're not asking for the number of pixels of a .txt file).
metadata = ['Name', 'Size', 'Item type', 'Date modified', 'Date created']
Now, using the code provided by Greedo and Roger Upole I created a function which accepts the file's full path and name separately and returns a dictionary containing the metadata of interest:
import win32com.client

def get_file_metadata(path, filename, metadata):
    # path shouldn't end with a backslash, e.g. "E:\Images\Paris"
    # filename must include the extension, e.g. "PID manual.pdf"
    # Returns a dictionary containing the requested file metadata.
    sh = win32com.client.gencache.EnsureDispatch('Shell.Application', 0)
    ns = sh.NameSpace(path)
    # Enumeration is necessary because ns.GetDetailsOf only accepts an integer as its 2nd argument
    file_metadata = dict()
    item = ns.ParseName(str(filename))
    for ind, attribute in enumerate(metadata):
        attr_value = ns.GetDetailsOf(item, ind)
        if attr_value:
            file_metadata[attribute] = attr_value
    return file_metadata

# Note: you must know the full path to the file.
# Example usage:
if __name__ == '__main__':
    folder = r'E:\Docs\BMW'
    filename = 'BMW series 1 owners manual.pdf'
    metadata = ['Name', 'Size', 'Item type', 'Date modified', 'Date created']
    print(get_file_metadata(folder, filename, metadata))
Results with:
{'Name': 'BMW series 1 owners manual.pdf', 'Size': '11.4 MB', 'Item type': 'Foxit Reader PDF Document', 'Date modified': '8/30/2020 11:10 PM', 'Date created': '8/30/2020 11:10 PM'}
Which is correct, as I just created the file and I use Foxit PDF reader as my main pdf reader.
So this function returns a dictionary, where the keys are the metadata tags and the values are the values of those tags for the given file.
Situation 2: you don't know which metadata the file contains
This is a somewhat tougher situation, especially in terms of optimality. I analyzed the code proposed by Roger Upole: basically, he attempts to read the metadata of a None file, which yields a list of all possible metadata tags. So I thought it might be easier to hard-code this list and then attempt to read every tag. That way, once you're done, you'll have a dictionary containing all the tags the file actually possesses.
Simply copy what I THINK is every possible metadata tag and attempt to obtain all of them from the file.
Basically, copy this declaration of a Python list and use the code above (replacing metadata with this new list):
metadata = ['Name', 'Size', 'Item type', 'Date modified', 'Date created', 'Date accessed', 'Attributes', 'Offline status', 'Availability', 'Perceived type', 'Owner', 'Kind', 'Date taken', 'Contributing artists', 'Album', 'Year', 'Genre', 'Conductors', 'Tags', 'Rating', 'Authors', 'Title', 'Subject', 'Categories', 'Comments', 'Copyright', '#', 'Length', 'Bit rate', 'Protected', 'Camera model', 'Dimensions', 'Camera maker', 'Company', 'File description', 'Masters keywords', 'Masters keywords']
I don't think this is a great solution, but on the other hand you can keep this list as a global variable and use it without needing to pass it to every function call. For the sake of completeness, here is the output of the previous function using this new metadata list:
{'Name': 'BMW series 1 owners manual.pdf', 'Size': '11.4 MB', 'Item type': 'Foxit Reader PDF Document', 'Date modified': '8/30/2020 11:10 PM', 'Date created': '8/30/2020 11:10 PM', 'Date accessed': '8/30/2020 11:10 PM', 'Attributes': 'A', 'Perceived type': 'Unspecified', 'Owner': 'KEMALS-ASPIRE-E\\kemal', 'Kind': 'Document', 'Rating': 'Unrated'}
As you can see, the dictionary returned now contains all the metadata that the file contains.
The reason this works is the if statement:
if attr_value:
which means that whenever an attribute has no value, it won't be added to the returned dictionary.
I'd underline that when processing many files, it would be better to declare the list as a global/static variable instead of passing it to the function on every call.
The problem is that there are two ways that Windows stores file metadata. The approach you're using is suitable for files created by COM applications; this data is included inside the file itself. However, with the introduction of NTFS5, any file can contain metadata as part of an alternate data stream. So it's possible the files that succeed are COM-app created ones, and the ones that are failing aren't.
Here's a possibly more robust way of dealing with the COM-app created files: Get document summary information from any file.
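
The linked approach boils down to opening the file's property-set storage directly. Here is a rough sketch along those lines with pywin32 (not the exact code from that link); the property IDs Title=2, Subject=3, Author=4 are the documented SummaryInformation IDs from the Windows SDK, hard-coded here:

import pythoncom
from win32com import storagecon

# Well-known SummaryInformation property IDs (Windows SDK)
PIDSI_TITLE, PIDSI_SUBJECT, PIDSI_AUTHOR = 2, 3, 4

def read_summary_info(path):
    flags = storagecon.STGM_READ | storagecon.STGM_SHARE_EXCLUSIVE
    # STGFMT_FILE opens the property sets of a plain NTFS file,
    # not just those of a compound (structured storage) document
    pss = pythoncom.StgOpenStorageEx(path, flags, storagecon.STGFMT_FILE,
                                     0, pythoncom.IID_IPropertySetStorage)
    ps = pss.Open(pythoncom.FMTID_SummaryInformation, flags)
    title, subject, author = ps.ReadMultiple((PIDSI_TITLE, PIDSI_SUBJECT, PIDSI_AUTHOR))
    return {'Title': title, 'Subject': subject, 'Author': author}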
With alternate data streams, it's possible to read them directly:
meta = open('myfile.ext:StreamName').read()
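They can be written the same way: on NTFS a stream is addressed just like an ordinary file named file:stream, so a quick sketch (the stream name Comments is arbitrary):

# Alternate data streams are addressed as "filename:streamname" (NTFS only)
with open('myfile.ext:Comments', 'w') as stream:
    stream.write('metadata stored alongside, not inside, the main contents')

with open('myfile.ext:Comments') as stream:
    print(stream.read())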
Update: okay, now I see none of this is relevant because you were after document metadata and not file metadata. What a difference clarity in a question can make :|
Try this: How to retrieve author of a office file in python?
Windows API Code Pack may be used with Python for .NET to read/write file metadata.
1. Download the NuGet packages for WindowsAPICodePack-Core and WindowsAPICodePack-Shell.
2. Extract the .nupkg files with a compression utility like 7-Zip to the script's path, or to some place defined in the system path variable.
3. Install Python for .NET with pip install pythonnet.
Example code to get and set the title of an MP4 video:
import clr
clr.AddReference("Microsoft.WindowsAPICodePack")
clr.AddReference("Microsoft.WindowsAPICodePack.Shell")
from Microsoft.WindowsAPICodePack.Shell import ShellFile
# create shell file object
f = ShellFile.FromFilePath(r'movie.mp4')
# read video title
print(f.Properties.System.Title.Value)
# set video title
f.Properties.System.Title.Value = 'My video'
Hack to check available properties:
dir(f.Properties.System)
Roger Upole's answer helped immensely. However, I also needed to read the "Last Saved By" detail in an .xls file.
XLS file attributes can be read with win32com: the Workbook object has a BuiltinDocumentProperties property.
https://gist.github.com/justengel/87bac3355b1a925288c59500d2ce6ef5
import os
import win32com.client  # Requires "pip install pywin32"

__all__ = ['get_xl_properties', 'get_file_details']

# https://learn.microsoft.com/en-us/dotnet/api/microsoft.office.tools.excel.workbook.builtindocumentproperties?view=vsto-2017
BUILTIN_XLS_ATTRS = ['Title', 'Subject', 'Author', 'Keywords', 'Comments', 'Template', 'Last Author', 'Revision Number',
                     'Application Name', 'Last Print Date', 'Creation Date', 'Last Save Time', 'Total Editing Time',
                     'Number of Pages', 'Number of Words', 'Number of Characters', 'Security', 'Category', 'Format',
                     'Manager', 'Company', 'Number of Bytes', 'Number of Lines', 'Number of Paragraphs',
                     'Number of Slides', 'Number of Notes', 'Number of Hidden Slides', 'Number of Multimedia Clips',
                     'Hyperlink Base', 'Number of Characters (with spaces)']


def get_xl_properties(filename, xl=None):
    """Return the known XLS file attributes for the given .xls filename."""
    quit = False
    if xl is None:
        xl = win32com.client.DispatchEx('Excel.Application')
        quit = True

    # Open the workbook
    wb = xl.Workbooks.Open(filename)

    # Save the attributes in a dictionary
    attrs = {}
    for attrname in BUILTIN_XLS_ATTRS:
        try:
            val = wb.BuiltinDocumentProperties(attrname).Value
            if val:
                attrs[attrname] = val
        except:
            pass

    # Quit the Excel application
    if quit:
        try:
            xl.Quit()
            del xl
        except:
            pass

    return attrs


def get_file_details(directory, filenames=None):
    """Collect a file's or a list of files' attributes.

    Args:
        directory (str): Directory or filename to get attributes for
        filenames (str/list/tuple): If the given directory is a directory, then a filename or list of files must be given

    Returns:
        file_attrs (dict): Dictionary of {filename: {attribute_name: value}} or dictionary of {attribute_name: value}
            if a single file is given.
    """
    if os.path.isfile(directory):
        directory, filenames = os.path.dirname(directory), [os.path.basename(directory)]
    elif filenames is None:
        filenames = os.listdir(directory)
    elif not isinstance(filenames, (list, tuple)):
        filenames = [filenames]

    if not os.path.exists(directory):
        raise ValueError('The given directory does not exist!')

    # Open the COM object
    sh = win32com.client.gencache.EnsureDispatch('Shell.Application', 0)  # Generates local compiled with make.py
    ns = sh.NameSpace(os.path.abspath(directory))

    # Get the directory file attribute column names
    cols = {}
    for i in range(512):  # 308 seemed to be the max for an Excel file
        attrname = ns.GetDetailsOf(None, i)
        if attrname:
            cols[i] = attrname

    # Get the information for the files.
    files = {}
    for file in filenames:
        item = ns.ParseName(os.path.basename(file))
        files[os.path.abspath(item.Path)] = attrs = {}  # Store attributes in dictionary

        # Save attributes
        for i, attrname in cols.items():
            attrs[attrname] = ns.GetDetailsOf(item, i)

        # For xls files save the special properties
        if os.path.splitext(file)[-1] == '.xls':
            xls_attrs = get_xl_properties(item.Path)
            attrs.update(xls_attrs)

    # Clean up the COM object
    try:
        del sh
    except:
        pass

    if len(files) == 1:
        return files[list(files.keys())[0]]
    return files


if __name__ == '__main__':
    import argparse

    P = argparse.ArgumentParser(description="Read and print file details.")
    P.add_argument('filename', type=str, help='Filename to read and print the details for.')
    P.add_argument('-v', '--show-empty', action='store_true', help='If given print keys with empty values.')
    ARGS = P.parse_args()

    # Argparse Variables
    FILENAME = ARGS.filename
    SHOW_EMPTY = ARGS.show_empty

    DETAILS = get_file_details(FILENAME)
    print(os.path.abspath(FILENAME))
    for k, v in DETAILS.items():
        if v or SHOW_EMPTY:
            print('\t', k, '=', v)
I know this is an old question, but I had the same problem and ended up creating a package to solve it: windows-metadata.
As an aside, Roger Upole's answer was a good starting point. However, it doesn't capture all the attributes a file can have: the break if not colname ends the loop too soon, since Windows skips some column numbers (for whatever reason), so Roger's answer yields the first 30 or so attributes when there are actually nearly 320.
Now, to answer the question using this package:
from windows_metadata import WindowsAttributes
attr = WindowsAttributes(<File Name>) # this will load all the filled attributes a file has
title = attr["Title"] # dict-like access
title = attr.title # attribute like access -> these two will return the same value
subject = attr.subject
author = attr.author
...
And so on for any available attributes a file has.
