Duplicate data stored in database (Python)

My current python script:
import ftplib
import hashlib
import httplib
import pytz
from datetime import datetime
import urllib
from pytz import timezone
import os.path, time
import glob
def ftphttp(cam_name):
    for image in glob.glob(os.path.join('/tmp/image/*.png')):
        ts = os.path.getmtime(image)
        dt = datetime.fromtimestamp(ts, pytz.utc)
        timeZone = timezone('Asia/Singapore')
        localtime = dt.astimezone(timeZone).isoformat()
        camid = cam_name(cam_name)
        tscam = camid + localtime

        ftp = ftplib.FTP('10.217.137.121', 'kevin403', 'S$ip1234')
        ftp.cwd('/var/www/html/image')

        m = hashlib.md5()
        m.update(tscam)
        dd = m.hexdigest()

        x = httplib.HTTPConnection('10.217.137.121', 8086)
        x.connect()
        f = {'ts': localtime}
        x.request('GET', '/camera/store?fn=' + dd + '&' + urllib.urlencode(f) + '&cam=' + cam_name(cam_name))
        y = x.getresponse()
        z = y.read()
        x.close()

        with open(image, 'rb') as file:
            ftp.storbinary('STOR ' + dd + '.png', file)
        ftp.quit()
Right now I'm able to send multiple files into another folder, but the data stored in the database is duplicated. For example, when I store 3 files into the folder, my database ends up with 6 rows inserted via httplib. Does anybody have any idea why the data is duplicated? Help needed!
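One hedged idea, assuming the same image can end up being processed more than once (for example if the script or the function is triggered twice): keep a local record of files that have already been reported, so the /camera/store request is issued at most once per image. The marker-file path and helper names below are made up for illustration, not taken from the question.
import glob
import os

SENT_LOG = '/tmp/image/.sent'  # hypothetical marker file listing already-reported images

def already_sent(image_path):
    # Return True if this image was recorded in a previous pass.
    if not os.path.exists(SENT_LOG):
        return False
    with open(SENT_LOG) as log:
        return image_path in log.read().splitlines()

def mark_sent(image_path):
    # Append the image path so later passes skip it.
    with open(SENT_LOG, 'a') as log:
        log.write(image_path + '\n')

for image in glob.glob('/tmp/image/*.png'):
    if already_sent(image):
        continue  # already stored in the database on a previous pass
    # ... issue the single /camera/store request and FTP upload here ...
    mark_sent(image)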

Related

Scraping from web site into HDFS

I'm trying to scrape data from a website into HDFS. At first the scraping was working well, but after I added the lines for storing the data into HDFS it stopped working:
import requests
from pathlib import Path
import os
from datetime import date
from hdfs import InsecureClient
date = date.today()
date

def downloadFile(link, destfolder):
    r = requests.get(link, stream=True)
    filename = "datanew1" + str(date) + ".xls"
    downloaded_file = open(os.path.join(destfolder, filename), 'wb')
    client = InsecureClient('http://hdfs-namenode.default.svc.cluster.local:50070', user='hdfs')
    with client.download('/data/test.csv')
        for chunk in r.iter_content(chunk_size=256):
            if chunk:
                downloaded_file.write(chunk)

link = "https://api.worldbank.org/v2/fr/indicator/FP.CPI.TOTL.ZG?downloadformat=excel"
Path('http://hdfs-namenode.default.svc.cluster.local:50070/data').mkdir(parents=True, exist_ok=True)
downloadFile(link, 'http://hdfs-namenode.default.svc.cluster.local:50070/data')
There is no error in the code; I just can't find the scraped data!
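For reference, a minimal sketch (untested against a real cluster) that streams the download straight into HDFS with HdfsCLI's client.write() instead of opening a local file. The namenode URL and user are the ones from the question; treating /data as the target HDFS directory is an assumption.
import requests
from datetime import date
from hdfs import InsecureClient

def download_to_hdfs(link, hdfs_dir):
    client = InsecureClient('http://hdfs-namenode.default.svc.cluster.local:50070',
                            user='hdfs')
    filename = 'datanew1' + str(date.today()) + '.xls'
    hdfs_path = hdfs_dir + '/' + filename
    r = requests.get(link, stream=True)
    r.raise_for_status()
    # client.write() without data= returns a writer we can stream chunks into
    with client.write(hdfs_path, overwrite=True) as writer:
        for chunk in r.iter_content(chunk_size=256):
            if chunk:
                writer.write(chunk)

download_to_hdfs(
    'https://api.worldbank.org/v2/fr/indicator/FP.CPI.TOTL.ZG?downloadformat=excel',
    '/data')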

How to append current date and time to file name?

I have a script to download a PDF from the internet and save it to a specific directory. How can I go about appending the date and time to the file name?
# Import all needed modules and tools
from fileinput import filename
import os
import os.path
from datetime import datetime
import urllib.request
import requests
# Disable SSL and HTTPS Certificate Warnings
import urllib3
urllib3.disable_warnings()
resp = requests.get('url.org', verify=False)
# Get current date and time
current_datetime = datetime.now()
print("Current date & time : ". current_datetime)
# Convert datetime obj to string
str_current_datetime = str(current_datetime)
# Download and name the PDF file from the URL
response= urllib.request.urlretrieve('url.pdf',
filename = 'my directory\civil.pdf')
# Save to the preferred directory
with open("my directory\civil.pdf", 'wb') as f: f.write(resp.content)
Use f-strings:
open(f"file - {datetime.now().strftime('%Y-%m-%D')}.txt", "w")
# will create a new file with the title: "file - Year-Month-Date.txt"
# then you can do whatever you want with it
f-string docs
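Putting that together with the question's download code, here is a sketch that appends a timestamp to the saved PDF's name. The URL and directory are placeholders standing in for the ones elided in the question, and the strftime format avoids characters that are invalid in Windows filenames:
import os
from datetime import datetime
import requests

url = 'https://example.org/civil.pdf'   # placeholder for the real PDF URL
out_dir = r'my directory'               # placeholder for the real target directory

stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
out_path = os.path.join(out_dir, f'civil_{stamp}.pdf')

resp = requests.get(url, verify=False)  # verify=False mirrors the question's setup
resp.raise_for_status()
with open(out_path, 'wb') as f:
    f.write(resp.content)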

How do I fix my code so that it is automated?

I have the below code that takes my standardized .txt file and converts it into a JSON file perfectly. The only problem is that I sometimes have over 300 files, and doing this manually (i.e. changing the number at the end of the file name and re-running the script) is too much and takes too long. I want to automate this. The files reside in one folder/directory and I am placing the JSON files in a different folder/directory, keeping the naming convention standardized except that the output ends with .json instead of .txt; the prefixes/file names stay the same. An example would be CRAZY_CAT_FINAL1.TXT, CRAZY_CAT_FINAL2.TXT, and so on, all the way to file 300. How can I automate this, keep the file naming convention in place, and read and output the files to different folders/directories? I have tried, but can't seem to get this to iterate. Any help would be greatly appreciated.
import glob
import time
from glob import glob
import pandas as pd
import numpy as np
import csv
import json
csvfile = open(r'C:\Users\...\...\...\Dog\CRAZY_CAT_FINAL1.txt', 'r')
jsonfile = open(r'C:\Users\...\...\...\Rat\CRAZY_CAT_FINAL1.json', 'w')
reader = csv.DictReader(csvfile)
out = json.dumps([row for row in reader])
jsonfile.write(out)
****************************************************************************
I also have this code using the Python library "requests". How do I make it upload multiple JSON files that follow a standard naming convention? The files end with a number...
import requests

# function to post to the API
def postData(xactData):
    url = 'http link'
    headers = {
        'Content-Type': 'application/json',
        'Content-Length': str(len(xactData)),
        'Request-Timeout': '60000'
    }
    return requests.post(url, headers=headers, data=xactData)

# read data
f = open(r'filepath/file/file.json', 'r')
data = f.read()
print(data)

# post data
result = postData(data)
print(result)
Use f-strings?
for i in range(1, 301):
    csvfile = open(rf'C:\Users\...\...\...\Dog\CRAZY_CAT_FINAL{i}.txt', 'r')
    jsonfile = open(rf'C:\Users\...\...\...\Rat\CRAZY_CAT_FINAL{i}.json', 'w')

import time
from glob import glob
import csv
import json
import os

INPATH = r'C:\Users\...\...\...\Dog'
OUTPATH = r'C:\Users\...\...\...\Rat'

for csvname in glob(INPATH + r'\*.txt'):
    jsonname = OUTPATH + '/' + os.path.basename(csvname[:-3] + 'json')
    reader = csv.DictReader(open(csvname, 'r'))
    json.dump(list(reader), open(jsonname, 'w'))
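For the second half of the question, a sketch that loops over the generated JSON files and posts each one with requests; the endpoint is the question's 'http link' placeholder and the folder is the same elided output path as above:
import os
from glob import glob
import requests

OUTPATH = r'C:\Users\...\...\...\Rat'   # same (elided) output folder as above
URL = 'http link'                       # placeholder endpoint from the question

for jsonname in glob(os.path.join(OUTPATH, 'CRAZY_CAT_FINAL*.json')):
    with open(jsonname, 'r') as f:
        data = f.read()
    headers = {
        'Content-Type': 'application/json',
        'Content-Length': str(len(data)),
        'Request-Timeout': '60000'
    }
    result = requests.post(URL, headers=headers, data=data)
    print(jsonname, result.status_code)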

I want to convert .docx to .dotx

I have populated some mail merge fields in a .docx file and now I want my script to convert the saved .docx file to a .dotx file. I am using Python 3.6.
from __future__ import print_function
from mailmerge import MailMerge
from datetime import date
from docx import Document
from docx.opc.constants import CONTENT_TYPE as CT
import csv
import sys
import os
import numpy as np
import pandas as pd
# . . .
for i in range(0, numTemplates):
    theTemplateName = templateNameCol[i]
    theTemplateFileLocation = templateFileLocationCol[i]
    template = theTemplateFileLocation

    document = MailMerge(template)
    print(document.get_merge_fields())

    theOffice = officeCol[i]
    theAddress = addressCol[i]
    theSuite = suiteCol[i]
    theCity = cityCol[i]
    theState = stateCol[i]
    theZip = zipCol[i]
    thePhoneNum = phoneNumCol[i]
    theFaxNum = faxNumCol[i]

    document.merge(
        Address=theAddress
    )

    document.write(r'\Users\me\mailmergeproject\test-output' + str(i) + r'.docx')
    # do conversion here
Here at the bottom is where I want to do the conversion. As you can see, I've written a file and it's just sitting in a folder right now
Here is the code snippet for converting the .docx file to .dotx.
You have to change the content type while saving the document:
pip install python-docx
import docx

document = docx.Document('foo.docx')
document_part = document.part
document_part._content_type = ('application/vnd.openxmlformats-'
                               'officedocument.wordprocessingml.template.main+xml')
document.save('bar.dotx')
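Applied to the question's loop, that could be wrapped in a small helper and called right after document.write(). This is only a sketch and relies on the same private _content_type attribute as the snippet above:
import docx

DOTX_CT = ('application/vnd.openxmlformats-'
           'officedocument.wordprocessingml.template.main+xml')

def docx_to_dotx(docx_path, dotx_path):
    # Re-save a .docx file with the template content type, producing a .dotx.
    doc = docx.Document(docx_path)
    doc.part._content_type = DOTX_CT
    doc.save(dotx_path)

# e.g. inside the loop, right after document.write(...):
# out = r'\Users\me\mailmergeproject\test-output' + str(i) + '.docx'
# docx_to_dotx(out, out[:-5] + '.dotx')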

Recommended way to redirect file-like streams in Python?

I am writing a backup script for a sqlite database that changes very intermittently. Here's how it is now:
from bz2 import BZ2File
from datetime import datetime
from os.path import dirname, abspath, join
from hashlib import sha512
def backup_target_database(target):
    backup_dir = dirname(abspath(target))
    hash_file = join(backup_dir, 'last_hash')
    new_hash = sha512(open(target, 'rb').read()).digest()
    if new_hash != open(hash_file, 'rb').read():
        fmt = '%Y%m%d-%H%M.sqlite3.bz2'
        snapshot_file = join(backup_dir, datetime.now().strftime(fmt))
        BZ2File(snapshot_file, 'wb').write(open(target, 'rb').read())
        open(hash_file, 'wb').write(new_hash)
Currently the database weighs just shy of 20 MB, so it's not that taxing when this runs and reads the whole file into memory (and does so twice when changes are detected), but I don't want to wait until this becomes a problem.
What is the proper way to do this sort of (to use Bash-script terminology) stream piping?
First, there's duplication in your code: the target file is read twice.
You can use shutil.copyfileobj and hashlib's incremental update() for a memory-efficient routine.
from bz2 import BZ2File
from datetime import datetime
from hashlib import sha512
from os.path import dirname, abspath, join
from shutil import copyfileobj

def backup_target_database(target_path):
    backup_dir = dirname(abspath(target_path))
    hash_path = join(backup_dir, 'last_hash')
    old_hash = open(hash_path, 'rb').read()

    # Hash the database in 1 KiB chunks instead of reading it all at once.
    hasher = sha512()
    with open(target_path, 'rb') as target:
        while True:
            data = target.read(1024)
            if not data:
                break
            hasher.update(data)
    new_hash = hasher.digest()

    if new_hash != old_hash:
        fmt = '%Y%m%d-%H%M.sqlite3.bz2'
        snapshot_path = join(backup_dir, datetime.now().strftime(fmt))
        # Stream the database into the compressed snapshot without loading it into memory.
        with open(target_path, 'rb') as target:
            with BZ2File(snapshot_path, 'wb', compresslevel=9) as snapshot:
                copyfileobj(target, snapshot)
        # Remember the new hash so unchanged databases are not re-snapshotted.
        open(hash_path, 'wb').write(new_hash)
(Note: I didn't test this code. If you have a problem, please let me know.)
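One caveat that applies to both versions: they assume last_hash already exists, so the very first run fails when the hash file can't be opened. A small usage sketch with a hypothetical database path:
from os.path import abspath, dirname, exists, join

target = '/path/to/app.sqlite3'    # hypothetical database path
hash_path = join(dirname(abspath(target)), 'last_hash')
if not exists(hash_path):
    open(hash_path, 'wb').close()  # seed an empty hash file on the first run

backup_target_database(target)     # function defined in the answer above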
