First working release.
Support for other file types (anything other than .docx) is not guaranteed.
This commit is contained in:
0
modules/document/__init__.py
Normal file
0
modules/document/__init__.py
Normal file
30
modules/document/document_file.py
Normal file
30
modules/document/document_file.py
Normal file
@@ -0,0 +1,30 @@
|
||||
import os.path
|
||||
import tempfile
|
||||
import zipfile
|
||||
import shutil
|
||||
|
||||
|
||||
class ExtractedDocument:
    """A zip-based document unpacked into a temporary directory.

    When ``path`` is a zip archive its contents are extracted into a fresh
    directory created with ``tempfile.mkdtemp()``. When it is not, nothing is
    extracted and every attribute stays ``None``.

    Attributes:
        documentroot: Extraction directory, or ``None`` if ``path`` was not a
            zip archive.
        app: Path ``<documentroot>/docProps/app.xml`` (built without checking
            that the file exists), or ``None``.
        core: Path ``<documentroot>/docProps/core.xml`` (built without
            checking that the file exists), or ``None``.
    """

    def __init__(self, path: str):
        """Extract ``path`` if it is a zip archive.

        Args:
            path: Location of the document on disk.

        Raises:
            Exception: Whatever ``zipfile`` raises while extracting; the
                temporary directory is removed before the error propagates.
        """
        self.documentroot = None
        self.app = None
        self.core = None
        if not zipfile.is_zipfile(path):
            return

        root = tempfile.mkdtemp()
        try:
            # The context manager closes the archive's file handle, which a
            # bare ``ZipFile(path).extractall(...)`` leaves to the garbage
            # collector.
            with zipfile.ZipFile(path) as archive:
                archive.extractall(root)
        except Exception:
            # The caller never receives the instance when the constructor
            # raises, so it could not call remove(); clean up here instead.
            shutil.rmtree(root, ignore_errors=True)
            raise

        self.documentroot = root
        self.app = os.path.join(root, 'docProps', 'app.xml')
        self.core = os.path.join(root, 'docProps', 'core.xml')

    def pack(self, path):
        """Write every file under ``documentroot`` into a new zip at ``path``.

        Archive member names are the file paths relative to ``documentroot``.
        Only files are written; directories without files are not recorded.

        Args:
            path: Destination of the archive; it is opened in ``"w"`` mode,
                so an existing file is overwritten.
        """
        with zipfile.ZipFile(path, "w", compresslevel=9, compression=zipfile.ZIP_DEFLATED) as z:
            for root, dirs, files in os.walk(self.documentroot):
                for file in files:
                    full_path = os.path.join(root, file)
                    z.write(full_path, os.path.relpath(full_path, self.documentroot))

    def remove(self):
        """Delete the extraction directory on a best-effort basis.

        Does nothing when no directory was created (the input was not a zip
        archive). Removal errors are ignored by ``shutil.rmtree``; any other
        exception is reported on stdout instead of being raised.
        """
        if self.documentroot is None:
            # Nothing was extracted, so there is nothing to delete.
            return
        try:
            shutil.rmtree(self.documentroot, ignore_errors=True)
        except Exception as e:
            print(f'Error while removing {self.documentroot}: {e}, remove it manually if you want')
|
||||
36
modules/document/document_properties.py
Normal file
36
modules/document/document_properties.py
Normal file
@@ -0,0 +1,36 @@
|
||||
import os.path
|
||||
import lxml
|
||||
from lxml import etree
|
||||
from modules.document.document_file import ExtractedDocument
|
||||
from modules.helpers import xml
|
||||
from modules.helpers.convert import Int
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
class DocumentProps:
    """Metadata read from an extracted document's ``docProps`` XML parts.

    ``parsed_`` is ``False`` when the extracted document has no usable
    ``documentroot`` directory; in that case no other attribute is set.
    Otherwise ``parsed_`` is ``True`` and the attributes below are filled
    from ``extracted_document.app`` and ``extracted_document.core``.

    Attributes:
        application, template: Text values looked up in the app XML.
        paragraphs, lines, characters, words, pages, total_time: Values from
            the app XML passed through ``Int``.
        modified, created: ``datetime`` values parsed from the core XML, or
            ``None`` when the element is absent.
        revision, last_modified_by, creator: Text values from the core XML.
        core_xml, app_xml: The parsed trees themselves.
        extracted_document: The ``ExtractedDocument`` that was read.
    """

    # Timestamp layout expected in dcterms:created / dcterms:modified.
    _TIMESTAMP_FORMAT = '%Y-%m-%dT%H:%M:%SZ'

    def __init__(self, extracted_document: ExtractedDocument):
        """Parse the app and core property files of ``extracted_document``.

        Args:
            extracted_document: Document whose ``core`` and ``app`` paths are
                parsed with ``etree.parse``.

        Raises:
            ValueError: If a timestamp is present but does not match
                ``_TIMESTAMP_FORMAT``.
        """
        if not extracted_document.documentroot or not os.path.isdir(extracted_document.documentroot):
            self.parsed_ = False
            return

        self.parsed_ = True

        core_xml = etree.parse(extracted_document.core)
        app_xml = etree.parse(extracted_document.app)

        self.extracted_document = extracted_document
        self.application = xml.get_value(app_xml, '//Application')
        self.paragraphs = Int(xml.get_value(app_xml, '//Paragraphs'))
        self.lines = Int(xml.get_value(app_xml, '//Lines'))
        self.characters = Int(xml.get_value(app_xml, '//Characters'))
        self.words = Int(xml.get_value(app_xml, '//Words'))
        self.pages = Int(xml.get_value(app_xml, '//Pages'))
        self.total_time = Int(xml.get_value(app_xml, '//TotalTime'))
        self.template = xml.get_value(app_xml, '//Template')
        # A missing timestamp element must not abort the whole parse, so the
        # conversion goes through a helper that tolerates None.
        self.modified = self._parse_timestamp(xml.get_value(core_xml, 'dcterms:modified'))
        self.created = self._parse_timestamp(xml.get_value(core_xml, 'dcterms:created'))
        self.revision = xml.get_value(core_xml, 'cp:revision')
        self.last_modified_by = xml.get_value(core_xml, 'cp:lastModifiedBy')
        self.creator = xml.get_value(core_xml, 'dc:creator')
        self.core_xml = core_xml
        self.app_xml = app_xml

    @classmethod
    def _parse_timestamp(cls, raw):
        """Return ``raw`` parsed with ``_TIMESTAMP_FORMAT``, or ``None`` if ``raw`` is ``None``."""
        if raw is None:
            return None
        return datetime.strptime(raw, cls._TIMESTAMP_FORMAT)
|
||||
0
modules/helpers/__init__.py
Normal file
0
modules/helpers/__init__.py
Normal file
6
modules/helpers/convert.py
Normal file
6
modules/helpers/convert.py
Normal file
@@ -0,0 +1,6 @@
|
||||
def Int(s: str) -> int | None:
|
||||
try:
|
||||
return int(s)
|
||||
except Exception as e:
|
||||
assert e
|
||||
return 0
|
||||
22
modules/helpers/xml.py
Normal file
22
modules/helpers/xml.py
Normal file
@@ -0,0 +1,22 @@
|
||||
# Prefix -> namespace URI map handed to ``findall`` by the helpers in this
# module, so lookup paths can use short prefixes such as ``cp:`` or ``dc:``.
namespaces = {
    "cp": "http://schemas.openxmlformats.org/package/2006/metadata/core-properties",
    "dc": "http://purl.org/dc/elements/1.1/",
    "dcterms": "http://purl.org/dc/terms/",
    "dcmitype": "http://purl.org/dc/dcmitype/",
    "xsi": "http://www.w3.org/2001/XMLSchema-instance",
    "vt": "http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes",
    # Empty prefix: presumably resolves names written without a prefix to the
    # extended-properties namespace; this relies on the XML library accepting
    # "" as the default-namespace key. TODO confirm for the lxml version used.
    "": "http://schemas.openxmlformats.org/officeDocument/2006/extended-properties"
}
|
||||
|
||||
|
||||
def get_value(parsed, name):
    """Return the text of the first element matching ``name``.

    Args:
        parsed: Parsed XML object providing ``findall(path, namespaces)``.
        name: Lookup path; prefixes are resolved through ``namespaces``.

    Returns:
        The element's text as ``str``, or ``None`` when no element matches
        or the matched element has no text.
    """
    # Look up once and reuse the result instead of searching twice.
    matches = parsed.findall(name, namespaces)
    if not matches:
        return None
    text = matches[0].text
    # An empty element has text None; str(None) would produce the literal
    # string "None", which could later be written back into the document.
    if text is None:
        return None
    return str(text)
|
||||
|
||||
|
||||
def set_value(parsed, name, value):
    """Write ``value`` into the first element matching ``name``, or drop it.

    A truthy ``value`` is stored as ``str(value)`` in the first match; an
    ``IndexError`` is raised if nothing matches. A falsy ``value`` removes
    the first match from its parent, and does nothing if there is no match.

    Args:
        parsed: Parsed XML object providing ``findall(path, namespaces)``.
        name: Lookup path; prefixes are resolved through ``namespaces``.
        value: New content, or a falsy value to delete the element.
    """
    if value:
        text = str(value)
        parsed.findall(name, namespaces)[0].text = text
        return

    found = parsed.findall(name, namespaces)
    if found:
        target = found[0]
        target.getparent().remove(target)
|
||||
0
modules/time_tools/__init__.py
Normal file
0
modules/time_tools/__init__.py
Normal file
9
modules/time_tools/time_diff.py
Normal file
9
modules/time_tools/time_diff.py
Normal file
@@ -0,0 +1,9 @@
|
||||
import datetime
|
||||
|
||||
|
||||
def compute_time_diff(date_first, time_first,
                      date_second, time_second):
    """Return the whole minutes separating two date/time pairs.

    Each date is combined with its time into a ``datetime``; the result is
    the absolute gap between the two, truncated to whole minutes, so the
    order of the arguments does not matter.
    """
    first_moment, second_moment = (
        datetime.datetime.combine(day, clock)
        for day, clock in ((date_first, time_first), (date_second, time_second))
    )
    gap = abs(first_moment - second_moment)
    return int(gap.total_seconds() / 60)
|
||||
Reference in New Issue
Block a user