fabriquedoc-backend/main.py

179 lines
5.1 KiB
Python
Raw Normal View History

2022-12-28 05:04:27 +00:00
import datetime
import json
import logging
import os
import shutil
import tempfile
from typing import List

import pypandoc
from fastapi import FastAPI, HTTPException
from fastapi.responses import FileResponse
from fastapi.testclient import TestClient
from pydantic import BaseModel
from wand.color import Color
from wand.image import Image
2022-12-28 05:04:27 +00:00
class DocumentSpecs(BaseModel):
    """Request body of the ``/generer/`` endpoint: what to render and how.

    ``paperwidth``, ``paperheight``, ``margin`` and ``vmargin`` are divided by
    300 by the generator and handed to the LaTeX ``geometry`` package in
    inches.
    """

    format: str       # format directory under styles/<style>/
    style: str        # style directory under styles/
    linkcolor: str    # pandoc variable `linkcolor`
    tocdepth: int     # pandoc --toc-depth
    pdfengine: str    # pandoc --pdf-engine
    content: str      # Markdown source of the document
    fontsize: int     # font size, passed to pandoc as "<fontsize>pt"
    paperwidth: int
    paperheight: int
    margin: int
    vmargin: int
    extension: str    # output type: "pdf", "png" or "jpg"
class FormatParameters(BaseModel):
    """Rendering parameters of one format of a style.

    Instances are built from the per-format entries of a style's
    ``format_parameters.json`` file.
    """

    # Pandoc / LaTeX options.
    linkcolor: str
    tocdepth: int
    pdfengine: str
    fontsize: int

    # Page geometry.
    paperwidth: int
    paperheight: int
    margin: int
    vmargin: int

    # File extension of the generated output.
    extension: str
2023-01-05 03:18:29 +00:00
class Styles(BaseModel):
    """Response payload of ``/styles/``: the names of the available styles."""

    styles: List[str]
2023-01-05 03:18:29 +00:00
class Formats(BaseModel):
    """Response payload of ``/formats/{style}/``: the format names of a style."""

    formats: List[str]
2023-01-05 03:18:29 +00:00
class App(BaseModel):
    """Response payload of the root endpoint: the name of the application."""

    app: str
2023-01-01 19:16:56 +00:00
def convert_pdf(filename, filetype, output_path, resolution=300):
    """Convert every page of a PDF into its own image file.

    Each page is written to ``output_path`` as
    ``{pdf_basename}-{page_index}.{filetype}``, where ``page_index`` is
    zero-based. The alpha channel is removed and replaced by a white
    background.

    Args:
        filename: Path of the PDF to read.
        filetype: Image format, also used as the file extension (e.g. "png").
        output_path: Directory receiving the images; created if missing.
        resolution: Resolution used when reading the PDF.
    """
    # The base name does not depend on the page, so compute it once.
    base_name = os.path.splitext(os.path.basename(filename))[0]
    if output_path:
        os.makedirs(output_path, exist_ok=True)

    # Use the document as a context manager so its ImageMagick resources are
    # released even when a page fails to convert.
    with Image(filename=filename, resolution=resolution) as all_pages:
        for i, page in enumerate(all_pages.sequence):
            with Image(page) as img:
                img.format = filetype
                img.background_color = Color('white')
                img.alpha_channel = 'remove'
                image_filename = os.path.join(
                    output_path, f'{base_name}-{i}.{filetype}'
                )
                img.save(filename=image_filename)
2022-12-28 05:04:27 +00:00
# Application object; the route decorators below register their handlers on it.
app = FastAPI()
2023-01-05 03:18:29 +00:00
@app.get("/")
async def get_root():
    """Identify the service by returning an ``App`` payload naming it."""
    return App(app='fabriquedoc')
def list_dir(path):
    """Return the sorted names of the directories directly inside *path*.

    Regular files are skipped; symbolic links to directories are included.
    The result is sorted because the operating system returns directory
    entries in arbitrary order, which would make API responses unstable.

    Raises:
        OSError: if *path* does not exist or cannot be read.
    """
    with os.scandir(path) as entries:
        return sorted(entry.name for entry in entries if entry.is_dir())
2023-01-05 03:18:29 +00:00
@app.get("/styles/")
async def get_styles():
    """List the available styles, i.e. the sub-directories of ``./styles``."""
    available = list_dir("./styles")
    return Styles(styles=available)
2023-01-05 03:18:29 +00:00
@app.get("/formats/{style}/")
async def get_formats(style: str):
    """List the formats of a style, i.e. the sub-directories of its folder.

    Raises:
        HTTPException: 404 when the style does not exist.
    """
    style_dir = f"./styles/{style}/"
    # "." and ".." would resolve outside the requested style directory, and a
    # missing directory would otherwise surface as an unhandled OSError (500).
    if style in (".", "..") or not os.path.isdir(style_dir):
        raise HTTPException(status_code=404, detail=f"Unknown style: {style}")
    return Formats(formats=list_dir(style_dir))
2022-12-28 05:04:27 +00:00
@app.get("/format_parameters/{style}/{format}/")
async def get_format_parameters(style: str, format: str):
    """Return the rendering parameters of one format of a style.

    The parameters are read from ``./styles/<style>/format_parameters.json``,
    a JSON object keyed by format name.

    Raises:
        HTTPException: 404 when the style or the format is unknown.
    """
    if style in (".", ".."):
        raise HTTPException(status_code=404, detail=f"Unknown style: {style}")

    try:
        with open(f"./styles/{style}/format_parameters.json", "r",
                  encoding="utf-8") as f:
            all_formats = json.load(f)
    except (FileNotFoundError, NotADirectoryError) as err:
        raise HTTPException(
            status_code=404, detail=f"Unknown style: {style}"
        ) from err

    format_data = all_formats.get(format)
    if format_data is None:
        # Without this check FormatParameters(**None) raises a TypeError.
        raise HTTPException(
            status_code=404,
            detail=f"Unknown format for style {style}: {format}",
        )

    logging.info("%s", format_data)
    return FormatParameters(**format_data)
2022-12-28 05:04:27 +00:00
@app.get("/generer/")
async def generer(specs: DocumentSpecs):
    """Render ``specs.content`` (Markdown) with pandoc and return the result.

    A PDF is always produced first, in ``./out``. When ``specs.extension`` is
    "pdf" that file is returned as is; for "png" or "jpg" each page is
    converted to an image at 300 dpi and the images are returned as one zip
    archive.

    Raises:
        HTTPException: 400 for an unsupported extension or an unsafe style or
            format name; 500 when pandoc or the image conversion fails.
    """
    # NOTE(review): this is a GET route that expects a request body, and the
    # pandoc / ImageMagick calls below block the event loop; both are kept to
    # preserve the existing interface.

    # Reject unusable requests before spending time on the PDF rendering.
    if specs.extension not in ("pdf", "png", "jpg"):
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported extension: {specs.extension}",
        )
    # style and format come from the request body and end up in file paths
    # that are read AND written, so they must be plain directory names.
    for component in (specs.style, specs.format):
        if (component in ("", ".", "..")
                or os.path.basename(component) != component):
            raise HTTPException(
                status_code=400, detail="Invalid style or format name"
            )

    header_file = f'{os.getcwd()}/styles/{specs.style}/{specs.format}/header.tex'
    cover_file = f'{os.getcwd()}/styles/{specs.style}/{specs.format}/cover.tex'
    datef = datetime.datetime.now().strftime("%m-%d-%Y")
    os.makedirs("out", exist_ok=True)
    output_file = f"./out/{specs.style}-{specs.format}-{datef}-output.pdf"
    filters = []
    pdoc_args = [
        f'--include-in-header={header_file}',
        f'--include-after-body={cover_file}',
        '--listings',
        '--dpi=300',
        f'--toc-depth={specs.tocdepth}',
        f'--pdf-engine={specs.pdfengine}',
        '-V', f'linkcolor={specs.linkcolor}',
        '-V', f'fontsize={specs.fontsize}pt',
        '-V', f'geometry:paperwidth={specs.paperwidth / 300}in',
        '-V', f'geometry:paperheight={specs.paperheight / 300}in',
        '-V', f'geometry:margin={specs.margin / 300}in',
        '-V', f'geometry:vmargin={specs.vmargin / 300}in',
    ]

    try:
        logging.info("Dossier courant = %s", os.getcwd())
        pypandoc.convert_text(
            source=specs.content,
            to='pdf',
            format='markdown+implicit_figures+smart',
            encoding='utf-8',
            extra_args=pdoc_args,
            filters=filters,
            cworkdir=os.getcwd(),
            outputfile=output_file,
        )
    except (RuntimeError, OSError) as err:
        # Do not fall through: the output file is missing, or left over from
        # an earlier request, and must not be served.
        logging.exception(err)
        raise HTTPException(
            status_code=500, detail="PDF generation failed"
        ) from err

    if specs.extension == "pdf":
        return FileResponse(output_file)

    # "png" / "jpg": one image per page, delivered as a zip archive.
    zip_filename = os.path.splitext(os.path.basename(output_file))[0]
    try:
        # A private temporary directory keeps concurrent requests apart and
        # is removed even when the conversion fails.
        with tempfile.TemporaryDirectory() as image_dir:
            convert_pdf(output_file, specs.extension, image_dir, resolution=300)
            shutil.make_archive(zip_filename, 'zip', image_dir)
    except Exception as err:
        logging.exception(err)
        raise HTTPException(
            status_code=500, detail="Image conversion failed"
        ) from err
    return FileResponse(zip_filename + ".zip")
2022-12-28 05:04:27 +00:00
# In-process HTTP client used by the smoke test below.
client = TestClient(app)


def test_getroot():
    """The root endpoint answers 200 and identifies the service."""
    response = client.get("/")
    assert response.status_code == 200
    assert response.json() == {"app": "fabriquedoc"}