Files and Related Functionality
Below we have defined the MIME types (and extensions) that Publish
supports. It will ignore anything not on this list.
# Lookup table from file extension (leading dot included) to the `MIME` instance
# used to dispatch the `File` constructors.
const MIMETYPES = Dict(
    # Documents and data.
    ".ipynb" => MIME"application/ipynb+json"(),
    ".jl" => MIME"text/julia"(),
    ".md" => MIME"text/markdown"(),
    ".toml" => MIME"application/toml"(),
    # Web assets and templates.
    ".js" => MIME"text/javascript"(),
    ".css" => MIME"text/css"(),
    ".html" => MIME"text/html"(),
    ".tex" => MIME"text/tex"(),
    ".mustache" => MIME"text/mustache"(),
    # Images.
    ".png" => MIME"image/png"(),
    ".svg" => MIME"image/svg"(),
    ".jpg" => MIME"image/jpeg"(),
)
We also want an inverse lookup Dict so that we can handle converting a
MIME to its file extension.
# Inverse of `MIMETYPES`: maps each `MIME` instance back to its file extension.
const EXTENSIONS = Dict(mime => extension for (extension, mime) in MIMETYPES)
Next we define a File type.
"""
    File(; kws...)

Represent a real or "virtual" file that belongs to a [`Project`](#).

Every field defaults to `nothing`. The keyword constructor accepts:

  - `name`: full path to the `File`, or `nothing` when the file is "virtual".
  - `mime`: the `MIME` type as defined [above](# "mime-type-defs").
  - `text`: raw `String` content of the `File`.
  - `dict`: `Dict{String,Any}` data from a parsed `.toml` file.
  - `node`: `CommonMark.Node` abstract syntax tree from a parsed markdown file.
"""
Base.@kwdef struct File
    name::Union{Nothing,String} = nothing
    mime::Union{Nothing,MIME} = nothing
    text::Union{Nothing,String} = nothing
    dict::Union{Nothing,Dict{String,Any}} = nothing
    node::Union{Nothing,CommonMark.Node} = nothing
end
# Compact one-line display: the type name followed by the file's name and MIME.
function Base.show(io::IO, file::File)
    print(io, "$File($(file.name), $(file.mime))")
    return nothing
end
We also have a number of constructors for File objects that make it
easier to create different variants of Files.
This one dispatches to others further down by examining the file extensions.
# Entry-point constructor: look up the MIME for `path`'s extension in
# `MIMETYPES` and forward to the matching `File(mime, path)` method. Extensions
# not in the table are forwarded with `nothing` as the MIME. When `path` is not
# an existing file an error is logged and `nothing` is returned.
function File(path::AbstractString)
    if !isfile(path)
        @error "unknown file '$path'"
        return nothing
    end
    extension = last(splitext(path))
    return File(get(MIMETYPES, extension, nothing), path)
end
# Ten-tilde fence used to delimit the code blocks written by the notebook and
# Literate Julia converters.
const CODE_FENCE = repeat("~", 10)
This File constructor handles Jupyter Notebooks, which are written in
JSON format.
# Build a `File` from the Jupyter notebook stored at `path`.
#
# The notebook JSON is flattened into a single markdown document: markdown
# cells are copied through as-is and code cells are wrapped in `CODE_FENCE`
# blocks tagged `julia`. Cells that lack a "cell_type", or have any other type,
# are skipped. The text is then parsed with `load_markdown`; the returned `File`
# holds the AST in `node` and the result of `frontmatter(node)` in `dict`.
function File(mime::MIME"application/ipynb+json", path::AbstractString)
    notebook = JSON.parsefile(path)
    io = IOBuffer()
    for cell in get(notebook, "cells", ())
        haskey(cell, "cell_type") || continue
        kind = cell["cell_type"]
        # `join` handles "source" being either a list of lines or one string.
        source = get(cell, "source", "")
        if kind == "markdown"
            join(io, source)
            # Finish the cell with a blank line. A single newline would let the
            # parser merge two adjacent markdown cells into one paragraph.
            print(io, "\n\n")
        elseif kind == "code"
            println(io, CODE_FENCE, "julia")
            join(io, source)
            println(io)
            println(io, CODE_FENCE)
        end
    end
    node = load_markdown(io)
    return File(
        name = path,
        mime = mime,
        node = node,
        dict = frontmatter(node),
    )
end
.jl files are treated as Literate Julia. The function below provides a
reduced set of functionality compared to the Literate.jl package.
# Build a `File` from the Literate-style Julia source at `path`.
#
# Each line is classified by the number of `#` characters that follow its
# leading whitespace:
#
#   - exactly one `#`: a line of markdown text; the marker and one following
#     whitespace character are removed.
#   - anything else: a line of code. Lines with two or more `#` keep all but
#     one of them, so `## note` is written out as the code comment `# note`.
#
# Runs of code lines are wrapped in `CODE_FENCE` blocks tagged `julia`, with
# leading and trailing blank lines trimmed. The text is then parsed with
# `load_markdown`; the returned `File` holds the AST in `node` and the result
# of `frontmatter(node)` in `dict`.
function File(mime::MIME"text/julia", path::AbstractString)
    io = IOBuffer()
    code = String[]
    state = :text
    # Helper that reduces code duplication below: when a code block is open,
    # write its buffered lines (minus surrounding blank ones) and close the fence.
    flush_code_block = function (current)
        if current === :code
            hascontent = l -> any(!isspace, l)
            start = findfirst(hascontent, code)
            stop = findlast(hascontent, code)
            if start !== nothing && stop !== nothing
                join(io, code[start:stop])
            end
            empty!(code)
            println(io, CODE_FENCE, "\n")
        end
    end
    for line in eachline(path)
        m = match(r"^(\s*)([#]*)(.*)", line)
        m === nothing && continue
        ws, comments, rest = m[1], m[2], m[3]
        nhashes = length(comments)
        if nhashes == 1
            # Remove a single whitespace character after the comment marker.
            # Only whitespace is dropped, so `#note` keeps its full text.
            text = if !isempty(rest) && isspace(first(rest))
                chop(rest; head=1, tail=0)
            else
                rest
            end
            flush_code_block(state)
            println(io, text)
            state = :text
        else
            # Start a new code block unless one is already open.
            state === :text && println(io, CODE_FENCE, "julia")
            push!(code, string(ws, '#'^max(nhashes - 1, 0), rest, '\n'))
            state = :code
        end
    end
    # Clean up last code block.
    flush_code_block(state)
    node = load_markdown(io)
    return File(
        name = path,
        mime = mime,
        node = node,
        dict = frontmatter(node),
    )
end
Markdown files, with the .md extension, are pretty simple to handle.
# Build a `File` from the markdown document at `path`: parse it with
# `load_markdown` and store the AST in `node` and its front matter in `dict`.
function File(mime::MIME"text/markdown", path::AbstractString)
    ast = open(path) do stream
        load_markdown(stream)
    end
    meta = frontmatter(ast)
    return File(; name=path, mime=mime, node=ast, dict=meta)
end
As is the .toml filetype. For both we’re just using the provided packages
that parse those file types.
# Build a `File` from the TOML document at `path`; the parsed table is stored
# in `dict`.
function File(mime::MIME"application/toml", path::AbstractString)
    table = TOML.parsefile(path)
    return File(; name=path, mime=mime, dict=table)
end
There are also a number of file types that we don’t want to parse at
all. These are listed below and just produce a raw File.
# MIME types whose files are loaded as raw text without any parsing.
const SIMPLE_FILETYPES = Union{
    MIME"text/javascript", MIME"text/css", MIME"text/html",
    MIME"text/tex", MIME"text/mustache",
}
# Raw-text constructor: read the whole file at `p` into the `text` field.
function File(m::SIMPLE_FILETYPES, p::AbstractString)
    contents = read(p, String)
    return File(; name=p, mime=m, text=contents)
end
A nice error message is provided for other mime types.
# Fallback for paths whose extension has no entry in `MIMETYPES`: log an error
# that lists the supported extensions and return `nothing`.
function File(::Nothing, path::AbstractString)
    # Sort the extensions so the message does not depend on `Dict` ordering.
    extensions = sort!(collect(keys(MIMETYPES)))
    supported = join(repr.(extensions), ", ", ", and ")
    @error "unsupported file '$path'. Only $supported are supported."
    return nothing
end
File Utilities
Our markdown parser setup:
"""
    init_markdown_parser()

Build a fresh `CommonMark.Parser` with the syntax extensions used by
[`Publish`](#) enabled.
"""
function init_markdown_parser()
    parser = CommonMark.Parser()
    rules = [
        CommonMark.AdmonitionRule(),
        CommonMark.AttributeRule(),
        CommonMark.AutoIdentifierRule(),
        CommonMark.CitationRule(),
        CommonMark.DollarMathRule(),
        CommonMark.FootnoteRule(),
        # Front matter is parsed as TOML.
        CommonMark.FrontMatterRule(toml=TOML.parse),
        CommonMark.MathRule(),
        CommonMark.RawContentRule(),
        CommonMark.TableRule(),
        CommonMark.TypographyRule(),
    ]
    return CommonMark.enable!(parser, rules)
end
Parsing of markdown files:
"""
    load_markdown(io, [parser])

Rewind `io` to its start and parse its contents as markdown. Uses `parser` when
given, otherwise a new parser from [`init_markdown_parser`](#).
"""
function load_markdown(io::IO, parser=init_markdown_parser())
    rewound = seekstart(io)
    return parser(rewound)
end
Extraction of frontmatter content from a markdown AST:
"""
    frontmatter(ast) -> Dict{String,Any}

Return the front matter data stored in the first child of `ast`. If `ast` has
no children, or its first child is not a front matter node, return an empty
`Dict`.
"""
function frontmatter(ast::CommonMark.Node)
    child = ast.first_child
    if !CommonMark.isnull(child) && child.t isa CommonMark.FrontMatter
        return child.t.data
    end
    return Dict{String,Any}()
end