Refactor split long functions ()

we have big functions that handle all stuff ... we should split this into smaler chuncks so we could test them seperate and make clear cuts in what happens where

Reviewed-on: https://codeberg.org/Codeberg/pages-server/pulls/135
This commit is contained in:
6543 2022-11-12 20:43:44 +01:00
parent b9966487f6
commit 6c63b66ce4
17 changed files with 547 additions and 427 deletions

View file

@ -20,6 +20,7 @@ import (
"codeberg.org/codeberg/pages/server/certificates"
"codeberg.org/codeberg/pages/server/database"
"codeberg.org/codeberg/pages/server/gitea"
"codeberg.org/codeberg/pages/server/handler"
)
// AllowedCorsDomains lists the domains for which Cross-Origin Resource Sharing is allowed.
@ -88,9 +89,9 @@ func Serve(ctx *cli.Context) error {
}
// Create handler based on settings
httpsHandler := server.Handler(mainDomainSuffix, rawDomain,
httpsHandler := handler.Handler(mainDomainSuffix, rawDomain,
giteaClient,
giteaRoot, rawInfoPage,
rawInfoPage,
BlacklistedPaths, allowedCorsDomains,
dnsLookupCache, canonicalDomainCache)

View file

@ -78,8 +78,12 @@ func TLSConfig(mainDomainSuffix string,
// DNS not set up, return main certificate to redirect to the docs
sni = mainDomainSuffix
} else {
_, _ = targetRepo, targetBranch
_, valid := upstream.CheckCanonicalDomain(giteaClient, targetOwner, targetRepo, targetBranch, sni, mainDomainSuffix, canonicalDomainCache)
targetOpt := &upstream.Options{
TargetOwner: targetOwner,
TargetRepo: targetRepo,
TargetBranch: targetBranch,
}
_, valid := targetOpt.CheckCanonicalDomain(giteaClient, sni, mainDomainSuffix, canonicalDomainCache)
if !valid {
sni = mainDomainSuffix
}

View file

@ -3,6 +3,8 @@ package context
import (
stdContext "context"
"net/http"
"codeberg.org/codeberg/pages/server/utils"
)
type Context struct {
@ -42,10 +44,6 @@ func (c *Context) String(raw string, status ...int) {
_, _ = c.RespWriter.Write([]byte(raw))
}
func (c *Context) IsMethod(m string) bool {
return c.Req.Method == m
}
func (c *Context) Redirect(uri string, statusCode int) {
http.Redirect(c.RespWriter, c.Req, uri, statusCode)
}
@ -60,3 +58,7 @@ func (c *Context) Path() string {
func (c *Context) Host() string {
return c.Req.URL.Host
}
func (c *Context) TrimHostPort() string {
return utils.TrimHostPort(c.Req.Host)
}

View file

@ -45,6 +45,8 @@ type Client struct {
sdkClient *gitea.Client
responseCache cache.SetGetKey
giteaRoot string
followSymlinks bool
supportLFS bool
@ -79,6 +81,8 @@ func NewClient(giteaRoot, giteaAPIToken string, respCache cache.SetGetKey, follo
sdkClient: sdk,
responseCache: respCache,
giteaRoot: giteaRoot,
followSymlinks: followSymlinks,
supportLFS: supportLFS,
@ -87,6 +91,10 @@ func NewClient(giteaRoot, giteaAPIToken string, respCache cache.SetGetKey, follo
}, err
}
func (client *Client) ContentWebLink(targetOwner, targetRepo, branch, resource string) string {
return path.Join(client.giteaRoot, targetOwner, targetRepo, "src/branch", branch, resource)
}
func (client *Client) GiteaRawContent(targetOwner, targetRepo, ref, resource string) ([]byte, error) {
reader, _, _, err := client.ServeRawContent(targetOwner, targetRepo, ref, resource)
if err != nil {

View file

@ -1,321 +0,0 @@
package server
import (
"fmt"
"net/http"
"path"
"strings"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
"codeberg.org/codeberg/pages/html"
"codeberg.org/codeberg/pages/server/cache"
"codeberg.org/codeberg/pages/server/context"
"codeberg.org/codeberg/pages/server/dns"
"codeberg.org/codeberg/pages/server/gitea"
"codeberg.org/codeberg/pages/server/upstream"
"codeberg.org/codeberg/pages/server/utils"
"codeberg.org/codeberg/pages/server/version"
)
const (
headerAccessControlAllowOrigin = "Access-Control-Allow-Origin"
headerAccessControlAllowMethods = "Access-Control-Allow-Methods"
)
// Handler handles a single HTTP request to the web server.
func Handler(mainDomainSuffix, rawDomain string,
giteaClient *gitea.Client,
giteaRoot, rawInfoPage string,
blacklistedPaths, allowedCorsDomains []string,
dnsLookupCache, canonicalDomainCache cache.SetGetKey,
) http.HandlerFunc {
return func(w http.ResponseWriter, req *http.Request) {
log := log.With().Strs("Handler", []string{string(req.Host), req.RequestURI}).Logger()
ctx := context.New(w, req)
ctx.RespWriter.Header().Set("Server", "CodebergPages/"+version.Version)
// Force new default from specification (since November 2020) - see https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Referrer-Policy#strict-origin-when-cross-origin
ctx.RespWriter.Header().Set("Referrer-Policy", "strict-origin-when-cross-origin")
// Enable browser caching for up to 10 minutes
ctx.RespWriter.Header().Set("Cache-Control", "public, max-age=600")
trimmedHost := utils.TrimHostPort(req.Host)
// Add HSTS for RawDomain and MainDomainSuffix
if hsts := getHSTSHeader(trimmedHost, mainDomainSuffix, rawDomain); hsts != "" {
ctx.RespWriter.Header().Set("Strict-Transport-Security", hsts)
}
// Block all methods not required for static pages
if !ctx.IsMethod(http.MethodGet) && !ctx.IsMethod(http.MethodHead) && !ctx.IsMethod(http.MethodOptions) {
ctx.RespWriter.Header().Set("Allow", http.MethodGet+", "+http.MethodHead+", "+http.MethodOptions) // duplic 1
ctx.String("Method not allowed", http.StatusMethodNotAllowed)
return
}
// Block blacklisted paths (like ACME challenges)
for _, blacklistedPath := range blacklistedPaths {
if strings.HasPrefix(ctx.Path(), blacklistedPath) {
html.ReturnErrorPage(ctx, "requested blacklisted path", http.StatusForbidden)
return
}
}
// Allow CORS for specified domains
allowCors := false
for _, allowedCorsDomain := range allowedCorsDomains {
if strings.EqualFold(trimmedHost, allowedCorsDomain) {
allowCors = true
break
}
}
if allowCors {
ctx.RespWriter.Header().Set(headerAccessControlAllowOrigin, "*")
ctx.RespWriter.Header().Set(headerAccessControlAllowMethods, http.MethodGet+", "+http.MethodHead)
}
ctx.RespWriter.Header().Set("Allow", http.MethodGet+", "+http.MethodHead+", "+http.MethodOptions) // duplic 1
if ctx.IsMethod(http.MethodOptions) {
ctx.RespWriter.WriteHeader(http.StatusNoContent)
return
}
// Prepare request information to Gitea
var targetOwner, targetRepo, targetBranch, targetPath string
targetOptions := &upstream.Options{
TryIndexPages: true,
}
// tryBranch checks if a branch exists and populates the target variables. If canonicalLink is non-empty, it will
// also disallow search indexing and add a Link header to the canonical URL.
// TODO: move into external func to not alert vars indirectly
tryBranch := func(log zerolog.Logger, repo, branch string, _path []string, canonicalLink string) bool {
if repo == "" {
log.Debug().Msg("tryBranch: repo is empty")
return false
}
// Replace "~" to "/" so we can access branch that contains slash character
// Branch name cannot contain "~" so doing this is okay
branch = strings.ReplaceAll(branch, "~", "/")
// Check if the branch exists, otherwise treat it as a file path
branchTimestampResult := upstream.GetBranchTimestamp(giteaClient, targetOwner, repo, branch)
if branchTimestampResult == nil {
log.Debug().Msg("tryBranch: branch doesn't exist")
return false
}
// Branch exists, use it
targetRepo = repo
targetPath = path.Join(_path...)
targetBranch = branchTimestampResult.Branch
targetOptions.BranchTimestamp = branchTimestampResult.Timestamp
if canonicalLink != "" {
// Hide from search machines & add canonical link
ctx.RespWriter.Header().Set("X-Robots-Tag", "noarchive, noindex")
ctx.RespWriter.Header().Set("Link",
strings.NewReplacer("%b", targetBranch, "%p", targetPath).Replace(canonicalLink)+
"; rel=\"canonical\"",
)
}
log.Debug().Msg("tryBranch: true")
return true
}
log.Debug().Msg("preparations")
if rawDomain != "" && strings.EqualFold(trimmedHost, rawDomain) {
// Serve raw content from RawDomain
log.Debug().Msg("raw domain")
targetOptions.TryIndexPages = false
targetOptions.ServeRaw = true
pathElements := strings.Split(strings.Trim(ctx.Path(), "/"), "/")
if len(pathElements) < 2 {
// https://{RawDomain}/{owner}/{repo}[/@{branch}]/{path} is required
ctx.Redirect(rawInfoPage, http.StatusTemporaryRedirect)
return
}
targetOwner = pathElements[0]
targetRepo = pathElements[1]
// raw.codeberg.org/example/myrepo/@main/index.html
if len(pathElements) > 2 && strings.HasPrefix(pathElements[2], "@") {
log.Debug().Msg("raw domain preparations, now trying with specified branch")
if tryBranch(log,
targetRepo, pathElements[2][1:], pathElements[3:],
giteaRoot+"/"+targetOwner+"/"+targetRepo+"/src/branch/%b/%p",
) {
log.Debug().Msg("tryBranch, now trying upstream 1")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
targetOptions, targetOwner, targetRepo, targetBranch, targetPath,
canonicalDomainCache)
return
}
log.Debug().Msg("missing branch info")
html.ReturnErrorPage(ctx, "missing branch info", http.StatusFailedDependency)
return
}
log.Debug().Msg("raw domain preparations, now trying with default branch")
tryBranch(log,
targetRepo, "", pathElements[2:],
giteaRoot+"/"+targetOwner+"/"+targetRepo+"/src/branch/%b/%p",
)
log.Debug().Msg("tryBranch, now trying upstream 2")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
targetOptions, targetOwner, targetRepo, targetBranch, targetPath,
canonicalDomainCache)
return
} else if strings.HasSuffix(trimmedHost, mainDomainSuffix) {
// Serve pages from subdomains of MainDomainSuffix
log.Debug().Msg("main domain suffix")
pathElements := strings.Split(strings.Trim(ctx.Path(), "/"), "/")
targetOwner = strings.TrimSuffix(trimmedHost, mainDomainSuffix)
targetRepo = pathElements[0]
targetPath = strings.Trim(strings.Join(pathElements[1:], "/"), "/")
if targetOwner == "www" {
// www.codeberg.page redirects to codeberg.page // TODO: rm hardcoded - use cname?
ctx.Redirect("https://"+string(mainDomainSuffix[1:])+string(ctx.Path()), http.StatusPermanentRedirect)
return
}
// Check if the first directory is a repo with the second directory as a branch
// example.codeberg.page/myrepo/@main/index.html
if len(pathElements) > 1 && strings.HasPrefix(pathElements[1], "@") {
if targetRepo == "pages" {
// example.codeberg.org/pages/@... redirects to example.codeberg.org/@...
ctx.Redirect("/"+strings.Join(pathElements[1:], "/"), http.StatusTemporaryRedirect)
return
}
log.Debug().Msg("main domain preparations, now trying with specified repo & branch")
branch := pathElements[1][1:]
if tryBranch(log,
pathElements[0], branch, pathElements[2:],
"/"+pathElements[0]+"/%p",
) {
log.Debug().Msg("tryBranch, now trying upstream 3")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
targetOptions, targetOwner, targetRepo, targetBranch, targetPath,
canonicalDomainCache)
} else {
html.ReturnErrorPage(ctx,
fmt.Sprintf("explizite set branch %q do not exist at '%s/%s'", branch, targetOwner, targetRepo),
http.StatusFailedDependency)
}
return
}
// Check if the first directory is a branch for the "pages" repo
// example.codeberg.page/@main/index.html
if strings.HasPrefix(pathElements[0], "@") {
log.Debug().Msg("main domain preparations, now trying with specified branch")
branch := pathElements[0][1:]
if tryBranch(log,
"pages", branch, pathElements[1:], "/%p") {
log.Debug().Msg("tryBranch, now trying upstream 4")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
targetOptions, targetOwner, "pages", targetBranch, targetPath,
canonicalDomainCache)
} else {
html.ReturnErrorPage(ctx,
fmt.Sprintf("explizite set branch %q do not exist at '%s/%s'", branch, targetOwner, "pages"),
http.StatusFailedDependency)
}
return
}
// Check if the first directory is a repo with a "pages" branch
// example.codeberg.page/myrepo/index.html
// example.codeberg.page/pages/... is not allowed here.
log.Debug().Msg("main domain preparations, now trying with specified repo")
if pathElements[0] != "pages" && tryBranch(log,
pathElements[0], "pages", pathElements[1:], "") {
log.Debug().Msg("tryBranch, now trying upstream 5")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
targetOptions, targetOwner, targetRepo, targetBranch, targetPath,
canonicalDomainCache)
return
}
// Try to use the "pages" repo on its default branch
// example.codeberg.page/index.html
log.Debug().Msg("main domain preparations, now trying with default repo/branch")
if tryBranch(log,
"pages", "", pathElements, "") {
log.Debug().Msg("tryBranch, now trying upstream 6")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
targetOptions, targetOwner, targetRepo, targetBranch, targetPath,
canonicalDomainCache)
return
}
// Couldn't find a valid repo/branch
html.ReturnErrorPage(ctx,
fmt.Sprintf("couldn't find a valid repo[%s]/branch[%s]", targetRepo, targetBranch),
http.StatusFailedDependency)
return
} else {
trimmedHostStr := string(trimmedHost)
// Serve pages from custom domains
targetOwner, targetRepo, targetBranch = dns.GetTargetFromDNS(trimmedHostStr, string(mainDomainSuffix), dnsLookupCache)
if targetOwner == "" {
html.ReturnErrorPage(ctx,
"could not obtain repo owner from custom domain",
http.StatusFailedDependency)
return
}
pathElements := strings.Split(strings.Trim(ctx.Path(), "/"), "/")
canonicalLink := ""
if strings.HasPrefix(pathElements[0], "@") {
targetBranch = pathElements[0][1:]
pathElements = pathElements[1:]
canonicalLink = "/%p"
}
// Try to use the given repo on the given branch or the default branch
log.Debug().Msg("custom domain preparations, now trying with details from DNS")
if tryBranch(log,
targetRepo, targetBranch, pathElements, canonicalLink) {
canonicalDomain, valid := upstream.CheckCanonicalDomain(giteaClient, targetOwner, targetRepo, targetBranch, trimmedHostStr, string(mainDomainSuffix), canonicalDomainCache)
if !valid {
html.ReturnErrorPage(ctx, "domain not specified in <code>.domains</code> file", http.StatusMisdirectedRequest)
return
} else if canonicalDomain != trimmedHostStr {
// only redirect if the target is also a codeberg page!
targetOwner, _, _ = dns.GetTargetFromDNS(strings.SplitN(canonicalDomain, "/", 2)[0], string(mainDomainSuffix), dnsLookupCache)
if targetOwner != "" {
ctx.Redirect("https://"+canonicalDomain+string(ctx.Path()), http.StatusTemporaryRedirect)
return
}
html.ReturnErrorPage(ctx, "target is no codeberg page", http.StatusFailedDependency)
return
}
log.Debug().Msg("tryBranch, now trying upstream 7")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
targetOptions, targetOwner, targetRepo, targetBranch, targetPath,
canonicalDomainCache)
return
}
html.ReturnErrorPage(ctx, "could not find target for custom domain", http.StatusFailedDependency)
return
}
}
}

111
server/handler/handler.go Normal file
View file

@ -0,0 +1,111 @@
package handler
import (
"net/http"
"strings"
"github.com/rs/zerolog/log"
"codeberg.org/codeberg/pages/html"
"codeberg.org/codeberg/pages/server/cache"
"codeberg.org/codeberg/pages/server/context"
"codeberg.org/codeberg/pages/server/gitea"
"codeberg.org/codeberg/pages/server/version"
)
const (
headerAccessControlAllowOrigin = "Access-Control-Allow-Origin"
headerAccessControlAllowMethods = "Access-Control-Allow-Methods"
)
// Handler handles a single HTTP request to the web server.
func Handler(mainDomainSuffix, rawDomain string,
giteaClient *gitea.Client,
rawInfoPage string,
blacklistedPaths, allowedCorsDomains []string,
dnsLookupCache, canonicalDomainCache cache.SetGetKey,
) http.HandlerFunc {
return func(w http.ResponseWriter, req *http.Request) {
log := log.With().Strs("Handler", []string{string(req.Host), req.RequestURI}).Logger()
ctx := context.New(w, req)
ctx.RespWriter.Header().Set("Server", "CodebergPages/"+version.Version)
// Force new default from specification (since November 2020) - see https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Referrer-Policy#strict-origin-when-cross-origin
ctx.RespWriter.Header().Set("Referrer-Policy", "strict-origin-when-cross-origin")
// Enable browser caching for up to 10 minutes
ctx.RespWriter.Header().Set("Cache-Control", "public, max-age=600")
trimmedHost := ctx.TrimHostPort()
// Add HSTS for RawDomain and MainDomainSuffix
if hsts := getHSTSHeader(trimmedHost, mainDomainSuffix, rawDomain); hsts != "" {
ctx.RespWriter.Header().Set("Strict-Transport-Security", hsts)
}
// Handle all http methods
ctx.RespWriter.Header().Set("Allow", http.MethodGet+", "+http.MethodHead+", "+http.MethodOptions)
switch ctx.Req.Method {
case http.MethodOptions:
// return Allow header
ctx.RespWriter.WriteHeader(http.StatusNoContent)
return
case http.MethodGet,
http.MethodHead:
// end switch case and handle allowed requests
break
default:
// Block all methods not required for static pages
ctx.String("Method not allowed", http.StatusMethodNotAllowed)
return
}
// Block blacklisted paths (like ACME challenges)
for _, blacklistedPath := range blacklistedPaths {
if strings.HasPrefix(ctx.Path(), blacklistedPath) {
html.ReturnErrorPage(ctx, "requested blacklisted path", http.StatusForbidden)
return
}
}
// Allow CORS for specified domains
allowCors := false
for _, allowedCorsDomain := range allowedCorsDomains {
if strings.EqualFold(trimmedHost, allowedCorsDomain) {
allowCors = true
break
}
}
if allowCors {
ctx.RespWriter.Header().Set(headerAccessControlAllowOrigin, "*")
ctx.RespWriter.Header().Set(headerAccessControlAllowMethods, http.MethodGet+", "+http.MethodHead)
}
// Prepare request information to Gitea
pathElements := strings.Split(strings.Trim(ctx.Path(), "/"), "/")
if rawDomain != "" && strings.EqualFold(trimmedHost, rawDomain) {
log.Debug().Msg("raw domain request detecded")
handleRaw(log, ctx, giteaClient,
mainDomainSuffix, rawInfoPage,
trimmedHost,
pathElements,
canonicalDomainCache)
} else if strings.HasSuffix(trimmedHost, mainDomainSuffix) {
log.Debug().Msg("subdomain request detecded")
handleSubDomain(log, ctx, giteaClient,
mainDomainSuffix,
trimmedHost,
pathElements,
canonicalDomainCache)
} else {
log.Debug().Msg("custom domain request detecded")
handleCustomDomain(log, ctx, giteaClient,
mainDomainSuffix,
trimmedHost,
pathElements,
dnsLookupCache, canonicalDomainCache)
}
}
}

View file

@ -0,0 +1,71 @@
package handler
import (
"net/http"
"path"
"strings"
"codeberg.org/codeberg/pages/html"
"codeberg.org/codeberg/pages/server/cache"
"codeberg.org/codeberg/pages/server/context"
"codeberg.org/codeberg/pages/server/dns"
"codeberg.org/codeberg/pages/server/gitea"
"codeberg.org/codeberg/pages/server/upstream"
"github.com/rs/zerolog"
)
func handleCustomDomain(log zerolog.Logger, ctx *context.Context, giteaClient *gitea.Client,
mainDomainSuffix string,
trimmedHost string,
pathElements []string,
dnsLookupCache, canonicalDomainCache cache.SetGetKey,
) {
// Serve pages from custom domains
targetOwner, targetRepo, targetBranch := dns.GetTargetFromDNS(trimmedHost, string(mainDomainSuffix), dnsLookupCache)
if targetOwner == "" {
html.ReturnErrorPage(ctx,
"could not obtain repo owner from custom domain",
http.StatusFailedDependency)
return
}
pathParts := pathElements
canonicalLink := false
if strings.HasPrefix(pathElements[0], "@") {
targetBranch = pathElements[0][1:]
pathParts = pathElements[1:]
canonicalLink = true
}
// Try to use the given repo on the given branch or the default branch
log.Debug().Msg("custom domain preparations, now trying with details from DNS")
if targetOpt, works := tryBranch(log, ctx, giteaClient, &upstream.Options{
TryIndexPages: true,
TargetOwner: targetOwner,
TargetRepo: targetRepo,
TargetBranch: targetBranch,
TargetPath: path.Join(pathParts...),
}, canonicalLink); works {
canonicalDomain, valid := targetOpt.CheckCanonicalDomain(giteaClient, trimmedHost, string(mainDomainSuffix), canonicalDomainCache)
if !valid {
html.ReturnErrorPage(ctx, "domain not specified in <code>.domains</code> file", http.StatusMisdirectedRequest)
return
} else if canonicalDomain != trimmedHost {
// only redirect if the target is also a codeberg page!
targetOwner, _, _ = dns.GetTargetFromDNS(strings.SplitN(canonicalDomain, "/", 2)[0], string(mainDomainSuffix), dnsLookupCache)
if targetOwner != "" {
ctx.Redirect("https://"+canonicalDomain+string(targetOpt.TargetPath), http.StatusTemporaryRedirect)
return
}
html.ReturnErrorPage(ctx, "target is no codeberg page", http.StatusFailedDependency)
return
}
log.Debug().Msg("tryBranch, now trying upstream 7")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache)
return
}
html.ReturnErrorPage(ctx, "could not find target for custom domain", http.StatusFailedDependency)
}

View file

@ -0,0 +1,67 @@
package handler
import (
"fmt"
"net/http"
"path"
"strings"
"github.com/rs/zerolog"
"codeberg.org/codeberg/pages/html"
"codeberg.org/codeberg/pages/server/cache"
"codeberg.org/codeberg/pages/server/context"
"codeberg.org/codeberg/pages/server/gitea"
"codeberg.org/codeberg/pages/server/upstream"
)
func handleRaw(log zerolog.Logger, ctx *context.Context, giteaClient *gitea.Client,
mainDomainSuffix, rawInfoPage string,
trimmedHost string,
pathElements []string,
canonicalDomainCache cache.SetGetKey,
) {
// Serve raw content from RawDomain
log.Debug().Msg("raw domain")
if len(pathElements) < 2 {
// https://{RawDomain}/{owner}/{repo}[/@{branch}]/{path} is required
ctx.Redirect(rawInfoPage, http.StatusTemporaryRedirect)
return
}
// raw.codeberg.org/example/myrepo/@main/index.html
if len(pathElements) > 2 && strings.HasPrefix(pathElements[2], "@") {
log.Debug().Msg("raw domain preparations, now trying with specified branch")
if targetOpt, works := tryBranch(log, ctx, giteaClient, &upstream.Options{
ServeRaw: true,
TargetOwner: pathElements[0],
TargetRepo: pathElements[1],
TargetBranch: pathElements[2][1:],
TargetPath: path.Join(pathElements[3:]...),
}, true); works {
log.Trace().Msg("tryUpstream: serve raw domain with specified branch")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache)
return
}
log.Debug().Msg("missing branch info")
html.ReturnErrorPage(ctx, "missing branch info", http.StatusFailedDependency)
return
}
log.Debug().Msg("raw domain preparations, now trying with default branch")
if targetOpt, works := tryBranch(log, ctx, giteaClient, &upstream.Options{
TryIndexPages: false,
ServeRaw: true,
TargetOwner: pathElements[0],
TargetRepo: pathElements[1],
TargetPath: path.Join(pathElements[2:]...),
}, true); works {
log.Trace().Msg("tryUpstream: serve raw domain with default branch")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache)
} else {
html.ReturnErrorPage(ctx,
fmt.Sprintf("raw domain could not find repo '%s/%s' or repo is empty", targetOpt.TargetOwner, targetOpt.TargetRepo),
http.StatusNotFound)
}
}

View file

@ -0,0 +1,120 @@
package handler
import (
"fmt"
"net/http"
"path"
"strings"
"github.com/rs/zerolog"
"codeberg.org/codeberg/pages/html"
"codeberg.org/codeberg/pages/server/cache"
"codeberg.org/codeberg/pages/server/context"
"codeberg.org/codeberg/pages/server/gitea"
"codeberg.org/codeberg/pages/server/upstream"
)
func handleSubDomain(log zerolog.Logger, ctx *context.Context, giteaClient *gitea.Client,
mainDomainSuffix string,
trimmedHost string,
pathElements []string,
canonicalDomainCache cache.SetGetKey,
) {
// Serve pages from subdomains of MainDomainSuffix
log.Debug().Msg("main domain suffix")
targetOwner := strings.TrimSuffix(trimmedHost, mainDomainSuffix)
targetRepo := pathElements[0]
if targetOwner == "www" {
// www.codeberg.page redirects to codeberg.page // TODO: rm hardcoded - use cname?
ctx.Redirect("https://"+string(mainDomainSuffix[1:])+string(ctx.Path()), http.StatusPermanentRedirect)
return
}
// Check if the first directory is a repo with the second directory as a branch
// example.codeberg.page/myrepo/@main/index.html
if len(pathElements) > 1 && strings.HasPrefix(pathElements[1], "@") {
if targetRepo == "pages" {
// example.codeberg.org/pages/@... redirects to example.codeberg.org/@...
ctx.Redirect("/"+strings.Join(pathElements[1:], "/"), http.StatusTemporaryRedirect)
return
}
log.Debug().Msg("main domain preparations, now trying with specified repo & branch")
if targetOpt, works := tryBranch(log, ctx, giteaClient, &upstream.Options{
TryIndexPages: true,
TargetOwner: targetOwner,
TargetRepo: pathElements[0],
TargetBranch: pathElements[1][1:],
TargetPath: path.Join(pathElements[2:]...),
}, true); works {
log.Trace().Msg("tryUpstream: serve with specified repo and branch")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache)
} else {
html.ReturnErrorPage(ctx,
fmt.Sprintf("explizite set branch %q do not exist at '%s/%s'", targetOpt.TargetBranch, targetOpt.TargetOwner, targetOpt.TargetRepo),
http.StatusFailedDependency)
}
return
}
// Check if the first directory is a branch for the "pages" repo
// example.codeberg.page/@main/index.html
if strings.HasPrefix(pathElements[0], "@") {
log.Debug().Msg("main domain preparations, now trying with specified branch")
if targetOpt, works := tryBranch(log, ctx, giteaClient, &upstream.Options{
TryIndexPages: true,
TargetOwner: targetOwner,
TargetRepo: "pages",
TargetBranch: pathElements[0][1:],
TargetPath: path.Join(pathElements[1:]...),
}, true); works {
log.Trace().Msg("tryUpstream: serve default pages repo with specified branch")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache)
} else {
html.ReturnErrorPage(ctx,
fmt.Sprintf("explizite set branch %q do not exist at '%s/%s'", targetOpt.TargetBranch, targetOpt.TargetOwner, targetOpt.TargetRepo),
http.StatusFailedDependency)
}
return
}
// Check if the first directory is a repo with a "pages" branch
// example.codeberg.page/myrepo/index.html
// example.codeberg.page/pages/... is not allowed here.
log.Debug().Msg("main domain preparations, now trying with specified repo")
if pathElements[0] != "pages" {
if targetOpt, works := tryBranch(log, ctx, giteaClient, &upstream.Options{
TryIndexPages: true,
TargetOwner: targetOwner,
TargetRepo: pathElements[0],
TargetBranch: "pages",
TargetPath: path.Join(pathElements[1:]...),
}, false); works {
log.Debug().Msg("tryBranch, now trying upstream 5")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache)
return
}
}
// Try to use the "pages" repo on its default branch
// example.codeberg.page/index.html
log.Debug().Msg("main domain preparations, now trying with default repo/branch")
if targetOpt, works := tryBranch(log, ctx, giteaClient, &upstream.Options{
TryIndexPages: true,
TargetOwner: targetOwner,
TargetRepo: "pages",
TargetPath: path.Join(pathElements...),
}, false); works {
log.Debug().Msg("tryBranch, now trying upstream 6")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache)
return
}
// Couldn't find a valid repo/branch
html.ReturnErrorPage(ctx,
fmt.Sprintf("couldn't find a valid repo[%s]", targetRepo),
http.StatusFailedDependency)
}

View file

@ -1,4 +1,4 @@
package server
package handler
import (
"net/http/httptest"
@ -11,12 +11,11 @@ import (
)
func TestHandlerPerformance(t *testing.T) {
giteaRoot := "https://codeberg.org"
giteaClient, _ := gitea.NewClient(giteaRoot, "", cache.NewKeyValueCache(), false, false)
giteaClient, _ := gitea.NewClient("https://codeberg.org", "", cache.NewKeyValueCache(), false, false)
testHandler := Handler(
"codeberg.page", "raw.codeberg.org",
giteaClient,
giteaRoot, "https://docs.codeberg.org/pages/raw-content/",
"https://docs.codeberg.org/pages/raw-content/",
[]string{"/.well-known/acme-challenge/"},
[]string{"raw.codeberg.org", "fonts.codeberg.org", "design.codeberg.org"},
cache.NewKeyValueCache(),

View file

@ -1,4 +1,4 @@
package server
package handler
import (
"strings"

76
server/handler/try.go Normal file
View file

@ -0,0 +1,76 @@
package handler
import (
"net/http"
"strings"
"github.com/rs/zerolog"
"codeberg.org/codeberg/pages/html"
"codeberg.org/codeberg/pages/server/cache"
"codeberg.org/codeberg/pages/server/context"
"codeberg.org/codeberg/pages/server/gitea"
"codeberg.org/codeberg/pages/server/upstream"
)
// tryUpstream forwards the target request to the Gitea API, and shows an error page on failure.
func tryUpstream(ctx *context.Context, giteaClient *gitea.Client,
mainDomainSuffix, trimmedHost string,
options *upstream.Options,
canonicalDomainCache cache.SetGetKey,
) {
// check if a canonical domain exists on a request on MainDomain
if strings.HasSuffix(trimmedHost, mainDomainSuffix) {
canonicalDomain, _ := options.CheckCanonicalDomain(giteaClient, "", string(mainDomainSuffix), canonicalDomainCache)
if !strings.HasSuffix(strings.SplitN(canonicalDomain, "/", 2)[0], string(mainDomainSuffix)) {
canonicalPath := ctx.Req.RequestURI
if options.TargetRepo != "pages" {
path := strings.SplitN(canonicalPath, "/", 3)
if len(path) >= 3 {
canonicalPath = "/" + path[2]
}
}
ctx.Redirect("https://"+canonicalDomain+canonicalPath, http.StatusTemporaryRedirect)
return
}
}
// add host for debugging
options.Host = string(trimmedHost)
// Try to request the file from the Gitea API
if !options.Upstream(ctx, giteaClient) {
html.ReturnErrorPage(ctx, "", ctx.StatusCode)
}
}
// tryBranch checks if a branch exists and populates the target variables. If canonicalLink is non-empty,
// it will also disallow search indexing and add a Link header to the canonical URL.
func tryBranch(log zerolog.Logger, ctx *context.Context, giteaClient *gitea.Client,
targetOptions *upstream.Options, canonicalLink bool,
) (*upstream.Options, bool) {
if targetOptions.TargetOwner == "" || targetOptions.TargetRepo == "" {
log.Debug().Msg("tryBranch: owner or repo is empty")
return nil, false
}
// Replace "~" to "/" so we can access branch that contains slash character
// Branch name cannot contain "~" so doing this is okay
targetOptions.TargetBranch = strings.ReplaceAll(targetOptions.TargetBranch, "~", "/")
// Check if the branch exists, otherwise treat it as a file path
branchExist, _ := targetOptions.GetBranchTimestamp(giteaClient)
if !branchExist {
log.Debug().Msg("tryBranch: branch doesn't exist")
return nil, false
}
if canonicalLink {
// Hide from search machines & add canonical link
ctx.RespWriter.Header().Set("X-Robots-Tag", "noarchive, noindex")
ctx.RespWriter.Header().Set("Link", targetOptions.ContentWebLink(giteaClient)+"; rel=\"canonical\"")
}
log.Debug().Msg("tryBranch: true")
return targetOptions, true
}

View file

@ -1,49 +0,0 @@
package server
import (
"net/http"
"strings"
"codeberg.org/codeberg/pages/html"
"codeberg.org/codeberg/pages/server/cache"
"codeberg.org/codeberg/pages/server/context"
"codeberg.org/codeberg/pages/server/gitea"
"codeberg.org/codeberg/pages/server/upstream"
)
// tryUpstream forwards the target request to the Gitea API, and shows an error page on failure.
func tryUpstream(ctx *context.Context, giteaClient *gitea.Client,
mainDomainSuffix, trimmedHost string,
targetOptions *upstream.Options,
targetOwner, targetRepo, targetBranch, targetPath string,
canonicalDomainCache cache.SetGetKey,
) {
// check if a canonical domain exists on a request on MainDomain
if strings.HasSuffix(trimmedHost, mainDomainSuffix) {
canonicalDomain, _ := upstream.CheckCanonicalDomain(giteaClient, targetOwner, targetRepo, targetBranch, "", string(mainDomainSuffix), canonicalDomainCache)
if !strings.HasSuffix(strings.SplitN(canonicalDomain, "/", 2)[0], string(mainDomainSuffix)) {
canonicalPath := ctx.Req.RequestURI
if targetRepo != "pages" {
path := strings.SplitN(canonicalPath, "/", 3)
if len(path) >= 3 {
canonicalPath = "/" + path[2]
}
}
ctx.Redirect("https://"+canonicalDomain+canonicalPath, http.StatusTemporaryRedirect)
return
}
}
targetOptions.TargetOwner = targetOwner
targetOptions.TargetRepo = targetRepo
targetOptions.TargetBranch = targetBranch
targetOptions.TargetPath = targetPath
targetOptions.Host = string(trimmedHost)
// Try to request the file from the Gitea API
if !targetOptions.Upstream(ctx, giteaClient) {
html.ReturnErrorPage(ctx, "", ctx.StatusCode)
}
}

View file

@ -16,12 +16,12 @@ var canonicalDomainCacheTimeout = 15 * time.Minute
const canonicalDomainConfig = ".domains"
// CheckCanonicalDomain returns the canonical domain specified in the repo (using the `.domains` file).
func CheckCanonicalDomain(giteaClient *gitea.Client, targetOwner, targetRepo, targetBranch, actualDomain, mainDomainSuffix string, canonicalDomainCache cache.SetGetKey) (string, bool) {
func (o *Options) CheckCanonicalDomain(giteaClient *gitea.Client, actualDomain, mainDomainSuffix string, canonicalDomainCache cache.SetGetKey) (string, bool) {
var (
domains []string
valid bool
)
if cachedValue, ok := canonicalDomainCache.Get(targetOwner + "/" + targetRepo + "/" + targetBranch); ok {
if cachedValue, ok := canonicalDomainCache.Get(o.TargetOwner + "/" + o.TargetRepo + "/" + o.TargetBranch); ok {
domains = cachedValue.([]string)
for _, domain := range domains {
if domain == actualDomain {
@ -30,7 +30,7 @@ func CheckCanonicalDomain(giteaClient *gitea.Client, targetOwner, targetRepo, ta
}
}
} else {
body, err := giteaClient.GiteaRawContent(targetOwner, targetRepo, targetBranch, canonicalDomainConfig)
body, err := giteaClient.GiteaRawContent(o.TargetOwner, o.TargetRepo, o.TargetBranch, canonicalDomainConfig)
if err == nil {
for _, domain := range strings.Split(string(body), "\n") {
domain = strings.ToLower(domain)
@ -45,16 +45,16 @@ func CheckCanonicalDomain(giteaClient *gitea.Client, targetOwner, targetRepo, ta
}
}
} else {
log.Info().Err(err).Msgf("could not read %s of %s/%s", canonicalDomainConfig, targetOwner, targetRepo)
log.Info().Err(err).Msgf("could not read %s of %s/%s", canonicalDomainConfig, o.TargetOwner, o.TargetRepo)
}
domains = append(domains, targetOwner+mainDomainSuffix)
domains = append(domains, o.TargetOwner+mainDomainSuffix)
if domains[len(domains)-1] == actualDomain {
valid = true
}
if targetRepo != "" && targetRepo != "pages" {
domains[len(domains)-1] += "/" + targetRepo
if o.TargetRepo != "" && o.TargetRepo != "pages" {
domains[len(domains)-1] += "/" + o.TargetRepo
}
_ = canonicalDomainCache.Set(targetOwner+"/"+targetRepo+"/"+targetBranch, domains, canonicalDomainCacheTimeout)
_ = canonicalDomainCache.Set(o.TargetOwner+"/"+o.TargetRepo+"/"+o.TargetBranch, domains, canonicalDomainCacheTimeout)
}
return domains[0], valid
}

28
server/upstream/header.go Normal file
View file

@ -0,0 +1,28 @@
package upstream
import (
"net/http"
"time"
"codeberg.org/codeberg/pages/server/context"
"codeberg.org/codeberg/pages/server/gitea"
)
// setHeader set values to response header
func (o *Options) setHeader(ctx *context.Context, header http.Header) {
if eTag := header.Get(gitea.ETagHeader); eTag != "" {
ctx.RespWriter.Header().Set(gitea.ETagHeader, eTag)
}
if cacheIndicator := header.Get(gitea.PagesCacheIndicatorHeader); cacheIndicator != "" {
ctx.RespWriter.Header().Set(gitea.PagesCacheIndicatorHeader, cacheIndicator)
}
if length := header.Get(gitea.ContentLengthHeader); length != "" {
ctx.RespWriter.Header().Set(gitea.ContentLengthHeader, length)
}
if mime := header.Get(gitea.ContentTypeHeader); mime == "" || o.ServeRaw {
ctx.RespWriter.Header().Set(gitea.ContentTypeHeader, rawMime)
} else {
ctx.RespWriter.Header().Set(gitea.ContentTypeHeader, mime)
}
ctx.RespWriter.Header().Set(headerLastModified, o.BranchTimestamp.In(time.UTC).Format(time.RFC1123))
}

View file

@ -2,35 +2,46 @@ package upstream
import (
"errors"
"fmt"
"github.com/rs/zerolog/log"
"codeberg.org/codeberg/pages/server/gitea"
)
// GetBranchTimestamp finds the default branch (if branch is "") and returns the last modification time of the branch
// (or nil if the branch doesn't exist)
func GetBranchTimestamp(giteaClient *gitea.Client, owner, repo, branch string) *gitea.BranchTimestamp {
log := log.With().Strs("BranchInfo", []string{owner, repo, branch}).Logger()
// GetBranchTimestamp finds the default branch (if branch is "") and save branch and it's last modification time to Options
func (o *Options) GetBranchTimestamp(giteaClient *gitea.Client) (bool, error) {
log := log.With().Strs("BranchInfo", []string{o.TargetOwner, o.TargetRepo, o.TargetBranch}).Logger()
if len(branch) == 0 {
if len(o.TargetBranch) == 0 {
// Get default branch
defaultBranch, err := giteaClient.GiteaGetRepoDefaultBranch(owner, repo)
defaultBranch, err := giteaClient.GiteaGetRepoDefaultBranch(o.TargetOwner, o.TargetRepo)
if err != nil {
log.Err(err).Msg("Could't fetch default branch from repository")
return nil
return false, err
}
log.Debug().Msgf("Succesfully fetched default branch %q from Gitea", defaultBranch)
branch = defaultBranch
o.TargetBranch = defaultBranch
}
timestamp, err := giteaClient.GiteaGetRepoBranchTimestamp(owner, repo, branch)
timestamp, err := giteaClient.GiteaGetRepoBranchTimestamp(o.TargetOwner, o.TargetRepo, o.TargetBranch)
if err != nil {
if !errors.Is(err, gitea.ErrorNotFound) {
log.Error().Err(err).Msg("Could not get latest commit's timestamp from branch")
}
return nil
return false, err
}
if timestamp == nil || timestamp.Branch == "" {
return false, fmt.Errorf("empty response")
}
log.Debug().Msgf("Succesfully fetched latest commit's timestamp from branch: %#v", timestamp)
return timestamp
o.BranchTimestamp = timestamp.Timestamp
o.TargetBranch = timestamp.Branch
return true, nil
}
func (o *Options) ContentWebLink(giteaClient *gitea.Client) string {
return giteaClient.ContentWebLink(o.TargetOwner, o.TargetRepo, o.TargetBranch, o.TargetPath) + "; rel=\"canonical\""
}

View file

@ -34,10 +34,10 @@ var upstreamNotFoundPages = []string{
// Options provides various options for the upstream request.
type Options struct {
TargetOwner,
TargetRepo,
TargetBranch,
TargetPath,
TargetOwner string
TargetRepo string
TargetBranch string
TargetPath string
// Used for debugging purposes.
Host string
@ -62,16 +62,22 @@ func (o *Options) Upstream(ctx *context.Context, giteaClient *gitea.Client) (fin
// Check if the branch exists and when it was modified
if o.BranchTimestamp.IsZero() {
branch := GetBranchTimestamp(giteaClient, o.TargetOwner, o.TargetRepo, o.TargetBranch)
if branch == nil || branch.Branch == "" {
branchExist, err := o.GetBranchTimestamp(giteaClient)
// handle 404
if err != nil && errors.Is(err, gitea.ErrorNotFound) || !branchExist {
html.ReturnErrorPage(ctx,
fmt.Sprintf("could not get timestamp of branch %q", o.TargetBranch),
fmt.Sprintf("branch %q for '%s/%s' not found", o.TargetBranch, o.TargetOwner, o.TargetRepo),
http.StatusNotFound)
return true
}
// handle unexpected errors
if err != nil {
html.ReturnErrorPage(ctx,
fmt.Sprintf("could not get timestamp of branch %q: %v", o.TargetBranch, err),
http.StatusFailedDependency)
return true
}
o.TargetBranch = branch.Branch
o.BranchTimestamp = branch.Timestamp
}
// Check if the browser has a cached version
@ -172,21 +178,7 @@ func (o *Options) Upstream(ctx *context.Context, giteaClient *gitea.Client) (fin
}
// Set ETag & MIME
if eTag := header.Get(gitea.ETagHeader); eTag != "" {
ctx.RespWriter.Header().Set(gitea.ETagHeader, eTag)
}
if cacheIndicator := header.Get(gitea.PagesCacheIndicatorHeader); cacheIndicator != "" {
ctx.RespWriter.Header().Set(gitea.PagesCacheIndicatorHeader, cacheIndicator)
}
if length := header.Get(gitea.ContentLengthHeader); length != "" {
ctx.RespWriter.Header().Set(gitea.ContentLengthHeader, length)
}
if mime := header.Get(gitea.ContentTypeHeader); mime == "" || o.ServeRaw {
ctx.RespWriter.Header().Set(gitea.ContentTypeHeader, rawMime)
} else {
ctx.RespWriter.Header().Set(gitea.ContentTypeHeader, mime)
}
ctx.RespWriter.Header().Set(headerLastModified, o.BranchTimestamp.In(time.UTC).Format(time.RFC1123))
o.setHeader(ctx, header)
log.Debug().Msg("Prepare response")