Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds option to parse form, input, textarea & select elements #464

Merged
merged 4 commits into from
Jun 13, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ CONFIGURATION:
-mrs, -max-response-size int maximum response size to read (default 9223372036854775807)
-timeout int time to wait for request in seconds (default 10)
-aff, -automatic-form-fill enable automatic form filling (experimental)
-fx, -form-extraction enable extraction of form, input, textarea & select elements
-retry int number of times to retry the request (default 1)
-proxy string http/socks5 proxy to use
-H, -headers string[] custom header/cookie to include in all http request in header:value format (file)
Expand Down Expand Up @@ -535,6 +536,7 @@ CONFIGURATION:
-mrs, -max-response-size int maximum response size to read (default 9223372036854775807)
-timeout int time to wait for request in seconds (default 10)
-aff, -automatic-form-fill enable automatic form filling (experimental)
-fx, -form-extraction enable extraction of form, input, textarea & select elements
-retry int number of times to retry the request (default 1)
-proxy string http/socks5 proxy to use
-H, -headers string[] custom header/cookie to include in request
Expand Down
1 change: 1 addition & 0 deletions cmd/katana/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ pipelines offering both headless and non-headless crawling.`)
flagSet.IntVarP(&options.BodyReadSize, "max-response-size", "mrs", math.MaxInt, "maximum response size to read"),
flagSet.IntVar(&options.Timeout, "timeout", 10, "time to wait for request in seconds"),
flagSet.BoolVarP(&options.AutomaticFormFill, "automatic-form-fill", "aff", false, "enable automatic form filling (experimental)"),
flagSet.BoolVarP(&options.FormExtraction, "form-extraction", "fx", false, "enable extraction of form, input, textarea & select elements"),
flagSet.IntVar(&options.Retries, "retry", 1, "number of times to retry the request"),
flagSet.StringVar(&options.Proxy, "proxy", "", "http/socks5 proxy to use"),
flagSet.StringSliceVarP(&options.CustomHeaders, "headers", "H", nil, "custom header/cookie to include in all http request in header:value format (file)", goflags.FileStringSliceOptions),
Expand Down
3 changes: 3 additions & 0 deletions pkg/engine/hybrid/crawl.go
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,9 @@ func (c *Crawler) navigateRequest(s *common.CrawlSession, request *navigation.Re
}

response.Body = body
if c.Options.Options.FormExtraction {
response.Forms = append(response.Forms, utils.ParseFormFields(response.Reader)...)
}

response.Reader, err = goquery.NewDocumentFromReader(strings.NewReader(response.Body))
if err != nil {
Expand Down
3 changes: 3 additions & 0 deletions pkg/engine/standard/crawl.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@ func (c *Crawler) makeRequest(s *common.CrawlSession, request *navigation.Reques
response.Reader, err = goquery.NewDocumentFromReader(bytes.NewReader(data))
response.StatusCode = resp.StatusCode
response.Headers = utils.FlattenHeaders(resp.Header)
if c.Options.Options.FormExtraction {
response.Forms = append(response.Forms, utils.ParseFormFields(response.Reader)...)
}

resp.ContentLength = int64(len(data))

Expand Down
8 changes: 8 additions & 0 deletions pkg/navigation/response.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,13 @@ import (

type Headers map[string]string

// Form describes a single HTML <form> element discovered in a response,
// together with the names of the controls it contains.
type Form struct {
	// Method holds the value of the form's method attribute.
	Method string `json:"method,omitempty"`
	// Action holds the value of the form's action attribute.
	Action string `json:"action,omitempty"`
	// Enctype holds the value of the form's enctype attribute.
	Enctype string `json:"enctype,omitempty"`
	// Parameters lists the name attributes of the form's controls.
	Parameters []string `json:"parameters,omitempty"`
}

func (h *Headers) MarshalJSON() ([]byte, error) {
hCopy := make(Headers)
for k, v := range *h {
Expand All @@ -30,6 +37,7 @@ type Response struct {
RootHostname string `json:"-"`
Technologies []string `json:"technologies,omitempty"`
Raw string `json:"raw,omitempty"`
Forms []Form `json:"forms,omitempty"`
}

func (n Response) AbsoluteURL(path string) string {
Expand Down
2 changes: 2 additions & 0 deletions pkg/types/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ type Options struct {
Headless bool
// AutomaticFormFill enables optional automatic form filling and submission
AutomaticFormFill bool
// FormExtraction enables extraction of form, input, textarea & select elements
FormExtraction bool
// UseInstalledChrome skips chrome install and uses local instance
UseInstalledChrome bool
// ShowBrowser specifies whether to show the browser in headless mode
Expand Down
37 changes: 37 additions & 0 deletions pkg/utils/formfields.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package utils

import (
"github.com/projectdiscovery/katana/pkg/navigation"

"github.com/PuerkitoBio/goquery"
)

// ParseFormFields extracts every <form> element found in document together
// with the names of the input, textarea and select elements nested inside it.
//
// For each form the raw action, method and enctype attribute values are
// copied as-is; a missing attribute results in an empty string. Controls that
// carry no name attribute are skipped.
//
// A nil document yields a nil slice instead of panicking, so callers may
// safely pass a document whose parsing failed or has not happened yet.
func ParseFormFields(document *goquery.Document) []navigation.Form {
	if document == nil {
		return nil
	}

	var forms []navigation.Form

	document.Find("form").Each(func(_ int, formElem *goquery.Selection) {
		// The presence flag returned by Attr is intentionally ignored: an
		// absent attribute already yields "", which is what gets stored.
		action, _ := formElem.Attr("action")
		method, _ := formElem.Attr("method")
		enctype, _ := formElem.Attr("enctype")

		form := navigation.Form{
			Action:  action,
			Method:  method,
			Enctype: enctype,
		}

		formElem.Find("input, textarea, select").Each(func(_ int, inputElem *goquery.Selection) {
			// Only controls with a name attribute are recorded.
			if name, ok := inputElem.Attr("name"); ok {
				form.Parameters = append(form.Parameters, name)
			}
		})

		forms = append(forms, form)
	})

	return forms
}
38 changes: 38 additions & 0 deletions pkg/utils/formfields_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package utils

import (
"strings"
"testing"

"github.com/PuerkitoBio/goquery"
"github.com/stretchr/testify/require"
)

// htmlFormExample is the HTML fixture used by TestParseFormFields: a single
// POST form with action "/test" that holds a named input, textarea and select
// plus one input without a name attribute.
var htmlFormExample = `<html>
<head>
<title>HTML Form Test</title>
</head>
<body>
<form method="POST" action="/test">
<input type="text" name="firstname"><br>
<textarea name=textarea1></textarea>
<select name=select1></select>
<input type=text />
</form>
</body>
</html>`

// TestParseFormFields verifies that ParseFormFields finds the single form in
// htmlFormExample, copies its action/method/enctype attributes, and collects
// only the names of controls that carry a name attribute.
func TestParseFormFields(t *testing.T) {
	document, err := goquery.NewDocumentFromReader(strings.NewReader(htmlFormExample))
	require.NoError(t, err, "could not read document")

	forms := ParseFormFields(document)

	// Check the count first: indexing forms[0] on an empty result would panic
	// and hide the real failure behind a stack trace.
	require.Len(t, forms, 1, "found more or fewer forms than were present")

	form := forms[0]
	require.Equal(t, "/test", form.Action)
	require.Equal(t, "POST", form.Method)
	require.Equal(t, "", form.Enctype)
	// The unnamed <input type=text /> must not contribute a parameter.
	require.Equal(t, []string{"firstname", "textarea1", "select1"}, form.Parameters)
}