Skip to content

Commit

Permalink
Merge pull request technosophos#19 from ppwwyyxx/master
Browse files Browse the repository at this point in the history
add path matching to `Transform`
  • Loading branch information
technosophos committed Feb 4, 2016
2 parents ea36963 + dbd729b commit 2f42d6d
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 14 deletions.
12 changes: 7 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,12 +109,12 @@ do that, you can use the `ignores` directive in the JSON file:
The above will ignore anything whose text matches the exact text "DESCRIPTION"
or "MORE", even if the selectors match.

## Regular Expressions for Substitutions
## Other Mappers/Filters on Selectors

Instead of using a simple mapping of selector to type, you have the
option of mapping a selector to a substitution pattern.
option to map/filter the selected results.

The format for this type of `selectors` map looks like this:
The format for this extended type of `selectors` looks like this:

```json
{
Expand All @@ -127,7 +127,8 @@ The format for this type of `selectors` map looks like this:
"title": {
"type":"Package",
"regexp": " - The Swiss Army Knife of Embedded Linux",
"replacement": ""
"replacement": "",
"matchpath": "doc/.*\\.html"
}
},
"ignore": [
Expand All @@ -142,7 +143,8 @@ The format of the selector value is:
"css selector": {
"type":"Dash data type",
"regexp": "PCRE regular expression (no need to enclose in //)",
"replacement": "Replacement text"
"replacement": "Replacement text for each match of 'regexp'",
"matchpath": "Only files matching this regular expression will be parsed. Will match all files if not set."
}
```

Expand Down
34 changes: 25 additions & 9 deletions dashing.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,13 @@ type Dashing struct {
// Transform structs.
type Transform struct {
// Type is the Dash data type assigned to entries found by the selector
// (the "type" key when the selector is given in map form).
Type string

// Perform a replace operation on the text.
// Regexp is compiled from the selector's "regexp" key and is nil when
// that key is absent; Replacement holds the "replacement" key's value.
Regexp *regexp.Regexp
Replacement string

// Skip files that don't match this path.
// MatchPath is compiled from the selector's "matchpath" key. When it is
// non-nil, parseHTML skips this selector for any file whose path does not
// match; a nil MatchPath means the selector is applied to every file.
MatchPath *regexp.Regexp
}

var ignoreHash map[string]bool
Expand Down Expand Up @@ -224,27 +229,33 @@ func decodeSelectField(d *Dashing) error {
}
} else if rv.Kind() == reflect.Map {
val := val.(map[string]interface{})
var ttype, treg, trep string
if t, ok := val["type"]; ok {
ttype = t.(string)
var ttype, trep string
var creg, cmatchpath *regexp.Regexp
var err error

if r, ok := val["type"]; ok {
ttype = r.(string)
}
if r, ok := val["regexp"]; ok {
treg = r.(string)
creg, err = regexp.Compile(r.(string))
if err != nil {
return fmt.Errorf("failed to compile regexp '%s': %s", r.(string), err)
}
}
if r, ok := val["replacement"]; ok {
trep = r.(string)
}
var creg *regexp.Regexp
var err error
if len(treg) > 0 {
if creg, err = regexp.Compile(treg); err != nil {
return fmt.Errorf("failed to compile regexp '%s': %s", treg, err)
if r, ok := val["matchpath"]; ok {
cmatchpath, err = regexp.Compile(r.(string))
if err != nil {
return fmt.Errorf("failed to compile regexp '%s': %s", r.(string), err)
}
}
trans = &Transform{
Type: ttype,
Regexp: creg,
Replacement: trep,
MatchPath: cmatchpath,
}
} else {
fmt.Errorf("Expected string or map. Kind is %s.", rv.Kind().String())
Expand Down Expand Up @@ -441,6 +452,11 @@ func parseHTML(path string, source_depth int, dest string, dashing Dashing) ([]*
}

for pattern, sel := range dashing.selectors {
// Skip this selector if file path doesn't match
if sel.MatchPath != nil && ! sel.MatchPath.MatchString(path) {
continue
}

m := css.MustCompile(pattern)
found := m.MatchAll(top)
for _, n := range found {
Expand Down

0 comments on commit 2f42d6d

Please sign in to comment.