perl regex syntax
# Perl regex metacharacters:
#
# | Symbol | Meaning                                       |
# |:------:|:---------------------------------------------:|
# | .      | any character except newline (unless /s)      |
# | \w     | alphanumeric and _                            |
# | \W     | any non-word character                        |
# | \s     | any whitespace                                |
# | \S     | any non-whitespace                            |
# | \d     | any digit character                           |
# | \D     | any non-digit character                       |
# | \t     | tab                                           |
# | \n     | newline                                       |
# | *      | match 0 or more times                         |
# | +      | match 1 or more times                         |
# | ?      | match 0 or 1 times                            |
# | {n}    | match exactly n times                         |
# | {n,m}  | match n to m times                            |
# | ^      | match from start                              |
# | $      | match to end (or just before a final newline) |
#
# Note:
# - use square brackets to match any of a set of characters, like [ACGT]
# - use ^ inside square brackets to negate matching those characters (i.e.
#   when you don't want to match any of them)
# - use - to specify a character range, e.g. [a-d] to match any of a, b, c, d
#
# Example usage:
# ($dna_seq is not set in this snippet; it is presumably assigned by the
# surrounding program before this test runs.)
# The parentheses group the three alternatives; as a side effect the
# alternative that matched is captured in $1.
if ($dna_seq =~ m/^ATGCC[ACGT]GGN{6,9}(TAG|TGA|TAA)$/) {
    print "It's a match";
}
# Where this will match any $dna_seq that starts with ATGCC, followed by one
# character from ACGT, followed by GG, followed by 6-9 N characters, and
# ending with TAG, TGA, or TAA
Source: korflab.ucdavis.edu