字符类或者字符集

  • 字符组(character classes)
  • 字符集(character sets)
  • 元字符 metacharacters

[abc] A single character of: a, b or c
[^abc] Any single character except: a, b, or c
[a-z] Any single character in the range a-z
[a-zA-Z] Any single character in the range a-z or A-Z
^ Start of line
$ End of line

开头结尾:^ 和 $,正好对应键盘上的 Shift+6 和 Shift+4(即数字键 6 和 4)。

\A Start of string
\z End of string
. Any single character (except line terminators, unless DOTALL mode is enabled)
\s Any whitespace character
\S Any non-whitespace character
\d Any digit
\D Any non-digit
\w Any word character (letter, number, underscore)
\W Any non-word character
\b Any word boundary
(…) Capture everything enclosed
(a|b) a or b
a? Zero or one of a
a* Zero or more of a
a+ One or more of a

  • * 匹配前面的字符0次或者多次
  • + 匹配前面的字符1次或者多次
  • ? 匹配前面的字符0次或者1次

a{3} Exactly 3 of a
a{3,} 3 or more of a
a{3,6} Between 3 and 6 of a
.* 匹配任意字符0次或者多次(贪婪匹配,默认不匹配换行符)

前瞻后顾

前瞻:
exp1(?=exp2) 查找exp2前面的exp1
后顾:
(?<=exp2)exp1 查找exp2后面的exp1
负前瞻:
exp1(?!exp2) 查找后面不是exp2的exp1
负后顾:
(?<!exp2)exp1 查找前面不是exp2的exp1

这里的括号只是前瞻/后顾(零宽断言)的语法,不是捕获组:它们不消耗字符,也不会保存匹配到的内容。
可以看出正则表达式是可以嵌套的。

捕获组与命名捕获组

另外需要说明的一点是,除(Expression)和(?<name>Expression)语法外,其它的(?…)语法都不是捕获组。

资料网站

https://www.freeformatter.com/regex-tester.html
https://regex-golang.appspot.com/assets/html/index.html

例子

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
Pattern errorPattern = Pattern.compile("\\[ERROR\\] \\((.*)\\) logError", Pattern.DOTALL);

File theFile = new File("/Users/magicliang/error.log-2021-06-08-1.log");
Map<String, Integer> countMap = Maps.newHashMap();
try (LineIterator it = FileUtils.lineIterator(theFile, "UTF-8");) {
while (it.hasNext()) {
String line = it.nextLine();
Matcher m = errorPattern.matcher(line);
while (m.find()) {
String capture = m.group(1);
if (!countMap.containsKey(capture)) {
countMap.put(capture, 1);
} else {
final Integer count = countMap.get(capture);
countMap.put(capture, 1 + count );
}
// System.out.println(capture);
}

}
}
final Map<String, Integer> sortedByCount = countMap.entrySet()
.stream()
// .sorted(Map.Entry.comparingByValue())
.sorted((Map.Entry.<String, Integer>comparingByValue().reversed()))
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, (e1, e2) -> e1, LinkedHashMap::new));
System.out.println(JsonUtils.toJson(sortedByCount));

}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
/**
 * Counts how many times each message captured between {@code "[INFO] "} and {@code " --"}
 * occurs in the info log, then writes one {@code message,count} line per distinct message
 * to the statistics file, most frequent first.
 *
 * @param args ignored
 * @throws IOException if the log cannot be read or the statistics file cannot be written
 */
public static void main(String[] args) throws IOException {
    Pattern logPattern = Pattern.compile("\\[INFO\\] (.*) --", Pattern.DOTALL);

    Map<String, Integer> countMap = Maps.newHashMap();

    // Decode as UTF-8 explicitly instead of relying on the platform default charset.
    // Read failures propagate: a partial tally must not silently overwrite the report.
    // (JDK types are fully qualified so the snippet needs no additional imports.)
    try (BufferedReader br = new BufferedReader(new java.io.InputStreamReader(
            new java.io.FileInputStream("/Users/magicliang/info.log-2021-07-08-9.log"),
            java.nio.charset.StandardCharsets.UTF_8))) {
        String line;
        while ((line = br.readLine()) != null) {
            Matcher m = logPattern.matcher(line);
            while (m.find()) {
                // Insert 1 for a new message, otherwise increment its count.
                countMap.merge(m.group(1), 1, Integer::sum);
            }
        }
    }

    // LinkedHashMap keeps the descending-by-count order produced by the sort.
    final Map<String, Integer> sortedByCount = countMap.entrySet()
            .stream()
            .sorted(Map.Entry.<String, Integer>comparingByValue().reversed())
            .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, (e1, e2) -> e1, LinkedHashMap::new));

    // try-with-resources closes (and flushes) the writer even if formatting fails.
    try (PrintWriter printWriter = new PrintWriter("/Users/magicliang/info统计.log", "UTF-8")) {
        for (Map.Entry<String, Integer> entry : sortedByCount.entrySet()) {
            printWriter.println(String.format("%s,%s", entry.getKey(), entry.getValue()));
        }
        // PrintWriter never throws on write errors; surface them explicitly.
        if (printWriter.checkError()) {
            throw new IOException("Failed to write /Users/magicliang/info统计.log");
        }
    }
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
/**
 * Sums, for each distinct message captured between {@code "[INFO] "} and {@code " --"},
 * the UTF-8 byte length of every occurrence in the info log, then writes one
 * {@code message,N bytes} line per message to the statistics file, largest total first.
 *
 * @param args ignored
 * @throws IOException if the log cannot be read or the statistics file cannot be written
 */
public static void main(String[] args) throws IOException {
    Pattern logPattern = Pattern.compile("\\[INFO\\] (.*) --", Pattern.DOTALL);

    Map<String, Long> bytesByMessage = Maps.newHashMap();

    // Decode as UTF-8 explicitly so the byte lengths below match the bytes actually read,
    // independent of the platform default charset. Read failures propagate so that a
    // partial tally never overwrites the report.
    // (JDK types are fully qualified so the snippet needs no additional imports.)
    try (BufferedReader br = new BufferedReader(new java.io.InputStreamReader(
            new java.io.FileInputStream("/Users/magicliang/info.log-2021-07-08-9.log"),
            java.nio.charset.StandardCharsets.UTF_8))) {
        String line;
        while ((line = br.readLine()) != null) {
            Matcher m = logPattern.matcher(line);
            while (m.find()) {
                String capture = m.group(1);
                final long length = capture.getBytes(java.nio.charset.StandardCharsets.UTF_8).length;
                // Store the length for a new message, otherwise add it to the running total.
                bytesByMessage.merge(capture, length, Long::sum);
            }
        }
    }

    // LinkedHashMap keeps the descending-by-size order produced by the sort.
    final Map<String, Long> sortedBySize = bytesByMessage.entrySet()
            .stream()
            .sorted(Map.Entry.<String, Long>comparingByValue().reversed())
            .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, (e1, e2) -> e1, LinkedHashMap::new));

    // try-with-resources closes (and flushes) the writer even if formatting fails.
    try (PrintWriter printWriter = new PrintWriter("/Users/magicliang/info统计.log", "UTF-8")) {
        for (Map.Entry<String, Long> entry : sortedBySize.entrySet()) {
            printWriter.println(String.format("%s,%s bytes", entry.getKey(), entry.getValue()));
        }
        // PrintWriter never throws on write errors; surface them explicitly.
        if (printWriter.checkError()) {
            throw new IOException("Failed to write /Users/magicliang/info统计.log");
        }
    }
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
/**
 * Computes, for each distinct message captured between {@code "[INFO] "} and {@code " --"},
 * its share of the total UTF-8 bytes of all captured messages in the info log, then writes
 * one {@code message,P %} line per message to the statistics file, largest share first.
 *
 * @param args ignored
 * @throws IOException if the log cannot be read or the statistics file cannot be written
 */
public static void main(String[] args) throws IOException {
    Pattern logPattern = Pattern.compile("\\[INFO\\] (.*) --", Pattern.DOTALL);

    Map<String, Long> bytesByMessage = Maps.newHashMap();
    long sum = 0;

    // Decode as UTF-8 explicitly so the byte lengths below match the bytes actually read,
    // independent of the platform default charset. Read failures propagate so that a
    // partial tally never overwrites the report.
    // (JDK types are fully qualified so the snippet needs no additional imports.)
    try (BufferedReader br = new BufferedReader(new java.io.InputStreamReader(
            new java.io.FileInputStream("/Users/magicliang/info.log-2021-07-08-9.log"),
            java.nio.charset.StandardCharsets.UTF_8))) {
        String line;
        while ((line = br.readLine()) != null) {
            Matcher m = logPattern.matcher(line);
            while (m.find()) {
                String capture = m.group(1);
                final long length = capture.getBytes(java.nio.charset.StandardCharsets.UTF_8).length;
                sum += length;
                // Store the length for a new message, otherwise add it to the running total.
                bytesByMessage.merge(capture, length, Long::sum);
            }
        }
    }

    // LinkedHashMap keeps the descending-by-size order produced by the sort.
    final Map<String, Long> sortedBySize = bytesByMessage.entrySet()
            .stream()
            .sorted(Map.Entry.<String, Long>comparingByValue().reversed())
            .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, (e1, e2) -> e1, LinkedHashMap::new));

    // Loop-invariant operands of the percentage computation.
    final BigDecimal total = new BigDecimal(sum);
    final BigDecimal hundred = new BigDecimal(100);

    // try-with-resources closes (and flushes) the writer even if formatting fails.
    try (PrintWriter printWriter = new PrintWriter("/Users/magicliang/info统计.log", "UTF-8")) {
        for (Map.Entry<String, Long> entry : sortedBySize.entrySet()) {
            final long bytes = entry.getValue();
            // The total is zero when every captured message is empty; report a 0 share
            // instead of failing with a division-by-zero ArithmeticException.
            final BigDecimal percent = sum == 0
                    ? BigDecimal.ZERO.setScale(4)
                    : new BigDecimal(bytes)
                            .divide(total, 4, java.math.RoundingMode.HALF_UP)
                            .multiply(hundred);
            printWriter.println(String.format("%s,%s %%", entry.getKey(), percent));
        }
        // PrintWriter never throws on write errors; surface them explicitly.
        if (printWriter.checkError()) {
            throw new IOException("Failed to write /Users/magicliang/info统计.log");
        }
    }
}