字符类或者字符集
字符组(character classes)
字符集(character sets)
元字符 metacharacters
[abc] A single character of: a, b or c [^abc] Any single character except: a, b, or c [a-z] Any single character in the range a-z [a-zA-Z] Any single character in the range a-z or A-Z ^ Start of line $ End of line
开头结尾:^$,正好对应键盘上的64。
\A Start of string \z End of string . Any single character \s Any whitespace character \S Any non-whitespace character \d Any digit \D Any non-digit \w Any word character (letter, number, underscore) \W Any non-word character \b Any word boundary (…) Capture everything enclosed (a|b) a or b a? Zero or one of a a* Zero or more of a a+ One or more of a
* 对前面字符匹配0次或者无限次
+ 对前面字符匹配1次或者无限次
? 对前面字符匹配0次或者1次
a{3} Exactly 3 of a a{3,} 3 or more of a a{3,6} Between 3 and 6 of a .* 任意关键字匹配
前瞻后顾 前瞻: exp1(?=exp2) 查找exp2前面的exp1 后顾: (?<=exp2)exp1 查找exp2后面的exp1 负前瞻: exp1(?!exp2) 查找后面不是exp2的exp1 负后顾: (?<!exp2)exp1 查找前面不是exp2的exp1
这里的括号不是捕获组的意思。 可以看出正则表达式是可以嵌套的。
捕获组与命名捕获组 另外需要说明的一点是,除(Expression)和(?<name>Expression)语法外,其它的(?…)语法都不是捕获组。
资料网站 https://www.freeformatter.com/regex-tester.html https://regex-golang.appspot.com/assets/html/index.html
例子 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 Pattern errorPattern = Pattern.compile("\\[ERROR\\] \\((.*)\\) logError" , Pattern.DOTALL); File theFile = new File ("/Users/magicliang/error.log-2021-06-08-1.log" ); Map<String, Integer> countMap = Maps.newHashMap(); try (LineIterator it = FileUtils.lineIterator(theFile, "UTF-8" );) { while (it.hasNext()) { String line = it.nextLine(); Matcher m = errorPattern.matcher(line); while (m.find()) { String capture = m.group(1 ); if (!countMap.containsKey(capture)) { countMap.put(capture, 1 ); } else { final Integer count = countMap.get(capture); countMap.put(capture, 1 + count ); } } } } final Map<String, Integer> sortedByCount = countMap.entrySet() .stream() .sorted((Map.Entry.<String, Integer>comparingByValue().reversed())) .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, (e1, e2) -> e1, LinkedHashMap::new )); System.out.println(JsonUtils.toJson(sortedByCount)); }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 public static void main (String[] args) throws IOException { Pattern logPattern = Pattern.compile("\\[INFO\\] (.*) --" , Pattern.DOTALL); Map<String, Integer> countMap = Maps.newHashMap(); try (BufferedReader br = new BufferedReader (new FileReader ("/Users/magicliang/info.log-2021-07-08-9.log" ))) { String line; while ((line = br.readLine()) != null ) { Matcher m = logPattern.matcher(line); while (m.find()) { String capture = m.group(1 ); final byte [] utf8Bytes = capture.getBytes("UTF-8" ); final int length = utf8Bytes.length; if (!countMap.containsKey(capture)) { countMap.put(capture, 1 ); } else { final Integer count = countMap.get(capture); countMap.put(capture, 1 + count); } } } } catch (IOException e) { e.printStackTrace(); } final Map<String, Integer> sortedByCount = countMap.entrySet() .stream() .sorted((Map.Entry.<String, Integer>comparingByValue().reversed())) .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, (e1, e2) -> e1, LinkedHashMap::new )); FileWriter fileWriter = new FileWriter ("/Users/magicliang/info统计.log" ); PrintWriter printWriter = new PrintWriter (fileWriter); for (Map.Entry entry : sortedByCount.entrySet()) { printWriter.println(String.format("%s,%s" , entry.getKey(), entry.getValue())); } printWriter.close(); }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 public static void main (String[] args) throws IOException { Pattern logPattern = Pattern.compile("\\[INFO\\] (.*) --" , Pattern.DOTALL); Map<String, Long> countMap = Maps.newHashMap(); try (BufferedReader br = new BufferedReader (new FileReader ("/Users/magicliang/info.log-2021-07-08-9.log" ))) { String line; while ((line = br.readLine()) != null ) { Matcher m = logPattern.matcher(line); while (m.find()) { String capture = m.group(1 ); final byte [] utf8Bytes = capture.getBytes("UTF-8" ); final long length = utf8Bytes.length; if (!countMap.containsKey(capture)) { countMap.put(capture, length); } else { final Long count = countMap.get(capture); countMap.put(capture, length + count); } } } } catch (IOException e) { e.printStackTrace(); } final Map<String, Long> sortedByCount = countMap.entrySet() .stream() .sorted((Map.Entry.<String, Long>comparingByValue().reversed())) .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, (e1, e2) -> e1, LinkedHashMap::new )); FileWriter fileWriter = new FileWriter ("/Users/magicliang/info统计.log" ); PrintWriter printWriter = new PrintWriter (fileWriter); for (Map.Entry entry : sortedByCount.entrySet()) { printWriter.println(String.format("%s,%s bytes" , entry.getKey(), entry.getValue())); } printWriter.close(); }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 public static void main (String[] args) throws IOException { Pattern logPattern = Pattern.compile("\\[INFO\\] (.*) --" , Pattern.DOTALL); Map<String, Long> countMap = Maps.newHashMap(); long sum = 0 ; try (BufferedReader br = new BufferedReader (new FileReader ("/Users/magicliang/info.log-2021-07-08-9.log" ))) { String line; while ((line = br.readLine()) != null ) { Matcher m = logPattern.matcher(line); while (m.find()) { String capture = m.group(1 ); final byte [] utf8Bytes = capture.getBytes("UTF-8" ); final long length = utf8Bytes.length; sum += length; if (!countMap.containsKey(capture)) { countMap.put(capture, length); } else { final Long count = countMap.get(capture); countMap.put(capture, length + count); } } } } catch (IOException e) { e.printStackTrace(); } final Map<String, Long> sortedByCount = countMap.entrySet() .stream() .sorted((Map.Entry.<String, Long>comparingByValue().reversed())) .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, (e1, e2) -> e1, LinkedHashMap::new )); FileWriter fileWriter = new FileWriter ("/Users/magicliang/info统计.log" ); PrintWriter printWriter = new PrintWriter (fileWriter); for (Map.Entry entry : sortedByCount.entrySet()) { final Long value = (Long)entry.getValue(); printWriter.println(String.format("%s,%s %%" , entry.getKey(), new BigDecimal (value).divide(new BigDecimal (sum),4 , BigDecimal.ROUND_HALF_UP).multiply(new BigDecimal (100 )))); } printWriter.close(); }