diff --git a/crates/oxc_semantic/src/builder.rs b/crates/oxc_semantic/src/builder.rs index 599a4873fbaea..74975e853e8b0 100644 --- a/crates/oxc_semantic/src/builder.rs +++ b/crates/oxc_semantic/src/builder.rs @@ -98,6 +98,7 @@ pub struct SemanticBuilder<'a> { unused_labels: UnusedLabels<'a>, build_jsdoc: bool, jsdoc: JSDocBuilder<'a>, + stats: Option<Stats>, /// Should additional syntax checks be performed? /// @@ -144,6 +145,7 @@ impl<'a> SemanticBuilder<'a> { unused_labels: UnusedLabels::default(), build_jsdoc: false, jsdoc: JSDocBuilder::new(source_text, trivias), + stats: None, check_syntax_error: false, cfg: None, class_table_builder: ClassTableBuilder::new(), @@ -193,6 +195,19 @@ impl<'a> SemanticBuilder<'a> { self } + /// Provide statistics about AST to optimize memory usage of semantic analysis. + /// + /// Accurate statistics can greatly improve performance, especially for large ASTs. + /// If no stats are provided, [`SemanticBuilder::build`] compiles them via a complete AST traversal. + /// Stats provided here are trusted: `build` only debug-checks counts it computed itself. + /// If semantic analysis has already been performed on this AST, get the existing stats with + /// [`Semantic::stats`], and pass them in with this method, to avoid the stats collection AST pass. + #[must_use] + pub fn with_stats(mut self, stats: Stats) -> Self { + self.stats = Some(stats); + self + } + /// Get the built module record from `build_module_record` pub fn module_record(&self) -> Arc { Arc::clone(&self.module_record) @@ -221,18 +236,23 @@ impl<'a> SemanticBuilder<'a> { let scope_id = self.scope.add_scope(None, NodeId::DUMMY, ScopeFlags::Top); program.scope_id.set(Some(scope_id)); } else { - // Count the number of nodes, scopes, symbols, and references. - // Use these counts to reserve sufficient capacity in `AstNodes`, `ScopeTree` - // and `SymbolTable` to store them. + // Use counts of nodes, scopes, symbols, and references to pre-allocate sufficient capacity + // in `AstNodes`, `ScopeTree` and `SymbolTable`. 
+ // // This means that as we traverse the AST and fill up these structures with data, // they never need to grow and reallocate - which is an expensive operation as it // involves copying all the memory from the old allocation to the new one. // For large source files, these structures are very large, so growth is very costly // as it involves copying massive chunks of memory. // Avoiding this growth produces up to 30% perf boost on our benchmarks. - // TODO: It would be even more efficient to calculate counts in parser to avoid - // this extra AST traversal. - let stats = Stats::count(program); + // + // If the user did not provide existing `Stats`, calculate them by visiting the AST. + let (stats, check_stats) = if let Some(stats) = self.stats { + (stats, false) + } else { + let stats = Stats::count(program); + (stats, true) + }; self.nodes.reserve(stats.nodes as usize); self.scope.reserve(stats.scopes as usize); self.symbols.reserve(stats.symbols as usize, stats.references as usize); @@ -240,9 +260,9 @@ impl<'a> SemanticBuilder<'a> { // Visit AST to generate scopes tree etc self.visit_program(program); - // Check that estimated counts accurately + // Check that the counts were accurate (debug builds only, and only if computed here) #[cfg(debug_assertions)] - { + if check_stats { #[allow(clippy::cast_possible_truncation)] let actual_stats = Stats::new( self.nodes.len() as u32,