/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ //! Memory profiling functions. use ipc_channel::ipc::{self, IpcReceiver}; use ipc_channel::router::ROUTER; use profile_traits::mem::{ProfilerChan, ProfilerMsg, ReportKind, Reporter, ReporterRequest}; use profile_traits::mem::ReportsChan; use std::borrow::ToOwned; use std::cmp::Ordering; use std::collections::HashMap; use std::thread; use std::time::Instant; use time::duration_from_seconds; pub struct Profiler { /// The port through which messages are received. pub port: IpcReceiver, /// Registered memory reporters. reporters: HashMap, /// Instant at which this profiler was created. created: Instant, } const JEMALLOC_HEAP_ALLOCATED_STR: &'static str = "jemalloc-heap-allocated"; const SYSTEM_HEAP_ALLOCATED_STR: &'static str = "system-heap-allocated"; impl Profiler { pub fn create(period: Option) -> ProfilerChan { let (chan, port) = ipc::channel().unwrap(); // Create the timer thread if a period was provided. if let Some(period) = period { let chan = chan.clone(); thread::Builder::new().name("Memory profiler timer".to_owned()).spawn(move || { loop { thread::sleep(duration_from_seconds(period)); if chan.send(ProfilerMsg::Print).is_err() { break; } } }).expect("Thread spawning failed"); } // Always spawn the memory profiler. If there is no timer thread it won't receive regular // `Print` events, but it will still receive the other events. thread::Builder::new().name("Memory profiler".to_owned()).spawn(move || { let mut mem_profiler = Profiler::new(port); mem_profiler.start(); }).expect("Thread spawning failed"); let mem_profiler_chan = ProfilerChan(chan); // Register the system memory reporter, which will run on its own thread. It never needs to // be unregistered, because as long as the memory profiler is running the system memory // reporter can make measurements. let (system_reporter_sender, system_reporter_receiver) = ipc::channel().unwrap(); ROUTER.add_route(system_reporter_receiver.to_opaque(), Box::new(|message| { let request: ReporterRequest = message.to().unwrap(); system_reporter::collect_reports(request) })); mem_profiler_chan.send(ProfilerMsg::RegisterReporter("system".to_owned(), Reporter(system_reporter_sender))); mem_profiler_chan } pub fn new(port: IpcReceiver) -> Profiler { Profiler { port: port, reporters: HashMap::new(), created: Instant::now(), } } pub fn start(&mut self) { while let Ok(msg) = self.port.recv() { if !self.handle_msg(msg) { break } } } fn handle_msg(&mut self, msg: ProfilerMsg) -> bool { match msg { ProfilerMsg::RegisterReporter(name, reporter) => { // Panic if it has already been registered. let name_clone = name.clone(); match self.reporters.insert(name, reporter) { None => true, Some(_) => panic!(format!("RegisterReporter: '{}' name is already in use", name_clone)), } }, ProfilerMsg::UnregisterReporter(name) => { // Panic if it hasn't previously been registered. match self.reporters.remove(&name) { Some(_) => true, None => panic!(format!("UnregisterReporter: '{}' name is unknown", &name)), } }, ProfilerMsg::Print => { self.handle_print_msg(); true }, ProfilerMsg::Exit => false } } fn handle_print_msg(&self) { let elapsed = self.created.elapsed(); println!("Begin memory reports {}", elapsed.as_secs()); println!("|"); // Collect reports from memory reporters. // // This serializes the report-gathering. It might be worth creating a new scoped thread for // each reporter once we have enough of them. // // If anything goes wrong with a reporter, we just skip it. // // We also track the total memory reported on the jemalloc heap and the system heap, and // use that to compute the special "jemalloc-heap-unclassified" and // "system-heap-unclassified" values. let mut forest = ReportsForest::new(); let mut jemalloc_heap_reported_size = 0; let mut system_heap_reported_size = 0; let mut jemalloc_heap_allocated_size: Option = None; let mut system_heap_allocated_size: Option = None; for reporter in self.reporters.values() { let (chan, port) = ipc::channel().unwrap(); reporter.collect_reports(ReportsChan(chan)); if let Ok(mut reports) = port.recv() { for report in &mut reports { // Add "explicit" to the start of the path, when appropriate. match report.kind { ReportKind::ExplicitJemallocHeapSize | ReportKind::ExplicitSystemHeapSize | ReportKind::ExplicitNonHeapSize | ReportKind::ExplicitUnknownLocationSize => report.path.insert(0, String::from("explicit")), ReportKind::NonExplicitSize => {}, } // Update the reported fractions of the heaps, when appropriate. match report.kind { ReportKind::ExplicitJemallocHeapSize => jemalloc_heap_reported_size += report.size, ReportKind::ExplicitSystemHeapSize => system_heap_reported_size += report.size, _ => {}, } // Record total size of the heaps, when we see them. if report.path.len() == 1 { if report.path[0] == JEMALLOC_HEAP_ALLOCATED_STR { assert!(jemalloc_heap_allocated_size.is_none()); jemalloc_heap_allocated_size = Some(report.size); } else if report.path[0] == SYSTEM_HEAP_ALLOCATED_STR { assert!(system_heap_allocated_size.is_none()); system_heap_allocated_size = Some(report.size); } } // Insert the report. forest.insert(&report.path, report.size); } } } // Compute and insert the heap-unclassified values. if let Some(jemalloc_heap_allocated_size) = jemalloc_heap_allocated_size { forest.insert(&path!["explicit", "jemalloc-heap-unclassified"], jemalloc_heap_allocated_size - jemalloc_heap_reported_size); } if let Some(system_heap_allocated_size) = system_heap_allocated_size { forest.insert(&path!["explicit", "system-heap-unclassified"], system_heap_allocated_size - system_heap_reported_size); } forest.print(); println!("|"); println!("End memory reports"); println!(""); } } /// A collection of one or more reports with the same initial path segment. A ReportsTree /// containing a single node is described as "degenerate". struct ReportsTree { /// For leaf nodes, this is the sum of the sizes of all reports that mapped to this location. /// For interior nodes, this is the sum of the sizes of all its child nodes. size: usize, /// For leaf nodes, this is the count of all reports that mapped to this location. /// For interor nodes, this is always zero. count: u32, /// The segment from the report path that maps to this node. path_seg: String, /// Child nodes. children: Vec, } impl ReportsTree { fn new(path_seg: String) -> ReportsTree { ReportsTree { size: 0, count: 0, path_seg: path_seg, children: vec![] } } // Searches the tree's children for a path_seg match, and returns the index if there is a // match. fn find_child(&self, path_seg: &str) -> Option { for (i, child) in self.children.iter().enumerate() { if child.path_seg == *path_seg { return Some(i); } } None } // Insert the path and size into the tree, adding any nodes as necessary. fn insert(&mut self, path: &[String], size: usize) { let mut t: &mut ReportsTree = self; for path_seg in path { let i = match t.find_child(&path_seg) { Some(i) => i, None => { let new_t = ReportsTree::new(path_seg.clone()); t.children.push(new_t); t.children.len() - 1 }, }; let tmp = t; // this temporary is needed to satisfy the borrow checker t = &mut tmp.children[i]; } t.size += size; t.count += 1; } // Fill in sizes for interior nodes and sort sub-trees accordingly. Should only be done once // all the reports have been inserted. fn compute_interior_node_sizes_and_sort(&mut self) -> usize { if !self.children.is_empty() { // Interior node. Derive its size from its children. if self.size != 0 { // This will occur if e.g. we have paths ["a", "b"] and ["a", "b", "c"]. panic!("one report's path is a sub-path of another report's path"); } for child in &mut self.children { self.size += child.compute_interior_node_sizes_and_sort(); } // Now that child sizes have been computed, we can sort the children. self.children.sort_by(|t1, t2| t2.size.cmp(&t1.size)); } self.size } fn print(&self, depth: i32) { if !self.children.is_empty() { assert_eq!(self.count, 0); } let mut indent_str = String::new(); for _ in 0..depth { indent_str.push_str(" "); } let mebi = 1024f64 * 1024f64; let count_str = if self.count > 1 { format!(" [{}]", self.count) } else { "".to_owned() }; println!("|{}{:8.2} MiB -- {}{}", indent_str, (self.size as f64) / mebi, self.path_seg, count_str); for child in &self.children { child.print(depth + 1); } } } /// A collection of ReportsTrees. It represents the data from multiple memory reports in a form /// that's good to print. struct ReportsForest { trees: HashMap, } impl ReportsForest { fn new() -> ReportsForest { ReportsForest { trees: HashMap::new(), } } // Insert the path and size into the forest, adding any trees and nodes as necessary. fn insert(&mut self, path: &[String], size: usize) { let (head, tail) = path.split_first().unwrap(); // Get the right tree, creating it if necessary. if !self.trees.contains_key(head) { self.trees.insert(head.clone(), ReportsTree::new(head.clone())); } let t = self.trees.get_mut(head).unwrap(); // Use tail because the 0th path segment was used to find the right tree in the forest. t.insert(tail, size); } fn print(&mut self) { // Fill in sizes of interior nodes, and recursively sort the sub-trees. for (_, tree) in &mut self.trees { tree.compute_interior_node_sizes_and_sort(); } // Put the trees into a sorted vector. Primary sort: degenerate trees (those containing a // single node) come after non-degenerate trees. Secondary sort: alphabetical order of the // root node's path_seg. let mut v = vec![]; for (_, tree) in &self.trees { v.push(tree); } v.sort_by(|a, b| { if a.children.is_empty() && !b.children.is_empty() { Ordering::Greater } else if !a.children.is_empty() && b.children.is_empty() { Ordering::Less } else { a.path_seg.cmp(&b.path_seg) } }); // Print the forest. for tree in &v { tree.print(0); // Print a blank line after non-degenerate trees. if !tree.children.is_empty() { println!("|"); } } } } //--------------------------------------------------------------------------- mod system_reporter { #[cfg(all(feature = "unstable", not(target_os = "windows")))] use libc::{c_void, size_t}; #[cfg(target_os = "linux")] use libc::c_int; use profile_traits::mem::{Report, ReportKind, ReporterRequest}; #[cfg(all(feature = "unstable", not(target_os = "windows")))] use std::ffi::CString; #[cfg(all(feature = "unstable", not(target_os = "windows")))] use std::mem::size_of; #[cfg(all(feature = "unstable", not(target_os = "windows")))] use std::ptr::null_mut; use super::{JEMALLOC_HEAP_ALLOCATED_STR, SYSTEM_HEAP_ALLOCATED_STR}; #[cfg(target_os = "macos")] use task_info::task_basic_info::{virtual_size, resident_size}; /// Collects global measurements from the OS and heap allocators. pub fn collect_reports(request: ReporterRequest) { let mut reports = vec![]; { let mut report = |path, size| { if let Some(size) = size { reports.push(Report { path: path, kind: ReportKind::NonExplicitSize, size: size, }); } }; // Virtual and physical memory usage, as reported by the OS. report(path!["vsize"], vsize()); report(path!["resident"], resident()); // Memory segments, as reported by the OS. for seg in resident_segments() { report(path!["resident-according-to-smaps", seg.0], Some(seg.1)); } // Total number of bytes allocated by the application on the system // heap. report(path![SYSTEM_HEAP_ALLOCATED_STR], system_heap_allocated()); // The descriptions of the following jemalloc measurements are taken // directly from the jemalloc documentation. // "Total number of bytes allocated by the application." report(path![JEMALLOC_HEAP_ALLOCATED_STR], jemalloc_stat("stats.allocated")); // "Total number of bytes in active pages allocated by the application. // This is a multiple of the page size, and greater than or equal to // |stats.allocated|." report(path!["jemalloc-heap-active"], jemalloc_stat("stats.active")); // "Total number of bytes in chunks mapped on behalf of the application. // This is a multiple of the chunk size, and is at least as large as // |stats.active|. This does not include inactive chunks." report(path!["jemalloc-heap-mapped"], jemalloc_stat("stats.mapped")); } request.reports_channel.send(reports); } #[cfg(target_os = "linux")] extern { fn mallinfo() -> struct_mallinfo; } #[cfg(target_os = "linux")] #[repr(C)] pub struct struct_mallinfo { arena: c_int, ordblks: c_int, smblks: c_int, hblks: c_int, hblkhd: c_int, usmblks: c_int, fsmblks: c_int, uordblks: c_int, fordblks: c_int, keepcost: c_int, } #[cfg(target_os = "linux")] fn system_heap_allocated() -> Option { let info: struct_mallinfo = unsafe { mallinfo() }; // The documentation in the glibc man page makes it sound like |uordblks| would suffice, // but that only gets the small allocations that are put in the brk heap. We need |hblkhd| // as well to get the larger allocations that are mmapped. // // These fields are unfortunately |int| and so can overflow (becoming negative) if memory // usage gets high enough. So don't report anything in that case. In the non-overflow case // we cast the two values to usize before adding them to make sure the sum also doesn't // overflow. if info.hblkhd < 0 || info.uordblks < 0 { None } else { Some(info.hblkhd as usize + info.uordblks as usize) } } #[cfg(not(target_os = "linux"))] fn system_heap_allocated() -> Option { None } #[cfg(all(feature = "unstable", not(target_os = "windows")))] use jemalloc_sys::mallctl; #[cfg(all(feature = "unstable", not(target_os = "windows")))] fn jemalloc_stat(value_name: &str) -> Option { // Before we request the measurement of interest, we first send an "epoch" // request. Without that jemalloc gives cached statistics(!) which can be // highly inaccurate. let epoch_name = "epoch"; let epoch_c_name = CString::new(epoch_name).unwrap(); let mut epoch: u64 = 0; let epoch_ptr = &mut epoch as *mut _ as *mut c_void; let mut epoch_len = size_of::() as size_t; let value_c_name = CString::new(value_name).unwrap(); let mut value: size_t = 0; let value_ptr = &mut value as *mut _ as *mut c_void; let mut value_len = size_of::() as size_t; // Using the same values for the `old` and `new` parameters is enough // to get the statistics updated. let rv = unsafe { mallctl(epoch_c_name.as_ptr(), epoch_ptr, &mut epoch_len, epoch_ptr, epoch_len) }; if rv != 0 { return None; } let rv = unsafe { mallctl(value_c_name.as_ptr(), value_ptr, &mut value_len, null_mut(), 0) }; if rv != 0 { return None; } Some(value as usize) } #[cfg(any(target_os = "windows", not(feature = "unstable")))] fn jemalloc_stat(_value_name: &str) -> Option { None } #[cfg(target_os = "linux")] fn page_size() -> usize { unsafe { ::libc::sysconf(::libc::_SC_PAGESIZE) as usize } } #[cfg(target_os = "linux")] fn proc_self_statm_field(field: usize) -> Option { use std::fs::File; use std::io::Read; let mut f = File::open("/proc/self/statm").ok()?; let mut contents = String::new(); f.read_to_string(&mut contents).ok()?; let s = contents.split_whitespace().nth(field)?; let npages = s.parse::().ok()?; Some(npages * page_size()) } #[cfg(target_os = "linux")] fn vsize() -> Option { proc_self_statm_field(0) } #[cfg(target_os = "linux")] fn resident() -> Option { proc_self_statm_field(1) } #[cfg(target_os = "macos")] fn vsize() -> Option { virtual_size() } #[cfg(target_os = "macos")] fn resident() -> Option { resident_size() } #[cfg(not(any(target_os = "linux", target_os = "macos")))] fn vsize() -> Option { None } #[cfg(not(any(target_os = "linux", target_os = "macos")))] fn resident() -> Option { None } #[cfg(target_os = "linux")] fn resident_segments() -> Vec<(String, usize)> { use regex::Regex; use std::collections::HashMap; use std::collections::hash_map::Entry; use std::fs::File; use std::io::{BufReader, BufRead}; // The first line of an entry in /proc//smaps looks just like an entry // in /proc//maps: // // address perms offset dev inode pathname // 02366000-025d8000 rw-p 00000000 00:00 0 [heap] // // Each of the following lines contains a key and a value, separated // by ": ", where the key does not contain either of those characters. // For example: // // Rss: 132 kB let f = match File::open("/proc/self/smaps") { Ok(f) => BufReader::new(f), Err(_) => return vec![], }; let seg_re = Regex::new( r"^[:xdigit:]+-[:xdigit:]+ (....) [:xdigit:]+ [:xdigit:]+:[:xdigit:]+ \d+ +(.*)").unwrap(); let rss_re = Regex::new(r"^Rss: +(\d+) kB").unwrap(); // We record each segment's resident size. let mut seg_map: HashMap = HashMap::new(); #[derive(PartialEq)] enum LookingFor { Segment, Rss } let mut looking_for = LookingFor::Segment; let mut curr_seg_name = String::new(); // Parse the file. for line in f.lines() { let line = match line { Ok(line) => line, Err(_) => continue, }; if looking_for == LookingFor::Segment { // Look for a segment info line. let cap = match seg_re.captures(&line) { Some(cap) => cap, None => continue, }; let perms = cap.get(1).unwrap().as_str(); let pathname = cap.get(2).unwrap().as_str(); // Construct the segment name from its pathname and permissions. curr_seg_name.clear(); if pathname == "" || pathname.starts_with("[stack:") { // Anonymous memory. Entries marked with "[stack:nnn]" // look like thread stacks but they may include other // anonymous mappings, so we can't trust them and just // treat them as entirely anonymous. curr_seg_name.push_str("anonymous"); } else { curr_seg_name.push_str(pathname); } curr_seg_name.push_str(" ("); curr_seg_name.push_str(perms); curr_seg_name.push_str(")"); looking_for = LookingFor::Rss; } else { // Look for an "Rss:" line. let cap = match rss_re.captures(&line) { Some(cap) => cap, None => continue, }; let rss = cap.get(1).unwrap().as_str().parse::().unwrap() * 1024; if rss > 0 { // Aggregate small segments into "other". let seg_name = if rss < 512 * 1024 { "other".to_owned() } else { curr_seg_name.clone() }; match seg_map.entry(seg_name) { Entry::Vacant(entry) => { entry.insert(rss); }, Entry::Occupied(mut entry) => *entry.get_mut() += rss, } } looking_for = LookingFor::Segment; } } // Note that the sum of all these segments' RSS values differs from the "resident" // measurement obtained via /proc//statm in resident(). It's unclear why this // difference occurs; for some processes the measurements match, but for Servo they do not. seg_map.into_iter().collect() } #[cfg(not(target_os = "linux"))] fn resident_segments() -> Vec<(String, usize)> { vec![] } }