/* Copyright (C) 2018 Anders Blomdell This file is part of hashtoc. Hashtoc is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 3. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ extern crate clap; extern crate libc; extern crate threadpool; extern crate bytes; use clap::{Arg, App}; use std::os::unix::fs::{FileTypeExt, MetadataExt}; use std::ffi::OsStr; use std::os::unix::ffi::OsStrExt; use std::fs::{Metadata}; use std::path::{Path, PathBuf}; use std::sync::mpsc::{channel, Receiver, Sender}; use std::thread; use std::fmt; use std::process::exit; use threadpool::ThreadPool; use std::collections::HashMap; use std::cell::RefCell; use libc::{c_int, time_t}; use std::result::Result; use std::io::Error as IOError; use std::io::{self, Write}; use bytes::BytesMut; use std::ops::Deref; mod libhash; use libhash::{FLAGS_CLEAR_XATTR, FLAGS_READ_XATTR, FLAGS_WRITE_XATTR, FLAGS_MAX_AGE, FLAGS_NO_CALC_HASH, FLAGS_VERBOSE0, FLAGS_VERBOSE1, FLAGS_VERBOSE2, FLAGS_VERBOSE3}; mod hash; use hash::{md5_file, md5_symlink, sha512_file, sha512_symlink}; mod walk; macro_rules! reportln { () => (writeln!(io::stderr()).unwrap()); ($($arg:tt)*) => (writeln!(io::stderr(), $($arg)*).unwrap()); } macro_rules! bprint { ($($arg : tt)*) => (io::stdout().write_fmt(format_args!($($arg)*)).unwrap()); } macro_rules! bwrite { ($($arg : tt)+) => (io::stdout().write_all($($arg)+).unwrap()); } // Extract clap optin as an Option macro_rules! option_t_or_exit { ($m:ident, $v:expr, $t:ty) => ( if let Some(v) = $m.value_of($v) { match v.trim().parse::<$t>() { Ok(val) => Option::Some(val), Err(_) => { reportln!("Argument '--{} {}' cant be parsed as '{}'", $v, v, stringify!($t)); exit(1); } } } else { Option::None } ) } #[derive(Clone)] struct Inode { path: PathBuf, metadata: Option } impl std::fmt::Debug for Inode { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "Inode {{ path: {:?} }}", self.path) }} enum WalkerMessage { Start(PathBuf), Inode(Inode), //path: PathBuf, metadata: Metadata}, Done } #[derive(Debug)] enum WorkerMessageHash { MD5(Result), SHA512(Result) } #[derive(Debug)] struct WorkerMessage { index:usize, hash: WorkerMessageHash } #[derive(Debug)] enum HashOption { None, Unused, Pending, Err(E), Some(T) } #[derive(Debug)] struct HashEntry { inode: Inode, kind: char, md5: RefCell>, sha512: RefCell> } #[derive(Debug)] struct WorkList { first: usize, last: usize, list: HashMap } #[allow(dead_code)] impl WorkList { fn new() -> WorkList { WorkList{first: 0, last: 0, list: HashMap::::new() } } fn used(&self) -> usize { self.last - self.first } fn get_front(&self) -> Option<&HashEntry> { if self.first == self.last { None } else { self.list.get(&self.first) } } fn get(&self, index: usize) -> Option<&HashEntry> { self.list.get(&index) } fn push_back(&mut self, entry: HashEntry) -> usize { let last = self.last; self.last += 1; self.list.insert(last, entry); last } fn pop_front(&mut self) -> Option { if self.first == self.last { None } else { let first = self.first; self.first += 1; self.list.remove(&first) } } } struct WorkerPool { pool: ThreadPool, pending: usize, rx: Receiver, tx: Sender } /*trait ExecuteIf { fn execute_if(&self, guard: bool, func: F) where F: FnOnce() -> G + Send + 'static, G: FnOnce() -> H + Send + 'static, H: std::fmt::Debug; } impl ExecuteIf for WorkerPool { */ impl WorkerPool { fn new(jobs: usize) -> WorkerPool { let (tx, rx) = channel::(); WorkerPool { pool: ThreadPool::new(jobs), pending: 0, rx: rx, tx: tx } } fn free(&self) -> bool { self.pool.max_count() > self.pending } fn execute_if(&mut self, guard: bool, func: F) where F: FnOnce() -> T + Send + 'static, T: Send + std::fmt::Debug + 'static { if guard { self.pending += 1; let tx = self.tx.clone(); self.pool.execute(move || { tx.send(func()).unwrap(); }) } } fn recv(&mut self) -> T { self.pending -= 1; self.rx.recv().unwrap() } } fn options_to_flags(options: &clap::ArgMatches) -> (c_int, time_t) { let xattr = options.is_present("xattr"); let read_xattr = options.is_present("read_xattr"); let write_xattr = options.is_present("write_xattr"); let clear_xattr = options.is_present("clear_xattr"); let maxage = option_t_or_exit!(options, "max_age", libc::time_t); let mut flags = 0; let mut age = 0; match (xattr, read_xattr, write_xattr, clear_xattr, maxage) { (false, false, false, false, None) => { }, (true, false, false, false, None) | (false, true, true, false, None) => { flags = FLAGS_READ_XATTR | FLAGS_WRITE_XATTR; }, (true, false, false, false, Some(maxage)) | (false, true, true, false, Some(maxage)) => { flags = FLAGS_READ_XATTR | FLAGS_WRITE_XATTR | FLAGS_MAX_AGE; age = maxage; }, (false, true, false, false, None) => { flags = FLAGS_READ_XATTR; }, (false, true, false, false, Some(maxage)) => { flags = FLAGS_READ_XATTR | FLAGS_MAX_AGE; age = maxage; }, (false, false, false, true, None) => { flags = FLAGS_CLEAR_XATTR | FLAGS_NO_CALC_HASH; }, (_, _, _, _, Some(age)) => { panic!("--maxage={} specified without sane xattr flags", age); }, (_, _, _, _, _) => { panic!("Invalid flags"); } } match (options.is_present("quiet"), options.occurrences_of("verbose")) { (true, 0) => flags |= FLAGS_VERBOSE0, (false, 0) => flags |= FLAGS_VERBOSE1, (false, 1) => flags |= FLAGS_VERBOSE2, (false, 2) => flags |= FLAGS_VERBOSE3, (_, _) => panic!("Invalid verbosity") } (flags, age) } fn file_type_to_flag(file_type: &std::fs::FileType) -> char { if file_type.is_file() { 'F' } else if file_type.is_symlink() { 'L' } else if file_type.is_dir() { 'D' } else if file_type.is_char_device() { 'C' } else if file_type.is_block_device() { 'B' } else if file_type.is_socket() { 'S' } else if file_type.is_fifo() { 'P' } else { unreachable!() } } fn check_name(name : &OsStr) -> bool { let mut error = false; // Check for control characters for c in name.as_bytes() { if c < &32u8 { error = true; } } let mut sane_name = BytesMut::from(name.as_bytes()); if error { // Replace control chararcters for reporting for c in sane_name.iter_mut() { if *c < 32u8 { *c = '?' as u8; } } } let p: &OsStr = OsStrExt::from_bytes(sane_name.deref()); error |= match p.to_str() { Some(_) => false, None => true, }; if error { reportln!("Invalid path {}", p.to_string_lossy()); } ! error } fn dispatcher(options: clap::ArgMatches, from_walker: Receiver) { let jobs = option_t_or_exit!(options, "jobs", usize).unwrap_or(1); let lookahead = option_t_or_exit!(options, "lookahead", usize) .unwrap_or(jobs * 10); let calc_md5 = options.is_present("md5"); let calc_sha512 = options.is_present("sha512"); let (flags, maxage) = options_to_flags(&options); let zero_terminated = options.is_present("zero-terminated"); macro_rules! zprintln { () => ( if zero_terminated { write!(io::stdout(), "\0").unwrap(); } else { writeln!(io::stdout()).unwrap(); } ) } let mut pool = WorkerPool::::new(jobs); let mut worklist = WorkList::new(); let mut done = false; bprint!("#fields: size:mtime:uid:gid:mode:"); if calc_md5 { bprint!("md5:"); } if calc_sha512 { bprint!("sha512:"); } bprint!("kind:name"); zprintln!(); loop { // reportln!("XXX {} {} {}", done, pending, worklist.used()); if done && worklist.used() == 0 { break; } if !done && pool.free() && worklist.used() < lookahead { // Get paths from walker and enqueue work let message = match from_walker.recv() { Ok(message) => message, Err(_) => break }; match message { WalkerMessage::Start(path) => { worklist.push_back(HashEntry { inode: Inode { path:path.clone(), metadata: None }, kind: ' ', md5: RefCell::new(HashOption::Unused), sha512: RefCell::new(HashOption::Unused), }); }, WalkerMessage::Inode(inode) => { // Push to worklist let path = inode.path; let metadata = inode.metadata.unwrap(); use HashOption::{Pending, Unused, None}; let (kind, md5, sha512) = match file_type_to_flag(&metadata.file_type()) { kind @ 'F' | kind @ 'L' => (kind, if calc_md5 { Pending } else { Unused }, if calc_sha512 { Pending } else { Unused } ), kind @ _ => (kind, if calc_md5 { None } else { Unused }, if calc_sha512 { None } else { Unused } ) }; let index = worklist.push_back(HashEntry { inode: Inode { path:path.clone(), metadata: Some(metadata.clone()) }, kind: kind, md5: RefCell::new(md5), sha512: RefCell::new(sha512), }); match kind { 'F' => { use WorkerMessageHash::{MD5, SHA512}; pool.execute_if(calc_md5, { let path = path.clone(); let metadata = metadata.clone(); move || { let h = md5_file(path.as_path(), metadata, flags, maxage); WorkerMessage { index: index, hash: MD5(h) } } }); pool.execute_if(calc_sha512, { let path = path.clone(); move || { let h = sha512_file(path.as_path(), metadata, flags, maxage); WorkerMessage { index: index, hash: SHA512(h) } } }); }, 'L' => { use WorkerMessageHash::{MD5, SHA512}; pool.execute_if(calc_md5, { let path = path.clone(); move || { let h = md5_symlink(path.as_path()); WorkerMessage { index: index, hash: MD5(h) } } }); pool.execute_if(calc_sha512, { let path = path.clone(); move || { let h = sha512_symlink(path.as_path()); WorkerMessage { index: index, hash: SHA512(h) } } }); }, _ => () }; }, WalkerMessage::Done => done = true, } } else if pool.pending > 0 { // Handle finished results let message = pool.recv(); let index = message.index; let item = worklist.get(index).unwrap(); use WorkerMessageHash::{MD5,SHA512}; match message.hash { MD5(hash) => { *item.md5.borrow_mut() = match hash { Ok(h @ _) => HashOption::Some(h), Err(e @ _) => HashOption::Err(e) } }, SHA512(hash) => { *item.sha512.borrow_mut() = match hash { Ok(h @ _) => HashOption::Some(h), Err(e @ _) => HashOption::Err(e) } } } } // Consume finished entries while worklist.used() > 0 { if { use HashOption::Pending; let front = worklist.get_front().unwrap(); let ref md5 = *front.md5.borrow(); let ref sha512 = *front.sha512.borrow(); match (md5, sha512) { (&Pending, _) | (_, &Pending) => true, (_, _) => false } } { // First entry still has pending work break; } else { let front = worklist.pop_front().unwrap(); { let ref md5 = *front.md5.borrow(); let ref sha512 = *front.sha512.borrow(); use HashOption::{Err}; let error = match ( md5, sha512 ) { (&Err(ref e @ _), _) => Option::Some(e), (_, &Err(ref e @ _)) => Option::Some(e), (_, _) => Option::None }; if flags & FLAGS_CLEAR_XATTR != 0 { continue; } match error { Option::Some(e) => { // Failed to checksum file, report and drop reportln!("Error: {:?} {}", front.inode.path, e); continue } _ => () } }; if ! (zero_terminated || check_name(front.inode.path.as_os_str())) { continue } match front.kind { ' ' => { // Start of new path bprint!("#path: {}", front.inode.path.display()); zprintln!(); continue }, 'F' | 'L' => { let m = &front.inode.metadata.unwrap(); bprint!("{}:{}:{}:{}:{:o}:", m.size(), m.mtime(), m.uid(), m.gid(), m.mode() & 0o7777); }, _ => { let m = &front.inode.metadata.unwrap(); bprint!("::{}:{}:{:o}:", m.uid(), m.gid(), m.mode() & 0o7777); } } use HashOption::{None,Some,Unused}; match *front.md5.borrow() { None => bprint!(":"), Some(ref hash) => bprint!("{}:", hash), Unused => (), _ => unreachable!() } match *front.sha512.borrow() { None => bprint!(":"), Some(ref hash) => bprint!("{}:", hash), Unused => (), _ => unreachable!() } let mut path = front.inode.path.as_path(); loop { match path.strip_prefix("./") { Ok(p) => path = p, _ => break } } bprint!("{}:", front.kind); bwrite!(path.as_os_str().as_bytes()); zprintln!(); } } } bprint!("#endTOC"); zprintln!(); } fn main() { let matches = App::new("hashtoc") .version("1.0") .author("Anders Blomdell ") .about("Generate a Table Of Contents with HASHes for all files") .arg(Arg::with_name("jobs") .short("j") .long("jobs") .takes_value(true) .help("number of jobs [threads] to use for HASH calculations")) .arg(Arg::with_name("lookahead") .short("l") .long("lookahead") .takes_value(true) .help("size of lookahead buffer for HASH calculations [jobs*10]")) .arg(Arg::with_name("xattr") .short("x") .long("xattr") .conflicts_with_all(&["read_xattr", "write_xattr", "clear_xattr"]) .help("read/write HASH extended attribute(s)")) .arg(Arg::with_name("read_xattr") .short("r") .long("read-xattr") .conflicts_with_all(&["xattr", "clear_xattr"]) .help(concat!("read HASH extended attribute(s)\n", "(remove if mtime mismatch detected)"))) .arg(Arg::with_name("write_xattr") .short("w") .long("write-xattr") .conflicts_with_all(&["xattr", "clear_xattr"]) .help("write HASH extended attribute(s)")) .arg(Arg::with_name("clear_xattr") .short("c") .long("clear-xattr") .conflicts_with_all(&["xattr", "read_xattr", "write_xattr"]) .help("clear hash extended attribute(s)")) .arg(Arg::with_name("md5") .long("md5") .help("Calculate MD5 hash")) .arg(Arg::with_name("sha512") .long("sha512") .help("Calculate SHA512 hash")) .arg(Arg::with_name("verbose") .short("v") .conflicts_with("quiet") .multiple(true) .help("Sets the level of verbosity")) .arg(Arg::with_name("quiet") .short("q") .conflicts_with("verbose") .help("Sets the level of verbosity")) .arg(Arg::with_name("zero-terminated") .short("z") .long("zero-terminated") .help("End lines with NULL character")) .arg(Arg::with_name("max_age") .short("m") .long("max-age") .takes_value(true) .help("max age of HASH extended attribute(s)")) .arg(Arg::with_name("PATH") .help("path(s) to traverse") .required(true) .multiple(true) .index(1)) .get_matches(); let (worker, tx) = { let (tx, rx) = channel(); let args = matches.clone(); let worker = thread::spawn(move|| { dispatcher(args, rx); }); (worker, tx) }; let callback = | p:&Path, m:&Metadata | { tx.send( WalkerMessage::Inode( Inode{path:p.to_owned(), metadata:Some(m.clone())})).unwrap(); }; let paths : Vec<_> = matches.values_of_os("PATH").unwrap().collect(); // let mut paths : Vec<_> = matches.values_of_os("PATH").unwrap().collect(); // paths.sort(); for p in paths { let path = std::path::Path::new(p); let _ = tx.send(WalkerMessage::Start(path.to_owned())); walk::visit(std::path::Path::new(p), &callback).unwrap(); } tx.send(WalkerMessage::Done).unwrap(); worker.join().unwrap(); }