Commit 1071a298 authored by Anders Blomdell's avatar Anders Blomdell
Browse files

First working version

parents
target
*~
/Cargo.lock
\ No newline at end of file
[package]
name = "hashtoc"
version = "0.1.0"
authors = ["Anders Blomdell <anders.blomdell@control.lth.se>"]
# build.rs runs `make SHIPPING` to build the C hashing code before the crate is compiled.
build = "build.rs"
# NOTE(review): Cargo's `links` key normally names a single native library;
# confirm that this comma-separated value is intended.
links = "libhashtoc.a,libssl"
[dependencies]
clap = "2.20.5"
libc = "0.2.21"
threadpool = "1.3.2"
# Base names of the C sources (src/<name>.c) whose objects go into libhash.a.
LIB_FILES=md5sum sha512sum
# Default target: delegate the whole build to cargo.
all: CARGO
.PHONY: CARGO
CARGO:
CARGO_HOME=/var/tmp/.cargo cargo build --color=never
# SHIPPING is the target build.rs invokes (passing OUT_DIR=<dir>/): the static
# library plus the generated flag definitions for C and for Rust.
.PHONY: SHIPPING
SHIPPING: $(OUT_DIR)libhash.a \
src/libhash.h \
src/libhash.rs
# Both flag files are generated by src/libhash.sh (--h for C, --rs for Rust).
src/libhash.h: src/libhash.sh
./src/libhash.sh --h > $@
src/libhash.rs: src/libhash.sh
./src/libhash.sh --rs > $@
# The archive depends on one member per entry of LIB_FILES; each member is
# built from the object file of the same name in OUT_DIR.
$(OUT_DIR)libhash.a: $(OUT_DIR)libhash.a($(LIB_FILES:%=$(OUT_DIR)%.o)) Makefile
$(OUT_DIR)libhash.a(%.o): $(OUT_DIR)%.o
# Objects are compiled position independent, with all warnings treated as errors.
$(OUT_DIR)%.o: src/%.c Makefile
gcc -fPIC -Wall -Werror -c -o $@ $<
use std::process::Command;
use std::env;
use std::path::Path;
/// Build script: runs `make SHIPPING` to build the C hashing code into
/// `OUT_DIR` and then tells cargo how to link it.
///
/// # Panics
///
/// Panics (failing the build) when `OUT_DIR` is not set, when `make` cannot
/// be started, or when `make` exits unsuccessfully.
fn main() {
    let out_dir = env::var("OUT_DIR")
        .expect("OUT_DIR is not set; this build script must be run by cargo");

    // The Makefile expects OUT_DIR to end with a slash because it prefixes
    // file names with it directly ($(OUT_DIR)libhash.a).
    let status = Command::new("make")
        .arg(format!("OUT_DIR={}/", out_dir))
        .arg("SHIPPING")
        .status()
        .expect("failed to run `make`");
    if !status.success() {
        panic!("`make SHIPPING` failed: {}", status);
    }

    println!("cargo:rustc-link-search=native={}", out_dir);
    // The Makefile produces libhash.a (and src/hash.rs links against "hash"),
    // so that is the static library to request here.
    println!("cargo:rustc-link-lib=static=hash");
    println!("cargo:rustc-link-lib=dylib=crypto");
}
/libhash.h
/libhash.rs
\ No newline at end of file
use libc::{c_char, c_int};
use std::ffi::CString;
use std::os::unix::ffi::OsStrExt;
use std::path::Path;
use std::io::{Error, Result};
#[link(name = "hash")]
extern {
    // C helper from libhash: takes a NUL-terminated path, a flag word and a
    // 16-byte output buffer; a negative return value signals failure.
    fn hash_md5_file(path: *const c_char,
                     flags: c_int,
                     hash: &mut [u8;16]) -> c_int;
}

/// Returns the MD5 digest computed by `hash_md5_file` for `path`, formatted
/// as 32 lowercase hexadecimal characters.
///
/// `flags` is passed to the C helper unchanged (presumably a combination of
/// the `libhash::FLAGS_*` constants; verify against the C source).
///
/// # Errors
///
/// Returns an `InvalidInput` error when `path` contains an interior NUL byte,
/// and `Error::last_os_error()` when the C helper returns a negative value
/// (assumes the helper leaves the cause in `errno`; TODO confirm).
pub fn md5_file(path: &Path,
                flags: c_int) -> Result<String>
{
    let mut hash = [0u8; 16];
    // `?` turns a NulError into an io::Error instead of panicking.
    let cstring = CString::new(path.as_os_str().as_bytes())?;
    // SAFETY: `cstring` is a valid NUL-terminated string that outlives the
    // call, and `hash` is an exclusively borrowed buffer of exactly the size
    // named in the declaration above.
    let status = unsafe { hash_md5_file(cstring.as_ptr(), flags, &mut hash) };
    if status < 0 {
        return Err(Error::last_os_error());
    }
    Ok(hash.iter().map(|b| format!("{:02x}", b)).collect())
}
#[link(name = "hash")]
extern {
    // C helper from libhash: takes a NUL-terminated path, a flag word and a
    // 16-byte output buffer; a negative return value signals failure.
    fn hash_md5_symlink(path: *const c_char,
                        flags: c_int,
                        hash: &mut [u8;16]) -> c_int;
}

/// Returns the MD5 digest computed by `hash_md5_symlink` for the symbolic
/// link at `path`, formatted as 32 lowercase hexadecimal characters.
///
/// `flags` is passed to the C helper unchanged (presumably a combination of
/// the `libhash::FLAGS_*` constants; verify against the C source).
///
/// # Errors
///
/// Returns an `InvalidInput` error when `path` contains an interior NUL byte,
/// and `Error::last_os_error()` when the C helper returns a negative value
/// (assumes the helper leaves the cause in `errno`; TODO confirm).
pub fn md5_symlink(path: &Path,
                   flags: c_int) -> Result<String>
{
    let mut hash = [0u8; 16];
    // `?` turns a NulError into an io::Error instead of panicking.
    let cstring = CString::new(path.as_os_str().as_bytes())?;
    // SAFETY: `cstring` is a valid NUL-terminated string that outlives the
    // call, and `hash` is an exclusively borrowed buffer of exactly the size
    // named in the declaration above.
    let status = unsafe { hash_md5_symlink(cstring.as_ptr(), flags, &mut hash) };
    if status < 0 {
        return Err(Error::last_os_error());
    }
    Ok(hash.iter().map(|b| format!("{:02x}", b)).collect())
}
#[link(name = "hash")]
extern {
    // C helper from libhash: takes a NUL-terminated path, a flag word and a
    // 64-byte output buffer; a negative return value signals failure.
    fn hash_sha512_file(path: *const c_char,
                        flags: c_int,
                        hash: &mut [u8;64]) -> c_int;
}

/// Returns the SHA-512 digest computed by `hash_sha512_file` for `path`,
/// formatted as 128 lowercase hexadecimal characters.
///
/// `flags` is passed to the C helper unchanged (presumably a combination of
/// the `libhash::FLAGS_*` constants; verify against the C source).
///
/// # Errors
///
/// Returns an `InvalidInput` error when `path` contains an interior NUL byte,
/// and `Error::last_os_error()` when the C helper returns a negative value
/// (assumes the helper leaves the cause in `errno`; TODO confirm).
pub fn sha512_file(path: &Path,
                   flags: c_int) -> Result<String>
{
    let mut hash = [0u8; 64];
    // `?` turns a NulError into an io::Error instead of panicking.
    let cstring = CString::new(path.as_os_str().as_bytes())?;
    // SAFETY: `cstring` is a valid NUL-terminated string that outlives the
    // call, and `hash` is an exclusively borrowed buffer of exactly the size
    // named in the declaration above.
    let status = unsafe { hash_sha512_file(cstring.as_ptr(), flags, &mut hash) };
    if status < 0 {
        return Err(Error::last_os_error());
    }
    Ok(hash.iter().map(|b| format!("{:02x}", b)).collect())
}
#[link(name = "hash")]
extern {
    // C helper from libhash: takes a NUL-terminated path, a flag word and a
    // 64-byte output buffer; a negative return value signals failure.
    fn hash_sha512_symlink(path: *const c_char,
                           flags: c_int,
                           hash: &mut [u8;64]) -> c_int;
}

/// Returns the SHA-512 digest computed by `hash_sha512_symlink` for the
/// symbolic link at `path`, formatted as 128 lowercase hexadecimal characters.
///
/// `flags` is passed to the C helper unchanged (presumably a combination of
/// the `libhash::FLAGS_*` constants; verify against the C source).
///
/// # Errors
///
/// Returns an `InvalidInput` error when `path` contains an interior NUL byte,
/// and `Error::last_os_error()` when the C helper returns a negative value
/// (assumes the helper leaves the cause in `errno`; TODO confirm).
pub fn sha512_symlink(path: &Path,
                      flags: c_int) -> Result<String>
{
    let mut hash = [0u8; 64];
    // `?` turns a NulError into an io::Error instead of panicking.
    let cstring = CString::new(path.as_os_str().as_bytes())?;
    // SAFETY: `cstring` is a valid NUL-terminated string that outlives the
    // call, and `hash` is an exclusively borrowed buffer of exactly the size
    // named in the declaration above.
    let status = unsafe { hash_sha512_symlink(cstring.as_ptr(), flags, &mut hash) };
    if status < 0 {
        return Err(Error::last_os_error());
    }
    Ok(hash.iter().map(|b| format!("{:02x}", b)).collect())
}
#!/bin/sh
# Exactly one argument (the output mode) is required; say so on stderr so the
# message cannot end up in a redirected output file.
if [ $# -ne 1 ] ; then
    echo "usage: $0 --h | --rs" >&2
    exit 1
fi
## FLAGS_READ_XATTR std::os::raw::c_int 0x0001
## FLAGS_WRITE_XATTR std::os::raw::c_int 0x0002
## FLAGS_CLEAR_XATTR std::os::raw::c_int 0x0004
## FLAGS_MAX_AGE std::os::raw::c_int 0x0008
## FLAGS_NULL_TERMINATED std::os::raw::c_int 0x0010
## FLAGS_QUIET std::os::raw::c_int 0x0000
## FLAGS_VERBOSE0 std::os::raw::c_int 0x0000
## FLAGS_VERBOSE1 std::os::raw::c_int 0x0020
## FLAGS_VERBOSE2 std::os::raw::c_int 0x0040
## FLAGS_VERBOSE3 std::os::raw::c_int 0x0060
# Print a C enum with one enumerator per "## NAME TYPE VALUE" line of this
# script; the TYPE column is only meaningful for the Rust output and is
# ignored here.
gen_h() {
    echo "enum {"
    # "$0" is quoted so the script still finds itself when its path contains
    # whitespace.
    grep '^##' "$0" | sed -re 's/[ \t]+/ /g' | cut -d' ' -f 2- \
        | while read -r flag _type value ; do
            echo " ${flag}=${value},"
        done
    echo "};"
}
# Print Rust source with one `pub const NAME : TYPE = VALUE;` item per
# "## NAME TYPE VALUE" line of this script.
gen_rs() {
    echo "use std;"
    echo
    # "$0" is quoted so the script still finds itself when its path contains
    # whitespace.
    grep '^##' "$0" | sed -re 's/[ \t]+/ /g' | cut -d' ' -f 2- \
        | while read -r flag type value ; do
            echo "#[allow(dead_code)]"
            echo "pub const ${flag} : ${type} = ${value};"
        done
}
# Select the generator from the single command line argument.
case "$1" in
    --h)
        gen_h
        ;;
    --rs)
        gen_rs
        ;;
    *)
        # Report the rejected option on stderr instead of failing silently.
        echo "$0: unknown option '$1' (expected --h or --rs)" >&2
        exit 1
        ;;
esac
extern crate clap;
extern crate libc;
extern crate threadpool;
use clap::{Arg, App};
use std::os::unix::fs::{FileTypeExt, MetadataExt};
use std::fs::{Metadata};
use std::path::{Path, PathBuf};
use std::sync::mpsc::{channel, Receiver};
use std::thread;
use std::fmt;
use std::process::exit;
use threadpool::ThreadPool;
use std::collections::HashMap;
use std::cell::RefCell;
use libc::c_int;
use std::result::Result;
use std::io::Error as IOError;
use std::io::{self, Write};
mod libhash;
mod hash;
use hash::{md5_file, md5_symlink, sha512_file, sha512_symlink};
mod walk;
// Like `println!`, but writes to stderr and silently ignores write errors.
macro_rules! reportln {
    () => ({
        let _ = writeln!(io::stderr());
    });
    ($($arg:tt)*) => ({
        let _ = writeln!(io::stderr(), $($arg)*);
    });
}
/// A file-system object: where it was found and what `Metadata` it had.
struct Inode {
    path: PathBuf,
    metadata: Metadata,
}

/// Debug output shows the path only; the metadata is left out.
impl fmt::Debug for Inode {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_fmt(format_args!("Inode {{ path: {:?} }}", self.path))
    }
}
/// Message received by the dispatcher on its `from_walker` channel.
enum WalkerMessage {
/// A file-system object to put into the table of contents.
Inode{path: PathBuf, metadata: Metadata},
/// No more inodes will follow; the dispatcher stops reading once it sees this.
Done
}
/// Outcome of one hash calculation, tagged with the algorithm that produced it.
#[derive(Debug)]
enum WorkerMessageHash {
/// Hex digest string or I/O error from the MD5 calculation.
MD5(Result<String, IOError>),
/// Hex digest string or I/O error from the SHA512 calculation.
SHA512(Result<String, IOError>)
}
/// Message sent from a pool worker back to the dispatcher.
#[derive(Debug)]
struct WorkerMessage {
/// Work list index (as returned by `WorkList::push_back`) of the entry the result belongs to.
index:usize,
/// The result of the hash calculation.
hash: WorkerMessageHash
}
/// State of one hash slot (MD5 or SHA512) of a `HashEntry`.
#[derive(Debug)]
enum HashOption<T, E> {
/// Hash requested, but the inode is neither a regular file nor a symlink; printed as an empty field.
None,
/// Hash not requested; no field is printed for it.
Unused,
/// Hash requested and submitted to the thread pool; result not yet received.
Pending,
/// The worker reported an error; the entry is reported on stderr instead of being printed.
Err(E),
/// The worker delivered the hash.
Some(T)
}
/// One work list entry: an inode plus the state of its hash calculations.
#[derive(Debug)]
struct HashEntry {
/// The file-system object this entry describes.
inode : Inode,
/// Type letter produced by `file_type_to_flag` ('F', 'L', 'D', ...).
kind : char,
// The hash slots are RefCells because the dispatcher stores worker results
// through the shared reference it gets from `WorkList::get`.
md5 : RefCell<HashOption<String, std::io::Error>>,
sha512 : RefCell<HashOption<String, std::io::Error>>
}
/// FIFO of `HashEntry` values addressed by a monotonically increasing index.
///
/// Live entries occupy the indexes `first..last`; an index handed out by
/// `push_back` stays valid for `get` until the entry is popped.
#[derive(Debug)]
struct WorkList {
    first: usize,
    last: usize,
    list: HashMap<usize, HashEntry>,
}

#[allow(dead_code)]
impl WorkList {
    /// Creates an empty work list.
    fn new() -> WorkList {
        WorkList {
            first: 0,
            last: 0,
            list: HashMap::new(),
        }
    }

    /// Number of entries pushed but not yet popped.
    fn used(&self) -> usize {
        self.last - self.first
    }

    /// Borrows the oldest entry, or `None` when the list is empty.
    fn get_front(&self) -> Option<&HashEntry> {
        match self.used() {
            0 => None,
            _ => self.list.get(&self.first),
        }
    }

    /// Borrows the entry stored under `index`, if it is still in the list.
    fn get(&self, index: usize) -> Option<&HashEntry> {
        self.list.get(&index)
    }

    /// Appends `entry` and returns the index it was stored under.
    fn push_back(&mut self, entry: HashEntry) -> usize {
        let index = self.last;
        self.last = index + 1;
        self.list.insert(index, entry);
        index
    }

    /// Removes and returns the oldest entry, or `None` when the list is empty.
    fn pop_front(&mut self) -> Option<HashEntry> {
        if self.used() == 0 {
            return None;
        }
        let index = self.first;
        self.first = index + 1;
        self.list.remove(&index)
    }
}
/// Translates the xattr command line switches into the `libhash::FLAGS_*`
/// bit mask passed to the hash functions.
///
/// * no switch: `0`
/// * `xattr`: read and write
/// * `read_xattr` and/or `write_xattr`: the corresponding flag(s)
/// * `clear_xattr`: clear
///
/// # Panics
///
/// Panics with "Invalid flags" for combinations that the argument
/// definitions in `main` declare as conflicting (`xattr` or `clear_xattr`
/// together with any other xattr switch).
fn options_to_flags(options: &clap::ArgMatches) -> c_int {
    let xattr = options.is_present("xattr");
    let read_xattr = options.is_present("read_xattr");
    let write_xattr = options.is_present("write_xattr");
    let clear_xattr = options.is_present("clear_xattr");
    match (xattr, read_xattr, write_xattr, clear_xattr) {
        (true, false, false, false) => {
            libhash::FLAGS_READ_XATTR | libhash::FLAGS_WRITE_XATTR
        },
        // read_xattr and write_xattr do not conflict with each other, so each
        // may be given alone or both together (this arm also yields 0 when
        // no switch was given).
        (false, read, write, false) => {
            let mut flags: c_int = 0;
            if read {
                flags |= libhash::FLAGS_READ_XATTR;
            }
            if write {
                flags |= libhash::FLAGS_WRITE_XATTR;
            }
            flags
        },
        (false, false, false, true) => {
            libhash::FLAGS_CLEAR_XATTR
        },
        (_, _, _, _) => {
            panic!("Invalid flags");
        }
    }
}
/// Maps a file type to the one-letter code used in the output:
/// `F` regular file, `L` symlink, `D` directory, `C` character device,
/// `B` block device, `S` socket, `P` fifo.
///
/// # Panics
///
/// Panics via `unreachable!()` when none of the checks matches.
fn file_type_to_flag(file_type: &std::fs::FileType) -> char {
    if file_type.is_file() {
        return 'F';
    }
    if file_type.is_symlink() {
        return 'L';
    }
    if file_type.is_dir() {
        return 'D';
    }
    if file_type.is_char_device() {
        return 'C';
    }
    if file_type.is_block_device() {
        return 'B';
    }
    if file_type.is_socket() {
        return 'S';
    }
    if file_type.is_fifo() {
        return 'P';
    }
    unreachable!()
}
/// Central loop of the program: receives inodes from the walker, farms the
/// requested hash calculations out to a thread pool and prints one line per
/// inode to stdout, in the order in which the inodes were received.
///
/// `options` supplies "jobs" (default 1), "lookahead" (default jobs * 10),
/// "md5", "sha512" and the xattr switches; `from_walker` is the channel the
/// `WalkerMessage`s arrive on.
///
/// Entries for which a hash calculation failed are reported on stderr and
/// left out of the output.
fn dispatcher(options: clap::ArgMatches,
from_walker: Receiver<WalkerMessage>) {
// Parse option $n as type $t, or evaluate to $d when the option is absent.
// A value that does not parse prints a message and exits with status 1.
macro_rules! parse_opt {
($n:expr, $t:ty, $d:expr) => (
if let Some(v) = options.value_of($n) {
match v.parse::<$t>() {
Ok(val) => val,
Err(_) => {
println!("Argument '--{} {}' cant be parsed as '{}'",
$n, v, stringify!($t));
exit(1);
}
}
} else {
$d
}
)
};
let jobs = parse_opt!("jobs", usize, 1);
let lookahead = parse_opt!("lookahead", usize, jobs * 10);
let calc_md5 = options.is_present("md5");
let calc_sha512 = options.is_present("sha512");
let flags = options_to_flags(&options);
// Number of hash jobs submitted to the pool whose result has not been
// received yet (one inode contributes up to two: MD5 and SHA512).
let mut pending = 0 as usize;
let pool = ThreadPool::new(jobs);
let mut worklist = WorkList::new();
let (worker_to_dispatcher, from_pool) = channel::<WorkerMessage>();
// Set once the walker has sent WalkerMessage::Done.
let mut done = false;
loop {
// reportln!("XXX {} {} {}", done, pending, worklist.used());
// Finished when the walker is done and every entry has been consumed.
if done && worklist.used() == 0 {
break;
// Note: the next `if` starts a new statement; it is not an `else if`.
// Read more input only while the walker is still active, fewer than `jobs`
// hash jobs are outstanding and the work list holds fewer than `lookahead`
// entries.
} if !done && pending < jobs && worklist.used() < lookahead {
// Get paths to checksum
let message = match from_walker.recv() {
Ok(message) => message,
// recv() failed: leave the main loop.
Err(_) => break
};
match message {
WalkerMessage::Inode{path, metadata} => {
// Push to worklist
use HashOption::{Pending, Unused, None};
// Regular files and symlinks get each requested hash (Pending); for every
// other kind a requested hash is None. Hashes that were not requested are
// Unused.
let (kind, md5, sha512) = match
file_type_to_flag(&metadata.file_type()) {
kind @ 'F' | kind @ 'L' =>
(kind,
if calc_md5 { Pending } else { Unused },
if calc_sha512 { Pending } else { Unused }
),
kind @ _ =>
(kind,
if calc_md5 { None } else { Unused },
if calc_sha512 { None } else { Unused }
)
};
let index = worklist.push_back(HashEntry {
inode: Inode {
path:path.clone(),
metadata: metadata.clone()
},
kind: kind,
md5: RefCell::new(md5),
sha512: RefCell::new(sha512),
});
// Spawn off needed work
// When $guard holds, submit a job that runs $func on the path and sends the
// result, wrapped in $message and tagged with the work list index, back to
// this thread.
macro_rules! execute {
($guard:expr, $func:expr, $message:ident) => (
if $guard {
let path = path.clone();
let tx = worker_to_dispatcher.clone();
let flags = flags;
pending += 1;
pool.execute(move || {
let h = $func(path.as_path(), flags);
tx.send( WorkerMessage {
index: index,
hash:$message(h)
}).unwrap();
});
}
)
};
match kind {
'F' => {
use WorkerMessageHash::{MD5, SHA512};
execute!(calc_md5, md5_file, MD5);
execute!(calc_sha512, sha512_file, SHA512);
},
'L' => {
use WorkerMessageHash::{MD5, SHA512};
execute!(calc_md5, md5_symlink, MD5);
execute!(calc_sha512, sha512_symlink, SHA512);
},
_ => ()
};
},
WalkerMessage::Done =>
done = true,
}
} else if pending > 0 {
// Handle paths being processed
// Blocks until one worker result arrives and stores it in its entry.
let message = from_pool.recv().unwrap();
let index = message.index;
let item = worklist.get(index).unwrap();
pending -= 1;
use WorkerMessageHash::{MD5,SHA512};
match message.hash {
MD5(hash) => {
*item.md5.borrow_mut() = match hash {
Ok(h @ _) => HashOption::Some(h),
Err(e @ _) => HashOption::Err(e)
}
},
SHA512(hash) => {
*item.sha512.borrow_mut() = match hash {
Ok(h @ _) => HashOption::Some(h),
Err(e @ _) => HashOption::Err(e)
}
}
}
}
// Consume finished entries
// Entries are only taken from the front, which keeps the output in arrival
// order; the loop stops at the first entry that still waits for a hash.
while worklist.used() > 0 {
if { use HashOption::Pending;
let front = worklist.get_front().unwrap();
let ref md5 = *front.md5.borrow();
let ref sha512 = *front.sha512.borrow();
match (md5, sha512) {
(&Pending, _) | (_, &Pending) => true,
(_, _) => false
} } {
// First entry still has pending work
break;
} else{
let front = worklist.pop_front().unwrap();
{
let ref md5 = *front.md5.borrow();
let ref sha512 = *front.sha512.borrow();
use HashOption::{Err};
// Pick the MD5 error if there is one, otherwise the SHA512 error.
let error = match ( md5, sha512 ) {
(&Err(ref e @ _), _) => Option::Some(e),
(_, &Err(ref e @ _)) => Option::Some(e),
(_,_) => Option::None
};
match error {
Option::Some(e) => {
// Failed to checksum file, report and drop
reportln!("Error: {:?} {}", front.inode.path, e);
// Continues the enclosing `while` loop with the next entry.
continue;
}
_ => ()
}
};
// Output line: size:mtime:uid:gid:mode: for files and symlinks, with size
// and mtime left empty for everything else; then the hash fields, then
// kind:path. The mode is printed in octal, masked to the low 12 bits.
match front.kind {
'F' | 'L' => {
let m = &front.inode.metadata;
print!("{}:{}:{}:{}:{:o}:",
m.size(), m.mtime(), m.uid(), m.gid(),
m.mode() & 0o7777);
},
_ => {
let m = &front.inode.metadata;
print!("::{}:{}:{:o}:",
m.uid(), m.gid(), m.mode() & 0o7777);
}
}
use HashOption::{None,Some,Unused};
// Pending and Err were both handled above, hence the unreachable!() arms.
match *front.md5.borrow() {
None => print!(":"),
Some(ref hash) => print!("{}:", hash),
Unused => (),
_ => unreachable!()
}
match *front.sha512.borrow() {
None => print!(":"),
Some(ref hash) => print!("{}:", hash),
Unused => (),
_ => unreachable!()
}
// Strip every leading "./" from the path before printing it.
let mut path = front.inode.path.as_path();
loop {
match path.strip_prefix("./") {
Ok(p) => path = p,
_ => break
}
}
println!("{}:{}", front.kind, path.display());
}
}
}
}
fn main() {
let matches = App::new("hashtoc")
.version("1.0")
.author("Anders Blomdell <anders.blomdell@control.lth.se>")
.about("Generate a Table Of Contents with HASHes for all files")
.arg(Arg::with_name("jobs")
.short("j")
.long("jobs")
.takes_value(true)
.help("number of jobs [threads] to use for HASH calculations"))
.arg(Arg::with_name("lookahead")
.short("l")
.long("lookahead")
.takes_value(true)
.help("size of lookahead buffer for HASH calculations [jobs*10]"))
.arg(Arg::with_name("xattr")
.short("x")
.long("xattr")
.conflicts_with_all(&["read_xattr", "write_xattr", "clear_xattr"])
.help("read/write HASH extended attribute(s)"))
.arg(Arg::with_name("read_xattr")
.short("r")
.long("read-xattr")
.conflicts_with_all(&["xattr", "clear_xattr"])
.help(concat!("read HASH extended attribute(s)\n",
"(remove if mtime mismatch detected)")))
.arg(Arg::with_name("write_xattr")
.short("w")
.long("write-xattr")
.conflicts_with_all(&["xattr", "clear_xattr"])
.help("write HASH extended attribute(s)"))
.arg(Arg::with_name("clear_xattr")
.short("c")
.long("clear-xattr")
.conflicts_with_all(&["xattr", "read_xattr", "write_xattr"])
.help("clear hash extended attribute(s)"))
.arg(Arg::with_name("md5")
.long("md5")
.help("Calculate MD5 hash"))
.arg(Arg::with_name("sha512")
.long("sha512")
.help("Calculate SHA512 hash"))
.arg(Arg::with_name("verbose")
.short("v")
.multiple(true)
.help("Sets the level of verbosity"))
.arg(Arg::with_name("max_age")