-
Notifications
You must be signed in to change notification settings - Fork 0
/
convert.pl
99 lines (81 loc) · 2.25 KB
/
convert.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/perl
use warnings;
use strict;
use File::Basename;
use autodie;
use File::Path;
use File::Find;
use Parallel::Simple qw( prun );
my $path = $ARGV[0];
die "Please specify which directory to search"
unless -d $path;
sub min {
return ( $_[0], $_[1] )[ $_[0] > $_[1] ];
}
sub par_list {
my $one = $_[0];
my $len = $_[1];
my $chunk_size = $len / 8;
my @subs;
for ( my $i = 0 ; $i < $len ; $i += $chunk_size + 1 ) {
my $j = $i;
push @subs, sub { $one->( $j, min( $j + $chunk_size, $len - 1 ) ) };
}
prun(@subs) or die( Parallel::Simple::errplus() );
}
sub to_txt {
my @files = glob( $path . '/*.zip' );
my $one = sub {
for ( @files[ $_[0] .. $_[1] ] ) {
print("$_\n");
my ( $name, $dir, $ext ) = fileparse( $_, '.zip' );
my $newdir = $dir . $name;
system("mkdir -p $newdir");
system("unzip -qq $_ -d $newdir");
for ( glob( $newdir . '/*.fb2' ) ) {
my ( $name, $dir, $ext ) = fileparse( $_, '.fb2' );
system("xml_grep 'p' $_ --text_only > $dir/$name.txt");
}
unlink glob( $newdir . '/*.fb2' );
unlink $_;
}
};
par_list( $one, scalar @files );
}
sub archive {
opendir my $dh, $path or die "$0: opendir: $!";
my @dirs = grep { -d "$path/$_" && !/^\.{1,2}$/ } readdir($dh);
closedir $dh;
my $one = sub {
for ( @dirs[ $_[0] .. $_[1] ] ) {
system("zip -rj -qq $ARGV[1]/$_.zip $path/$_");
}
};
par_list( $one, scalar @dirs );
}
sub unarchive {
my @files = glob( $path . '/*.zip' );
my $one = sub {
for ( @files[ $_[0] .. $_[1] ] ) {
my ( $name, $dir, $ext ) = fileparse( $_, '.zip' );
my $newdir = $dir . $name;
system("mkdir -p $newdir");
system("unzip -qq $_ -d $newdir");
unlink $_;
}
};
par_list( $one, scalar @files );
}
sub unlink_all_non_russian {
system("rg \"и\" --files-without-match $path > $path/nonru");
open( my $FH, '<', "$path/nonru" ) or die $!;
while (<$FH>) {
$_ =~ s/^\s+|\s+$//g;
unlink $_;
}
close(FH);
}
# to_txt()
# unlink_from_file()
# archive()
# unarchive()