https://www.codechef.com/viewsolution/47281353
This solution got accepted without TLE, and it has 10^17 operations.
Please explain, someone.
The value of sum
is never used, so the compiler probably doesn’t even bother to generate the code to compute it.
Man that detection is so cool!
If you see something like
for (i = 0; i < n; i++) for (j = 0; j < m; j++)
sum++; // (O(n * m))
Compiler wont actually loop n * m times, but instead it see that sum variable wiil be just incremented n * m, so it convert above code into:-
sum += n * m; (O(1))
That why, you code is very fast.
UPD:- If you’re testing this locally, add -O2 (flag for second level optimization) flag to your compilling command.
I tried printing sum, getting WA instead of TLE which means sum is calculated and not skipped
Hey, I tried this code:
https://www.codechef.com/viewsolution/47285551
#include <iostream>
#include <algorithm>
using namespace std;
int main(){
long long sum = 1;
for(long long i = 0; i<999999999L; i++)
for(int j = 0; j<10000000; j++)
sum+=i*j;
int n;
cin>>n;
cout<<n;
return 0;
}
This doesn’t allow compiler to pre-compute the value of sum, how did this get accepted then?
Not really - see @ishank_kat162’s post above yours.
As with your first example, sum
isn’t used so the entire calculation is skipped.
Edit:
I’d also be very careful with assuming what a compiler can and can’t pre-compute:
[simon@simon-laptop][06:35:24]
[~/devel/hackerrank/otherpeoples]>cat arnav_07-blah.cpp
#include <iostream>
#include <algorithm>
using namespace std;
int main(){
long long sum = 1;
for(long long i = 0; i<999999999L; i++)
for(int j = 0; j<10000000; j++)
sum+=i*j;
cout << "sum: " << sum << endl;
int n;
cin>>n;
cout<<n;
return 0;
}
[simon@simon-laptop][06:35:31]
[~/devel/hackerrank/otherpeoples]>clang++ -std=c++14 arnav_07-blah.cpp -O3
[simon@simon-laptop][06:35:37]
[~/devel/hackerrank/otherpeoples]>echo "5" | ./a.out
sum: -2744058568320641855
5[simon@simon-laptop][06:35:38]
[~/devel/hackerrank/otherpeoples]>clang++ -std=c++14 arnav_07-blah.cpp -O3 -emit-llvm -c -S
Now search for -2744058568320641855
here:
[simon@simon-laptop][06:35:42]
[~/devel/hackerrank/otherpeoples]>cat arnav_07-blah.ll
; ModuleID = 'arnav_07-blah.cpp'
source_filename = "arnav_07-blah.cpp"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
%"class.std::ios_base::Init" = type { i8 }
%"class.std::basic_ostream" = type { i32 (...)**, %"class.std::basic_ios" }
%"class.std::basic_ios" = type { %"class.std::ios_base", %"class.std::basic_ostream"*, i8, i8, %"class.std::basic_streambuf"*, %"class.std::ctype"*, %"class.std::num_put"*, %"class.std::num_get"* }
%"class.std::ios_base" = type { i32 (...)**, i64, i64, i32, i32, i32, %"struct.std::ios_base::_Callback_list"*, %"struct.std::ios_base::_Words", [8 x %"struct.std::ios_base::_Words"], i32, %"struct.std::ios_base::_Words"*, %"class.std::locale" }
%"struct.std::ios_base::_Callback_list" = type { %"struct.std::ios_base::_Callback_list"*, void (i32, %"class.std::ios_base"*, i32)*, i32, i32 }
%"struct.std::ios_base::_Words" = type { i8*, i64 }
%"class.std::locale" = type { %"class.std::locale::_Impl"* }
%"class.std::locale::_Impl" = type { i32, %"class.std::locale::facet"**, i64, %"class.std::locale::facet"**, i8** }
%"class.std::locale::facet" = type <{ i32 (...)**, i32, [4 x i8] }>
%"class.std::basic_streambuf" = type { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, %"class.std::locale" }
%"class.std::ctype" = type <{ %"class.std::locale::facet.base", [4 x i8], %struct.__locale_struct*, i8, [7 x i8], i32*, i32*, i16*, i8, [256 x i8], [256 x i8], i8, [6 x i8] }>
%"class.std::locale::facet.base" = type <{ i32 (...)**, i32 }>
%struct.__locale_struct = type { [13 x %struct.__locale_data*], i16*, i32*, i32*, [13 x i8*] }
%struct.__locale_data = type opaque
%"class.std::num_put" = type { %"class.std::locale::facet.base", [4 x i8] }
%"class.std::num_get" = type { %"class.std::locale::facet.base", [4 x i8] }
%"class.std::basic_istream" = type { i32 (...)**, i64, %"class.std::basic_ios" }
@_ZStL8__ioinit = internal global %"class.std::ios_base::Init" zeroinitializer, align 1
@__dso_handle = external hidden global i8
@_ZSt4cout = external dso_local global %"class.std::basic_ostream", align 8
@.str = private unnamed_addr constant [6 x i8] c"sum: \00", align 1
@_ZSt3cin = external dso_local global %"class.std::basic_istream", align 8
@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_arnav_07_blah.cpp, i8* null }]
declare dso_local void @_ZNSt8ios_base4InitC1Ev(%"class.std::ios_base::Init"*) unnamed_addr #0
; Function Attrs: nounwind
declare dso_local void @_ZNSt8ios_base4InitD1Ev(%"class.std::ios_base::Init"*) unnamed_addr #1
; Function Attrs: nofree nounwind
declare dso_local i32 @__cxa_atexit(void (i8*)*, i8*, i8*) local_unnamed_addr #2
; Function Attrs: norecurse uwtable
define dso_local i32 @main() local_unnamed_addr #3 {
%1 = alloca i32, align 4
%2 = tail call nonnull align 8 dereferenceable(8) %"class.std::basic_ostream"* @_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l(%"class.std::basic_ostream"* nonnull align 8 dereferenceable(8) @_ZSt4cout, i8* nonnull getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0), i64 5)
%3 = tail call nonnull align 8 dereferenceable(8) %"class.std::basic_ostream"* @_ZNSo9_M_insertIxEERSoT_(%"class.std::basic_ostream"* nonnull @_ZSt4cout, i64 -2744058568320641855)
%4 = bitcast %"class.std::basic_ostream"* %3 to i8**
%5 = load i8*, i8** %4, align 8, !tbaa !2
%6 = getelementptr i8, i8* %5, i64 -24
%7 = bitcast i8* %6 to i64*
%8 = load i64, i64* %7, align 8
%9 = bitcast %"class.std::basic_ostream"* %3 to i8*
%10 = getelementptr inbounds i8, i8* %9, i64 %8
%11 = getelementptr inbounds i8, i8* %10, i64 240
%12 = bitcast i8* %11 to %"class.std::ctype"**
%13 = load %"class.std::ctype"*, %"class.std::ctype"** %12, align 8, !tbaa !5
%14 = icmp eq %"class.std::ctype"* %13, null
br i1 %14, label %15, label %16
15: ; preds = %0
tail call void @_ZSt16__throw_bad_castv() #7
unreachable
16: ; preds = %0
%17 = getelementptr inbounds %"class.std::ctype", %"class.std::ctype"* %13, i64 0, i32 8
%18 = load i8, i8* %17, align 8, !tbaa !10
%19 = icmp eq i8 %18, 0
br i1 %19, label %23, label %20
20: ; preds = %16
%21 = getelementptr inbounds %"class.std::ctype", %"class.std::ctype"* %13, i64 0, i32 9, i64 10
%22 = load i8, i8* %21, align 1, !tbaa !12
br label %29
23: ; preds = %16
tail call void @_ZNKSt5ctypeIcE13_M_widen_initEv(%"class.std::ctype"* nonnull %13)
%24 = bitcast %"class.std::ctype"* %13 to i8 (%"class.std::ctype"*, i8)***
%25 = load i8 (%"class.std::ctype"*, i8)**, i8 (%"class.std::ctype"*, i8)*** %24, align 8, !tbaa !2
%26 = getelementptr inbounds i8 (%"class.std::ctype"*, i8)*, i8 (%"class.std::ctype"*, i8)** %25, i64 6
%27 = load i8 (%"class.std::ctype"*, i8)*, i8 (%"class.std::ctype"*, i8)** %26, align 8
%28 = tail call signext i8 %27(%"class.std::ctype"* nonnull %13, i8 signext 10)
br label %29
29: ; preds = %20, %23
%30 = phi i8 [ %22, %20 ], [ %28, %23 ]
%31 = tail call nonnull align 8 dereferenceable(8) %"class.std::basic_ostream"* @_ZNSo3putEc(%"class.std::basic_ostream"* nonnull %3, i8 signext %30)
%32 = tail call nonnull align 8 dereferenceable(8) %"class.std::basic_ostream"* @_ZNSo5flushEv(%"class.std::basic_ostream"* nonnull %31)
%33 = bitcast i32* %1 to i8*
call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %33) #8
%34 = call nonnull align 8 dereferenceable(16) %"class.std::basic_istream"* @_ZNSirsERi(%"class.std::basic_istream"* nonnull @_ZSt3cin, i32* nonnull align 4 dereferenceable(4) %1)
%35 = load i32, i32* %1, align 4, !tbaa !13
%36 = call nonnull align 8 dereferenceable(8) %"class.std::basic_ostream"* @_ZNSolsEi(%"class.std::basic_ostream"* nonnull @_ZSt4cout, i32 %35)
call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %33) #8
ret i32 0
}
; Function Attrs: argmemonly nounwind willreturn
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #4
; Function Attrs: argmemonly nounwind willreturn
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #4
declare dso_local nonnull align 8 dereferenceable(16) %"class.std::basic_istream"* @_ZNSirsERi(%"class.std::basic_istream"*, i32* nonnull align 4 dereferenceable(4)) local_unnamed_addr #0
declare dso_local nonnull align 8 dereferenceable(8) %"class.std::basic_ostream"* @_ZNSolsEi(%"class.std::basic_ostream"*, i32) local_unnamed_addr #0
declare dso_local nonnull align 8 dereferenceable(8) %"class.std::basic_ostream"* @_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l(%"class.std::basic_ostream"* nonnull align 8 dereferenceable(8), i8*, i64) local_unnamed_addr #0
declare dso_local nonnull align 8 dereferenceable(8) %"class.std::basic_ostream"* @_ZNSo9_M_insertIxEERSoT_(%"class.std::basic_ostream"*, i64) local_unnamed_addr #0
declare dso_local nonnull align 8 dereferenceable(8) %"class.std::basic_ostream"* @_ZNSo3putEc(%"class.std::basic_ostream"*, i8 signext) local_unnamed_addr #0
declare dso_local nonnull align 8 dereferenceable(8) %"class.std::basic_ostream"* @_ZNSo5flushEv(%"class.std::basic_ostream"*) local_unnamed_addr #0
; Function Attrs: noreturn
declare dso_local void @_ZSt16__throw_bad_castv() local_unnamed_addr #5
declare dso_local void @_ZNKSt5ctypeIcE13_M_widen_initEv(%"class.std::ctype"*) local_unnamed_addr #0
; Function Attrs: uwtable
define internal void @_GLOBAL__sub_I_arnav_07_blah.cpp() #6 section ".text.startup" {
tail call void @_ZNSt8ios_base4InitC1Ev(%"class.std::ios_base::Init"* nonnull @_ZStL8__ioinit)
%1 = tail call i32 @__cxa_atexit(void (i8*)* bitcast (void (%"class.std::ios_base::Init"*)* @_ZNSt8ios_base4InitD1Ev to void (i8*)*), i8* getelementptr inbounds (%"class.std::ios_base::Init", %"class.std::ios_base::Init"* @_ZStL8__ioinit, i64 0, i32 0), i8* nonnull @__dso_handle) #8
ret void
}
attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nofree nounwind }
attributes #3 = { norecurse uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #4 = { argmemonly nounwind willreturn }
attributes #5 = { noreturn "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #6 = { uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #7 = { noreturn }
attributes #8 = { nounwind }
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 12.0.0 (https://github.com/llvm/llvm-project.git d34c8c70aae2a5421337c2ccac91130c70511f94)"}
!2 = !{!3, !3, i64 0}
!3 = !{!"vtable pointer", !4, i64 0}
!4 = !{!"Simple C++ TBAA"}
!5 = !{!6, !7, i64 240}
!6 = !{!"_ZTSSt9basic_iosIcSt11char_traitsIcEE", !7, i64 216, !8, i64 224, !9, i64 225, !7, i64 232, !7, i64 240, !7, i64 248, !7, i64 256}
!7 = !{!"any pointer", !8, i64 0}
!8 = !{!"omnipotent char", !4, i64 0}
!9 = !{!"bool", !8, i64 0}
!10 = !{!11, !8, i64 56}
!11 = !{!"_ZTSSt5ctypeIcE", !7, i64 16, !9, i64 24, !7, i64 32, !7, i64 40, !7, i64 48, !8, i64 56, !8, i64 57, !8, i64 313, !8, i64 569}
!12 = !{!8, !8, i64 0}
!13 = !{!14, !14, i64 0}
!14 = !{!"int", !8, i64 0}
(using clang
as its IR output is more readable than assembly - feel free to do the same with gcc
:))